IR: Initial implementation of FP{Double,Single}ToFixed{S,U}{32,64}
This implementation just falls back to the software floating-point implementation.
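A minimal, self-contained sketch of the dispatch idea behind the fallback is below. The names `SoftFPToFixed` and `MakeEntry` are illustrative stand-ins, not dynarmic APIs: the real code calls `FP::FPToFixed` from `common/fp/op.h` and builds the full cartesian product of parameters with the `common/mp` helpers. The point it shows is that every combination of float width, signedness, integer width and rounding mode becomes its own non-capturing lambda (so the parameters are compile-time constants inside the software routine), and the emitter then selects the matching plain function pointer from a lookup table keyed on the instruction's runtime operands.

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <map>
#include <tuple>
#include <utility>

enum class RoundingMode { ToNearest_TieEven, TowardsPlusInfinity, TowardsMinusInfinity, TowardsZero, ToNearest_TieAwayFromZero };

// Illustrative stand-in for the software conversion routine (the real code calls
// FP::FPToFixed): takes the raw bits of an fsize-bit float and returns an isize-bit
// (un)signed integer, saturating on overflow.
template<std::size_t fsize>
std::uint64_t SoftFPToFixed(std::uint64_t input, std::size_t isize, bool is_unsigned, RoundingMode rounding) {
    (void)input; (void)isize; (void)is_unsigned; (void)rounding;
    return 0; // elided: unpack, round, saturate, update exception flags
}

using Key = std::tuple<std::size_t /*fsize*/, bool /*unsigned*/, std::size_t /*isize*/, RoundingMode>;
using Fn  = std::uint64_t (*)(std::uint64_t);

// One entry per parameter combination: the non-capturing lambda bakes the template
// parameters in at compile time and converts to a plain function pointer, which is
// the kind of callee a JIT can emit a direct call to.
template<std::size_t fsize, bool is_unsigned, std::size_t isize, RoundingMode rounding>
std::pair<Key, Fn> MakeEntry() {
    return {Key{fsize, is_unsigned, isize, rounding},
            [](std::uint64_t input) { return SoftFPToFixed<fsize>(input, isize, is_unsigned, rounding); }};
}

int main() {
    // The real implementation enumerates the full cartesian product via common/mp;
    // two entries are enough to demonstrate the lookup.
    const std::map<Key, Fn> lut{
        MakeEntry<64, false, 32, RoundingMode::TowardsZero>(),
        MakeEntry<32, true, 64, RoundingMode::ToNearest_TieEven>(),
    };
    const Fn fn = lut.at(Key{64, false, 32, RoundingMode::TowardsZero});
    std::printf("%llu\n", static_cast<unsigned long long>(fn(0x4028000000000000ull))); // bits of 12.0
    return 0;
}
```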
parent 760cc3ca89
commit caaf36dfd6
12 changed files with 159 additions and 173 deletions
@@ -163,6 +163,7 @@ u32 A32JitState::Fpscr() const {
     FPSCR |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
     FPSCR |= FPSCR_IDC;
     FPSCR |= FPSCR_UFC;
+    FPSCR |= fpsr_exc;

     return FPSCR;
 }
@@ -183,13 +184,10 @@ void A32JitState::SetFpscr(u32 FPSCR) {
     const std::array<u32, 4> MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000};
     guest_MXCSR |= MXCSR_RMode[(FPSCR >> 22) & 0x3];

-    // Cumulative flags IOC, IXC, UFC, OFC, DZC
-    guest_MXCSR |= ( FPSCR ) & 0b0000000000001; // IE = IOC
-    guest_MXCSR |= ( FPSCR << 1) & 0b0000000111100; // PE, UE, OE, ZE = IXC, UFC, OFC, DZC
-
-    // Cumulative flag IDC, UFC
-    FPSCR_IDC = FPSCR & (1 << 7);
-    FPSCR_UFC = FPSCR & (1 << 3);
+    // Cumulative flags IDC, IOC, IXC, UFC, OFC, DZC
+    FPSCR_IDC = 0;
+    FPSCR_UFC = 0;
+    fpsr_exc = FPSCR & 0x9F;

     if (Common::Bit<24>(FPSCR)) {
         // VFP Flush to Zero
@@ -66,6 +66,7 @@ struct A32JitState {
     std::array<u64, RSBSize> rsb_codeptrs;
     void ResetRSB();

+    u32 fpsr_exc = 0;
     u32 FPSCR_IDC = 0;
     u32 FPSCR_UFC = 0;
     u32 FPSCR_mode = 0;
@@ -105,16 +105,15 @@ u32 A64JitState::GetFpsr() const {
     fpsr |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
     fpsr |= FPSCR_IDC;
     fpsr |= FPSCR_UFC;
+    fpsr |= fpsr_exc;
     return fpsr;
 }

 void A64JitState::SetFpsr(u32 value) {
     guest_MXCSR &= ~0x0000003D;
-    guest_MXCSR |= ( value ) & 0b0000000000001; // IE = IOC
-    guest_MXCSR |= ( value << 1) & 0b0000000111100; // PE, UE, OE, ZE = IXC, UFC, OFC, DZC
-
-    FPSCR_IDC = value & (1 << 7);
-    FPSCR_UFC = value & (1 << 3);
+    FPSCR_IDC = 0;
+    FPSCR_UFC = 0;
+    fpsr_exc = value & 0x9F;
 }

 } // namespace Dynarmic::BackendX64
@@ -71,6 +71,7 @@ struct A64JitState {
         rsb_codeptrs.fill(0);
     }

+    u32 fpsr_exc = 0;
     u32 FPSCR_IDC = 0;
     u32 FPSCR_UFC = 0;
     u32 fpcr = 0;
@@ -5,13 +5,22 @@
  */

 #include <type_traits>
+#include <utility>

 #include "backend_x64/abi.h"
 #include "backend_x64/block_of_code.h"
 #include "backend_x64/emit_x64.h"
 #include "common/assert.h"
 #include "common/common_types.h"
+#include "common/fp/op.h"
 #include "common/fp/util.h"
+#include "common/mp/cartesian_product.h"
+#include "common/mp/integer.h"
+#include "common/mp/list.h"
+#include "common/mp/lut.h"
+#include "common/mp/to_tuple.h"
+#include "common/mp/vlift.h"
+#include "common/mp/vllift.h"
 #include "frontend/ir/basic_block.h"
 #include "frontend/ir/microinstruction.h"
 #include "frontend/ir/opcodes.h"
@@ -19,6 +28,7 @@
 namespace Dynarmic::BackendX64 {

 using namespace Xbyak::util;
+namespace mp = Dynarmic::Common::mp;

 constexpr u64 f32_negative_zero = 0x80000000u;
 constexpr u64 f32_nan = 0x7fc00000u;
@@ -29,10 +39,6 @@ constexpr u64 f64_nan = 0x7ff8000000000000u;
 constexpr u64 f64_non_sign_mask = 0x7fffffffffffffffu;

 constexpr u64 f64_penultimate_positive_denormal = 0x000ffffffffffffeu;
-constexpr u64 f64_min_s32 = 0xc1e0000000000000u; // -2147483648 as a double
-constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double
-constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double
-constexpr u64 f64_max_u32 = 0x41efffffffe00000u; // 4294967295 as a double

 static void DenormalsAreZero32(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
     Xbyak::Label end;
@@ -99,12 +105,6 @@ static void FlushToZero64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64
     code.L(end);
 }

-static void ZeroIfNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) {
-    code.pxor(xmm_scratch, xmm_scratch);
-    code.cmpordsd(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN)
-    code.pand(xmm_value, xmm_scratch);
-}
-
 static void PreProcessNaNs32(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbyak::Label& end) {
     Xbyak::Label nan;

@@ -892,129 +892,82 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, result);
 }

-void EmitX64::EmitFPSingleToS32(EmitContext& ctx, IR::Inst* inst) {
+static void EmitFPToFixedFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, size_t fsize, bool unsigned_, size_t isize) {
+    using fsize_list = mp::list<mp::vlift<size_t(32)>, mp::vlift<size_t(64)>>;
+    using unsigned_list = mp::list<mp::vlift<true>, mp::vlift<false>>;
+    using isize_list = mp::list<mp::vlift<size_t(32)>, mp::vlift<size_t(64)>>;
+    using rounding_list = mp::list<
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieEven>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsPlusInfinity>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsMinusInfinity>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsZero>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieAwayFromZero>
+    >;
+
+    using key_type = std::tuple<size_t, bool, size_t, FP::RoundingMode>;
+    using value_type = u64(*)(u64, u8, FP::FPSR&, A64::FPCR);
+
+    static const auto lut = mp::GenerateLookupTableFromList<key_type, value_type>(
+        [](auto args) {
+            return std::pair<key_type, value_type>{
+                mp::to_tuple<decltype(args)>,
+                static_cast<value_type>(
+                    [](u64 input, u8 fbits, FP::FPSR& fpsr, A64::FPCR fpcr) {
+                        constexpr auto t = mp::to_tuple<decltype(args)>;
+                        constexpr size_t fsize = std::get<0>(t);
+                        constexpr bool unsigned_ = std::get<1>(t);
+                        constexpr size_t isize = std::get<2>(t);
+                        constexpr FP::RoundingMode rounding_mode = std::get<3>(t);
+                        using InputSize = mp::unsigned_integer_of_size<fsize>;
+
+                        return FP::FPToFixed<InputSize>(isize, static_cast<InputSize>(input), fbits, unsigned_, fpcr, rounding_mode, fpsr);
+                    }
+                )
+            };
+        },
+        mp::cartesian_product<fsize_list, unsigned_list, isize_list, rounding_list>{}
+    );
+
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg32 to = ctx.reg_alloc.ScratchGpr().cvt32();
-    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
-    bool round_towards_zero = args[1].GetImmediateU1();

-    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
-    // Conversion to double is lossless, and allows for clamping.
+    const auto rounding = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());

-    if (ctx.FPSCR_FTZ()) {
-        DenormalsAreZero32(code, from, to);
-    }
-    code.cvtss2sd(from, from);
-    // First time is to set flags
-    if (round_towards_zero) {
-        code.cvttsd2si(to, from); // 32 bit gpr
-    } else {
-        code.cvtsd2si(to, from); // 32 bit gpr
-    }
-    // Clamp to output range
-    ZeroIfNaN64(code, from, xmm_scratch);
-    code.minsd(from, code.MConst(xword, f64_max_s32));
-    code.maxsd(from, code.MConst(xword, f64_min_s32));
-    // Second time is for real
-    if (round_towards_zero) {
-        code.cvttsd2si(to, from); // 32 bit gpr
-    } else {
-        code.cvtsd2si(to, from); // 32 bit gpr
-    }
-
-    ctx.reg_alloc.DefineValue(inst, to);
+    ctx.reg_alloc.HostCall(inst, args[0], args[1]);
+    code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
+    code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR());
+    code.CallFunction(lut.at(std::make_tuple(fsize, unsigned_, isize, rounding)));
 }

-void EmitX64::EmitFPSingleToU32(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg64 to = ctx.reg_alloc.ScratchGpr().cvt64();
-    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
-    bool round_towards_zero = args[1].GetImmediateU1();
-
-    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
-    // Conversion to double is lossless, and allows for accurate clamping.
-    //
-    // Since SSE2 doesn't provide an unsigned conversion, we use a 64-bit signed conversion.
-    //
-    // FIXME: None of the FPSR exception bits are correctly signalled with the below code
-
-    if (ctx.FPSCR_FTZ()) {
-        DenormalsAreZero64(code, from, to);
-    }
-    code.cvtss2sd(from, from);
-    // Clamp to output range
-    ZeroIfNaN64(code, from, xmm_scratch);
-    code.minsd(from, code.MConst(xword, f64_max_u32));
-    code.maxsd(from, code.MConst(xword, f64_min_u32));
-    if (round_towards_zero) {
-        code.cvttsd2si(to, from); // 64 bit gpr
-    } else {
-        code.cvtsd2si(to, from); // 64 bit gpr
-    }
-
-    ctx.reg_alloc.DefineValue(inst, to);
+void EmitX64::EmitFPDoubleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixedFallback(code, ctx, inst, 64, false, 32);
 }

-void EmitX64::EmitFPDoubleToS32(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg32 to = ctx.reg_alloc.ScratchGpr().cvt32();
-    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
-    Xbyak::Reg32 gpr_scratch = ctx.reg_alloc.ScratchGpr().cvt32();
-    bool round_towards_zero = args[1].GetImmediateU1();
-
-    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
-
-    if (ctx.FPSCR_FTZ()) {
-        DenormalsAreZero64(code, from, gpr_scratch.cvt64());
-    }
-    // First time is to set flags
-    if (round_towards_zero) {
-        code.cvttsd2si(gpr_scratch, from); // 32 bit gpr
-    } else {
-        code.cvtsd2si(gpr_scratch, from); // 32 bit gpr
-    }
-    // Clamp to output range
-    ZeroIfNaN64(code, from, xmm_scratch);
-    code.minsd(from, code.MConst(xword, f64_max_s32));
-    code.maxsd(from, code.MConst(xword, f64_min_s32));
-    // Second time is for real
-    if (round_towards_zero) {
-        code.cvttsd2si(to, from); // 32 bit gpr
-    } else {
-        code.cvtsd2si(to, from); // 32 bit gpr
-    }
-
-    ctx.reg_alloc.DefineValue(inst, to);
+void EmitX64::EmitFPDoubleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixedFallback(code, ctx, inst, 64, false, 64);
 }

-void EmitX64::EmitFPDoubleToU32(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg64 to = ctx.reg_alloc.ScratchGpr().cvt64();
-    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
-    bool round_towards_zero = args[1].GetImmediateU1();
+void EmitX64::EmitFPDoubleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixedFallback(code, ctx, inst, 64, true, 32);
+}

-    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
-    // TODO: Use VCVTPD2UDQ when AVX512VL is available.
-    // FIXME: None of the FPSR exception bits are correctly signalled with the below code
+void EmitX64::EmitFPDoubleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixedFallback(code, ctx, inst, 64, true, 64);
+}

-    if (ctx.FPSCR_FTZ()) {
-        DenormalsAreZero64(code, from, to);
-    }
-    // Clamp to output range
-    ZeroIfNaN64(code, from, xmm_scratch);
-    code.minsd(from, code.MConst(xword, f64_max_u32));
-    code.maxsd(from, code.MConst(xword, f64_min_u32));
-    if (round_towards_zero) {
-        code.cvttsd2si(to, from); // 64 bit gpr
-    } else {
-        code.cvtsd2si(to, from); // 64 bit gpr
-    }
+void EmitX64::EmitFPSingleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixedFallback(code, ctx, inst, 32, false, 32);
+}

-    ctx.reg_alloc.DefineValue(inst, to);
+void EmitX64::EmitFPSingleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixedFallback(code, ctx, inst, 32, false, 64);
+}
+
+void EmitX64::EmitFPSingleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixedFallback(code, ctx, inst, 32, true, 32);
+}
+
+void EmitX64::EmitFPSingleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixedFallback(code, ctx, inst, 32, true, 64);
+}

 void EmitX64::EmitFPS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
@@ -26,6 +26,7 @@ struct JitStateInfo {
         , offsetof_CPSR_nzcv(offsetof(JitStateType, CPSR_nzcv))
         , offsetof_FPSCR_IDC(offsetof(JitStateType, FPSCR_IDC))
         , offsetof_FPSCR_UFC(offsetof(JitStateType, FPSCR_UFC))
+        , offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc))
     {}

     const size_t offsetof_cycles_remaining;
@@ -39,6 +40,7 @@ struct JitStateInfo {
     const size_t offsetof_CPSR_nzcv;
     const size_t offsetof_FPSCR_IDC;
     const size_t offsetof_FPSCR_UFC;
+    const size_t offsetof_fpsr_exc;
 };

 } // namespace Dynarmic::BackendX64
@@ -442,8 +442,8 @@ bool ArmTranslatorVisitor::vfp2_VCVT_to_u32(Cond cond, bool D, size_t Vd, bool s
     if (ConditionPassed(cond)) {
         auto reg_m = ir.GetExtendedRegister(m);
         auto result = sz
-                      ? ir.FPDoubleToU32(reg_m, round_towards_zero, true)
-                      : ir.FPSingleToU32(reg_m, round_towards_zero, true);
+                      ? ir.FPDoubleToFixedU32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode())
+                      : ir.FPSingleToFixedU32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode());
         ir.SetExtendedRegister(d, result);
     }
     return true;
@@ -457,8 +457,8 @@ bool ArmTranslatorVisitor::vfp2_VCVT_to_s32(Cond cond, bool D, size_t Vd, bool s
     if (ConditionPassed(cond)) {
         auto reg_m = ir.GetExtendedRegister(m);
         auto result = sz
-                      ? ir.FPDoubleToS32(reg_m, round_towards_zero, true)
-                      : ir.FPSingleToS32(reg_m, round_towards_zero, true);
+                      ? ir.FPDoubleToFixedS32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode())
+                      : ir.FPSingleToFixedS32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode());
         ir.SetExtendedRegister(d, result);
     }
     return true;
@@ -38,13 +38,13 @@ bool TranslatorVisitor::FCVTZS_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec

     IR::U32U64 intval;
     if (intsize == 32 && *fltsize == 32) {
-        intval = ir.FPSingleToS32(fltval, true, true);
+        intval = ir.FPSingleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 32 && *fltsize == 64) {
-        intval = ir.FPDoubleToS32(fltval, true, true);
+        intval = ir.FPDoubleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 32) {
-        return InterpretThisInstruction();
+        intval = ir.FPSingleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 64) {
-        return InterpretThisInstruction();
+        intval = ir.FPDoubleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else {
         UNREACHABLE();
     }
@@ -69,13 +69,13 @@ bool TranslatorVisitor::FCVTZU_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec

     IR::U32U64 intval;
     if (intsize == 32 && *fltsize == 32) {
-        intval = ir.FPSingleToU32(fltval, true, true);
+        intval = ir.FPSingleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 32 && *fltsize == 64) {
-        intval = ir.FPDoubleToU32(fltval, true, true);
+        intval = ir.FPDoubleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 32) {
-        return InterpretThisInstruction();
+        intval = ir.FPSingleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 64) {
-        return InterpretThisInstruction();
+        intval = ir.FPDoubleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else {
         UNREACHABLE();
     }
@@ -146,13 +146,13 @@ bool TranslatorVisitor::FCVTZS_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
     IR::U32U64 intval;

     if (intsize == 32 && *fltsize == 32) {
-        intval = ir.FPSingleToS32(fltval, true, true);
+        intval = ir.FPSingleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 32 && *fltsize == 64) {
-        intval = ir.FPDoubleToS32(fltval, true, true);
+        intval = ir.FPDoubleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 32) {
-        return InterpretThisInstruction();
+        intval = ir.FPSingleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 64) {
-        return InterpretThisInstruction();
+        intval = ir.FPDoubleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else {
         UNREACHABLE();
     }
@@ -173,13 +173,13 @@ bool TranslatorVisitor::FCVTZU_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
     IR::U32U64 intval;

     if (intsize == 32 && *fltsize == 32) {
-        intval = ir.FPSingleToU32(fltval, true, true);
+        intval = ir.FPSingleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 32 && *fltsize == 64) {
-        intval = ir.FPDoubleToU32(fltval, true, true);
+        intval = ir.FPDoubleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 32) {
-        return InterpretThisInstruction();
+        intval = ir.FPSingleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 64) {
-        return InterpretThisInstruction();
+        intval = ir.FPDoubleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else {
         UNREACHABLE();
     }
@@ -1451,24 +1451,44 @@ U64 IREmitter::FPSingleToDouble(const U32& a, bool fpscr_controlled) {
     return Inst<U64>(Opcode::FPSingleToDouble, a);
 }

-U32 IREmitter::FPSingleToS32(const U32& a, bool round_towards_zero, bool fpscr_controlled) {
-    ASSERT(fpscr_controlled);
-    return Inst<U32>(Opcode::FPSingleToS32, a, Imm1(round_towards_zero));
+U32 IREmitter::FPDoubleToFixedS32(const U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 32);
+    return Inst<U32>(Opcode::FPDoubleToFixedS32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
 }

-U32 IREmitter::FPSingleToU32(const U32& a, bool round_towards_zero, bool fpscr_controlled) {
-    ASSERT(fpscr_controlled);
-    return Inst<U32>(Opcode::FPSingleToU32, a, Imm1(round_towards_zero));
+U64 IREmitter::FPDoubleToFixedS64(const U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 64);
+    return Inst<U64>(Opcode::FPDoubleToFixedS64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
 }

-U32 IREmitter::FPDoubleToS32(const U64& a, bool round_towards_zero, bool fpscr_controlled) {
-    ASSERT(fpscr_controlled);
-    return Inst<U32>(Opcode::FPDoubleToS32, a, Imm1(round_towards_zero));
+U32 IREmitter::FPDoubleToFixedU32(const U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 32);
+    return Inst<U32>(Opcode::FPDoubleToFixedU32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
 }

-U32 IREmitter::FPDoubleToU32(const U64& a, bool round_towards_zero, bool fpscr_controlled) {
-    ASSERT(fpscr_controlled);
-    return Inst<U32>(Opcode::FPDoubleToU32, a, Imm1(round_towards_zero));
+U64 IREmitter::FPDoubleToFixedU64(const U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 64);
+    return Inst<U64>(Opcode::FPDoubleToFixedU64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+}
+
+U32 IREmitter::FPSingleToFixedS32(const U32& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 32);
+    return Inst<U32>(Opcode::FPSingleToFixedS32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+}
+
+U64 IREmitter::FPSingleToFixedS64(const U32& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 64);
+    return Inst<U64>(Opcode::FPSingleToFixedS64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+}
+
+U32 IREmitter::FPSingleToFixedU32(const U32& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 32);
+    return Inst<U32>(Opcode::FPSingleToFixedU32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+}
+
+U64 IREmitter::FPSingleToFixedU64(const U32& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 64);
+    return Inst<U64>(Opcode::FPSingleToFixedU64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
 }

 U32 IREmitter::FPS32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled) {
@@ -12,6 +12,10 @@
 #include "frontend/ir/terminal.h"
 #include "frontend/ir/value.h"

+namespace Dynarmic::FP {
+enum class RoundingMode;
+} // namespace Dynarmic::FP
+
 // ARM JIT Microinstruction Intermediate Representation
 //
 // This intermediate representation is an SSA IR. It is designed primarily for analysis,
@@ -264,10 +268,14 @@ public:
     U32U64 FPSub(const U32U64& a, const U32U64& b, bool fpscr_controlled);
     U32 FPDoubleToSingle(const U64& a, bool fpscr_controlled);
     U64 FPSingleToDouble(const U32& a, bool fpscr_controlled);
-    U32 FPSingleToS32(const U32& a, bool round_towards_zero, bool fpscr_controlled);
-    U32 FPSingleToU32(const U32& a, bool round_towards_zero, bool fpscr_controlled);
-    U32 FPDoubleToS32(const U64& a, bool round_towards_zero, bool fpscr_controlled);
-    U32 FPDoubleToU32(const U64& a, bool round_towards_zero, bool fpscr_controlled);
+    U32 FPDoubleToFixedS32(const U64& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPDoubleToFixedS64(const U64& a, size_t fbits, FP::RoundingMode rounding);
+    U32 FPDoubleToFixedU32(const U64& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPDoubleToFixedU64(const U64& a, size_t fbits, FP::RoundingMode rounding);
+    U32 FPSingleToFixedS32(const U32& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPSingleToFixedS64(const U32& a, size_t fbits, FP::RoundingMode rounding);
+    U32 FPSingleToFixedU32(const U32& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPSingleToFixedU64(const U32& a, size_t fbits, FP::RoundingMode rounding);
     U32 FPS32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled);
     U32 FPU32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled);
     U64 FPS32ToDouble(const U32& a, bool round_to_nearest, bool fpscr_controlled);
@@ -386,10 +386,14 @@ OPCODE(FPSub64, T::U64, T::U64, T::U
 // Floating-point conversions
 OPCODE(FPSingleToDouble,     T::U64, T::U32                 )
 OPCODE(FPDoubleToSingle,     T::U32, T::U64                 )
-OPCODE(FPSingleToU32,        T::U32, T::U32, T::U1          )
-OPCODE(FPSingleToS32,        T::U32, T::U32, T::U1          )
-OPCODE(FPDoubleToU32,        T::U32, T::U64, T::U1          )
-OPCODE(FPDoubleToS32,        T::U32, T::U64, T::U1          )
+OPCODE(FPDoubleToFixedS32,   T::U32, T::U64, T::U8, T::U8   )
+OPCODE(FPDoubleToFixedS64,   T::U64, T::U64, T::U8, T::U8   )
+OPCODE(FPDoubleToFixedU32,   T::U32, T::U64, T::U8, T::U8   )
+OPCODE(FPDoubleToFixedU64,   T::U64, T::U64, T::U8, T::U8   )
+OPCODE(FPSingleToFixedS32,   T::U32, T::U32, T::U8, T::U8   )
+OPCODE(FPSingleToFixedS64,   T::U64, T::U32, T::U8, T::U8   )
+OPCODE(FPSingleToFixedU32,   T::U32, T::U32, T::U8, T::U8   )
+OPCODE(FPSingleToFixedU64,   T::U64, T::U32, T::U8, T::U8   )
 OPCODE(FPU32ToSingle,        T::U32, T::U32, T::U1          )
 OPCODE(FPS32ToSingle,        T::U32, T::U32, T::U1          )
 OPCODE(FPU32ToDouble,        T::U64, T::U32, T::U1          )