Merge pull request #289 from MerryMage/fptofixed

Implement most of the scalar fp -> integer instructions
2018-07-15 17:12:52 +01:00 · 2018-07-15 17:12:52 +01:00 · d50eaedaa7
commit d50eaedaa7
parent e7409fdfe4 304cc7f61e
52 changed files with 1931 additions and 217 deletions
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -16,8 +16,17 @@ add_library(dynarmic
    common/common_types.h
    common/crc32.cpp
    common/crc32.h
-    common/fp_util.h
+    common/fp/fpsr.h
    common/fp/info.h
    common/fp/mantissa_util.h
    common/fp/op.cpp
    common/fp/op.h
    common/fp/process_exception.cpp
    common/fp/process_exception.h
    common/fp/rounding_mode.h
    common/fp/unpacked.cpp
    common/fp/unpacked.h
    common/fp/util.h
    common/intrusive_list.h
    common/iterator_util.h
    common/llvm_disassemble.cpp
@ -27,10 +36,24 @@ add_library(dynarmic
    common/memory_pool.cpp
    common/memory_pool.h
    common/mp.h
    common/mp/append.h
    common/mp/bind.h
    common/mp/cartesian_product.h
    common/mp/concat.h
    common/mp/fapply.h
    common/mp/fmap.h
    common/mp/list.h
    common/mp/lut.h
    common/mp/to_tuple.h
    common/mp/vlift.h
    common/mp/vllift.h
    common/safe_ops.h
    common/scope_exit.h
    common/sm4.cpp
    common/sm4.h
    common/string_util.h
    common/u128.cpp
    common/u128.h
    common/variant_util.h
    frontend/A32/decoder/arm.h
    frontend/A32/decoder/thumb16.h
--- a/src/backend_x64/a32_emit_x64.cpp
+++ b/src/backend_x64/a32_emit_x64.cpp
@ -62,6 +62,10 @@ FP::RoundingMode A32EmitContext::FPSCR_RMode() const {
    return Location().FPSCR().RMode();
 }
 u32 A32EmitContext::FPCR() const {
    // Raw value of the FPSCR carried by this block's location descriptor.
    const auto fpscr = Location().FPSCR();
    return fpscr.Value();
 }
 bool A32EmitContext::FPSCR_RoundTowardsZero() const {
    // Reports whether the FPSCR rounding mode for this block is round-towards-zero.
    // The comparison used to be `!=`, which answered the opposite question to the
    // one the function name asks.
    // NOTE(review): confirm no caller relied on the previous inverted result.
    return Location().FPSCR().RMode() == FP::RoundingMode::TowardsZero;
 }
--- a/src/backend_x64/a32_emit_x64.h
+++ b/src/backend_x64/a32_emit_x64.h
@ -24,6 +24,7 @@ struct A32EmitContext final : public EmitContext {
    A32EmitContext(RegAlloc& reg_alloc, IR::Block& block);
    A32::LocationDescriptor Location() const;
    FP::RoundingMode FPSCR_RMode() const override;
    u32 FPCR() const override;
    bool FPSCR_RoundTowardsZero() const override;
    bool FPSCR_FTZ() const override;
    bool FPSCR_DN() const override;
--- a/src/backend_x64/a32_jitstate.cpp
+++ b/src/backend_x64/a32_jitstate.cpp
@ -163,6 +163,7 @@ u32 A32JitState::Fpscr() const {
    FPSCR |= (guest_MXCSR & 0b0000000111100) >> 1;  // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
    FPSCR |= FPSCR_IDC;
    FPSCR |= FPSCR_UFC;
    FPSCR |= fpsr_exc;
    return FPSCR;
 }
@ -183,13 +184,10 @@ void A32JitState::SetFpscr(u32 FPSCR) {
    const std::array<u32, 4> MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000};
    guest_MXCSR |= MXCSR_RMode[(FPSCR >> 22) & 0x3];
-    // Cumulative flags IOC, IXC, UFC, OFC, DZC
+    // Cumulative flags IDC, IOC, IXC, UFC, OFC, DZC
-    guest_MXCSR |= ( FPSCR     ) & 0b0000000000001;  // IE = IOC
+    FPSCR_IDC = 0;
-    guest_MXCSR |= ( FPSCR << 1) & 0b0000000111100;  // PE, UE, OE, ZE = IXC, UFC, OFC, DZC
+    FPSCR_UFC = 0;
-
+    fpsr_exc = FPSCR & 0x9F;
    // Cumulative flag IDC, UFC
    FPSCR_IDC = FPSCR & (1 << 7);
    FPSCR_UFC = FPSCR & (1 << 3);
    if (Common::Bit<24>(FPSCR)) {
        // VFP Flush to Zero
--- a/src/backend_x64/a32_jitstate.h
+++ b/src/backend_x64/a32_jitstate.h
@ -66,6 +66,7 @@ struct A32JitState {
    std::array<u64, RSBSize> rsb_codeptrs;
    void ResetRSB();
    u32 fpsr_exc = 0;
    u32 FPSCR_IDC = 0;
    u32 FPSCR_UFC = 0;
    u32 FPSCR_mode = 0;
--- a/src/backend_x64/a64_emit_x64.cpp
+++ b/src/backend_x64/a64_emit_x64.cpp
@ -44,6 +44,10 @@ FP::RoundingMode A64EmitContext::FPSCR_RMode() const {
    return Location().FPCR().RMode();
 }
 u32 A64EmitContext::FPCR() const {
    // Raw value of the FPCR carried by this block's location descriptor.
    const auto fpcr = Location().FPCR();
    return fpcr.Value();
 }
 bool A64EmitContext::FPSCR_RoundTowardsZero() const {
    // Reports whether the FPCR rounding mode for this block is round-towards-zero.
    // The comparison used to be `!=`, which answered the opposite question to the
    // one the function name asks.
    // NOTE(review): confirm no caller relied on the previous inverted result.
    return Location().FPCR().RMode() == FP::RoundingMode::TowardsZero;
 }
--- a/src/backend_x64/a64_emit_x64.h
+++ b/src/backend_x64/a64_emit_x64.h
@ -24,6 +24,7 @@ struct A64EmitContext final : public EmitContext {
    A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block);
    A64::LocationDescriptor Location() const;
    FP::RoundingMode FPSCR_RMode() const override;
    u32 FPCR() const override;
    bool FPSCR_RoundTowardsZero() const override;
    bool FPSCR_FTZ() const override;
    bool FPSCR_DN() const override;
--- a/src/backend_x64/a64_jitstate.cpp
+++ b/src/backend_x64/a64_jitstate.cpp
@ -105,16 +105,15 @@ u32 A64JitState::GetFpsr() const {
    fpsr |= (guest_MXCSR & 0b0000000111100) >> 1;  // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
    fpsr |= FPSCR_IDC;
    fpsr |= FPSCR_UFC;
    fpsr |= fpsr_exc;
    return fpsr;
 }
 void A64JitState::SetFpsr(u32 value) {
    guest_MXCSR &= ~0x0000003D;
-    guest_MXCSR |= ( value     ) & 0b0000000000001;  // IE = IOC
+    FPSCR_IDC = 0;
-    guest_MXCSR |= ( value << 1) & 0b0000000111100;  // PE, UE, OE, ZE = IXC, UFC, OFC, DZC
+    FPSCR_UFC = 0;
-
+    fpsr_exc = value & 0x9F;
    FPSCR_IDC = value & (1 << 7);
    FPSCR_UFC = value & (1 << 3);
 }
 } // namespace Dynarmic::BackendX64
--- a/src/backend_x64/a64_jitstate.h
+++ b/src/backend_x64/a64_jitstate.h
@ -71,6 +71,7 @@ struct A64JitState {
        rsb_codeptrs.fill(0);
    }
    u32 fpsr_exc = 0;
    u32 FPSCR_IDC = 0;
    u32 FPSCR_UFC = 0;
    u32 fpcr = 0;
--- a/src/backend_x64/emit_x64.h
+++ b/src/backend_x64/emit_x64.h
@ -35,6 +35,7 @@ struct EmitContext {
    void EraseInstruction(IR::Inst* inst);
    virtual FP::RoundingMode FPSCR_RMode() const = 0;
    virtual u32 FPCR() const = 0;
    virtual bool FPSCR_RoundTowardsZero() const = 0;
    virtual bool FPSCR_FTZ() const = 0;
    virtual bool FPSCR_DN() const = 0;
--- a/src/backend_x64/emit_x64_floating_point.cpp
+++ b/src/backend_x64/emit_x64_floating_point.cpp
@ -5,13 +5,22 @@
 */
 #include <type_traits>
 #include <utility>
 #include "backend_x64/abi.h"
 #include "backend_x64/block_of_code.h"
 #include "backend_x64/emit_x64.h"
 #include "common/assert.h"
 #include "common/common_types.h"
-#include "common/fp_util.h"
+#include "common/fp/op.h"
 #include "common/fp/util.h"
 #include "common/mp/cartesian_product.h"
 #include "common/mp/integer.h"
 #include "common/mp/list.h"
 #include "common/mp/lut.h"
 #include "common/mp/to_tuple.h"
 #include "common/mp/vlift.h"
 #include "common/mp/vllift.h"
 #include "frontend/ir/basic_block.h"
 #include "frontend/ir/microinstruction.h"
 #include "frontend/ir/opcodes.h"
@ -19,6 +28,7 @@
 namespace Dynarmic::BackendX64 {
 using namespace Xbyak::util;
 namespace mp = Dynarmic::Common::mp;
 constexpr u64 f32_negative_zero = 0x80000000u;
 constexpr u64 f32_nan = 0x7fc00000u;
@ -33,6 +43,10 @@ constexpr u64 f64_min_s32 = 0xc1e0000000000000u; // -2147483648 as a double
 constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double
 constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double
 constexpr u64 f64_max_u32 = 0x41efffffffe00000u; // 4294967295 as a double
 constexpr u64 f64_min_s64 = 0xc3e0000000000000u; // -2^63 as a double
 constexpr u64 f64_max_s64_lim = 0x43e0000000000000u; // 2^63 as a double (actual maximum unrepresentable)
 constexpr u64 f64_min_u64 = 0x0000000000000000u; // 0 as a double
 constexpr u64 f64_max_u64_lim = 0x43f0000000000000u; // 2^64 as a double (actual maximum unrepresentable)
 static void DenormalsAreZero32(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
    Xbyak::Label end;
@ -120,7 +134,7 @@ static void PreProcessNaNs32(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbya
    code.movd(code.ABI_PARAM1.cvt32(), a);
    code.movd(code.ABI_PARAM2.cvt32(), b);
    code.CallFunction(static_cast<u32(*)(u32, u32)>([](u32 a, u32 b) -> u32 {
-        return *Common::ProcessNaNs(a, b);
+        return *FP::ProcessNaNs(a, b);
    }));
    code.movd(a, code.ABI_RETURN.cvt32());
    ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx()));
@ -149,7 +163,7 @@ static void PreProcessNaNs32(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbya
    code.movd(code.ABI_PARAM2.cvt32(), b);
    code.movd(code.ABI_PARAM3.cvt32(), c);
    code.CallFunction(static_cast<u32(*)(u32, u32, u32)>([](u32 a, u32 b, u32 c) -> u32 {
-        return *Common::ProcessNaNs(a, b, c);
+        return *FP::ProcessNaNs(a, b, c);
    }));
    code.movd(a, code.ABI_RETURN.cvt32());
    ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx()));
@ -187,7 +201,7 @@ static void PreProcessNaNs64(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbya
    code.movq(code.ABI_PARAM1, a);
    code.movq(code.ABI_PARAM2, b);
    code.CallFunction(static_cast<u64(*)(u64, u64)>([](u64 a, u64 b) -> u64 {
-        return *Common::ProcessNaNs(a, b);
+        return *FP::ProcessNaNs(a, b);
    }));
    code.movq(a, code.ABI_RETURN);
    ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx()));
@ -213,7 +227,7 @@ static void PreProcessNaNs64(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbya
    code.movq(code.ABI_PARAM2, b);
    code.movq(code.ABI_PARAM3, c);
    code.CallFunction(static_cast<u64(*)(u64, u64, u64)>([](u64 a, u64 b, u64 c) -> u64 {
-        return *Common::ProcessNaNs(a, b, c);
+        return *FP::ProcessNaNs(a, b, c);
    }));
    code.movq(a, code.ABI_RETURN);
    ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx()));
@ -892,129 +906,160 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) {
    ctx.reg_alloc.DefineValue(inst, result);
 }
-void EmitX64::EmitFPSingleToS32(EmitContext& ctx, IR::Inst* inst) {
+static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, size_t fsize, bool unsigned_, size_t isize) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Reg32 to = ctx.reg_alloc.ScratchGpr().cvt32();
    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
    bool round_towards_zero = args[1].GetImmediateU1();
-    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
+    const size_t fbits = args[1].GetImmediateU8();
-    // Conversion to double is lossless, and allows for clamping.
+    const auto rounding = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
-    if (ctx.FPSCR_FTZ()) {
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero){
-        DenormalsAreZero32(code, from, to);
+        const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]);
        const int round_imm = [&]{
            switch (rounding) {
            case FP::RoundingMode::ToNearest_TieEven:
            default:
                return 0b00;
            case FP::RoundingMode::TowardsPlusInfinity:
                return 0b10;
            case FP::RoundingMode::TowardsMinusInfinity:
                return 0b01;
            case FP::RoundingMode::TowardsZero:
                return 0b11;
            }
-    code.cvtss2sd(from, from);
+        }();
-    // First time is to set flags
+
-    if (round_towards_zero) {
+        const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm();
-        code.cvttsd2si(to, from); // 32 bit gpr
+        const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64();
        if (fsize == 64) {
            if (fbits != 0) {
                const u64 scale_factor = static_cast<u64>((fbits + 1023) << 52);
                code.mulsd(src, code.MConst(xword, scale_factor));
            }
            code.roundsd(src, src, round_imm);
            ZeroIfNaN64(code, src, scratch);
        } else {
-        code.cvtsd2si(to, from); // 32 bit gpr
+            if (fbits != 0) {
                const u32 scale_factor = static_cast<u32>((fbits + 127) << 23);
                code.mulss(src, code.MConst(xword, scale_factor));
            }
-    // Clamp to output range
+
-    ZeroIfNaN64(code, from, xmm_scratch);
+            code.roundss(src, src, round_imm);
-    code.minsd(from, code.MConst(xword, f64_max_s32));
+            code.cvtss2sd(src, src);
-    code.maxsd(from, code.MConst(xword, f64_min_s32));
+            ZeroIfNaN64(code, src, scratch);
-    // Second time is for real
+        }
-    if (round_towards_zero) {
+
-        code.cvttsd2si(to, from); // 32 bit gpr
+        if (isize == 64) {
            Xbyak::Label saturate_max, end;
            code.maxsd(src, code.MConst(xword, unsigned_ ? f64_min_u64 : f64_min_s64));
            code.movsd(scratch, code.MConst(xword, unsigned_ ? f64_max_u64_lim : f64_max_s64_lim));
            code.comisd(scratch, src);
            code.jna(saturate_max, code.T_NEAR);
            if (unsigned_) {
                Xbyak::Label below_max;
                code.movsd(scratch, code.MConst(xword, f64_max_s64_lim));
                code.comisd(src, scratch);
                code.jb(below_max);
                code.subsd(src, scratch);
                code.cvttsd2si(result, src);
                code.btc(result, 63);
                code.jmp(end);
                code.L(below_max);
            }
            code.cvttsd2si(result, src); // 64 bit gpr
            code.L(end);
            code.SwitchToFarCode();
            code.L(saturate_max);
            code.mov(result, unsigned_ ? 0xFFFF'FFFF'FFFF'FFFF : 0x7FFF'FFFF'FFFF'FFFF);
            code.jmp(end, code.T_NEAR);
            code.SwitchToNearCode();
        } else {
-        code.cvtsd2si(to, from); // 32 bit gpr
+            code.minsd(src, code.MConst(xword, unsigned_ ? f64_max_u32 : f64_max_s32));
            code.maxsd(src, code.MConst(xword, unsigned_ ? f64_min_u32 : f64_min_s32));
            code.cvttsd2si(result, src); // 64 bit gpr
        }
-    ctx.reg_alloc.DefineValue(inst, to);
+        ctx.reg_alloc.DefineValue(inst, result);
        return;
    }
-void EmitX64::EmitFPSingleToU32(EmitContext& ctx, IR::Inst* inst) {
+    using fsize_list = mp::list<mp::vlift<size_t(32)>, mp::vlift<size_t(64)>>;
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    using unsigned_list = mp::list<mp::vlift<true>, mp::vlift<false>>;
-    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
+    using isize_list = mp::list<mp::vlift<size_t(32)>, mp::vlift<size_t(64)>>;
-    Xbyak::Reg64 to = ctx.reg_alloc.ScratchGpr().cvt64();
+    using rounding_list = mp::list<
-    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieEven>,
-    bool round_towards_zero = args[1].GetImmediateU1();
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsPlusInfinity>,
        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsMinusInfinity>,
        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsZero>,
        std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieAwayFromZero>
    >;
-    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
+    using key_type = std::tuple<size_t, bool, size_t, FP::RoundingMode>;
-    // Conversion to double is lossless, and allows for accurate clamping.
+    using value_type = u64(*)(u64, u8, FP::FPSR&, A64::FPCR);
    //
    // Since SSE2 doesn't provide an unsigned conversion, we use a 64-bit signed conversion.
    //
    // FIXME: None of the FPSR exception bits are correctly signalled with the below code
-    if (ctx.FPSCR_FTZ()) {
+    static const auto lut = mp::GenerateLookupTableFromList<key_type, value_type>(
-        DenormalsAreZero64(code, from, to);
+        [](auto args) {
            return std::pair<key_type, value_type>{
                mp::to_tuple<decltype(args)>,
                static_cast<value_type>(
                    [](u64 input, u8 fbits, FP::FPSR& fpsr, A64::FPCR fpcr) {
                        constexpr auto t = mp::to_tuple<decltype(args)>;
                        constexpr size_t fsize = std::get<0>(t);
                        constexpr bool unsigned_ = std::get<1>(t);
                        constexpr size_t isize = std::get<2>(t);
                        constexpr FP::RoundingMode rounding_mode = std::get<3>(t);
                        using InputSize = mp::unsigned_integer_of_size<fsize>;
                        return FP::FPToFixed<InputSize>(isize, static_cast<InputSize>(input), fbits, unsigned_, fpcr, rounding_mode, fpsr);
                    }
-    code.cvtss2sd(from, from);
+                )
-    // Clamp to output range
+            };
-    ZeroIfNaN64(code, from, xmm_scratch);
+        },
-    code.minsd(from, code.MConst(xword, f64_max_u32));
+        mp::cartesian_product<fsize_list, unsigned_list, isize_list, rounding_list>{}
-    code.maxsd(from, code.MConst(xword, f64_min_u32));
+    );
-    if (round_towards_zero) {
+
-        code.cvttsd2si(to, from); // 64 bit gpr
+    ctx.reg_alloc.HostCall(inst, args[0], args[1]);
-    } else {
+    code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
-        code.cvtsd2si(to, from); // 64 bit gpr
+    code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR());
    code.CallFunction(lut.at(std::make_tuple(fsize, unsigned_, isize, rounding)));
 }
-    ctx.reg_alloc.DefineValue(inst, to);
+void EmitX64::EmitFPDoubleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
    // Forwards to the shared helper with fsize = 64, unsigned_ = false, isize = 32.
    EmitFPToFixed(code, ctx, inst, 64, false, 32);
 }
-void EmitX64::EmitFPDoubleToS32(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitFPDoubleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    EmitFPToFixed(code, ctx, inst, 64, false, 64);
    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Reg32 to = ctx.reg_alloc.ScratchGpr().cvt32();
    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
    Xbyak::Reg32 gpr_scratch = ctx.reg_alloc.ScratchGpr().cvt32();
    bool round_towards_zero = args[1].GetImmediateU1();
    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
    if (ctx.FPSCR_FTZ()) {
        DenormalsAreZero64(code, from, gpr_scratch.cvt64());
    }
    // First time is to set flags
    if (round_towards_zero) {
        code.cvttsd2si(gpr_scratch, from); // 32 bit gpr
    } else {
        code.cvtsd2si(gpr_scratch, from); // 32 bit gpr
    }
    // Clamp to output range
    ZeroIfNaN64(code, from, xmm_scratch);
    code.minsd(from, code.MConst(xword, f64_max_s32));
    code.maxsd(from, code.MConst(xword, f64_min_s32));
    // Second time is for real
    if (round_towards_zero) {
        code.cvttsd2si(to, from); // 32 bit gpr
    } else {
        code.cvtsd2si(to, from); // 32 bit gpr
 }
-    ctx.reg_alloc.DefineValue(inst, to);
+void EmitX64::EmitFPDoubleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
    // Forwards to the shared helper with fsize = 64, unsigned_ = true, isize = 32.
    EmitFPToFixed(code, ctx, inst, 64, true, 32);
 }
-void EmitX64::EmitFPDoubleToU32(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64::EmitFPDoubleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    EmitFPToFixed(code, ctx, inst, 64, true, 64);
    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Reg64 to = ctx.reg_alloc.ScratchGpr().cvt64();
    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
    bool round_towards_zero = args[1].GetImmediateU1();
    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
    // TODO: Use VCVTPD2UDQ when AVX512VL is available.
    // FIXME: None of the FPSR exception bits are correctly signalled with the below code
    if (ctx.FPSCR_FTZ()) {
        DenormalsAreZero64(code, from, to);
    }
    // Clamp to output range
    ZeroIfNaN64(code, from, xmm_scratch);
    code.minsd(from, code.MConst(xword, f64_max_u32));
    code.maxsd(from, code.MConst(xword, f64_min_u32));
    if (round_towards_zero) {
        code.cvttsd2si(to, from); // 64 bit gpr
    } else {
        code.cvtsd2si(to, from); // 64 bit gpr
 }
-    ctx.reg_alloc.DefineValue(inst, to);
+void EmitX64::EmitFPSingleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
    // Forwards to the shared helper with fsize = 32, unsigned_ = false, isize = 32.
    EmitFPToFixed(code, ctx, inst, 32, false, 32);
 }
 // Emits the FPSingleToFixedS64 IR instruction by forwarding to the shared
 // EmitFPToFixed helper with fsize = 32, unsigned_ = false, isize = 64.
 void EmitX64::EmitFPSingleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
    EmitFPToFixed(code, ctx, inst, 32, false, 64);
 }
 // Emits the FPSingleToFixedU32 IR instruction by forwarding to the shared
 // EmitFPToFixed helper with fsize = 32, unsigned_ = true, isize = 32.
 void EmitX64::EmitFPSingleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
    EmitFPToFixed(code, ctx, inst, 32, true, 32);
 }
 // Emits the FPSingleToFixedU64 IR instruction by forwarding to the shared
 // EmitFPToFixed helper with fsize = 32, unsigned_ = true, isize = 64.
 void EmitX64::EmitFPSingleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
    EmitFPToFixed(code, ctx, inst, 32, true, 64);
 }
 void EmitX64::EmitFPS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
--- a/src/backend_x64/emit_x64_vector_floating_point.cpp
+++ b/src/backend_x64/emit_x64_vector_floating_point.cpp
@ -10,7 +10,7 @@
 #include "backend_x64/block_of_code.h"
 #include "backend_x64/emit_x64.h"
 #include "common/bit_util.h"
-#include "common/fp_util.h"
+#include "common/fp/util.h"
 #include "frontend/ir/basic_block.h"
 #include "frontend/ir/microinstruction.h"
@ -69,9 +69,9 @@ static void HandleNaNs(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& xm
    code.CallFunction(static_cast<void(*)(RegArray&, const RegArray&, const RegArray&)>(
        [](RegArray& result, const RegArray& a, const RegArray& b) {
            for (size_t i = 0; i < result.size(); ++i) {
-                if (auto r = Common::ProcessNaNs(a[i], b[i])) {
+                if (auto r = FP::ProcessNaNs(a[i], b[i])) {
                    result[i] = *r;
-                } else if (Common::IsNaN(result[i])) {
+                } else if (FP::IsNaN(result[i])) {
                    result[i] = NaNWrapper<T>::value;
                }
            }
--- a/src/backend_x64/jitstate_info.h
+++ b/src/backend_x64/jitstate_info.h
@ -26,6 +26,7 @@ struct JitStateInfo {
        , offsetof_CPSR_nzcv(offsetof(JitStateType, CPSR_nzcv))
        , offsetof_FPSCR_IDC(offsetof(JitStateType, FPSCR_IDC))
        , offsetof_FPSCR_UFC(offsetof(JitStateType, FPSCR_UFC))
        , offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc))
    {}
    const size_t offsetof_cycles_remaining;
@ -39,6 +40,7 @@ struct JitStateInfo {
    const size_t offsetof_CPSR_nzcv;
    const size_t offsetof_FPSCR_IDC;
    const size_t offsetof_FPSCR_UFC;
    const size_t offsetof_fpsr_exc;
 };
 } // namespace Dynarmic::BackendX64
--- a/src/common/bit_util.h
+++ b/src/common/bit_util.h
@ -21,29 +21,29 @@ constexpr size_t BitSize() {
    return sizeof(T) * CHAR_BIT;
 }
 /// Produces a value of type T whose lowest `count` bits are set and whose
 /// remaining bits are clear. `count` may be at most BitSize<T>().
 template <typename T>
 inline T Ones(size_t count) {
    ASSERT_MSG(count <= BitSize<T>(), "count larger than bitsize of T");
    const T all_bits_set = static_cast<T>(~static_cast<T>(0));
    // A shift by the full width is not allowed, so the all-ones case is answered directly.
    if (count == BitSize<T>()) {
        return all_bits_set;
    }
    return ~(all_bits_set << count);
 }
 /// Extract bits [begin_bit, end_bit] inclusive from value of type T.
 template<size_t begin_bit, size_t end_bit, typename T>
 constexpr T Bits(const T value) {
    static_assert(begin_bit <= end_bit,
                  "invalid bit range (position of beginning bit cannot be greater than that of end bit)");
    static_assert(begin_bit < BitSize<T>(), "begin_bit must be smaller than size of T");
-    static_assert(end_bit < BitSize<T>(), "begin_bit must be smaller than size of T");
+    static_assert(end_bit < BitSize<T>(), "end_bit must be smaller than size of T");
-    return (value >> begin_bit) & ((1 << (end_bit - begin_bit + 1)) - 1);
+    return (value >> begin_bit) & Ones<T>(end_bit - begin_bit + 1);
 }
 #ifdef _MSC_VER
 #pragma warning(push)
 #pragma warning(disable:4554)
 #endif
 /// Tests the bit at the compile-time index bit_position of value.
 /// Returns true when that bit is set.
 template<size_t bit_position, typename T>
 constexpr bool Bit(const T value) {
    static_assert(bit_position < BitSize<T>(), "bit_position must be smaller than size of T");
    const auto lowest_bit = (value >> bit_position) & 1;
    return lowest_bit != 0;
 }
 /// Extracts a single bit at bit_position from value of type T.
 template<typename T>
 inline bool Bit(size_t bit_position, const T value) {
@ -51,6 +51,46 @@ inline bool Bit(size_t bit_position, const T value) {
    return ((value >> bit_position) & 1) != 0;
 }
 /// Extracts a single bit at bit_position from value of type T.
 ///
 /// bit_position is validated at compile time. The bit test is written out
 /// here rather than forwarded to the runtime overload: that overload is not
 /// declared constexpr, so forwarding to it would make this function unusable
 /// in constant expressions despite its constexpr specifier.
 template<size_t bit_position, typename T>
 constexpr bool Bit(const T value) {
    static_assert(bit_position < BitSize<T>(), "bit_position must be smaller than size of T");
    return ((value >> bit_position) & 1) != 0;
 }
 /// Returns value with the bit at the run-time index bit_position forced to zero.
 /// bit_position must be smaller than BitSize<T>().
 template<typename T>
 inline T ClearBit(size_t bit_position, const T value) {
    ASSERT_MSG(bit_position < BitSize<T>(), "bit_position must be smaller than size of T");
    const auto selected_bit = static_cast<T>(1) << bit_position;
    return value & ~selected_bit;
 }
 /// Clears a single bit at bit_position from value of type T.
 ///
 /// bit_position is validated at compile time. The mask is applied here
 /// rather than by forwarding to the runtime overload: that overload is not
 /// declared constexpr, so forwarding to it would make this function unusable
 /// in constant expressions despite its constexpr specifier.
 template<size_t bit_position, typename T>
 constexpr T ClearBit(const T value) {
    static_assert(bit_position < BitSize<T>(), "bit_position must be smaller than size of T");
    return static_cast<T>(value & ~(static_cast<T>(1) << bit_position));
 }
 /// Returns value with the bit at the run-time index bit_position replaced by new_bit.
 /// bit_position must be smaller than BitSize<T>().
 template<typename T>
 inline T ModifyBit(size_t bit_position, const T value, bool new_bit) {
    ASSERT_MSG(bit_position < BitSize<T>(), "bit_position must be smaller than size of T");
    const T with_bit_cleared = ClearBit<T>(bit_position, value);
    const auto replacement = static_cast<T>(new_bit) << bit_position;
    return with_bit_cleared | replacement;
 }
 /// Modifies a single bit at bit_position from value of type T.
 ///
 /// bit_position is validated at compile time. The bit is cleared and then
 /// replaced by new_bit here rather than by forwarding to the runtime
 /// overload: that overload is not declared constexpr, so forwarding to it
 /// would make this function unusable in constant expressions despite its
 /// constexpr specifier.
 template<size_t bit_position, typename T>
 constexpr T ModifyBit(const T value, bool new_bit) {
    static_assert(bit_position < BitSize<T>(), "bit_position must be smaller than size of T");
    return static_cast<T>((value & ~(static_cast<T>(1) << bit_position))
                          | (static_cast<T>(new_bit) << bit_position));
 }
 #ifdef _MSC_VER
 #pragma warning(pop)
 #endif
@ -112,11 +152,8 @@ inline size_t LowestSetBit(T value) {
 }
 template <typename T>
-inline T Ones(size_t count) {
+inline bool MostSignificantBit(T value) {
-    ASSERT_MSG(count <= BitSize<T>(), "count larger than bitsize of T");
+    return Bit<BitSize<T>() - 1, T>(value);
    if (count == BitSize<T>())
        return ~static_cast<T>(0);
    return ~(~static_cast<T>(0) << count);
 }
 template <typename T>
--- a/src/common/fp/fpsr.h
+++ b/src/common/fp/fpsr.h
@ -0,0 +1,162 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #pragma once
 #include <boost/optional.hpp>
 #include "common/bit_util.h"
 #include "common/common_types.h"
 namespace Dynarmic::FP {
 /**
 * Value wrapper for the Floating-Point Status Register.
 *
 * Loading a raw u32 keeps only the bits covered by `mask`; each named flag
 * has a getter and a same-named setter taking the new bit value.
 */
 class FPSR final {
    // Bits 5-6 and 8-26 are reserved.
    static constexpr u32 mask = 0xF800009F;
    u32 value = 0;

    /// Reads the flag stored at bit index `bit`.
    template<size_t bit>
    bool Flag() const {
        return Common::Bit<bit>(value);
    }

    /// Overwrites the flag stored at bit index `bit`.
    template<size_t bit>
    void Flag(bool new_state) {
        value = Common::ModifyBit<bit>(value, new_state);
    }

 public:
    FPSR() = default;
    FPSR(const FPSR&) = default;
    FPSR(FPSR&&) = default;
    explicit FPSR(u32 data) : value{data & mask} {}

    FPSR& operator=(const FPSR&) = default;
    FPSR& operator=(FPSR&&) = default;
    FPSR& operator=(u32 data) {
        value = data & mask;
        return *this;
    }

    /// Negative condition flag (bit 31)
    bool N() const { return Flag<31>(); }
    void N(bool N_) { Flag<31>(N_); }

    /// Zero condition flag (bit 30)
    bool Z() const { return Flag<30>(); }
    void Z(bool Z_) { Flag<30>(Z_); }

    /// Carry condition flag (bit 29)
    bool C() const { return Flag<29>(); }
    void C(bool C_) { Flag<29>(C_); }

    /// Overflow condition flag (bit 28)
    bool V() const { return Flag<28>(); }
    void V(bool V_) { Flag<28>(V_); }

    /// Cumulative saturation bit (bit 27)
    bool QC() const { return Flag<27>(); }
    void QC(bool QC_) { Flag<27>(QC_); }

    /// Input denormal floating-point exception bit (bit 7)
    bool IDC() const { return Flag<7>(); }
    void IDC(bool IDC_) { Flag<7>(IDC_); }

    /// Inexact cumulative floating-point exception bit (bit 4)
    bool IXC() const { return Flag<4>(); }
    void IXC(bool IXC_) { Flag<4>(IXC_); }

    /// Underflow cumulative floating-point exception bit (bit 3)
    bool UFC() const { return Flag<3>(); }
    void UFC(bool UFC_) { Flag<3>(UFC_); }

    /// Overflow cumulative floating-point exception bit (bit 2)
    bool OFC() const { return Flag<2>(); }
    void OFC(bool OFC_) { Flag<2>(OFC_); }

    /// Divide by zero cumulative floating-point exception bit (bit 1)
    bool DZC() const { return Flag<1>(); }
    void DZC(bool DZC_) { Flag<1>(DZC_); }

    /// Invalid operation cumulative floating-point exception bit (bit 0)
    bool IOC() const { return Flag<0>(); }
    void IOC(bool IOC_) { Flag<0>(IOC_); }

    /// Gets the underlying raw value within the FPSR.
    u32 Value() const { return value; }
 };
 /// Two FPSR objects are equal when their raw register values match.
 inline bool operator==(FPSR lhs, FPSR rhs) {
    const u32 left_bits = lhs.Value();
    const u32 right_bits = rhs.Value();
    return left_bits == right_bits;
 }
 /// Negation of the FPSR equality comparison.
 inline bool operator!=(FPSR lhs, FPSR rhs) {
    const bool same = (lhs == rhs);
    return !same;
 }
 } // namespace Dynarmic::FP
--- a/src/common/fp/info.h
+++ b/src/common/fp/info.h
@ -0,0 +1,58 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #pragma once
 #include "common/common_types.h"
 namespace Dynarmic::FP {
 /// Primary template: intentionally empty. The format constants are supplied
 /// by the explicit specializations for the integer type that holds the raw
 /// bits of the floating-point format (u32 and u64 in this file).
 template<typename FPT>
 struct FPInfo {};
 /// Format constants for a floating-point value stored in 32 bits.
 template<>
 struct FPInfo<u32> {
    // Field widths, in bits.
    static constexpr size_t total_width = 32;
    static constexpr size_t exponent_width = 8;
    static constexpr size_t explicit_mantissa_width = 23;
    static constexpr size_t mantissa_width = explicit_mantissa_width + 1;

    // Field masks, derived from the widths above.
    static constexpr u32 implicit_leading_bit = u32(1) << explicit_mantissa_width;
    static constexpr u32 sign_mask = u32(1) << (total_width - 1);
    static constexpr u32 mantissa_mask = implicit_leading_bit - 1;
    static constexpr u32 exponent_mask = ((u32(1) << exponent_width) - 1) << explicit_mantissa_width;

    // Exponent range and bias.
    static constexpr int exponent_bias = 127;
    static constexpr int exponent_max = exponent_bias;
    static constexpr int exponent_min = 1 - exponent_bias;

    /// Bit pattern of zero with the requested sign.
    static constexpr u32 Zero(bool sign) {
        return sign ? sign_mask : 0;
    }

    /// Bit pattern of infinity with the requested sign.
    static constexpr u32 Infinity(bool sign) {
        return Zero(sign) | exponent_mask;
    }

    /// Bit pattern of the largest finite magnitude with the requested sign.
    static constexpr u32 MaxNormal(bool sign) {
        return Zero(sign) | (exponent_mask - 1);
    }
 };
 /// Format constants for a floating-point value stored in 64 bits.
 template<>
 struct FPInfo<u64> {
    // Field widths, in bits.
    static constexpr size_t total_width = 64;
    static constexpr size_t exponent_width = 11;
    static constexpr size_t explicit_mantissa_width = 52;
    static constexpr size_t mantissa_width = explicit_mantissa_width + 1;

    // Field masks, derived from the widths above.
    static constexpr u64 implicit_leading_bit = u64(1) << explicit_mantissa_width;
    static constexpr u64 sign_mask = u64(1) << (total_width - 1);
    static constexpr u64 mantissa_mask = implicit_leading_bit - 1;
    static constexpr u64 exponent_mask = ((u64(1) << exponent_width) - 1) << explicit_mantissa_width;

    // Exponent range and bias.
    static constexpr int exponent_bias = 1023;
    static constexpr int exponent_max = exponent_bias;
    static constexpr int exponent_min = 1 - exponent_bias;

    /// Bit pattern of zero with the requested sign.
    static constexpr u64 Zero(bool sign) {
        return sign ? sign_mask : 0;
    }

    /// Bit pattern of infinity with the requested sign.
    static constexpr u64 Infinity(bool sign) {
        return Zero(sign) | exponent_mask;
    }

    /// Bit pattern of the largest finite magnitude with the requested sign.
    static constexpr u64 MaxNormal(bool sign) {
        return Zero(sign) | (exponent_mask - 1);
    }
 };
 } // namespace Dynarmic::FP 
--- a/src/common/fp/mantissa_util.h
+++ b/src/common/fp/mantissa_util.h
@ -0,0 +1,48 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #pragma once
 #include "common/bit_util.h"
 #include "common/common_types.h"
 namespace Dynarmic::FP {
 /// Classification of the bits discarded by a right shift, relative to half of
 /// the least significant bit that is kept.
 /// NOTE: the enumerators are declared in increasing order of magnitude; callers
 /// compare them with relational operators (e.g. `error > ResidualError::Half`),
 /// so the order must not be changed.
 enum class ResidualError {
    Zero,             // Nothing was discarded.
    LessThanHalf,     // Discarded part is non-zero but below one half.
    Half,             // Discarded part is exactly one half.
    GreaterThanHalf,  // Discarded part is above one half.
 };
 /// Classifies the bits that would be lost when `mantissa` is shifted right by
 /// `shift_amount`, without performing the shift.
 /// @param mantissa      Value about to be shifted.
 /// @param shift_amount  Right-shift distance; zero or negative loses nothing.
 /// @return How the discarded bits compare with half of the lowest kept bit.
 template<typename MantissaT>
 ResidualError ResidualErrorOnRightShift(MantissaT mantissa, int shift_amount) {
    // No bits leave the value, or there are no set bits to lose.
    if (mantissa == 0 || shift_amount <= 0) {
        return ResidualError::Zero;
    }

    // The half position lies beyond the top of the value: the answer is decided
    // solely by the most significant bit of the mantissa.
    const int mantissa_bits = static_cast<int>(Common::BitSize<MantissaT>());
    if (shift_amount > mantissa_bits) {
        if (Common::MostSignificantBit(mantissa)) {
            return ResidualError::GreaterThanHalf;
        }
        return ResidualError::LessThanHalf;
    }

    const size_t discarded_count = static_cast<size_t>(shift_amount);
    const MantissaT discarded = mantissa & Common::Ones<MantissaT>(discarded_count);
    // Value of the highest discarded bit, i.e. exactly one half.
    const MantissaT half_ulp = static_cast<MantissaT>(1) << (discarded_count - 1);

    if (discarded == 0) {
        return ResidualError::Zero;
    }
    if (discarded == half_ulp) {
        return ResidualError::Half;
    }
    return discarded < half_ulp ? ResidualError::LessThanHalf : ResidualError::GreaterThanHalf;
 }
 } // namespace Dynarmic::FP 
--- a/src/common/fp/op.cpp
+++ b/src/common/fp/op.cpp
@ -0,0 +1,101 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #include "common/assert.h"
 #include "common/bit_util.h"
 #include "common/common_types.h"
 #include "common/safe_ops.h"
 #include "common/fp/fpsr.h"
 #include "common/fp/mantissa_util.h"
 #include "common/fp/op.h"
 #include "common/fp/process_exception.h"
 #include "common/fp/rounding_mode.h"
 #include "common/fp/unpacked.h"
 #include "frontend/A64/FPCR.h"
 namespace Dynarmic::FP {
 /// Converts the floating-point bit pattern `op` to a fixed-point integer.
 ///
 /// @param ibits     Width of the result in bits; asserted to be at most 64.
 /// @param op        Raw floating-point bit pattern (u32 or u64).
 /// @param fbits     Number of fractional bits; the input is scaled by 2^fbits.
 ///                  Asserted to be at most ibits.
 /// @param unsigned_ True to produce an unsigned result, false for a signed one.
 /// @param fpcr      Control register passed to unpacking and exception processing.
 /// @param rounding  Rounding mode to apply; ToOdd is not supported (asserted).
 /// @param fpsr      Status register that receives the exception flags.
 /// @return The converted value in the low `ibits` bits, saturated on overflow.
 template<typename FPT>
 u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) {
    ASSERT(rounding != RoundingMode::ToOdd);
    ASSERT(ibits <= 64);
    ASSERT(fbits <= ibits);
    auto [type, sign, value] = FPUnpack<FPT>(op, fpcr, fpsr);
    // NaNs raise InvalidOp and then fall through: FPUnpack gives them a zero
    // mantissa, so the zero check below returns 0 for them.
    if (type == FPType::SNaN || type == FPType::QNaN) {
        FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
    }
    // Handle zero
    if (value.mantissa == 0) {
        return 0;
    }
    // Any non-zero negative input to an unsigned conversion is reported as
    // InvalidOp and yields 0, regardless of rounding mode.
    if (sign && unsigned_) {
        FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
        return 0;
    }
    // value *= 2.0^fbits
    value.exponent += static_cast<int>(fbits);
    // Work on the signed (two's complement) integer form of the mantissa.
    u64 int_result = sign ? Safe::Negate<u64>(value.mantissa) : static_cast<u64>(value.mantissa);
    // Classify the bits that the shift below discards (a negative exponent is a right shift).
    const ResidualError error = ResidualErrorOnRightShift(int_result, -value.exponent);
    int_result = Safe::ArithmeticShiftLeft(int_result, value.exponent);
    // Decide whether the shifted result must be incremented by one.
    bool round_up = false;
    switch (rounding) {
    case RoundingMode::ToNearest_TieEven:
        // Ties go to the even result, i.e. increment only if bit 0 is set.
        round_up = error > ResidualError::Half || (error == ResidualError::Half && Common::Bit<0>(int_result));
        break;
    case RoundingMode::TowardsPlusInfinity:
        round_up = error != ResidualError::Zero;
        break;
    case RoundingMode::TowardsMinusInfinity:
        // Never increments; presumably the arithmetic right shift has already
        // rounded towards minus infinity.
        round_up = false;
        break;
    case RoundingMode::TowardsZero:
        // Increment only inexact results whose sign bit is set.
        round_up = error != ResidualError::Zero && Common::MostSignificantBit(int_result);
        break;
    case RoundingMode::ToNearest_TieAwayFromZero:
        // On a tie, increment only results whose sign bit is clear.
        round_up = error > ResidualError::Half || (error == ResidualError::Half && !Common::MostSignificantBit(int_result));
        break;
    case RoundingMode::ToOdd:
        UNREACHABLE();
    }
    if (round_up) {
        int_result++;
    }
    // Detect Overflow
    // Smallest exponent at which the (possibly incremented) mantissa no longer
    // fits in ibits; signed results reserve one bit for the sign.
    const int min_exponent_for_overflow = static_cast<int>(ibits) - static_cast<int>(Common::HighestSetBit(value.mantissa + (round_up ? 1 : 0))) - (unsigned_ ? 0 : 1);
    if (value.exponent >= min_exponent_for_overflow) {
        // Positive overflow
        // Saturates to all ones over the value bits (ibits, or ibits - 1 when signed).
        if (unsigned_ || !sign) {
            FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
            return Common::Ones<u64>(ibits - (unsigned_ ? 0 : 1));
        }
        // Negative overflow
        // The most negative representable value is the one in-range case at this
        // exponent; everything else saturates to it and raises InvalidOp.
        const u64 min_value = Safe::Negate<u64>(static_cast<u64>(1) << (ibits - 1));
        if (!(value.exponent == min_exponent_for_overflow && int_result == min_value)) {
            FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
            return static_cast<u64>(1) << (ibits - 1);
        }
    }
    // In-range but inexact results raise Inexact.
    if (error != ResidualError::Zero) {
        FPProcessException(FPExc::Inexact, fpcr, fpsr);
    }
    // Truncate to the requested result width.
    return int_result & Common::Ones<u64>(ibits);
 }
 // Explicit instantiations for the two supported storage types.
 template u64 FPToFixed<u32>(size_t ibits, u32 op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
 template u64 FPToFixed<u64>(size_t ibits, u64 op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
 } // namespace Dynarmic::FP 
--- a/src/common/fp/op.h
+++ b/src/common/fp/op.h
@ -0,0 +1,21 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #pragma once
 #include "common/common_types.h"
 #include "common/fp/fpsr.h"
 #include "common/fp/rounding_mode.h"
 #include "frontend/A64/FPCR.h"
 namespace Dynarmic::FP {
 // The A64 FPCR type is used as the control-register type throughout this namespace.
 using FPCR = A64::FPCR;
 /// Converts the floating-point bit pattern `op` to a fixed-point integer that is
 /// `ibits` wide with `fbits` fractional bits, using the given rounding mode.
 /// Exception flags are recorded in `fpsr`. Defined (and explicitly
 /// instantiated for u32 and u64) in op.cpp.
 template<typename FPT>
 u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
 } // namespace Dynarmic::FP 
--- a/src/common/fp/process_exception.cpp
+++ b/src/common/fp/process_exception.cpp
@ -0,0 +1,58 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #include "common/assert.h"
 #include "common/fp/fpsr.h"
 #include "common/fp/process_exception.h"
 #include "frontend/A64/FPCR.h"
 namespace Dynarmic::FP {
 /// Records a floating-point exception.
 /// If the matching trap-enable bit is set in `fpcr`, the trapped path is not
 /// implemented; otherwise the matching cumulative flag is set in `fpsr`.
 /// @param exception Which exception occurred.
 /// @param fpcr      Control register supplying the trap-enable bits.
 /// @param fpsr      Status register whose cumulative flag is set.
 void FPProcessException(FPExc exception, FPCR fpcr, FPSR& fpsr) {
    // Trapped exceptions are not supported.
    const auto reject_if_trapped = [](bool trap_enabled) {
        if (trap_enabled) {
            UNIMPLEMENTED();
        }
    };

    switch (exception) {
    case FPExc::InvalidOp:
        reject_if_trapped(fpcr.IOE());
        fpsr.IOC(true);
        return;
    case FPExc::DivideByZero:
        reject_if_trapped(fpcr.DZE());
        fpsr.DZC(true);
        return;
    case FPExc::Overflow:
        reject_if_trapped(fpcr.OFE());
        fpsr.OFC(true);
        return;
    case FPExc::Underflow:
        reject_if_trapped(fpcr.UFE());
        fpsr.UFC(true);
        return;
    case FPExc::Inexact:
        reject_if_trapped(fpcr.IXE());
        fpsr.IXC(true);
        return;
    case FPExc::InputDenorm:
        reject_if_trapped(fpcr.IDE());
        fpsr.IDC(true);
        return;
    }

    // Only reached for a value outside the enumeration.
    UNREACHABLE();
 }
 } // namespace Dynarmic::FP 
--- a/src/common/fp/process_exception.h
+++ b/src/common/fp/process_exception.h
@ -0,0 +1,27 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #pragma once
 #include "common/fp/fpsr.h"
 #include "frontend/A64/FPCR.h"
 namespace Dynarmic::FP {
 using FPCR = A64::FPCR;
 /// Floating-point exception kinds accepted by FPProcessException.
 /// Each one maps to a trap-enable bit in the FPCR and a cumulative flag in the FPSR.
 enum class FPExc {
    InvalidOp,     // FPCR.IOE / FPSR.IOC
    DivideByZero,  // FPCR.DZE / FPSR.DZC
    Overflow,      // FPCR.OFE / FPSR.OFC
    Underflow,     // FPCR.UFE / FPSR.UFC
    Inexact,       // FPCR.IXE / FPSR.IXC
    InputDenorm,   // FPCR.IDE / FPSR.IDC
 };
 /// Records `exception`: sets the matching cumulative flag in `fpsr`, unless the
 /// matching trap-enable bit in `fpcr` is set (that case is unimplemented).
 void FPProcessException(FPExc exception, FPCR fpcr, FPSR& fpsr);
 } // namespace Dynarmic::FP 
--- a/src/common/fp/unpacked.cpp
+++ b/src/common/fp/unpacked.cpp
@ -0,0 +1,179 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #include "common/fp/info.h"
 #include "common/fp/process_exception.h"
 #include "common/fp/unpacked.h"
 #include "common/safe_ops.h"
 namespace Dynarmic::FP {
 /// Splits a raw floating-point bit pattern into its class, sign, and an
 /// unpacked (sign, exponent, mantissa) triple whose value is
 /// (sign ? -1 : +1) * mantissa * 2^exponent.
 ///
 /// @param op   Raw bit pattern (u32 or u64).
 /// @param fpcr Control register; FZ selects flushing of denormal inputs to zero.
 /// @param fpsr Status register; receives InputDenorm when a denormal is flushed.
 /// @return {type, sign, unpacked}. Zeros and NaNs carry a zero mantissa; infinity
 ///         is represented by a mantissa of 1 with a very large exponent.
 template<typename FPT>
 std::tuple<FPType, bool, FPUnpacked<u64>> FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr) {
    // Bit positions of the fields, derived from the format description.
    constexpr size_t sign_bit = FPInfo<FPT>::exponent_width + FPInfo<FPT>::explicit_mantissa_width;
    constexpr size_t exponent_high_bit = FPInfo<FPT>::exponent_width + FPInfo<FPT>::explicit_mantissa_width - 1;
    constexpr size_t exponent_low_bit = FPInfo<FPT>::explicit_mantissa_width;
    constexpr size_t mantissa_high_bit = FPInfo<FPT>::explicit_mantissa_width - 1;
    constexpr size_t mantissa_low_bit = 0;
    // Denormals have no implicit leading bit, so their exponent is a fixed constant.
    constexpr int denormal_exponent = FPInfo<FPT>::exponent_min - int(FPInfo<FPT>::explicit_mantissa_width);
    const bool sign = Common::Bit<sign_bit>(op);
    const FPT exp_raw = Common::Bits<exponent_low_bit, exponent_high_bit>(op);
    const FPT frac_raw = Common::Bits<mantissa_low_bit, mantissa_high_bit>(op);
    // Zero exponent field: either a true zero or a denormal.
    if (exp_raw == 0) {
        if (frac_raw == 0 || fpcr.FZ()) {
            // A denormal flushed to zero is reported as InputDenorm.
            if (frac_raw != 0) {
                FPProcessException(FPExc::InputDenorm, fpcr, fpsr);
            }
            return {FPType::Zero, sign, {sign, 0, 0}};
        }
        return {FPType::Nonzero, sign, {sign, denormal_exponent, frac_raw}};
    }
    // All-ones exponent field: infinity or NaN.
    if (exp_raw == Common::Ones<FPT>(FPInfo<FPT>::exponent_width)) {
        if (frac_raw == 0) {
            return {FPType::Infinity, sign, {sign, 1000000, 1}};
        }
        // The top mantissa bit distinguishes quiet from signalling NaNs.
        const bool is_quiet = Common::Bit<mantissa_high_bit>(frac_raw);
        return {is_quiet ? FPType::QNaN : FPType::SNaN, sign, {sign, 0, 0}};
    }
    // Normal number. The width is cast to int so the whole subtraction is done in
    // signed arithmetic (the result is negative for small exponents), matching
    // how denormal_exponent is computed above.
    const int exp = static_cast<int>(exp_raw) - FPInfo<FPT>::exponent_bias - static_cast<int>(FPInfo<FPT>::explicit_mantissa_width);
    const u64 frac = frac_raw | FPInfo<FPT>::implicit_leading_bit;
    return {FPType::Nonzero, sign, {sign, exp, frac}};
 }
 // Explicit instantiations for the two supported storage types.
 template std::tuple<FPType, bool, FPUnpacked<u64>> FPUnpack<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
 template std::tuple<FPType, bool, FPUnpacked<u64>> FPUnpack<u64>(u64 op, FPCR fpcr, FPSR& fpsr);
 /// Shifts the mantissa of `op` so that its highest set bit lands on bit F.
 /// @return {sign, exponent, mantissa, error}: `exponent` is op.exponent plus the
 ///         index of the highest set bit, `mantissa` is the shifted mantissa, and
 ///         `error` holds the bits shifted out, aligned to the top of MantissaT.
 template<size_t F, typename MantissaT>
 std::tuple<bool, int, MantissaT, MantissaT> Normalize(FPUnpacked<MantissaT> op) {
    const int top_bit = Common::HighestSetBit(op.mantissa);
    // Positive when the mantissa must move right, negative when it must move left.
    const int right_shift = top_bit - static_cast<int>(F);
    const MantissaT zero = static_cast<MantissaT>(0);

    return std::make_tuple(
        op.sign,
        op.exponent + top_bit,
        Safe::LogicalShiftRight(op.mantissa, right_shift),
        Safe::LogicalShiftRightDouble(op.mantissa, zero, right_shift));
 }
 /// Rounds an unpacked, non-zero value and packs it into the FPT bit format.
 ///
 /// @param op       Unpacked value; its mantissa must be non-zero (asserted).
 /// @param fpcr     Control register (FZ / FZ16 / UFE / AHP are consulted).
 /// @param rounding Rounding mode; ToNearest_TieAwayFromZero is rejected (asserted).
 /// @param fpsr     Status register that receives the exception flags.
 /// @return The packed result: rounded value, signed zero when flushed, or the
 ///         overflow result (infinity or largest normal, depending on rounding).
 template<typename FPT, typename MantissaT>
 FPT FPRoundBase(FPUnpacked<MantissaT> op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) {
    ASSERT(op.mantissa != 0);
    ASSERT(rounding != RoundingMode::ToNearest_TieAwayFromZero);
    constexpr int minimum_exp = FPInfo<FPT>::exponent_min;
    constexpr size_t E = FPInfo<FPT>::exponent_width;
    constexpr size_t F = FPInfo<FPT>::explicit_mantissa_width;
    constexpr bool isFP16 = FPInfo<FPT>::total_width == 16;
    // Align the mantissa so its highest set bit is at position F; `error` holds
    // the bits shifted out.
    auto [sign, exponent, mantissa, error] = Normalize<F>(op);
    // Flush-to-zero: results below the normal range become a signed zero. The
    // underflow flag is set directly rather than through FPProcessException.
    if (((!isFP16 && fpcr.FZ()) || (isFP16 && fpcr.FZ16())) && exponent < minimum_exp) {
        fpsr.UFC(true);
        return FPInfo<FPT>::Zero(sign);
    }
    // A biased exponent of 0 marks a denormal result.
    int biased_exp = std::max<int>(exponent - minimum_exp + 1, 0);
    if (biased_exp == 0) {
        // Denormalize: shift mantissa and error right together. The error must be
        // updated first because it is computed from the unshifted mantissa.
        error = Safe::LogicalShiftRightDouble(mantissa, error, minimum_exp - exponent);
        mantissa = Safe::LogicalShiftRight(mantissa, minimum_exp - exponent);
    }
    if (biased_exp == 0 && (error != 0 || fpcr.UFE())) {
        FPProcessException(FPExc::Underflow, fpcr, fpsr);
    }
    // Decide whether to increment the mantissa and what an overflow produces.
    // Modes not listed here leave both flags false.
    bool round_up = false, overflow_to_inf = false;
    switch (rounding) {
    case RoundingMode::ToNearest_TieEven: {
        // `error` is aligned to the top of MantissaT, so one half is its top bit.
        constexpr MantissaT half = static_cast<MantissaT>(1) << (Common::BitSize<MantissaT>() - 1);
        round_up = (error > half) || (error == half && Common::Bit<0>(mantissa));
        overflow_to_inf = true;
        break;
    }
    case RoundingMode::TowardsPlusInfinity:
        round_up = error != 0 && !sign;
        overflow_to_inf = !sign;
        break;
    case RoundingMode::TowardsMinusInfinity:
        round_up = error != 0 && sign;
        overflow_to_inf = sign;
        break;
    default:
        break;
    }
    if (round_up) {
        if ((mantissa & FPInfo<FPT>::mantissa_mask) == FPInfo<FPT>::mantissa_mask) {
            // Overflow on rounding up is going to happen
            if (mantissa == FPInfo<FPT>::mantissa_mask) {
                // Rounding up from denormal to normal
                mantissa++;
                biased_exp++;
            } else {
                // Rounding up to next exponent
                mantissa = (mantissa + 1) / 2;
                biased_exp++;
            }
        } else {
            mantissa++;
        }
    }
    // Round-to-odd: an inexact result has its lowest mantissa bit forced to 1.
    if (error != 0 && rounding == RoundingMode::ToOdd) {
        mantissa = Common::ModifyBit<0>(mantissa, true);
    }
    FPT result = 0;
 #ifdef _MSC_VER
 #pragma warning(push)
 #pragma warning(disable:4127) // C4127: conditional expression is constant
 #endif
    // First branch: every case except 16-bit results with FPCR.AHP set.
    if (!isFP16 || !fpcr.AHP()) {
 #ifdef _MSC_VER
 #pragma warning(pop)
 #endif
        // The all-ones exponent is reserved here, so reaching it is an overflow.
        constexpr int max_biased_exp = (1 << E) - 1;
        if (biased_exp >= max_biased_exp) {
            result = overflow_to_inf ? FPInfo<FPT>::Infinity(sign) : FPInfo<FPT>::MaxNormal(sign);
            FPProcessException(FPExc::Overflow, fpcr, fpsr);
            FPProcessException(FPExc::Inexact, fpcr, fpsr);
        } else {
            // Pack sign | biased exponent | explicit mantissa bits.
            result = sign ? 1 : 0;
            result <<= E;
            result += biased_exp;
            result <<= F;
            result |= static_cast<FPT>(mantissa) & FPInfo<FPT>::mantissa_mask;
            if (error != 0) {
                FPProcessException(FPExc::Inexact, fpcr, fpsr);
            }
        }
    } else {
        // 16-bit result with FPCR.AHP set: the all-ones exponent is usable, and an
        // overflow saturates and raises InvalidOp instead of Overflow.
        constexpr int max_biased_exp = (1 << E);
        if (biased_exp >= max_biased_exp) {
            result = sign ? 0xFFFF : 0x7FFF;
            FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
        } else {
            // Pack sign | biased exponent | explicit mantissa bits.
            result = sign ? 1 : 0;
            result <<= E;
            result += biased_exp;
            result <<= F;
            result |= static_cast<FPT>(mantissa) & FPInfo<FPT>::mantissa_mask;
            if (error != 0) {
                FPProcessException(FPExc::Inexact, fpcr, fpsr);
            }
        }
    }
    return result;
 }
 // Explicit instantiations: 32-bit and 64-bit results from a 64-bit mantissa.
 template u32 FPRoundBase<u32, u64>(FPUnpacked<u64> op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
 template u64 FPRoundBase<u64, u64>(FPUnpacked<u64> op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
 } // namespace Dynarmic::FP
--- a/src/common/fp/unpacked.h
+++ b/src/common/fp/unpacked.h
@ -0,0 +1,57 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #pragma once
 #include <tuple>
 #include "common/common_types.h"
 #include "common/fp/fpsr.h"
 #include "frontend/A64/FPCR.h"
 namespace Dynarmic::FP {
 using FPCR = A64::FPCR;
 /// Class of a floating-point value, as reported by FPUnpack.
 enum class FPType {
    Nonzero,   // Normal or (unflushed) denormal number.
    Zero,      // Zero, including denormals flushed to zero.
    Infinity,  // All-ones exponent with a zero fraction.
    QNaN,      // NaN with the top fraction bit set.
    SNaN,      // NaN with the top fraction bit clear.
 };
 /// Unpacked floating-point number.
 /// value = (sign ? -1 : +1) * mantissa * 2^exponent
 template<typename MantissaT>
 struct FPUnpacked {
    bool sign;           // True for a negative value.
    int exponent;        // Power-of-two scale applied to the mantissa.
    MantissaT mantissa;  // Unsigned magnitude.
 };

 /// Member-wise equality: true when sign, exponent and mantissa all match.
 template<typename MantissaT>
 inline bool operator==(const FPUnpacked<MantissaT>& a, const FPUnpacked<MantissaT>& b) {
    if (a.sign != b.sign) {
        return false;
    }
    if (a.exponent != b.exponent) {
        return false;
    }
    return a.mantissa == b.mantissa;
 }
 /// Splits the raw bit pattern `op` into {type, sign, unpacked value}.
 /// Defined in unpacked.cpp, which explicitly instantiates it for u32 and u64.
 template<typename FPT>
 std::tuple<FPType, bool, FPUnpacked<u64>> FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr);
 /// Rounds the unpacked value `op` with the given mode and packs it as an FPT.
 /// Defined in unpacked.cpp, which explicitly instantiates it for <u32, u64>
 /// and <u64, u64>.
 template<typename FPT, typename MantissaT>
 FPT FPRoundBase(FPUnpacked<MantissaT> op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
 /// Rounds and packs `op` with an explicit rounding mode.
 /// FPCR.AHP is cleared on a local copy of the control register before the
 /// call is forwarded to FPRoundBase; the caller's FPCR is not modified.
 template<typename FPT, typename MantissaT>
 FPT FPRound(FPUnpacked<MantissaT> op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) {
    FPCR fpcr_without_ahp = fpcr;
    fpcr_without_ahp.AHP(false);
    return FPRoundBase<FPT, MantissaT>(op, fpcr_without_ahp, rounding, fpsr);
 }
 /// Rounds and packs `op` using the rounding mode currently selected in `fpcr`.
 template<typename FPT, typename MantissaT>
 FPT FPRound(FPUnpacked<MantissaT> op, FPCR fpcr, FPSR& fpsr) {
    const RoundingMode selected_mode = fpcr.RMode();
    return FPRound<FPT, MantissaT>(op, fpcr, selected_mode, fpsr);
 }
 } // namespace Dynarmic::FP
--- a/src/common/fp/util.h
+++ b/src/common/fp/util.h
@ -8,8 +8,7 @@
 #include <boost/optional.hpp>
-namespace Dynarmic {
+namespace Dynarmic::FP {
 namespace Common {
 /// Is 32-bit floating point value a QNaN?
 constexpr bool IsQNaN(u32 value) {
@ -110,5 +109,4 @@ inline boost::optional<u64> ProcessNaNs(u64 a, u64 b, u64 c) {
    return boost::none;
 }
-} // namespace Common
+} // namespace Dynarmic::FP
 } // namespace Dynarmic
--- a/src/common/mp/append.h
+++ b/src/common/mp/append.h
@ -0,0 +1,27 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #pragma once
 namespace Dynarmic::Common::mp {
 namespace detail {
 // Primary template: only list-like first arguments are supported (see below).
 template<class... L>
 struct append_impl;
 // Matches any variadic class template instance as the list and re-instantiates
 // it with the extra items placed after the existing elements.
 template<template<class...> class ListT, class... Existing, class... Extra>
 struct append_impl<ListT<Existing...>, Extra...> {
    using type = ListT<Existing..., Extra...>;
 };
 } // namespace detail
 /// Append items T to the end of list L, keeping L's list template.
 template<class L, class... T>
 using append = typename detail::append_impl<L, T...>::type;
 } // namespace Dynarmic::Common::mp
--- a/src/common/mp/bind.h
+++ b/src/common/mp/bind.h
@ -0,0 +1,18 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #pragma once
 namespace Dynarmic::Common::mp {
 /// Partially applies metafunction F: the arguments A are fixed as its leading
 /// parameters, and the nested `type` alias supplies the remaining ones.
 template<template<class...> class F, class... A>
 struct bind {
    template<class... Remaining>
    using type = F<A..., Remaining...>;
 };
 } // namespace Dynarmic::Common::mp
--- a/src/common/mp/cartesian_product.h
+++ b/src/common/mp/cartesian_product.h
@ -0,0 +1,51 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #pragma once
 #include "common/mp/append.h"
 #include "common/mp/bind.h"
 #include "common/mp/concat.h"
 #include "common/mp/fmap.h"
 #include "common/mp/list.h"
 namespace Dynarmic::Common::mp {
 namespace detail {
 // Primary template. The first argument is the result accumulated so far (a list
 // of partial tuples); the remaining arguments are the lists still to be combined.
 template<class... Ls>
 struct cartesian_product_impl{};
 // Base case: nothing left to combine, the accumulated result is the answer.
 template<class RL>
 struct cartesian_product_impl<RL> {
    using type = RL;
 };
 // One list left: extend every partial tuple RT with every element of T1 and
 // concatenate the resulting lists.
 template<template<class...> class LT, class... RT, class... T1>
 struct cartesian_product_impl<LT<RT...>, LT<T1...>> {
    using type = concat<
        fmap<bind<append, RT>::template type, list<T1...>>...
    >;
 };
 // Two or more lists left: fold the first one into the accumulated result, then
 // recurse on the rest.
 template<class RL, class L1, class L2, class... Ls>
 struct cartesian_product_impl<RL, L1, L2, Ls...> {
    using type = typename cartesian_product_impl<
        typename cartesian_product_impl<RL, L1>::type,
        L2,
        Ls...
    >::type;
 };
 } // namespace detail
 /// Produces the cartesian product of a set of lists.
 /// For example:
 /// cartesian_product<list<A, B>, list<D, E>> == list<list<A, D>, list<A, E>, list<B, D>, list<B, E>>
 // The first list is seeded by wrapping each of its elements in a one-element list.
 template<typename L1, typename... Ls>
 using cartesian_product = typename detail::cartesian_product_impl<fmap<list, L1>, Ls...>::type;
 } // namespace Dynarmic::Common::mp
--- a/src/common/mp/concat.h
+++ b/src/common/mp/concat.h
@ -0,0 +1,57 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #pragma once
 #include "common/mp/list.h"
 namespace Dynarmic::Common::mp {
 namespace detail {
 // Primary template; behaviour is provided by the specializations below.
 template<class... L>
 struct concat_impl;
 // No lists: the result is an empty list.
 template<>
 struct concat_impl<> {
    using type = list<>;
 };
 // A single list is returned unchanged.
 template<class L>
 struct concat_impl<L> {
    using type = L;
 };
 // Two or more lists: merge the first two and recurse on the remainder.
 // All lists must be instances of the same list template LT.
 template<template<class...> class LT, class... T1, class... T2, class... Ls>
 struct concat_impl<LT<T1...>, LT<T2...>, Ls...> {
    using type = typename concat_impl<LT<T1..., T2...>, Ls...>::type;
 };
 // Sixteen or more lists: merge sixteen in one step. This is more specialized
 // than the two-list case above, so it is selected whenever at least sixteen
 // lists are supplied; presumably this is there to reduce recursion depth.
 template<template<class...> class LT,
         class... T1, class... T2, class... T3, class... T4, class... T5, class... T6, class... T7, class... T8,
         class... T9, class... T10, class... T11, class... T12, class... T13, class... T14, class... T15, class... T16,
         class... Ls>
 struct concat_impl<
        LT<T1...>, LT<T2...>, LT<T3...>, LT<T4...>, LT<T5...>, LT<T6...>, LT<T7...>, LT<T8...>,
        LT<T9...>, LT<T10...>, LT<T11...>, LT<T12...>, LT<T13...>, LT<T14...>, LT<T15...>, LT<T16...>,
        Ls...>
 {
    using type = typename concat_impl<
        LT<
            T1..., T2..., T3..., T4..., T5..., T6..., T7..., T8...,
            T9..., T10..., T11..., T12..., T13..., T14..., T15..., T16...
        >,
        Ls...
    >::type;
 };
 } // namespace detail
 /// Concatenate lists together, preserving element order.
 /// With no arguments the result is list<>.
 template<class... L>
 using concat = typename detail::concat_impl<L...>::type;
 } // namespace Dynarmic::Common::mp
--- a/src/common/mp/fapply.h
+++ b/src/common/mp/fapply.h
@ -0,0 +1,27 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #pragma once
 namespace Dynarmic::Common::mp {
 namespace detail {
 // Primary template: only list-like second arguments are supported (see below).
 template<template<class...> class F, class L>
 struct fapply_impl;
 // Unpacks the elements of the list and hands them to F as its argument list.
 template<template<class...> class F, template<class...> class ListT, class... Elements>
 struct fapply_impl<F, ListT<Elements...>> {
    using type = F<Elements...>;
 };
 } // namespace detail
 /// Invokes metafunction F with the members of list L as its arguments.
 template<template<class...> class F, class L>
 using fapply = typename detail::fapply_impl<F, L>::type;
 } // namespace Dynarmic::Common::mp
--- a/src/common/mp/fmap.h
+++ b/src/common/mp/fmap.h
@ -0,0 +1,27 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #pragma once
 namespace Dynarmic::Common::mp {
 namespace detail {
 // Primary template: only list-like second arguments are supported (see below).
 template<template<class...> class F, class L>
 struct fmap_impl;
 // Applies F to each element individually and rebuilds the same kind of list
 // from the results.
 template<template<class...> class F, template<class...> class ListT, class... Elements>
 struct fmap_impl<F, ListT<Elements...>> {
    using type = ListT<F<Elements>...>;
 };
 } // namespace detail
 /// Metafunction that maps F over list L: each element T becomes F<T>.
 template<template<class...> class F, class L>
 using fmap = typename detail::fmap_impl<F, L>::type;
 } // namespace Dynarmic::Common::mp
--- a/src/common/mp/integer.h
+++ b/src/common/mp/integer.h
@ -0,0 +1,51 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #pragma once
 #include <cstddef>
 #include <cstdint>
 namespace Dynarmic::Common::mp {
 namespace detail {
 // Carries the unsigned/signed pair shared by every supported width.
 template<class UnsignedT, class SignedT>
 struct integer_pair {
    using unsigned_type = UnsignedT;
    using signed_type = SignedT;
 };
 // Unsupported widths get an empty struct, so asking for their types is an error.
 template<std::size_t size>
 struct integer_of_size_impl{};
 template<>
 struct integer_of_size_impl<8> : integer_pair<std::uint8_t, std::int8_t> {};
 template<>
 struct integer_of_size_impl<16> : integer_pair<std::uint16_t, std::int16_t> {};
 template<>
 struct integer_of_size_impl<32> : integer_pair<std::uint32_t, std::int32_t> {};
 template<>
 struct integer_of_size_impl<64> : integer_pair<std::uint64_t, std::int64_t> {};
 } // namespace detail
 /// Unsigned integer type that is exactly `size` bits wide (8, 16, 32 or 64).
 template<std::size_t size>
 using unsigned_integer_of_size = typename detail::integer_of_size_impl<size>::unsigned_type;
 /// Signed integer type that is exactly `size` bits wide (8, 16, 32 or 64).
 template<std::size_t size>
 using signed_integer_of_size = typename detail::integer_of_size_impl<size>::signed_type;
 } // namespace Dynarmic::Common::mp
--- a/src/common/mp/list.h
+++ b/src/common/mp/list.h
@ -0,0 +1,15 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #pragma once
 namespace Dynarmic::Common::mp {
 /// Compile-time list of types. It has no members; the element types are
 /// carried solely by the template argument pack.
 template<class... Elements>
 struct list {};
 } // namespace Dynarmic::Common::mp
--- a/src/common/mp/lut.h
+++ b/src/common/mp/lut.h
@ -0,0 +1,23 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #pragma once
 #include <array>
 #include <map>
 #include <type_traits>
 #include "common/mp/list.h"
 namespace Dynarmic::Common::mp {
 /// Builds a lookup table by calling `f` once for each type in the list.
 ///
 /// @tparam KeyT    Key type of the resulting map.
 /// @tparam ValueT  Mapped type of the resulting map.
 /// @param f        Callable invoked as f(Values{}) for every type in the list; each
 ///                 call must yield something convertible to std::pair<KeyT, ValueT>.
 /// @return A std::map holding the produced pairs. If two entries share a key,
 ///         the earlier one is kept (std::map range construction).
 template <typename KeyT, typename ValueT, typename Function, typename ...Values>
 inline auto GenerateLookupTableFromList(Function f, list<Values...>) {
    // Deliberately not `static`: a function-local static would be filled in only
    // on the first call of each instantiation, so later calls with a different
    // (stateful) `f` would silently receive the first caller's entries.
    const std::array<std::pair<KeyT, ValueT>, sizeof...(Values)> pair_array{f(Values{})...};
    return std::map<KeyT, ValueT>(pair_array.begin(), pair_array.end());
 }
 } // namespace Dynarmic::Common::mp
--- a/src/common/mp/to_tuple.h
+++ b/src/common/mp/to_tuple.h
@ -0,0 +1,29 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #pragma once
 #include <tuple>
 namespace Dynarmic::Common::mp {
 namespace detail {
 // Primary template: only list-like arguments are supported (see below).
 template<class L>
 struct to_tuple_impl;
 // Every element must be a metavalue exposing `value_type` and `value`
 // (for example std::integral_constant).
 template<template<class...> class ListT, class... Metavalue>
 struct to_tuple_impl<ListT<Metavalue...>> {
    static constexpr auto value = std::make_tuple(
        static_cast<typename Metavalue::value_type>(Metavalue::value)...);
 };
 } // namespace detail
 /// Metafunction that converts a list of metavalues to a tuple holding their values.
 template<class L>
 constexpr auto to_tuple = detail::to_tuple_impl<L>::value;
 } // namespace Dynarmic::Common::mp
--- a/src/common/mp/vlift.h
+++ b/src/common/mp/vlift.h
@ -0,0 +1,17 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #pragma once
 #include <type_traits>
 namespace Dynarmic::Common::mp {
 /// Lifts a compile-time value into a type: vlift<V> is the
 /// std::integral_constant whose value_type is decltype(V) and whose value is V.
 template<auto Value>
 using vlift = std::integral_constant<decltype(Value), Value>;
 } // namespace Dynarmic::Common::mp
--- a/src/common/mp/vllift.h
+++ b/src/common/mp/vllift.h
@ -0,0 +1,31 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #pragma once
 #include <type_traits>
 #include "common/mp/list.h"
 namespace Dynarmic::Common::mp {
 namespace detail {
 // Arguments other than std::integer_sequence get an empty struct, so asking
 // for their `type` is an error.
 template<class VL>
 struct vllift_impl{};
 // Wraps each value of the sequence in its own std::integral_constant.
 template<class ValueT, ValueT... Vs>
 struct vllift_impl<std::integer_sequence<ValueT, Vs...>> {
    using type = list<std::integral_constant<ValueT, Vs>...>;
 };
 } // namespace detail
 /// Lifts the values in value list VL (a std::integer_sequence) to create a type list.
 template<class VL>
 using vllift = typename detail::vllift_impl<VL>::type;
 } // namespace Dynarmic::Common::mp
--- a/src/common/safe_ops.h
+++ b/src/common/safe_ops.h
@ -0,0 +1,109 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #include <type_traits>
 #include "common/bit_util.h"
 #include "common/common_types.h"
 #include "common/u128.h"
 namespace Dynarmic::Safe {
 // Forward declarations: each left shift forwards negative amounts to the
 // matching right shift and vice versa, so all four must be visible up front.
 template<typename T> T LogicalShiftLeft(T value, int shift_amount);
 template<typename T> T LogicalShiftRight(T value, int shift_amount);
 template<typename T> T ArithmeticShiftLeft(T value, int shift_amount);
 template<typename T> T ArithmeticShiftRight(T value, int shift_amount);
 /// Logical left shift that accepts any shift amount.
 /// Shifting by the full width or more yields 0; a negative amount shifts
 /// right (logically) by the absolute value instead.
 template<typename T>
 T LogicalShiftLeft(T value, int shift_amount) {
    static_assert(std::is_integral_v<T>);
    using UnsignedT = std::make_unsigned_t<T>;
    const int bit_count = static_cast<int>(Common::BitSize<T>());

    if (shift_amount < 0) {
        return LogicalShiftRight(value, -shift_amount);
    }
    if (shift_amount >= bit_count) {
        return 0;
    }
    // The shift is performed on the unsigned representation.
    return static_cast<T>(static_cast<UnsignedT>(value) << shift_amount);
 }
 /// u128 overload: forwards to the u128 shift operator.
 template<>
 inline u128 LogicalShiftLeft(u128 value, int shift_amount) {
    return value << shift_amount;
 }
 /// Logical right shift that accepts any shift amount.
 /// Shifting by the full width or more yields 0; a negative amount shifts
 /// left (logically) by the absolute value instead.
 template<typename T>
 T LogicalShiftRight(T value, int shift_amount) {
    static_assert(std::is_integral_v<T>);
    using UnsignedT = std::make_unsigned_t<T>;
    const int bit_count = static_cast<int>(Common::BitSize<T>());

    if (shift_amount < 0) {
        return LogicalShiftLeft(value, -shift_amount);
    }
    if (shift_amount >= bit_count) {
        return 0;
    }
    // The shift is performed on the unsigned representation, so zeros are shifted in.
    return static_cast<T>(static_cast<UnsignedT>(value) >> shift_amount);
 }
 /// u128 overload: forwards to the u128 shift operator.
 template<>
 inline u128 LogicalShiftRight(u128 value, int shift_amount) {
    return value >> shift_amount;
 }
 /// Right-shifts the double-width value top:bottom and returns the low half of
 /// the result: bits leaving `top` are shifted into the upper end of `bottom`.
 template<typename T>
 T LogicalShiftRightDouble(T top, T bottom, int shift_amount) {
    const int bit_count = int(Common::BitSize<T>());
    // Part of the result contributed by each half.
    const T from_top = LogicalShiftLeft(top, bit_count - shift_amount);
    const T from_bottom = LogicalShiftRight(bottom, shift_amount);
    return from_top | from_bottom;
 }
 /// Arithmetic left shift that accepts any shift amount.
 /// Shifting by the full width or more yields 0; a negative amount shifts
 /// right (arithmetically) by the absolute value instead.
 template<typename T>
 T ArithmeticShiftLeft(T value, int shift_amount) {
    static_assert(std::is_integral_v<T>);
    if (shift_amount >= static_cast<int>(Common::BitSize<T>())) {
        return 0;
    }
    if (shift_amount < 0) {
        return ArithmeticShiftRight(value, -shift_amount);
    }
    // Shift the unsigned representation: left-shifting a negative signed value is
    // undefined behaviour before C++20, while the unsigned shift yields the same
    // two's complement bit pattern with well-defined semantics.
    auto unsigned_value = static_cast<std::make_unsigned_t<T>>(value);
    return static_cast<T>(unsigned_value << shift_amount);
 }
 /// Arithmetic right shift that accepts any shift amount.
 /// Shifting by the full width or more fills the result with copies of the most
 /// significant bit; a negative amount shifts left by the absolute value instead.
 template<typename T>
 T ArithmeticShiftRight(T value, int shift_amount) {
    static_assert(std::is_integral_v<T>);
    using SignedT = std::make_signed_t<T>;
    const int bit_count = static_cast<int>(Common::BitSize<T>());

    if (shift_amount < 0) {
        return ArithmeticShiftLeft(value, -shift_amount);
    }
    if (shift_amount >= bit_count) {
        // Every bit of the result is a copy of the original top bit.
        if (Common::MostSignificantBit(value)) {
            return ~static_cast<T>(0);
        }
        return 0;
    }
    // The shift is performed on the signed representation.
    return static_cast<T>(static_cast<SignedT>(value) >> shift_amount);
 }
 /// Combines the two halves of the double-width value top:bottom for a right
 /// shift by `shift_amount` and returns the low half of the result.
 template<typename T>
 T ArithmeticShiftRightDouble(T top, T bottom, int shift_amount) {
    const int bit_count = int(Common::BitSize<T>());
    // Part of the result contributed by each half.
    const T from_top = ArithmeticShiftLeft(top, bit_count - shift_amount);
    const T from_bottom = LogicalShiftRight(bottom, shift_amount);
    return from_top | from_bottom;
 }
 /// Two's complement negation of `value`, well defined for every input.
 /// The subtraction is done on the unsigned representation so that negating the
 /// most negative bit pattern (e.g. 1 << 63 for 64-bit types) wraps back to the
 /// same pattern instead of overflowing a signed integer.
 template<typename T>
 T Negate(T value) {
    using UnsignedT = std::make_unsigned_t<T>;
    return static_cast<T>(static_cast<UnsignedT>(0) - static_cast<UnsignedT>(value));
 }
 } // namespace Dynarmic::Safe
--- a/src/common/u128.cpp
+++ b/src/common/u128.cpp
@ -0,0 +1,64 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #include <array>
 #include "common/common_types.h"
 #include "common/u128.h"
 namespace Dynarmic {
 /// Left shift of a 128-bit value.
 /// Negative amounts shift right instead; amounts of 128 or more yield zero.
 u128 operator<<(u128 operand, int amount) {
    if (amount < 0) {
        return operand >> -amount;
    }
    if (amount == 0) {
        return operand;
    }
    if (amount >= 128) {
        return {};
    }
    if (amount >= 64) {
        // The whole lower half moves into the upper half; the lower half empties.
        return u128(0, operand.lower << (amount - 64));
    }
    // 0 < amount < 64: bits leaving the lower half carry into the upper half.
    const u64 carried_bits = operand.lower >> (64 - amount);
    return u128(operand.lower << amount, (operand.upper << amount) | carried_bits);
 }
 /// Logical right shift of a 128-bit value.
 /// Negative amounts shift left instead; amounts of 128 or more yield zero.
 u128 operator>>(u128 operand, int amount) {
    if (amount < 0) {
        return operand << -amount;
    }
    if (amount == 0) {
        return operand;
    }
    if (amount >= 128) {
        return {};
    }
    if (amount >= 64) {
        // The whole upper half moves into the lower half; the upper half empties.
        return u128(operand.upper >> (amount - 64), 0);
    }
    // 0 < amount < 64: bits leaving the upper half carry into the lower half.
    const u64 carried_bits = operand.upper << (64 - amount);
    return u128((operand.lower >> amount) | carried_bits, operand.upper >> amount);
 }
 } // namespace Dynarmic
--- a/src/common/u128.h
+++ b/src/common/u128.h
@ -0,0 +1,57 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #pragma once
 #include <cstring>
 #include <type_traits>
 #include "common/bit_util.h"
 #include "common/common_types.h"
 namespace Dynarmic {
 // 128-bit unsigned integer stored as two 64-bit halves.
 struct u128 {
    u64 lower = 0; // Least significant 64 bits.
    u64 upper = 0; // Most significant 64 bits.

    u128() = default;
    u128(const u128&) = default;
    u128(u128&&) = default;
    u128& operator=(const u128&) = default;
    u128& operator=(u128&&) = default;

    // Builds a value from its low and high halves.
    u128(u64 lower_, u64 upper_) : lower(lower_), upper(upper_) {}

    // Implicit conversion from any integral type of at most 64 bits.
    // The value is placed in the low half; the high half is always zero.
    template <typename T>
    /* implicit */ u128(T value) : lower(value), upper(0) {
        static_assert(std::is_integral_v<T>, "u128 can only be constructed from integral types");
        static_assert(Common::BitSize<T>() <= Common::BitSize<u64>(), "source type must be no wider than 64 bits");
    }
 };

 // Layout guarantees the rest of the code base relies on.
 static_assert(Common::BitSize<u128>() == 128, "u128 must be exactly 128 bits wide");
 static_assert(std::is_standard_layout_v<u128>, "u128 must be standard layout");
 static_assert(std::is_trivially_copyable_v<u128>, "u128 must be trivially copyable");
 // 128-bit addition, wrapping modulo 2^128.
 inline u128 operator+(u128 a, u128 b) {
    const u64 low_sum = a.lower + b.lower;
    // The low-word addition wrapped exactly when the sum ended up below an operand.
    const u64 carry = (a.lower > low_sum) ? 1 : 0;
    return u128(low_sum, a.upper + b.upper + carry);
 }
 // 128-bit subtraction, wrapping modulo 2^128.
 inline u128 operator-(u128 a, u128 b) {
    const u64 low_difference = a.lower - b.lower;
    // The low-word subtraction wrapped exactly when the difference ended up above the minuend.
    const u64 borrow = (a.lower < low_difference) ? 1 : 0;
    return u128(low_difference, a.upper - b.upper - borrow);
 }
 // Shift operators for u128; the definitions live in u128.cpp.
 // A negative `amount` shifts in the opposite direction, and an `amount` of 128 or more yields zero.
 u128 operator<<(u128 operand, int amount);
 u128 operator>>(u128 operand, int amount);
 } // namespace Dynarmic
--- a/src/frontend/A32/translate/translate_arm/vfp2.cpp
+++ b/src/frontend/A32/translate/translate_arm/vfp2.cpp
@ -442,8 +442,8 @@ bool ArmTranslatorVisitor::vfp2_VCVT_to_u32(Cond cond, bool D, size_t Vd, bool s
    if (ConditionPassed(cond)) {
        auto reg_m = ir.GetExtendedRegister(m);
        auto result = sz
-                      ? ir.FPDoubleToU32(reg_m, round_towards_zero, true)
+                      ? ir.FPDoubleToFixedU32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode())
-                      : ir.FPSingleToU32(reg_m, round_towards_zero, true);
+                      : ir.FPSingleToFixedU32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode());
        ir.SetExtendedRegister(d, result);
    }
    return true;
@ -457,8 +457,8 @@ bool ArmTranslatorVisitor::vfp2_VCVT_to_s32(Cond cond, bool D, size_t Vd, bool s
    if (ConditionPassed(cond)) {
        auto reg_m = ir.GetExtendedRegister(m);
        auto result = sz
-                      ? ir.FPDoubleToS32(reg_m, round_towards_zero, true)
+                      ? ir.FPDoubleToFixedS32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode())
-                      : ir.FPSingleToS32(reg_m, round_towards_zero, true);
+                      : ir.FPSingleToFixedS32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode());
        ir.SetExtendedRegister(d, result);
    }
    return true;
--- a/src/frontend/A64/FPCR.h
+++ b/src/frontend/A64/FPCR.h
@ -37,6 +37,11 @@ public:
        return Common::Bit<26>(value);
    }
    /// Sets the alternate half-precision control flag (bit 26 of the register value),
    /// leaving every other bit unchanged.
    void AHP(bool AHP_) {
        value = Common::ModifyBit<26>(value, AHP_);
    }
    /// Default NaN mode control bit.
    bool DN() const {
        return Common::Bit<25>(value);
@ -52,6 +57,10 @@ public:
        return static_cast<FP::RoundingMode>(Common::Bits<22, 23>(value));
    }
    /// Returns bit 19 of the register value.
    /// NOTE(review): presumably the half-precision flush-to-zero control - confirm against the architecture manual.
    bool FZ16() const {
        return Common::Bit<19>(value);
    }
    /// Input denormal exception trap enable flag.
    bool IDE() const {
        return Common::Bit<15>(value);
--- a/src/frontend/A64/decoder/a64.inc
+++ b/src/frontend/A64/decoder/a64.inc
@ -884,17 +884,17 @@ INST(FCVTZS_float_fix,       "FCVTZS (scalar, fixed-point)",              "z0011
 INST(FCVTZU_float_fix,       "FCVTZU (scalar, fixed-point)",              "z0011110yy011001ppppppnnnnnddddd")
 // Data Processing - FP and SIMD - Conversion between floating point and integer
-//INST(FCVTNS_float,           "FCVTNS (scalar)",                           "z0011110yy100000000000nnnnnddddd")
+INST(FCVTNS_float,           "FCVTNS (scalar)",                           "z0011110yy100000000000nnnnnddddd")
-//INST(FCVTNU_float,           "FCVTNU (scalar)",                           "z0011110yy100001000000nnnnnddddd")
+INST(FCVTNU_float,           "FCVTNU (scalar)",                           "z0011110yy100001000000nnnnnddddd")
 INST(SCVTF_float_int,        "SCVTF (scalar, integer)",                   "z0011110yy100010000000nnnnnddddd")
 INST(UCVTF_float_int,        "UCVTF (scalar, integer)",                   "z0011110yy100011000000nnnnnddddd")
-//INST(FCVTAS_float,           "FCVTAS (scalar)",                           "z0011110yy100100000000nnnnnddddd")
+INST(FCVTAS_float,           "FCVTAS (scalar)",                           "z0011110yy100100000000nnnnnddddd")
-//INST(FCVTAU_float,           "FCVTAU (scalar)",                           "z0011110yy100101000000nnnnnddddd")
+INST(FCVTAU_float,           "FCVTAU (scalar)",                           "z0011110yy100101000000nnnnnddddd")
 INST(FMOV_float_gen,         "FMOV (general)",                            "z0011110yy10r11o000000nnnnnddddd")
-//INST(FCVTPS_float,           "FCVTPS (scalar)",                           "z0011110yy101000000000nnnnnddddd")
+INST(FCVTPS_float,           "FCVTPS (scalar)",                           "z0011110yy101000000000nnnnnddddd")
-//INST(FCVTPU_float,           "FCVTPU (scalar)",                           "z0011110yy101001000000nnnnnddddd")
+INST(FCVTPU_float,           "FCVTPU (scalar)",                           "z0011110yy101001000000nnnnnddddd")
-//INST(FCVTMS_float,           "FCVTMS (scalar)",                           "z0011110yy110000000000nnnnnddddd")
+INST(FCVTMS_float,           "FCVTMS (scalar)",                           "z0011110yy110000000000nnnnnddddd")
-//INST(FCVTMU_float,           "FCVTMU (scalar)",                           "z0011110yy110001000000nnnnnddddd")
+INST(FCVTMU_float,           "FCVTMU (scalar)",                           "z0011110yy110001000000nnnnnddddd")
 INST(FCVTZS_float_int,       "FCVTZS (scalar, integer)",                  "z0011110yy111000000000nnnnnddddd")
 INST(FCVTZU_float_int,       "FCVTZU (scalar, integer)",                  "z0011110yy111001000000nnnnnddddd")
 //INST(FJCVTZS,                "FJCVTZS",                                   "0001111001111110000000nnnnnddddd")
--- a/src/frontend/A64/translate/impl/floating_point_conversion_fixed_point.cpp
+++ b/src/frontend/A64/translate/impl/floating_point_conversion_fixed_point.cpp
@ -38,13 +38,13 @@ bool TranslatorVisitor::FCVTZS_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec
    IR::U32U64 intval;
    if (intsize == 32 && *fltsize == 32) {
-        intval = ir.FPSingleToS32(fltval, true, true);
+        intval = ir.FPSingleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero);
    } else if (intsize == 32 && *fltsize == 64) {
-        intval = ir.FPDoubleToS32(fltval, true, true);
+        intval = ir.FPDoubleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero);
    } else if (intsize == 64 && *fltsize == 32) {
-        return InterpretThisInstruction();
+        intval = ir.FPSingleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero);
    } else if (intsize == 64 && *fltsize == 64) {
-        return InterpretThisInstruction();
+        intval = ir.FPDoubleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero);
    } else {
        UNREACHABLE();
    }
@ -69,13 +69,13 @@ bool TranslatorVisitor::FCVTZU_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec
    IR::U32U64 intval;
    if (intsize == 32 && *fltsize == 32) {
-        intval = ir.FPSingleToU32(fltval, true, true);
+        intval = ir.FPSingleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero);
    } else if (intsize == 32 && *fltsize == 64) {
-        intval = ir.FPDoubleToU32(fltval, true, true);
+        intval = ir.FPDoubleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero);
    } else if (intsize == 64 && *fltsize == 32) {
-        return InterpretThisInstruction();
+        intval = ir.FPSingleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero);
    } else if (intsize == 64 && *fltsize == 64) {
-        return InterpretThisInstruction();
+        intval = ir.FPDoubleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero);
    } else {
        UNREACHABLE();
    }
--- a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp
+++ b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp
@ -6,6 +6,7 @@
 #include <boost/optional.hpp>
 #include "common/fp/rounding_mode.h"
 #include "frontend/A64/translate/impl/impl.h"
 namespace Dynarmic::A64 {
@ -135,58 +136,98 @@ bool TranslatorVisitor::FMOV_float_gen(bool sf, Imm<2> type, Imm<1> rmode_0, Imm
    return true;
 }
-bool TranslatorVisitor::FCVTZS_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
+static bool FloaingPointConvertSignedInteger(TranslatorVisitor& v, bool sf, Imm<2> type, Vec Vn, Reg Rd, FP::RoundingMode rounding_mode) {
    const size_t intsize = sf ? 64 : 32;
    const auto fltsize = GetDataSize(type);
    if (!fltsize || *fltsize == 16) {
-        return UnallocatedEncoding();
+        return v.UnallocatedEncoding();
    }
-    const IR::U32U64 fltval = V_scalar(*fltsize, Vn);
+    const IR::U32U64 fltval = v.V_scalar(*fltsize, Vn);
    IR::U32U64 intval;
    if (intsize == 32 && *fltsize == 32) {
-        intval = ir.FPSingleToS32(fltval, true, true);
+        intval = v.ir.FPSingleToFixedS32(fltval, 0, rounding_mode);
    } else if (intsize == 32 && *fltsize == 64) {
-        intval = ir.FPDoubleToS32(fltval, true, true);
+        intval = v.ir.FPDoubleToFixedS32(fltval, 0, rounding_mode);
    } else if (intsize == 64 && *fltsize == 32) {
-        return InterpretThisInstruction();
+        intval = v.ir.FPSingleToFixedS64(fltval, 0, rounding_mode);
    } else if (intsize == 64 && *fltsize == 64) {
-        return InterpretThisInstruction();
+        intval = v.ir.FPDoubleToFixedS64(fltval, 0, rounding_mode);
    } else {
        UNREACHABLE();
    }
-    X(intsize, Rd, intval);
+    v.X(intsize, Rd, intval);
    return true;
 }
 // Shared body of the scalar floating-point -> unsigned integer conversions.
 //
 // `sf` selects a 64-bit (true) or 32-bit (false) destination, `type` encodes the source
 // precision, and `rounding_mode` is forwarded unchanged to the IR conversion opcode.
 // Half-precision sources and unrecognised `type` values are reported as unallocated encodings.
 static bool FloaingPointConvertUnsignedInteger(TranslatorVisitor& v, bool sf, Imm<2> type, Vec Vn, Reg Rd, FP::RoundingMode rounding_mode) {
    const auto fltsize = GetDataSize(type);
    if (!fltsize || *fltsize == 16) {
        return v.UnallocatedEncoding();
    }

    const size_t intsize = sf ? 64 : 32;
    const IR::U32U64 source = v.V_scalar(*fltsize, Vn);

    // Zero fractional bits: the result is a plain integer rather than a fixed-point value.
    IR::U32U64 converted;
    switch (*fltsize) {
    case 32:
        if (intsize == 64) {
            converted = v.ir.FPSingleToFixedU64(source, 0, rounding_mode);
        } else {
            converted = v.ir.FPSingleToFixedU32(source, 0, rounding_mode);
        }
        break;
    case 64:
        if (intsize == 64) {
            converted = v.ir.FPDoubleToFixedU64(source, 0, rounding_mode);
        } else {
            converted = v.ir.FPDoubleToFixedU32(source, 0, rounding_mode);
        }
        break;
    default:
        UNREACHABLE();
    }

    v.X(intsize, Rd, converted);
    return true;
 }
 // FCVTNS (scalar): forwards to the signed conversion helper with the rounding mode fixed to
 // round-to-nearest, ties-to-even.
 bool TranslatorVisitor::FCVTNS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
    return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::ToNearest_TieEven);
 }
 // FCVTNU (scalar): forwards to the unsigned conversion helper with the rounding mode fixed to
 // round-to-nearest, ties-to-even.
 bool TranslatorVisitor::FCVTNU_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
    return FloaingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::ToNearest_TieEven);
 }
 // FCVTZS (scalar, integer): forwards to the signed conversion helper with the rounding mode
 // fixed to round-towards-zero.
 bool TranslatorVisitor::FCVTZS_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
    return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsZero);
 }
 bool TranslatorVisitor::FCVTZU_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
-    const size_t intsize = sf ? 64 : 32;
+    return FloaingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsZero);
    const auto fltsize = GetDataSize(type);
    if (!fltsize || *fltsize == 16) {
        return UnallocatedEncoding();
 }
-    const IR::U32U64 fltval = V_scalar(*fltsize, Vn);
+bool TranslatorVisitor::FCVTAS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
-    IR::U32U64 intval;
+    return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::ToNearest_TieAwayFromZero);
    if (intsize == 32 && *fltsize == 32) {
        intval = ir.FPSingleToU32(fltval, true, true);
    } else if (intsize == 32 && *fltsize == 64) {
        intval = ir.FPDoubleToU32(fltval, true, true);
    } else if (intsize == 64 && *fltsize == 32) {
        return InterpretThisInstruction();
    } else if (intsize == 64 && *fltsize == 64) {
        return InterpretThisInstruction();
    } else {
        UNREACHABLE();
 }
-    X(intsize, Rd, intval);
+bool TranslatorVisitor::FCVTAU_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
    return FloaingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::ToNearest_TieAwayFromZero);
 }
-    return true;
+bool TranslatorVisitor::FCVTPS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
    return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsPlusInfinity);
 }
 // FCVTPU (scalar): forwards to the unsigned conversion helper with the rounding mode fixed to
 // round-towards-plus-infinity.
 bool TranslatorVisitor::FCVTPU_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
    return FloaingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsPlusInfinity);
 }
 // FCVTMS (scalar): forwards to the signed conversion helper with the rounding mode fixed to
 // round-towards-minus-infinity.
 bool TranslatorVisitor::FCVTMS_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
    return FloaingPointConvertSignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsMinusInfinity);
 }
 // FCVTMU (scalar): forwards to the unsigned conversion helper with the rounding mode fixed to
 // round-towards-minus-infinity.
 bool TranslatorVisitor::FCVTMU_float(bool sf, Imm<2> type, Vec Vn, Reg Rd) {
    return FloaingPointConvertUnsignedInteger(*this, sf, type, Vn, Rd, FP::RoundingMode::TowardsMinusInfinity);
 }
 } // namespace Dynarmic::A64
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@ -1451,24 +1451,44 @@ U64 IREmitter::FPSingleToDouble(const U32& a, bool fpscr_controlled) {
    return Inst<U64>(Opcode::FPSingleToDouble, a);
 }
-U32 IREmitter::FPSingleToS32(const U32& a, bool round_towards_zero, bool fpscr_controlled) {
+U32 IREmitter::FPDoubleToFixedS32(const U64& a, size_t fbits, FP::RoundingMode rounding) {
-    ASSERT(fpscr_controlled);
+    ASSERT(fbits <= 32);
-    return Inst<U32>(Opcode::FPSingleToS32, a, Imm1(round_towards_zero));
+    return Inst<U32>(Opcode::FPDoubleToFixedS32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
 }
-U32 IREmitter::FPSingleToU32(const U32& a, bool round_towards_zero, bool fpscr_controlled) {
+U64 IREmitter::FPDoubleToFixedS64(const U64& a, size_t fbits, FP::RoundingMode rounding) {
-    ASSERT(fpscr_controlled);
+    ASSERT(fbits <= 64);
-    return Inst<U32>(Opcode::FPSingleToU32, a, Imm1(round_towards_zero));
+    return Inst<U64>(Opcode::FPDoubleToFixedS64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
 }
-U32 IREmitter::FPDoubleToS32(const U64& a, bool round_towards_zero, bool fpscr_controlled) {
+U32 IREmitter::FPDoubleToFixedU32(const U64& a, size_t fbits, FP::RoundingMode rounding) {
-    ASSERT(fpscr_controlled);
+    ASSERT(fbits <= 32);
-    return Inst<U32>(Opcode::FPDoubleToS32, a, Imm1(round_towards_zero));
+    return Inst<U32>(Opcode::FPDoubleToFixedU32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
 }
-U32 IREmitter::FPDoubleToU32(const U64& a, bool round_towards_zero, bool fpscr_controlled) {
+U64 IREmitter::FPDoubleToFixedU64(const U64& a, size_t fbits, FP::RoundingMode rounding) {
-    ASSERT(fpscr_controlled);
+    ASSERT(fbits <= 64);
-    return Inst<U32>(Opcode::FPDoubleToU32, a, Imm1(round_towards_zero));
+    return Inst<U64>(Opcode::FPDoubleToFixedU64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
 }
 // Emits FPSingleToFixedS32 for `a`, passing `fbits` and `rounding` as 8-bit immediates.
 // `fbits` must not exceed the 32-bit width of the result.
 U32 IREmitter::FPSingleToFixedS32(const U32& a, size_t fbits, FP::RoundingMode rounding) {
    ASSERT(fbits <= 32);
    const auto fbits_imm = Imm8(static_cast<u8>(fbits));
    const auto rounding_imm = Imm8(static_cast<u8>(rounding));
    return Inst<U32>(Opcode::FPSingleToFixedS32, a, fbits_imm, rounding_imm);
 }
 // Emits FPSingleToFixedS64 for `a`, passing `fbits` and `rounding` as 8-bit immediates.
 // `fbits` must not exceed the 64-bit width of the result.
 U64 IREmitter::FPSingleToFixedS64(const U32& a, size_t fbits, FP::RoundingMode rounding) {
    ASSERT(fbits <= 64);
    const auto fbits_imm = Imm8(static_cast<u8>(fbits));
    const auto rounding_imm = Imm8(static_cast<u8>(rounding));
    return Inst<U64>(Opcode::FPSingleToFixedS64, a, fbits_imm, rounding_imm);
 }
 // Emits FPSingleToFixedU32 for `a`, passing `fbits` and `rounding` as 8-bit immediates.
 // `fbits` must not exceed the 32-bit width of the result.
 U32 IREmitter::FPSingleToFixedU32(const U32& a, size_t fbits, FP::RoundingMode rounding) {
    ASSERT(fbits <= 32);
    const auto fbits_imm = Imm8(static_cast<u8>(fbits));
    const auto rounding_imm = Imm8(static_cast<u8>(rounding));
    return Inst<U32>(Opcode::FPSingleToFixedU32, a, fbits_imm, rounding_imm);
 }
 // Emits FPSingleToFixedU64 for `a`, passing `fbits` and `rounding` as 8-bit immediates.
 // `fbits` must not exceed the 64-bit width of the result.
 U64 IREmitter::FPSingleToFixedU64(const U32& a, size_t fbits, FP::RoundingMode rounding) {
    ASSERT(fbits <= 64);
    const auto fbits_imm = Imm8(static_cast<u8>(fbits));
    const auto rounding_imm = Imm8(static_cast<u8>(rounding));
    return Inst<U64>(Opcode::FPSingleToFixedU64, a, fbits_imm, rounding_imm);
 }
 U32 IREmitter::FPS32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled) {
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@ -12,6 +12,10 @@
 #include "frontend/ir/terminal.h"
 #include "frontend/ir/value.h"
 // Forward declaration of the rounding-mode enum that the FP -> fixed-point IREmitter
 // declarations in this header take as a parameter.
 namespace Dynarmic::FP {
 enum class RoundingMode;
 } // namespace Dynarmic::FP
 // ARM JIT Microinstruction Intermediate Representation
 //
 // This intermediate representation is an SSA IR. It is designed primarily for analysis,
@ -264,10 +268,14 @@ public:
    U32U64 FPSub(const U32U64& a, const U32U64& b, bool fpscr_controlled);
    U32 FPDoubleToSingle(const U64& a, bool fpscr_controlled);
    U64 FPSingleToDouble(const U32& a, bool fpscr_controlled);
-    U32 FPSingleToS32(const U32& a, bool round_towards_zero, bool fpscr_controlled);
+    U32 FPDoubleToFixedS32(const U64& a, size_t fbits, FP::RoundingMode rounding);
-    U32 FPSingleToU32(const U32& a, bool round_towards_zero, bool fpscr_controlled);
+    U64 FPDoubleToFixedS64(const U64& a, size_t fbits, FP::RoundingMode rounding);
-    U32 FPDoubleToS32(const U64& a, bool round_towards_zero, bool fpscr_controlled);
+    U32 FPDoubleToFixedU32(const U64& a, size_t fbits, FP::RoundingMode rounding);
-    U32 FPDoubleToU32(const U64& a, bool round_towards_zero, bool fpscr_controlled);
+    U64 FPDoubleToFixedU64(const U64& a, size_t fbits, FP::RoundingMode rounding);
    U32 FPSingleToFixedS32(const U32& a, size_t fbits, FP::RoundingMode rounding);
    U64 FPSingleToFixedS64(const U32& a, size_t fbits, FP::RoundingMode rounding);
    U32 FPSingleToFixedU32(const U32& a, size_t fbits, FP::RoundingMode rounding);
    U64 FPSingleToFixedU64(const U32& a, size_t fbits, FP::RoundingMode rounding);
    U32 FPS32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled);
    U32 FPU32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled);
    U64 FPS32ToDouble(const U32& a, bool round_to_nearest, bool fpscr_controlled);
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@ -386,10 +386,14 @@ OPCODE(FPSub64,                             T::U64,         T::U64,         T::U
 // Floating-point conversions
 OPCODE(FPSingleToDouble,                    T::U64,         T::U32                                          )
 OPCODE(FPDoubleToSingle,                    T::U32,         T::U64                                          )
-OPCODE(FPSingleToU32,                       T::U32,         T::U32,         T::U1                           )
+OPCODE(FPDoubleToFixedS32,                  T::U32,         T::U64,         T::U8,          T::U8           )
-OPCODE(FPSingleToS32,                       T::U32,         T::U32,         T::U1                           )
+OPCODE(FPDoubleToFixedS64,                  T::U64,         T::U64,         T::U8,          T::U8           )
-OPCODE(FPDoubleToU32,                       T::U32,         T::U64,         T::U1                           )
+OPCODE(FPDoubleToFixedU32,                  T::U32,         T::U64,         T::U8,          T::U8           )
-OPCODE(FPDoubleToS32,                       T::U32,         T::U64,         T::U1                           )
+OPCODE(FPDoubleToFixedU64,                  T::U64,         T::U64,         T::U8,          T::U8           )
 OPCODE(FPSingleToFixedS32,                  T::U32,         T::U32,         T::U8,          T::U8           )
 OPCODE(FPSingleToFixedS64,                  T::U64,         T::U32,         T::U8,          T::U8           )
 OPCODE(FPSingleToFixedU32,                  T::U32,         T::U32,         T::U8,          T::U8           )
 OPCODE(FPSingleToFixedU64,                  T::U64,         T::U32,         T::U8,          T::U8           )
 OPCODE(FPU32ToSingle,                       T::U32,         T::U32,         T::U1                           )
 OPCODE(FPS32ToSingle,                       T::U32,         T::U32,         T::U1                           )
 OPCODE(FPU32ToDouble,                       T::U64,         T::U32,         T::U1                           )
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@ -29,7 +29,11 @@ add_executable(dynarmic_tests
    A64/inst_gen.cpp
    A64/inst_gen.h
    A64/testenv.h
    fp/FPToFixed.cpp
    fp/mantissa_util_tests.cpp
    fp/unpacked_tests.cpp
    main.cpp
    mp.cpp
    rand_int.h
 )
--- a/tests/fp/FPToFixed.cpp
+++ b/tests/fp/FPToFixed.cpp
@ -0,0 +1,38 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #include <tuple>
 #include <vector>
 #include <catch.hpp>
 #include "common/fp/fpsr.h"
 #include "common/fp/op.h"
 #include "rand_int.h"
 using namespace Dynarmic;
 using namespace Dynarmic::FP;
 // Table-driven check of FPToFixed on single-precision inputs, with zero fractional bits and
 // round-to-nearest-even. Both the converted value and the resulting FPSR value are verified.
 TEST_CASE("FPToFixed", "[fp]") {
    // Row layout: {input bit pattern, integer width in bits, expected result, expected FPSR value}.
    // NOTE(review): FPSR 0x10 / 0x01 are presumably the inexact / invalid-operation flags - confirm against fpsr.h.
    const std::vector<std::tuple<u32, size_t, u64, u32>> test_cases {
        {0x447A0000, 64, 0x000003E8, 0x00}, // 1000.0f
        {0xC47A0000, 32, 0xFFFFFC18, 0x00}, // -1000.0f
        {0x4479E000, 64, 0x000003E8, 0x10}, // 999.5f
        {0x50800000, 32, 0x7FFFFFFF, 0x01}, // 2^34
        {0xD0800000, 32, 0x80000000, 0x01}, // -(2^34)
        {0xCF000000, 32, 0x80000000, 0x00}, // -(2^31)
        {0x80002B94, 64, 0x00000000, 0x10}, // negative denormal
        {0x80636D24, 64, 0x00000000, 0x10}, // negative denormal
    };
    const FPCR fpcr;
    for (auto [input, ibits, expected_output, expected_fpsr] : test_cases) {
        // A fresh FPSR per row so flags from one case cannot leak into the next.
        FPSR fpsr;
        const u64 output = FPToFixed<u32>(ibits, input, 0, false, fpcr, RoundingMode::ToNearest_TieEven, fpsr);
        REQUIRE(output == expected_output);
        REQUIRE(fpsr.Value() == expected_fpsr);
    }
 }
--- a/tests/fp/mantissa_util_tests.cpp
+++ b/tests/fp/mantissa_util_tests.cpp
@ -0,0 +1,63 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #include <tuple>
 #include <vector>
 #include <catch.hpp>
 #include "common/fp/mantissa_util.h"
 #include "common/safe_ops.h"
 #include "rand_int.h"
 using namespace Dynarmic;
 using namespace Dynarmic::FP;
 // Table-driven check of ResidualErrorOnRightShift for small mantissas and shifts of 1 and 2.
 TEST_CASE("ResidualErrorOnRightShift", "[fp]") {
    // Row layout: {mantissa, right-shift amount, expected classification of the bits shifted out}.
    const std::vector<std::tuple<u32, int, ResidualError>> test_cases {
        {0x00000001, 1, ResidualError::Half},
        {0x00000002, 1, ResidualError::Zero},
        {0x00000001, 2, ResidualError::LessThanHalf},
        {0x00000002, 2, ResidualError::Half},
        {0x00000003, 2, ResidualError::GreaterThanHalf},
        {0x00000004, 2, ResidualError::Zero},
        {0x00000005, 2, ResidualError::LessThanHalf},
        {0x00000006, 2, ResidualError::Half},
        {0x00000007, 2, ResidualError::GreaterThanHalf},
    };
    for (auto [mantissa, shift, expected_result] : test_cases) {
        const ResidualError result = ResidualErrorOnRightShift(mantissa, shift);
        REQUIRE(result == expected_result);
    }
 }
 // Randomised cross-check of ResidualErrorOnRightShift against a reference computed with
 // Safe::ArithmeticShiftRightDouble, over 100000 random mantissa/shift pairs.
 TEST_CASE("ResidualErrorOnRightShift Randomized", "[fp]") {
    for (size_t test = 0; test < 100000; test++) {
        const u32 mantissa = RandInt<u32>(0, 0xFFFFFFFF);
        const int shift = RandInt<int>(-60, 60);
        const ResidualError result = ResidualErrorOnRightShift(mantissa, shift);
        // Reference: the sign-extended mantissa is the top word and zero is the bottom word of a
        // double-width right shift, so the returned word holds what was shifted out of the top word.
        const u64 calculated_error = Safe::ArithmeticShiftRightDouble(Common::SignExtend<32, u64>(mantissa), u64(0), shift);
        // Classify that word against a value with only the top bit set, which stands for exactly one half.
        const ResidualError expected_result = [&]{
            constexpr u64 half_error = 0x8000'0000'0000'0000ull;
            if (calculated_error == 0) {
                return ResidualError::Zero;
            }
            if (calculated_error < half_error) {
                return ResidualError::LessThanHalf;
            }
            if (calculated_error == half_error) {
                return ResidualError::Half;
            }
            return ResidualError::GreaterThanHalf;
        }();
        // Attach the inputs to any failure report produced by the REQUIRE below.
        INFO(std::hex << "mantissa " << mantissa << " shift " << shift << " calculated_error " << calculated_error);
        REQUIRE(result == expected_result);
    }
 }
--- a/tests/fp/unpacked_tests.cpp
+++ b/tests/fp/unpacked_tests.cpp
@ -0,0 +1,71 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #include <catch.hpp>
 #include "common/fp/unpacked.h"
 #include "rand_int.h"
 using namespace Dynarmic;
 using namespace Dynarmic::FP;
 // Table-driven check of FPUnpack<u32>: each input bit pattern must unpack to the expected
 // tuple and leave the expected FPSR value.
 TEST_CASE("FPUnpack Tests", "[fp]") {
    // Row layout: {input bit pattern, expected {type, sign, unpacked value}, expected FPSR value}.
    // NOTE(review): the FPUnpacked initialisers look like {sign, exponent, mantissa} - confirm against unpacked.h.
    const static std::vector<std::tuple<u32, std::tuple<FPType, bool, FPUnpacked<u64>>, u32>> test_cases {
        {0x00000000, {FPType::Zero, false, {false, 0, 0}}, 0},
        {0x7F800000, {FPType::Infinity, false, {false, 1000000, 1}}, 0},
        {0xFF800000, {FPType::Infinity, true, {true, 1000000, 1}}, 0},
        {0x7F800001, {FPType::SNaN, false, {false, 0, 0}}, 0},
        {0xFF800001, {FPType::SNaN, true, {true, 0, 0}}, 0},
        {0x7FC00001, {FPType::QNaN, false, {false, 0, 0}}, 0},
        {0xFFC00001, {FPType::QNaN, true, {true, 0, 0}}, 0},
        {0x00000001, {FPType::Nonzero, false, {false, -149, 1}}, 0}, // Smallest single precision denormal is 2^-149.
        {0x3F7FFFFF, {FPType::Nonzero, false, {false, -24, 0xFFFFFF}}, 0}, // 1.0 - epsilon
    };
    const FPCR fpcr;
    for (const auto& [input, expected_output, expected_fpsr] : test_cases) {
        // A fresh FPSR per row so flags from one case cannot leak into the next.
        FPSR fpsr;
        const auto output = FPUnpack<u32>(input, fpcr, fpsr);
        INFO("Input: " << std::hex << input);
        REQUIRE(output == expected_output);
        REQUIRE(fpsr.Value() == expected_fpsr);
    }
 }
 // Table-driven check of FPRound<u32>: each unpacked value must round to the expected bit
 // pattern and leave the expected FPSR value.
 TEST_CASE("FPRound Tests", "[fp]") {
    // Same tuple type as the FPUnpack table, read the other way round:
    // {expected bit pattern, {type, sign, unpacked value to round}, expected FPSR value}.
    const static std::vector<std::tuple<u32, std::tuple<FPType, bool, FPUnpacked<u64>>, u32>> test_cases {
        {0x7F800000, {FPType::Infinity, false, {false, 1000000, 1}}, 0x14},
        {0xFF800000, {FPType::Infinity, true, {true, 1000000, 1}}, 0x14},
        {0x00000001, {FPType::Nonzero, false, {false, -149, 1}}, 0}, // Smallest single precision denormal is 2^-149.
        {0x3F7FFFFF, {FPType::Nonzero, false, {false, -24, 0xFFFFFF}}, 0}, // 1.0 - epsilon
        {0x3F800000, {FPType::Nonzero, false, {false, -28, 0xFFFFFFF}}, 0x10}, // rounds to 1.0
    };
    const FPCR fpcr;
    for (const auto& [expected_output, input, expected_fpsr] : test_cases) {
        FPSR fpsr;
        // Only the FPUnpacked element of the middle tuple is passed to FPRound; type and sign are unused here.
        const auto output = FPRound<u32>(std::get<2>(input), fpcr, fpsr);
        INFO("Expected Output: " << std::hex << expected_output);
        REQUIRE(output == expected_output);
        REQUIRE(fpsr.Value() == expected_fpsr);
    }
 }
 // Randomised round-trip check: unpacking a single-precision bit pattern and rounding the
 // unpacked value back must reproduce the original bits.
 TEST_CASE("FPUnpack<->FPRound Round-trip Tests", "[fp]") {
    const FPCR fpcr;
    for (size_t count = 0; count < 100000; count++) {
        // The FPSR is shared by both calls and is not checked by this test.
        FPSR fpsr;
        // Inputs are drawn with equal chance from [0x00000001, 0x7F800000] or [0x80000001, 0xFF800000].
        const u32 input = RandInt(0, 1) == 0 ? RandInt<u32>(0x00000001, 0x7F800000) : RandInt<u32>(0x80000001, 0xFF800000);
        const auto intermediate = std::get<2>(FPUnpack<u32>(input, fpcr, fpsr));
        const u32 output = FPRound<u32>(intermediate, fpcr, fpsr);
        INFO("Count: " << count);
        INFO("Intermediate Values: " << std::hex << intermediate.sign << ';' << intermediate.exponent << ';' << intermediate.mantissa);
        REQUIRE(input == output);
    }
 }
--- a/tests/mp.cpp
+++ b/tests/mp.cpp
@ -0,0 +1,27 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #include <type_traits>
 #include "common/mp/cartesian_product.h"
 using namespace Dynarmic::Common::mp;
 // Compile-time check of cartesian_product: three two-element lists must produce all eight
 // combinations, ordered with the first list varying slowest and the last list varying fastest.
 static_assert(
    std::is_same_v<
        cartesian_product<list<int, bool>, list<double, float>, list<char, unsigned>>,
        list<
            list<int, double, char>,
            list<int, double, unsigned>,
            list<int, float, char>,
            list<int, float, unsigned>,
            list<bool, double, char>,
            list<bool, double, unsigned>,
            list<bool, float, char>,
            list<bool, float, unsigned>
        >
    >
 );