diff --git a/src/backend_x64/a32_jitstate.cpp b/src/backend_x64/a32_jitstate.cpp
index 5a9ee868..c274d7fc 100644
--- a/src/backend_x64/a32_jitstate.cpp
+++ b/src/backend_x64/a32_jitstate.cpp
@@ -163,6 +163,7 @@ u32 A32JitState::Fpscr() const {
     FPSCR |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
     FPSCR |= FPSCR_IDC;
     FPSCR |= FPSCR_UFC;
+    FPSCR |= fpsr_exc;

     return FPSCR;
 }
@@ -183,13 +184,10 @@ void A32JitState::SetFpscr(u32 FPSCR) {
     const std::array<u32, 4> MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000};
     guest_MXCSR |= MXCSR_RMode[(FPSCR >> 22) & 0x3];

-    // Cumulative flags IOC, IXC, UFC, OFC, DZC
-    guest_MXCSR |= ( FPSCR      ) & 0b0000000000001; // IE = IOC
-    guest_MXCSR |= ( FPSCR << 1 ) & 0b0000000111100; // PE, UE, OE, ZE = IXC, UFC, OFC, DZC
-
-    // Cumulative flag IDC, UFC
-    FPSCR_IDC = FPSCR & (1 << 7);
-    FPSCR_UFC = FPSCR & (1 << 3);
+    // Cumulative flags IDC, IOC, IXC, UFC, OFC, DZC
+    FPSCR_IDC = 0;
+    FPSCR_UFC = 0;
+    fpsr_exc = FPSCR & 0x9F;

     if (Common::Bit<24>(FPSCR)) {
         // VFP Flush to Zero
diff --git a/src/backend_x64/a32_jitstate.h b/src/backend_x64/a32_jitstate.h
index 5294f8e4..3688ac82 100644
--- a/src/backend_x64/a32_jitstate.h
+++ b/src/backend_x64/a32_jitstate.h
@@ -66,6 +66,7 @@ struct A32JitState {
     std::array<u64, RSBSize> rsb_codeptrs;
     void ResetRSB();

+    u32 fpsr_exc = 0;
     u32 FPSCR_IDC = 0;
     u32 FPSCR_UFC = 0;
     u32 FPSCR_mode = 0;
diff --git a/src/backend_x64/a64_jitstate.cpp b/src/backend_x64/a64_jitstate.cpp
index efc8d167..eee952f5 100644
--- a/src/backend_x64/a64_jitstate.cpp
+++ b/src/backend_x64/a64_jitstate.cpp
@@ -105,16 +105,15 @@ u32 A64JitState::GetFpsr() const {
     fpsr |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
     fpsr |= FPSCR_IDC;
     fpsr |= FPSCR_UFC;
+    fpsr |= fpsr_exc;
     return fpsr;
 }

 void A64JitState::SetFpsr(u32 value) {
     guest_MXCSR &= ~0x0000003D;

-    guest_MXCSR |= ( value      ) & 0b0000000000001; // IE = IOC
-    guest_MXCSR |= ( value << 1 ) & 0b0000000111100; // PE, UE, OE, ZE = IXC, UFC, OFC, DZC
-
-    FPSCR_IDC = value & (1 << 7);
-    FPSCR_UFC = value & (1 << 3);
+    FPSCR_IDC = 0;
+    FPSCR_UFC = 0;
+    fpsr_exc = value & 0x9F;
 }

 } // namespace Dynarmic::BackendX64
diff --git a/src/backend_x64/a64_jitstate.h b/src/backend_x64/a64_jitstate.h
index a1c3a2dc..b8fdd5e1 100644
--- a/src/backend_x64/a64_jitstate.h
+++ b/src/backend_x64/a64_jitstate.h
@@ -71,6 +71,7 @@ struct A64JitState {
         rsb_codeptrs.fill(0);
     }

+    u32 fpsr_exc = 0;
     u32 FPSCR_IDC = 0;
     u32 FPSCR_UFC = 0;
     u32 fpcr = 0;
diff --git a/src/backend_x64/emit_x64_floating_point.cpp b/src/backend_x64/emit_x64_floating_point.cpp
index 253c235d..7bcfe410 100644
--- a/src/backend_x64/emit_x64_floating_point.cpp
+++ b/src/backend_x64/emit_x64_floating_point.cpp
@@ -5,13 +5,22 @@
  */

 #include <type_traits>

+#include "backend_x64/abi.h"
 #include "backend_x64/block_of_code.h"
 #include "backend_x64/emit_x64.h"
 #include "common/assert.h"
 #include "common/common_types.h"
+#include "common/fp/op.h"
 #include "common/fp/util.h"
+#include "common/mp/cartesian_product.h"
+#include "common/mp/integer.h"
+#include "common/mp/list.h"
+#include "common/mp/lut.h"
+#include "common/mp/to_tuple.h"
+#include "common/mp/vlift.h"
+#include "common/mp/vllift.h"
 #include "frontend/ir/basic_block.h"
 #include "frontend/ir/microinstruction.h"
 #include "frontend/ir/opcodes.h"
@@ -19,6 +28,7 @@
 namespace Dynarmic::BackendX64 {

 using namespace Xbyak::util;
+namespace mp = Dynarmic::Common::mp;

 constexpr u64 f32_negative_zero = 0x80000000u;
 constexpr u64 f32_nan = 0x7fc00000u;
@@ -29,10 +39,6 @@
 constexpr u64 f64_nan = 0x7ff8000000000000u;
 constexpr u64 f64_non_sign_mask = 0x7fffffffffffffffu;
 constexpr u64 f64_penultimate_positive_denormal = 0x000ffffffffffffeu;
-constexpr u64 f64_min_s32 = 0xc1e0000000000000u; // -2147483648 as a double
-constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double
-constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double
-constexpr u64 f64_max_u32 = 0x41efffffffe00000u; // 4294967295 as a double

 static void DenormalsAreZero32(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
     Xbyak::Label end;
@@ -99,12 +105,6 @@ static void FlushToZero64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64
     code.L(end);
 }

-static void ZeroIfNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) {
-    code.pxor(xmm_scratch, xmm_scratch);
-    code.cmpordsd(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN)
-    code.pand(xmm_value, xmm_scratch);
-}
-
 static void PreProcessNaNs32(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b, Xbyak::Label& end) {
     Xbyak::Label nan;
@@ -892,129 +892,82 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, result);
 }

-void EmitX64::EmitFPSingleToS32(EmitContext& ctx, IR::Inst* inst) {
+static void EmitFPToFixedFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, size_t fsize, bool unsigned_, size_t isize) {
+    using fsize_list = mp::list<mp::vlift<size_t(32)>, mp::vlift<size_t(64)>>;
+    using unsigned_list = mp::list<mp::vlift<true>, mp::vlift<false>>;
+    using isize_list = mp::list<mp::vlift<size_t(32)>, mp::vlift<size_t(64)>>;
+    using rounding_list = mp::list<
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieEven>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsPlusInfinity>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsMinusInfinity>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsZero>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieAwayFromZero>
+    >;
+
+    using key_type = std::tuple<size_t, bool, size_t, FP::RoundingMode>;
+    using value_type = u64(*)(u64, u8, FP::FPSR&, A64::FPCR);
+
+    static const auto lut = mp::GenerateLookupTableFromList<key_type, value_type>(
+        [](auto args) {
+            return std::pair<key_type, value_type>{
+                mp::to_tuple<decltype(args)>,
+                static_cast<value_type>(
+                    [](u64 input, u8 fbits, FP::FPSR& fpsr, A64::FPCR fpcr) {
+                        constexpr auto t = mp::to_tuple<decltype(args)>;
+                        constexpr size_t fsize = std::get<0>(t);
+                        constexpr bool unsigned_ = std::get<1>(t);
+                        constexpr size_t isize = std::get<2>(t);
+                        constexpr FP::RoundingMode rounding_mode = std::get<3>(t);
+                        using InputSize = mp::unsigned_integer_of_size<fsize>;

+                        return FP::FPToFixed<InputSize>(isize, static_cast<InputSize>(input), fbits, unsigned_, fpcr, rounding_mode, fpsr);
+                    }
+                )
+            };
+        },
+        mp::cartesian_product<fsize_list, unsigned_list, isize_list, rounding_list>{}
+    );
+
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg32 to = ctx.reg_alloc.ScratchGpr().cvt32();
-    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
-    bool round_towards_zero = args[1].GetImmediateU1();
-    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
-    // Conversion to double is lossless, and allows for clamping.
+    const auto rounding = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());

-    if (ctx.FPSCR_FTZ()) {
-        DenormalsAreZero32(code, from, to);
-    }
-    code.cvtss2sd(from, from);
-    // First time is to set flags
-    if (round_towards_zero) {
-        code.cvttsd2si(to, from); // 32 bit gpr
-    } else {
-        code.cvtsd2si(to, from); // 32 bit gpr
-    }
-    // Clamp to output range
-    ZeroIfNaN64(code, from, xmm_scratch);
-    code.minsd(from, code.MConst(xword, f64_max_s32));
-    code.maxsd(from, code.MConst(xword, f64_min_s32));
-    // Second time is for real
-    if (round_towards_zero) {
-        code.cvttsd2si(to, from); // 32 bit gpr
-    } else {
-        code.cvtsd2si(to, from); // 32 bit gpr
-    }
-
-    ctx.reg_alloc.DefineValue(inst, to);
+    ctx.reg_alloc.HostCall(inst, args[0], args[1]);
+    code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
+    code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR());
+    code.CallFunction(lut.at(std::make_tuple(fsize, unsigned_, isize, rounding)));
 }

-void EmitX64::EmitFPSingleToU32(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg64 to = ctx.reg_alloc.ScratchGpr().cvt64();
-    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
-    bool round_towards_zero = args[1].GetImmediateU1();
-
-    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
-    // Conversion to double is lossless, and allows for accurate clamping.
-    //
-    // Since SSE2 doesn't provide an unsigned conversion, we use a 64-bit signed conversion.
-    //
-    // FIXME: None of the FPSR exception bits are correctly signalled with the below code
-
-    if (ctx.FPSCR_FTZ()) {
-        DenormalsAreZero64(code, from, to);
-    }
-    code.cvtss2sd(from, from);
-    // Clamp to output range
-    ZeroIfNaN64(code, from, xmm_scratch);
-    code.minsd(from, code.MConst(xword, f64_max_u32));
-    code.maxsd(from, code.MConst(xword, f64_min_u32));
-    if (round_towards_zero) {
-        code.cvttsd2si(to, from); // 64 bit gpr
-    } else {
-        code.cvtsd2si(to, from); // 64 bit gpr
-    }
-
-    ctx.reg_alloc.DefineValue(inst, to);
+void EmitX64::EmitFPDoubleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixedFallback(code, ctx, inst, 64, false, 32);
 }

-void EmitX64::EmitFPDoubleToS32(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg32 to = ctx.reg_alloc.ScratchGpr().cvt32();
-    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
-    Xbyak::Reg32 gpr_scratch = ctx.reg_alloc.ScratchGpr().cvt32();
-    bool round_towards_zero = args[1].GetImmediateU1();
-
-    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
-
-    if (ctx.FPSCR_FTZ()) {
-        DenormalsAreZero64(code, from, gpr_scratch.cvt64());
-    }
-    // First time is to set flags
-    if (round_towards_zero) {
-        code.cvttsd2si(gpr_scratch, from); // 32 bit gpr
-    } else {
-        code.cvtsd2si(gpr_scratch, from); // 32 bit gpr
-    }
-    // Clamp to output range
-    ZeroIfNaN64(code, from, xmm_scratch);
-    code.minsd(from, code.MConst(xword, f64_max_s32));
-    code.maxsd(from, code.MConst(xword, f64_min_s32));
-    // Second time is for real
-    if (round_towards_zero) {
-        code.cvttsd2si(to, from); // 32 bit gpr
-    } else {
-        code.cvtsd2si(to, from); // 32 bit gpr
-    }
-
-    ctx.reg_alloc.DefineValue(inst, to);
+void EmitX64::EmitFPDoubleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixedFallback(code, ctx, inst, 64, false, 64);
 }

-void EmitX64::EmitFPDoubleToU32(EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[0]);
-    Xbyak::Reg64 to = ctx.reg_alloc.ScratchGpr().cvt64();
-    Xbyak::Xmm xmm_scratch = ctx.reg_alloc.ScratchXmm();
-    bool round_towards_zero = args[1].GetImmediateU1();
+void EmitX64::EmitFPDoubleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixedFallback(code, ctx, inst, 64, true, 32);
+}

-    // ARM saturates on conversion; this differs from x64 which returns a sentinel value.
-    // TODO: Use VCVTPD2UDQ when AVX512VL is available.
-    // FIXME: None of the FPSR exception bits are correctly signalled with the below code
+void EmitX64::EmitFPDoubleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixedFallback(code, ctx, inst, 64, true, 64);
+}

-    if (ctx.FPSCR_FTZ()) {
-        DenormalsAreZero64(code, from, to);
-    }
-    // Clamp to output range
-    ZeroIfNaN64(code, from, xmm_scratch);
-    code.minsd(from, code.MConst(xword, f64_max_u32));
-    code.maxsd(from, code.MConst(xword, f64_min_u32));
-    if (round_towards_zero) {
-        code.cvttsd2si(to, from); // 64 bit gpr
-    } else {
-        code.cvtsd2si(to, from); // 64 bit gpr
-    }
+void EmitX64::EmitFPSingleToFixedS32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixedFallback(code, ctx, inst, 32, false, 32);
 }

-    ctx.reg_alloc.DefineValue(inst, to);
+void EmitX64::EmitFPSingleToFixedS64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixedFallback(code, ctx, inst, 32, false, 64);
+}
+
+void EmitX64::EmitFPSingleToFixedU32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixedFallback(code, ctx, inst, 32, true, 32);
+}
+
+void EmitX64::EmitFPSingleToFixedU64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPToFixedFallback(code, ctx, inst, 32, true, 64);
 }

 void EmitX64::EmitFPS32ToSingle(EmitContext& ctx, IR::Inst* inst) {
diff --git a/src/backend_x64/jitstate_info.h b/src/backend_x64/jitstate_info.h
index 2c267bb6..3caca413 100644
--- a/src/backend_x64/jitstate_info.h
+++ b/src/backend_x64/jitstate_info.h
@@ -26,6 +26,7 @@ struct JitStateInfo {
         , offsetof_CPSR_nzcv(offsetof(JitStateType, CPSR_nzcv))
         , offsetof_FPSCR_IDC(offsetof(JitStateType, FPSCR_IDC))
         , offsetof_FPSCR_UFC(offsetof(JitStateType, FPSCR_UFC))
+        , offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc))
     {}

     const size_t offsetof_cycles_remaining;
@@ -39,6 +40,7 @@ struct JitStateInfo {
     const size_t offsetof_CPSR_nzcv;
     const size_t offsetof_FPSCR_IDC;
     const size_t offsetof_FPSCR_UFC;
+    const size_t offsetof_fpsr_exc;
 };

 } // namespace Dynarmic::BackendX64
diff --git a/src/frontend/A32/translate/translate_arm/vfp2.cpp b/src/frontend/A32/translate/translate_arm/vfp2.cpp
index defc7b7a..898a8418 100644
--- a/src/frontend/A32/translate/translate_arm/vfp2.cpp
+++ b/src/frontend/A32/translate/translate_arm/vfp2.cpp
@@ -442,8 +442,8 @@ bool ArmTranslatorVisitor::vfp2_VCVT_to_u32(Cond cond, bool D, size_t Vd, bool s
     if (ConditionPassed(cond)) {
         auto reg_m = ir.GetExtendedRegister(m);
         auto result = sz
-                      ? ir.FPDoubleToU32(reg_m, round_towards_zero, true)
-                      : ir.FPSingleToU32(reg_m, round_towards_zero, true);
+                      ? ir.FPDoubleToFixedU32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode())
+                      : ir.FPSingleToFixedU32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode());
         ir.SetExtendedRegister(d, result);
     }
     return true;
@@ -457,8 +457,8 @@ bool ArmTranslatorVisitor::vfp2_VCVT_to_s32(Cond cond, bool D, size_t Vd, bool s
     if (ConditionPassed(cond)) {
         auto reg_m = ir.GetExtendedRegister(m);
         auto result = sz
-                      ? ir.FPDoubleToS32(reg_m, round_towards_zero, true)
-                      : ir.FPSingleToS32(reg_m, round_towards_zero, true);
+                      ? ir.FPDoubleToFixedS32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode())
+                      : ir.FPSingleToFixedS32(reg_m, 0, round_towards_zero ? FP::RoundingMode::TowardsZero : ir.current_location.FPSCR().RMode());
         ir.SetExtendedRegister(d, result);
     }
     return true;
diff --git a/src/frontend/A64/translate/impl/floating_point_conversion_fixed_point.cpp b/src/frontend/A64/translate/impl/floating_point_conversion_fixed_point.cpp
index a86fbb10..8f4eb7df 100644
--- a/src/frontend/A64/translate/impl/floating_point_conversion_fixed_point.cpp
+++ b/src/frontend/A64/translate/impl/floating_point_conversion_fixed_point.cpp
@@ -38,13 +38,13 @@ bool TranslatorVisitor::FCVTZS_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec

     IR::U32U64 intval;
     if (intsize == 32 && *fltsize == 32) {
-        intval = ir.FPSingleToS32(fltval, true, true);
+        intval = ir.FPSingleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 32 && *fltsize == 64) {
-        intval = ir.FPDoubleToS32(fltval, true, true);
+        intval = ir.FPDoubleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 32) {
-        return InterpretThisInstruction();
+        intval = ir.FPSingleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 64) {
-        return InterpretThisInstruction();
+        intval = ir.FPDoubleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else {
         UNREACHABLE();
     }
@@ -69,13 +69,13 @@ bool TranslatorVisitor::FCVTZU_float_fix(bool sf, Imm<2> type, Imm<6> scale, Vec

     IR::U32U64 intval;
     if (intsize == 32 && *fltsize == 32) {
-        intval = ir.FPSingleToU32(fltval, true, true);
+        intval = ir.FPSingleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 32 && *fltsize == 64) {
-        intval = ir.FPDoubleToU32(fltval, true, true);
+        intval = ir.FPDoubleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 32) {
-        return InterpretThisInstruction();
+        intval = ir.FPSingleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 64) {
-        return InterpretThisInstruction();
+        intval = ir.FPDoubleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else {
         UNREACHABLE();
     }
diff --git a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp
index fd634dcc..009744bd 100644
--- a/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp
+++ b/src/frontend/A64/translate/impl/floating_point_conversion_integer.cpp
@@ -146,13 +146,13 @@ bool TranslatorVisitor::FCVTZS_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) {

     IR::U32U64 intval;
     if (intsize == 32 && *fltsize == 32) {
-        intval = ir.FPSingleToS32(fltval, true, true);
+        intval = ir.FPSingleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 32 && *fltsize == 64) {
-        intval = ir.FPDoubleToS32(fltval, true, true);
+        intval = ir.FPDoubleToFixedS32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 32) {
-        return InterpretThisInstruction();
+        intval = ir.FPSingleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 64) {
-        return InterpretThisInstruction();
+        intval = ir.FPDoubleToFixedS64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else {
         UNREACHABLE();
     }
@@ -173,13 +173,13 @@ bool TranslatorVisitor::FCVTZU_float_int(bool sf, Imm<2> type, Vec Vn, Reg Rd) {

     IR::U32U64 intval;
     if (intsize == 32 && *fltsize == 32) {
-        intval = ir.FPSingleToU32(fltval, true, true);
+        intval = ir.FPSingleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 32 && *fltsize == 64) {
-        intval = ir.FPDoubleToU32(fltval, true, true);
+        intval = ir.FPDoubleToFixedU32(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 32) {
-        return InterpretThisInstruction();
+        intval = ir.FPSingleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else if (intsize == 64 && *fltsize == 64) {
-        return InterpretThisInstruction();
+        intval = ir.FPDoubleToFixedU64(fltval, 0, FP::RoundingMode::TowardsZero);
     } else {
         UNREACHABLE();
     }
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 3f7c6d33..e3858a7a 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1451,24 +1451,44 @@ U64 IREmitter::FPSingleToDouble(const U32& a, bool fpscr_controlled) {
     return Inst<U64>(Opcode::FPSingleToDouble, a);
 }

-U32 IREmitter::FPSingleToS32(const U32& a, bool round_towards_zero, bool fpscr_controlled) {
-    ASSERT(fpscr_controlled);
-    return Inst<U32>(Opcode::FPSingleToS32, a, Imm1(round_towards_zero));
+U32 IREmitter::FPDoubleToFixedS32(const U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 32);
+    return Inst<U32>(Opcode::FPDoubleToFixedS32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
 }

-U32 IREmitter::FPSingleToU32(const U32& a, bool round_towards_zero, bool fpscr_controlled) {
-    ASSERT(fpscr_controlled);
-    return Inst<U32>(Opcode::FPSingleToU32, a, Imm1(round_towards_zero));
+U64 IREmitter::FPDoubleToFixedS64(const U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 64);
+    return Inst<U64>(Opcode::FPDoubleToFixedS64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
 }

-U32 IREmitter::FPDoubleToS32(const U64& a, bool round_towards_zero, bool fpscr_controlled) {
-    ASSERT(fpscr_controlled);
-    return Inst<U32>(Opcode::FPDoubleToS32, a, Imm1(round_towards_zero));
+U32 IREmitter::FPDoubleToFixedU32(const U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 32);
+    return Inst<U32>(Opcode::FPDoubleToFixedU32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
 }

-U32 IREmitter::FPDoubleToU32(const U64& a, bool round_towards_zero, bool fpscr_controlled) {
-    ASSERT(fpscr_controlled);
-    return Inst<U32>(Opcode::FPDoubleToU32, a, Imm1(round_towards_zero));
+U64 IREmitter::FPDoubleToFixedU64(const U64& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 64);
+    return Inst<U64>(Opcode::FPDoubleToFixedU64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+}
+
+U32 IREmitter::FPSingleToFixedS32(const U32& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 32);
+    return Inst<U32>(Opcode::FPSingleToFixedS32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+}
+
+U64 IREmitter::FPSingleToFixedS64(const U32& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 64);
+    return Inst<U64>(Opcode::FPSingleToFixedS64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+}
+
+U32 IREmitter::FPSingleToFixedU32(const U32& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 32);
+    return Inst<U32>(Opcode::FPSingleToFixedU32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+}
+
+U64 IREmitter::FPSingleToFixedU64(const U32& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= 64);
+    return Inst<U64>(Opcode::FPSingleToFixedU64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
 }

 U32 IREmitter::FPS32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled) {
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 8bc248d0..c783c164 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -12,6 +12,10 @@
 #include "frontend/ir/terminal.h"
 #include "frontend/ir/value.h"

+namespace Dynarmic::FP {
+enum class RoundingMode;
+} // namespace Dynarmic::FP
+
 // ARM JIT Microinstruction Intermediate Representation
 //
 // This intermediate representation is an SSA IR. It is designed primarily for analysis,
@@ -264,10 +268,14 @@ public:
     U32U64 FPSub(const U32U64& a, const U32U64& b, bool fpscr_controlled);
     U32 FPDoubleToSingle(const U64& a, bool fpscr_controlled);
     U64 FPSingleToDouble(const U32& a, bool fpscr_controlled);
-    U32 FPSingleToS32(const U32& a, bool round_towards_zero, bool fpscr_controlled);
-    U32 FPSingleToU32(const U32& a, bool round_towards_zero, bool fpscr_controlled);
-    U32 FPDoubleToS32(const U64& a, bool round_towards_zero, bool fpscr_controlled);
-    U32 FPDoubleToU32(const U64& a, bool round_towards_zero, bool fpscr_controlled);
+    U32 FPDoubleToFixedS32(const U64& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPDoubleToFixedS64(const U64& a, size_t fbits, FP::RoundingMode rounding);
+    U32 FPDoubleToFixedU32(const U64& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPDoubleToFixedU64(const U64& a, size_t fbits, FP::RoundingMode rounding);
+    U32 FPSingleToFixedS32(const U32& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPSingleToFixedS64(const U32& a, size_t fbits, FP::RoundingMode rounding);
+    U32 FPSingleToFixedU32(const U32& a, size_t fbits, FP::RoundingMode rounding);
+    U64 FPSingleToFixedU64(const U32& a, size_t fbits, FP::RoundingMode rounding);
     U32 FPS32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled);
     U32 FPU32ToSingle(const U32& a, bool round_to_nearest, bool fpscr_controlled);
     U64 FPS32ToDouble(const U32& a, bool round_to_nearest, bool fpscr_controlled);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 83ec4820..c4b5e165 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -386,10 +386,14 @@ OPCODE(FPSub64, T::U64, T::U64, T::U
 // Floating-point conversions
 OPCODE(FPSingleToDouble,        T::U64,     T::U32                          )
 OPCODE(FPDoubleToSingle,        T::U32,     T::U64                          )
-OPCODE(FPSingleToU32,           T::U32,     T::U32,     T::U1               )
-OPCODE(FPSingleToS32,           T::U32,     T::U32,     T::U1               )
-OPCODE(FPDoubleToU32,           T::U32,     T::U64,     T::U1               )
-OPCODE(FPDoubleToS32,           T::U32,     T::U64,     T::U1               )
+OPCODE(FPDoubleToFixedS32,      T::U32,     T::U64,     T::U8,      T::U8   )
+OPCODE(FPDoubleToFixedS64,      T::U64,     T::U64,     T::U8,      T::U8   )
+OPCODE(FPDoubleToFixedU32,      T::U32,     T::U64,     T::U8,      T::U8   )
+OPCODE(FPDoubleToFixedU64,      T::U64,     T::U64,     T::U8,      T::U8   )
+OPCODE(FPSingleToFixedS32,      T::U32,     T::U32,     T::U8,      T::U8   )
+OPCODE(FPSingleToFixedS64,      T::U64,     T::U32,     T::U8,      T::U8   )
+OPCODE(FPSingleToFixedU32,      T::U32,     T::U32,     T::U8,      T::U8   )
+OPCODE(FPSingleToFixedU64,      T::U64,     T::U32,     T::U8,      T::U8   )
 OPCODE(FPU32ToSingle,           T::U32,     T::U32,     T::U1               )
 OPCODE(FPS32ToSingle,           T::U32,     T::U32,     T::U1               )
 OPCODE(FPU32ToDouble,           T::U64,     T::U32,     T::U1               )
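
Note on the new fpsr_exc field: the 0x9F mask written in A32JitState::SetFpscr and A64JitState::SetFpsr selects the six cumulative exception flags of the ARM FPSCR/FPSR, which this change accumulates in guest state instead of mirroring them into the host MXCSR. A minimal sketch of how that mask decomposes follows; the constant names are illustrative only and are not identifiers from this patch.

#include <cstdint>

// ARM FPSCR/FPSR cumulative exception flag bits (bottom byte).
constexpr std::uint32_t IOC = 1u << 0; // Invalid Operation
constexpr std::uint32_t DZC = 1u << 1; // Division by Zero
constexpr std::uint32_t OFC = 1u << 2; // Overflow
constexpr std::uint32_t UFC = 1u << 3; // Underflow
constexpr std::uint32_t IXC = 1u << 4; // Inexact
constexpr std::uint32_t IDC = 1u << 7; // Input Denormal

// fpsr_exc = FPSCR & 0x9F keeps exactly these six bits.
static_assert((IOC | DZC | OFC | UFC | IXC | IDC) == 0x9F, "cumulative flag mask");

FP::FPToFixed reports exceptions through its FP::FPSR& parameter, which is why EmitFPToFixedFallback passes the address of fpsr_exc (via offsetof_fpsr_exc) in ABI_PARAM3; Fpscr() and GetFpsr() then OR the accumulated bits back into the guest-visible register.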