diff --git a/src/backend/x64/a32_interface.cpp b/src/backend/x64/a32_interface.cpp index d86f8768..511fb796 100644 --- a/src/backend/x64/a32_interface.cpp +++ b/src/backend/x64/a32_interface.cpp @@ -270,7 +270,6 @@ void TransferJitState(A32JitState& dest, const A32JitState& src, bool reset_rsb) dest.Reg = src.Reg; dest.ExtReg = src.ExtReg; dest.guest_MXCSR = src.guest_MXCSR; - dest.fpsr_idc = src.fpsr_idc; dest.fpcr_mode = src.fpcr_mode; dest.fpsr_nzcv = src.fpsr_nzcv; if (reset_rsb) { diff --git a/src/backend/x64/a32_jitstate.cpp b/src/backend/x64/a32_jitstate.cpp index 7f0ad300..407c67bd 100644 --- a/src/backend/x64/a32_jitstate.cpp +++ b/src/backend/x64/a32_jitstate.cpp @@ -156,12 +156,10 @@ constexpr u32 FPSCR_NZCV_MASK = 0xF0000000; u32 A32JitState::Fpscr() const { ASSERT((fpcr_mode & ~FPSCR_MODE_MASK) == 0); ASSERT((fpsr_nzcv & ~FPSCR_NZCV_MASK) == 0); - ASSERT((fpsr_idc & ~(1 << 7)) == 0); u32 FPSCR = fpcr_mode | fpsr_nzcv; FPSCR |= (guest_MXCSR & 0b0000000000001); // IOC = IE FPSCR |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE - FPSCR |= fpsr_idc; FPSCR |= fpsr_exc; return FPSCR; @@ -180,7 +178,6 @@ void A32JitState::SetFpscr(u32 FPSCR) { guest_MXCSR |= MXCSR_RMode[(FPSCR >> 22) & 0x3]; // Cumulative flags IDC, IOC, IXC, UFC, OFC, DZC - fpsr_idc = 0; fpsr_exc = FPSCR & 0x9F; if (Common::Bit<24>(FPSCR)) { diff --git a/src/backend/x64/a32_jitstate.h b/src/backend/x64/a32_jitstate.h index 313ac707..0f6920b6 100644 --- a/src/backend/x64/a32_jitstate.h +++ b/src/backend/x64/a32_jitstate.h @@ -69,7 +69,6 @@ struct A32JitState { u32 fpsr_exc = 0; u32 fpsr_qc = 0; // Dummy value - u32 fpsr_idc = 0; u32 fpcr_mode = 0; u32 fpsr_nzcv = 0; u32 Fpscr() const; diff --git a/src/backend/x64/a64_jitstate.cpp b/src/backend/x64/a64_jitstate.cpp index 656120a0..351ecf69 100644 --- a/src/backend/x64/a64_jitstate.cpp +++ b/src/backend/x64/a64_jitstate.cpp @@ -103,7 +103,6 @@ u32 A64JitState::GetFpsr() const { u32 fpsr = 0; fpsr |= (guest_MXCSR & 0b0000000000001); // IOC = IE fpsr |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE - fpsr |= fpsr_idc; fpsr |= fpsr_exc; fpsr |= (fpsr_qc == 0 ? 0 : 1) << 27; return fpsr; @@ -111,7 +110,6 @@ u32 A64JitState::GetFpsr() const { void A64JitState::SetFpsr(u32 value) { guest_MXCSR &= ~0x0000003D; - fpsr_idc = 0; fpsr_qc = (value >> 27) & 1; fpsr_exc = value & 0x9F; } diff --git a/src/backend/x64/a64_jitstate.h b/src/backend/x64/a64_jitstate.h index 14320c63..5a5e93fb 100644 --- a/src/backend/x64/a64_jitstate.h +++ b/src/backend/x64/a64_jitstate.h @@ -73,7 +73,6 @@ struct A64JitState { u32 fpsr_exc = 0; u32 fpsr_qc = 0; - u32 fpsr_idc = 0; u32 fpcr = 0; u32 GetFpcr() const; u32 GetFpsr() const; diff --git a/src/backend/x64/emit_x64_floating_point.cpp b/src/backend/x64/emit_x64_floating_point.cpp index 9987bddd..5dbe29e6 100644 --- a/src/backend/x64/emit_x64_floating_point.cpp +++ b/src/backend/x64/emit_x64_floating_point.cpp @@ -52,7 +52,6 @@ constexpr u64 f64_nan = 0x7ff8000000000000u; constexpr u64 f64_non_sign_mask = 0x7fffffffffffffffu; constexpr u64 f64_smallest_normal = 0x0010000000000000u; -constexpr u64 f64_penultimate_positive_denormal = 0x000ffffffffffffeu; constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double constexpr u64 f64_max_u32 = 0x41efffffffe00000u; // 4294967295 as a double @@ -89,33 +88,21 @@ std::optional ConvertRoundingModeToX64Immediate(FP::RoundingMode rounding_m } template -void DenormalsAreZero(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) { - Xbyak::Label end; - - if constexpr (fsize == 32) { - code.movd(gpr_scratch.cvt32(), xmm_value); - code.and_(gpr_scratch.cvt32(), u32(0x7FFFFFFF)); - code.sub(gpr_scratch.cvt32(), u32(1)); - code.cmp(gpr_scratch.cvt32(), u32(0x007FFFFE)); - } else { - auto mask = code.MConst(xword, f64_non_sign_mask); - mask.setBit(64); - auto penult_denormal = code.MConst(xword, f64_penultimate_positive_denormal); - penult_denormal.setBit(64); - - code.movq(gpr_scratch, xmm_value); - code.and_(gpr_scratch, mask); - code.sub(gpr_scratch, u32(1)); - code.cmp(gpr_scratch, penult_denormal); +void DenormalsAreZero(BlockOfCode& code, EmitContext& ctx, std::initializer_list to_daz, Xbyak::Xmm tmp) { + if (ctx.FPCR().FZ()) { + for (const Xbyak::Xmm& xmm : to_daz) { + // TODO: Optimize + code.movaps(tmp, code.MConst(xword, fsize == 32 ? f32_non_sign_mask : f64_non_sign_mask)); + code.andps(tmp, xmm); + if constexpr (fsize == 32) { + code.pcmpgtd(tmp, code.MConst(xword, f32_smallest_normal - 1)); + } else { + code.pcmpgtq(tmp, code.MConst(xword, f64_smallest_normal - 1)); + } + code.orps(tmp, code.MConst(xword, fsize == 32 ? f32_negative_zero : f64_negative_zero)); + code.andps(xmm, tmp); + } } - - // We need to report back whether we've found a denormal on input. - // SSE doesn't do this for us when SSE's DAZ is enabled. - - code.ja(end); - code.andps(xmm_value, code.MConst(xword, fsize == 32 ? f32_negative_zero : f64_negative_zero)); - code.mov(dword[r15 + code.GetJitStateInfo().offsetof_fpsr_idc], u32(1 << 7)); - code.L(end); } template @@ -421,10 +408,7 @@ static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); const Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr(); - if (ctx.FPCR().FZ()) { - DenormalsAreZero(code, result, gpr_scratch); - DenormalsAreZero(code, operand, gpr_scratch); - } + DenormalsAreZero(code, ctx, {result, operand}, tmp); Xbyak::Label equal, end, nan; @@ -483,13 +467,9 @@ static void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* i } }; - if (ctx.FPCR().FZ()) { - DenormalsAreZero(code, op1, tmp.cvt64()); - DenormalsAreZero(code, op2, tmp.cvt64()); - } - Xbyak::Label end, z, nan, op2_is_nan, snan, maybe_both_nan, normal; + DenormalsAreZero(code, ctx, {op1, op2}, xmm0); FCODE(ucomis)(op1, op2); code.jz(z, code.T_NEAR); code.L(normal); diff --git a/src/backend/x64/jitstate_info.h b/src/backend/x64/jitstate_info.h index 273801f8..7725bf85 100644 --- a/src/backend/x64/jitstate_info.h +++ b/src/backend/x64/jitstate_info.h @@ -24,7 +24,6 @@ struct JitStateInfo { , offsetof_rsb_location_descriptors(offsetof(JitStateType, rsb_location_descriptors)) , offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs)) , offsetof_cpsr_nzcv(offsetof(JitStateType, cpsr_nzcv)) - , offsetof_fpsr_idc(offsetof(JitStateType, fpsr_idc)) , offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc)) , offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc)) {} @@ -38,7 +37,6 @@ struct JitStateInfo { const size_t offsetof_rsb_location_descriptors; const size_t offsetof_rsb_codeptrs; const size_t offsetof_cpsr_nzcv; - const size_t offsetof_fpsr_idc; const size_t offsetof_fpsr_exc; const size_t offsetof_fpsr_qc; };