From de9d8c461ce8ae2e0484ec82d4247551fd29f9bb Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Tue, 31 Jul 2018 16:08:13 +0100
Subject: [PATCH] emit_x64_floating_point: FlushToZero is redundant as
 hardware already does FTZ

---
 src/backend_x64/emit_x64_floating_point.cpp | 42 ---------------------
 1 file changed, 42 deletions(-)

diff --git a/src/backend_x64/emit_x64_floating_point.cpp b/src/backend_x64/emit_x64_floating_point.cpp
index 4a37a676..672967dc 100644
--- a/src/backend_x64/emit_x64_floating_point.cpp
+++ b/src/backend_x64/emit_x64_floating_point.cpp
@@ -97,33 +97,6 @@ void DenormalsAreZero(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_
     code.L(end);
 }
 
-template<size_t fsize>
-void FlushToZero(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
-    Xbyak::Label end;
-
-    if constexpr (fsize == 32) {
-        code.movd(gpr_scratch.cvt32(), xmm_value);
-        code.and_(gpr_scratch.cvt32(), u32(0x7FFFFFFF));
-        code.sub(gpr_scratch.cvt32(), u32(1));
-        code.cmp(gpr_scratch.cvt32(), u32(0x007FFFFE));
-    } else {
-        auto mask = code.MConst(xword, f64_non_sign_mask);
-        mask.setBit(64);
-        auto penult_denormal = code.MConst(xword, f64_penultimate_positive_denormal);
-        penult_denormal.setBit(64);
-
-        code.movq(gpr_scratch, xmm_value);
-        code.and_(gpr_scratch, mask);
-        code.sub(gpr_scratch, u32(1));
-        code.cmp(gpr_scratch, penult_denormal);
-    }
-
-    code.ja(end);
-    code.pxor(xmm_value, xmm_value);
-    code.mov(dword[r15 + code.GetJitStateInfo().offsetof_FPSCR_UFC], u32(1 << 3));
-    code.L(end);
-}
-
 template<size_t fsize>
 void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) {
     code.pxor(xmm_scratch, xmm_scratch);
@@ -245,9 +218,6 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
     } else {
         fn(result);
     }
-    if (ctx.FPSCR_FTZ()) {
-        FlushToZero<fsize>(code, result, gpr_scratch);
-    }
     if (ctx.FPSCR_DN()) {
         DefaultNaN<fsize>(code, result);
     } else if (ctx.AccurateNaN()) {
@@ -283,9 +253,6 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, [[maybe_unus
     } else {
         fn(result, operand);
     }
-    if (ctx.FPSCR_FTZ()) {
-        FlushToZero<fsize>(code, result, gpr_scratch);
-    }
     if (ctx.FPSCR_DN()) {
         DefaultNaN<fsize>(code, result);
     } else if (ctx.AccurateNaN()) {
@@ -321,9 +288,6 @@ void FPFourOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn,
         PreProcessNaNs<fsize>(code, ctx, result, operand2, operand3, end, nan_handler);
     }
     fn(result, operand2, operand3);
-    if (ctx.FPSCR_FTZ()) {
-        FlushToZero<fsize>(code, result, gpr_scratch);
-    }
     if (ctx.FPSCR_DN()) {
         DefaultNaN<fsize>(code, result);
     } else if (ctx.AccurateNaN()) {
@@ -923,9 +887,6 @@ void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) {
         DenormalsAreZero<32>(code, result, gpr_scratch);
     }
     code.cvtss2sd(result, result);
-    if (ctx.FPSCR_FTZ()) {
-        FlushToZero<64>(code, result, gpr_scratch);
-    }
     if (ctx.FPSCR_DN()) {
         DefaultNaN<64>(code, result);
     }
@@ -942,9 +903,6 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) {
         DenormalsAreZero<64>(code, result, gpr_scratch);
     }
     code.cvtsd2ss(result, result);
-    if (ctx.FPSCR_FTZ()) {
-        FlushToZero<32>(code, result, gpr_scratch);
-    }
     if (ctx.FPSCR_DN()) {
         DefaultNaN<32>(code, result);
     }
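
Note on why the emitted software flush can go: once the FTZ bit (bit 15) of MXCSR is set, SSE arithmetic flushes denormal results to zero in hardware, so, assuming the JIT enters guest code with an MXCSR whose FTZ bit mirrors FPSCR.FTZ (as the commit message implies), the per-result flush above duplicates what the CPU already does. The snippet below is only an illustration of that hardware behaviour using the standard <xmmintrin.h> intrinsics; it is not part of the patch and makes no claim about dynarmic's exact MXCSR setup.

    // Illustration only (not dynarmic code): demonstrates the hardware FTZ
    // behaviour the commit relies on. With MXCSR.FTZ set, any denormal result
    // of an SSE operation is flushed to +/-0 by the CPU itself, so no explicit
    // post-operation flush needs to be emitted.
    #include <cstdio>
    #include <xmmintrin.h>

    int main() {
        _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);  // set bit 15 (FTZ) of MXCSR

        volatile float tiny = 1e-38f;           // close to the smallest normal float
        volatile float result = tiny * 0.001f;  // would be a denormal without FTZ

        // Prints 0 with FTZ on; a nonzero denormal (~1e-41) with it off.
        std::printf("%g\n", static_cast<double>(result));
        return 0;
    }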