emit_x64_floating_point: FlushToZero is redundant as hardware already does FTZ

2018-07-31 16:08:13 +01:00 · 2018-07-31 16:08:13 +01:00 · de9d8c461c
commit de9d8c461c
parent 822fd4a875
1 changed file with 0 additions and 42 deletions
--- a/src/backend_x64/emit_x64_floating_point.cpp
+++ b/src/backend_x64/emit_x64_floating_point.cpp
@@ -97,33 +97,6 @@ void DenormalsAreZero(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_
    code.L(end);
 }

-template<size_t fsize>
-void FlushToZero(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
-    Xbyak::Label end;
-
-    if constexpr (fsize == 32) {
-        code.movd(gpr_scratch.cvt32(), xmm_value);
-        code.and_(gpr_scratch.cvt32(), u32(0x7FFFFFFF));
-        code.sub(gpr_scratch.cvt32(), u32(1));
-        code.cmp(gpr_scratch.cvt32(), u32(0x007FFFFE));
-    } else {
-        auto mask = code.MConst(xword, f64_non_sign_mask);
-        mask.setBit(64);
-        auto penult_denormal = code.MConst(xword, f64_penultimate_positive_denormal);
-        penult_denormal.setBit(64);
-
-        code.movq(gpr_scratch, xmm_value);
-        code.and_(gpr_scratch, mask);
-        code.sub(gpr_scratch, u32(1));
-        code.cmp(gpr_scratch, penult_denormal);
-    }
-
-    code.ja(end);
-    code.pxor(xmm_value, xmm_value);
-    code.mov(dword[r15 + code.GetJitStateInfo().offsetof_FPSCR_UFC], u32(1 << 3));
-    code.L(end);
-}
-
 template<size_t fsize>
 void ZeroIfNaN(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) {
    code.pxor(xmm_scratch, xmm_scratch);
@@ -245,9 +218,6 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
    } else {
        fn(result);
    }
-    if (ctx.FPSCR_FTZ()) {
-        FlushToZero<fsize>(code, result, gpr_scratch);
-    }
    if (ctx.FPSCR_DN()) {
        DefaultNaN<fsize>(code, result);
    } else if (ctx.AccurateNaN()) {
@@ -283,9 +253,6 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, [[maybe_unus
    } else {
        fn(result, operand);
    }
-    if (ctx.FPSCR_FTZ()) {
-        FlushToZero<fsize>(code, result, gpr_scratch);
-    }
    if (ctx.FPSCR_DN()) {
        DefaultNaN<fsize>(code, result);
    } else if (ctx.AccurateNaN()) {
@@ -321,9 +288,6 @@ void FPFourOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn,
        PreProcessNaNs<fsize>(code, ctx, result, operand2, operand3, end, nan_handler);
    }
    fn(result, operand2, operand3);
-    if (ctx.FPSCR_FTZ()) {
-        FlushToZero<fsize>(code, result, gpr_scratch);
-    }
    if (ctx.FPSCR_DN()) {
        DefaultNaN<fsize>(code, result);
    } else if (ctx.AccurateNaN()) {
@@ -923,9 +887,6 @@ void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) {
        DenormalsAreZero<32>(code, result, gpr_scratch);
    }
    code.cvtss2sd(result, result);
-    if (ctx.FPSCR_FTZ()) {
-        FlushToZero<64>(code, result, gpr_scratch);
-    }
    if (ctx.FPSCR_DN()) {
        DefaultNaN<64>(code, result);
    }
@@ -942,9 +903,6 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) {
        DenormalsAreZero<64>(code, result, gpr_scratch);
    }
    code.cvtsd2ss(result, result);
-    if (ctx.FPSCR_FTZ()) {
-        FlushToZero<32>(code, result, gpr_scratch);
-    }
    if (ctx.FPSCR_DN()) {
        DefaultNaN<32>(code, result);
    }