From c15917b3509e191a92968a78691dde960afafe21 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 2 Jan 2021 17:28:22 +0000 Subject: [PATCH] backend/x64: Add further Unsafe_InaccurateNaN locations --- Note: every condition added below is keyed on OptimizationFlag::Unsafe_InaccurateNaN, including the HasFMA()/HasAVX() branch of EmitFPVectorMulAdd. src/backend/x64/emit_x64_floating_point.cpp | 20 +++++++++++++++++-- .../x64/emit_x64_vector_floating_point.cpp | 14 ++++++++----- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/src/backend/x64/emit_x64_floating_point.cpp b/src/backend/x64/emit_x64_floating_point.cpp index c31aed14..f64a8b7e 100644 --- a/src/backend/x64/emit_x64_floating_point.cpp +++ b/src/backend/x64/emit_x64_floating_point.cpp @@ -257,7 +257,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - if (!ctx.FPCR().DN()) { + if (!ctx.FPCR().DN() && !ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { end = ProcessNaN(code, result); } if constexpr (std::is_member_function_pointer_v) { @@ -265,7 +265,9 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { } else { fn(result); } - if (ctx.FPCR().DN()) { + if (ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { + // Do nothing + } else if (ctx.FPCR().DN()) { ForceToDefaultNaN(code, result); } else { PostProcessNaN(code, result, ctx.reg_alloc.ScratchXmm()); @@ -281,6 +283,20 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) auto args = ctx.reg_alloc.GetArgumentInfo(inst); + if (ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]); + + if constexpr (std::is_member_function_pointer_v) { + (code.*fn)(result, operand); + } else { + fn(result, operand); + } + + ctx.reg_alloc.DefineValue(inst, result); + return; + } + if (ctx.FPCR().DN()) { const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm operand = 
ctx.reg_alloc.UseScratchXmm(args[1]); diff --git a/src/backend/x64/emit_x64_vector_floating_point.cpp b/src/backend/x64/emit_x64_vector_floating_point.cpp index 07d4cd4e..d09e7412 100644 --- a/src/backend/x64/emit_x64_vector_floating_point.cpp +++ b/src/backend/x64/emit_x64_vector_floating_point.cpp @@ -290,7 +290,7 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins auto args = ctx.reg_alloc.GetArgumentInfo(inst); const bool fpcr_controlled = args[fpcr_controlled_arg_index].GetImmediateU1(); - if (ctx.FPCR(fpcr_controlled).DN()) { + if (ctx.FPCR(fpcr_controlled).DN() || ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { Xbyak::Xmm result; if constexpr (std::is_member_function_pointer_v) { @@ -306,7 +306,9 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins }); } - ForceToDefaultNaN(code, ctx.FPCR(fpcr_controlled), result); + if (!ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { + ForceToDefaultNaN(code, ctx.FPCR(fpcr_controlled), result); + } ctx.reg_alloc.DefineValue(inst, result); return; @@ -342,7 +344,7 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i auto args = ctx.reg_alloc.GetArgumentInfo(inst); const bool fpcr_controlled = args[2].GetImmediateU1(); - if (ctx.FPCR(fpcr_controlled).DN()) { + if (ctx.FPCR(fpcr_controlled).DN() || ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); @@ -356,7 +358,9 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i }); } - ForceToDefaultNaN(code, ctx.FPCR(fpcr_controlled), xmm_a); + if (!ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { + ForceToDefaultNaN(code, ctx.FPCR(fpcr_controlled), xmm_a); + } ctx.reg_alloc.DefineValue(inst, xmm_a); return; @@ -988,7 +992,7 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, 
IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const bool fpcr_controlled = args[3].GetImmediateU1(); - if (code.HasFMA() && code.HasAVX()) { + if (code.HasFMA() && code.HasAVX() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]);