backend/x64: Add further Unsafe_InaccurateNaN locations

This commit is contained in:
MerryMage 2021-01-02 17:28:22 +00:00
parent f9ccf91b94
commit c15917b350
2 changed files with 27 additions and 7 deletions

View file

@ -257,7 +257,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
if (!ctx.FPCR().DN()) { if (!ctx.FPCR().DN() && !ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
end = ProcessNaN<fsize>(code, result); end = ProcessNaN<fsize>(code, result);
} }
if constexpr (std::is_member_function_pointer_v<Function>) { if constexpr (std::is_member_function_pointer_v<Function>) {
@ -265,7 +265,9 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
} else { } else {
fn(result); fn(result);
} }
if (ctx.FPCR().DN()) { if (ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
// Do nothing
} else if (ctx.FPCR().DN()) {
ForceToDefaultNaN<fsize>(code, result); ForceToDefaultNaN<fsize>(code, result);
} else { } else {
PostProcessNaN<fsize>(code, result, ctx.reg_alloc.ScratchXmm()); PostProcessNaN<fsize>(code, result, ctx.reg_alloc.ScratchXmm());
@ -281,6 +283,20 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
if constexpr (std::is_member_function_pointer_v<Function>) {
(code.*fn)(result, operand);
} else {
fn(result, operand);
}
ctx.reg_alloc.DefineValue(inst, result);
return;
}
if (ctx.FPCR().DN()) { if (ctx.FPCR().DN()) {
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]); const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);

View file

@ -290,7 +290,7 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[fpcr_controlled_arg_index].GetImmediateU1(); const bool fpcr_controlled = args[fpcr_controlled_arg_index].GetImmediateU1();
if (ctx.FPCR(fpcr_controlled).DN()) { if (ctx.FPCR(fpcr_controlled).DN() || ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
Xbyak::Xmm result; Xbyak::Xmm result;
if constexpr (std::is_member_function_pointer_v<Function>) { if constexpr (std::is_member_function_pointer_v<Function>) {
@ -306,7 +306,9 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
}); });
} }
ForceToDefaultNaN<fsize>(code, ctx.FPCR(fpcr_controlled), result); if (!ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
ForceToDefaultNaN<fsize>(code, ctx.FPCR(fpcr_controlled), result);
}
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
return; return;
@ -342,7 +344,7 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[2].GetImmediateU1(); const bool fpcr_controlled = args[2].GetImmediateU1();
if (ctx.FPCR(fpcr_controlled).DN()) { if (ctx.FPCR(fpcr_controlled).DN() || ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
@ -356,7 +358,9 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
}); });
} }
ForceToDefaultNaN<fsize>(code, ctx.FPCR(fpcr_controlled), xmm_a); if (!ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
ForceToDefaultNaN<fsize>(code, ctx.FPCR(fpcr_controlled), xmm_a);
}
ctx.reg_alloc.DefineValue(inst, xmm_a); ctx.reg_alloc.DefineValue(inst, xmm_a);
return; return;
@ -988,7 +992,7 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[3].GetImmediateU1(); const bool fpcr_controlled = args[3].GetImmediateU1();
if (code.HasFMA() && code.HasAVX()) { if (code.HasFMA() && code.HasAVX() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]); const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]);