backend/x64: Add further Unsafe_InaccurateNaN locations
This commit is contained in:
parent
f9ccf91b94
commit
c15917b350
2 changed files with 27 additions and 7 deletions
|
@ -257,7 +257,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
|
||||||
|
|
||||||
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
|
|
||||||
if (!ctx.FPCR().DN()) {
|
if (!ctx.FPCR().DN() && !ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
||||||
end = ProcessNaN<fsize>(code, result);
|
end = ProcessNaN<fsize>(code, result);
|
||||||
}
|
}
|
||||||
if constexpr (std::is_member_function_pointer_v<Function>) {
|
if constexpr (std::is_member_function_pointer_v<Function>) {
|
||||||
|
@ -265,7 +265,9 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
|
||||||
} else {
|
} else {
|
||||||
fn(result);
|
fn(result);
|
||||||
}
|
}
|
||||||
if (ctx.FPCR().DN()) {
|
if (ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
||||||
|
// Do nothing
|
||||||
|
} else if (ctx.FPCR().DN()) {
|
||||||
ForceToDefaultNaN<fsize>(code, result);
|
ForceToDefaultNaN<fsize>(code, result);
|
||||||
} else {
|
} else {
|
||||||
PostProcessNaN<fsize>(code, result, ctx.reg_alloc.ScratchXmm());
|
PostProcessNaN<fsize>(code, result, ctx.reg_alloc.ScratchXmm());
|
||||||
|
@ -281,6 +283,20 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
|
||||||
|
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
if (ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
||||||
|
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
|
const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||||
|
|
||||||
|
if constexpr (std::is_member_function_pointer_v<Function>) {
|
||||||
|
(code.*fn)(result, operand);
|
||||||
|
} else {
|
||||||
|
fn(result, operand);
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (ctx.FPCR().DN()) {
|
if (ctx.FPCR().DN()) {
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
|
const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||||
|
|
|
@ -290,7 +290,7 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
const bool fpcr_controlled = args[fpcr_controlled_arg_index].GetImmediateU1();
|
const bool fpcr_controlled = args[fpcr_controlled_arg_index].GetImmediateU1();
|
||||||
|
|
||||||
if (ctx.FPCR(fpcr_controlled).DN()) {
|
if (ctx.FPCR(fpcr_controlled).DN() || ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
||||||
Xbyak::Xmm result;
|
Xbyak::Xmm result;
|
||||||
|
|
||||||
if constexpr (std::is_member_function_pointer_v<Function>) {
|
if constexpr (std::is_member_function_pointer_v<Function>) {
|
||||||
|
@ -306,7 +306,9 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
ForceToDefaultNaN<fsize>(code, ctx.FPCR(fpcr_controlled), result);
|
if (!ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
||||||
|
ForceToDefaultNaN<fsize>(code, ctx.FPCR(fpcr_controlled), result);
|
||||||
|
}
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
return;
|
return;
|
||||||
|
@ -342,7 +344,7 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
const bool fpcr_controlled = args[2].GetImmediateU1();
|
const bool fpcr_controlled = args[2].GetImmediateU1();
|
||||||
|
|
||||||
if (ctx.FPCR(fpcr_controlled).DN()) {
|
if (ctx.FPCR(fpcr_controlled).DN() || ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
||||||
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
|
|
||||||
|
@ -356,7 +358,9 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
ForceToDefaultNaN<fsize>(code, ctx.FPCR(fpcr_controlled), xmm_a);
|
if (!ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
||||||
|
ForceToDefaultNaN<fsize>(code, ctx.FPCR(fpcr_controlled), xmm_a);
|
||||||
|
}
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, xmm_a);
|
ctx.reg_alloc.DefineValue(inst, xmm_a);
|
||||||
return;
|
return;
|
||||||
|
@ -988,7 +992,7 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
const bool fpcr_controlled = args[3].GetImmediateU1();
|
const bool fpcr_controlled = args[3].GetImmediateU1();
|
||||||
|
|
||||||
if (code.HasFMA() && code.HasAVX()) {
|
if (code.HasFMA() && code.HasAVX() && ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
|
||||||
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]);
|
const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]);
|
||||||
|
|
Loading…
Reference in a new issue