backend/x64: Add further Unsafe_InaccurateNaN locations
This commit is contained in:
parent
f9ccf91b94
commit
c15917b350
2 changed files with 27 additions and 7 deletions
|
@ -257,7 +257,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
|
|||
|
||||
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
|
||||
if (!ctx.FPCR().DN()) {
|
||||
if (!ctx.FPCR().DN() && !ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
||||
end = ProcessNaN<fsize>(code, result);
|
||||
}
|
||||
if constexpr (std::is_member_function_pointer_v<Function>) {
|
||||
|
@ -265,7 +265,9 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
|
|||
} else {
|
||||
fn(result);
|
||||
}
|
||||
if (ctx.FPCR().DN()) {
|
||||
if (ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
||||
// Do nothing
|
||||
} else if (ctx.FPCR().DN()) {
|
||||
ForceToDefaultNaN<fsize>(code, result);
|
||||
} else {
|
||||
PostProcessNaN<fsize>(code, result, ctx.reg_alloc.ScratchXmm());
|
||||
|
@ -281,6 +283,20 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
|
|||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
if (ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||
|
||||
if constexpr (std::is_member_function_pointer_v<Function>) {
|
||||
(code.*fn)(result, operand);
|
||||
} else {
|
||||
fn(result, operand);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
return;
|
||||
}
|
||||
|
||||
if (ctx.FPCR().DN()) {
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||
|
|
|
@ -290,7 +290,7 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
|
|||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const bool fpcr_controlled = args[fpcr_controlled_arg_index].GetImmediateU1();
|
||||
|
||||
if (ctx.FPCR(fpcr_controlled).DN()) {
|
||||
if (ctx.FPCR(fpcr_controlled).DN() || ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
||||
Xbyak::Xmm result;
|
||||
|
||||
if constexpr (std::is_member_function_pointer_v<Function>) {
|
||||
|
@ -306,7 +306,9 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
|
|||
});
|
||||
}
|
||||
|
||||
if (!ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
||||
ForceToDefaultNaN<fsize>(code, ctx.FPCR(fpcr_controlled), result);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
return;
|
||||
|
@ -342,7 +344,7 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
|
|||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const bool fpcr_controlled = args[2].GetImmediateU1();
|
||||
|
||||
if (ctx.FPCR(fpcr_controlled).DN()) {
|
||||
if (ctx.FPCR(fpcr_controlled).DN() || ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
||||
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
|
@ -356,7 +358,9 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
|
|||
});
|
||||
}
|
||||
|
||||
if (!ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
||||
ForceToDefaultNaN<fsize>(code, ctx.FPCR(fpcr_controlled), xmm_a);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, xmm_a);
|
||||
return;
|
||||
|
@ -988,7 +992,7 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
|||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
const bool fpcr_controlled = args[3].GetImmediateU1();
|
||||
|
||||
if (code.HasFMA() && code.HasAVX()) {
|
||||
if (code.HasFMA() && code.HasAVX() && ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]);
|
||||
|
|
Loading…
Reference in a new issue