Unsafe Optimization: Extend Unsafe_UnfuseFMA to all FMA-related instructions
This commit is contained in:
parent
d05d95c132
commit
6bbc53839f
2 changed files with 62 additions and 0 deletions
|
@ -845,6 +845,21 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
|||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
return;
|
||||
}
|
||||
|
||||
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.movaps(result, code.MConst(xword, FP::FPValue<FPT, false, 0, 2>()));
|
||||
FCODE(muls)(operand1, operand2);
|
||||
FCODE(subs)(result, operand1);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
@ -1030,6 +1045,22 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
|||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
return;
|
||||
}
|
||||
|
||||
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.movaps(result, code.MConst(xword, FP::FPValue<FPT, false, 0, 3>()));
|
||||
FCODE(muls)(operand1, operand2);
|
||||
FCODE(subs)(result, operand1);
|
||||
FCODE(muls)(result, code.MConst(xword, FP::FPValue<FPT, false, -1, 1>()));
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, operand1);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
|
|
@ -1267,6 +1267,21 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
|
|||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
return;
|
||||
}
|
||||
|
||||
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.movaps(result, GetVectorOf<fsize, false, 0, 2>(code));
|
||||
FCODE(mulp)(operand1, operand2);
|
||||
FCODE(subp)(result, operand1);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
EmitThreeOpFallback(code, ctx, inst, fallback_fn);
|
||||
|
@ -1453,6 +1468,22 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
|
|||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
return;
|
||||
}
|
||||
|
||||
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.movaps(result, GetVectorOf<fsize, false, 0, 3>(code));
|
||||
FCODE(mulp)(operand1, operand2);
|
||||
FCODE(subp)(result, operand1);
|
||||
FCODE(mulp)(result, GetVectorOf<fsize, false, -1, 1>(code));
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
EmitThreeOpFallback(code, ctx, inst, fallback_fn);
|
||||
|
|
Loading…
Reference in a new issue