A64: Add unsafe_optimizations option

* Strength reduce FMA unsafely
This commit is contained in:
MerryMage 2020-07-06 21:01:24 +01:00
parent 82868034d3
commit 761e95eec0
5 changed files with 35 additions and 0 deletions

View file

@ -138,6 +138,9 @@ struct UserConfig {
return (f & optimizations) != no_optimizations; return (f & optimizations) != no_optimizations;
} }
/// This enables unsafe optimizations that reduce emulation accuracy in favour of speed.
/// Disabled (false) by default. The A64 emit context reports this flag to the code
/// emitters, which may then, for example, emit a floating-point multiply-add as a
/// separate multiply followed by an add.
bool unsafe_optimizations = false;
/// When set to true, UserCallbacks::DataCacheOperationRaised will be called when any /// When set to true, UserCallbacks::DataCacheOperationRaised will be called when any
/// data cache instruction is executed. Notably DC ZVA will not implicitly do anything. /// data cache instruction is executed. Notably DC ZVA will not implicitly do anything.
/// When set to false, UserCallbacks::DataCacheOperationRaised will never be called. /// When set to false, UserCallbacks::DataCacheOperationRaised will never be called.

View file

@ -29,6 +29,8 @@ struct A64EmitContext final : public EmitContext {
bool IsSingleStep() const; bool IsSingleStep() const;
FP::FPCR FPCR(bool fpcr_controlled = true) const override; FP::FPCR FPCR(bool fpcr_controlled = true) const override;
/// Reports whether the user configuration permits unsafe optimizations.
/// Simply forwards the `unsafe_optimizations` flag of the referenced A64 user config.
bool UnsafeOptimizations() const override {
    return conf.unsafe_optimizations;
}
const A64::UserConfig& conf; const A64::UserConfig& conf;
}; };

View file

@ -51,6 +51,8 @@ struct EmitContext {
virtual FP::FPCR FPCR(bool fpcr_controlled = true) const = 0; virtual FP::FPCR FPCR(bool fpcr_controlled = true) const = 0;
/// Reports whether emitters may use unsafe optimizations.
/// This base implementation always answers false; a derived context can override it
/// to opt in.
virtual bool UnsafeOptimizations() const {
    return false;
}
RegAlloc& reg_alloc; RegAlloc& reg_alloc;
IR::Block& block; IR::Block& block;
}; };

View file

@ -637,6 +637,20 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
return; return;
} }
// Unsafe path, taken only when the emit context reports that unsafe optimizations
// are enabled: emit a scalar multiply followed by a scalar add, then return early
// so the code after this block is not emitted for this instruction.
// NOTE(review): presumably the intermediate product is rounded before the add, so
// results can differ from a true fused multiply-add — confirm.
if (ctx.UnsafeOptimizations()) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
// operand1 and operand2 are written by the instructions below, hence the scratch
// variants; operand3 is only used as a source.
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]);
// operand2 = args[1] * args[2]
FCODE(muls)(operand2, operand3);
// operand1 = args[0] + operand2
FCODE(adds)(operand1, operand2);
// The sum left in operand1 is the value of this IR instruction.
ctx.reg_alloc.DefineValue(inst, operand1);
return;
}
} }
auto args = ctx.reg_alloc.GetArgumentInfo(inst); auto args = ctx.reg_alloc.GetArgumentInfo(inst);

View file

@ -1021,6 +1021,20 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, result); ctx.reg_alloc.DefineValue(inst, result);
return; return;
} }
// Unsafe path, taken only when the emit context reports that unsafe optimizations
// are enabled: emit a packed multiply followed by a packed add, then return early
// so the fallback below is not emitted for this instruction.
// NOTE(review): presumably each lane's product is rounded before the add, so
// results can differ from a true fused multiply-add — confirm.
if (ctx.UnsafeOptimizations()) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
// operand1 and operand2 are written by the instructions below, hence the scratch
// variants; operand3 is only used as a source.
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]);
// operand2 = args[1] * args[2] (per lane)
FCODE(mulp)(operand2, operand3);
// operand1 = args[0] + operand2 (per lane)
FCODE(addp)(operand1, operand2);
// The sum left in operand1 is the value of this IR instruction.
ctx.reg_alloc.DefineValue(inst, operand1);
return;
}
} }
EmitFourOpFallback(code, ctx, inst, fallback_fn); EmitFourOpFallback(code, ctx, inst, fallback_fn);