A64: Add unsafe_optimizations option
* Unsafely strength-reduce FMA into a separate multiply followed by an add
This commit is contained in:
parent
82868034d3
commit
761e95eec0
5 changed files with 35 additions and 0 deletions
|
@ -138,6 +138,9 @@ struct UserConfig {
|
||||||
return (f & optimizations) != no_optimizations;
|
return (f & optimizations) != no_optimizations;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// This enables unsafe optimizations that reduce emulation accuracy in favour of speed.
|
||||||
|
bool unsafe_optimizations = false;
|
||||||
|
|
||||||
/// When set to true, UserCallbacks::DataCacheOperationRaised will be called when any
|
/// When set to true, UserCallbacks::DataCacheOperationRaised will be called when any
|
||||||
/// data cache instruction is executed. Notably DC ZVA will not implicitly do anything.
|
/// data cache instruction is executed. Notably DC ZVA will not implicitly do anything.
|
||||||
/// When set to false, UserCallbacks::DataCacheOperationRaised will never be called.
|
/// When set to false, UserCallbacks::DataCacheOperationRaised will never be called.
|
||||||
|
|
|
@ -29,6 +29,8 @@ struct A64EmitContext final : public EmitContext {
|
||||||
bool IsSingleStep() const;
|
bool IsSingleStep() const;
|
||||||
FP::FPCR FPCR(bool fpcr_controlled = true) const override;
|
FP::FPCR FPCR(bool fpcr_controlled = true) const override;
|
||||||
|
|
||||||
|
/// Reports whether unsafe optimizations are enabled, as set by the
/// `unsafe_optimizations` flag of the user-supplied A64 configuration.
bool UnsafeOptimizations() const override { return conf.unsafe_optimizations; }
|
||||||
|
|
||||||
const A64::UserConfig& conf;
|
const A64::UserConfig& conf;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -51,6 +51,8 @@ struct EmitContext {
|
||||||
|
|
||||||
virtual FP::FPCR FPCR(bool fpcr_controlled = true) const = 0;
|
virtual FP::FPCR FPCR(bool fpcr_controlled = true) const = 0;
|
||||||
|
|
||||||
|
/// Whether emitters may take unsafe (less accurate) fast paths.
/// Defaults to false; a derived context overrides this to opt in.
virtual bool UnsafeOptimizations() const { return false; }
|
||||||
|
|
||||||
RegAlloc& reg_alloc;
|
RegAlloc& reg_alloc;
|
||||||
IR::Block& block;
|
IR::Block& block;
|
||||||
};
|
};
|
||||||
|
|
|
@ -637,6 +637,20 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ctx.UnsafeOptimizations()) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
|
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||||
|
const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]);
|
||||||
|
|
||||||
|
FCODE(muls)(operand2, operand3);
|
||||||
|
FCODE(adds)(operand1, operand2);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, operand1);
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
|
@ -1021,6 +1021,20 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
ctx.reg_alloc.DefineValue(inst, result);
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ctx.UnsafeOptimizations()) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
|
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||||
|
const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]);
|
||||||
|
|
||||||
|
FCODE(mulp)(operand2, operand3);
|
||||||
|
FCODE(addp)(operand1, operand2);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, operand1);
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
EmitFourOpFallback(code, ctx, inst, fallback_fn);
|
EmitFourOpFallback(code, ctx, inst, fallback_fn);
|
||||||
|
|
Loading…
Reference in a new issue