Introduce Unsafe_InaccurateNaN
This commit is contained in:
parent
4a9a0d07f7
commit
eeeafaf5fb
2 changed files with 19 additions and 0 deletions
|
@ -39,6 +39,9 @@ enum class OptimizationFlag : std::uint32_t {
|
||||||
/// This is an UNSAFE optimization that reduces accuracy of certain floating-point instructions.
|
/// This is an UNSAFE optimization that reduces accuracy of certain floating-point instructions.
|
||||||
/// This allows results of FRECPE and FRSQRTE to have **less** error than spec allows.
|
/// This allows results of FRECPE and FRSQRTE to have **less** error than spec allows.
|
||||||
Unsafe_ReducedErrorFP = 0x00020000,
|
Unsafe_ReducedErrorFP = 0x00020000,
|
||||||
|
/// This is an UNSAFE optimization that causes floating-point instructions to not produce correct NaNs.
|
||||||
|
/// This may also result in inaccurate results when instructions are given certain special values.
|
||||||
|
Unsafe_InaccurateNaN = 0x00040000,
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr OptimizationFlag no_optimizations = static_cast<OptimizationFlag>(0);
|
constexpr OptimizationFlag no_optimizations = static_cast<OptimizationFlag>(0);
|
||||||
|
|
|
@ -810,6 +810,22 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
|
||||||
using FPT = mp::unsigned_integer_of_size<fsize>;
|
using FPT = mp::unsigned_integer_of_size<fsize>;
|
||||||
|
|
||||||
if constexpr (fsize != 16) {
|
if constexpr (fsize != 16) {
|
||||||
|
if (code.HasFMA() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
Xbyak::Label end, fallback;
|
||||||
|
|
||||||
|
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
|
||||||
|
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
|
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
|
code.movaps(result, code.MConst(xword, FP::FPValue<FPT, false, 0, 2>()));
|
||||||
|
FCODE(vfnmadd231s)(result, operand1, operand2);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, result);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (code.HasFMA()) {
|
if (code.HasFMA()) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue