Introduce Unsafe_InaccurateNaN

2021-01-01 07:17:34 +00:00 · 2021-01-01 07:17:34 +00:00 · eeeafaf5fb
commit eeeafaf5fb
parent 4a9a0d07f7
2 changed files with 19 additions and 0 deletions
--- a/include/dynarmic/optimization_flags.h
+++ b/include/dynarmic/optimization_flags.h
@@ -39,6 +39,9 @@ enum class OptimizationFlag : std::uint32_t {
    /// This is an UNSAFE optimization that reduces accuracy of certain floating-point instructions.
    /// This allows results of FRECPE and FRSQRTE to have **less** error than spec allows.
    Unsafe_ReducedErrorFP   = 0x00020000,
+    /// This is an UNSAFE optimization that causes floating-point instructions to not produce correct NaNs.
+    /// This may also result in inaccurate results when instructions are given certain special values.
+    Unsafe_InaccurateNaN    = 0x00040000,
 };

 constexpr OptimizationFlag no_optimizations = static_cast<OptimizationFlag>(0);
--- a/src/backend/x64/emit_x64_floating_point.cpp
+++ b/src/backend/x64/emit_x64_floating_point.cpp
@@ -810,6 +810,22 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
    using FPT = mp::unsigned_integer_of_size<fsize>;

    if constexpr (fsize != 16) {
+        if (code.HasFMA() && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
+            auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+            Xbyak::Label end, fallback;
+
+            const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
+            const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
+            const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+
+            code.movaps(result, code.MConst(xword, FP::FPValue<FPT, false, 0, 2>()));
+            FCODE(vfnmadd231s)(result, operand1, operand2);
+
+            ctx.reg_alloc.DefineValue(inst, result);
+            return;
+        }
+
        if (code.HasFMA()) {
            auto args = ctx.reg_alloc.GetArgumentInfo(inst);