backend_x64: Accurately handle NaNs

2018-02-18 12:54:39 +00:00 · 2018-02-18 12:54:39 +00:00 · 07520f32c3
commit 07520f32c3
parent e97581d063
4 changed files with 383 additions and 28 deletions
--- a/src/backend_x64/emit_x64.h
+++ b/src/backend_x64/emit_x64.h
@ -36,6 +36,7 @@ struct EmitContext {
    virtual bool FPSCR_RoundTowardsZero() const = 0;
    virtual bool FPSCR_FTZ() const = 0;
    virtual bool FPSCR_DN() const = 0;
+    virtual bool AccurateNaN() const { return true; } // Gates the accurate NaN handling paths in the floating-point emitters; true unless a subclass overrides it.

    RegAlloc& reg_alloc;
    IR::Block& block;
--- a/src/backend_x64/emit_x64_floating_point.cpp
+++ b/src/backend_x64/emit_x64_floating_point.cpp
@ -4,10 +4,12 @@
 * General Public License version 2 or any later version.
 */

+#include "backend_x64/abi.h"
 #include "backend_x64/block_of_code.h"
 #include "backend_x64/emit_x64.h"
 #include "common/assert.h"
 #include "common/common_types.h"
+#include "common/fp_util.h"
 #include "frontend/ir/basic_block.h"
 #include "frontend/ir/microinstruction.h"
 #include "frontend/ir/opcodes.h"
@ -95,33 +97,127 @@ static void FlushToZero64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64
    code.L(end);
 }

-static void DefaultNaN32(BlockOfCode& code, Xbyak::Xmm xmm_value) {
-    Xbyak::Label end;
-
-    code.ucomiss(xmm_value, xmm_value);
-    code.jnp(end);
-    code.movaps(xmm_value, code.MConst(f32_nan));
-    code.L(end);
-}
-
-static void DefaultNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value) {
-    Xbyak::Label end;
-
-    code.ucomisd(xmm_value, xmm_value);
-    code.jnp(end);
-    code.movaps(xmm_value, code.MConst(f64_nan));
-    code.L(end);
-}
-
 static void ZeroIfNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) {
    code.pxor(xmm_scratch, xmm_scratch);
    code.cmpordsd(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN)
    code.pand(xmm_value, xmm_scratch);
 }

+static Xbyak::Label PreProcessNaNs32(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b) { // Emits a NaN check on a/b; returns a label the caller must bind after its operation.
+    Xbyak::Label nan, end;
+    // When either operand is a NaN, out-of-line code overwrites a with Common::ProcessNaNs(a, b) and jumps to the returned label.
+    code.ucomiss(a, b); // unordered compare: the parity flag is set when either operand is a NaN
+    code.jp(nan, code.T_NEAR);
+    code.SwitchToFarCode(); // everything up to SwitchToNearCode() is emitted off the fall-through path
+    code.L(nan);
+
+    code.sub(rsp, 8); // NOTE(review): presumably keeps the stack aligned for the call - confirm against the ABI helpers
+    ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx())); // a is left out; it is overwritten with the result below
+    code.xor_(code.ABI_PARAM1.cvt32(), code.ABI_PARAM1.cvt32());
+    code.xor_(code.ABI_PARAM2.cvt32(), code.ABI_PARAM2.cvt32());
+    code.movd(code.ABI_PARAM1.cvt32(), a); // arguments are the raw 32-bit patterns of the operands
+    code.movd(code.ABI_PARAM2.cvt32(), b);
+    code.CallFunction(static_cast<u32(*)(u32, u32)>([](u32 a, u32 b) -> u32 {
+        return *Common::ProcessNaNs(a, b); // dereferenced unconditionally: this code is only reached after an unordered compare
+    }));
+    code.movd(a, code.ABI_RETURN.cvt32());
+    ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx()));
+    code.add(rsp, 8);
+
+    code.jmp(end, code.T_NEAR); // end is not bound here; the caller binds the returned label
+    code.SwitchToNearCode();
+    return end;
+}
+
+static void PostProcessNaNs32(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm tmp) { // Flips the sign bit of every NaN lane in result; clobbers tmp.
+    code.movaps(tmp, result);
+    code.cmpunordps(tmp, tmp); // all-ones in each 32-bit lane that holds a NaN, zero elsewhere
+    code.pslld(tmp, 31); // reduce that mask to just the sign bit
+    code.xorps(result, tmp); // NOTE(review): presumably turns the NaN generated by the host into the positive default NaN - confirm
+}
+
+static void DefaultNaN32(BlockOfCode& code, Xbyak::Xmm xmm_value) { // Replaces xmm_value with the f32_nan constant when it holds a NaN.
+    Xbyak::Label end;
+    code.ucomiss(xmm_value, xmm_value); // a value compares unordered with itself only when it is a NaN
+    code.jnp(end); // parity clear: not a NaN, keep the value
+    code.movaps(xmm_value, code.MConst(f32_nan));
+    code.L(end);
+}
+
+static Xbyak::Label PreProcessNaNs64(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b) { // Emits a NaN check on a/b; returns a label the caller must bind after its operation.
+    Xbyak::Label nan, end;
+    // When either operand is a NaN, out-of-line code overwrites a with Common::ProcessNaNs(a, b) and jumps to the returned label.
+    code.ucomisd(a, b); // unordered compare: the parity flag is set when either operand is a NaN
+    code.jp(nan, code.T_NEAR);
+    code.SwitchToFarCode(); // everything up to SwitchToNearCode() is emitted off the fall-through path
+    code.L(nan);
+
+    code.sub(rsp, 8); // NOTE(review): presumably keeps the stack aligned for the call - confirm against the ABI helpers
+    ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx())); // a is left out; it is overwritten with the result below
+    code.movq(code.ABI_PARAM1, a); // arguments are the raw 64-bit patterns of the operands
+    code.movq(code.ABI_PARAM2, b);
+    code.CallFunction(static_cast<u64(*)(u64, u64)>([](u64 a, u64 b) -> u64 {
+        return *Common::ProcessNaNs(a, b); // dereferenced unconditionally: this code is only reached after an unordered compare
+    }));
+    code.movq(a, code.ABI_RETURN);
+    ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx()));
+    code.add(rsp, 8);
+
+    code.jmp(end, code.T_NEAR); // end is not bound here; the caller binds the returned label
+    code.SwitchToNearCode();
+    return end;
+}
+
+static void PostProcessNaNs64(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm tmp) { // Flips the sign bit of every NaN lane in result; clobbers tmp.
+    code.movaps(tmp, result);
+    code.cmpunordpd(tmp, tmp); // all-ones in each 64-bit lane that holds a NaN, zero elsewhere
+    code.psllq(tmp, 63); // reduce that mask to just the sign bit
+    code.xorps(result, tmp); // NOTE(review): presumably turns the NaN generated by the host into the positive default NaN - confirm
+}
+
+static void DefaultNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value) { // Replaces xmm_value with the f64_nan constant when it holds a NaN.
+    Xbyak::Label end;
+    code.ucomisd(xmm_value, xmm_value); // a value compares unordered with itself only when it is a NaN
+    code.jnp(end); // parity clear: not a NaN, keep the value
+    code.movaps(xmm_value, code.MConst(f64_nan));
+    code.L(end);
+}
+
+static Xbyak::Label ProcessNaN32(BlockOfCode& code, Xbyak::Xmm a) { // Single-operand NaN check; returns a label the caller must bind after its operation.
+    Xbyak::Label nan, end;
+    // When a is a NaN, out-of-line code sets its quiet bit and jumps to the returned label.
+    code.ucomiss(a, a); // the parity flag is set only when a is a NaN
+    code.jp(nan, code.T_NEAR);
+    code.SwitchToFarCode(); // everything up to SwitchToNearCode() is emitted off the fall-through path
+    code.L(nan);
+
+    code.orps(a, code.MConst(0x00400000)); // set the top mantissa bit: a signalling NaN becomes quiet, a quiet NaN is unchanged
+
+    code.jmp(end, code.T_NEAR); // end is not bound here; the caller binds the returned label
+    code.SwitchToNearCode();
+    return end;
+}
+
+static Xbyak::Label ProcessNaN64(BlockOfCode& code, Xbyak::Xmm a) { // Single-operand NaN check; returns a label the caller must bind after its operation.
+    Xbyak::Label nan, end;
+    // When a is a NaN, out-of-line code sets its quiet bit and jumps to the returned label.
+    code.ucomisd(a, a); // the parity flag is set only when a is a NaN
+    code.jp(nan, code.T_NEAR);
+    code.SwitchToFarCode(); // everything up to SwitchToNearCode() is emitted off the fall-through path
+    code.L(nan);
+
+    code.orps(a, code.MConst(0x0008'0000'0000'0000)); // set the top mantissa bit: a signalling NaN becomes quiet, a quiet NaN is unchanged
+
+    code.jmp(end, code.T_NEAR); // end is not bound here; the caller binds the returned label
+    code.SwitchToNearCode();
+    return end;
+}
+
 static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

+    Xbyak::Label end;
+
    Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
    Xbyak::Reg32 gpr_scratch = ctx.reg_alloc.ScratchGpr().cvt32();
@ -130,13 +226,19 @@ static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, voi
        DenormalsAreZero32(code, result, gpr_scratch);
        DenormalsAreZero32(code, operand, gpr_scratch);
    }
+    if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
+        end = PreProcessNaNs32(code, result, operand);
+    }
    (code.*fn)(result, operand);
    if (ctx.FPSCR_FTZ()) {
        FlushToZero32(code, result, gpr_scratch);
    }
    if (ctx.FPSCR_DN()) {
        DefaultNaN32(code, result);
+    } else if (ctx.AccurateNaN()) {
+        PostProcessNaNs32(code, result, operand);
    }
+    code.L(end);

    ctx.reg_alloc.DefineValue(inst, result);
 }
@ -144,6 +246,8 @@ static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, voi
 static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

+    Xbyak::Label end;
+
    Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
    Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr();
@ -152,13 +256,19 @@ static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, voi
        DenormalsAreZero64(code, result, gpr_scratch);
        DenormalsAreZero64(code, operand, gpr_scratch);
    }
+    if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
+        end = PreProcessNaNs64(code, result, operand);
+    }
    (code.*fn)(result, operand);
    if (ctx.FPSCR_FTZ()) {
        FlushToZero64(code, result, gpr_scratch);
    }
    if (ctx.FPSCR_DN()) {
        DefaultNaN64(code, result);
+    } else if (ctx.AccurateNaN()) {
+        PostProcessNaNs64(code, result, operand);
    }
+    code.L(end);

    ctx.reg_alloc.DefineValue(inst, result);
 }
@ -166,20 +276,27 @@ static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, voi
 static void FPTwoOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

+    Xbyak::Label end; // bound at the bottom; only jumped to when the accurate NaN path below is emitted
+
    Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Reg32 gpr_scratch = ctx.reg_alloc.ScratchGpr().cvt32();

    if (ctx.FPSCR_FTZ()) {
        DenormalsAreZero32(code, result, gpr_scratch);
    }
-
+    if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) { // default-NaN mode is handled after the operation instead
+        end = ProcessNaN32(code, result); // a NaN input is quietened out of line and skips straight to end
+    }
    (code.*fn)(result, result);
    if (ctx.FPSCR_FTZ()) {
        FlushToZero32(code, result, gpr_scratch);
    }
    if (ctx.FPSCR_DN()) {
        DefaultNaN32(code, result);
+    } else if (ctx.AccurateNaN()) { // input was not a NaN here, so any NaN in result was produced by the operation itself
+        PostProcessNaNs32(code, result, ctx.reg_alloc.ScratchXmm()); // flips the sign bit of such a NaN
    }
+    code.L(end); // landing point for the out-of-line NaN path
 
    ctx.reg_alloc.DefineValue(inst, result);
 }
@ -187,20 +304,27 @@ static void FPTwoOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void
 static void FPTwoOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

+    Xbyak::Label end; // bound at the bottom; only jumped to when the accurate NaN path below is emitted
+
    Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr();

    if (ctx.FPSCR_FTZ()) {
        DenormalsAreZero64(code, result, gpr_scratch);
    }
-
+    if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) { // default-NaN mode is handled after the operation instead
+        end = ProcessNaN64(code, result); // a NaN input is quietened out of line and skips straight to end
+    }
    (code.*fn)(result, result);
    if (ctx.FPSCR_FTZ()) {
        FlushToZero64(code, result, gpr_scratch);
    }
    if (ctx.FPSCR_DN()) {
        DefaultNaN64(code, result);
+    } else if (ctx.AccurateNaN()) { // input was not a NaN here, so any NaN in result was produced by the operation itself
+        PostProcessNaNs64(code, result, ctx.reg_alloc.ScratchXmm()); // flips the sign bit of such a NaN
    }
+    code.L(end); // landing point for the out-of-line NaN path
 
    ctx.reg_alloc.DefineValue(inst, result);
 }
--- a/src/backend_x64/emit_x64_vector_floating_point.cpp
+++ b/src/backend_x64/emit_x64_vector_floating_point.cpp
@ -4,8 +4,10 @@
 * General Public License version 2 or any later version.
 */

+#include "backend_x64/abi.h"
 #include "backend_x64/block_of_code.h"
 #include "backend_x64/emit_x64.h"
+#include "common/fp_util.h"
 #include "frontend/ir/basic_block.h"
 #include "frontend/ir/microinstruction.h"

@ -14,31 +16,183 @@ namespace Dynarmic::BackendX64 {
 using namespace Xbyak::util;

 template <typename Function>
-static void EmitVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
+static void EmitVectorOperation32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { // Emits packed 32-bit fn(a, b) with default-NaN or accurate NaN handling as selected by ctx.
+    if (!ctx.AccurateNaN() || ctx.FPSCR_DN()) { // simple path: no per-lane NaN propagation needed
+        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+        Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+        Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+
+        (code.*fn)(xmm_a, xmm_b);
+
+        if (ctx.FPSCR_DN()) { // replace every NaN lane of the result with 0x7fc00000
+            Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
+            Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+            code.pcmpeqw(tmp, tmp); // tmp = all ones
+            code.movaps(nan_mask, xmm_a);
+            code.cmpordps(nan_mask, nan_mask); // all-ones in lanes that are not NaN
+            code.andps(xmm_a, nan_mask); // zero the NaN lanes
+            code.xorps(nan_mask, tmp); // invert: all-ones in lanes that were NaN
+            code.andps(nan_mask, code.MConst(0x7fc0'0000'7fc0'0000, 0x7fc0'0000'7fc0'0000));
+            code.orps(xmm_a, nan_mask); // merge the default NaN into those lanes
+        }
+
+        ctx.reg_alloc.DefineValue(inst, xmm_a);
+        return;
+    }
+
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

-    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Label end, nan;
+    Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); // inputs are kept intact so the fallback can inspect them
+    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
    Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+    Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();

-    (code.*fn)(xmm_a, xmm_b);
+    code.movaps(nan_mask, xmm_b);
+    code.movaps(result, xmm_a);
+    code.cmpunordps(nan_mask, xmm_a); // all-ones in lanes where either input is a NaN
+    (code.*fn)(result, xmm_b);
+    code.cmpunordps(nan_mask, result); // an all-ones lane is itself a NaN pattern, so earlier hits stay set; result NaNs are added
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+        code.ptest(nan_mask, nan_mask); // ZF set when no lane was flagged
+    } else {
+        Xbyak::Reg32 bitmask = ctx.reg_alloc.ScratchGpr().cvt32();
+        code.movmskps(bitmask, nan_mask); // collect the lane sign bits into a GPR
+        code.cmp(bitmask, 0);
+    }
+    code.jz(end); // common case: no NaN anywhere, result is final
+    code.jmp(nan, code.T_NEAR);
+    code.L(end);
 
-    ctx.reg_alloc.DefineValue(inst, xmm_a);
+    code.SwitchToFarCode(); // fallback emitted off the fall-through path
+    code.L(nan);
+    code.sub(rsp, 8); // NOTE(review): presumably for stack alignment - confirm against the ABI helpers
+    ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
+    const size_t stack_space = 3 * 16; // three 16-byte slots: result, a, b
+    code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
+    code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
+    code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
+    code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
+    code.movaps(xword[code.ABI_PARAM1], result);
+    code.movaps(xword[code.ABI_PARAM2], xmm_a);
+    code.movaps(xword[code.ABI_PARAM3], xmm_b);
+    code.CallFunction(static_cast<void(*)(std::array<u32, 4>&, const std::array<u32, 4>&, const std::array<u32, 4>&)>(
+        [](std::array<u32, 4>& result, const std::array<u32, 4>& a, const std::array<u32, 4>& b) {
+            for (size_t i = 0; i < 4; ++i) {
+                if (auto r = Common::ProcessNaNs(a[i], b[i])) { // an input NaN decides the lane
+                    result[i] = *r;
+                } else if (Common::IsNaN(result[i])) { // NaN produced by the operation itself
+                    result[i] = 0x7fc00000;
+                }
+            }
+        }
+    ));
+    code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); // reload the patched result slot
+    code.add(rsp, stack_space + ABI_SHADOW_SPACE);
+    ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
+    code.add(rsp, 8);
+    code.jmp(end, code.T_NEAR);
+    code.SwitchToNearCode();
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
+
+template <typename Function>
+static void EmitVectorOperation64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { // Emits packed 64-bit fn(a, b) with default-NaN or accurate NaN handling as selected by ctx.
+    if (!ctx.AccurateNaN() || ctx.FPSCR_DN()) { // simple path: no per-lane NaN propagation needed
+        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+        Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+        Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+
+        (code.*fn)(xmm_a, xmm_b);
+
+        if (ctx.FPSCR_DN()) { // replace every NaN lane of the result with 0x7ff8'0000'0000'0000
+            Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
+            Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+            code.pcmpeqw(tmp, tmp); // tmp = all ones
+            code.movaps(nan_mask, xmm_a);
+            code.cmpordpd(nan_mask, nan_mask); // all-ones in lanes that are not NaN
+            code.andps(xmm_a, nan_mask); // zero the NaN lanes
+            code.xorps(nan_mask, tmp); // invert: all-ones in lanes that were NaN
+            code.andps(nan_mask, code.MConst(0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000));
+            code.orps(xmm_a, nan_mask); // merge the default NaN into those lanes
+        }
+
+        ctx.reg_alloc.DefineValue(inst, xmm_a);
+        return;
+    }
+
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    Xbyak::Label end, nan;
+    Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); // inputs are kept intact so the fallback can inspect them
+    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
+    Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+    Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
+
+    code.movaps(nan_mask, xmm_b);
+    code.movaps(result, xmm_a);
+    code.cmpunordpd(nan_mask, xmm_a); // all-ones in lanes where either input is a NaN
+    (code.*fn)(result, xmm_b);
+    code.cmpunordpd(nan_mask, result); // an all-ones lane is itself a NaN pattern, so earlier hits stay set; result NaNs are added
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+        code.ptest(nan_mask, nan_mask); // ZF set when no lane was flagged
+    } else {
+        Xbyak::Reg32 bitmask = ctx.reg_alloc.ScratchGpr().cvt32();
+        code.movmskps(bitmask, nan_mask); // only tested against zero, so the 32-bit form also works for 64-bit lanes
+        code.cmp(bitmask, 0);
+    }
+    code.jz(end); // common case: no NaN anywhere, result is final
+    code.jmp(nan, code.T_NEAR);
+    code.L(end);
+
+    code.SwitchToFarCode(); // fallback emitted off the fall-through path
+    code.L(nan);
+    code.sub(rsp, 8); // NOTE(review): presumably for stack alignment - confirm against the ABI helpers
+    ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
+    const size_t stack_space = 3 * 16; // three 16-byte slots: result, a, b
+    code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
+    code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
+    code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
+    code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
+    code.movaps(xword[code.ABI_PARAM1], result);
+    code.movaps(xword[code.ABI_PARAM2], xmm_a);
+    code.movaps(xword[code.ABI_PARAM3], xmm_b);
+    code.CallFunction(static_cast<void(*)(std::array<u64, 2>&, const std::array<u64, 2>&, const std::array<u64, 2>&)>(
+        [](std::array<u64, 2>& result, const std::array<u64, 2>& a, const std::array<u64, 2>& b) {
+            for (size_t i = 0; i < result.size(); ++i) { // two 64-bit lanes; a literal bound of 4 would index past the arrays
+                if (auto r = Common::ProcessNaNs(a[i], b[i])) { // an input NaN decides the lane
+                    result[i] = *r;
+                } else if (Common::IsNaN(result[i])) { // NaN produced by the operation itself
+                    result[i] = 0x7ff8'0000'0000'0000;
+                }
+            }
+        }
+    ));
+    code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); // reload the patched result slot
+    code.add(rsp, stack_space + ABI_SHADOW_SPACE);
+    ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
+    code.add(rsp, 8);
+    code.jmp(end, code.T_NEAR);
+    code.SwitchToNearCode();
+
+    ctx.reg_alloc.DefineValue(inst, result);
 }

 void EmitX64::EmitFPVectorAdd32(EmitContext& ctx, IR::Inst* inst) {
-    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::addps);
+    EmitVectorOperation32(code, ctx, inst, &Xbyak::CodeGenerator::addps); // packed 32-bit add through the NaN-aware helper
 }

 void EmitX64::EmitFPVectorAdd64(EmitContext& ctx, IR::Inst* inst) {
-    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::addpd);
+    EmitVectorOperation64(code, ctx, inst, &Xbyak::CodeGenerator::addpd); // packed 64-bit add through the NaN-aware helper
 }

 void EmitX64::EmitFPVectorSub32(EmitContext& ctx, IR::Inst* inst) {
-    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::subps);
+    EmitVectorOperation32(code, ctx, inst, &Xbyak::CodeGenerator::subps); // packed 32-bit subtract through the NaN-aware helper
 }

 void EmitX64::EmitFPVectorSub64(EmitContext& ctx, IR::Inst* inst) {
-    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::subpd);
+    EmitVectorOperation64(code, ctx, inst, &Xbyak::CodeGenerator::subpd); // packed 64-bit subtract through the NaN-aware helper
 }

 } // namespace Dynarmic::BackendX64
--- a/src/common/fp_util.h
+++ b/src/common/fp_util.h
@ -0,0 +1,76 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+#include <boost/optional.hpp>
+
+namespace Dynarmic {
+namespace Common {
+
+/// True when the 32-bit pattern has all exponent bits and the top mantissa bit set (quiet NaN).
+constexpr bool IsQNaN(u32 value) {
+    return (~value & 0x7fc00000) == 0;
+}
+
+/// True when the 32-bit pattern is a NaN whose top mantissa bit is clear (signalling NaN).
+constexpr bool IsSNaN(u32 value) {
+    return (value & 0x7fffffff) > 0x7f800000 && (value & 0x00400000) == 0;
+}
+
+/// True when the 32-bit pattern is any NaN: all exponent bits set and a non-zero mantissa.
+constexpr bool IsNaN(u32 value) {
+    return (value & 0x7fffffff) > 0x7f800000;
+}
+
+/// Picks the NaN an ARM processor would return for the operand pair (a, b): a signalling NaN
+/// beats a quiet one and a beats b. Yields boost::none when neither operand is a NaN.
+inline boost::optional<u32> ProcessNaNs(u32 a, u32 b) {
+    constexpr u32 quiet_bit = 0x00400000;
+
+    // A signalling NaN is handed back in quietened form.
+    if (IsSNaN(a)) return a | quiet_bit;
+    if (IsSNaN(b)) return b | quiet_bit;
+
+    // A quiet NaN is passed through untouched.
+    if (IsQNaN(a)) return a;
+    if (IsQNaN(b)) return b;
+    return boost::none;
+}
+
+/// True when the 64-bit pattern has all exponent bits and the top mantissa bit set (quiet NaN).
+constexpr bool IsQNaN(u64 value) {
+    return (~value & 0x7FF8'0000'0000'0000) == 0;
+}
+
+/// True when the 64-bit pattern is a NaN whose top mantissa bit is clear (signalling NaN).
+constexpr bool IsSNaN(u64 value) {
+    return (value & 0x7FFF'FFFF'FFFF'FFFF) > 0x7FF0'0000'0000'0000
+        && (value & 0x0008'0000'0000'0000) == 0;
+}
+
+/// True when the 64-bit pattern is any NaN: all exponent bits set and a non-zero mantissa.
+constexpr bool IsNaN(u64 value) {
+    return (value & 0x7FFF'FFFF'FFFF'FFFF) > 0x7FF0'0000'0000'0000;
+}
+
+/// Picks the NaN an ARM processor would return for the operand pair (a, b): a signalling NaN
+/// beats a quiet one and a beats b. Yields boost::none when neither operand is a NaN.
+inline boost::optional<u64> ProcessNaNs(u64 a, u64 b) {
+    constexpr u64 quiet_bit = 0x0008'0000'0000'0000;
+
+    // A signalling NaN is handed back in quietened form.
+    if (IsSNaN(a)) return a | quiet_bit;
+    if (IsSNaN(b)) return b | quiet_bit;
+
+    // A quiet NaN is passed through untouched.
+    if (IsQNaN(a)) return a;
+    if (IsQNaN(b)) return b;
+    return boost::none;
+}
+
+} // namespace Common
+} // namespace Dynarmic