backend_x64: Accurately handle NaNs

2018-02-18 12:54:39 +00:00 · 2018-02-18 12:54:39 +00:00 · 07520f32c3
commit 07520f32c3
parent e97581d063
4 changed files with 383 additions and 28 deletions
--- a/src/backend_x64/emit_x64.h
+++ b/src/backend_x64/emit_x64.h
@ -36,6 +36,7 @@ struct EmitContext {
    virtual bool FPSCR_RoundTowardsZero() const = 0;
    virtual bool FPSCR_FTZ() const = 0;
    virtual bool FPSCR_DN() const = 0;
    /// Whether the emitters should take their extra NaN-handling paths. Unlike the pure-virtual
    /// FPSCR queries next to it, this one has a default implementation, which always answers true.
    virtual bool AccurateNaN() const { return true; }
    RegAlloc& reg_alloc;
    IR::Block& block;
--- a/src/backend_x64/emit_x64_floating_point.cpp
+++ b/src/backend_x64/emit_x64_floating_point.cpp
@ -4,10 +4,12 @@
 * General Public License version 2 or any later version.
 */
 #include "backend_x64/abi.h"
 #include "backend_x64/block_of_code.h"
 #include "backend_x64/emit_x64.h"
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/fp_util.h"
 #include "frontend/ir/basic_block.h"
 #include "frontend/ir/microinstruction.h"
 #include "frontend/ir/opcodes.h"
@ -95,33 +97,127 @@ static void FlushToZero64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64
    code.L(end);
 }
 /// Emits code that overwrites xmm_value with the f32_nan constant when it holds a NaN.
 static void DefaultNaN32(BlockOfCode& code, Xbyak::Xmm xmm_value) {
    Xbyak::Label not_nan;
    // A value compared against itself is unordered only when it is a NaN.
    code.ucomiss(xmm_value, xmm_value);
    // Parity clear: the compare was ordered, so the value is left untouched.
    code.jnp(not_nan);
    code.movaps(xmm_value, code.MConst(f32_nan));
    code.L(not_nan);
 }
 /// Emits code that overwrites xmm_value with the f64_nan constant when it holds a NaN.
 static void DefaultNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value) {
    Xbyak::Label not_nan;
    // A value compared against itself is unordered only when it is a NaN.
    code.ucomisd(xmm_value, xmm_value);
    // Parity clear: the compare was ordered, so the value is left untouched.
    code.jnp(not_nan);
    code.movaps(xmm_value, code.MConst(f64_nan));
    code.L(not_nan);
 }
 /// Emits code that turns xmm_value into zero when its (scalar double) content is a NaN.
 /// xmm_scratch is clobbered: it is used to build the AND mask.
 static void ZeroIfNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) {
    // Start from an all-zero scratch register (zero is always ordered with respect to itself).
    code.pxor(xmm_scratch, xmm_scratch);
    code.cmpordsd(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN)
    // Keep the value where the mask is set, clear it otherwise.
    code.pand(xmm_value, xmm_scratch);
 }
 /// Emits a NaN check on the operands of a two-input single-precision operation.
 ///
 /// When comparing a with b is unordered, control transfers to an out-of-line ("far code") path
 /// that calls Common::ProcessNaNs on the raw bits of both operands and stores the returned bits
 /// in a. That path ends with a jump to the returned label.
 ///
 /// @return A label that is NOT bound by this function; the caller has to bind it (code.L) at the
 ///         point where execution should resume after the far path.
 static Xbyak::Label PreProcessNaNs32(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b) {
    Xbyak::Label nan, end;
    // ucomiss raises the parity flag when the operands are unordered, i.e. at least one is a NaN.
    code.ucomiss(a, b);
    code.jp(nan, code.T_NEAR);
    code.SwitchToFarCode();
    code.L(nan);
    // NOTE(review): the extra 8 bytes presumably keep the stack aligned for the call — confirm.
    code.sub(rsp, 8);
    // The register holding a is excluded from the save/restore because it receives the result.
    ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx()));
    // Clear both argument registers, then load the raw 32-bit patterns of the operands.
    code.xor_(code.ABI_PARAM1.cvt32(), code.ABI_PARAM1.cvt32());
    code.xor_(code.ABI_PARAM2.cvt32(), code.ABI_PARAM2.cvt32());
    code.movd(code.ABI_PARAM1.cvt32(), a);
    code.movd(code.ABI_PARAM2.cvt32(), b);
    // The optional is dereferenced without a check: this path is only reached after an unordered
    // compare, so at least one argument is a NaN.
    code.CallFunction(static_cast<u32(*)(u32, u32)>([](u32 a, u32 b) -> u32 {
        return *Common::ProcessNaNs(a, b);
    }));
    code.movd(a, code.ABI_RETURN.cvt32());
    ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx()));
    code.add(rsp, 8);
    code.jmp(end, code.T_NEAR);
    code.SwitchToNearCode();
    return end;
 }
 /// Emits a fix-up that flips the sign bit of every 32-bit lane of result that holds a NaN.
 /// tmp is clobbered. The packed instruction forms are used, so all lanes are treated alike.
 /// NOTE(review): presumably this converts the sign-set NaN the host generates for invalid
 /// operations into the positive NaN the guest expects — confirm.
 static void PostProcessNaNs32(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm tmp) {
    code.movaps(tmp, result);
    // All-ones in each lane that is a NaN, zero elsewhere.
    code.cmpunordps(tmp, tmp);
    // Reduce the all-ones lanes to just the sign bit.
    code.pslld(tmp, 31);
    code.xorps(result, tmp);
 }
 /// Emits code that overwrites xmm_value with the f32_nan constant when it holds a NaN.
 static void DefaultNaN32(BlockOfCode& code, Xbyak::Xmm xmm_value) {
    Xbyak::Label not_nan;
    // A value compared against itself is unordered only when it is a NaN.
    code.ucomiss(xmm_value, xmm_value);
    // Parity clear: the compare was ordered, so the value is left untouched.
    code.jnp(not_nan);
    code.movaps(xmm_value, code.MConst(f32_nan));
    code.L(not_nan);
 }
 /// Emits a NaN check on the operands of a two-input double-precision operation.
 ///
 /// When comparing a with b is unordered, control transfers to an out-of-line ("far code") path
 /// that calls Common::ProcessNaNs on the raw bits of both operands and stores the returned bits
 /// in a. That path ends with a jump to the returned label.
 ///
 /// @return A label that is NOT bound by this function; the caller has to bind it (code.L) at the
 ///         point where execution should resume after the far path.
 static Xbyak::Label PreProcessNaNs64(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b) {
    Xbyak::Label nan, end;
    // ucomisd raises the parity flag when the operands are unordered, i.e. at least one is a NaN.
    code.ucomisd(a, b);
    code.jp(nan, code.T_NEAR);
    code.SwitchToFarCode();
    code.L(nan);
    // NOTE(review): the extra 8 bytes presumably keep the stack aligned for the call — confirm.
    code.sub(rsp, 8);
    // The register holding a is excluded from the save/restore because it receives the result.
    ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx()));
    // Pass the raw 64-bit patterns of the operands.
    code.movq(code.ABI_PARAM1, a);
    code.movq(code.ABI_PARAM2, b);
    // The optional is dereferenced without a check: this path is only reached after an unordered
    // compare, so at least one argument is a NaN.
    code.CallFunction(static_cast<u64(*)(u64, u64)>([](u64 a, u64 b) -> u64 {
        return *Common::ProcessNaNs(a, b);
    }));
    code.movq(a, code.ABI_RETURN);
    ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx()));
    code.add(rsp, 8);
    code.jmp(end, code.T_NEAR);
    code.SwitchToNearCode();
    return end;
 }
 /// Emits a fix-up that flips the sign bit of every 64-bit lane of result that holds a NaN.
 /// tmp is clobbered. The packed instruction forms are used, so both lanes are treated alike.
 /// NOTE(review): presumably this converts the sign-set NaN the host generates for invalid
 /// operations into the positive NaN the guest expects — confirm.
 static void PostProcessNaNs64(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm tmp) {
    code.movaps(tmp, result);
    // All-ones in each lane that is a NaN, zero elsewhere.
    code.cmpunordpd(tmp, tmp);
    // Reduce the all-ones lanes to just the sign bit.
    code.psllq(tmp, 63);
    code.xorps(result, tmp);
 }
 /// Emits code that overwrites xmm_value with the f64_nan constant when it holds a NaN.
 static void DefaultNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value) {
    Xbyak::Label not_nan;
    // A value compared against itself is unordered only when it is a NaN.
    code.ucomisd(xmm_value, xmm_value);
    // Parity clear: the compare was ordered, so the value is left untouched.
    code.jnp(not_nan);
    code.movaps(xmm_value, code.MConst(f64_nan));
    code.L(not_nan);
 }
 /// Emits a NaN check for the single operand of a single-precision operation.
 ///
 /// If a is a NaN, an out-of-line path ORs 0x00400000 into it and then jumps to the returned
 /// label. The label is left unbound here; the caller binds it where execution should resume.
 static Xbyak::Label ProcessNaN32(BlockOfCode& code, Xbyak::Xmm a) {
    Xbyak::Label is_nan;
    Xbyak::Label resume;

    // Self-compare is unordered (parity set) only for a NaN.
    code.ucomiss(a, a);
    code.jp(is_nan, code.T_NEAR);

    code.SwitchToFarCode();
    code.L(is_nan);
    code.orps(a, code.MConst(0x00400000));
    code.jmp(resume, code.T_NEAR);
    code.SwitchToNearCode();

    return resume;
 }
 /// Emits a NaN check for the single operand of a double-precision operation.
 ///
 /// If a is a NaN, an out-of-line path ORs 0x0008'0000'0000'0000 into it and then jumps to the
 /// returned label. The label is left unbound here; the caller binds it where execution should resume.
 static Xbyak::Label ProcessNaN64(BlockOfCode& code, Xbyak::Xmm a) {
    Xbyak::Label is_nan;
    Xbyak::Label resume;

    // Self-compare is unordered (parity set) only for a NaN.
    code.ucomisd(a, a);
    code.jp(is_nan, code.T_NEAR);

    code.SwitchToFarCode();
    code.L(is_nan);
    code.orps(a, code.MConst(0x0008'0000'0000'0000));
    code.jmp(resume, code.T_NEAR);
    code.SwitchToNearCode();

    return resume;
 }
 /// Emits a single-precision scalar operation with two inputs: result = fn(args[0], args[1]).
 /// NaN handling depends on the context: with FPSCR_DN the result goes through DefaultNaN32;
 /// otherwise, when AccurateNaN is set, the operands are checked by PreProcessNaNs32 beforehand
 /// and the result is fixed up by PostProcessNaNs32.
 static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    // Resume point for the out-of-line NaN path; it is bound just before the value is defined.
    Xbyak::Label end;
    Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
    Xbyak::Reg32 gpr_scratch = ctx.reg_alloc.ScratchGpr().cvt32();
@ -130,13 +226,19 @@ static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, voi
        DenormalsAreZero32(code, result, gpr_scratch);
        DenormalsAreZero32(code, operand, gpr_scratch);
    }
    if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
        // When an operand is a NaN the far path computes the result and jumps straight to `end`.
        end = PreProcessNaNs32(code, result, operand);
    }
    (code.*fn)(result, operand);
    if (ctx.FPSCR_FTZ()) {
        FlushToZero32(code, result, gpr_scratch);
    }
    if (ctx.FPSCR_DN()) {
        DefaultNaN32(code, result);
    } else if (ctx.AccurateNaN()) {
        // operand is a scratch register and no longer needed, so it doubles as the temporary.
        PostProcessNaNs32(code, result, operand);
    }
    code.L(end);
    ctx.reg_alloc.DefineValue(inst, result);
 }
@ -144,6 +246,8 @@ static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, voi
 /// Emits a double-precision scalar operation with two inputs: result = fn(args[0], args[1]).
 /// NaN handling depends on the context: with FPSCR_DN the result goes through DefaultNaN64;
 /// otherwise, when AccurateNaN is set, the operands are checked by PreProcessNaNs64 beforehand
 /// and the result is fixed up by PostProcessNaNs64.
 static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    // Resume point for the out-of-line NaN path; it is bound just before the value is defined.
    Xbyak::Label end;
    Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
    Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr();
@ -152,13 +256,19 @@ static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, voi
        DenormalsAreZero64(code, result, gpr_scratch);
        DenormalsAreZero64(code, operand, gpr_scratch);
    }
    if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
        // When an operand is a NaN the far path computes the result and jumps straight to `end`.
        end = PreProcessNaNs64(code, result, operand);
    }
    (code.*fn)(result, operand);
    if (ctx.FPSCR_FTZ()) {
        FlushToZero64(code, result, gpr_scratch);
    }
    if (ctx.FPSCR_DN()) {
        DefaultNaN64(code, result);
    } else if (ctx.AccurateNaN()) {
        // operand is a scratch register and no longer needed, so it doubles as the temporary.
        PostProcessNaNs64(code, result, operand);
    }
    code.L(end);
    ctx.reg_alloc.DefineValue(inst, result);
 }
@ -166,20 +276,27 @@ static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, voi
 /// Emits a single-precision scalar operation with one input: result = fn(args[0]).
 /// NaN handling depends on the context: with FPSCR_DN the result goes through DefaultNaN32;
 /// otherwise, when AccurateNaN is set, the input is checked by ProcessNaN32 beforehand and the
 /// result is fixed up by PostProcessNaNs32.
 static void FPTwoOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    // Resume point for the out-of-line NaN path; it is bound just before the value is defined.
    Xbyak::Label end;
    Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Reg32 gpr_scratch = ctx.reg_alloc.ScratchGpr().cvt32();
    if (ctx.FPSCR_FTZ()) {
        DenormalsAreZero32(code, result, gpr_scratch);
    }
-
+    if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
        // A NaN input is handled out of line and skips the operation by jumping to `end`.
        end = ProcessNaN32(code, result);
    }
    (code.*fn)(result, result);
    if (ctx.FPSCR_FTZ()) {
        FlushToZero32(code, result, gpr_scratch);
    }
    if (ctx.FPSCR_DN()) {
        DefaultNaN32(code, result);
    } else if (ctx.AccurateNaN()) {
        // There is no second operand register to reuse here, so a fresh scratch XMM serves as the temporary.
        PostProcessNaNs32(code, result, ctx.reg_alloc.ScratchXmm());
    }
    code.L(end);
    ctx.reg_alloc.DefineValue(inst, result);
 }
@ -187,20 +304,27 @@ static void FPTwoOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void
 /// Emits a double-precision scalar operation with one input: result = fn(args[0]).
 /// NaN handling depends on the context: with FPSCR_DN the result goes through DefaultNaN64;
 /// otherwise, when AccurateNaN is set, the input is checked by ProcessNaN64 beforehand and the
 /// result is fixed up by PostProcessNaNs64.
 static void FPTwoOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    // Resume point for the out-of-line NaN path; it is bound just before the value is defined.
    Xbyak::Label end;
    Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr();
    if (ctx.FPSCR_FTZ()) {
        DenormalsAreZero64(code, result, gpr_scratch);
    }
-
+    if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
        // A NaN input is handled out of line and skips the operation by jumping to `end`.
        end = ProcessNaN64(code, result);
    }
    (code.*fn)(result, result);
    if (ctx.FPSCR_FTZ()) {
        FlushToZero64(code, result, gpr_scratch);
    }
    if (ctx.FPSCR_DN()) {
        DefaultNaN64(code, result);
    } else if (ctx.AccurateNaN()) {
        // There is no second operand register to reuse here, so a fresh scratch XMM serves as the temporary.
        PostProcessNaNs64(code, result, ctx.reg_alloc.ScratchXmm());
    }
    code.L(end);
    ctx.reg_alloc.DefineValue(inst, result);
 }
--- a/src/backend_x64/emit_x64_vector_floating_point.cpp
+++ b/src/backend_x64/emit_x64_vector_floating_point.cpp
@ -4,8 +4,10 @@
 * General Public License version 2 or any later version.
 */
 #include "backend_x64/abi.h"
 #include "backend_x64/block_of_code.h"
 #include "backend_x64/emit_x64.h"
 #include "common/fp_util.h"
 #include "frontend/ir/basic_block.h"
 #include "frontend/ir/microinstruction.h"
@ -14,31 +16,183 @@ namespace Dynarmic::BackendX64 {
 using namespace Xbyak::util;
 /// Emits a packed single-precision two-operand operation (fn) on args[0] and args[1].
 ///
 /// When AccurateNaN is off or FPSCR_DN is on, the operation is emitted directly and, under
 /// FPSCR_DN, every NaN lane of the result is replaced with 0x7fc00000. Otherwise the inline path
 /// only detects whether any lane of an input or of the result is a NaN; if so, an out-of-line
 /// fallback spills the registers to the stack and patches the lanes with Common::ProcessNaNs.
 template <typename Function>
-static void EmitVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
+static void EmitVectorOperation32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
    if (!ctx.AccurateNaN() || ctx.FPSCR_DN()) {
        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
        Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
        Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
        (code.*fn)(xmm_a, xmm_b);
        if (ctx.FPSCR_DN()) {
            Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
            Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
            // tmp = all ones, used below to invert the mask.
            code.pcmpeqw(tmp, tmp);
            code.movaps(nan_mask, xmm_a);
            // nan_mask = all-ones in lanes that are NOT NaN.
            code.cmpordps(nan_mask, nan_mask);
            // Zero out the NaN lanes of the result...
            code.andps(xmm_a, nan_mask);
            // ...then flip the mask so it selects the NaN lanes and fill them with 0x7fc00000.
            code.xorps(nan_mask, tmp);
            code.andps(nan_mask, code.MConst(0x7fc0'0000'7fc0'0000, 0x7fc0'0000'7fc0'0000));
            code.orps(xmm_a, nan_mask);
        }
        ctx.reg_alloc.DefineValue(inst, xmm_a);
        return;
    }
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Label end, nan;
    Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
    Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
    Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
-    (code.*fn)(xmm_a, xmm_b);
+    code.movaps(nan_mask, xmm_b);
    code.movaps(result, xmm_a);
    // nan_mask lanes become all-ones where either input lane is a NaN.
    code.cmpunordps(nan_mask, xmm_a);
    (code.*fn)(result, xmm_b);
    // An all-ones lane is itself a NaN pattern, so lanes flagged above stay flagged while lanes
    // whose result is a NaN are added.
    code.cmpunordps(nan_mask, result);
    // Set ZF when no lane is flagged.
    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        code.ptest(nan_mask, nan_mask);
    } else {
        Xbyak::Reg32 bitmask = ctx.reg_alloc.ScratchGpr().cvt32();
        code.movmskps(bitmask, nan_mask);
        code.cmp(bitmask, 0);
    }
    code.jz(end);
    code.jmp(nan, code.T_NEAR);
    code.L(end);
-    ctx.reg_alloc.DefineValue(inst, xmm_a);
+    code.SwitchToFarCode();
    code.L(nan);
    // NOTE(review): the extra 8 bytes presumably keep the stack aligned for the call — confirm.
    code.sub(rsp, 8);
    // The register holding result is excluded from the save/restore; it is reloaded from the stack below.
    ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
    // Three 16-byte slots above the shadow space: result, a and b, passed to the helper by address.
    const size_t stack_space = 3 * 16;
    code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
    code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
    code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
    code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
    code.movaps(xword[code.ABI_PARAM1], result);
    code.movaps(xword[code.ABI_PARAM2], xmm_a);
    code.movaps(xword[code.ABI_PARAM3], xmm_b);
    code.CallFunction(static_cast<void(*)(std::array<u32, 4>&, const std::array<u32, 4>&, const std::array<u32, 4>&)>(
        [](std::array<u32, 4>& result, const std::array<u32, 4>& a, const std::array<u32, 4>& b) {
            for (size_t i = 0; i < 4; ++i) {
                // A NaN input decides the lane; otherwise a NaN produced by the operation itself
                // is replaced with 0x7fc00000.
                if (auto r = Common::ProcessNaNs(a[i], b[i])) {
                    result[i] = *r;
                } else if (Common::IsNaN(result[i])) {
                    result[i] = 0x7fc00000;
                }
            }
        }
    ));
    // Pick up the patched result from its stack slot.
    code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);
    code.add(rsp, stack_space + ABI_SHADOW_SPACE);
    ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
    code.add(rsp, 8);
    code.jmp(end, code.T_NEAR);
    code.SwitchToNearCode();
    ctx.reg_alloc.DefineValue(inst, result);
 }
 /// Emits a packed double-precision two-operand operation (fn) on args[0] and args[1].
 ///
 /// When AccurateNaN is off or FPSCR_DN is on, the operation is emitted directly and, under
 /// FPSCR_DN, every NaN lane of the result is replaced with 0x7ff8'0000'0000'0000. Otherwise the
 /// inline path only detects whether any lane of an input or of the result is a NaN; if so, an
 /// out-of-line fallback spills the registers to the stack and patches the lanes with
 /// Common::ProcessNaNs.
 template <typename Function>
 static void EmitVectorOperation64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
    if (!ctx.AccurateNaN() || ctx.FPSCR_DN()) {
        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
        Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
        Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
        (code.*fn)(xmm_a, xmm_b);
        if (ctx.FPSCR_DN()) {
            Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
            Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
            // tmp = all ones, used below to invert the mask.
            code.pcmpeqw(tmp, tmp);
            code.movaps(nan_mask, xmm_a);
            // nan_mask = all-ones in lanes that are NOT NaN.
            code.cmpordpd(nan_mask, nan_mask);
            // Zero out the NaN lanes of the result...
            code.andps(xmm_a, nan_mask);
            // ...then flip the mask so it selects the NaN lanes and fill them with the default NaN.
            code.xorps(nan_mask, tmp);
            code.andps(nan_mask, code.MConst(0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000));
            code.orps(xmm_a, nan_mask);
        }
        ctx.reg_alloc.DefineValue(inst, xmm_a);
        return;
    }
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    Xbyak::Label end, nan;
    Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
    Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
    Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
    code.movaps(nan_mask, xmm_b);
    code.movaps(result, xmm_a);
    // nan_mask lanes become all-ones where either input lane is a NaN.
    code.cmpunordpd(nan_mask, xmm_a);
    (code.*fn)(result, xmm_b);
    // An all-ones lane is itself a NaN pattern, so lanes flagged above stay flagged while lanes
    // whose result is a NaN are added.
    code.cmpunordpd(nan_mask, result);
    // Set ZF when no lane is flagged.
    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
        code.ptest(nan_mask, nan_mask);
    } else {
        Xbyak::Reg32 bitmask = ctx.reg_alloc.ScratchGpr().cvt32();
        // Lanes are all-ones or all-zero, so sampling the 32-bit sign bits is enough here.
        code.movmskps(bitmask, nan_mask);
        code.cmp(bitmask, 0);
    }
    code.jz(end);
    code.jmp(nan, code.T_NEAR);
    code.L(end);
    code.SwitchToFarCode();
    code.L(nan);
    // NOTE(review): the extra 8 bytes presumably keep the stack aligned for the call — confirm.
    code.sub(rsp, 8);
    // The register holding result is excluded from the save/restore; it is reloaded from the stack below.
    ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
    // Three 16-byte slots above the shadow space: result, a and b, passed to the helper by address.
    const size_t stack_space = 3 * 16;
    code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
    code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
    code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
    code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
    code.movaps(xword[code.ABI_PARAM1], result);
    code.movaps(xword[code.ABI_PARAM2], xmm_a);
    code.movaps(xword[code.ABI_PARAM3], xmm_b);
    code.CallFunction(static_cast<void(*)(std::array<u64, 2>&, const std::array<u64, 2>&, const std::array<u64, 2>&)>(
        [](std::array<u64, 2>& result, const std::array<u64, 2>& a, const std::array<u64, 2>& b) {
            // A 128-bit register holds two 64-bit lanes. Bound the loop by the array size so the
            // helper never touches memory past the 16-byte spill slots.
            for (size_t i = 0; i < result.size(); ++i) {
                // A NaN input decides the lane; otherwise a NaN produced by the operation itself
                // is replaced with the default NaN.
                if (auto r = Common::ProcessNaNs(a[i], b[i])) {
                    result[i] = *r;
                } else if (Common::IsNaN(result[i])) {
                    result[i] = 0x7ff8'0000'0000'0000;
                }
            }
        }
    ));
    // Pick up the patched result from its stack slot.
    code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);
    code.add(rsp, stack_space + ABI_SHADOW_SPACE);
    ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
    code.add(rsp, 8);
    code.jmp(end, code.T_NEAR);
    code.SwitchToNearCode();
    ctx.reg_alloc.DefineValue(inst, result);
 }
 /// Emits a packed single-precision add (addps) through the shared vector helper.
 void EmitX64::EmitFPVectorAdd32(EmitContext& ctx, IR::Inst* inst) {
-    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::addps);
+    EmitVectorOperation32(code, ctx, inst, &Xbyak::CodeGenerator::addps);
 }
 /// Emits a packed double-precision add (addpd) through the shared vector helper.
 void EmitX64::EmitFPVectorAdd64(EmitContext& ctx, IR::Inst* inst) {
-    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::addpd);
+    EmitVectorOperation64(code, ctx, inst, &Xbyak::CodeGenerator::addpd);
 }
 /// Emits a packed single-precision subtract (subps) through the shared vector helper.
 void EmitX64::EmitFPVectorSub32(EmitContext& ctx, IR::Inst* inst) {
-    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::subps);
+    EmitVectorOperation32(code, ctx, inst, &Xbyak::CodeGenerator::subps);
 }
 /// Emits a packed double-precision subtract (subpd) through the shared vector helper.
 void EmitX64::EmitFPVectorSub64(EmitContext& ctx, IR::Inst* inst) {
-    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::subpd);
+    EmitVectorOperation64(code, ctx, inst, &Xbyak::CodeGenerator::subpd);
 }
 } // namespace Dynarmic::BackendX64
--- a/src/common/fp_util.h
+++ b/src/common/fp_util.h
@ -0,0 +1,76 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #pragma once
 #include <boost/optional.hpp>
 namespace Dynarmic {
 namespace Common {
 /// Reports whether a 32-bit floating point bit pattern is a quiet NaN:
 /// all exponent bits set together with the top fraction bit. The sign bit is ignored.
 constexpr bool IsQNaN(u32 value) {
    constexpr u32 exponent_and_quiet_bit = 0x7fc00000;
    return (value & exponent_and_quiet_bit) == exponent_and_quiet_bit;
 }
 /// Reports whether a 32-bit floating point bit pattern is a signalling NaN:
 /// all exponent bits set, top fraction bit clear, and a non-zero fraction. The sign bit is ignored.
 constexpr bool IsSNaN(u32 value) {
    constexpr u32 exponent_and_quiet_bit = 0x7fc00000;
    constexpr u32 exponent_only = 0x7f800000;
    constexpr u32 fraction = 0x007fffff;
    if ((value & exponent_and_quiet_bit) != exponent_only) {
        return false;
    }
    // A zero fraction with this exponent is an infinity, not a NaN.
    return (value & fraction) != 0;
 }
 /// Reports whether a 32-bit floating point bit pattern is a NaN of either kind (signalling or quiet).
 constexpr bool IsNaN(u32 value) {
    return IsSNaN(value) || IsQNaN(value);
 }
 /// Selects the NaN an ARM processor would return for a pair of 32-bit operands.
 /// Signalling NaNs take priority over quiet ones and a before b; a signalling NaN is returned
 /// with bit 0x00400000 set, a quiet NaN is returned unchanged.
 /// Returns boost::none when neither argument is a NaN.
 inline boost::optional<u32> ProcessNaNs(u32 a, u32 b) {
    constexpr u32 quiet_bit = 0x00400000;

    if (IsSNaN(a)) {
        return a | quiet_bit;
    }
    if (IsSNaN(b)) {
        return b | quiet_bit;
    }
    if (IsQNaN(a)) {
        return a;
    }
    if (IsQNaN(b)) {
        return b;
    }
    return boost::none;
 }
 /// Reports whether a 64-bit floating point bit pattern is a quiet NaN:
 /// all exponent bits set together with the top fraction bit. The sign bit is ignored.
 constexpr bool IsQNaN(u64 value) {
    constexpr u64 exponent_and_quiet_bit = 0x7FF8'0000'0000'0000;
    return (value & exponent_and_quiet_bit) == exponent_and_quiet_bit;
 }
 /// Reports whether a 64-bit floating point bit pattern is a signalling NaN:
 /// all exponent bits set, top fraction bit clear, and a non-zero fraction. The sign bit is ignored.
 constexpr bool IsSNaN(u64 value) {
    constexpr u64 exponent_and_quiet_bit = 0x7FF8'0000'0000'0000;
    constexpr u64 exponent_only = 0x7FF0'0000'0000'0000;
    constexpr u64 remaining_fraction = 0x0007'FFFF'FFFF'FFFF;
    if ((value & exponent_and_quiet_bit) != exponent_only) {
        return false;
    }
    // A zero fraction with this exponent is an infinity, not a NaN.
    return (value & remaining_fraction) != 0;
 }
 /// Reports whether a 64-bit floating point bit pattern is a NaN of either kind (signalling or quiet).
 constexpr bool IsNaN(u64 value) {
    return IsSNaN(value) || IsQNaN(value);
 }
 /// Selects the NaN an ARM processor would return for a pair of 64-bit operands.
 /// Signalling NaNs take priority over quiet ones and a before b; a signalling NaN is returned
 /// with bit 0x0008'0000'0000'0000 set, a quiet NaN is returned unchanged.
 /// Returns boost::none when neither argument is a NaN.
 inline boost::optional<u64> ProcessNaNs(u64 a, u64 b) {
    constexpr u64 quiet_bit = 0x0008'0000'0000'0000;

    if (IsSNaN(a)) {
        return a | quiet_bit;
    }
    if (IsSNaN(b)) {
        return b | quiet_bit;
    }
    if (IsQNaN(a)) {
        return a;
    }
    if (IsQNaN(b)) {
        return b;
    }
    return boost::none;
 }
 } // namespace Common
 } // namespace Dynarmic