From 07520f32c3936f040b9f1e4a5f1105e9e46b3d62 Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Sun, 18 Feb 2018 12:54:39 +0000
Subject: [PATCH] backend_x64: Accurately handle NaNs

---
 src/backend_x64/emit_x64.h                    |   1 +
 src/backend_x64/emit_x64_floating_point.cpp   | 164 ++++++++++++++---
 .../emit_x64_vector_floating_point.cpp        | 170 +++++++++++++++++-
 src/common/fp_util.h                          |  76 ++++++++
 4 files changed, 383 insertions(+), 28 deletions(-)
 create mode 100644 src/common/fp_util.h

diff --git a/src/backend_x64/emit_x64.h b/src/backend_x64/emit_x64.h
index e19ec810..9494cc93 100644
--- a/src/backend_x64/emit_x64.h
+++ b/src/backend_x64/emit_x64.h
@@ -36,6 +36,7 @@ struct EmitContext {
     virtual bool FPSCR_RoundTowardsZero() const = 0;
     virtual bool FPSCR_FTZ() const = 0;
     virtual bool FPSCR_DN() const = 0;
+    virtual bool AccurateNaN() const { return true; }
 
     RegAlloc& reg_alloc;
     IR::Block& block;
diff --git a/src/backend_x64/emit_x64_floating_point.cpp b/src/backend_x64/emit_x64_floating_point.cpp
index bd3c489c..3fd4dae6 100644
--- a/src/backend_x64/emit_x64_floating_point.cpp
+++ b/src/backend_x64/emit_x64_floating_point.cpp
@@ -4,10 +4,12 @@
  * General Public License version 2 or any later version.
  */
 
+#include "backend_x64/abi.h"
 #include "backend_x64/block_of_code.h"
 #include "backend_x64/emit_x64.h"
 #include "common/assert.h"
 #include "common/common_types.h"
+#include "common/fp_util.h"
 #include "frontend/ir/basic_block.h"
 #include "frontend/ir/microinstruction.h"
 #include "frontend/ir/opcodes.h"
@@ -95,33 +97,127 @@ static void FlushToZero64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64
     code.L(end);
 }
 
-static void DefaultNaN32(BlockOfCode& code, Xbyak::Xmm xmm_value) {
-    Xbyak::Label end;
-
-    code.ucomiss(xmm_value, xmm_value);
-    code.jnp(end);
-    code.movaps(xmm_value, code.MConst(f32_nan));
-    code.L(end);
-}
-
-static void DefaultNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value) {
-    Xbyak::Label end;
-
-    code.ucomisd(xmm_value, xmm_value);
-    code.jnp(end);
-    code.movaps(xmm_value, code.MConst(f64_nan));
-    code.L(end);
-}
-
 static void ZeroIfNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) {
     code.pxor(xmm_scratch, xmm_scratch);
     code.cmpordsd(xmm_scratch, xmm_value); // true mask when ordered (i.e.: when not an NaN)
     code.pand(xmm_value, xmm_scratch);
 }
 
+static Xbyak::Label PreProcessNaNs32(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b) {
+    Xbyak::Label nan, end;
+
+    code.ucomiss(a, b);
+    code.jp(nan, code.T_NEAR);
+    code.SwitchToFarCode();
+    code.L(nan);
+
+    code.sub(rsp, 8);
+    ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx()));
+    code.xor_(code.ABI_PARAM1.cvt32(), code.ABI_PARAM1.cvt32());
+    code.xor_(code.ABI_PARAM2.cvt32(), code.ABI_PARAM2.cvt32());
+    code.movd(code.ABI_PARAM1.cvt32(), a);
+    code.movd(code.ABI_PARAM2.cvt32(), b);
+    code.CallFunction(static_cast<u32(*)(u32, u32)>([](u32 a, u32 b) -> u32 {
+        return *Common::ProcessNaNs(a, b);
+    }));
+    code.movd(a, code.ABI_RETURN.cvt32());
+    ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx()));
+    code.add(rsp, 8);
+
+    code.jmp(end, code.T_NEAR);
+    code.SwitchToNearCode();
+    return end;
+}
+
+static void PostProcessNaNs32(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm tmp) {
+    code.movaps(tmp, result);
+    code.cmpunordps(tmp, tmp);
+    code.pslld(tmp, 31);
+    code.xorps(result, tmp);
+}
+
+static void DefaultNaN32(BlockOfCode& code, Xbyak::Xmm xmm_value) {
+    Xbyak::Label end;
+    code.ucomiss(xmm_value, xmm_value);
+    code.jnp(end);
+    code.movaps(xmm_value, code.MConst(f32_nan));
+    code.L(end);
+}
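A note on PostProcessNaNs32 and DefaultNaN32 above: when an SSE instruction produces a NaN from non-NaN inputs (0 * inf, inf - inf, and so on), x64 generates the "QNaN floating-point indefinite" 0xFFC00000, while ARM generates the default NaN 0x7FC00000. The two differ only in the sign bit, which is why PostProcessNaNs32 only builds an unordered mask, shifts it down to the sign bit and XORs it into the result. A minimal host-side sketch of the same transform (the helper name below is illustrative, not part of the patch):

    #include <cstdint>
    #include <cstdio>

    // Flip the sign bit of a NaN, turning the x64-generated indefinite QNaN
    // (0xFFC00000) into the ARM default NaN (0x7FC00000); non-NaN values are
    // left untouched, mirroring the cmpunordps/pslld/xorps sequence.
    static std::uint32_t FlipSignIfNaN(std::uint32_t value) {
        const bool is_nan = (value & 0x7f800000) == 0x7f800000 && (value & 0x007fffff) != 0;
        return is_nan ? (value ^ 0x80000000) : value;
    }

    int main() {
        std::printf("%08x\n", static_cast<unsigned>(FlipSignIfNaN(0xffc00000))); // 7fc00000: ARM default NaN
        std::printf("%08x\n", static_cast<unsigned>(FlipSignIfNaN(0x3f800000))); // 3f800000: 1.0f unchanged
    }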
+
+static Xbyak::Label PreProcessNaNs64(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b) {
+    Xbyak::Label nan, end;
+
+    code.ucomisd(a, b);
+    code.jp(nan, code.T_NEAR);
+    code.SwitchToFarCode();
+    code.L(nan);
+
+    code.sub(rsp, 8);
+    ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx()));
+    code.movq(code.ABI_PARAM1, a);
+    code.movq(code.ABI_PARAM2, b);
+    code.CallFunction(static_cast<u64(*)(u64, u64)>([](u64 a, u64 b) -> u64 {
+        return *Common::ProcessNaNs(a, b);
+    }));
+    code.movq(a, code.ABI_RETURN);
+    ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(a.getIdx()));
+    code.add(rsp, 8);
+
+    code.jmp(end, code.T_NEAR);
+    code.SwitchToNearCode();
+    return end;
+}
+
+static void PostProcessNaNs64(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm tmp) {
+    code.movaps(tmp, result);
+    code.cmpunordpd(tmp, tmp);
+    code.psllq(tmp, 63);
+    code.xorps(result, tmp);
+}
+
+static void DefaultNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value) {
+    Xbyak::Label end;
+    code.ucomisd(xmm_value, xmm_value);
+    code.jnp(end);
+    code.movaps(xmm_value, code.MConst(f64_nan));
+    code.L(end);
+}
+
+static Xbyak::Label ProcessNaN32(BlockOfCode& code, Xbyak::Xmm a) {
+    Xbyak::Label nan, end;
+
+    code.ucomiss(a, a);
+    code.jp(nan, code.T_NEAR);
+    code.SwitchToFarCode();
+    code.L(nan);
+
+    code.orps(a, code.MConst(0x00400000));
+
+    code.jmp(end, code.T_NEAR);
+    code.SwitchToNearCode();
+    return end;
+}
+
+static Xbyak::Label ProcessNaN64(BlockOfCode& code, Xbyak::Xmm a) {
+    Xbyak::Label nan, end;
+
+    code.ucomisd(a, a);
+    code.jp(nan, code.T_NEAR);
+    code.SwitchToFarCode();
+    code.L(nan);
+
+    code.orps(a, code.MConst(0x0008'0000'0000'0000));
+
+    code.jmp(end, code.T_NEAR);
+    code.SwitchToNearCode();
+    return end;
+}
+
 static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Label end;
+
     Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
     Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
     Xbyak::Reg32 gpr_scratch = ctx.reg_alloc.ScratchGpr().cvt32();
@@ -130,13 +226,19 @@ static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, voi
         DenormalsAreZero32(code, result, gpr_scratch);
         DenormalsAreZero32(code, operand, gpr_scratch);
     }
+    if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
+        end = PreProcessNaNs32(code, result, operand);
+    }
     (code.*fn)(result, operand);
     if (ctx.FPSCR_FTZ()) {
         FlushToZero32(code, result, gpr_scratch);
     }
     if (ctx.FPSCR_DN()) {
         DefaultNaN32(code, result);
+    } else if (ctx.AccurateNaN()) {
+        PostProcessNaNs32(code, result, operand);
     }
+    code.L(end);
 
     ctx.reg_alloc.DefineValue(inst, result);
 }
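The scalar NaN checks above (PreProcessNaNs32, ProcessNaN32, DefaultNaN32) all hinge on the same x64 detail: ucomiss/ucomisd set the parity flag exactly when the comparison is unordered, i.e. when at least one operand is a NaN, so jp branches to the NaN handling and jnp skips it. The same predicate written on the host side, as a rough illustration only:

    #include <cmath>
    #include <cstdio>

    // std::isunordered(x, y) is true precisely when x or y is a NaN, which is
    // the condition ucomiss reports through PF and jp/jnp act upon.
    int main() {
        const float qnan = std::nanf("");
        std::printf("%d\n", std::isunordered(1.0f, 2.0f)); // 0 -> jnp falls through (fast path)
        std::printf("%d\n", std::isunordered(qnan, 2.0f)); // 1 -> jp takes the far-code NaN path
    }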
@@ -144,6 +246,8 @@ static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, voi
 static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Label end;
+
     Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
     Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
     Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr();
@@ -152,13 +256,19 @@ static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, voi
         DenormalsAreZero64(code, result, gpr_scratch);
         DenormalsAreZero64(code, operand, gpr_scratch);
     }
+    if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
+        end = PreProcessNaNs64(code, result, operand);
+    }
     (code.*fn)(result, operand);
     if (ctx.FPSCR_FTZ()) {
         FlushToZero64(code, result, gpr_scratch);
     }
     if (ctx.FPSCR_DN()) {
         DefaultNaN64(code, result);
+    } else if (ctx.AccurateNaN()) {
+        PostProcessNaNs64(code, result, operand);
     }
+    code.L(end);
 
     ctx.reg_alloc.DefineValue(inst, result);
 }
@@ -166,20 +276,27 @@ static void FPTwoOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void
 static void FPTwoOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Label end;
+
     Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
     Xbyak::Reg32 gpr_scratch = ctx.reg_alloc.ScratchGpr().cvt32();
 
     if (ctx.FPSCR_FTZ()) {
         DenormalsAreZero32(code, result, gpr_scratch);
     }
-
+    if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
+        end = ProcessNaN32(code, result);
+    }
     (code.*fn)(result, result);
     if (ctx.FPSCR_FTZ()) {
         FlushToZero32(code, result, gpr_scratch);
     }
     if (ctx.FPSCR_DN()) {
         DefaultNaN32(code, result);
+    } else if (ctx.AccurateNaN()) {
+        PostProcessNaNs32(code, result, ctx.reg_alloc.ScratchXmm());
     }
+    code.L(end);
 
     ctx.reg_alloc.DefineValue(inst, result);
 }
@@ -187,20 +304,27 @@ static void FPTwoOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void
 static void FPTwoOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Label end;
+
     Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
     Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr();
 
     if (ctx.FPSCR_FTZ()) {
         DenormalsAreZero64(code, result, gpr_scratch);
     }
-
+    if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
+        end = ProcessNaN64(code, result);
+    }
     (code.*fn)(result, result);
     if (ctx.FPSCR_FTZ()) {
         FlushToZero64(code, result, gpr_scratch);
     }
     if (ctx.FPSCR_DN()) {
         DefaultNaN64(code, result);
+    } else if (ctx.AccurateNaN()) {
+        PostProcessNaNs64(code, result, ctx.reg_alloc.ScratchXmm());
     }
+    code.L(end);
 
     ctx.reg_alloc.DefineValue(inst, result);
 }
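In the unary FPTwoOp paths above, ProcessNaN32/ProcessNaN64 quieten an incoming NaN rather than calling Common::ProcessNaNs: OR-ing in the top fraction bit (0x00400000 for singles, 0x0008'0000'0000'0000 for doubles) converts a signalling NaN into the corresponding quiet NaN while keeping its payload, matching ARM behaviour. A tiny worked example (values chosen for illustration):

    #include <cstdint>
    #include <cstdio>

    int main() {
        const std::uint32_t snan32 = 0x7f800001;  // signalling NaN, payload 1
        std::printf("%08x\n", static_cast<unsigned>(snan32 | 0x00400000)); // 7fc00001: quiet, payload kept
        const std::uint64_t snan64 = 0x7ff0'0000'0000'0001;
        std::printf("%016llx\n",
                    static_cast<unsigned long long>(snan64 | 0x0008'0000'0000'0000)); // 7ff8000000000001
    }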
diff --git a/src/backend_x64/emit_x64_vector_floating_point.cpp b/src/backend_x64/emit_x64_vector_floating_point.cpp
index d1b733f4..e740f677 100644
--- a/src/backend_x64/emit_x64_vector_floating_point.cpp
+++ b/src/backend_x64/emit_x64_vector_floating_point.cpp
@@ -4,8 +4,10 @@
  * General Public License version 2 or any later version.
  */
 
+#include "backend_x64/abi.h"
 #include "backend_x64/block_of_code.h"
 #include "backend_x64/emit_x64.h"
+#include "common/fp_util.h"
 #include "frontend/ir/basic_block.h"
 #include "frontend/ir/microinstruction.h"
 
@@ -14,31 +16,183 @@ namespace Dynarmic::BackendX64 {
 using namespace Xbyak::util;
 
 template <typename Function>
-static void EmitVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
+static void EmitVectorOperation32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
+    if (!ctx.AccurateNaN() || ctx.FPSCR_DN()) {
+        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+        Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+        Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+
+        (code.*fn)(xmm_a, xmm_b);
+
+        if (ctx.FPSCR_DN()) {
+            Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
+            Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+            code.pcmpeqw(tmp, tmp);
+            code.movaps(nan_mask, xmm_a);
+            code.cmpordps(nan_mask, nan_mask);
+            code.andps(xmm_a, nan_mask);
+            code.xorps(nan_mask, tmp);
+            code.andps(nan_mask, code.MConst(0x7fc0'0000'7fc0'0000, 0x7fc0'0000'7fc0'0000));
+            code.orps(xmm_a, nan_mask);
+        }
+
+        ctx.reg_alloc.DefineValue(inst, xmm_a);
+        return;
+    }
+
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    Xbyak::Label end, nan;
+    Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
     Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+    Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
 
-    (code.*fn)(xmm_a, xmm_b);
+    code.movaps(nan_mask, xmm_b);
+    code.movaps(result, xmm_a);
+    code.cmpunordps(nan_mask, xmm_a);
+    (code.*fn)(result, xmm_b);
+    code.cmpunordps(nan_mask, result);
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+        code.ptest(nan_mask, nan_mask);
+    } else {
+        Xbyak::Reg32 bitmask = ctx.reg_alloc.ScratchGpr().cvt32();
+        code.movmskps(bitmask, nan_mask);
+        code.cmp(bitmask, 0);
+    }
+    code.jz(end);
+    code.jmp(nan, code.T_NEAR);
+    code.L(end);
 
-    ctx.reg_alloc.DefineValue(inst, xmm_a);
+    code.SwitchToFarCode();
+    code.L(nan);
+    code.sub(rsp, 8);
+    ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
+    const size_t stack_space = 3 * 16;
+    code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
+    code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
+    code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
+    code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
+    code.movaps(xword[code.ABI_PARAM1], result);
+    code.movaps(xword[code.ABI_PARAM2], xmm_a);
+    code.movaps(xword[code.ABI_PARAM3], xmm_b);
+    code.CallFunction(static_cast<void(*)(std::array<u32, 4>&, const std::array<u32, 4>&, const std::array<u32, 4>&)>(
+        [](std::array<u32, 4>& result, const std::array<u32, 4>& a, const std::array<u32, 4>& b) {
+            for (size_t i = 0; i < 4; ++i) {
+                if (auto r = Common::ProcessNaNs(a[i], b[i])) {
+                    result[i] = *r;
+                } else if (Common::IsNaN(result[i])) {
+                    result[i] = 0x7fc00000;
+                }
+            }
+        }
+    ));
+    code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);
+    code.add(rsp, stack_space + ABI_SHADOW_SPACE);
+    ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
+    code.add(rsp, 8);
+    code.jmp(end, code.T_NEAR);
+    code.SwitchToNearCode();
+
+    ctx.reg_alloc.DefineValue(inst, result);
+}
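The FPSCR.DN block in EmitVectorOperation32 above replaces every NaN lane with the default NaN without branching: cmpordps turns each ordered (non-NaN) lane into all-ones, ANDing keeps those lanes of the result, and the inverted mask selects 0x7fc00000 for the NaN lanes before the two halves are ORed back together. A lane-by-lane behavioural sketch of that masking (illustrative, operating on raw bit patterns rather than XMM registers):

    #include <array>
    #include <cstdint>

    // Per-lane equivalent of: cmpordps / andps / xorps(~) / andps(const) / orps.
    std::array<std::uint32_t, 4> ApplyDefaultNaN(std::array<std::uint32_t, 4> lanes) {
        for (auto& lane : lanes) {
            const bool is_nan = (lane & 0x7f800000) == 0x7f800000 && (lane & 0x007fffff) != 0;
            const std::uint32_t ordered_mask = is_nan ? 0x00000000 : 0xffffffff;
            lane = (lane & ordered_mask) | (~ordered_mask & 0x7fc00000);
        }
        return lanes;
    }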
+
+template <typename Function>
+static void EmitVectorOperation64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
+    if (!ctx.AccurateNaN() || ctx.FPSCR_DN()) {
+        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+        Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
+        Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+
+        (code.*fn)(xmm_a, xmm_b);
+
+        if (ctx.FPSCR_DN()) {
+            Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
+            Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+            code.pcmpeqw(tmp, tmp);
+            code.movaps(nan_mask, xmm_a);
+            code.cmpordpd(nan_mask, nan_mask);
+            code.andps(xmm_a, nan_mask);
+            code.xorps(nan_mask, tmp);
+            code.andps(nan_mask, code.MConst(0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000));
+            code.orps(xmm_a, nan_mask);
+        }
+
+        ctx.reg_alloc.DefineValue(inst, xmm_a);
+        return;
+    }
+
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    Xbyak::Label end, nan;
+    Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+    Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
+    Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+    Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
+
+    code.movaps(nan_mask, xmm_b);
+    code.movaps(result, xmm_a);
+    code.cmpunordpd(nan_mask, xmm_a);
+    (code.*fn)(result, xmm_b);
+    code.cmpunordpd(nan_mask, result);
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+        code.ptest(nan_mask, nan_mask);
+    } else {
+        Xbyak::Reg32 bitmask = ctx.reg_alloc.ScratchGpr().cvt32();
+        code.movmskps(bitmask, nan_mask);
+        code.cmp(bitmask, 0);
+    }
+    code.jz(end);
+    code.jmp(nan, code.T_NEAR);
+    code.L(end);
+
+    code.SwitchToFarCode();
+    code.L(nan);
+    code.sub(rsp, 8);
+    ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
+    const size_t stack_space = 3 * 16;
+    code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
+    code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
+    code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
+    code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
+    code.movaps(xword[code.ABI_PARAM1], result);
+    code.movaps(xword[code.ABI_PARAM2], xmm_a);
+    code.movaps(xword[code.ABI_PARAM3], xmm_b);
+    code.CallFunction(static_cast<void(*)(std::array<u64, 2>&, const std::array<u64, 2>&, const std::array<u64, 2>&)>(
+        [](std::array<u64, 2>& result, const std::array<u64, 2>& a, const std::array<u64, 2>& b) {
+            for (size_t i = 0; i < 2; ++i) {
+                if (auto r = Common::ProcessNaNs(a[i], b[i])) {
+                    result[i] = *r;
+                } else if (Common::IsNaN(result[i])) {
+                    result[i] = 0x7ff8'0000'0000'0000;
+                }
+            }
+        }
+    ));
+    code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);
+    code.add(rsp, stack_space + ABI_SHADOW_SPACE);
+    ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
+    code.add(rsp, 8);
+    code.jmp(end, code.T_NEAR);
+    code.SwitchToNearCode();
+
+    ctx.reg_alloc.DefineValue(inst, result);
 }
 
 void EmitX64::EmitFPVectorAdd32(EmitContext& ctx, IR::Inst* inst) {
-    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::addps);
+    EmitVectorOperation32(code, ctx, inst, &Xbyak::CodeGenerator::addps);
 }
 
 void EmitX64::EmitFPVectorAdd64(EmitContext& ctx, IR::Inst* inst) {
-    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::addpd);
+    EmitVectorOperation64(code, ctx, inst, &Xbyak::CodeGenerator::addpd);
 }
 
 void EmitX64::EmitFPVectorSub32(EmitContext& ctx, IR::Inst* inst) {
-    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::subps);
+    EmitVectorOperation32(code, ctx, inst, &Xbyak::CodeGenerator::subps);
 }
 
 void EmitX64::EmitFPVectorSub64(EmitContext& ctx, IR::Inst* inst) {
-    EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::subpd);
+    EmitVectorOperation64(code, ctx, inst, &Xbyak::CodeGenerator::subpd);
 }
 
 } // namespace Dynarmic::BackendX64
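Both vector emitters above keep the common case entirely inline: the two cmpunord accumulations build a mask that is non-zero only if some lane of either input or of the raw result is a NaN, and ptest (or movmskps on pre-SSE4.1 hosts) decides whether the out-of-line, lane-by-lane fixup needs to run at all. Roughly, the condition being tested is the following (a host-side paraphrase, not the emitted code):

    #include <array>
    #include <cmath>
    #include <cstddef>

    // True when the far-code fixup would run for a four-lane single-precision op.
    bool NeedsNaNFixup(const std::array<float, 4>& a, const std::array<float, 4>& b,
                       const std::array<float, 4>& raw_result) {
        for (std::size_t i = 0; i < 4; ++i) {
            if (std::isnan(a[i]) || std::isnan(b[i]) || std::isnan(raw_result[i]))
                return true;
        }
        return false;
    }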
diff --git a/src/common/fp_util.h b/src/common/fp_util.h
new file mode 100644
index 00000000..9f65e5f9
--- /dev/null
+++ b/src/common/fp_util.h
@@ -0,0 +1,76 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+#include <boost/optional.hpp>
+
+namespace Dynarmic {
+namespace Common {
+
+/// Is 32-bit floating point value a QNaN?
+constexpr bool IsQNaN(u32 value) {
+    return (value & 0x7fc00000) == 0x7fc00000;
+}
+
+/// Is 32-bit floating point value a SNaN?
+constexpr bool IsSNaN(u32 value) {
+    return (value & 0x7fc00000) == 0x7f800000 && (value & 0x007fffff) != 0;
+}
+
+/// Is 32-bit floating point value a NaN?
+constexpr bool IsNaN(u32 value) {
+    return IsQNaN(value) || IsSNaN(value);
+}
+
+/// Given a pair of arguments, return the NaN value which would be returned by an ARM processor.
+/// If neither argument is a NaN, returns boost::none.
+inline boost::optional<u32> ProcessNaNs(u32 a, u32 b) {
+    if (IsSNaN(a)) {
+        return a | 0x00400000;
+    } else if (IsSNaN(b)) {
+        return b | 0x00400000;
+    } else if (IsQNaN(a)) {
+        return a;
+    } else if (IsQNaN(b)) {
+        return b;
+    }
+    return boost::none;
+}
+
+/// Is 64-bit floating point value a QNaN?
+constexpr bool IsQNaN(u64 value) {
+    return (value & 0x7FF8'0000'0000'0000) == 0x7FF8'0000'0000'0000;
+}
+
+/// Is 64-bit floating point value a SNaN?
+constexpr bool IsSNaN(u64 value) {
+    return (value & 0x7FF8'0000'0000'0000) == 0x7FF0'0000'0000'0000
+        && (value & 0x0007'FFFF'FFFF'FFFF) != 0;
+}
+
+/// Is 64-bit floating point value a NaN?
+constexpr bool IsNaN(u64 value) {
+    return IsQNaN(value) || IsSNaN(value);
+}
+
+/// Given a pair of arguments, return the NaN value which would be returned by an ARM processor.
+/// If neither argument is a NaN, returns boost::none.
+inline boost::optional<u64> ProcessNaNs(u64 a, u64 b) {
+    if (IsSNaN(a)) {
+        return a | 0x0008'0000'0000'0000;
+    } else if (IsSNaN(b)) {
+        return b | 0x0008'0000'0000'0000;
+    } else if (IsQNaN(a)) {
+        return a;
+    } else if (IsQNaN(b)) {
+        return b;
+    }
+    return boost::none;
+}
+
+} // namespace Common
+} // namespace Dynarmic
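A quick usage sketch of the precedence ProcessNaNs implements (a quietened SNaN from the first operand wins, then one from the second, then a propagated QNaN from the first, then from the second). This assumes fp_util.h and Boost are on the include path; the values are illustrative:

    #include "common/fp_util.h"
    #include <cassert>
    #include <cstdint>

    void ProcessNaNsExamples() {
        using Dynarmic::Common::ProcessNaNs;
        // SNaN in the first operand: quietened and returned.
        assert(*ProcessNaNs(std::uint32_t(0x7f800001), std::uint32_t(0x7fc00002)) == 0x7fc00001);
        // No SNaN present: the first operand's QNaN propagates unchanged.
        assert(*ProcessNaNs(std::uint32_t(0x7fc00002), std::uint32_t(0x7fc00003)) == 0x7fc00002);
        // Two ordinary values (1.0f and 2.0f as bits): nothing to report.
        assert(ProcessNaNs(std::uint32_t(0x3f800000), std::uint32_t(0x40000000)) == boost::none);
    }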