backend_x64: Accurately handle NaNs
This commit is contained in:
parent
e97581d063
commit
07520f32c3
4 changed files with 383 additions and 28 deletions
|
@ -36,6 +36,7 @@ struct EmitContext {
|
|||
virtual bool FPSCR_RoundTowardsZero() const = 0;
|
||||
virtual bool FPSCR_FTZ() const = 0;
|
||||
virtual bool FPSCR_DN() const = 0;
|
||||
virtual bool AccurateNaN() const { return true; }
|
||||
|
||||
RegAlloc& reg_alloc;
|
||||
IR::Block& block;
|
||||
|
|
|
@ -4,10 +4,12 @@
|
|||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include "backend_x64/abi.h"
|
||||
#include "backend_x64/block_of_code.h"
|
||||
#include "backend_x64/emit_x64.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/fp_util.h"
|
||||
#include "frontend/ir/basic_block.h"
|
||||
#include "frontend/ir/microinstruction.h"
|
||||
#include "frontend/ir/opcodes.h"
|
||||
|
@ -95,33 +97,127 @@ static void FlushToZero64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Reg64
|
|||
code.L(end);
|
||||
}
|
||||
|
||||
/// Emits code that overwrites xmm_value with the f32_nan constant when its low
/// single-precision lane holds a NaN; any other value is left as it was.
static void DefaultNaN32(BlockOfCode& code, Xbyak::Xmm xmm_value) {
    Xbyak::Label not_nan;

    // A value only compares unordered with itself when it is a NaN, and
    // ucomiss reports "unordered" through the parity flag.
    code.ucomiss(xmm_value, xmm_value);
    code.jnp(not_nan);
    code.movaps(xmm_value, code.MConst(f32_nan));
    code.L(not_nan);
}
|
||||
|
||||
/// Emits code that overwrites xmm_value with the f64_nan constant when its low
/// double-precision lane holds a NaN; any other value is left as it was.
static void DefaultNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value) {
    Xbyak::Label not_nan;

    // A value only compares unordered with itself when it is a NaN, and
    // ucomisd reports "unordered" through the parity flag.
    code.ucomisd(xmm_value, xmm_value);
    code.jnp(not_nan);
    code.movaps(xmm_value, code.MConst(f64_nan));
    code.L(not_nan);
}
|
||||
|
||||
/// Emits code that clears xmm_value when its low double-precision lane is a NaN.
/// xmm_scratch is clobbered: it ends up holding the "is ordered" mask.
static void ZeroIfNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value, Xbyak::Xmm xmm_scratch) {
    // Start from +0.0 so the comparison below depends only on xmm_value.
    code.pxor(xmm_scratch, xmm_scratch);
    // Low lane becomes all ones when xmm_value is ordered (not a NaN), all zeros otherwise.
    code.cmpordsd(xmm_scratch, xmm_value);
    // Keep the value when ordered; AND with zero wipes it when it is a NaN.
    code.pand(xmm_value, xmm_scratch);
}
|
||||
|
||||
/// Emits a NaN pre-check for a two-operand single-precision operation.
///
/// Near code: compares `a` with `b` and branches to a far-code handler when they
/// are unordered (at least one of them is a NaN).
/// Far code: passes the raw bit patterns of both operands to Common::ProcessNaNs,
/// moves the returned value into `a`, then jumps to the returned label.
///
/// @return A label that has NOT been bound yet; the caller must bind it with
///         code.L() at the point where the NaN path should rejoin the near code.
static Xbyak::Label PreProcessNaNs32(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b) {
    Xbyak::Label nan_path;
    Xbyak::Label resume;

    code.ucomiss(a, b);
    code.jp(nan_path, code.T_NEAR);

    code.SwitchToFarCode();
    code.L(nan_path);

    const Xbyak::Reg32 first_arg = code.ABI_PARAM1.cvt32();
    const Xbyak::Reg32 second_arg = code.ABI_PARAM2.cvt32();
    const auto kept_register = HostLocXmmIdx(a.getIdx());

    code.sub(rsp, 8);
    // `a` is excluded from the save/restore set because it receives the result.
    ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, kept_register);
    code.xor_(first_arg, first_arg);
    code.xor_(second_arg, second_arg);
    code.movd(first_arg, a);
    code.movd(second_arg, b);
    code.CallFunction(static_cast<u32(*)(u32, u32)>([](u32 lhs, u32 rhs) -> u32 {
        // This handler is only reached after ucomiss flagged a NaN operand,
        // so the optional is expected to hold a value here.
        return *Common::ProcessNaNs(lhs, rhs);
    }));
    code.movd(a, code.ABI_RETURN.cvt32());
    ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, kept_register);
    code.add(rsp, 8);

    code.jmp(resume, code.T_NEAR);
    code.SwitchToNearCode();

    return resume;
}
|
||||
|
||||
/// Emits code that flips the sign bit of every single-precision lane of `result`
/// that holds a NaN. `tmp` is clobbered.
/// NOTE(review): presumably this turns the NaN generated by the host operation
/// into the form the guest expects -- confirm against the callers.
static void PostProcessNaNs32(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm tmp) {
    code.movaps(tmp, result);
    // Lanes that are NaN become all ones, every other lane becomes zero.
    code.cmpunordps(tmp, tmp);
    // Reduce each all-ones lane to just bit 31 (the sign bit).
    code.pslld(tmp, 31);
    // Toggle the sign bit of the NaN lanes only.
    code.xorps(result, tmp);
}
|
||||
|
||||
/// Emits code that overwrites xmm_value with the f32_nan constant when its low
/// single-precision lane holds a NaN; any other value is left as it was.
static void DefaultNaN32(BlockOfCode& code, Xbyak::Xmm xmm_value) {
    Xbyak::Label not_nan;

    // Self-comparison is unordered (parity flag set) only for a NaN.
    code.ucomiss(xmm_value, xmm_value);
    code.jnp(not_nan);

    code.movaps(xmm_value, code.MConst(f32_nan));

    code.L(not_nan);
}
|
||||
|
||||
/// Emits a NaN pre-check for a two-operand double-precision operation.
///
/// Near code: compares `a` with `b` and branches to a far-code handler when they
/// are unordered (at least one of them is a NaN).
/// Far code: passes the raw bit patterns of both operands to Common::ProcessNaNs,
/// moves the returned value into `a`, then jumps to the returned label.
///
/// @return A label that has NOT been bound yet; the caller must bind it with
///         code.L() at the point where the NaN path should rejoin the near code.
static Xbyak::Label PreProcessNaNs64(BlockOfCode& code, Xbyak::Xmm a, Xbyak::Xmm b) {
    Xbyak::Label nan_path;
    Xbyak::Label resume;

    code.ucomisd(a, b);
    code.jp(nan_path, code.T_NEAR);

    code.SwitchToFarCode();
    code.L(nan_path);

    const auto kept_register = HostLocXmmIdx(a.getIdx());

    code.sub(rsp, 8);
    // `a` is excluded from the save/restore set because it receives the result.
    ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, kept_register);
    code.movq(code.ABI_PARAM1, a);
    code.movq(code.ABI_PARAM2, b);
    code.CallFunction(static_cast<u64(*)(u64, u64)>([](u64 lhs, u64 rhs) -> u64 {
        // This handler is only reached after ucomisd flagged a NaN operand,
        // so the optional is expected to hold a value here.
        return *Common::ProcessNaNs(lhs, rhs);
    }));
    code.movq(a, code.ABI_RETURN);
    ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, kept_register);
    code.add(rsp, 8);

    code.jmp(resume, code.T_NEAR);
    code.SwitchToNearCode();

    return resume;
}
|
||||
|
||||
/// Emits code that flips the sign bit of every double-precision lane of `result`
/// that holds a NaN. `tmp` is clobbered.
/// NOTE(review): presumably this turns the NaN generated by the host operation
/// into the form the guest expects -- confirm against the callers.
static void PostProcessNaNs64(BlockOfCode& code, Xbyak::Xmm result, Xbyak::Xmm tmp) {
    code.movaps(tmp, result);
    // Lanes that are NaN become all ones, every other lane becomes zero.
    code.cmpunordpd(tmp, tmp);
    // Reduce each all-ones lane to just bit 63 (the sign bit).
    code.psllq(tmp, 63);
    // Toggle the sign bit of the NaN lanes only.
    code.xorps(result, tmp);
}
|
||||
|
||||
/// Emits code that overwrites xmm_value with the f64_nan constant when its low
/// double-precision lane holds a NaN; any other value is left as it was.
static void DefaultNaN64(BlockOfCode& code, Xbyak::Xmm xmm_value) {
    Xbyak::Label not_nan;

    // Self-comparison is unordered (parity flag set) only for a NaN.
    code.ucomisd(xmm_value, xmm_value);
    code.jnp(not_nan);

    code.movaps(xmm_value, code.MConst(f64_nan));

    code.L(not_nan);
}
|
||||
|
||||
/// Emits a NaN pre-check for a one-operand single-precision operation.
///
/// Near code: branches to a far-code handler when `a` is a NaN.
/// Far code: ORs 0x00400000 into `a` (the top mantissa bit, which makes the NaN
/// quiet) and jumps to the returned label.
///
/// @return A label that has NOT been bound yet; the caller must bind it with
///         code.L() at the point where the NaN path should rejoin the near code.
static Xbyak::Label ProcessNaN32(BlockOfCode& code, Xbyak::Xmm a) {
    Xbyak::Label nan_path;
    Xbyak::Label resume;

    // Only a NaN compares unordered with itself.
    code.ucomiss(a, a);
    code.jp(nan_path, code.T_NEAR);

    code.SwitchToFarCode();
    code.L(nan_path);
    code.orps(a, code.MConst(0x00400000));
    code.jmp(resume, code.T_NEAR);
    code.SwitchToNearCode();

    return resume;
}
|
||||
|
||||
/// Emits a NaN pre-check for a one-operand double-precision operation.
///
/// Near code: branches to a far-code handler when `a` is a NaN.
/// Far code: ORs 0x0008'0000'0000'0000 into `a` (the top mantissa bit, which
/// makes the NaN quiet) and jumps to the returned label.
///
/// @return A label that has NOT been bound yet; the caller must bind it with
///         code.L() at the point where the NaN path should rejoin the near code.
static Xbyak::Label ProcessNaN64(BlockOfCode& code, Xbyak::Xmm a) {
    Xbyak::Label nan_path;
    Xbyak::Label resume;

    // Only a NaN compares unordered with itself.
    code.ucomisd(a, a);
    code.jp(nan_path, code.T_NEAR);

    code.SwitchToFarCode();
    code.L(nan_path);
    code.orps(a, code.MConst(0x0008'0000'0000'0000));
    code.jmp(resume, code.T_NEAR);
    code.SwitchToNearCode();

    return resume;
}
|
||||
|
||||
static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
Xbyak::Label end;
|
||||
|
||||
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||
Xbyak::Reg32 gpr_scratch = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
|
@ -130,13 +226,19 @@ static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, voi
|
|||
DenormalsAreZero32(code, result, gpr_scratch);
|
||||
DenormalsAreZero32(code, operand, gpr_scratch);
|
||||
}
|
||||
if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
|
||||
end = PreProcessNaNs32(code, result, operand);
|
||||
}
|
||||
(code.*fn)(result, operand);
|
||||
if (ctx.FPSCR_FTZ()) {
|
||||
FlushToZero32(code, result, gpr_scratch);
|
||||
}
|
||||
if (ctx.FPSCR_DN()) {
|
||||
DefaultNaN32(code, result);
|
||||
} else if (ctx.AccurateNaN()) {
|
||||
PostProcessNaNs32(code, result, operand);
|
||||
}
|
||||
code.L(end);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
@ -144,6 +246,8 @@ static void FPThreeOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, voi
|
|||
static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
Xbyak::Label end;
|
||||
|
||||
Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||
Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr();
|
||||
|
@ -152,13 +256,19 @@ static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, voi
|
|||
DenormalsAreZero64(code, result, gpr_scratch);
|
||||
DenormalsAreZero64(code, operand, gpr_scratch);
|
||||
}
|
||||
if (ctx.AccurateNaN() && !ctx.FPSCR_DN()) {
|
||||
end = PreProcessNaNs64(code, result, operand);
|
||||
}
|
||||
(code.*fn)(result, operand);
|
||||
if (ctx.FPSCR_FTZ()) {
|
||||
FlushToZero64(code, result, gpr_scratch);
|
||||
}
|
||||
if (ctx.FPSCR_DN()) {
|
||||
DefaultNaN64(code, result);
|
||||
} else if (ctx.AccurateNaN()) {
|
||||
PostProcessNaNs64(code, result, operand);
|
||||
}
|
||||
code.L(end);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
@ -166,20 +276,27 @@ static void FPThreeOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, voi
|
|||
/// Emits a one-input single-precision operation: `fn` is applied with the first
/// IR argument as both destination and source, and the register is then defined
/// as the value of `inst`.
///
/// Depending on the context flags the operation is wrapped with:
///  - FPSCR_FTZ: denormal inputs treated as zero before, flush-to-zero after;
///  - FPSCR_DN: a NaN result is replaced via DefaultNaN32;
///  - AccurateNaN without FPSCR_DN: a NaN input is handled by ProcessNaN32 (which
///    skips straight to the end), and a NaN result goes through PostProcessNaNs32.
static void FPTwoOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const bool flush_to_zero = ctx.FPSCR_FTZ();
    const bool default_nan = ctx.FPSCR_DN();
    const bool accurate_nan = !default_nan && ctx.AccurateNaN();

    // Stays unreferenced (but is still bound below) when NaN handling is off.
    Xbyak::Label done;

    Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Reg32 gpr_scratch = ctx.reg_alloc.ScratchGpr().cvt32();

    if (flush_to_zero) {
        DenormalsAreZero32(code, result, gpr_scratch);
    }
    if (accurate_nan) {
        done = ProcessNaN32(code, result);
    }

    (code.*fn)(result, result);

    if (flush_to_zero) {
        FlushToZero32(code, result, gpr_scratch);
    }
    if (default_nan) {
        DefaultNaN32(code, result);
    } else if (accurate_nan) {
        PostProcessNaNs32(code, result, ctx.reg_alloc.ScratchXmm());
    }
    code.L(done);

    ctx.reg_alloc.DefineValue(inst, result);
}
|
||||
|
@ -187,20 +304,27 @@ static void FPTwoOp32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void
|
|||
/// Emits a one-input double-precision operation: `fn` is applied with the first
/// IR argument as both destination and source, and the register is then defined
/// as the value of `inst`.
///
/// Depending on the context flags the operation is wrapped with:
///  - FPSCR_FTZ: denormal inputs treated as zero before, flush-to-zero after;
///  - FPSCR_DN: a NaN result is replaced via DefaultNaN64;
///  - AccurateNaN without FPSCR_DN: a NaN input is handled by ProcessNaN64 (which
///    skips straight to the end), and a NaN result goes through PostProcessNaNs64.
static void FPTwoOp64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    const bool flush_to_zero = ctx.FPSCR_FTZ();
    const bool default_nan = ctx.FPSCR_DN();
    const bool accurate_nan = !default_nan && ctx.AccurateNaN();

    // Stays unreferenced (but is still bound below) when NaN handling is off.
    Xbyak::Label done;

    Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
    Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr();

    if (flush_to_zero) {
        DenormalsAreZero64(code, result, gpr_scratch);
    }
    if (accurate_nan) {
        done = ProcessNaN64(code, result);
    }

    (code.*fn)(result, result);

    if (flush_to_zero) {
        FlushToZero64(code, result, gpr_scratch);
    }
    if (default_nan) {
        DefaultNaN64(code, result);
    } else if (accurate_nan) {
        PostProcessNaNs64(code, result, ctx.reg_alloc.ScratchXmm());
    }
    code.L(done);

    ctx.reg_alloc.DefineValue(inst, result);
}
|
||||
|
|
|
@ -4,8 +4,10 @@
|
|||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include "backend_x64/abi.h"
|
||||
#include "backend_x64/block_of_code.h"
|
||||
#include "backend_x64/emit_x64.h"
|
||||
#include "common/fp_util.h"
|
||||
#include "frontend/ir/basic_block.h"
|
||||
#include "frontend/ir/microinstruction.h"
|
||||
|
||||
|
@ -14,31 +16,183 @@ namespace Dynarmic::BackendX64 {
|
|||
using namespace Xbyak::util;
|
||||
|
||||
template <typename Function>
|
||||
static void EmitVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
|
||||
static void EmitVectorOperation32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
|
||||
if (!ctx.AccurateNaN() || ctx.FPSCR_DN()) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
(code.*fn)(xmm_a, xmm_b);
|
||||
|
||||
if (ctx.FPSCR_DN()) {
|
||||
Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
|
||||
Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||
code.pcmpeqw(tmp, tmp);
|
||||
code.movaps(nan_mask, xmm_a);
|
||||
code.cmpordps(nan_mask, nan_mask);
|
||||
code.andps(xmm_a, nan_mask);
|
||||
code.xorps(nan_mask, tmp);
|
||||
code.andps(nan_mask, code.MConst(0x7fc0'0000'7fc0'0000, 0x7fc0'0000'7fc0'0000));
|
||||
code.orps(xmm_a, nan_mask);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, xmm_a);
|
||||
return;
|
||||
}
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
Xbyak::Label end, nan;
|
||||
Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
|
||||
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
(code.*fn)(xmm_a, xmm_b);
|
||||
code.movaps(nan_mask, xmm_b);
|
||||
code.movaps(result, xmm_a);
|
||||
code.cmpunordps(nan_mask, xmm_a);
|
||||
(code.*fn)(result, xmm_b);
|
||||
code.cmpunordps(nan_mask, result);
|
||||
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
|
||||
code.ptest(nan_mask, nan_mask);
|
||||
} else {
|
||||
Xbyak::Reg32 bitmask = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
code.movmskps(bitmask, nan_mask);
|
||||
code.cmp(bitmask, 0);
|
||||
}
|
||||
code.jz(end);
|
||||
code.jmp(nan, code.T_NEAR);
|
||||
code.L(end);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, xmm_a);
|
||||
code.SwitchToFarCode();
|
||||
code.L(nan);
|
||||
code.sub(rsp, 8);
|
||||
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
|
||||
const size_t stack_space = 3 * 16;
|
||||
code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
|
||||
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
|
||||
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
|
||||
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
|
||||
code.movaps(xword[code.ABI_PARAM1], result);
|
||||
code.movaps(xword[code.ABI_PARAM2], xmm_a);
|
||||
code.movaps(xword[code.ABI_PARAM3], xmm_b);
|
||||
code.CallFunction(static_cast<void(*)(std::array<u32, 4>&, const std::array<u32, 4>&, const std::array<u32, 4>&)>(
|
||||
[](std::array<u32, 4>& result, const std::array<u32, 4>& a, const std::array<u32, 4>& b) {
|
||||
for (size_t i = 0; i < 4; ++i) {
|
||||
if (auto r = Common::ProcessNaNs(a[i], b[i])) {
|
||||
result[i] = *r;
|
||||
} else if (Common::IsNaN(result[i])) {
|
||||
result[i] = 0x7fc00000;
|
||||
}
|
||||
}
|
||||
}
|
||||
));
|
||||
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);
|
||||
code.add(rsp, stack_space + ABI_SHADOW_SPACE);
|
||||
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
|
||||
code.add(rsp, 8);
|
||||
code.jmp(end, code.T_NEAR);
|
||||
code.SwitchToNearCode();
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
||||
template <typename Function>
|
||||
static void EmitVectorOperation64(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
|
||||
if (!ctx.AccurateNaN() || ctx.FPSCR_DN()) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
(code.*fn)(xmm_a, xmm_b);
|
||||
|
||||
if (ctx.FPSCR_DN()) {
|
||||
Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
|
||||
Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||
code.pcmpeqw(tmp, tmp);
|
||||
code.movaps(nan_mask, xmm_a);
|
||||
code.cmpordpd(nan_mask, nan_mask);
|
||||
code.andps(xmm_a, nan_mask);
|
||||
code.xorps(nan_mask, tmp);
|
||||
code.andps(nan_mask, code.MConst(0x7ff8'0000'0000'0000, 0x7ff8'0000'0000'0000));
|
||||
code.orps(xmm_a, nan_mask);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, xmm_a);
|
||||
return;
|
||||
}
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
Xbyak::Label end, nan;
|
||||
Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
|
||||
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.movaps(nan_mask, xmm_b);
|
||||
code.movaps(result, xmm_a);
|
||||
code.cmpunordpd(nan_mask, xmm_a);
|
||||
(code.*fn)(result, xmm_b);
|
||||
code.cmpunordpd(nan_mask, result);
|
||||
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
|
||||
code.ptest(nan_mask, nan_mask);
|
||||
} else {
|
||||
Xbyak::Reg32 bitmask = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
code.movmskps(bitmask, nan_mask);
|
||||
code.cmp(bitmask, 0);
|
||||
}
|
||||
code.jz(end);
|
||||
code.jmp(nan, code.T_NEAR);
|
||||
code.L(end);
|
||||
|
||||
code.SwitchToFarCode();
|
||||
code.L(nan);
|
||||
code.sub(rsp, 8);
|
||||
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
|
||||
const size_t stack_space = 3 * 16;
|
||||
code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
|
||||
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
|
||||
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
|
||||
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
|
||||
code.movaps(xword[code.ABI_PARAM1], result);
|
||||
code.movaps(xword[code.ABI_PARAM2], xmm_a);
|
||||
code.movaps(xword[code.ABI_PARAM3], xmm_b);
|
||||
code.CallFunction(static_cast<void(*)(std::array<u64, 2>&, const std::array<u64, 2>&, const std::array<u64, 2>&)>(
|
||||
[](std::array<u64, 2>& result, const std::array<u64, 2>& a, const std::array<u64, 2>& b) {
|
||||
for (size_t i = 0; i < 4; ++i) {
|
||||
if (auto r = Common::ProcessNaNs(a[i], b[i])) {
|
||||
result[i] = *r;
|
||||
} else if (Common::IsNaN(result[i])) {
|
||||
result[i] = 0x7ff8'0000'0000'0000;
|
||||
}
|
||||
}
|
||||
}
|
||||
));
|
||||
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]);
|
||||
code.add(rsp, stack_space + ABI_SHADOW_SPACE);
|
||||
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
|
||||
code.add(rsp, 8);
|
||||
code.jmp(end, code.T_NEAR);
|
||||
code.SwitchToNearCode();
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
||||
/// Emits the packed single-precision vector add (addps) for `inst`.
/// Only the 32-bit helper is called: the stale call to the former
/// EmitVectorOperation would have emitted the operation a second time.
void EmitX64::EmitFPVectorAdd32(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorOperation32(code, ctx, inst, &Xbyak::CodeGenerator::addps);
}
|
||||
|
||||
/// Emits the packed double-precision vector add (addpd) for `inst`.
/// Only the 64-bit helper is called: the stale call to the former
/// EmitVectorOperation would have emitted the operation a second time.
void EmitX64::EmitFPVectorAdd64(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorOperation64(code, ctx, inst, &Xbyak::CodeGenerator::addpd);
}
|
||||
|
||||
/// Emits the packed single-precision vector subtract (subps) for `inst`.
/// Only the 32-bit helper is called: the stale call to the former
/// EmitVectorOperation would have emitted the operation a second time.
void EmitX64::EmitFPVectorSub32(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorOperation32(code, ctx, inst, &Xbyak::CodeGenerator::subps);
}
|
||||
|
||||
/// Emits the packed double-precision vector subtract (subpd) for `inst`.
/// Only the 64-bit helper is called: the stale call to the former
/// EmitVectorOperation would have emitted the operation a second time.
void EmitX64::EmitFPVectorSub64(EmitContext& ctx, IR::Inst* inst) {
    EmitVectorOperation64(code, ctx, inst, &Xbyak::CodeGenerator::subpd);
}
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
|
|
76
src/common/fp_util.h
Normal file
76
src/common/fp_util.h
Normal file
|
@ -0,0 +1,76 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2018 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <boost/optional.hpp>

#include "common/common_types.h"
|
||||
|
||||
namespace Dynarmic {
|
||||
namespace Common {
|
||||
|
||||
/// Is 32-bit floating point value a QNaN?
|
||||
constexpr bool IsQNaN(u32 value) {
|
||||
return (value & 0x7fc00000) == 0x7fc00000;
|
||||
}
|
||||
|
||||
/// Is 32-bit floating point value a SNaN?
|
||||
constexpr bool IsSNaN(u32 value) {
|
||||
return (value & 0x7fc00000) == 0x7f800000 && (value & 0x007fffff) != 0;
|
||||
}
|
||||
|
||||
/// Is 32-bit floating point value a NaN?
|
||||
constexpr bool IsNaN(u32 value) {
|
||||
return IsQNaN(value) || IsSNaN(value);
|
||||
}
|
||||
|
||||
/// Given a pair of arguments, return the NaN value which would be returned by an ARM processor.
|
||||
/// If neither argument is a NaN, returns boost::none.
|
||||
inline boost::optional<u32> ProcessNaNs(u32 a, u32 b) {
|
||||
if (IsSNaN(a)) {
|
||||
return a | 0x00400000;
|
||||
} else if (IsSNaN(b)) {
|
||||
return b | 0x00400000;
|
||||
} else if (IsQNaN(a)) {
|
||||
return a;
|
||||
} else if (IsQNaN(b)) {
|
||||
return b;
|
||||
}
|
||||
return boost::none;
|
||||
}
|
||||
|
||||
/// Is 64-bit floating point value a QNaN?
|
||||
constexpr bool IsQNaN(u64 value) {
|
||||
return (value & 0x7FF8'0000'0000'0000) == 0x7FF8'0000'0000'0000;
|
||||
}
|
||||
|
||||
/// Is 64-bit floating point value a SNaN?
|
||||
constexpr bool IsSNaN(u64 value) {
|
||||
return (value & 0x7FF8'0000'0000'0000) == 0x7FF0'0000'0000'0000
|
||||
&& (value & 0x0007'FFFF'FFFF'FFFF) != 0;
|
||||
}
|
||||
|
||||
/// Is 64-bit floating point value a NaN?
|
||||
constexpr bool IsNaN(u64 value) {
|
||||
return IsQNaN(value) || IsSNaN(value);
|
||||
}
|
||||
|
||||
/// Given a pair of arguments, return the NaN value which would be returned by an ARM processor.
|
||||
/// If neither argument is a NaN, returns boost::none.
|
||||
inline boost::optional<u64> ProcessNaNs(u64 a, u64 b) {
|
||||
if (IsSNaN(a)) {
|
||||
return a | 0x0008'0000'0000'0000;
|
||||
} else if (IsSNaN(b)) {
|
||||
return b | 0x0008'0000'0000'0000;
|
||||
} else if (IsQNaN(a)) {
|
||||
return a;
|
||||
} else if (IsQNaN(b)) {
|
||||
return b;
|
||||
}
|
||||
return boost::none;
|
||||
}
|
||||
|
||||
} // namespace Common
|
||||
} // namespace Dynarmic
|
Loading…
Reference in a new issue