From e18fca17dc6a0e7e50ea7b0a932c720d84c1e898 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 16 Jul 2018 16:51:16 +0100 Subject: [PATCH] A64: Implement FABD in terms of existing IR instructions Fixes NaN issue. Closes #306. --- .../emit_x64_vector_floating_point.cpp | 22 ------------------- .../translate/impl/simd_scalar_three_same.cpp | 9 ++++---- .../A64/translate/impl/simd_three_same.cpp | 2 +- src/frontend/ir/ir_emitter.cpp | 11 ---------- src/frontend/ir/ir_emitter.h | 1 - src/frontend/ir/opcodes.inc | 2 -- 6 files changed, 5 insertions(+), 42 deletions(-) diff --git a/src/backend_x64/emit_x64_vector_floating_point.cpp b/src/backend_x64/emit_x64_vector_floating_point.cpp index 348acfbb..261bd6c8 100644 --- a/src/backend_x64/emit_x64_vector_floating_point.cpp +++ b/src/backend_x64/emit_x64_vector_floating_point.cpp @@ -224,28 +224,6 @@ static void EmitVectorOperation64(BlockOfCode& code, EmitContext& ctx, IR::Inst* ctx.reg_alloc.DefineValue(inst, result); } -void EmitX64::EmitFPVectorAbsoluteDifference32(EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]); - - code.subps(a, b); - code.andps(a, code.MConst(xword, 0x7FFFFFFF7FFFFFFF, 0x7FFFFFFF7FFFFFFF)); - - ctx.reg_alloc.DefineValue(inst, a); -} - -void EmitX64::EmitFPVectorAbsoluteDifference64(EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]); - - code.subpd(a, b); - code.andpd(a, code.MConst(xword, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFFF)); - - ctx.reg_alloc.DefineValue(inst, a); -} - void EmitX64::EmitFPVectorAbs16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); diff --git a/src/frontend/A64/translate/impl/simd_scalar_three_same.cpp b/src/frontend/A64/translate/impl/simd_scalar_three_same.cpp index c3aba19c..3a3e94a2 100644 --- a/src/frontend/A64/translate/impl/simd_scalar_three_same.cpp +++ b/src/frontend/A64/translate/impl/simd_scalar_three_same.cpp @@ -173,12 +173,11 @@ bool TranslatorVisitor::CMTST_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd) { bool TranslatorVisitor::FABD_2(bool sz, Vec Vm, Vec Vn, Vec Vd) { const size_t esize = sz ? 64 : 32; - const IR::U128 operand1 = V(esize, Vn); - const IR::U128 operand2 = V(esize, Vm); - const IR::U128 difference = ir.FPVectorAbsoluteDifference(esize, operand1, operand2); - const IR::U128 result = ir.VectorZeroUpper(difference); + const IR::U32U64 operand1 = V_scalar(esize, Vn); + const IR::U32U64 operand2 = V_scalar(esize, Vm); + const IR::U32U64 result = ir.FPAbs(ir.FPSub(operand1, operand2, true)); - V(128, Vd, result); + V_scalar(esize, Vd, result); return true; } diff --git a/src/frontend/A64/translate/impl/simd_three_same.cpp b/src/frontend/A64/translate/impl/simd_three_same.cpp index 36dfbca8..0e14e3d7 100644 --- a/src/frontend/A64/translate/impl/simd_three_same.cpp +++ b/src/frontend/A64/translate/impl/simd_three_same.cpp @@ -384,7 +384,7 @@ bool TranslatorVisitor::FABD_4(bool Q, bool sz, Vec Vm, Vec Vn, Vec Vd) { const IR::U128 operand1 = V(datasize, Vn); const IR::U128 operand2 = V(datasize, Vm); - const IR::U128 result = ir.FPVectorAbsoluteDifference(esize, operand1, operand2); + const IR::U128 result = ir.FPVectorAbs(esize, ir.FPVectorSub(esize, operand1, operand2)); V(datasize, Vd, result); return true; diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index 860b9f93..cc46c63b 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -1577,17 +1577,6 @@ U128 IREmitter::FPVectorAbs(size_t esize, const U128& a) { return {}; } -U128 IREmitter::FPVectorAbsoluteDifference(size_t esize, const U128& a, const U128& b) { - switch (esize) { - case 32: - return Inst(Opcode::FPVectorAbsoluteDifference32, a, b); - case 64: - return Inst(Opcode::FPVectorAbsoluteDifference64, a, b); - } - UNREACHABLE(); - return {}; -} - U128 IREmitter::FPVectorAdd(size_t esize, const U128& a, const U128& b) { switch (esize) { case 32: diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index 91d50976..6986af67 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -289,7 +289,6 @@ public: U32 FPU64ToSingle(const U64& a, bool round_to_nearest, bool fpscr_controlled); U128 FPVectorAbs(size_t esize, const U128& a); - U128 FPVectorAbsoluteDifference(size_t esize, const U128& a, const U128& b); U128 FPVectorAdd(size_t esize, const U128& a, const U128& b); U128 FPVectorDiv(size_t esize, const U128& a, const U128& b); U128 FPVectorEqual(size_t esize, const U128& a, const U128& b); diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index ede93ec8..7a3ca289 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -415,8 +415,6 @@ OPCODE(FPS64ToSingle, T::U32, T::U64, T::U OPCODE(FPVectorAbs16, T::U128, T::U128 ) OPCODE(FPVectorAbs32, T::U128, T::U128 ) OPCODE(FPVectorAbs64, T::U128, T::U128 ) -OPCODE(FPVectorAbsoluteDifference32, T::U128, T::U128, T::U128 ) -OPCODE(FPVectorAbsoluteDifference64, T::U128, T::U128, T::U128 ) OPCODE(FPVectorAdd32, T::U128, T::U128, T::U128 ) OPCODE(FPVectorAdd64, T::U128, T::U128, T::U128 ) OPCODE(FPVectorDiv32, T::U128, T::U128, T::U128 )