From 45dc5f74f311e6cfc7d29e304f7b375a695c9d34 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 23 Jul 2018 22:46:12 +0100 Subject: [PATCH] A64: Implement FRSQRTE (vector), single/double variant --- /* Reviewer notes (free-form text in this position is not part of the commit message or the diff). Summary of the change below: (1) emit_x64_vector_floating_point.cpp gains EmitOneArgumentFallback, which reserves 2 * 16 bytes of stack plus ABI_SHADOW_SPACE, passes a pointer to the first 16-byte slot (result) in ABI_PARAM1 and to the second slot (operand) in ABI_PARAM2, the value of ctx.FPCR() in ABI_PARAM3 and the address r15 + offsetof_fpsr_exc in ABI_PARAM4, stores the operand register into the second slot, calls the lambda-derived function pointer, reloads the first slot into xmm0, restores rsp and defines xmm0 as the result of the IR instruction. (2) EmitRSqrtEstimate uses that fallback with a lambda that applies FP::FPRSqrtEstimate to every element of the operand array; EmitFPVectorRSqrtEstimate32 and EmitFPVectorRSqrtEstimate64 forward to it. (3) a64.inc enables the previously commented-out FRSQRTE_4 decoder entry. (4) TranslatorVisitor::FRSQRTE_4 returns ReservedValue() when sz is set and Q is clear, otherwise uses datasize = Q ? 128 : 64 and esize = sz ? 64 : 32. (5) IREmitter::FPVectorRSqrtEstimate maps esize 32 and 64 to the two new opcodes and treats any other esize as UNREACHABLE. (6) opcodes.inc declares FPVectorRSqrtEstimate32 and FPVectorRSqrtEstimate64 as taking one U128 and producing a U128. NOTE(review): in this copy the text between angle brackets appears to be missing (for example "template" is immediately followed by the function declaration and the static_cast has no target type), and line breaks are collapsed; presumably an extraction artefact - compare against the upstream commit before applying. */ .../emit_x64_vector_floating_point.cpp | 45 +++++++++++++++++++ src/frontend/A64/decoder/a64.inc | 2 +- .../translate/impl/simd_two_register_misc.cpp | 15 +++++++ src/frontend/ir/ir_emitter.cpp | 11 +++++ src/frontend/ir/ir_emitter.h | 1 + src/frontend/ir/opcodes.inc | 2 + 6 files changed, 75 insertions(+), 1 deletion(-) diff --git a/src/backend_x64/emit_x64_vector_floating_point.cpp b/src/backend_x64/emit_x64_vector_floating_point.cpp index 16c71623..20e516ae 100644 --- a/src/backend_x64/emit_x64_vector_floating_point.cpp +++ b/src/backend_x64/emit_x64_vector_floating_point.cpp @@ -10,7 +10,10 @@ #include "backend_x64/block_of_code.h" #include "backend_x64/emit_x64.h" #include "common/bit_util.h" +#include "common/fp/fpcr.h" +#include "common/fp/op.h" #include "common/fp/util.h" +#include "common/mp.h" #include "frontend/ir/basic_block.h" #include "frontend/ir/microinstruction.h" @@ -222,6 +225,31 @@ static void EmitVectorOperation64(BlockOfCode& code, EmitContext& ctx, IR::Inst* ctx.reg_alloc.DefineValue(inst, result); } +template +inline void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) { + const auto fn = static_cast*>(lambda); + + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]); + ctx.reg_alloc.EndOfAllocScope(); + ctx.reg_alloc.HostCall(nullptr); + + constexpr u32 stack_space = 2 * 16; + code.sub(rsp, stack_space + ABI_SHADOW_SPACE); + code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]); + code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]); + code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR()); + code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]); + + code.movaps(xword[code.ABI_PARAM2], arg1); 
code.CallFunction(fn); + code.movaps(xmm0, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); + + code.add(rsp, stack_space + ABI_SHADOW_SPACE); + + ctx.reg_alloc.DefineValue(inst, xmm0); +} + void EmitX64::EmitFPVectorAbs16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -365,6 +393,23 @@ void EmitX64::EmitFPVectorPairedAddLower64(EmitContext& ctx, IR::Inst* inst) { }); } +template +static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { + EmitOneArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& operand, FP::FPCR fpcr, FP::FPSR& fpsr) { + for (size_t i = 0; i < result.size(); i++) { + result[i] = FP::FPRSqrtEstimate(operand[i], fpcr, fpsr); + } + }); +} + +void EmitX64::EmitFPVectorRSqrtEstimate32(EmitContext& ctx, IR::Inst* inst) { + EmitRSqrtEstimate(code, ctx, inst); +} + +void EmitX64::EmitFPVectorRSqrtEstimate64(EmitContext& ctx, IR::Inst* inst) { + EmitRSqrtEstimate(code, ctx, inst); +} + void EmitX64::EmitFPVectorS32ToSingle(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]); diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index aeb92129..ab15234e 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -654,7 +654,7 @@ INST(FCMLE_4, "FCMLE (zero)", "0Q101 //INST(FCVTZU_int_4, "FCVTZU (vector, integer)", "0Q1011101z100001101110nnnnnddddd") //INST(URSQRTE, "URSQRTE", "0Q1011101z100001110010nnnnnddddd") //INST(FRSQRTE_3, "FRSQRTE", "0Q10111011111001110110nnnnnddddd") -//INST(FRSQRTE_4, "FRSQRTE", "0Q1011101z100001110110nnnnnddddd") +INST(FRSQRTE_4, "FRSQRTE", "0Q1011101z100001110110nnnnnddddd") //INST(FSQRT_1, "FSQRT (vector)", "0Q10111011111001111110nnnnnddddd") //INST(FSQRT_2, "FSQRT (vector)", "0Q1011101z100001111110nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/simd_two_register_misc.cpp 
b/src/frontend/A64/translate/impl/simd_two_register_misc.cpp index fb825132..f93fd8e1 100644 --- a/src/frontend/A64/translate/impl/simd_two_register_misc.cpp +++ b/src/frontend/A64/translate/impl/simd_two_register_misc.cpp @@ -217,6 +217,21 @@ bool TranslatorVisitor::FCMLT_4(bool Q, bool sz, Vec Vn, Vec Vd) { return FPCompareAgainstZero(*this, Q, sz, Vn, Vd, ComparisonType::LT); } +bool TranslatorVisitor::FRSQRTE_4(bool Q, bool sz, Vec Vn, Vec Vd) { + if (sz && !Q) { + return ReservedValue(); + } + + const size_t datasize = Q ? 128 : 64; + const size_t esize = sz ? 64 : 32; + + const IR::U128 operand = V(datasize, Vn); + const IR::U128 result = ir.FPVectorRSqrtEstimate(esize, operand); + + V(datasize, Vd, result); + return true; +} + bool TranslatorVisitor::FNEG_1(bool Q, Vec Vn, Vec Vd) { const size_t datasize = Q ? 128 : 64; diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index da07d05d..863aaecf 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -1679,6 +1679,17 @@ U128 IREmitter::FPVectorPairedAddLower(size_t esize, const U128& a, const U128& return {}; } +U128 IREmitter::FPVectorRSqrtEstimate(size_t esize, const U128& a) { + switch (esize) { + case 32: + return Inst(Opcode::FPVectorRSqrtEstimate32, a); + case 64: + return Inst(Opcode::FPVectorRSqrtEstimate64, a); + } + UNREACHABLE(); + return {}; +} + U128 IREmitter::FPVectorSub(size_t esize, const U128& a, const U128& b) { switch (esize) { case 32: diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index dde77933..18724816 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -299,6 +299,7 @@ public: U128 FPVectorMul(size_t esize, const U128& a, const U128& b); U128 FPVectorPairedAdd(size_t esize, const U128& a, const U128& b); U128 FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b); + U128 FPVectorRSqrtEstimate(size_t esize, const U128& a); U128 FPVectorSub(size_t esize, const U128& 
a, const U128& b); U128 FPVectorS32ToSingle(const U128& a); U128 FPVectorS64ToDouble(const U128& a); diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index 63756145..e294dc86 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -435,6 +435,8 @@ OPCODE(FPVectorPairedAddLower32, T::U128, T::U128, T::U OPCODE(FPVectorPairedAddLower64, T::U128, T::U128, T::U128 ) OPCODE(FPVectorPairedAdd32, T::U128, T::U128, T::U128 ) OPCODE(FPVectorPairedAdd64, T::U128, T::U128, T::U128 ) +OPCODE(FPVectorRSqrtEstimate32, T::U128, T::U128 ) +OPCODE(FPVectorRSqrtEstimate64, T::U128, T::U128 ) OPCODE(FPVectorS32ToSingle, T::U128, T::U128 ) OPCODE(FPVectorS64ToDouble, T::U128, T::U128 ) OPCODE(FPVectorSub32, T::U128, T::U128, T::U128 )