From 507bcd8b8ba363bc415a92d2b4b5ef412a51ad0e Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Thu, 26 Jul 2018 12:08:56 +0100
Subject: [PATCH] IR: Implement FPVectorTo{Signed,Unsigned}Fixed

---
 .../emit_x64_vector_floating_point.cpp        | 62 +++++++++++++++++++
 src/frontend/ir/ir_emitter.cpp                | 32 ++++++++--
 src/frontend/ir/ir_emitter.h                  |  4 +-
 src/frontend/ir/opcodes.inc                   |  8 ++-
 4 files changed, 99 insertions(+), 7 deletions(-)

diff --git a/src/backend_x64/emit_x64_vector_floating_point.cpp b/src/backend_x64/emit_x64_vector_floating_point.cpp
index 48c735b1..62629557 100644
--- a/src/backend_x64/emit_x64_vector_floating_point.cpp
+++ b/src/backend_x64/emit_x64_vector_floating_point.cpp
@@ -18,8 +18,13 @@
 #include "common/fp/info.h"
 #include "common/fp/op.h"
 #include "common/fp/util.h"
+#include "common/mp/cartesian_product.h"
 #include "common/mp/function_info.h"
 #include "common/mp/integer.h"
+#include "common/mp/list.h"
+#include "common/mp/lut.h"
+#include "common/mp/to_tuple.h"
+#include "common/mp/vllift.h"
 #include "frontend/ir/basic_block.h"
 #include "frontend/ir/microinstruction.h"
 
@@ -815,6 +820,63 @@ void EmitX64::EmitFPVectorSub64(EmitContext& ctx, IR::Inst* inst) {
     EmitThreeOpVectorOperation<64, DefaultIndexer>(code, ctx, inst, &Xbyak::CodeGenerator::subpd);
 }
 
+template<size_t fsize, bool unsigned_>
+void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
+    using FPT = mp::unsigned_integer_of_size<fsize>;
+
+    const size_t fbits = inst->GetArg(1).GetU8();
+    const auto rounding = static_cast<FP::RoundingMode>(inst->GetArg(2).GetU8());
+
+    using fbits_list = mp::vllift<std::make_index_sequence<fsize + 1>>;
+    using rounding_list = mp::list<
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieEven>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsPlusInfinity>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsMinusInfinity>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsZero>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieAwayFromZero>
+    >;
+
+    using key_type = std::tuple<size_t, FP::RoundingMode>;
+    using value_type = void(*)(VectorArray<FPT>&, const VectorArray<FPT>&, FP::FPCR, FP::FPSR&);
+
+    static const auto lut = mp::GenerateLookupTableFromList<key_type, value_type>(
+        [](auto arg) {
+            return std::pair<key_type, value_type>{
+                mp::to_tuple<decltype(arg)>,
+                static_cast<value_type>(
+                    [](VectorArray<FPT>& output, const VectorArray<FPT>& input, FP::FPCR fpcr, FP::FPSR& fpsr) {
+                        constexpr size_t fbits = std::get<0>(mp::to_tuple<decltype(arg)>);
+                        constexpr FP::RoundingMode rounding_mode = std::get<1>(mp::to_tuple<decltype(arg)>);
+
+                        for (size_t i = 0; i < output.size(); ++i) {
+                            output[i] = static_cast<FPT>(FP::FPToFixed<FPT>(fsize, input[i], fbits, unsigned_, fpcr, rounding_mode, fpsr));
+                        }
+                    }
+                )
+            };
+        },
+        mp::cartesian_product<fbits_list, rounding_list>{}
+    );
+
+    EmitTwoOpFallback(code, ctx, inst, lut.at(std::make_tuple(fbits, rounding)));
+}
+
+void EmitX64::EmitFPVectorToSignedFixed32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPVectorToFixed<32, false>(code, ctx, inst);
+}
+
+void EmitX64::EmitFPVectorToSignedFixed64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPVectorToFixed<64, false>(code, ctx, inst);
+}
+
+void EmitX64::EmitFPVectorToUnsignedFixed32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPVectorToFixed<32, true>(code, ctx, inst);
+}
+
+void EmitX64::EmitFPVectorToUnsignedFixed64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPVectorToFixed<64, true>(code, ctx, inst);
+}
+
 void EmitX64::EmitFPVectorU32ToSingle(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index e5cd6a72..b3001b5f 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1822,6 +1822,14 @@ U128 IREmitter::FPVectorRSqrtStepFused(size_t esize, const U128& a, const U128&
     return {};
 }
 
+U128 IREmitter::FPVectorS32ToSingle(const U128& a) {
+    return Inst<U128>(Opcode::FPVectorS32ToSingle, a);
+}
+
+U128 IREmitter::FPVectorS64ToDouble(const U128& a) {
+    return Inst<U128>(Opcode::FPVectorS64ToDouble, a);
+}
+
 U128 IREmitter::FPVectorSub(size_t esize, const U128& a, const U128& b) {
     switch (esize) {
     case 32:
@@ -1833,12 +1841,28 @@ U128 IREmitter::FPVectorSub(size_t esize, const U128& a, const U128& b) {
     return {};
 }
 
-U128 IREmitter::FPVectorS32ToSingle(const U128& a) {
-    return Inst<U128>(Opcode::FPVectorS32ToSingle, a);
+U128 IREmitter::FPVectorToSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= esize);
+    switch (esize) {
+    case 32:
+        return Inst<U128>(Opcode::FPVectorToSignedFixed32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+    case 64:
+        return Inst<U128>(Opcode::FPVectorToSignedFixed64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+    }
+    UNREACHABLE();
+    return {};
 }
 
-U128 IREmitter::FPVectorS64ToDouble(const U128& a) {
-    return Inst<U128>(Opcode::FPVectorS64ToDouble, a);
+U128 IREmitter::FPVectorToUnsignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding) {
+    ASSERT(fbits <= esize);
+    switch (esize) {
+    case 32:
+        return Inst<U128>(Opcode::FPVectorToUnsignedFixed32, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+    case 64:
+        return Inst<U128>(Opcode::FPVectorToUnsignedFixed64, a, Imm8(static_cast<u8>(fbits)), Imm8(static_cast<u8>(rounding)));
+    }
+    UNREACHABLE();
+    return {};
 }
 
 U128 IREmitter::FPVectorU32ToSingle(const U128& a) {
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 9eb575ce..25bdaff1 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -312,9 +312,11 @@ public:
     U128 FPVectorRecipStepFused(size_t esize, const U128& a, const U128& b);
     U128 FPVectorRSqrtEstimate(size_t esize, const U128& a);
     U128 FPVectorRSqrtStepFused(size_t esize, const U128& a, const U128& b);
-    U128 FPVectorSub(size_t esize, const U128& a, const U128& b);
     U128 FPVectorS32ToSingle(const U128& a);
     U128 FPVectorS64ToDouble(const U128& a);
+    U128 FPVectorSub(size_t esize, const U128& a, const U128& b);
+    U128 FPVectorToSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding);
+    U128 FPVectorToUnsignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding);
     U128 FPVectorU32ToSingle(const U128& a);
     U128 FPVectorU64ToDouble(const U128& a);
 
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index eccd21d3..9ab17dcd 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -454,10 +454,10 @@ OPCODE(FPVectorMulAdd64, T::U128, T::U128,
 OPCODE(FPVectorNeg16,                 T::U128, T::U128                   )
 OPCODE(FPVectorNeg32,                 T::U128, T::U128                   )
 OPCODE(FPVectorNeg64,                 T::U128, T::U128                   )
-OPCODE(FPVectorPairedAddLower32,      T::U128, T::U128, T::U128          )
-OPCODE(FPVectorPairedAddLower64,      T::U128, T::U128, T::U128          )
 OPCODE(FPVectorPairedAdd32,           T::U128, T::U128, T::U128          )
 OPCODE(FPVectorPairedAdd64,           T::U128, T::U128, T::U128          )
+OPCODE(FPVectorPairedAddLower32,      T::U128, T::U128, T::U128          )
+OPCODE(FPVectorPairedAddLower64,      T::U128, T::U128, T::U128          )
 OPCODE(FPVectorRecipEstimate32,       T::U128, T::U128                   )
 OPCODE(FPVectorRecipEstimate64,       T::U128, T::U128                   )
 OPCODE(FPVectorRecipStepFused32,      T::U128, T::U128, T::U128          )
@@ -470,6 +470,10 @@ OPCODE(FPVectorS32ToSingle, T::U128, T::U128
 OPCODE(FPVectorS64ToDouble,           T::U128, T::U128                   )
 OPCODE(FPVectorSub32,                 T::U128, T::U128, T::U128          )
 OPCODE(FPVectorSub64,                 T::U128, T::U128, T::U128          )
+OPCODE(FPVectorToSignedFixed32,       T::U128, T::U128, T::U8, T::U8     )
+OPCODE(FPVectorToSignedFixed64,       T::U128, T::U128, T::U8, T::U8     )
+OPCODE(FPVectorToUnsignedFixed32,     T::U128, T::U128, T::U8, T::U8     )
+OPCODE(FPVectorToUnsignedFixed64,     T::U128, T::U128, T::U8, T::U8     )
 OPCODE(FPVectorU32ToSingle,           T::U128, T::U128                   )
 OPCODE(FPVectorU64ToDouble,           T::U128, T::U128                   )
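
Usage sketch (not part of the patch): the new IREmitter entry points take an element size, a fraction-bit count, and a rounding mode. The snippet below is illustrative only; the wrapper function is hypothetical, and it assumes the usual Dynarmic::IR and Dynarmic::FP namespaces and the "common/fp/rounding_mode.h" header from the surrounding codebase.

    #include "common/fp/rounding_mode.h"
    #include "frontend/ir/ir_emitter.h"

    namespace Example {

    // Convert each 32-bit lane of `operand` to a signed fixed-point value with
    // `fbits` fraction bits, rounding towards zero (FCVTZS-style behaviour).
    Dynarmic::IR::U128 VectorToSignedFixedTowardsZero(Dynarmic::IR::IREmitter& ir,
                                                      const Dynarmic::IR::U128& operand,
                                                      size_t fbits) {
        return ir.FPVectorToSignedFixed(32, operand, fbits, Dynarmic::FP::RoundingMode::TowardsZero);
    }

    } // namespace Example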