From fa8925c4df95663f9c347616f26e41bdd2325386 Mon Sep 17 00:00:00 2001 From: MerryMage <MerryMage@users.noreply.github.com> Date: Sat, 17 Nov 2018 21:31:22 +0000 Subject: [PATCH] IR: Implement FPVectorMulX --- .../x64/emit_x64_vector_floating_point.cpp | 44 +++++++++++++++++++ src/frontend/ir/ir_emitter.cpp | 11 +++++ src/frontend/ir/ir_emitter.h | 1 + src/frontend/ir/opcodes.inc | 2 + 4 files changed, 58 insertions(+) diff --git a/src/backend/x64/emit_x64_vector_floating_point.cpp b/src/backend/x64/emit_x64_vector_floating_point.cpp index b4c8b07b..12b92c4c 100644 --- a/src/backend/x64/emit_x64_vector_floating_point.cpp +++ b/src/backend/x64/emit_x64_vector_floating_point.cpp @@ -954,6 +954,50 @@ void EmitX64::EmitFPVectorMulAdd64(EmitContext& ctx, IR::Inst* inst) { EmitFPVectorMulAdd<64>(code, ctx, inst); } +template<size_t fsize> +static void EmitFPVectorMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { + using FPT = mp::unsigned_integer_of_size<fsize>; + + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm(); + + code.movaps(nan_mask, xmm_b); + code.movaps(result, xmm_a); + FCODE(cmpunordp)(nan_mask, xmm_a); + FCODE(mulp)(result, xmm_b); + FCODE(cmpunordp)(nan_mask, result); + + const auto nan_handler = static_cast<void(*)(std::array<VectorArray<FPT>, 3>&, FP::FPCR)>( + [](std::array<VectorArray<FPT>, 3>& values, FP::FPCR fpcr) { + VectorArray<FPT>& result = values[0]; + for (size_t elementi = 0; elementi < result.size(); ++elementi) { + if (auto r = FP::ProcessNaNs<FPT>(values[1][elementi], values[2][elementi])) { + result[elementi] = fpcr.DN() ? 
FP::FPInfo<FPT>::DefaultNaN() : *r; + } else if (FP::IsNaN<FPT>(result[elementi])) { + const FPT sign = (values[1][elementi] ^ values[2][elementi]) & FP::FPInfo<FPT>::sign_mask; + result[elementi] = sign | FP::FPValue<FPT, false, 0, 2>(); + } + } + } + ); + + HandleNaNs<fsize, 2>(code, ctx, {result, xmm_a, xmm_b}, nan_mask, nan_handler); + + ctx.reg_alloc.DefineValue(inst, result); +} + +void EmitX64::EmitFPVectorMulX32(EmitContext& ctx, IR::Inst* inst) { + EmitFPVectorMulX<32>(code, ctx, inst); +} + +void EmitX64::EmitFPVectorMulX64(EmitContext& ctx, IR::Inst* inst) { + EmitFPVectorMulX<64>(code, ctx, inst); +} + void EmitX64::EmitFPVectorNeg16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index ab708013..46b27c0c 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -2135,6 +2135,17 @@ U128 IREmitter::FPVectorMulAdd(size_t esize, const U128& a, const U128& b, const return {}; } +U128 IREmitter::FPVectorMulX(size_t esize, const U128& a, const U128& b) { + switch (esize) { + case 32: + return Inst<U128>(Opcode::FPVectorMulX32, a, b); + case 64: + return Inst<U128>(Opcode::FPVectorMulX64, a, b); + } + UNREACHABLE(); + return {}; +} + U128 IREmitter::FPVectorNeg(size_t esize, const U128& a) { switch (esize) { case 16: diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index 0f49600f..5bfb7484 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -334,6 +334,7 @@ public: U128 FPVectorMin(size_t esize, const U128& a, const U128& b); U128 FPVectorMul(size_t esize, const U128& a, const U128& b); U128 FPVectorMulAdd(size_t esize, const U128& addend, const U128& op1, const U128& op2); + U128 FPVectorMulX(size_t esize, const U128& a, const U128& b); U128 FPVectorNeg(size_t esize, const U128& a); U128 FPVectorPairedAdd(size_t esize, const U128& a, const U128& b); U128 FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b); 
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index 4bcf3930..54ea8504 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -543,6 +543,8 @@ OPCODE(FPVectorMul32, U128, U128 OPCODE(FPVectorMul64, U128, U128, U128 ) OPCODE(FPVectorMulAdd32, U128, U128, U128, U128 ) OPCODE(FPVectorMulAdd64, U128, U128, U128, U128 ) +OPCODE(FPVectorMulX32, U128, U128, U128 ) +OPCODE(FPVectorMulX64, U128, U128, U128 ) OPCODE(FPVectorNeg16, U128, U128 ) OPCODE(FPVectorNeg32, U128, U128 ) OPCODE(FPVectorNeg64, U128, U128 )