diff --git a/src/backend/x64/emit_x64_vector_floating_point.cpp b/src/backend/x64/emit_x64_vector_floating_point.cpp
index deb5ab1f..0bf4f619 100644
--- a/src/backend/x64/emit_x64_vector_floating_point.cpp
+++ b/src/backend/x64/emit_x64_vector_floating_point.cpp
@@ -1160,28 +1160,30 @@ void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     const auto rounding = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());
     const bool exact = inst->GetArg(2).GetU1();
 
-    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero && !exact) {
-        const u8 round_imm = [&]() -> u8 {
-            switch (rounding) {
-            case FP::RoundingMode::ToNearest_TieEven:
-                return 0b00;
-            case FP::RoundingMode::TowardsPlusInfinity:
-                return 0b10;
-            case FP::RoundingMode::TowardsMinusInfinity:
-                return 0b01;
-            case FP::RoundingMode::TowardsZero:
-                return 0b11;
-            default:
-                UNREACHABLE();
-            }
-            return 0;
-        }();
+    if constexpr (fsize != 16) {
+        if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero && !exact) {
+            const u8 round_imm = [&]() -> u8 {
+                switch (rounding) {
+                case FP::RoundingMode::ToNearest_TieEven:
+                    return 0b00;
+                case FP::RoundingMode::TowardsPlusInfinity:
+                    return 0b10;
+                case FP::RoundingMode::TowardsMinusInfinity:
+                    return 0b01;
+                case FP::RoundingMode::TowardsZero:
+                    return 0b11;
+                default:
+                    UNREACHABLE();
+                }
+                return 0;
+            }();
 
-        EmitTwoOpVectorOperation(code, ctx, inst, [&](const Xbyak::Xmm& result, const Xbyak::Xmm& xmm_a){
-            FCODE(roundp)(result, xmm_a, round_imm);
-        });
+            EmitTwoOpVectorOperation(code, ctx, inst, [&](const Xbyak::Xmm& result, const Xbyak::Xmm& xmm_a){
+                FCODE(roundp)(result, xmm_a, round_imm);
+            });
 
-        return;
+            return;
+        }
     }
 
     using rounding_list = mp::list<
@@ -1218,6 +1220,10 @@ void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     EmitTwoOpFallback(code, ctx, inst, lut.at(std::make_tuple(rounding, exact)));
 }
 
+void EmitX64::EmitFPVectorRoundInt16(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPVectorRoundInt<16>(code, ctx, inst);
+}
+
 void EmitX64::EmitFPVectorRoundInt32(EmitContext& ctx, IR::Inst* inst) {
     EmitFPVectorRoundInt<32>(code, ctx, inst);
 }
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 61c8bf1c..27527f6c 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -2278,11 +2278,16 @@ U128 IREmitter::FPVectorRecipStepFused(size_t esize, const U128& a, const U128& b) {
 }
 
 U128 IREmitter::FPVectorRoundInt(size_t esize, const U128& operand, FP::RoundingMode rounding, bool exact) {
+    const IR::U8 rounding_imm = Imm8(static_cast<u8>(rounding));
+    const IR::U1 exact_imm = Imm1(exact);
+
     switch (esize) {
+    case 16:
+        return Inst<U128>(Opcode::FPVectorRoundInt16, operand, rounding_imm, exact_imm);
     case 32:
-        return Inst<U128>(Opcode::FPVectorRoundInt32, operand, Imm8(static_cast<u8>(rounding)), Imm1(exact));
+        return Inst<U128>(Opcode::FPVectorRoundInt32, operand, rounding_imm, exact_imm);
     case 64:
-        return Inst<U128>(Opcode::FPVectorRoundInt64, operand, Imm8(static_cast<u8>(rounding)), Imm1(exact));
+        return Inst<U128>(Opcode::FPVectorRoundInt64, operand, rounding_imm, exact_imm);
     }
     UNREACHABLE();
     return {};
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 527aef5b..c7a4c227 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -574,6 +574,7 @@ OPCODE(FPVectorRecipEstimate32, U128, U128 )
 OPCODE(FPVectorRecipEstimate64,                             U128,           U128                                            )
 OPCODE(FPVectorRecipStepFused32,                            U128,           U128, U128                                      )
 OPCODE(FPVectorRecipStepFused64,                            U128,           U128, U128                                      )
+OPCODE(FPVectorRoundInt16,                                  U128,           U128, U8, U1                                    )
 OPCODE(FPVectorRoundInt32,                                  U128,           U128, U8, U1                                    )
 OPCODE(FPVectorRoundInt64,                                  U128,           U128, U8, U1                                    )
 OPCODE(FPVectorRSqrtEstimate32,                             U128,           U128                                            )