diff --git a/src/backend/x64/emit_x64_floating_point.cpp b/src/backend/x64/emit_x64_floating_point.cpp index 3fd10ced..2321b77b 100644 --- a/src/backend/x64/emit_x64_floating_point.cpp +++ b/src/backend/x64/emit_x64_floating_point.cpp @@ -843,7 +843,7 @@ static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, siz const bool exact = inst->GetArg(2).GetU1(); const auto round_imm = ConvertRoundingModeToX64Immediate(rounding_mode); - if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41) && round_imm && !exact) { + if (fsize != 16 && code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41) && round_imm && !exact) { if (fsize == 64) { FPTwoOp<64>(code, ctx, inst, [&](Xbyak::Xmm result) { code.roundsd(result, result, *round_imm); @@ -857,7 +857,9 @@ static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, siz return; } - using fsize_list = mp::list, mp::vlift>; + using fsize_list = mp::list, + mp::vlift, + mp::vlift>; using rounding_list = mp::list< std::integral_constant, std::integral_constant, @@ -897,6 +899,10 @@ static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, siz code.CallFunction(lut.at(std::make_tuple(fsize, rounding_mode, exact))); } +void EmitX64::EmitFPRoundInt16(EmitContext& ctx, IR::Inst* inst) { + EmitFPRound(code, ctx, inst, 16); +} + void EmitX64::EmitFPRoundInt32(EmitContext& ctx, IR::Inst* inst) { EmitFPRound(code, ctx, inst, 32); } diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index 53df95b0..61c8bf1c 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -1950,11 +1950,21 @@ U32U64 IREmitter::FPRecipStepFused(const U32U64& a, const U32U64& b) { return Inst(Opcode::FPRecipStepFused64, a, b); } -U32U64 IREmitter::FPRoundInt(const U32U64& a, FP::RoundingMode rounding, bool exact) { - if (a.GetType() == Type::U32) { - return Inst(Opcode::FPRoundInt32, a, static_cast(rounding), Imm1(exact)); +U16U32U64 IREmitter::FPRoundInt(const U16U32U64& a, FP::RoundingMode rounding, bool exact) { + const u8 rounding_value = static_cast(rounding); + const IR::U1 exact_imm = Imm1(exact); + + switch (a.GetType()) { + case Type::U16: + return Inst(Opcode::FPRoundInt16, a, rounding_value, exact_imm); + case Type::U32: + return Inst(Opcode::FPRoundInt32, a, rounding_value, exact_imm); + case Type::U64: + return Inst(Opcode::FPRoundInt64, a, rounding_value, exact_imm); + default: + UNREACHABLE(); + return U16U32U64{}; } - return Inst(Opcode::FPRoundInt64, a, static_cast(rounding), Imm1(exact)); } U32U64 IREmitter::FPRSqrtEstimate(const U32U64& a) { diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index 32fad5b7..09935cf6 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -308,7 +308,7 @@ public: U32U64 FPRecipEstimate(const U32U64& a); U16U32U64 FPRecipExponent(const U16U32U64& a); U32U64 FPRecipStepFused(const U32U64& a, const U32U64& b); - U32U64 FPRoundInt(const U32U64& a, FP::RoundingMode rounding, bool exact); + U16U32U64 FPRoundInt(const U16U32U64& a, FP::RoundingMode rounding, bool exact); U32U64 FPRSqrtEstimate(const U32U64& a); U32U64 FPRSqrtStepFused(const U32U64& a, const U32U64& b); U32U64 FPSqrt(const U32U64& a); diff --git a/src/frontend/ir/microinstruction.cpp b/src/frontend/ir/microinstruction.cpp index c087b514..958fd471 100644 --- a/src/frontend/ir/microinstruction.cpp +++ b/src/frontend/ir/microinstruction.cpp @@ -279,6 +279,7 @@ bool Inst::ReadsFromAndWritesToFPSRCumulativeExceptionBits() const { case Opcode::FPRecipExponent64: case Opcode::FPRecipStepFused32: case Opcode::FPRecipStepFused64: + case Opcode::FPRoundInt16: case Opcode::FPRoundInt32: case Opcode::FPRoundInt64: case Opcode::FPRSqrtEstimate32: diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index db3128e8..527aef5b 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -498,6 +498,7 @@ OPCODE(FPRecipExponent32, U32, U32 OPCODE(FPRecipExponent64, U64, U64 ) OPCODE(FPRecipStepFused32, U32, U32, U32 ) OPCODE(FPRecipStepFused64, U64, U64, U64 ) +OPCODE(FPRoundInt16, U16, U16, U8, U1 ) OPCODE(FPRoundInt32, U32, U32, U8, U1 ) OPCODE(FPRoundInt64, U64, U64, U8, U1 ) OPCODE(FPRSqrtEstimate32, U32, U32 )