diff --git a/src/backend_x64/emit_x64_floating_point.cpp b/src/backend_x64/emit_x64_floating_point.cpp
index 68ebfcd4..90e3ef37 100644
--- a/src/backend_x64/emit_x64_floating_point.cpp
+++ b/src/backend_x64/emit_x64_floating_point.cpp
@@ -793,6 +793,59 @@ void EmitX64::EmitFPMulAdd64(EmitContext& ctx, IR::Inst* inst) {
     });
 }
 
+static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, size_t fsize) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const auto rounding = static_cast<FP::RoundingMode>(args[1].GetImmediateU8());
+    const bool exact = args[2].GetImmediateU1();
+
+    using fsize_list = mp::list<mp::vlift<size_t(32)>, mp::vlift<size_t(64)>>;
+    using rounding_list = mp::list<
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieEven>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsPlusInfinity>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsMinusInfinity>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsZero>,
+        std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieAwayFromZero>
+    >;
+    using exact_list = mp::list<mp::vlift<true>, mp::vlift<false>>;
+
+    using key_type = std::tuple<size_t, FP::RoundingMode, bool>;
+    using value_type = u64(*)(u64, FP::FPSR&, A64::FPCR);
+
+    static const auto lut = mp::GenerateLookupTableFromList<key_type, value_type>(
+        [](auto args) {
+            return std::pair<key_type, value_type>{
+                mp::to_tuple<decltype(args)>,
+                static_cast<value_type>(
+                    [](u64 input, FP::FPSR& fpsr, A64::FPCR fpcr) {
+                        constexpr auto t = mp::to_tuple<decltype(args)>;
+                        constexpr size_t fsize = std::get<0>(t);
+                        constexpr FP::RoundingMode rounding_mode = std::get<1>(t);
+                        constexpr bool exact = std::get<2>(t);
+                        using InputSize = mp::unsigned_integer_of_size<fsize>;
+
+                        return FP::FPRoundInt<InputSize>(static_cast<InputSize>(input), fpcr, rounding_mode, exact, fpsr);
+                    }
+                )
+            };
+        },
+        mp::cartesian_product<fsize_list, rounding_list, exact_list>{}
+    );
+
+    ctx.reg_alloc.HostCall(inst, args[0]);
+    code.lea(code.ABI_PARAM2, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
+    code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR());
+    code.CallFunction(lut.at(std::make_tuple(fsize, rounding, exact)));
+}
+
+void EmitX64::EmitFPRoundInt32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPRound(code, ctx, inst, 32);
+}
+
+void EmitX64::EmitFPRoundInt64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPRound(code, ctx, inst, 64);
+}
+
 void EmitX64::EmitFPSqrt32(EmitContext& ctx, IR::Inst* inst) {
     FPTwoOp32(code, ctx, inst, &Xbyak::CodeGenerator::sqrtss);
 }
diff --git a/src/common/fp/op/FPRoundInt.cpp b/src/common/fp/op/FPRoundInt.cpp
index 2bc0e1be..c0a17f1f 100644
--- a/src/common/fp/op/FPRoundInt.cpp
+++ b/src/common/fp/op/FPRoundInt.cpp
@@ -43,7 +43,7 @@ u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr)
         return op;
     }
 
-    u64 int_result = value.mantissa;
+    u64 int_result = sign ? Safe::Negate<u64>(value.mantissa) : static_cast<u64>(value.mantissa);
     const ResidualError error = ResidualErrorOnRightShift(int_result, -value.exponent);
     int_result = Safe::ArithmeticShiftLeft(int_result, value.exponent);
 
@@ -72,9 +72,11 @@ u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr)
         int_result++;
     }
 
+    const u64 abs_int_result = Common::MostSignificantBit(int_result) ? Safe::Negate<u64>(int_result) : static_cast<u64>(int_result);
+
     const FPT result = int_result == 0
                      ? FPInfo<FPT>::Zero(sign)
-                     : FPRound<FPT>(FPUnpacked{sign, 0, int_result}, fpcr, RoundingMode::TowardsZero, fpsr);
+                     : FPRound<FPT>(FPUnpacked{sign, 0, abs_int_result}, fpcr, RoundingMode::TowardsZero, fpsr);
 
     if (error != ResidualError::Zero && exact) {
         FPProcessException(FPExc::Inexact, fpcr, fpsr);
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 6d4dc842..860b9f93 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1449,6 +1449,13 @@ U32U64 IREmitter::FPNeg(const U32U64& a) {
     }
 }
 
+U32U64 IREmitter::FPRoundInt(const U32U64& a, FP::RoundingMode rounding, bool exact) {
+    if (a.GetType() == Type::U32) {
+        return Inst<U32>(Opcode::FPRoundInt32, a, static_cast<u8>(rounding), Imm1(exact));
+    }
+    return Inst<U64>(Opcode::FPRoundInt64, a, static_cast<u8>(rounding), Imm1(exact));
+}
+
 U32U64 IREmitter::FPSqrt(const U32U64& a) {
     if (a.GetType() == Type::U32) {
         return Inst<U32>(Opcode::FPSqrt32, a);
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 36fd0e68..91d50976 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -266,6 +266,7 @@ public:
     U32U64 FPMul(const U32U64& a, const U32U64& b, bool fpscr_controlled);
     U32U64 FPMulAdd(const U32U64& a, const U32U64& b, const U32U64& c, bool fpscr_controlled);
     U32U64 FPNeg(const U32U64& a);
+    U32U64 FPRoundInt(const U32U64& a, FP::RoundingMode rounding, bool exact);
     U32U64 FPSqrt(const U32U64& a);
     U32U64 FPSub(const U32U64& a, const U32U64& b, bool fpscr_controlled);
     U32 FPDoubleToSingle(const U64& a, bool fpscr_controlled);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 5935759c..ede93ec8 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -384,6 +384,8 @@ OPCODE(FPMulAdd32, T::U32, T::U32, T::U
 OPCODE(FPMulAdd64, T::U64, T::U64, T::U64, T::U64 )
 OPCODE(FPNeg32, T::U32, T::U32 )
 OPCODE(FPNeg64, T::U64, T::U64 )
+OPCODE(FPRoundInt32, T::U32, T::U32, T::U8, T::U1 )
+OPCODE(FPRoundInt64, T::U64, T::U64, T::U8, T::U1 )
 OPCODE(FPSqrt32, T::U32, T::U32 )
 OPCODE(FPSqrt64, T::U64, T::U64 )
 OPCODE(FPSub32, T::U32, T::U32, T::U32 )