IR: Implement FPRoundInt
This commit is contained in:
parent
e24054f4d7
commit
b228694012
5 changed files with 67 additions and 2 deletions
|
@ -793,6 +793,59 @@ void EmitX64::EmitFPMulAdd64(EmitContext& ctx, IR::Inst* inst) {
|
|||
});
|
||||
}
|
||||
|
||||
// Emits a host call that rounds a floating-point value to an integral value.
//
// code  - code generator the call sequence is emitted into.
// ctx   - emit context; supplies the register allocator and the FPCR value.
// inst  - IR instruction being lowered. Argument 0 is the operand, argument 1
//         is an immediate u8 holding an FP::RoundingMode, argument 2 is an
//         immediate u1 "exact" flag.
// fsize - operand width in bits; the lookup table below has entries for 32
//         and 64 only.
//
// The routine that is called is picked from a table keyed by
// (fsize, rounding mode, exact); each entry forwards to FP::FPRoundInt with
// those three values baked in as compile-time constants.
static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, size_t fsize) {
    using key_type = std::tuple<size_t, FP::RoundingMode, bool>;
    using value_type = u64(*)(u64, FP::FPSR&, A64::FPCR);

    // Every value each key component may take; the table is generated from
    // the cartesian product of these three lists.
    using fsize_list = mp::list<mp::vlift<size_t(32)>, mp::vlift<size_t(64)>>;
    using rounding_list = mp::list<
        std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieEven>,
        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsPlusInfinity>,
        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsMinusInfinity>,
        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsZero>,
        std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieAwayFromZero>
    >;
    using exact_list = mp::list<mp::vlift<true>, mp::vlift<false>>;

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    // Both selectors are read as immediates, so they are known at emit time.
    const auto rounding = static_cast<FP::RoundingMode>(args[1].GetImmediateU8());
    const bool exact = args[2].GetImmediateU1();

    static const auto lut = mp::GenerateLookupTableFromList<key_type, value_type>(
        [](auto combination) {
            using Combination = decltype(combination);

            // Captureless lambda, so it converts to a plain function pointer.
            const value_type thunk = [](u64 input, FP::FPSR& fpsr, A64::FPCR fpcr) {
                constexpr auto params = mp::to_tuple<Combination>;
                constexpr size_t bit_width = std::get<0>(params);
                constexpr FP::RoundingMode rounding_mode = std::get<1>(params);
                constexpr bool is_exact = std::get<2>(params);
                using InputSize = mp::unsigned_integer_of_size<bit_width>;

                return FP::FPRoundInt<InputSize>(static_cast<InputSize>(input), fpcr, rounding_mode, is_exact, fpsr);
            };

            return std::pair<key_type, value_type>{mp::to_tuple<Combination>, thunk};
        },
        mp::cartesian_product<fsize_list, rounding_list, exact_list>{}
    );

    const key_type lookup_key{fsize, rounding, exact};

    // NOTE(review): HostCall presumably places args[0] in the first ABI
    // parameter register - confirm against the register allocator.
    ctx.reg_alloc.HostCall(inst, args[0]);
    // Second parameter: address at r15 + offsetof_fpsr_exc (bound to FP::FPSR& in the callee).
    code.lea(code.ABI_PARAM2, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
    // Third parameter: the FPCR value supplied by the emit context.
    code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR());
    code.CallFunction(lut.at(lookup_key));
}
|
||||
|
||||
// Lowers FPRoundInt32 by delegating to EmitFPRound with a 32-bit operand width.
void EmitX64::EmitFPRoundInt32(EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t operand_bits = 32;
    EmitFPRound(code, ctx, inst, operand_bits);
}
|
||||
|
||||
// Lowers FPRoundInt64 by delegating to EmitFPRound with a 64-bit operand width.
void EmitX64::EmitFPRoundInt64(EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t operand_bits = 64;
    EmitFPRound(code, ctx, inst, operand_bits);
}
|
||||
|
||||
// Emitter for FPSqrt32: forwards to FPTwoOp32, passing the sqrtss member of
// Xbyak::CodeGenerator as the operation to apply.
void EmitX64::EmitFPSqrt32(EmitContext& ctx, IR::Inst* inst) {
|
||||
FPTwoOp32(code, ctx, inst, &Xbyak::CodeGenerator::sqrtss);
|
||||
}
|
||||
|
|
|
@ -43,7 +43,7 @@ u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr)
|
|||
return op;
|
||||
}
|
||||
|
||||
u64 int_result = value.mantissa;
|
||||
u64 int_result = sign ? Safe::Negate<u64>(value.mantissa) : static_cast<u64>(value.mantissa);
|
||||
const ResidualError error = ResidualErrorOnRightShift(int_result, -value.exponent);
|
||||
int_result = Safe::ArithmeticShiftLeft(int_result, value.exponent);
|
||||
|
||||
|
@ -72,9 +72,11 @@ u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr)
|
|||
int_result++;
|
||||
}
|
||||
|
||||
const u64 abs_int_result = Common::MostSignificantBit(int_result) ? Safe::Negate<u64>(int_result) : static_cast<u64>(int_result);
|
||||
|
||||
const FPT result = int_result == 0
|
||||
? FPInfo<FPT>::Zero(sign)
|
||||
: FPRound<FPT>(FPUnpacked<u64>{sign, 0, int_result}, fpcr, RoundingMode::TowardsZero, fpsr);
|
||||
: FPRound<FPT>(FPUnpacked<u64>{sign, 0, abs_int_result}, fpcr, RoundingMode::TowardsZero, fpsr);
|
||||
|
||||
if (error != ResidualError::Zero && exact) {
|
||||
FPProcessException(FPExc::Inexact, fpcr, fpsr);
|
||||
|
|
|
@ -1449,6 +1449,13 @@ U32U64 IREmitter::FPNeg(const U32U64& a) {
|
|||
}
|
||||
}
|
||||
|
||||
// Emits a round-to-integral instruction for a floating-point operand.
//
// a        - operand; a U32-typed value selects FPRoundInt32, anything else
//            selects FPRoundInt64.
// rounding - rounding mode, passed to the instruction as a u8.
// exact    - passed to the instruction as a one-bit immediate.
U32U64 IREmitter::FPRoundInt(const U32U64& a, FP::RoundingMode rounding, bool exact) {
    const u8 rounding_value = static_cast<u8>(rounding);
    const auto exact_flag = Imm1(exact);

    if (a.GetType() != Type::U32) {
        return Inst<U64>(Opcode::FPRoundInt64, a, rounding_value, exact_flag);
    }
    return Inst<U32>(Opcode::FPRoundInt32, a, rounding_value, exact_flag);
}
|
||||
|
||||
U32U64 IREmitter::FPSqrt(const U32U64& a) {
|
||||
if (a.GetType() == Type::U32) {
|
||||
return Inst<U32>(Opcode::FPSqrt32, a);
|
||||
|
|
|
@ -266,6 +266,7 @@ public:
|
|||
U32U64 FPMul(const U32U64& a, const U32U64& b, bool fpscr_controlled);
|
||||
U32U64 FPMulAdd(const U32U64& a, const U32U64& b, const U32U64& c, bool fpscr_controlled);
|
||||
U32U64 FPNeg(const U32U64& a);
|
||||
// Emits FPRoundInt32 when `a` is U32-typed, otherwise FPRoundInt64; `rounding`
// is passed as a u8 and `exact` as a one-bit immediate.
U32U64 FPRoundInt(const U32U64& a, FP::RoundingMode rounding, bool exact);
|
||||
U32U64 FPSqrt(const U32U64& a);
|
||||
U32U64 FPSub(const U32U64& a, const U32U64& b, bool fpscr_controlled);
|
||||
U32 FPDoubleToSingle(const U64& a, bool fpscr_controlled);
|
||||
|
|
|
@ -384,6 +384,8 @@ OPCODE(FPMulAdd32, T::U32, T::U32, T::U
|
|||
OPCODE(FPMulAdd64, T::U64, T::U64, T::U64, T::U64 )
|
||||
OPCODE(FPNeg32, T::U32, T::U32 )
|
||||
OPCODE(FPNeg64, T::U64, T::U64 )
|
||||
// FPRoundInt: the emitter supplies the operand, the rounding mode as a u8, and
// the exact flag as a u1. NOTE(review): the first type column is presumably
// the result type - confirm against the OPCODE macro definition.
OPCODE(FPRoundInt32, T::U32, T::U32, T::U8, T::U1 )
|
||||
OPCODE(FPRoundInt64, T::U64, T::U64, T::U8, T::U1 )
|
||||
OPCODE(FPSqrt32, T::U32, T::U32 )
|
||||
OPCODE(FPSqrt64, T::U64, T::U64 )
|
||||
OPCODE(FPSub32, T::U32, T::U32, T::U32 )
|
||||
|
|
Loading…
Reference in a new issue