IR: Implement FPRoundInt
This commit is contained in:
parent
e24054f4d7
commit
b228694012
5 changed files with 67 additions and 2 deletions
|
@ -793,6 +793,59 @@ void EmitX64::EmitFPMulAdd64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits a host call that performs FP::FPRoundInt on the value held in IR argument 0.
//
// IR argument 1 is read as an immediate u8 and interpreted as an FP::RoundingMode; IR argument 2
// is read as an immediate u1 "exact" flag. Together with `fsize` (operand width in bits) they
// select one entry of a statically generated table of function pointers, each of which wraps a
// distinct FP::FPRoundInt call with those three values baked in as compile-time constants.
//
// Only the widths (32, 64), the five rounding modes and the two exact values enumerated below
// have table entries.
static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, size_t fsize) {
    // Key: (operand width in bits, rounding mode, exact flag).
    using LutKey = std::tuple<size_t, FP::RoundingMode, bool>;
    // Value: wrapper taking the raw operand bits, the FPSR to update, and the FPCR to honour.
    using LutFn = u64(*)(u64, FP::FPSR&, A64::FPCR);

    using WidthOptions = mp::list<mp::vlift<size_t(32)>, mp::vlift<size_t(64)>>;
    using RoundingOptions = mp::list<
        std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieEven>,
        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsPlusInfinity>,
        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsMinusInfinity>,
        std::integral_constant<FP::RoundingMode, FP::RoundingMode::TowardsZero>,
        std::integral_constant<FP::RoundingMode, FP::RoundingMode::ToNearest_TieAwayFromZero>
    >;
    using ExactOptions = mp::list<mp::vlift<true>, mp::vlift<false>>;

    auto arg_info = ctx.reg_alloc.GetArgumentInfo(inst);

    const auto selected_rounding = static_cast<FP::RoundingMode>(arg_info[1].GetImmediateU8());
    const bool selected_exact = arg_info[2].GetImmediateU1();

    // One entry per element of WidthOptions x RoundingOptions x ExactOptions.
    static const auto fallback_table = mp::GenerateLookupTableFromList<LutKey, LutFn>(
        [](auto combo) {
            // The inner lambda is captureless: it only uses decltype(combo), so it converts to a
            // plain function pointer.
            const LutFn wrapper = [](u64 input, FP::FPSR& fpsr, A64::FPCR fpcr) {
                constexpr auto params = mp::to_tuple<decltype(combo)>;
                constexpr size_t width = std::get<0>(params);
                constexpr FP::RoundingMode mode = std::get<1>(params);
                constexpr bool is_exact = std::get<2>(params);
                using InputSize = mp::unsigned_integer_of_size<width>;

                // Narrow the 64-bit host argument to the operand width before rounding.
                return FP::FPRoundInt<InputSize>(static_cast<InputSize>(input), fpcr, mode, is_exact, fpsr);
            };

            return std::pair<LutKey, LutFn>{mp::to_tuple<decltype(combo)>, wrapper};
        },
        mp::cartesian_product<WidthOptions, RoundingOptions, ExactOptions>{}
    );

    // Set up the call: IR argument 0 is handed to HostCall, the second ABI parameter receives the
    // address r15 + offsetof_fpsr_exc, and the third receives the FPCR value from the context.
    ctx.reg_alloc.HostCall(inst, arg_info[0]);
    code.lea(code.ABI_PARAM2, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
    code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR());

    const LutKey selector{fsize, selected_rounding, selected_exact};
    code.CallFunction(fallback_table.at(selector));
}
|
||||||
|
|
||||||
|
// Emits the FPRoundInt32 IR instruction by delegating to EmitFPRound with a 32-bit operand width.
void EmitX64::EmitFPRoundInt32(EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t operand_bits = 32;
    EmitFPRound(code, ctx, inst, operand_bits);
}
|
||||||
|
|
||||||
|
// Emits the FPRoundInt64 IR instruction by delegating to EmitFPRound with a 64-bit operand width.
void EmitX64::EmitFPRoundInt64(EmitContext& ctx, IR::Inst* inst) {
    constexpr size_t operand_bits = 64;
    EmitFPRound(code, ctx, inst, operand_bits);
}
|
||||||
|
|
||||||
void EmitX64::EmitFPSqrt32(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitFPSqrt32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
FPTwoOp32(code, ctx, inst, &Xbyak::CodeGenerator::sqrtss);
|
FPTwoOp32(code, ctx, inst, &Xbyak::CodeGenerator::sqrtss);
|
||||||
}
|
}
|
||||||
|
|
|
@ -43,7 +43,7 @@ u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr)
|
||||||
return op;
|
return op;
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 int_result = value.mantissa;
|
u64 int_result = sign ? Safe::Negate<u64>(value.mantissa) : static_cast<u64>(value.mantissa);
|
||||||
const ResidualError error = ResidualErrorOnRightShift(int_result, -value.exponent);
|
const ResidualError error = ResidualErrorOnRightShift(int_result, -value.exponent);
|
||||||
int_result = Safe::ArithmeticShiftLeft(int_result, value.exponent);
|
int_result = Safe::ArithmeticShiftLeft(int_result, value.exponent);
|
||||||
|
|
||||||
|
@ -72,9 +72,11 @@ u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr)
|
||||||
int_result++;
|
int_result++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const u64 abs_int_result = Common::MostSignificantBit(int_result) ? Safe::Negate<u64>(int_result) : static_cast<u64>(int_result);
|
||||||
|
|
||||||
const FPT result = int_result == 0
|
const FPT result = int_result == 0
|
||||||
? FPInfo<FPT>::Zero(sign)
|
? FPInfo<FPT>::Zero(sign)
|
||||||
: FPRound<FPT>(FPUnpacked<u64>{sign, 0, int_result}, fpcr, RoundingMode::TowardsZero, fpsr);
|
: FPRound<FPT>(FPUnpacked<u64>{sign, 0, abs_int_result}, fpcr, RoundingMode::TowardsZero, fpsr);
|
||||||
|
|
||||||
if (error != ResidualError::Zero && exact) {
|
if (error != ResidualError::Zero && exact) {
|
||||||
FPProcessException(FPExc::Inexact, fpcr, fpsr);
|
FPProcessException(FPExc::Inexact, fpcr, fpsr);
|
||||||
|
|
|
@ -1449,6 +1449,13 @@ U32U64 IREmitter::FPNeg(const U32U64& a) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds an FPRoundInt IR instruction for `a`.
//
// The opcode is chosen from the type of `a`: FPRoundInt32 when it is U32, FPRoundInt64 otherwise.
// `rounding` is passed as a u8 operand and `exact` as a one-bit immediate.
U32U64 IREmitter::FPRoundInt(const U32U64& a, FP::RoundingMode rounding, bool exact) {
    const u8 rounding_operand = static_cast<u8>(rounding);

    if (a.GetType() != Type::U32) {
        return Inst<U64>(Opcode::FPRoundInt64, a, rounding_operand, Imm1(exact));
    }
    return Inst<U32>(Opcode::FPRoundInt32, a, rounding_operand, Imm1(exact));
}
|
||||||
|
|
||||||
U32U64 IREmitter::FPSqrt(const U32U64& a) {
|
U32U64 IREmitter::FPSqrt(const U32U64& a) {
|
||||||
if (a.GetType() == Type::U32) {
|
if (a.GetType() == Type::U32) {
|
||||||
return Inst<U32>(Opcode::FPSqrt32, a);
|
return Inst<U32>(Opcode::FPSqrt32, a);
|
||||||
|
|
|
@ -266,6 +266,7 @@ public:
|
||||||
U32U64 FPMul(const U32U64& a, const U32U64& b, bool fpscr_controlled);
|
U32U64 FPMul(const U32U64& a, const U32U64& b, bool fpscr_controlled);
|
||||||
U32U64 FPMulAdd(const U32U64& a, const U32U64& b, const U32U64& c, bool fpscr_controlled);
|
U32U64 FPMulAdd(const U32U64& a, const U32U64& b, const U32U64& c, bool fpscr_controlled);
|
||||||
U32U64 FPNeg(const U32U64& a);
|
U32U64 FPNeg(const U32U64& a);
|
||||||
|
U32U64 FPRoundInt(const U32U64& a, FP::RoundingMode rounding, bool exact);
|
||||||
U32U64 FPSqrt(const U32U64& a);
|
U32U64 FPSqrt(const U32U64& a);
|
||||||
U32U64 FPSub(const U32U64& a, const U32U64& b, bool fpscr_controlled);
|
U32U64 FPSub(const U32U64& a, const U32U64& b, bool fpscr_controlled);
|
||||||
U32 FPDoubleToSingle(const U64& a, bool fpscr_controlled);
|
U32 FPDoubleToSingle(const U64& a, bool fpscr_controlled);
|
||||||
|
|
|
@ -384,6 +384,8 @@ OPCODE(FPMulAdd32, T::U32, T::U32, T::U
|
||||||
OPCODE(FPMulAdd64, T::U64, T::U64, T::U64, T::U64 )
|
OPCODE(FPMulAdd64, T::U64, T::U64, T::U64, T::U64 )
|
||||||
OPCODE(FPNeg32, T::U32, T::U32 )
|
OPCODE(FPNeg32, T::U32, T::U32 )
|
||||||
OPCODE(FPNeg64, T::U64, T::U64 )
|
OPCODE(FPNeg64, T::U64, T::U64 )
|
||||||
|
OPCODE(FPRoundInt32, T::U32, T::U32, T::U8, T::U1 )
|
||||||
|
OPCODE(FPRoundInt64, T::U64, T::U64, T::U8, T::U1 )
|
||||||
OPCODE(FPSqrt32, T::U32, T::U32 )
|
OPCODE(FPSqrt32, T::U32, T::U32 )
|
||||||
OPCODE(FPSqrt64, T::U64, T::U64 )
|
OPCODE(FPSqrt64, T::U64, T::U64 )
|
||||||
OPCODE(FPSub32, T::U32, T::U32, T::U32 )
|
OPCODE(FPSub32, T::U32, T::U32, T::U32 )
|
||||||
|
|
Loading…
Add table
Reference in a new issue