Merge pull request #452 from lioncash/frecpx
A64: Implement FRECPX's half-precision floating-point variant
This commit is contained in:
commit
42b090d234
10 changed files with 35 additions and 22 deletions
|
@ -728,6 +728,10 @@ static void EmitFPRecipExponent(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
|
|||
code.CallFunction(&FP::FPRecipExponent<FPT>);
|
||||
}
|
||||
|
||||
// Emits the 16-bit (half-precision) variant of the reciprocal-exponent
// operation by forwarding to the shared EmitFPRecipExponent template
// instantiated with u16.
void EmitX64::EmitFPRecipExponent16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitFPRecipExponent<u16>(code, ctx, inst);
|
||||
}
|
||||
|
||||
// Emits the 32-bit variant of the reciprocal-exponent operation by
// forwarding to the shared EmitFPRecipExponent template instantiated with u32.
void EmitX64::EmitFPRecipExponent32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitFPRecipExponent<u32>(code, ctx, inst);
|
||||
}
|
||||
|
|
|
@ -17,28 +17,16 @@
|
|||
|
||||
namespace Dynarmic::FP {
|
||||
namespace {
|
||||
// We don't care about unreachable code warnings here
|
||||
// TODO: Remove this disabling of warnings when
|
||||
// half-float support is added.
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable:4702)
|
||||
#endif
|
||||
// Returns the exponent bits of `value`, cast to FPT. The extraction is chosen
// at compile time from sizeof(FPT): Common::Bits<23, 30> when FPT has the size
// of u32, Common::Bits<52, 62> when it has the size of u64, and
// Common::Bits<10, 14> otherwise (the half-float case).
// NOTE(review): this listing appears to interleave the lines removed and the
// lines added by the commit without +/- markers (note the two u64 conditions
// and the two half-float returns), so it does not read as one compilable
// body -- confirm the final form against the repository.
template <typename FPT>
|
||||
FPT DetermineExponentValue(size_t value) {
|
||||
if constexpr (sizeof(FPT) == sizeof(u32)) {
|
||||
return static_cast<FPT>(Common::Bits<23, 30>(value));
|
||||
}
|
||||
|
||||
if constexpr (sizeof(FPT) == sizeof(u64)) {
|
||||
} else if constexpr (sizeof(FPT) == sizeof(u64)) {
|
||||
return static_cast<FPT>(Common::Bits<52, 62>(value));
|
||||
} else {
|
||||
return static_cast<FPT>(Common::Bits<10, 14>(value));
|
||||
}
|
||||
|
||||
// Half-float
|
||||
return static_cast<FPT>(Common::Bits<10, 14>(value));
|
||||
}
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(default:4702)
|
||||
#endif
|
||||
} // Anonymous namespace
|
||||
|
||||
template <typename FPT>
|
||||
|
@ -50,7 +38,7 @@ FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr) {
|
|||
return FPProcessNaN(type, op, fpcr, fpsr);
|
||||
}
|
||||
|
||||
const FPT sign_bits = FPInfo<FPT>::Zero(sign);
|
||||
const FPT sign_bits = FPT(FPInfo<FPT>::Zero(sign));
|
||||
const FPT exponent = DetermineExponentValue<FPT>(op);
|
||||
|
||||
// Zero and denormals
|
||||
|
@ -64,6 +52,7 @@ FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr) {
|
|||
return FPT(sign_bits | negated_exponent);
|
||||
}
|
||||
|
||||
// Explicit instantiations of FPRecipExponent for the u16, u32 and u64
// operand types.
template u16 FPRecipExponent<u16>(u16 op, FPCR fpcr, FPSR& fpsr);
|
||||
template u32 FPRecipExponent<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
|
||||
template u64 FPRecipExponent<u64>(u64 op, FPCR fpcr, FPSR& fpsr);
|
||||
|
||||
|
|
|
@ -33,7 +33,9 @@ std::tuple<FPType, bool, FPUnpacked> FPUnpackBase(FPT op, FPCR fpcr, FPSR& fpsr)
|
|||
if (frac_raw == 0 || fpcr.FZ16()) {
|
||||
return {FPType::Zero, sign, {sign, 0, 0}};
|
||||
}
|
||||
return {FPType::Nonzero, sign, ToNormalized(sign, denormal_exponent, frac_raw)};
|
||||
}
|
||||
|
||||
if (frac_raw == 0 || fpcr.FZ()) {
|
||||
if (frac_raw != 0) {
|
||||
FPProcessException(FPExc::InputDenorm, fpcr, fpsr);
|
||||
|
@ -46,7 +48,7 @@ std::tuple<FPType, bool, FPUnpacked> FPUnpackBase(FPT op, FPCR fpcr, FPSR& fpsr)
|
|||
|
||||
const bool exp_all_ones = exp_raw == Common::Ones<FPT>(FPInfo<FPT>::exponent_width);
|
||||
const bool ahp_disabled = is_half_precision && !fpcr.AHP();
|
||||
if (exp_all_ones || ahp_disabled) {
|
||||
if ((exp_all_ones && !is_half_precision) || (exp_all_ones && ahp_disabled)) {
|
||||
if (frac_raw == 0) {
|
||||
return {FPType::Infinity, sign, ToNormalized(sign, 1000000, 1)};
|
||||
}
|
||||
|
|
|
@ -420,7 +420,7 @@ INST(FCVTPS_2, "FCVTPS (vector)", "01011
|
|||
INST(FCVTZS_int_2, "FCVTZS (vector, integer)", "010111101z100001101110nnnnnddddd")
|
||||
//INST(FRECPE_1, "FRECPE", "0101111011111001110110nnnnnddddd")
|
||||
INST(FRECPE_2, "FRECPE", "010111101z100001110110nnnnnddddd")
|
||||
//INST(FRECPX_1, "FRECPX", "0101111011111001111110nnnnnddddd")
|
||||
INST(FRECPX_1, "FRECPX", "0101111011111001111110nnnnnddddd")
|
||||
INST(FRECPX_2, "FRECPX", "010111101z100001111110nnnnnddddd")
|
||||
//INST(FCVTNU_1, "FCVTNU (vector)", "0111111001111001101010nnnnnddddd")
|
||||
INST(FCVTNU_2, "FCVTNU (vector)", "011111100z100001101010nnnnnddddd")
|
||||
|
|
|
@ -182,6 +182,14 @@ bool TranslatorVisitor::FRECPE_2(bool sz, Vec Vn, Vec Vd) {
|
|||
return true;
|
||||
}
|
||||
|
||||
// Translates the FRECPX_1 encoding: reads a 16-bit scalar from Vn, applies
// the IR FPRecipExponent operation to it, and writes the 16-bit result to Vd.
// Always returns true.
bool TranslatorVisitor::FRECPX_1(Vec Vn, Vec Vd) {
|
||||
const IR::U16 operand = V_scalar(16, Vn);
|
||||
const IR::U16 result = ir.FPRecipExponent(operand);
|
||||
|
||||
V_scalar(16, Vd, result);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::FRECPX_2(bool sz, Vec Vn, Vec Vd) {
|
||||
const size_t esize = sz ? 64 : 32;
|
||||
|
||||
|
|
|
@ -1895,11 +1895,18 @@ U32U64 IREmitter::FPRecipEstimate(const U32U64& a) {
|
|||
return Inst<U64>(Opcode::FPRecipEstimate64, a);
|
||||
}
|
||||
|
||||
// Emits the reciprocal-exponent IR instruction whose width matches the type
// of `a`: FPRecipExponent16 for U16, FPRecipExponent32 for U32 and
// FPRecipExponent64 for U64. Any other type reaches UNREACHABLE().
// NOTE(review): this listing appears to interleave the pre-change
// (U32U64, if-based) and post-change (U16U32U64, switch-based) versions
// without +/- markers, so two signatures are visible -- confirm the final
// form against the repository.
U32U64 IREmitter::FPRecipExponent(const U32U64& a) {
|
||||
if (a.GetType() == Type::U32) {
|
||||
U16U32U64 IREmitter::FPRecipExponent(const U16U32U64& a) {
|
||||
switch (a.GetType()) {
|
||||
case Type::U16:
|
||||
return Inst<U16>(Opcode::FPRecipExponent16, a);
|
||||
case Type::U32:
|
||||
return Inst<U32>(Opcode::FPRecipExponent32, a);
|
||||
case Type::U64:
|
||||
return Inst<U64>(Opcode::FPRecipExponent64, a);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return U16U32U64{};
|
||||
}
|
||||
return Inst<U64>(Opcode::FPRecipExponent64, a);
|
||||
}
|
||||
|
||||
U32U64 IREmitter::FPRecipStepFused(const U32U64& a, const U32U64& b) {
|
||||
|
|
|
@ -305,7 +305,7 @@ public:
|
|||
U32U64 FPMulX(const U32U64& a, const U32U64& b);
|
||||
U32U64 FPNeg(const U32U64& a);
|
||||
U32U64 FPRecipEstimate(const U32U64& a);
|
||||
U32U64 FPRecipExponent(const U32U64& a);
|
||||
U16U32U64 FPRecipExponent(const U16U32U64& a);
|
||||
U32U64 FPRecipStepFused(const U32U64& a, const U32U64& b);
|
||||
U32U64 FPRoundInt(const U32U64& a, FP::RoundingMode rounding, bool exact);
|
||||
U32U64 FPRSqrtEstimate(const U32U64& a);
|
||||
|
|
|
@ -273,6 +273,7 @@ bool Inst::ReadsFromAndWritesToFPSRCumulativeExceptionBits() const {
|
|||
case Opcode::FPMulAdd64:
|
||||
case Opcode::FPRecipEstimate32:
|
||||
case Opcode::FPRecipEstimate64:
|
||||
case Opcode::FPRecipExponent16:
|
||||
case Opcode::FPRecipExponent32:
|
||||
case Opcode::FPRecipExponent64:
|
||||
case Opcode::FPRecipStepFused32:
|
||||
|
|
|
@ -486,6 +486,7 @@ OPCODE(FPNeg32, U32, U32
|
|||
OPCODE(FPNeg64, U64, U64 )
|
||||
OPCODE(FPRecipEstimate32, U32, U32 )
|
||||
OPCODE(FPRecipEstimate64, U64, U64 )
|
||||
OPCODE(FPRecipExponent16, U16, U16 )
|
||||
OPCODE(FPRecipExponent32, U32, U32 )
|
||||
OPCODE(FPRecipExponent64, U64, U64 )
|
||||
OPCODE(FPRecipStepFused32, U32, U32, U32 )
|
||||
|
|
|
@ -163,6 +163,7 @@ using U32 = TypedValue<Type::U32>;
|
|||
using U64 = TypedValue<Type::U64>;
|
||||
using U128 = TypedValue<Type::U128>;
|
||||
using U32U64 = TypedValue<Type::U32 | Type::U64>;
|
||||
using U16U32U64 = TypedValue<Type::U16 | Type::U32 | Type::U64>;
|
||||
using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>;
|
||||
using UAnyU128 = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64 | Type::U128>;
|
||||
using NZCV = TypedValue<Type::NZCVFlags>;
|
||||
|
|
Loading…
Reference in a new issue