Merge pull request #452 from lioncash/frecpx

A64: Implement FRECPX's half-precision floating-point variant
This commit is contained in:
Merry 2019-03-10 20:43:55 +00:00 committed by MerryMage
commit 42b090d234
10 changed files with 35 additions and 22 deletions

View file

@ -728,6 +728,10 @@ static void EmitFPRecipExponent(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
code.CallFunction(&FP::FPRecipExponent<FPT>); code.CallFunction(&FP::FPRecipExponent<FPT>);
} }
// Emits host code for the FPRecipExponent16 IR opcode.
// Forwards to the shared EmitFPRecipExponent helper instantiated with u16,
// which in turn calls FP::FPRecipExponent<u16> (the half-float variant).
void EmitX64::EmitFPRecipExponent16(EmitContext& ctx, IR::Inst* inst) {
EmitFPRecipExponent<u16>(code, ctx, inst);
}
void EmitX64::EmitFPRecipExponent32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPRecipExponent32(EmitContext& ctx, IR::Inst* inst) {
EmitFPRecipExponent<u32>(code, ctx, inst); EmitFPRecipExponent<u32>(code, ctx, inst);
} }

View file

@ -17,28 +17,16 @@
namespace Dynarmic::FP { namespace Dynarmic::FP {
namespace { namespace {
// We don't care about unreachable code warnings here
// TODO: Remove this warning disabling of warnings when
// half-float support is added.
#ifdef _MSC_VER
#pragma warning(disable:4702)
#endif
template <typename FPT> template <typename FPT>
FPT DetermineExponentValue(size_t value) { FPT DetermineExponentValue(size_t value) {
if constexpr (sizeof(FPT) == sizeof(u32)) { if constexpr (sizeof(FPT) == sizeof(u32)) {
return static_cast<FPT>(Common::Bits<23, 30>(value)); return static_cast<FPT>(Common::Bits<23, 30>(value));
} } else if constexpr (sizeof(FPT) == sizeof(u64)) {
if constexpr (sizeof(FPT) == sizeof(u64)) {
return static_cast<FPT>(Common::Bits<52, 62>(value)); return static_cast<FPT>(Common::Bits<52, 62>(value));
} } else {
// Half-float
return static_cast<FPT>(Common::Bits<10, 14>(value)); return static_cast<FPT>(Common::Bits<10, 14>(value));
}
} }
#ifdef _MSC_VER
#pragma warning(default:4702)
#endif
} // Anonymous namespace } // Anonymous namespace
template <typename FPT> template <typename FPT>
@ -50,7 +38,7 @@ FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr) {
return FPProcessNaN(type, op, fpcr, fpsr); return FPProcessNaN(type, op, fpcr, fpsr);
} }
const FPT sign_bits = FPInfo<FPT>::Zero(sign); const FPT sign_bits = FPT(FPInfo<FPT>::Zero(sign));
const FPT exponent = DetermineExponentValue<FPT>(op); const FPT exponent = DetermineExponentValue<FPT>(op);
// Zero and denormals // Zero and denormals
@ -64,6 +52,7 @@ FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr) {
return FPT(sign_bits | negated_exponent); return FPT(sign_bits | negated_exponent);
} }
// Explicit instantiation for 16-bit (half-float) operands, so the definition
// in this translation unit is available to callers such as the x64 emitter.
template u16 FPRecipExponent<u16>(u16 op, FPCR fpcr, FPSR& fpsr);
template u32 FPRecipExponent<u32>(u32 op, FPCR fpcr, FPSR& fpsr); template u32 FPRecipExponent<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
template u64 FPRecipExponent<u64>(u64 op, FPCR fpcr, FPSR& fpsr); template u64 FPRecipExponent<u64>(u64 op, FPCR fpcr, FPSR& fpsr);

View file

@ -33,7 +33,9 @@ std::tuple<FPType, bool, FPUnpacked> FPUnpackBase(FPT op, FPCR fpcr, FPSR& fpsr)
if (frac_raw == 0 || fpcr.FZ16()) { if (frac_raw == 0 || fpcr.FZ16()) {
return {FPType::Zero, sign, {sign, 0, 0}}; return {FPType::Zero, sign, {sign, 0, 0}};
} }
return {FPType::Nonzero, sign, ToNormalized(sign, denormal_exponent, frac_raw)};
} }
if (frac_raw == 0 || fpcr.FZ()) { if (frac_raw == 0 || fpcr.FZ()) {
if (frac_raw != 0) { if (frac_raw != 0) {
FPProcessException(FPExc::InputDenorm, fpcr, fpsr); FPProcessException(FPExc::InputDenorm, fpcr, fpsr);
@ -46,7 +48,7 @@ std::tuple<FPType, bool, FPUnpacked> FPUnpackBase(FPT op, FPCR fpcr, FPSR& fpsr)
const bool exp_all_ones = exp_raw == Common::Ones<FPT>(FPInfo<FPT>::exponent_width); const bool exp_all_ones = exp_raw == Common::Ones<FPT>(FPInfo<FPT>::exponent_width);
const bool ahp_disabled = is_half_precision && !fpcr.AHP(); const bool ahp_disabled = is_half_precision && !fpcr.AHP();
if (exp_all_ones || ahp_disabled) { if ((exp_all_ones && !is_half_precision) || (exp_all_ones && ahp_disabled)) {
if (frac_raw == 0) { if (frac_raw == 0) {
return {FPType::Infinity, sign, ToNormalized(sign, 1000000, 1)}; return {FPType::Infinity, sign, ToNormalized(sign, 1000000, 1)};
} }

View file

@ -420,7 +420,7 @@ INST(FCVTPS_2, "FCVTPS (vector)", "01011
INST(FCVTZS_int_2, "FCVTZS (vector, integer)", "010111101z100001101110nnnnnddddd") INST(FCVTZS_int_2, "FCVTZS (vector, integer)", "010111101z100001101110nnnnnddddd")
//INST(FRECPE_1, "FRECPE", "0101111011111001110110nnnnnddddd") //INST(FRECPE_1, "FRECPE", "0101111011111001110110nnnnnddddd")
INST(FRECPE_2, "FRECPE", "010111101z100001110110nnnnnddddd") INST(FRECPE_2, "FRECPE", "010111101z100001110110nnnnnddddd")
//INST(FRECPX_1, "FRECPX", "0101111011111001111110nnnnnddddd") INST(FRECPX_1, "FRECPX", "0101111011111001111110nnnnnddddd")
INST(FRECPX_2, "FRECPX", "010111101z100001111110nnnnnddddd") INST(FRECPX_2, "FRECPX", "010111101z100001111110nnnnnddddd")
//INST(FCVTNU_1, "FCVTNU (vector)", "0111111001111001101010nnnnnddddd") //INST(FCVTNU_1, "FCVTNU (vector)", "0111111001111001101010nnnnnddddd")
INST(FCVTNU_2, "FCVTNU (vector)", "011111100z100001101010nnnnnddddd") INST(FCVTNU_2, "FCVTNU (vector)", "011111100z100001101010nnnnnddddd")

View file

@ -182,6 +182,14 @@ bool TranslatorVisitor::FRECPE_2(bool sz, Vec Vn, Vec Vd) {
return true; return true;
} }
// FRECPX (scalar, 16-bit element variant).
// Reads the 16-bit scalar held in Vn, passes it through the IR
// FPRecipExponent operation, and stores the 16-bit result into Vd.
// Always returns true.
bool TranslatorVisitor::FRECPX_1(Vec Vn, Vec Vd) {
    // This encoding has no size field; the element size is fixed at 16 bits.
    const size_t esize = 16;

    const IR::U16 source = V_scalar(esize, Vn);
    const IR::U16 recip_exponent = ir.FPRecipExponent(source);

    V_scalar(esize, Vd, recip_exponent);
    return true;
}
bool TranslatorVisitor::FRECPX_2(bool sz, Vec Vn, Vec Vd) { bool TranslatorVisitor::FRECPX_2(bool sz, Vec Vn, Vec Vd) {
const size_t esize = sz ? 64 : 32; const size_t esize = sz ? 64 : 32;

View file

@ -1895,11 +1895,18 @@ U32U64 IREmitter::FPRecipEstimate(const U32U64& a) {
return Inst<U64>(Opcode::FPRecipEstimate64, a); return Inst<U64>(Opcode::FPRecipEstimate64, a);
} }
U32U64 IREmitter::FPRecipExponent(const U32U64& a) { U16U32U64 IREmitter::FPRecipExponent(const U16U32U64& a) {
if (a.GetType() == Type::U32) { switch (a.GetType()) {
case Type::U16:
return Inst<U16>(Opcode::FPRecipExponent16, a);
case Type::U32:
return Inst<U32>(Opcode::FPRecipExponent32, a); return Inst<U32>(Opcode::FPRecipExponent32, a);
} case Type::U64:
return Inst<U64>(Opcode::FPRecipExponent64, a); return Inst<U64>(Opcode::FPRecipExponent64, a);
default:
UNREACHABLE();
return U16U32U64{};
}
} }
U32U64 IREmitter::FPRecipStepFused(const U32U64& a, const U32U64& b) { U32U64 IREmitter::FPRecipStepFused(const U32U64& a, const U32U64& b) {

View file

@ -305,7 +305,7 @@ public:
U32U64 FPMulX(const U32U64& a, const U32U64& b); U32U64 FPMulX(const U32U64& a, const U32U64& b);
U32U64 FPNeg(const U32U64& a); U32U64 FPNeg(const U32U64& a);
U32U64 FPRecipEstimate(const U32U64& a); U32U64 FPRecipEstimate(const U32U64& a);
U32U64 FPRecipExponent(const U32U64& a); U16U32U64 FPRecipExponent(const U16U32U64& a);
U32U64 FPRecipStepFused(const U32U64& a, const U32U64& b); U32U64 FPRecipStepFused(const U32U64& a, const U32U64& b);
U32U64 FPRoundInt(const U32U64& a, FP::RoundingMode rounding, bool exact); U32U64 FPRoundInt(const U32U64& a, FP::RoundingMode rounding, bool exact);
U32U64 FPRSqrtEstimate(const U32U64& a); U32U64 FPRSqrtEstimate(const U32U64& a);

View file

@ -273,6 +273,7 @@ bool Inst::ReadsFromAndWritesToFPSRCumulativeExceptionBits() const {
case Opcode::FPMulAdd64: case Opcode::FPMulAdd64:
case Opcode::FPRecipEstimate32: case Opcode::FPRecipEstimate32:
case Opcode::FPRecipEstimate64: case Opcode::FPRecipEstimate64:
case Opcode::FPRecipExponent16:
case Opcode::FPRecipExponent32: case Opcode::FPRecipExponent32:
case Opcode::FPRecipExponent64: case Opcode::FPRecipExponent64:
case Opcode::FPRecipStepFused32: case Opcode::FPRecipStepFused32:

View file

@ -486,6 +486,7 @@ OPCODE(FPNeg32, U32, U32
OPCODE(FPNeg64, U64, U64 ) OPCODE(FPNeg64, U64, U64 )
OPCODE(FPRecipEstimate32, U32, U32 ) OPCODE(FPRecipEstimate32, U32, U32 )
OPCODE(FPRecipEstimate64, U64, U64 ) OPCODE(FPRecipEstimate64, U64, U64 )
OPCODE(FPRecipExponent16, U16, U16 )
OPCODE(FPRecipExponent32, U32, U32 ) OPCODE(FPRecipExponent32, U32, U32 )
OPCODE(FPRecipExponent64, U64, U64 ) OPCODE(FPRecipExponent64, U64, U64 )
OPCODE(FPRecipStepFused32, U32, U32, U32 ) OPCODE(FPRecipStepFused32, U32, U32, U32 )

View file

@ -163,6 +163,7 @@ using U32 = TypedValue<Type::U32>;
using U64 = TypedValue<Type::U64>; using U64 = TypedValue<Type::U64>;
using U128 = TypedValue<Type::U128>; using U128 = TypedValue<Type::U128>;
using U32U64 = TypedValue<Type::U32 | Type::U64>; using U32U64 = TypedValue<Type::U32 | Type::U64>;
// TypedValue alias whose type mask is U16 | U32 | U64.
// Used as the parameter and return type of IREmitter::FPRecipExponent so the
// same emitter entry point can accept 16-, 32- and 64-bit operands.
using U16U32U64 = TypedValue<Type::U16 | Type::U32 | Type::U64>;
using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>; using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>;
using UAnyU128 = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64 | Type::U128>; using UAnyU128 = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64 | Type::U128>;
using NZCV = TypedValue<Type::NZCVFlags>; using NZCV = TypedValue<Type::NZCVFlags>;