diff --git a/src/common/fp/process_nan.cpp b/src/common/fp/process_nan.cpp index a6d81344..3fb3666a 100644 --- a/src/common/fp/process_nan.cpp +++ b/src/common/fp/process_nan.cpp @@ -31,12 +31,13 @@ FPT FPProcessNaN(FPType type, FPT op, FPCR fpcr, FPSR& fpsr) { } if (fpcr.DN()) { - result = FPInfo::DefaultNaN(); + result = FPT(FPInfo::DefaultNaN()); } return result; } +template u16 FPProcessNaN(FPType type, u16 op, FPCR fpcr, FPSR& fpsr); template u32 FPProcessNaN(FPType type, u32 op, FPCR fpcr, FPSR& fpsr); template u64 FPProcessNaN(FPType type, u64 op, FPCR fpcr, FPSR& fpsr); @@ -57,6 +58,7 @@ std::optional FPProcessNaNs(FPType type1, FPType type2, FPT op1, FPT op2, F return std::nullopt; } +template std::optional FPProcessNaNs(FPType type1, FPType type2, u16 op1, u16 op2, FPCR fpcr, FPSR& fpsr); template std::optional FPProcessNaNs(FPType type1, FPType type2, u32 op1, u32 op2, FPCR fpcr, FPSR& fpsr); template std::optional FPProcessNaNs(FPType type1, FPType type2, u64 op1, u64 op2, FPCR fpcr, FPSR& fpsr); @@ -83,6 +85,7 @@ std::optional FPProcessNaNs3(FPType type1, FPType type2, FPType type3, FPT return std::nullopt; } +template std::optional FPProcessNaNs3(FPType type1, FPType type2, FPType type3, u16 op1, u16 op2, u16 op3, FPCR fpcr, FPSR& fpsr); template std::optional FPProcessNaNs3(FPType type1, FPType type2, FPType type3, u32 op1, u32 op2, u32 op3, FPCR fpcr, FPSR& fpsr); template std::optional FPProcessNaNs3(FPType type1, FPType type2, FPType type3, u64 op1, u64 op2, u64 op3, FPCR fpcr, FPSR& fpsr); diff --git a/src/common/fp/unpacked.cpp b/src/common/fp/unpacked.cpp index b30f47fe..ad6374a1 100644 --- a/src/common/fp/unpacked.cpp +++ b/src/common/fp/unpacked.cpp @@ -15,7 +15,7 @@ namespace Dynarmic::FP { template -std::tuple FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr) { +std::tuple FPUnpackBase(FPT op, FPCR fpcr, FPSR& fpsr) { constexpr size_t sign_bit = FPInfo::exponent_width + FPInfo::explicit_mantissa_width; constexpr size_t exponent_high_bit = 
FPInfo::exponent_width + FPInfo::explicit_mantissa_width - 1; constexpr size_t exponent_low_bit = FPInfo::explicit_mantissa_width; @@ -23,11 +23,17 @@ std::tuple FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr) { constexpr size_t mantissa_low_bit = 0; constexpr int denormal_exponent = FPInfo::exponent_min - int(FPInfo::explicit_mantissa_width); + constexpr bool is_half_precision = std::is_same_v; const bool sign = Common::Bit(op); const FPT exp_raw = Common::Bits(op); const FPT frac_raw = Common::Bits(op); if (exp_raw == 0) { + if constexpr (is_half_precision) { + if (frac_raw == 0 || fpcr.FZ16()) { + return {FPType::Zero, sign, {sign, 0, 0}}; + } + } if (frac_raw == 0 || fpcr.FZ()) { if (frac_raw != 0) { FPProcessException(FPExc::InputDenorm, fpcr, fpsr); @@ -38,7 +44,9 @@ std::tuple FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr) { return {FPType::Nonzero, sign, ToNormalized(sign, denormal_exponent, frac_raw)}; } - if (exp_raw == Common::Ones(FPInfo::exponent_width)) { + const bool exp_all_ones = exp_raw == Common::Ones(FPInfo::exponent_width); + const bool ahp_enabled = is_half_precision && fpcr.AHP(); /* Inf/NaN path is taken only for an all-ones exponent, and never for a half-precision value when fpcr.AHP() is set */ + if (exp_all_ones && !ahp_enabled) { if (frac_raw == 0) { return {FPType::Infinity, sign, ToNormalized(sign, 1000000, 1)}; } @@ -52,8 +60,9 @@ std::tuple FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr) { return {FPType::Nonzero, sign, {sign, exp, frac}}; } -template std::tuple FPUnpack(u32 op, FPCR fpcr, FPSR& fpsr); -template std::tuple FPUnpack(u64 op, FPCR fpcr, FPSR& fpsr); +template std::tuple FPUnpackBase(u16 op, FPCR fpcr, FPSR& fpsr); +template std::tuple FPUnpackBase(u32 op, FPCR fpcr, FPSR& fpsr); +template std::tuple FPUnpackBase(u64 op, FPCR fpcr, FPSR& fpsr); template std::tuple Normalize(FPUnpacked op, int extra_right_shift = 0) { @@ -79,7 +88,7 @@ FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) { if (((!isFP16 && fpcr.FZ()) || (isFP16 && fpcr.FZ16())) && exponent < minimum_exp) { fpsr.UFC(true); - return 
FPT(FPInfo::Zero(sign)); } int biased_exp = std::max(exponent - minimum_exp + 1, 0); @@ -142,13 +151,13 @@ FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) { #endif constexpr int max_biased_exp = (1 << E) - 1; if (biased_exp >= max_biased_exp) { - result = overflow_to_inf ? FPInfo::Infinity(sign) : FPInfo::MaxNormal(sign); + result = overflow_to_inf ? FPT(FPInfo::Infinity(sign)) : FPT(FPInfo::MaxNormal(sign)); FPProcessException(FPExc::Overflow, fpcr, fpsr); FPProcessException(FPExc::Inexact, fpcr, fpsr); } else { result = sign ? 1 : 0; result <<= E; - result += biased_exp; + result += FPT(biased_exp); result <<= F; result |= static_cast(mantissa) & FPInfo::mantissa_mask; if (error != ResidualError::Zero) { @@ -163,7 +172,7 @@ FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) { } else { result = sign ? 1 : 0; result <<= E; - result += biased_exp; + result += FPT(biased_exp); result <<= F; result |= static_cast(mantissa) & FPInfo::mantissa_mask; if (error != ResidualError::Zero) { @@ -174,6 +183,7 @@ FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) { return result; } +template u16 FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr); template u32 FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr); template u64 FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr); diff --git a/src/common/fp/unpacked.h b/src/common/fp/unpacked.h index ae206c19..13e43474 100644 --- a/src/common/fp/unpacked.h +++ b/src/common/fp/unpacked.h @@ -52,7 +52,13 @@ constexpr FPUnpacked ToNormalized(bool sign, int exponent, u64 value) { } template -std::tuple FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr); +std::tuple FPUnpackBase(FPT op, FPCR fpcr, FPSR& fpsr); + +template +std::tuple FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr) { + fpcr.AHP(false); + return FPUnpackBase(op, fpcr, fpsr); +} template std::tuple FPUnpackCV(FPT op, FPCR fpcr, FPSR& fpsr) {