From 8f9fe8690ad2d7747243b8f9cac9564582fcfe36 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sat, 9 Mar 2019 00:08:08 -0500 Subject: [PATCH 1/4] common/fp/unpacked: Adjust FPUnpack to operate like ARM pseudocode This function is defined as always disabling the AHP bit in the fpcr before performing any operations. At the same time, rename the original FPUnpack function to FPUnpackBase to match the pseudocode in the ARM reference manual. --- src/common/fp/unpacked.cpp | 6 +++--- src/common/fp/unpacked.h | 8 +++++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/common/fp/unpacked.cpp b/src/common/fp/unpacked.cpp index b30f47fe..45d2b584 100644 --- a/src/common/fp/unpacked.cpp +++ b/src/common/fp/unpacked.cpp @@ -15,7 +15,7 @@ namespace Dynarmic::FP { template -std::tuple FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr) { +std::tuple FPUnpackBase(FPT op, FPCR fpcr, FPSR& fpsr) { constexpr size_t sign_bit = FPInfo::exponent_width + FPInfo::explicit_mantissa_width; constexpr size_t exponent_high_bit = FPInfo::exponent_width + FPInfo::explicit_mantissa_width - 1; constexpr size_t exponent_low_bit = FPInfo::explicit_mantissa_width; @@ -52,8 +52,8 @@ std::tuple FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr) { return {FPType::Nonzero, sign, {sign, exp, frac}}; } -template std::tuple FPUnpack(u32 op, FPCR fpcr, FPSR& fpsr); -template std::tuple FPUnpack(u64 op, FPCR fpcr, FPSR& fpsr); +template std::tuple FPUnpackBase(u32 op, FPCR fpcr, FPSR& fpsr); +template std::tuple FPUnpackBase(u64 op, FPCR fpcr, FPSR& fpsr); template std::tuple Normalize(FPUnpacked op, int extra_right_shift = 0) { diff --git a/src/common/fp/unpacked.h b/src/common/fp/unpacked.h index ae206c19..13e43474 100644 --- a/src/common/fp/unpacked.h +++ b/src/common/fp/unpacked.h @@ -52,7 +52,13 @@ constexpr FPUnpacked ToNormalized(bool sign, int exponent, u64 value) { } template -std::tuple FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr); +std::tuple FPUnpackBase(FPT op, FPCR fpcr, FPSR& fpsr); + +template 
+std::tuple FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr) { + fpcr.AHP(false); + return FPUnpackBase(op, fpcr, fpsr); +} template std::tuple FPUnpackCV(FPT op, FPCR fpcr, FPSR& fpsr) { From 7e814de445b6a304955d3f4cf24f578b907f18f3 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sat, 9 Mar 2019 01:18:43 -0500 Subject: [PATCH 2/4] common/fp/unpacked: Handle half-precision unpacking in FPUnpackBase Half-precision inputs follow the ARM pseudocode: a zero exponent yields Zero when the fraction is zero or FPCR.FZ16 is set and a denormal Nonzero value otherwise (FPCR.FZ is not consulted and no InputDenorm exception is raised), and an all-ones exponent encodes Infinity/NaN only while FPCR.AHP is clear. With AHP set, an all-ones exponent is decoded as a regular number. --- src/common/fp/unpacked.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/common/fp/unpacked.cpp b/src/common/fp/unpacked.cpp index 45d2b584..9afe4b1b 100644 --- a/src/common/fp/unpacked.cpp +++ b/src/common/fp/unpacked.cpp @@ -23,11 +23,17 @@ std::tuple FPUnpackBase(FPT op, FPCR fpcr, FPSR& fpsr) constexpr size_t mantissa_low_bit = 0; constexpr int denormal_exponent = FPInfo::exponent_min - int(FPInfo::explicit_mantissa_width); + constexpr bool is_half_precision = std::is_same_v; const bool sign = Common::Bit(op); const FPT exp_raw = Common::Bits(op); const FPT frac_raw = Common::Bits(op); if (exp_raw == 0) { + if constexpr (is_half_precision) { + const bool is_zero = frac_raw == 0 || fpcr.FZ16(); + const FPUnpacked value = is_zero ? FPUnpacked{sign, 0, 0} : ToNormalized(sign, denormal_exponent, frac_raw); + return {is_zero ? FPType::Zero : FPType::Nonzero, sign, value}; + } if (frac_raw == 0 || fpcr.FZ()) { if (frac_raw != 0) { FPProcessException(FPExc::InputDenorm, fpcr, fpsr); @@ -38,7 +44,9 @@ std::tuple FPUnpackBase(FPT op, FPCR fpcr, FPSR& fpsr) return {FPType::Nonzero, sign, ToNormalized(sign, denormal_exponent, frac_raw)}; } - if (exp_raw == Common::Ones(FPInfo::exponent_width)) { + const bool exp_all_ones = exp_raw == Common::Ones(FPInfo::exponent_width); + const bool ahp_disabled = is_half_precision && !fpcr.AHP(); + if (exp_all_ones && (!is_half_precision || ahp_disabled)) { if (frac_raw == 0) { return {FPType::Infinity, sign, ToNormalized(sign, 1000000, 1)}; } @@ -52,6 +60,7 @@ std::tuple FPUnpackBase(FPT op, FPCR fpcr, FPSR& fpsr) return {FPType::Nonzero, sign, {sign, exp, frac}}; } +template std::tuple FPUnpackBase(u16 op, FPCR fpcr, FPSR& fpsr); template std::tuple FPUnpackBase(u32 op, FPCR 
fpcr, FPSR& fpsr); template std::tuple FPUnpackBase(u64 op, FPCR fpcr, FPSR& fpsr); From 14f55d74760057c5ba08e11b838df02d63504e9c Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sat, 9 Mar 2019 01:23:27 -0500 Subject: [PATCH 3/4] common/fp/unpacked: Add half-precision instantiation of FPRoundBase --- src/common/fp/unpacked.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/common/fp/unpacked.cpp b/src/common/fp/unpacked.cpp index 9afe4b1b..ad6374a1 100644 --- a/src/common/fp/unpacked.cpp +++ b/src/common/fp/unpacked.cpp @@ -88,7 +88,7 @@ FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) { if (((!isFP16 && fpcr.FZ()) || (isFP16 && fpcr.FZ16())) && exponent < minimum_exp) { fpsr.UFC(true); - return FPInfo::Zero(sign); + return FPT(FPInfo::Zero(sign)); } int biased_exp = std::max(exponent - minimum_exp + 1, 0); @@ -151,13 +151,13 @@ FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) { #endif constexpr int max_biased_exp = (1 << E) - 1; if (biased_exp >= max_biased_exp) { - result = overflow_to_inf ? FPInfo::Infinity(sign) : FPInfo::MaxNormal(sign); + result = overflow_to_inf ? FPT(FPInfo::Infinity(sign)) : FPT(FPInfo::MaxNormal(sign)); FPProcessException(FPExc::Overflow, fpcr, fpsr); FPProcessException(FPExc::Inexact, fpcr, fpsr); } else { result = sign ? 1 : 0; result <<= E; - result += biased_exp; + result += FPT(biased_exp); result <<= F; result |= static_cast(mantissa) & FPInfo::mantissa_mask; if (error != ResidualError::Zero) { @@ -172,7 +172,7 @@ FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) { } else { result = sign ? 
1 : 0; result <<= E; - result += biased_exp; + result += FPT(biased_exp); result <<= F; result |= static_cast(mantissa) & FPInfo::mantissa_mask; if (error != ResidualError::Zero) { @@ -183,6 +183,7 @@ FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) { return result; } +template u16 FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr); template u32 FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr); template u64 FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr); From 7030b9af9591316578a9a2d0e1437b2ffca46575 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sat, 9 Mar 2019 01:25:17 -0500 Subject: [PATCH 4/4] common/fp/process_nan: Add half-precision instantiations for NaN processing functions --- src/common/fp/process_nan.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/common/fp/process_nan.cpp b/src/common/fp/process_nan.cpp index a6d81344..3fb3666a 100644 --- a/src/common/fp/process_nan.cpp +++ b/src/common/fp/process_nan.cpp @@ -31,12 +31,13 @@ FPT FPProcessNaN(FPType type, FPT op, FPCR fpcr, FPSR& fpsr) { } if (fpcr.DN()) { - result = FPInfo::DefaultNaN(); + result = FPT(FPInfo::DefaultNaN()); } return result; } +template u16 FPProcessNaN(FPType type, u16 op, FPCR fpcr, FPSR& fpsr); template u32 FPProcessNaN(FPType type, u32 op, FPCR fpcr, FPSR& fpsr); template u64 FPProcessNaN(FPType type, u64 op, FPCR fpcr, FPSR& fpsr); @@ -57,6 +58,7 @@ std::optional FPProcessNaNs(FPType type1, FPType type2, FPT op1, FPT op2, F return std::nullopt; } +template std::optional FPProcessNaNs(FPType type1, FPType type2, u16 op1, u16 op2, FPCR fpcr, FPSR& fpsr); template std::optional FPProcessNaNs(FPType type1, FPType type2, u32 op1, u32 op2, FPCR fpcr, FPSR& fpsr); template std::optional FPProcessNaNs(FPType type1, FPType type2, u64 op1, u64 op2, FPCR fpcr, FPSR& fpsr); @@ -83,6 +85,7 @@ std::optional FPProcessNaNs3(FPType type1, FPType type2, FPType type3, FPT 
return std::nullopt; } +template std::optional FPProcessNaNs3(FPType type1, FPType type2, FPType type3, u16 op1, u16 op2, u16 op3, FPCR fpcr, FPSR& fpsr); template std::optional FPProcessNaNs3(FPType type1, FPType type2, FPType type3, u32 op1, u32 op2, u32 op3, FPCR fpcr, FPSR& fpsr); template std::optional FPProcessNaNs3(FPType type1, FPType type2, FPType type3, u64 op1, u64 op2, u64 op3, FPCR fpcr, FPSR& fpsr);