From 8f9fe8690ad2d7747243b8f9cac9564582fcfe36 Mon Sep 17 00:00:00 2001
From: Lioncash <mathew1800@gmail.com>
Date: Sat, 9 Mar 2019 00:08:08 -0500
Subject: [PATCH 1/4] common/fp/unpacked: Adjust FPUnpack to operate like ARM
 pseudocode

In the ARM pseudocode, FPUnpack is defined as always clearing the AHP bit
in its copy of the FPCR before performing the unpack.

At the same time, rename the original FPUnpack function to FPUnpackBase
to match the pseudocode in the ARM reference manual.
---
 src/common/fp/unpacked.cpp | 6 +++---
 src/common/fp/unpacked.h   | 8 +++++++-
 2 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/src/common/fp/unpacked.cpp b/src/common/fp/unpacked.cpp
index b30f47fe..45d2b584 100644
--- a/src/common/fp/unpacked.cpp
+++ b/src/common/fp/unpacked.cpp
@@ -15,7 +15,7 @@
 namespace Dynarmic::FP {
 
 template<typename FPT>
-std::tuple<FPType, bool, FPUnpacked> FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr) {
+std::tuple<FPType, bool, FPUnpacked> FPUnpackBase(FPT op, FPCR fpcr, FPSR& fpsr) {
     constexpr size_t sign_bit = FPInfo<FPT>::exponent_width + FPInfo<FPT>::explicit_mantissa_width;
     constexpr size_t exponent_high_bit = FPInfo<FPT>::exponent_width + FPInfo<FPT>::explicit_mantissa_width - 1;
     constexpr size_t exponent_low_bit = FPInfo<FPT>::explicit_mantissa_width;
@@ -52,8 +52,8 @@ std::tuple<FPType, bool, FPUnpacked> FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr) {
     return {FPType::Nonzero, sign, {sign, exp, frac}};
 }
 
-template std::tuple<FPType, bool, FPUnpacked> FPUnpack<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
-template std::tuple<FPType, bool, FPUnpacked> FPUnpack<u64>(u64 op, FPCR fpcr, FPSR& fpsr);
+template std::tuple<FPType, bool, FPUnpacked> FPUnpackBase<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
+template std::tuple<FPType, bool, FPUnpacked> FPUnpackBase<u64>(u64 op, FPCR fpcr, FPSR& fpsr);
 
 template<size_t F>
 std::tuple<bool, int, u64, ResidualError> Normalize(FPUnpacked op, int extra_right_shift = 0) {
diff --git a/src/common/fp/unpacked.h b/src/common/fp/unpacked.h
index ae206c19..13e43474 100644
--- a/src/common/fp/unpacked.h
+++ b/src/common/fp/unpacked.h
@@ -52,7 +52,13 @@ constexpr FPUnpacked ToNormalized(bool sign, int exponent, u64 value) {
 }
 
 template<typename FPT>
-std::tuple<FPType, bool, FPUnpacked> FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr);
+std::tuple<FPType, bool, FPUnpacked> FPUnpackBase(FPT op, FPCR fpcr, FPSR& fpsr);
+
+template<typename FPT>
+std::tuple<FPType, bool, FPUnpacked> FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr) {
+    fpcr.AHP(false);  // fpcr is a by-value parameter, so AHP is cleared only for the FPUnpackBase call below
+    return FPUnpackBase(op, fpcr, fpsr);
+}
 
 template<typename FPT>
 std::tuple<FPType, bool, FPUnpacked> FPUnpackCV(FPT op, FPCR fpcr, FPSR& fpsr) {

From 7e814de445b6a304955d3f4cf24f578b907f18f3 Mon Sep 17 00:00:00 2001
From: Lioncash <mathew1800@gmail.com>
Date: Sat, 9 Mar 2019 01:18:43 -0500
Subject: [PATCH 2/4] common/fp/unpacked: Handle half-precision unpacking in
 FPUnpackBase

---
 src/common/fp/unpacked.cpp | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/common/fp/unpacked.cpp b/src/common/fp/unpacked.cpp
index 45d2b584..9afe4b1b 100644
--- a/src/common/fp/unpacked.cpp
+++ b/src/common/fp/unpacked.cpp
@@ -23,11 +23,17 @@ std::tuple<FPType, bool, FPUnpacked> FPUnpackBase(FPT op, FPCR fpcr, FPSR& fpsr)
     constexpr size_t mantissa_low_bit = 0;
     constexpr int denormal_exponent = FPInfo<FPT>::exponent_min - int(FPInfo<FPT>::explicit_mantissa_width);
 
+    constexpr bool is_half_precision = std::is_same_v<FPT, u16>;
     const bool sign = Common::Bit<sign_bit>(op);
     const FPT exp_raw = Common::Bits<exponent_low_bit, exponent_high_bit>(op);
     const FPT frac_raw = Common::Bits<mantissa_low_bit, mantissa_high_bit>(op);
 
     if (exp_raw == 0) {
+        if constexpr (is_half_precision) {  // half precision is flushed by FZ16 only; it must not reach the FZ check below
+            if (frac_raw == 0 || fpcr.FZ16())
+                return {FPType::Zero, sign, {sign, 0, 0}};
+            return {FPType::Nonzero, sign, ToNormalized(sign, denormal_exponent, frac_raw)};
+        }
         if (frac_raw == 0 || fpcr.FZ()) {
             if (frac_raw != 0) {
                 FPProcessException(FPExc::InputDenorm, fpcr, fpsr);
@@ -38,7 +44,9 @@ std::tuple<FPType, bool, FPUnpacked> FPUnpackBase(FPT op, FPCR fpcr, FPSR& fpsr)
         return {FPType::Nonzero, sign, ToNormalized(sign, denormal_exponent, frac_raw)};
     }
 
-    if (exp_raw == Common::Ones<FPT>(FPInfo<FPT>::exponent_width)) {
+    const bool exp_all_ones = exp_raw == Common::Ones<FPT>(FPInfo<FPT>::exponent_width);
+    const bool ahp_disabled = is_half_precision && !fpcr.AHP();
+    if (exp_all_ones && (!is_half_precision || ahp_disabled)) {  // Inf/NaN needs an all-ones exponent; for half precision AHP must also be clear
         if (frac_raw == 0) {
             return {FPType::Infinity, sign, ToNormalized(sign, 1000000, 1)};
         }
@@ -52,6 +60,7 @@ std::tuple<FPType, bool, FPUnpacked> FPUnpackBase(FPT op, FPCR fpcr, FPSR& fpsr)
     return {FPType::Nonzero, sign, {sign, exp, frac}};
 }
 
+template std::tuple<FPType, bool, FPUnpacked> FPUnpackBase<u16>(u16 op, FPCR fpcr, FPSR& fpsr);
 template std::tuple<FPType, bool, FPUnpacked> FPUnpackBase<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
 template std::tuple<FPType, bool, FPUnpacked> FPUnpackBase<u64>(u64 op, FPCR fpcr, FPSR& fpsr);
 

From 14f55d74760057c5ba08e11b838df02d63504e9c Mon Sep 17 00:00:00 2001
From: Lioncash <mathew1800@gmail.com>
Date: Sat, 9 Mar 2019 01:23:27 -0500
Subject: [PATCH 3/4] common/fp/unpacked: Add half-precision instantiation of
 FPRoundBase

---
 src/common/fp/unpacked.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/common/fp/unpacked.cpp b/src/common/fp/unpacked.cpp
index 9afe4b1b..ad6374a1 100644
--- a/src/common/fp/unpacked.cpp
+++ b/src/common/fp/unpacked.cpp
@@ -88,7 +88,7 @@ FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) {
 
     if (((!isFP16 && fpcr.FZ()) || (isFP16 && fpcr.FZ16())) && exponent < minimum_exp) {
         fpsr.UFC(true);
-        return FPInfo<FPT>::Zero(sign);
+        return FPT(FPInfo<FPT>::Zero(sign));
     }
 
     int biased_exp = std::max<int>(exponent - minimum_exp + 1, 0);
@@ -151,13 +151,13 @@ FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) {
 #endif
         constexpr int max_biased_exp = (1 << E) - 1;
         if (biased_exp >= max_biased_exp) {
-            result = overflow_to_inf ? FPInfo<FPT>::Infinity(sign) : FPInfo<FPT>::MaxNormal(sign);
+            result = overflow_to_inf ? FPT(FPInfo<FPT>::Infinity(sign)) : FPT(FPInfo<FPT>::MaxNormal(sign));
             FPProcessException(FPExc::Overflow, fpcr, fpsr);
             FPProcessException(FPExc::Inexact, fpcr, fpsr);
         } else {
             result = sign ? 1 : 0;
             result <<= E;
-            result += biased_exp;
+            result += FPT(biased_exp);
             result <<= F;
             result |= static_cast<FPT>(mantissa) & FPInfo<FPT>::mantissa_mask;
             if (error != ResidualError::Zero) {
@@ -172,7 +172,7 @@ FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) {
         } else {
             result = sign ? 1 : 0;
             result <<= E;
-            result += biased_exp;
+            result += FPT(biased_exp);
             result <<= F;
             result |= static_cast<FPT>(mantissa) & FPInfo<FPT>::mantissa_mask;
             if (error != ResidualError::Zero) {
@@ -183,6 +183,7 @@ FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) {
     return result;
 }
 
+template u16 FPRoundBase<u16>(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
 template u32 FPRoundBase<u32>(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
 template u64 FPRoundBase<u64>(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr);
 

From 7030b9af9591316578a9a2d0e1437b2ffca46575 Mon Sep 17 00:00:00 2001
From: Lioncash <mathew1800@gmail.com>
Date: Sat, 9 Mar 2019 01:25:17 -0500
Subject: [PATCH 4/4] common/fp/process_nan: Add half-precision instantiations
 for NaN processing functions

---
 src/common/fp/process_nan.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/common/fp/process_nan.cpp b/src/common/fp/process_nan.cpp
index a6d81344..3fb3666a 100644
--- a/src/common/fp/process_nan.cpp
+++ b/src/common/fp/process_nan.cpp
@@ -31,12 +31,13 @@ FPT FPProcessNaN(FPType type, FPT op, FPCR fpcr, FPSR& fpsr) {
     }
 
     if (fpcr.DN()) {
-        result = FPInfo<FPT>::DefaultNaN();
+        result = FPT(FPInfo<FPT>::DefaultNaN());
     }
 
     return result;
 }
 
+template u16 FPProcessNaN<u16>(FPType type, u16 op, FPCR fpcr, FPSR& fpsr);
 template u32 FPProcessNaN<u32>(FPType type, u32 op, FPCR fpcr, FPSR& fpsr);
 template u64 FPProcessNaN<u64>(FPType type, u64 op, FPCR fpcr, FPSR& fpsr);
 
@@ -57,6 +58,7 @@ std::optional<FPT> FPProcessNaNs(FPType type1, FPType type2, FPT op1, FPT op2, F
     return std::nullopt;
 }
 
+template std::optional<u16> FPProcessNaNs<u16>(FPType type1, FPType type2, u16 op1, u16 op2, FPCR fpcr, FPSR& fpsr);
 template std::optional<u32> FPProcessNaNs<u32>(FPType type1, FPType type2, u32 op1, u32 op2, FPCR fpcr, FPSR& fpsr);
 template std::optional<u64> FPProcessNaNs<u64>(FPType type1, FPType type2, u64 op1, u64 op2, FPCR fpcr, FPSR& fpsr);
 
@@ -83,6 +85,7 @@ std::optional<FPT> FPProcessNaNs3(FPType type1, FPType type2, FPType type3, FPT
     return std::nullopt;
 }
 
+template std::optional<u16> FPProcessNaNs3<u16>(FPType type1, FPType type2, FPType type3, u16 op1, u16 op2, u16 op3, FPCR fpcr, FPSR& fpsr);
 template std::optional<u32> FPProcessNaNs3<u32>(FPType type1, FPType type2, FPType type3, u32 op1, u32 op2, u32 op3, FPCR fpcr, FPSR& fpsr);
 template std::optional<u64> FPProcessNaNs3<u64>(FPType type1, FPType type2, FPType type3, u64 op1, u64 op2, u64 op3, FPCR fpcr, FPSR& fpsr);