From 7a673a8a4321c2e1419ed5e56397c6fd04403992 Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Wed, 25 Jul 2018 17:39:14 +0100
Subject: [PATCH] fp: Change FPUnpacked to a normalized representation

Having a known position for the highest set bit makes writing algorithms easier
---
 src/common/bit_util.h                 |  2 +-
 src/common/fp/fused.cpp               | 29 +++++++---------------
 src/common/fp/fused.h                 |  1 +
 src/common/fp/op/FPRSqrtEstimate.cpp  |  7 +++---
 src/common/fp/op/FPRSqrtStepFused.cpp |  2 +-
 src/common/fp/op/FPRoundInt.cpp       | 11 ++++++---
 src/common/fp/op/FPToFixed.cpp        | 12 ++++-----
 src/common/fp/unpacked.cpp            | 10 ++++----
 src/common/fp/unpacked.h              | 18 +++++++++++++-
 tests/fp/unpacked_tests.cpp           | 35 ++++++++++++++++-----------
 10 files changed, 71 insertions(+), 56 deletions(-)
diff --git a/src/common/bit_util.h b/src/common/bit_util.h
index 2a515533..31dec21b 100644
--- a/src/common/bit_util.h
+++ b/src/common/bit_util.h
@@ -150,7 +150,7 @@ inline size_t BitCount(Integral value) {
 }
 
 template <typename T>
-inline int HighestSetBit(T value) {
+constexpr int HighestSetBit(T value) {
     auto x = static_cast<std::make_unsigned_t<T>>(value);
     int result = -1;
     while (x != 0) {
diff --git a/src/common/fp/fused.cpp b/src/common/fp/fused.cpp
index 9f43c6c4..44e6b765 100644
--- a/src/common/fp/fused.cpp
+++ b/src/common/fp/fused.cpp
@@ -11,26 +11,15 @@
 
 namespace Dynarmic::FP {
 
-constexpr size_t normalized_point_position = 62;
 constexpr size_t product_point_position = normalized_point_position * 2;
 
-static FPUnpacked NormalizeUnpacked(FPUnpacked op) {
-    constexpr int desired_highest = static_cast<int>(normalized_point_position);
-
-    const int highest_bit = Common::HighestSetBit(op.mantissa);
-    DEBUG_ASSERT(highest_bit < desired_highest);
-
-    const int offset = desired_highest - highest_bit;
-    op.mantissa <<= offset;
-    op.exponent -= offset;
-    return op;
+static FPUnpacked ReduceMantissa(bool sign, int exponent, const u128& mantissa) { // Narrows a 128-bit mantissa to the 64-bit FPUnpacked mantissa, adjusting the exponent to match.
+    constexpr int point_position_correction = normalized_point_position - (product_point_position - 64); // Keeping only the upper 64 bits moves the point down by 64; this re-expresses the exponent relative to normalized_point_position.
+    // We round-to-odd here when reducing the bitwidth of the mantissa so that subsequent roundings are accurate.
+    return {sign, exponent + point_position_correction, mantissa.upper | static_cast<u64>(mantissa.lower != 0)}; // Any nonzero discarded low bit is folded into bit 0 of the result.
 }
 
 FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
-    addend = NormalizeUnpacked(addend);
-    op1 = NormalizeUnpacked(op1);
-    op2 = NormalizeUnpacked(op2);
-
     const bool product_sign = op1.sign != op2.sign;
     const auto [product_exponent, product_value] = [op1, op2]{
         int exponent = op1.exponent + op2.exponent;
@@ -47,10 +36,10 @@ FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
     }
 
     if (addend.mantissa == 0) {
-        return FPUnpacked{product_sign, product_exponent + 64, product_value.upper | u64(product_value.lower != 0)};
+        return ReduceMantissa(product_sign, product_exponent, product_value);
     }
 
-    const int exp_diff = product_exponent - (addend.exponent - normalized_point_position);
+    const int exp_diff = product_exponent - addend.exponent;
 
     if (product_sign == addend.sign) {
         // Addition
@@ -63,7 +52,7 @@ FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
 
         // addend < product
         const u128 result = product_value + StickyLogicalShiftRight(addend.mantissa, exp_diff - normalized_point_position);
-        return FPUnpacked{product_sign, product_exponent + 64, result.upper | u64(result.lower != 0)};
+        return ReduceMantissa(product_sign, product_exponent, result);
     }
 
     // Subtraction
@@ -80,7 +69,7 @@ FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
         result = product_value - addend_long;
     } else if (exp_diff <= 0) {
         result_sign = !product_sign;
-        result_exponent = addend.exponent - normalized_point_position;
+        result_exponent = addend.exponent;
         result = addend_long - StickyLogicalShiftRight(product_value, -exp_diff);
     } else {
         result_sign = product_sign;
@@ -95,7 +84,7 @@ FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
     const int required_shift = normalized_point_position - Common::HighestSetBit(result.upper);
     result = result << required_shift;
     result_exponent -= required_shift;
-    return FPUnpacked{result_sign, result_exponent + 64, result.upper | u64(result.lower != 0)};
+    return ReduceMantissa(result_sign, result_exponent, result);
 }
 
 } // namespace Dynarmic::FP
diff --git a/src/common/fp/fused.h b/src/common/fp/fused.h
index cdbc16ee..94bd5540 100644
--- a/src/common/fp/fused.h
+++ b/src/common/fp/fused.h
@@ -10,6 +10,7 @@ namespace Dynarmic::FP {
 
 struct FPUnpacked;
 
+/// This function assumes all arguments have been normalized.
 FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2);
 
 } // namespace Dynarmic::FP
diff --git a/src/common/fp/op/FPRSqrtEstimate.cpp b/src/common/fp/op/FPRSqrtEstimate.cpp
index ff0590b0..28e6d650 100644
--- a/src/common/fp/op/FPRSqrtEstimate.cpp
+++ b/src/common/fp/op/FPRSqrtEstimate.cpp
@@ -79,11 +79,10 @@ FPT FPRSqrtEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
         return FPInfo<FPT>::Zero(false);
     }
 
-    const int highest_bit = Common::HighestSetBit(value.mantissa);
-    const int result_exponent = (-(value.exponent + highest_bit + 1)) >> 1;
-    const bool was_exponent_odd = (value.exponent + highest_bit) % 2 == 0;
+    const int result_exponent = (-(value.exponent + 1)) >> 1;
+    const bool was_exponent_odd = (value.exponent) % 2 == 0;
 
-    const u64 scaled = Safe::LogicalShiftRight(value.mantissa, highest_bit - (was_exponent_odd ? 7 : 8));
+    const u64 scaled = Safe::LogicalShiftRight(value.mantissa, normalized_point_position - (was_exponent_odd ? 7 : 8));
     const u64 estimate = RecipSqrtEstimate(scaled);
 
     const FPT bits_exponent = static_cast<FPT>(result_exponent + FPInfo<FPT>::exponent_bias);
diff --git a/src/common/fp/op/FPRSqrtStepFused.cpp b/src/common/fp/op/FPRSqrtStepFused.cpp
index b5ce5570..e83db7b2 100644
--- a/src/common/fp/op/FPRSqrtStepFused.cpp
+++ b/src/common/fp/op/FPRSqrtStepFused.cpp
@@ -41,7 +41,7 @@ FPT FPRSqrtStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) {
     }
 
     // result_value = (3.0 + (value1 * value2)) / 2.0
-    FPUnpacked result_value = FusedMulAdd({false, 0, 3}, value1, value2);
+    FPUnpacked result_value = FusedMulAdd(ToNormalized(false, 0, 3), value1, value2);
     result_value.exponent--;
 
     if (result_value.mantissa == 0) {
diff --git a/src/common/fp/op/FPRoundInt.cpp b/src/common/fp/op/FPRoundInt.cpp
index a14c21ca..f22c83d3 100644
--- a/src/common/fp/op/FPRoundInt.cpp
+++ b/src/common/fp/op/FPRoundInt.cpp
@@ -38,14 +38,17 @@ u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr)
         return FPInfo<FPT>::Zero(sign);
     }
 
-    if (value.exponent >= 0) {
+    // Shift the binary point back to bit zero.
+    const int exponent = value.exponent - normalized_point_position;
+
+    if (exponent >= 0) {
         // Guaranteed to be an integer
         return op;
     }
 
     u64 int_result = sign ? Safe::Negate<u64>(value.mantissa) : static_cast<u64>(value.mantissa);
-    const ResidualError error = ResidualErrorOnRightShift(int_result, -value.exponent);
-    int_result = Safe::ArithmeticShiftLeft(int_result, value.exponent);
+    const ResidualError error = ResidualErrorOnRightShift(int_result, -exponent);
+    int_result = Safe::ArithmeticShiftLeft(int_result, exponent);
 
     bool round_up = false;
     switch (rounding) {
@@ -77,7 +80,7 @@ u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr)
 
     const FPT result = int_result == 0
                      ? FPInfo<FPT>::Zero(sign)
-                     : FPRound<FPT>(FPUnpacked{new_sign, 0, abs_int_result}, fpcr, RoundingMode::TowardsZero, fpsr);
+                     : FPRound<FPT>(FPUnpacked{new_sign, normalized_point_position, abs_int_result}, fpcr, RoundingMode::TowardsZero, fpsr);
 
     if (error != ResidualError::Zero && exact) {
         FPProcessException(FPExc::Inexact, fpcr, fpsr);
diff --git a/src/common/fp/op/FPToFixed.cpp b/src/common/fp/op/FPToFixed.cpp
index f93d021b..97f549e6 100644
--- a/src/common/fp/op/FPToFixed.cpp
+++ b/src/common/fp/op/FPToFixed.cpp
@@ -40,12 +40,12 @@ u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, Rou
         return 0;
     }
 
-    // value *= 2.0^fbits
-    value.exponent += static_cast<int>(fbits);
+    // value *= 2.0^fbits, and shift the binary point back to bit zero.
+    int exponent = value.exponent + static_cast<int>(fbits) - normalized_point_position;
 
     u64 int_result = sign ? Safe::Negate<u64>(value.mantissa) : static_cast<u64>(value.mantissa);
-    const ResidualError error = ResidualErrorOnRightShift(int_result, -value.exponent);
-    int_result = Safe::ArithmeticShiftLeft(int_result, value.exponent);
+    const ResidualError error = ResidualErrorOnRightShift(int_result, -exponent);
+    int_result = Safe::ArithmeticShiftLeft(int_result, exponent);
 
     bool round_up = false;
     switch (rounding) {
@@ -74,7 +74,7 @@ u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, Rou
 
     // Detect Overflow
     const int min_exponent_for_overflow = static_cast<int>(ibits) - static_cast<int>(Common::HighestSetBit(value.mantissa + (round_up ? 1 : 0))) - (unsigned_ ? 0 : 1);
-    if (value.exponent >= min_exponent_for_overflow) {
+    if (exponent >= min_exponent_for_overflow) {
         // Positive overflow
         if (unsigned_ || !sign) {
             FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
@@ -83,7 +83,7 @@ u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, Rou
 
         // Negative overflow
         const u64 min_value = Safe::Negate<u64>(static_cast<u64>(1) << (ibits - 1));
-        if (!(value.exponent == min_exponent_for_overflow && int_result == min_value)) {
+        if (!(exponent == min_exponent_for_overflow && int_result == min_value)) {
             FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
             return static_cast<u64>(1) << (ibits - 1);
         }
diff --git a/src/common/fp/unpacked.cpp b/src/common/fp/unpacked.cpp
index f9efe994..b30f47fe 100644
--- a/src/common/fp/unpacked.cpp
+++ b/src/common/fp/unpacked.cpp
@@ -35,20 +35,20 @@ std::tuple<FPType, bool, FPUnpacked> FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr) {
             return {FPType::Zero, sign, {sign, 0, 0}};
         }
 
-        return {FPType::Nonzero, sign, {sign, denormal_exponent, frac_raw}};
+        return {FPType::Nonzero, sign, ToNormalized(sign, denormal_exponent, frac_raw)};
     }
 
     if (exp_raw == Common::Ones<FPT>(FPInfo<FPT>::exponent_width)) {
         if (frac_raw == 0) {
-            return {FPType::Infinity, sign, {sign, 1000000, 1}};
+            return {FPType::Infinity, sign, ToNormalized(sign, 1000000, 1)};
         }
 
         const bool is_quiet = Common::Bit<mantissa_high_bit>(frac_raw);
         return {is_quiet ? FPType::QNaN : FPType::SNaN, sign, {sign, 0, 0}};
     }
 
-    const int exp = static_cast<int>(exp_raw) - FPInfo<FPT>::exponent_bias - FPInfo<FPT>::explicit_mantissa_width;
-    const u64 frac = frac_raw | FPInfo<FPT>::implicit_leading_bit;
+    const int exp = static_cast<int>(exp_raw) - FPInfo<FPT>::exponent_bias;
+    const u64 frac = static_cast<u64>(frac_raw | FPInfo<FPT>::implicit_leading_bit) << (normalized_point_position - FPInfo<FPT>::explicit_mantissa_width);
     return {FPType::Nonzero, sign, {sign, exp, frac}};
 }
 
@@ -61,7 +61,7 @@ std::tuple<bool, int, u64, ResidualError> Normalize(FPUnpacked op, int extra_rig
     const int shift_amount = highest_set_bit - static_cast<int>(F) + extra_right_shift;
     const u64 mantissa = Safe::LogicalShiftRight(op.mantissa, shift_amount);
     const ResidualError error = ResidualErrorOnRightShift(op.mantissa, shift_amount);
-    const int exponent = op.exponent + highest_set_bit;
+    const int exponent = op.exponent + highest_set_bit - normalized_point_position;
     return std::make_tuple(op.sign, exponent, mantissa, error);
 }
 
diff --git a/src/common/fp/unpacked.h b/src/common/fp/unpacked.h
index a0961bd7..132fedd0 100644
--- a/src/common/fp/unpacked.h
+++ b/src/common/fp/unpacked.h
@@ -24,7 +24,10 @@ enum class FPType {
     SNaN,
 };
 
-/// value = (sign ? -1 : +1) * mantissa * 2^exponent
+constexpr size_t normalized_point_position = 62;
+
+/// value = (sign ? -1 : +1) * mantissa/(2^62) * 2^exponent
+/// Bit 62 of mantissa (zero-indexed, i.e. bit normalized_point_position) is always set (unless value is zero)
 struct FPUnpacked {
     bool sign;
     int exponent;
@@ -35,6 +38,19 @@ inline bool operator==(const FPUnpacked& a, const FPUnpacked& b) {
     return std::tie(a.sign, a.exponent, a.mantissa) == std::tie(b.sign, b.exponent, b.mantissa);
 }
 
+/// Builds a normalized FPUnpacked with return value = (sign ? -1 : +1) * value * 2^exponent; a zero value yields {sign, 0, 0}.
+constexpr FPUnpacked ToNormalized(bool sign, int exponent, u64 value) {
+    if (value == 0) {
+        return {sign, 0, 0}; // Zero has no set bit to normalize; the incoming exponent is discarded.
+    }
+
+    const int highest_bit = Common::HighestSetBit(value);
+    const int offset = static_cast<int>(normalized_point_position) - highest_bit; // Left shift that moves the highest set bit to bit normalized_point_position. NOTE(review): negative if bit 63 of value is set, making the shift below undefined - confirm callers never pass such a value.
+    value <<= offset;
+    exponent -= offset - normalized_point_position; // Net effect: exponent += highest_bit (the subtraction is evaluated in size_t and wraps back on conversion to int).
+    return {sign, exponent, value};
+}
+
 template<typename FPT>
 std::tuple<FPType, bool, FPUnpacked> FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr);
 
diff --git a/tests/fp/unpacked_tests.cpp b/tests/fp/unpacked_tests.cpp
index 1f7cd12c..6a0d5a30 100644
--- a/tests/fp/unpacked_tests.cpp
+++ b/tests/fp/unpacked_tests.cpp
@@ -20,15 +20,15 @@ using namespace Dynarmic::FP;
 
 TEST_CASE("FPUnpack Tests", "[fp]") {
     const static std::vector<std::tuple<u32, std::tuple<FPType, bool, FPUnpacked>, u32>> test_cases {
-        {0x00000000, {FPType::Zero, false, {false, 0, 0}}, 0},
-        {0x7F800000, {FPType::Infinity, false, {false, 1000000, 1}}, 0},
-        {0xFF800000, {FPType::Infinity, true, {true, 1000000, 1}}, 0},
-        {0x7F800001, {FPType::SNaN, false, {false, 0, 0}}, 0},
-        {0xFF800001, {FPType::SNaN, true, {true, 0, 0}}, 0},
-        {0x7FC00001, {FPType::QNaN, false, {false, 0, 0}}, 0},
-        {0xFFC00001, {FPType::QNaN, true, {true, 0, 0}}, 0},
-        {0x00000001, {FPType::Nonzero, false, {false, -149, 1}}, 0}, // Smallest single precision denormal is 2^-149.
-        {0x3F7FFFFF, {FPType::Nonzero, false, {false, -24, 0xFFFFFF}}, 0}, // 1.0 - epsilon
+        {0x00000000, {FPType::Zero, false, ToNormalized(false, 0, 0)}, 0},
+        {0x7F800000, {FPType::Infinity, false, ToNormalized(false, 1000000, 1)}, 0},
+        {0xFF800000, {FPType::Infinity, true, ToNormalized(true, 1000000, 1)}, 0},
+        {0x7F800001, {FPType::SNaN, false, ToNormalized(false, 0, 0)}, 0},
+        {0xFF800001, {FPType::SNaN, true, ToNormalized(true, 0, 0)}, 0},
+        {0x7FC00001, {FPType::QNaN, false, ToNormalized(false, 0, 0)}, 0},
+        {0xFFC00001, {FPType::QNaN, true, ToNormalized(true, 0, 0)}, 0},
+        {0x00000001, {FPType::Nonzero, false, ToNormalized(false, -149, 1)}, 0}, // Smallest single precision denormal is 2^-149.
+        {0x3F7FFFFF, {FPType::Nonzero, false, ToNormalized(false, -24, 0xFFFFFF)}, 0}, // 1.0 - epsilon
     };
 
     const FPCR fpcr;
@@ -37,6 +37,13 @@ TEST_CASE("FPUnpack Tests", "[fp]") {
         const auto output = FPUnpack<u32>(input, fpcr, fpsr);
 
         INFO("Input: " << std::hex << input);
+        INFO("Output Sign: " << std::get<2>(output).sign);
+        INFO("Output Exponent: " << std::get<2>(output).exponent);
+        INFO("Output Mantissa: " << std::hex << std::get<2>(output).mantissa);
+        INFO("Expected Sign: " << std::get<2>(expected_output).sign);
+        INFO("Expected Exponent: " << std::get<2>(expected_output).exponent);
+        INFO("Expected Mantissa: " << std::hex << std::get<2>(expected_output).mantissa);
+
         REQUIRE(output == expected_output);
         REQUIRE(fpsr.Value() == expected_fpsr);
     }
@@ -44,11 +51,11 @@ TEST_CASE("FPUnpack Tests", "[fp]") {
 
 TEST_CASE("FPRound Tests", "[fp]") {
     const static std::vector<std::tuple<u32, std::tuple<FPType, bool, FPUnpacked>, u32>> test_cases {
-        {0x7F800000, {FPType::Infinity, false, {false, 1000000, 1}}, 0x14},
-        {0xFF800000, {FPType::Infinity, true, {true, 1000000, 1}}, 0x14},
-        {0x00000001, {FPType::Nonzero, false, {false, -149, 1}}, 0}, // Smallest single precision denormal is 2^-149.
-        {0x3F7FFFFF, {FPType::Nonzero, false, {false, -24, 0xFFFFFF}}, 0}, // 1.0 - epsilon
-        {0x3F800000, {FPType::Nonzero, false, {false, -28, 0xFFFFFFF}}, 0x10}, // rounds to 1.0
+        {0x7F800000, {FPType::Infinity, false, ToNormalized(false, 1000000, 1)}, 0x14},
+        {0xFF800000, {FPType::Infinity, true, ToNormalized(true, 1000000, 1)}, 0x14},
+        {0x00000001, {FPType::Nonzero, false, ToNormalized(false, -149, 1)}, 0}, // Smallest single precision denormal is 2^-149.
+        {0x3F7FFFFF, {FPType::Nonzero, false, ToNormalized(false, -24, 0xFFFFFF)}, 0}, // 1.0 - epsilon
+        {0x3F800000, {FPType::Nonzero, false, ToNormalized(false, -28, 0xFFFFFFF)}, 0x10}, // rounds to 1.0
     };
 
     const FPCR fpcr;