From 7a673a8a4321c2e1419ed5e56397c6fd04403992 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Wed, 25 Jul 2018 17:39:14 +0100 Subject: [PATCH] fp: Change FPUnpacked to a normalized representation Having a known position for the highest set bit makes writing algorithms easier --- src/common/bit_util.h | 2 +- src/common/fp/fused.cpp | 29 +++++++--------------- src/common/fp/fused.h | 1 + src/common/fp/op/FPRSqrtEstimate.cpp | 7 +++--- src/common/fp/op/FPRSqrtStepFused.cpp | 2 +- src/common/fp/op/FPRoundInt.cpp | 11 ++++++--- src/common/fp/op/FPToFixed.cpp | 12 ++++----- src/common/fp/unpacked.cpp | 10 ++++---- src/common/fp/unpacked.h | 18 +++++++++++++- tests/fp/unpacked_tests.cpp | 35 ++++++++++++++++----------- 10 files changed, 71 insertions(+), 56 deletions(-) diff --git a/src/common/bit_util.h b/src/common/bit_util.h index 2a515533..31dec21b 100644 --- a/src/common/bit_util.h +++ b/src/common/bit_util.h @@ -150,7 +150,7 @@ inline size_t BitCount(Integral value) { } template -inline int HighestSetBit(T value) { +constexpr int HighestSetBit(T value) { auto x = static_cast>(value); int result = -1; while (x != 0) { diff --git a/src/common/fp/fused.cpp b/src/common/fp/fused.cpp index 9f43c6c4..44e6b765 100644 --- a/src/common/fp/fused.cpp +++ b/src/common/fp/fused.cpp @@ -11,26 +11,15 @@ namespace Dynarmic::FP { -constexpr size_t normalized_point_position = 62; constexpr size_t product_point_position = normalized_point_position * 2; -static FPUnpacked NormalizeUnpacked(FPUnpacked op) { - constexpr int desired_highest = static_cast(normalized_point_position); - - const int highest_bit = Common::HighestSetBit(op.mantissa); - DEBUG_ASSERT(highest_bit < desired_highest); - - const int offset = desired_highest - highest_bit; - op.mantissa <<= offset; - op.exponent -= offset; - return op; +static FPUnpacked ReduceMantissa(bool sign, int exponent, const u128& mantissa) { + constexpr int point_position_correction = normalized_point_position - (product_point_position - 64); + // We round-to-odd here when reducing the bitwidth of the mantissa so that subsequent roundings are accurate. + return {sign, exponent + point_position_correction, mantissa.upper | static_cast(mantissa.lower != 0)}; } FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) { - addend = NormalizeUnpacked(addend); - op1 = NormalizeUnpacked(op1); - op2 = NormalizeUnpacked(op2); - const bool product_sign = op1.sign != op2.sign; const auto [product_exponent, product_value] = [op1, op2]{ int exponent = op1.exponent + op2.exponent; @@ -47,10 +36,10 @@ FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) { } if (addend.mantissa == 0) { - return FPUnpacked{product_sign, product_exponent + 64, product_value.upper | u64(product_value.lower != 0)}; + return ReduceMantissa(product_sign, product_exponent, product_value); } - const int exp_diff = product_exponent - (addend.exponent - normalized_point_position); + const int exp_diff = product_exponent - addend.exponent; if (product_sign == addend.sign) { // Addition @@ -63,7 +52,7 @@ FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) { // addend < product const u128 result = product_value + StickyLogicalShiftRight(addend.mantissa, exp_diff - normalized_point_position); - return FPUnpacked{product_sign, product_exponent + 64, result.upper | u64(result.lower != 0)}; + return ReduceMantissa(product_sign, product_exponent, result); } // Subtraction @@ -80,7 +69,7 @@ FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) { result = product_value - addend_long; } else if (exp_diff <= 0) { result_sign = !product_sign; - result_exponent = addend.exponent - normalized_point_position; + result_exponent = addend.exponent; result = addend_long - StickyLogicalShiftRight(product_value, -exp_diff); } else { result_sign = product_sign; @@ -95,7 +84,7 @@ FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) { const int required_shift = normalized_point_position - Common::HighestSetBit(result.upper); result = result << required_shift; result_exponent -= required_shift; - return FPUnpacked{result_sign, result_exponent + 64, result.upper | u64(result.lower != 0)}; + return ReduceMantissa(result_sign, result_exponent, result); } } // namespace Dynarmic::FP diff --git a/src/common/fp/fused.h b/src/common/fp/fused.h index cdbc16ee..94bd5540 100644 --- a/src/common/fp/fused.h +++ b/src/common/fp/fused.h @@ -10,6 +10,7 @@ namespace Dynarmic::FP { struct FPUnpacked; +/// This function assumes all arguments have been normalized. FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2); } // namespace Dynarmic::FP diff --git a/src/common/fp/op/FPRSqrtEstimate.cpp b/src/common/fp/op/FPRSqrtEstimate.cpp index ff0590b0..28e6d650 100644 --- a/src/common/fp/op/FPRSqrtEstimate.cpp +++ b/src/common/fp/op/FPRSqrtEstimate.cpp @@ -79,11 +79,10 @@ FPT FPRSqrtEstimate(FPT op, FPCR fpcr, FPSR& fpsr) { return FPInfo::Zero(false); } - const int highest_bit = Common::HighestSetBit(value.mantissa); - const int result_exponent = (-(value.exponent + highest_bit + 1)) >> 1; - const bool was_exponent_odd = (value.exponent + highest_bit) % 2 == 0; + const int result_exponent = (-(value.exponent + 1)) >> 1; + const bool was_exponent_odd = (value.exponent) % 2 == 0; - const u64 scaled = Safe::LogicalShiftRight(value.mantissa, highest_bit - (was_exponent_odd ? 7 : 8)); + const u64 scaled = Safe::LogicalShiftRight(value.mantissa, normalized_point_position - (was_exponent_odd ? 7 : 8)); const u64 estimate = RecipSqrtEstimate(scaled); const FPT bits_exponent = static_cast(result_exponent + FPInfo::exponent_bias); diff --git a/src/common/fp/op/FPRSqrtStepFused.cpp b/src/common/fp/op/FPRSqrtStepFused.cpp index b5ce5570..e83db7b2 100644 --- a/src/common/fp/op/FPRSqrtStepFused.cpp +++ b/src/common/fp/op/FPRSqrtStepFused.cpp @@ -41,7 +41,7 @@ FPT FPRSqrtStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) { } // result_value = (3.0 + (value1 * value2)) / 2.0 - FPUnpacked result_value = FusedMulAdd({false, 0, 3}, value1, value2); + FPUnpacked result_value = FusedMulAdd(ToNormalized(false, 0, 3), value1, value2); result_value.exponent--; if (result_value.mantissa == 0) { diff --git a/src/common/fp/op/FPRoundInt.cpp b/src/common/fp/op/FPRoundInt.cpp index a14c21ca..f22c83d3 100644 --- a/src/common/fp/op/FPRoundInt.cpp +++ b/src/common/fp/op/FPRoundInt.cpp @@ -38,14 +38,17 @@ u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr) return FPInfo::Zero(sign); } - if (value.exponent >= 0) { + // Reshift decimal point back to bit zero. + const int exponent = value.exponent - normalized_point_position; + + if (exponent >= 0) { // Guaranteed to be an integer return op; } u64 int_result = sign ? Safe::Negate(value.mantissa) : static_cast(value.mantissa); - const ResidualError error = ResidualErrorOnRightShift(int_result, -value.exponent); - int_result = Safe::ArithmeticShiftLeft(int_result, value.exponent); + const ResidualError error = ResidualErrorOnRightShift(int_result, -exponent); + int_result = Safe::ArithmeticShiftLeft(int_result, exponent); bool round_up = false; switch (rounding) { @@ -77,7 +80,7 @@ u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr) const FPT result = int_result == 0 ? FPInfo::Zero(sign) - : FPRound(FPUnpacked{new_sign, 0, abs_int_result}, fpcr, RoundingMode::TowardsZero, fpsr); + : FPRound(FPUnpacked{new_sign, normalized_point_position, abs_int_result}, fpcr, RoundingMode::TowardsZero, fpsr); if (error != ResidualError::Zero && exact) { FPProcessException(FPExc::Inexact, fpcr, fpsr); diff --git a/src/common/fp/op/FPToFixed.cpp b/src/common/fp/op/FPToFixed.cpp index f93d021b..97f549e6 100644 --- a/src/common/fp/op/FPToFixed.cpp +++ b/src/common/fp/op/FPToFixed.cpp @@ -40,12 +40,12 @@ u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, Rou return 0; } - // value *= 2.0^fbits - value.exponent += static_cast(fbits); + // value *= 2.0^fbits and reshift the decimal point back to bit zero. + int exponent = value.exponent + static_cast(fbits) - normalized_point_position; u64 int_result = sign ? Safe::Negate(value.mantissa) : static_cast(value.mantissa); - const ResidualError error = ResidualErrorOnRightShift(int_result, -value.exponent); - int_result = Safe::ArithmeticShiftLeft(int_result, value.exponent); + const ResidualError error = ResidualErrorOnRightShift(int_result, -exponent); + int_result = Safe::ArithmeticShiftLeft(int_result, exponent); bool round_up = false; switch (rounding) { @@ -74,7 +74,7 @@ u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, Rou // Detect Overflow const int min_exponent_for_overflow = static_cast(ibits) - static_cast(Common::HighestSetBit(value.mantissa + (round_up ? 1 : 0))) - (unsigned_ ? 0 : 1); - if (value.exponent >= min_exponent_for_overflow) { + if (exponent >= min_exponent_for_overflow) { // Positive overflow if (unsigned_ || !sign) { FPProcessException(FPExc::InvalidOp, fpcr, fpsr); @@ -83,7 +83,7 @@ u64 FPToFixed(size_t ibits, FPT op, size_t fbits, bool unsigned_, FPCR fpcr, Rou // Negative overflow const u64 min_value = Safe::Negate(static_cast(1) << (ibits - 1)); - if (!(value.exponent == min_exponent_for_overflow && int_result == min_value)) { + if (!(exponent == min_exponent_for_overflow && int_result == min_value)) { FPProcessException(FPExc::InvalidOp, fpcr, fpsr); return static_cast(1) << (ibits - 1); } diff --git a/src/common/fp/unpacked.cpp b/src/common/fp/unpacked.cpp index f9efe994..b30f47fe 100644 --- a/src/common/fp/unpacked.cpp +++ b/src/common/fp/unpacked.cpp @@ -35,20 +35,20 @@ std::tuple FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr) { return {FPType::Zero, sign, {sign, 0, 0}}; } - return {FPType::Nonzero, sign, {sign, denormal_exponent, frac_raw}}; + return {FPType::Nonzero, sign, ToNormalized(sign, denormal_exponent, frac_raw)}; } if (exp_raw == Common::Ones(FPInfo::exponent_width)) { if (frac_raw == 0) { - return {FPType::Infinity, sign, {sign, 1000000, 1}}; + return {FPType::Infinity, sign, ToNormalized(sign, 1000000, 1)}; } const bool is_quiet = Common::Bit(frac_raw); return {is_quiet ? FPType::QNaN : FPType::SNaN, sign, {sign, 0, 0}}; } - const int exp = static_cast(exp_raw) - FPInfo::exponent_bias - FPInfo::explicit_mantissa_width; - const u64 frac = frac_raw | FPInfo::implicit_leading_bit; + const int exp = static_cast(exp_raw) - FPInfo::exponent_bias; + const u64 frac = static_cast(frac_raw | FPInfo::implicit_leading_bit) << (normalized_point_position - FPInfo::explicit_mantissa_width); return {FPType::Nonzero, sign, {sign, exp, frac}}; } @@ -61,7 +61,7 @@ std::tuple Normalize(FPUnpacked op, int extra_rig const int shift_amount = highest_set_bit - static_cast(F) + extra_right_shift; const u64 mantissa = Safe::LogicalShiftRight(op.mantissa, shift_amount); const ResidualError error = ResidualErrorOnRightShift(op.mantissa, shift_amount); - const int exponent = op.exponent + highest_set_bit; + const int exponent = op.exponent + highest_set_bit - normalized_point_position; return std::make_tuple(op.sign, exponent, mantissa, error); } diff --git a/src/common/fp/unpacked.h b/src/common/fp/unpacked.h index a0961bd7..132fedd0 100644 --- a/src/common/fp/unpacked.h +++ b/src/common/fp/unpacked.h @@ -24,7 +24,10 @@ enum class FPType { SNaN, }; -/// value = (sign ? -1 : +1) * mantissa * 2^exponent +constexpr size_t normalized_point_position = 62; + +/// value = (sign ? -1 : +1) * mantissa/(2^62) * 2^exponent +/// 63rd bit of mantissa is always set (unless value is zero) struct FPUnpacked { bool sign; int exponent; @@ -35,6 +38,19 @@ inline bool operator==(const FPUnpacked& a, const FPUnpacked& b) { return std::tie(a.sign, a.exponent, a.mantissa) == std::tie(b.sign, b.exponent, b.mantissa); } +/// return value = (sign ? -1 : +1) * value * 2^exponent +constexpr FPUnpacked ToNormalized(bool sign, int exponent, u64 value) { + if (value == 0) { + return {sign, 0, 0}; + } + + const int highest_bit = Common::HighestSetBit(value); + const int offset = static_cast(normalized_point_position) - highest_bit; + value <<= offset; + exponent -= offset - normalized_point_position; + return {sign, exponent, value}; +} + template std::tuple FPUnpack(FPT op, FPCR fpcr, FPSR& fpsr); diff --git a/tests/fp/unpacked_tests.cpp b/tests/fp/unpacked_tests.cpp index 1f7cd12c..6a0d5a30 100644 --- a/tests/fp/unpacked_tests.cpp +++ b/tests/fp/unpacked_tests.cpp @@ -20,15 +20,15 @@ using namespace Dynarmic::FP; TEST_CASE("FPUnpack Tests", "[fp]") { const static std::vector, u32>> test_cases { - {0x00000000, {FPType::Zero, false, {false, 0, 0}}, 0}, - {0x7F800000, {FPType::Infinity, false, {false, 1000000, 1}}, 0}, - {0xFF800000, {FPType::Infinity, true, {true, 1000000, 1}}, 0}, - {0x7F800001, {FPType::SNaN, false, {false, 0, 0}}, 0}, - {0xFF800001, {FPType::SNaN, true, {true, 0, 0}}, 0}, - {0x7FC00001, {FPType::QNaN, false, {false, 0, 0}}, 0}, - {0xFFC00001, {FPType::QNaN, true, {true, 0, 0}}, 0}, - {0x00000001, {FPType::Nonzero, false, {false, -149, 1}}, 0}, // Smallest single precision denormal is 2^-149. - {0x3F7FFFFF, {FPType::Nonzero, false, {false, -24, 0xFFFFFF}}, 0}, // 1.0 - epsilon + {0x00000000, {FPType::Zero, false, ToNormalized(false, 0, 0)}, 0}, + {0x7F800000, {FPType::Infinity, false, ToNormalized(false, 1000000, 1)}, 0}, + {0xFF800000, {FPType::Infinity, true, ToNormalized(true, 1000000, 1)}, 0}, + {0x7F800001, {FPType::SNaN, false, ToNormalized(false, 0, 0)}, 0}, + {0xFF800001, {FPType::SNaN, true, ToNormalized(true, 0, 0)}, 0}, + {0x7FC00001, {FPType::QNaN, false, ToNormalized(false, 0, 0)}, 0}, + {0xFFC00001, {FPType::QNaN, true, ToNormalized(true, 0, 0)}, 0}, + {0x00000001, {FPType::Nonzero, false, ToNormalized(false, -149, 1)}, 0}, // Smallest single precision denormal is 2^-149. + {0x3F7FFFFF, {FPType::Nonzero, false, ToNormalized(false, -24, 0xFFFFFF)}, 0}, // 1.0 - epsilon }; const FPCR fpcr; @@ -37,6 +37,13 @@ TEST_CASE("FPUnpack Tests", "[fp]") { const auto output = FPUnpack(input, fpcr, fpsr); INFO("Input: " << std::hex << input); + INFO("Output Sign: " << std::get<2>(output).sign); + INFO("Output Exponent: " << std::get<2>(output).exponent); + INFO("Output Mantissa: " << std::hex << std::get<2>(output).mantissa); + INFO("Expected Sign: " << std::get<2>(expected_output).sign); + INFO("Expected Exponent: " << std::get<2>(expected_output).exponent); + INFO("Expected Mantissa: " << std::hex << std::get<2>(expected_output).mantissa); + REQUIRE(output == expected_output); REQUIRE(fpsr.Value() == expected_fpsr); } @@ -44,11 +51,11 @@ TEST_CASE("FPUnpack Tests", "[fp]") { TEST_CASE("FPRound Tests", "[fp]") { const static std::vector, u32>> test_cases { - {0x7F800000, {FPType::Infinity, false, {false, 1000000, 1}}, 0x14}, - {0xFF800000, {FPType::Infinity, true, {true, 1000000, 1}}, 0x14}, - {0x00000001, {FPType::Nonzero, false, {false, -149, 1}}, 0}, // Smallest single precision denormal is 2^-149. - {0x3F7FFFFF, {FPType::Nonzero, false, {false, -24, 0xFFFFFF}}, 0}, // 1.0 - epsilon - {0x3F800000, {FPType::Nonzero, false, {false, -28, 0xFFFFFFF}}, 0x10}, // rounds to 1.0 + {0x7F800000, {FPType::Infinity, false, ToNormalized(false, 1000000, 1)}, 0x14}, + {0xFF800000, {FPType::Infinity, true, ToNormalized(true, 1000000, 1)}, 0x14}, + {0x00000001, {FPType::Nonzero, false, ToNormalized(false, -149, 1)}, 0}, // Smallest single precision denormal is 2^-149. + {0x3F7FFFFF, {FPType::Nonzero, false, ToNormalized(false, -24, 0xFFFFFF)}, 0}, // 1.0 - epsilon + {0x3F800000, {FPType::Nonzero, false, ToNormalized(false, -28, 0xFFFFFFF)}, 0x10}, // rounds to 1.0 }; const FPCR fpcr;