diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 8ea66698..3f4d5820 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -18,9 +18,13 @@ add_library(dynarmic
     common/crc32.h
     common/fp/fpcr.h
     common/fp/fpsr.h
+    common/fp/fused.cpp
+    common/fp/fused.h
     common/fp/info.h
     common/fp/mantissa_util.h
     common/fp/op.h
+    common/fp/op/FPMulAdd.cpp
+    common/fp/op/FPMulAdd.h
     common/fp/op/FPRoundInt.cpp
     common/fp/op/FPRoundInt.h
     common/fp/op/FPRSqrtEstimate.cpp
diff --git a/src/common/fp/fused.cpp b/src/common/fp/fused.cpp
new file mode 100644
index 00000000..9f43c6c4
--- /dev/null
+++ b/src/common/fp/fused.cpp
@@ -0,0 +1,120 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include <tuple>
+
+#include "common/assert.h"
+#include "common/fp/fused.h"
+#include "common/fp/mantissa_util.h"
+#include "common/fp/unpacked.h"
+#include "common/u128.h"
+
+namespace Dynarmic::FP {
+
+// Bit index that NormalizeUnpacked moves the highest set mantissa bit to.
+constexpr size_t normalized_point_position = 62;
+// Twice the above: the matching bit index in the 128-bit product of two mantissas.
+constexpr size_t product_point_position = normalized_point_position * 2;
+
+// Shifts op.mantissa left so that its highest set bit lands on
+// normalized_point_position and lowers op.exponent by the same shift amount.
+// The highest set bit is asserted to start out below that position.
+static FPUnpacked NormalizeUnpacked(FPUnpacked op) {
+    constexpr int desired_highest = static_cast<int>(normalized_point_position);
+
+    const int highest_bit = Common::HighestSetBit(op.mantissa);
+    DEBUG_ASSERT(highest_bit < desired_highest);
+
+    const int offset = desired_highest - highest_bit;
+    op.mantissa <<= offset;
+    op.exponent -= offset;
+    return op;
+}
+
+// Computes addend + (op1 * op2) on unpacked operands. The product is kept as
+// a full 128-bit value while it is combined with the addend. Where a 128-bit
+// result is narrowed to its upper 64 bits, a non-zero lower half is ORed into
+// bit 0 of the returned mantissa.
+FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
+    addend = NormalizeUnpacked(addend);
+    op1 = NormalizeUnpacked(op1);
+    op2 = NormalizeUnpacked(op2);
+
+    const bool product_sign = op1.sign != op2.sign;
+    const auto [product_exponent, product_value] = [op1, op2]{
+        int exponent = op1.exponent + op2.exponent;
+        u128 value = Multiply64To128(op1.mantissa, op2.mantissa);
+        // If bit (product_point_position + 1) of the product is set, shift the
+        // product down by one and compensate in the exponent.
+        if (value.Bit<product_point_position + 1>()) {
+            value = value >> 1;
+            exponent++;
+        }
+        return std::make_tuple(exponent, value);
+    }();
+
+    // A zero product leaves the (normalized) addend as the result.
+    if (product_value == 0) {
+        return addend;
+    }
+
+    // A zero addend leaves the product, narrowed to 64 bits, as the result.
+    if (addend.mantissa == 0) {
+        return FPUnpacked{product_sign, product_exponent + 64, product_value.upper | u64(product_value.lower != 0)};
+    }
+
+    const int exp_diff = product_exponent - (addend.exponent - normalized_point_position);
+
+    if (product_sign == addend.sign) {
+        // Addition
+
+        if (exp_diff <= 0) {
+            // addend > product
+            const u64 result = addend.mantissa + StickyLogicalShiftRight(product_value, normalized_point_position - exp_diff).lower;
+            return FPUnpacked{addend.sign, addend.exponent, result};
+        }
+
+        // addend < product
+        const u128 result = product_value + StickyLogicalShiftRight(addend.mantissa, exp_diff - normalized_point_position);
+        return FPUnpacked{product_sign, product_exponent + 64, result.upper | u64(result.lower != 0)};
+    }
+
+    // Subtraction
+
+    const u128 addend_long = u128(addend.mantissa) << normalized_point_position;
+
+    bool result_sign;
+    u128 result;
+    int result_exponent;
+
+    if (exp_diff == 0 && product_value > addend_long) {
+        result_sign = product_sign;
+        result_exponent = product_exponent;
+        result = product_value - addend_long;
+    } else if (exp_diff <= 0) {
+        result_sign = !product_sign;
+        result_exponent = addend.exponent - normalized_point_position;
+        result = addend_long - StickyLogicalShiftRight(product_value, -exp_diff);
+    } else {
+        result_sign = product_sign;
+        result_exponent = product_exponent;
+        result = product_value - StickyLogicalShiftRight(addend_long, exp_diff);
+    }
+
+    // The difference has no bits in the upper half: return the lower 64 bits as-is.
+    if (result.upper == 0) {
+        return FPUnpacked{result_sign, result_exponent, result.lower};
+    }
+
+    // Otherwise shift so the highest set bit of the upper half sits at
+    // normalized_point_position, then narrow to the upper 64 bits.
+    const int required_shift = normalized_point_position - Common::HighestSetBit(result.upper);
+    result = result << required_shift;
+    result_exponent -= required_shift;
+    return FPUnpacked{result_sign, result_exponent + 64, result.upper | u64(result.lower != 0)};
+}
+
+} // namespace Dynarmic::FP
diff --git a/src/common/fp/fused.h b/src/common/fp/fused.h
new file mode 100644
index
00000000..da65008f
--- /dev/null
+++ b/src/common/fp/fused.h
@@ -0,0 +1,16 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+#include "common/fp/unpacked.h"
+
+namespace Dynarmic::FP {
+
+// Computes addend + (op1 * op2) on unpacked operands; defined in fused.cpp.
+FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2);
+
+} // namespace Dynarmic::FP
diff --git a/src/common/fp/op.h b/src/common/fp/op.h
index a2ef9dc4..1fee5180 100644
--- a/src/common/fp/op.h
+++ b/src/common/fp/op.h
@@ -6,6 +6,7 @@
 
 #pragma once
 
+#include "common/fp/op/FPMulAdd.h"
 #include "common/fp/op/FPRoundInt.h"
 #include "common/fp/op/FPRSqrtEstimate.h"
 #include "common/fp/op/FPToFixed.h"
diff --git a/src/common/fp/op/FPMulAdd.cpp b/src/common/fp/op/FPMulAdd.cpp
new file mode 100644
index 00000000..5cabe374
--- /dev/null
+++ b/src/common/fp/op/FPMulAdd.cpp
@@ -0,0 +1,93 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include "common/common_types.h"
+#include "common/fp/fpcr.h"
+#include "common/fp/fpsr.h"
+#include "common/fp/fused.h"
+#include "common/fp/info.h"
+#include "common/fp/op/FPMulAdd.h"
+#include "common/fp/process_exception.h"
+#include "common/fp/process_nan.h"
+#include "common/fp/unpacked.h"
+
+namespace Dynarmic::FP {
+
+// Computes addend + (op1 * op2) on packed floating-point values of type FPT
+// (explicitly instantiated below for u32 and u64).
+//
+// fpcr supplies the rounding mode and is forwarded to the unpack, NaN
+// processing, exception and rounding helpers; fpsr is passed to the same
+// helpers by reference.
+//
+// Returns the default NaN (after raising FPExc::InvalidOp) for infinity * zero
+// and for an infinite addend and infinite product of opposite signs; otherwise
+// a NaN from FPProcessNaNs3, an infinity, a zero, or the rounded fused result.
+template<typename FPT>
+FPT FPMulAdd(FPT addend, FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) {
+    const RoundingMode rounding = fpcr.RMode();
+
+    const auto [typeA, signA, valueA] = FPUnpack<FPT>(addend, fpcr, fpsr);
+    const auto [type1, sign1, value1] = FPUnpack<FPT>(op1, fpcr, fpsr);
+    const auto [type2, sign2, value2] = FPUnpack<FPT>(op2, fpcr, fpsr);
+
+    const bool infA = typeA == FPType::Infinity;
+    const bool inf1 = type1 == FPType::Infinity;
+    const bool inf2 = type2 == FPType::Infinity;
+    const bool zeroA = typeA == FPType::Zero;
+    const bool zero1 = type1 == FPType::Zero;
+    const bool zero2 = type2 == FPType::Zero;
+
+    const auto maybe_nan = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, fpcr, fpsr);
+
+    // A quiet-NaN addend combined with (infinity * zero) yields the default NaN
+    // and InvalidOp; this is checked before the FPProcessNaNs3 result is used.
+    if (typeA == FPType::QNaN && ((inf1 && zero2) || (zero1 && inf2))) {
+        FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
+        return FPInfo<FPT>::DefaultNaN();
+    }
+
+    if (maybe_nan) {
+        return *maybe_nan;
+    }
+
+    // Calculate properties of product (op1 * op2).
+    const bool signP = sign1 != sign2;
+    const bool infP = inf1 || inf2;
+    const bool zeroP = zero1 || zero2;
+
+    // Invalid operation: (infinity * zero), or an infinite addend and an
+    // infinite product of opposite signs.
+    if ((inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP)) {
+        FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
+        return FPInfo<FPT>::DefaultNaN();
+    }
+
+    // Handle infinities
+    if ((infA && !signA) || (infP && !signP)) {
+        return FPInfo<FPT>::Infinity(false);
+    }
+    if ((infA && signA) || (infP && signP)) {
+        return FPInfo<FPT>::Infinity(true);
+    }
+
+    // Result is exactly zero
+    if (zeroA && zeroP && signA == signP) {
+        return FPInfo<FPT>::Zero(signA);
+    }
+
+    const FPUnpacked result_value = FusedMulAdd(valueA, value1, value2);
+    // A zero mantissa gives a zero whose sign is set only when rounding towards minus infinity.
+    if (result_value.mantissa == 0) {
+        return FPInfo<FPT>::Zero(rounding == RoundingMode::TowardsMinusInfinity);
+    }
+    return FPRound<FPT>(result_value, fpcr, fpsr);
+}
+
+template u32 FPMulAdd<u32>(u32 addend, u32 op1, u32 op2, FPCR fpcr, FPSR& fpsr);
+template u64 FPMulAdd<u64>(u64 addend, u64 op1, u64 op2, FPCR fpcr, FPSR& fpsr);
+
+} // namespace Dynarmic::FP
diff --git a/src/common/fp/op/FPMulAdd.h b/src/common/fp/op/FPMulAdd.h
new file mode 100644
index 00000000..7b1556e4
--- /dev/null
+++ b/src/common/fp/op/FPMulAdd.h
@@ -0,0 +1,19 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+namespace Dynarmic::FP {
+
+class FPCR;
+class FPSR;
+
+// Fused multiply-add on packed values: addend + (op1 * op2).
+// Defined in FPMulAdd.cpp, which explicitly instantiates it for u32 and u64.
+template<typename FPT>
+FPT FPMulAdd(FPT addend, FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr);
+
+} // namespace Dynarmic::FP