fp: Implement FPMulAdd
This commit is contained in:
parent
53a8c15d12
commit
e199887fbc
6 changed files with 217 additions and 0 deletions
|
@ -18,9 +18,13 @@ add_library(dynarmic
|
||||||
common/crc32.h
|
common/crc32.h
|
||||||
common/fp/fpcr.h
|
common/fp/fpcr.h
|
||||||
common/fp/fpsr.h
|
common/fp/fpsr.h
|
||||||
|
common/fp/fused.cpp
|
||||||
|
common/fp/fused.h
|
||||||
common/fp/info.h
|
common/fp/info.h
|
||||||
common/fp/mantissa_util.h
|
common/fp/mantissa_util.h
|
||||||
common/fp/op.h
|
common/fp/op.h
|
||||||
|
common/fp/op/FPMulAdd.cpp
|
||||||
|
common/fp/op/FPMulAdd.h
|
||||||
common/fp/op/FPRoundInt.cpp
|
common/fp/op/FPRoundInt.cpp
|
||||||
common/fp/op/FPRoundInt.h
|
common/fp/op/FPRoundInt.h
|
||||||
common/fp/op/FPRSqrtEstimate.cpp
|
common/fp/op/FPRSqrtEstimate.cpp
|
||||||
|
|
101
src/common/fp/fused.cpp
Normal file
101
src/common/fp/fused.cpp
Normal file
|
@ -0,0 +1,101 @@
|
||||||
|
/* This file is part of the dynarmic project.
|
||||||
|
* Copyright (c) 2018 MerryMage
|
||||||
|
* This software may be used and distributed according to the terms of the GNU
|
||||||
|
* General Public License version 2 or any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "common/assert.h"
|
||||||
|
#include "common/fp/mantissa_util.h"
|
||||||
|
#include "common/fp/unpacked.h"
|
||||||
|
#include "common/u128.h"
|
||||||
|
|
||||||
|
namespace Dynarmic::FP {
|
||||||
|
|
||||||
|
constexpr size_t normalized_point_position = 62;
|
||||||
|
constexpr size_t product_point_position = normalized_point_position * 2;
|
||||||
|
|
||||||
|
/// Rescales an unpacked value so that the most significant set bit of its mantissa lands on
/// bit normalized_point_position. The exponent is lowered by the same amount the mantissa is
/// shifted up, so mantissa * 2^exponent is unchanged.
///
/// Precondition (checked in debug builds): the highest set bit of op.mantissa is strictly below
/// normalized_point_position.
static FPUnpacked NormalizeUnpacked(FPUnpacked op) {
    constexpr int desired_highest = static_cast<int>(normalized_point_position);
    const int highest_bit = Common::HighestSetBit(op.mantissa);
    DEBUG_ASSERT(highest_bit < desired_highest);

    // Number of bit positions the mantissa has to move up to reach the target position.
    const int shift = desired_highest - highest_bit;
    op.exponent = op.exponent - shift;
    op.mantissa = op.mantissa << shift;
    return op;
}
|
||||||
|
|
||||||
|
/// Narrows a 128-bit mantissa to 64 bits: keeps the upper half and ORs a sticky bit into bit 0
/// when any bit of the discarded lower half is set. The exponent is raised by 64 to account for
/// the 64 low bits that were dropped.
static FPUnpacked ReduceMantissa(bool sign, int exponent, const u128& mantissa) {
    return FPUnpacked{sign, exponent + 64, mantissa.upper | u64(mantissa.lower != 0)};
}

/// Computes addend + (op1 * op2) on unpacked operands without rounding the intermediate product.
///
/// All three operands are first normalized so that a non-zero mantissa has its top bit at
/// normalized_point_position. The mantissas of op1 and op2 are multiplied into a 128-bit product,
/// which is then added to or subtracted from the addend depending on the operand signs.
///
/// @param addend Value added to the product.
/// @param op1    First multiplicand.
/// @param op2    Second multiplicand.
/// @return The unrounded result. Its mantissa may be zero (exact cancellation), in which case the
///         sign and exponent fields are not meaningful to the caller.
FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2) {
    // Signed copy of the point position. normalized_point_position is a size_t, and mixing it
    // directly with the signed exponents below would evaluate the differences as unsigned
    // and only yield the right (possibly negative) value after wrapping back to int.
    constexpr int point_position = static_cast<int>(normalized_point_position);

    addend = NormalizeUnpacked(addend);
    op1 = NormalizeUnpacked(op1);
    op2 = NormalizeUnpacked(op2);

    const bool product_sign = op1.sign != op2.sign;
    const auto [product_exponent, product_value] = [op1, op2]{
        int exponent = op1.exponent + op2.exponent;
        u128 value = Multiply64To128(op1.mantissa, op2.mantissa);
        // Renormalize the product so its top bit is at product_point_position when the
        // multiplication carried into the next bit.
        if (value.Bit<product_point_position + 1>()) {
            value = value >> 1;
            exponent++;
        }
        return std::make_tuple(exponent, value);
    }();

    // A zero product contributes nothing: the result is the (normalized) addend.
    if (product_value == 0) {
        return addend;
    }

    // A zero addend contributes nothing: the result is the product, narrowed to 64 bits.
    if (addend.mantissa == 0) {
        return ReduceMantissa(product_sign, product_exponent, product_value);
    }

    // Exponent of the product relative to the exponent the addend would have if its mantissa
    // were widened to the product's point position (see addend_long below).
    const int exp_diff = product_exponent - (addend.exponent - point_position);

    if (product_sign == addend.sign) {
        // Addition

        if (exp_diff <= 0) {
            // Addend has the larger (or equal) scale: align the product down to the addend's
            // 64-bit mantissa and add there.
            const u64 result = addend.mantissa + StickyLogicalShiftRight(product_value, point_position - exp_diff).lower;
            return FPUnpacked{addend.sign, addend.exponent, result};
        }

        // Product has the larger scale: align the addend to the 128-bit product.
        // NOTE(review): the shift amount is negative whenever exp_diff < point_position; this
        // relies on StickyLogicalShiftRight treating a negative amount as a left shift - confirm
        // against its definition in common/u128.
        const u128 result = product_value + StickyLogicalShiftRight(addend.mantissa, exp_diff - point_position);
        return ReduceMantissa(product_sign, product_exponent, result);
    }

    // Subtraction

    // Addend widened so that its point position matches the product's.
    const u128 addend_long = u128(addend.mantissa) << point_position;

    bool result_sign;
    u128 result;
    int result_exponent;

    if (exp_diff == 0 && product_value > addend_long) {
        // Same scale, product has the larger magnitude.
        result_sign = product_sign;
        result_exponent = product_exponent;
        result = product_value - addend_long;
    } else if (exp_diff <= 0) {
        // Addend has the larger (or equal) magnitude: the result takes the addend's sign, which
        // in this branch is the opposite of the product's sign.
        result_sign = !product_sign;
        result_exponent = addend.exponent - point_position;
        result = addend_long - StickyLogicalShiftRight(product_value, -exp_diff);
    } else {
        // Product has the larger scale.
        result_sign = product_sign;
        result_exponent = product_exponent;
        result = product_value - StickyLogicalShiftRight(addend_long, exp_diff);
    }

    // Everything fits in the low 64 bits (including exact cancellation to zero): no narrowing
    // and therefore no exponent adjustment is needed.
    if (result.upper == 0) {
        return FPUnpacked{result_sign, result_exponent, result.lower};
    }

    // Cancellation may have cleared leading bits; shift the result back up so that the top bit
    // of the upper half is at point_position before narrowing to 64 bits.
    const int required_shift = point_position - Common::HighestSetBit(result.upper);
    result = result << required_shift;
    result_exponent -= required_shift;
    return ReduceMantissa(result_sign, result_exponent, result);
}
|
||||||
|
|
||||||
|
} // namespace Dynarmic::FP
|
15
src/common/fp/fused.h
Normal file
15
src/common/fp/fused.h
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
/* This file is part of the dynarmic project.
|
||||||
|
* Copyright (c) 2018 MerryMage
|
||||||
|
* This software may be used and distributed according to the terms of the GNU
|
||||||
|
* General Public License version 2 or any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "common/fp/unpacked.h"
|
||||||
|
|
||||||
|
namespace Dynarmic::FP {
|
||||||
|
|
||||||
|
/// Computes addend + (op1 * op2) on unpacked operands. The product of the two mantissas is kept
/// at 128-bit width until it has been combined with the addend; the returned mantissa is 64 bits
/// wide, with any discarded low bits folded into a sticky bit 0. The returned value is not
/// rounded, and its mantissa is zero when the sum cancels exactly.
FPUnpacked FusedMulAdd(FPUnpacked addend, FPUnpacked op1, FPUnpacked op2);
|
||||||
|
|
||||||
|
} // namespace Dynarmic::FP
|
|
@ -6,6 +6,7 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include "common/fp/op/FPMulAdd.h"
|
||||||
#include "common/fp/op/FPRoundInt.h"
|
#include "common/fp/op/FPRoundInt.h"
|
||||||
#include "common/fp/op/FPRSqrtEstimate.h"
|
#include "common/fp/op/FPRSqrtEstimate.h"
|
||||||
#include "common/fp/op/FPToFixed.h"
|
#include "common/fp/op/FPToFixed.h"
|
||||||
|
|
79
src/common/fp/op/FPMulAdd.cpp
Normal file
79
src/common/fp/op/FPMulAdd.cpp
Normal file
|
@ -0,0 +1,79 @@
|
||||||
|
/* This file is part of the dynarmic project.
|
||||||
|
* Copyright (c) 2018 MerryMage
|
||||||
|
* This software may be used and distributed according to the terms of the GNU
|
||||||
|
* General Public License version 2 or any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "common/common_types.h"
|
||||||
|
#include "common/fp/fpcr.h"
|
||||||
|
#include "common/fp/fpsr.h"
|
||||||
|
#include "common/fp/info.h"
|
||||||
|
#include "common/fp/fused.h"
|
||||||
|
#include "common/fp/op/FPMulAdd.h"
|
||||||
|
#include "common/fp/process_exception.h"
|
||||||
|
#include "common/fp/process_nan.h"
|
||||||
|
#include "common/fp/unpacked.h"
|
||||||
|
|
||||||
|
namespace Dynarmic::FP {
|
||||||
|
|
||||||
|
template<typename FPT>
|
||||||
|
FPT FPMulAdd(FPT addend, FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) {
|
||||||
|
const RoundingMode rounding = fpcr.RMode();
|
||||||
|
|
||||||
|
const auto [typeA, signA, valueA] = FPUnpack(addend, fpcr, fpsr);
|
||||||
|
const auto [type1, sign1, value1] = FPUnpack(op1, fpcr, fpsr);
|
||||||
|
const auto [type2, sign2, value2] = FPUnpack(op2, fpcr, fpsr);
|
||||||
|
|
||||||
|
const bool infA = typeA == FPType::Infinity;
|
||||||
|
const bool inf1 = type1 == FPType::Infinity;
|
||||||
|
const bool inf2 = type2 == FPType::Infinity;
|
||||||
|
const bool zeroA = typeA == FPType::Zero;
|
||||||
|
const bool zero1 = type1 == FPType::Zero;
|
||||||
|
const bool zero2 = type2 == FPType::Zero;
|
||||||
|
|
||||||
|
const auto maybe_nan = FPProcessNaNs3<FPT>(typeA, type1, type2, addend, op1, op2, fpcr, fpsr);
|
||||||
|
|
||||||
|
if (typeA == FPType::QNaN && ((inf1 && zero2) || (zero1 && inf2))) {
|
||||||
|
FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
|
||||||
|
return FPInfo<FPT>::DefaultNaN();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (maybe_nan) {
|
||||||
|
return *maybe_nan;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate properties of product (op1 * op2).
|
||||||
|
const bool signP = sign1 != sign2;
|
||||||
|
const bool infP = inf1 || inf2;
|
||||||
|
const bool zeroP = zero1 || zero2;
|
||||||
|
|
||||||
|
// Raise NaN on (inf * inf) of opposite signs or (inf * zero).
|
||||||
|
if ((inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP)) {
|
||||||
|
FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
|
||||||
|
return FPInfo<FPT>::DefaultNaN();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle infinities
|
||||||
|
if ((infA && !signA) || (infP && !signP)) {
|
||||||
|
return FPInfo<FPT>::Infinity(false);
|
||||||
|
}
|
||||||
|
if ((infA && signA) || (infP && signP)) {
|
||||||
|
return FPInfo<FPT>::Infinity(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Result is exactly zero
|
||||||
|
if (zeroA && zeroP && signA == signP) {
|
||||||
|
return FPInfo<FPT>::Zero(signA);
|
||||||
|
}
|
||||||
|
|
||||||
|
const FPUnpacked result_value = FusedMulAdd(valueA, value1, value2);
|
||||||
|
if (result_value.mantissa == 0) {
|
||||||
|
return FPInfo<FPT>::Zero(rounding == RoundingMode::TowardsMinusInfinity);
|
||||||
|
}
|
||||||
|
return FPRound<FPT>(result_value, fpcr, fpsr);
|
||||||
|
}
|
||||||
|
|
||||||
|
template u32 FPMulAdd<u32>(u32 addend, u32 op1, u32 op2, FPCR fpcr, FPSR& fpsr);
|
||||||
|
template u64 FPMulAdd<u64>(u64 addend, u64 op1, u64 op2, FPCR fpcr, FPSR& fpsr);
|
||||||
|
|
||||||
|
} // namespace Dynarmic::FP
|
17
src/common/fp/op/FPMulAdd.h
Normal file
17
src/common/fp/op/FPMulAdd.h
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
/* This file is part of the dynarmic project.
|
||||||
|
* Copyright (c) 2018 MerryMage
|
||||||
|
* This software may be used and distributed according to the terms of the GNU
|
||||||
|
* General Public License version 2 or any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
namespace Dynarmic::FP {
|
||||||
|
|
||||||
|
class FPCR;
|
||||||
|
class FPSR;
|
||||||
|
|
||||||
|
/// Fused multiply-add on packed floating-point values: returns addend + (op1 * op2).
/// fpcr is taken by value and fpsr by reference. The template is defined in FPMulAdd.cpp,
/// which explicitly instantiates it for u32 and u64.
template<typename FPT>
|
||||||
|
FPT FPMulAdd(FPT addend, FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr);
|
||||||
|
|
||||||
|
} // namespace Dynarmic::FP
|
Loading…
Reference in a new issue