common/fp/info: Make half-precision info struct functions return correctly sized types
These functions originally returned u32 to help prevent bugs caused by C++'s integer promotion rules for types smaller than int (and the signedness changes that come with them). However, given that the bulk of these functions' usages are on exit paths, they can return the correctly sized type instead, which avoids the need to cast at every usage point.
This commit is contained in:
parent
699ad98b2a
commit
c9777ef997
10 changed files with 44 additions and 33 deletions
|
@ -20,21 +20,32 @@ struct FPInfo<u16> {
|
|||
static constexpr size_t exponent_width = 5;
|
||||
static constexpr size_t explicit_mantissa_width = 10;
|
||||
static constexpr size_t mantissa_width = explicit_mantissa_width + 1;
|
||||
|
||||
|
||||
static constexpr u32 implicit_leading_bit = u32(1) << explicit_mantissa_width;
|
||||
static constexpr u32 sign_mask = 0x8000;
|
||||
static constexpr u32 exponent_mask = 0x7C00;
|
||||
static constexpr u32 mantissa_mask = 0x3FF;
|
||||
static constexpr u32 mantissa_msb = 0x200;
|
||||
|
||||
|
||||
static constexpr int exponent_min = -14;
|
||||
static constexpr int exponent_max = 15;
|
||||
static constexpr int exponent_bias = 15;
|
||||
|
||||
static constexpr u32 Zero(bool sign) { return sign ? sign_mask : 0; }
|
||||
static constexpr u32 Infinity(bool sign) { return exponent_mask | Zero(sign); }
|
||||
static constexpr u32 MaxNormal(bool sign) { return (exponent_mask - 1) | Zero(sign); }
|
||||
static constexpr u32 DefaultNaN() { return exponent_mask | (u32(1) << (explicit_mantissa_width - 1)); }
|
||||
|
||||
static constexpr u16 Zero(bool sign) {
|
||||
return sign ? static_cast<u16>(sign_mask) : u16{0};
|
||||
}
|
||||
|
||||
static constexpr u16 Infinity(bool sign) {
|
||||
return static_cast<u16>(exponent_mask | Zero(sign));
|
||||
}
|
||||
|
||||
static constexpr u16 MaxNormal(bool sign) {
|
||||
return static_cast<u16>((exponent_mask - 1) | Zero(sign));
|
||||
}
|
||||
|
||||
static constexpr u16 DefaultNaN() {
|
||||
return static_cast<u16>(exponent_mask | (u32(1) << (explicit_mantissa_width - 1)));
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
|
@ -88,7 +99,7 @@ struct FPInfo<u64> {
|
|||
template<typename FPT, bool sign, int exponent, FPT value>
|
||||
constexpr FPT FPValue() {
|
||||
if constexpr (value == 0) {
|
||||
return FPT(FPInfo<FPT>::Zero(sign));
|
||||
return FPInfo<FPT>::Zero(sign);
|
||||
}
|
||||
|
||||
constexpr int point_position = static_cast<int>(FPInfo<FPT>::explicit_mantissa_width);
|
||||
|
|
|
@ -35,7 +35,7 @@ FPT FPMulAdd(FPT addend, FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) {
|
|||
|
||||
if (typeA == FPType::QNaN && ((inf1 && zero2) || (zero1 && inf2))) {
|
||||
FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
|
||||
return FPT(FPInfo<FPT>::DefaultNaN());
|
||||
return FPInfo<FPT>::DefaultNaN();
|
||||
}
|
||||
|
||||
if (maybe_nan) {
|
||||
|
@ -50,25 +50,25 @@ FPT FPMulAdd(FPT addend, FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) {
|
|||
// Raise NaN on (inf * inf) of opposite signs or (inf * zero).
|
||||
if ((inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP)) {
|
||||
FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
|
||||
return FPT(FPInfo<FPT>::DefaultNaN());
|
||||
return FPInfo<FPT>::DefaultNaN();
|
||||
}
|
||||
|
||||
// Handle infinities
|
||||
if ((infA && !signA) || (infP && !signP)) {
|
||||
return FPT(FPInfo<FPT>::Infinity(false));
|
||||
return FPInfo<FPT>::Infinity(false);
|
||||
}
|
||||
if ((infA && signA) || (infP && signP)) {
|
||||
return FPT(FPInfo<FPT>::Infinity(true));
|
||||
return FPInfo<FPT>::Infinity(true);
|
||||
}
|
||||
|
||||
// Result is exactly zero
|
||||
if (zeroA && zeroP && signA == signP) {
|
||||
return FPT(FPInfo<FPT>::Zero(signA));
|
||||
return FPInfo<FPT>::Zero(signA);
|
||||
}
|
||||
|
||||
const FPUnpacked result_value = FusedMulAdd(valueA, value1, value2);
|
||||
if (result_value.mantissa == 0) {
|
||||
return FPT(FPInfo<FPT>::Zero(rounding == RoundingMode::TowardsMinusInfinity));
|
||||
return FPInfo<FPT>::Zero(rounding == RoundingMode::TowardsMinusInfinity);
|
||||
}
|
||||
return FPRound<FPT>(result_value, fpcr, fpsr);
|
||||
}
|
||||
|
|
|
@ -27,16 +27,16 @@ FPT FPRSqrtEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
|
|||
|
||||
if (type == FPType::Zero) {
|
||||
FPProcessException(FPExc::DivideByZero, fpcr, fpsr);
|
||||
return FPT(FPInfo<FPT>::Infinity(sign));
|
||||
return FPInfo<FPT>::Infinity(sign);
|
||||
}
|
||||
|
||||
if (sign) {
|
||||
FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
|
||||
return FPT(FPInfo<FPT>::DefaultNaN());
|
||||
return FPInfo<FPT>::DefaultNaN();
|
||||
}
|
||||
|
||||
if (type == FPType::Infinity) {
|
||||
return FPT(FPInfo<FPT>::Zero(false));
|
||||
return FPInfo<FPT>::Zero(false);
|
||||
}
|
||||
|
||||
const int result_exponent = (-(value.exponent + 1)) >> 1;
|
||||
|
|
|
@ -37,7 +37,7 @@ FPT FPRSqrtStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) {
|
|||
}
|
||||
|
||||
if (inf1 || inf2) {
|
||||
return FPT(FPInfo<FPT>::Infinity(sign1 != sign2));
|
||||
return FPInfo<FPT>::Infinity(sign1 != sign2);
|
||||
}
|
||||
|
||||
// result_value = (3.0 + (value1 * value2)) / 2.0
|
||||
|
@ -45,7 +45,7 @@ FPT FPRSqrtStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) {
|
|||
result_value.exponent--;
|
||||
|
||||
if (result_value.mantissa == 0) {
|
||||
return FPT(FPInfo<FPT>::Zero(fpcr.RMode() == RoundingMode::TowardsMinusInfinity));
|
||||
return FPInfo<FPT>::Zero(fpcr.RMode() == RoundingMode::TowardsMinusInfinity);
|
||||
}
|
||||
return FPRound<FPT>(result_value, fpcr, fpsr);
|
||||
}
|
||||
|
|
|
@ -31,12 +31,12 @@ FPT FPRecipEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
|
|||
}
|
||||
|
||||
if (type == FPType::Infinity) {
|
||||
return FPT(FPInfo<FPT>::Zero(sign));
|
||||
return FPInfo<FPT>::Zero(sign);
|
||||
}
|
||||
|
||||
if (type == FPType::Zero) {
|
||||
FPProcessException(FPExc::DivideByZero, fpcr, fpsr);
|
||||
return FPT(FPInfo<FPT>::Infinity(sign));
|
||||
return FPInfo<FPT>::Infinity(sign);
|
||||
}
|
||||
|
||||
if (value.exponent < FPInfo<FPT>::exponent_min - 2) {
|
||||
|
@ -58,13 +58,13 @@ FPT FPRecipEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
|
|||
|
||||
FPProcessException(FPExc::Overflow, fpcr, fpsr);
|
||||
FPProcessException(FPExc::Inexact, fpcr, fpsr);
|
||||
return overflow_to_inf ? FPT(FPInfo<FPT>::Infinity(sign)) : FPT(FPInfo<FPT>::MaxNormal(sign));
|
||||
return overflow_to_inf ? FPInfo<FPT>::Infinity(sign) : FPInfo<FPT>::MaxNormal(sign);
|
||||
}
|
||||
|
||||
if ((fpcr.FZ() && !std::is_same_v<FPT, u16>) || (fpcr.FZ16() && std::is_same_v<FPT, u16>)) {
|
||||
if (value.exponent >= -FPInfo<FPT>::exponent_min) {
|
||||
fpsr.UFC(true);
|
||||
return FPT(FPInfo<FPT>::Zero(sign));
|
||||
return FPInfo<FPT>::Zero(sign);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -87,7 +87,7 @@ FPT FPRecipEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
|
|||
}
|
||||
}
|
||||
|
||||
const FPT bits_sign = FPT(FPInfo<FPT>::Zero(sign));
|
||||
const FPT bits_sign = FPInfo<FPT>::Zero(sign);
|
||||
const FPT bits_exponent = static_cast<FPT>(result_exponent + FPInfo<FPT>::exponent_bias);
|
||||
const FPT bits_mantissa = static_cast<FPT>(estimate);
|
||||
return FPT((bits_exponent << FPInfo<FPT>::explicit_mantissa_width) | (bits_mantissa & FPInfo<FPT>::mantissa_mask) | bits_sign);
|
||||
|
|
|
@ -38,7 +38,7 @@ FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr) {
|
|||
return FPProcessNaN(type, op, fpcr, fpsr);
|
||||
}
|
||||
|
||||
const FPT sign_bits = FPT(FPInfo<FPT>::Zero(sign));
|
||||
const FPT sign_bits = FPInfo<FPT>::Zero(sign);
|
||||
const FPT exponent = DetermineExponentValue<FPT>(op);
|
||||
|
||||
// Zero and denormals
|
||||
|
|
|
@ -37,14 +37,14 @@ FPT FPRecipStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) {
|
|||
}
|
||||
|
||||
if (inf1 || inf2) {
|
||||
return FPT(FPInfo<FPT>::Infinity(sign1 != sign2));
|
||||
return FPInfo<FPT>::Infinity(sign1 != sign2);
|
||||
}
|
||||
|
||||
// result_value = 2.0 + (value1 * value2)
|
||||
const FPUnpacked result_value = FusedMulAdd(ToNormalized(false, 0, 2), value1, value2);
|
||||
|
||||
if (result_value.mantissa == 0) {
|
||||
return FPT(FPInfo<FPT>::Zero(fpcr.RMode() == RoundingMode::TowardsMinusInfinity));
|
||||
return FPInfo<FPT>::Zero(fpcr.RMode() == RoundingMode::TowardsMinusInfinity);
|
||||
}
|
||||
return FPRound<FPT>(result_value, fpcr, fpsr);
|
||||
}
|
||||
|
|
|
@ -31,11 +31,11 @@ u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr)
|
|||
}
|
||||
|
||||
if (type == FPType::Infinity) {
|
||||
return FPT(FPInfo<FPT>::Infinity(sign));
|
||||
return FPInfo<FPT>::Infinity(sign);
|
||||
}
|
||||
|
||||
if (type == FPType::Zero) {
|
||||
return FPT(FPInfo<FPT>::Zero(sign));
|
||||
return FPInfo<FPT>::Zero(sign);
|
||||
}
|
||||
|
||||
// Reshift decimal point back to bit zero.
|
||||
|
@ -79,7 +79,7 @@ u64 FPRoundInt(FPT op, FPCR fpcr, RoundingMode rounding, bool exact, FPSR& fpsr)
|
|||
const u64 abs_int_result = new_sign ? Safe::Negate<u64>(int_result) : static_cast<u64>(int_result);
|
||||
|
||||
const FPT result = int_result == 0
|
||||
? FPT(FPInfo<FPT>::Zero(sign))
|
||||
? FPInfo<FPT>::Zero(sign)
|
||||
: FPRound<FPT>(FPUnpacked{new_sign, normalized_point_position, abs_int_result}, fpcr, RoundingMode::TowardsZero, fpsr);
|
||||
|
||||
if (error != ResidualError::Zero && exact) {
|
||||
|
|
|
@ -31,7 +31,7 @@ FPT FPProcessNaN(FPType type, FPT op, FPCR fpcr, FPSR& fpsr) {
|
|||
}
|
||||
|
||||
if (fpcr.DN()) {
|
||||
result = FPT(FPInfo<FPT>::DefaultNaN());
|
||||
result = FPInfo<FPT>::DefaultNaN();
|
||||
}
|
||||
|
||||
return result;
|
||||
|
|
|
@ -90,7 +90,7 @@ FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) {
|
|||
|
||||
if (((!isFP16 && fpcr.FZ()) || (isFP16 && fpcr.FZ16())) && exponent < minimum_exp) {
|
||||
fpsr.UFC(true);
|
||||
return FPT(FPInfo<FPT>::Zero(sign));
|
||||
return FPInfo<FPT>::Zero(sign);
|
||||
}
|
||||
|
||||
int biased_exp = std::max<int>(exponent - minimum_exp + 1, 0);
|
||||
|
@ -153,7 +153,7 @@ FPT FPRoundBase(FPUnpacked op, FPCR fpcr, RoundingMode rounding, FPSR& fpsr) {
|
|||
#endif
|
||||
constexpr int max_biased_exp = (1 << E) - 1;
|
||||
if (biased_exp >= max_biased_exp) {
|
||||
result = overflow_to_inf ? FPT(FPInfo<FPT>::Infinity(sign)) : FPT(FPInfo<FPT>::MaxNormal(sign));
|
||||
result = overflow_to_inf ? FPInfo<FPT>::Infinity(sign) : FPInfo<FPT>::MaxNormal(sign);
|
||||
FPProcessException(FPExc::Overflow, fpcr, fpsr);
|
||||
FPProcessException(FPExc::Inexact, fpcr, fpsr);
|
||||
} else {
|
||||
|
|
Loading…
Reference in a new issue