fp: Implement FPRSqrtStepFused
This commit is contained in:
parent
b0ff35fcd1
commit
6eb069e80d
5 changed files with 78 additions and 0 deletions
|
@ -29,6 +29,8 @@ add_library(dynarmic
|
||||||
common/fp/op/FPRoundInt.h
|
common/fp/op/FPRoundInt.h
|
||||||
common/fp/op/FPRSqrtEstimate.cpp
|
common/fp/op/FPRSqrtEstimate.cpp
|
||||||
common/fp/op/FPRSqrtEstimate.h
|
common/fp/op/FPRSqrtEstimate.h
|
||||||
|
common/fp/op/FPRSqrtStepFused.cpp
|
||||||
|
common/fp/op/FPRSqrtStepFused.h
|
||||||
common/fp/op/FPToFixed.cpp
|
common/fp/op/FPToFixed.cpp
|
||||||
common/fp/op/FPToFixed.h
|
common/fp/op/FPToFixed.h
|
||||||
common/fp/process_exception.cpp
|
common/fp/process_exception.cpp
|
||||||
|
|
|
@ -33,6 +33,7 @@ struct FPInfo<u32> {
|
||||||
static constexpr u32 Infinity(bool sign) { return exponent_mask | Zero(sign); }
|
static constexpr u32 Infinity(bool sign) { return exponent_mask | Zero(sign); }
|
||||||
static constexpr u32 MaxNormal(bool sign) { return (exponent_mask - 1) | Zero(sign); }
|
static constexpr u32 MaxNormal(bool sign) { return (exponent_mask - 1) | Zero(sign); }
|
||||||
static constexpr u32 DefaultNaN() { return exponent_mask | (u32(1) << (explicit_mantissa_width - 1)); }
|
static constexpr u32 DefaultNaN() { return exponent_mask | (u32(1) << (explicit_mantissa_width - 1)); }
|
||||||
|
/// Builds the 32-bit pattern whose exponent field holds `exponent_bias` and whose
/// only set mantissa bit is the most significant explicit one, combined with the
/// sign produced by `Zero(sign)`. Under the usual IEEE-754 layout this is +/-1.5.
static constexpr u32 OnePointFive(bool sign) {
    return (u32(exponent_bias) << explicit_mantissa_width)    // exponent field == bias
         | (u32(1) << (explicit_mantissa_width - 1))          // top explicit mantissa bit
         | Zero(sign);                                        // sign bit
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
|
@ -55,6 +56,7 @@ struct FPInfo<u64> {
|
||||||
static constexpr u64 Infinity(bool sign) { return exponent_mask | Zero(sign); }
|
static constexpr u64 Infinity(bool sign) { return exponent_mask | Zero(sign); }
|
||||||
static constexpr u64 MaxNormal(bool sign) { return (exponent_mask - 1) | Zero(sign); }
|
static constexpr u64 MaxNormal(bool sign) { return (exponent_mask - 1) | Zero(sign); }
|
||||||
static constexpr u64 DefaultNaN() { return exponent_mask | (u64(1) << (explicit_mantissa_width - 1)); }
|
static constexpr u64 DefaultNaN() { return exponent_mask | (u64(1) << (explicit_mantissa_width - 1)); }
|
||||||
|
/// Builds the 64-bit pattern whose exponent field holds `exponent_bias` and whose
/// only set mantissa bit is the most significant explicit one, combined with the
/// sign produced by `Zero(sign)`. Under the usual IEEE-754 layout this is +/-1.5.
static constexpr u64 OnePointFive(bool sign) {
    return (u64(exponent_bias) << explicit_mantissa_width)    // exponent field == bias
         | (u64(1) << (explicit_mantissa_width - 1))          // top explicit mantissa bit
         | Zero(sign);                                        // sign bit
}
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Dynarmic::FP
|
} // namespace Dynarmic::FP
|
||||||
|
|
|
@ -9,4 +9,5 @@
|
||||||
#include "common/fp/op/FPMulAdd.h"
|
#include "common/fp/op/FPMulAdd.h"
|
||||||
#include "common/fp/op/FPRoundInt.h"
|
#include "common/fp/op/FPRoundInt.h"
|
||||||
#include "common/fp/op/FPRSqrtEstimate.h"
|
#include "common/fp/op/FPRSqrtEstimate.h"
|
||||||
|
#include "common/fp/op/FPRSqrtStepFused.h"
|
||||||
#include "common/fp/op/FPToFixed.h"
|
#include "common/fp/op/FPToFixed.h"
|
||||||
|
|
56
src/common/fp/op/FPRSqrtStepFused.cpp
Normal file
56
src/common/fp/op/FPRSqrtStepFused.cpp
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
/* This file is part of the dynarmic project.
|
||||||
|
* Copyright (c) 2018 MerryMage
|
||||||
|
* This software may be used and distributed according to the terms of the GNU
|
||||||
|
* General Public License version 2 or any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "common/fp/fpcr.h"
|
||||||
|
#include "common/fp/fpsr.h"
|
||||||
|
#include "common/fp/fused.h"
|
||||||
|
#include "common/fp/info.h"
|
||||||
|
#include "common/fp/op/FPNeg.h"
|
||||||
|
#include "common/fp/op/FPRSqrtStepFused.h"
|
||||||
|
#include "common/fp/process_exception.h"
|
||||||
|
#include "common/fp/process_nan.h"
|
||||||
|
#include "common/fp/unpacked.h"
|
||||||
|
|
||||||
|
namespace Dynarmic::FP {
|
||||||
|
|
||||||
|
template<typename FPT>
|
||||||
|
FPT FPRSqrtStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) {
|
||||||
|
op1 = FPNeg(op1);
|
||||||
|
|
||||||
|
const auto [type1, sign1, value1] = FPUnpack<FPT>(op1, fpcr, fpsr);
|
||||||
|
const auto [type2, sign2, value2] = FPUnpack<FPT>(op2, fpcr, fpsr);
|
||||||
|
|
||||||
|
if (const auto maybe_nan = FPProcessNaNs(type1, type2, op1, op2, fpcr, fpsr)) {
|
||||||
|
return *maybe_nan;
|
||||||
|
}
|
||||||
|
|
||||||
|
const bool inf1 = type1 == FPType::Infinity;
|
||||||
|
const bool inf2 = type2 == FPType::Infinity;
|
||||||
|
const bool zero1 = type1 == FPType::Zero;
|
||||||
|
const bool zero2 = type2 == FPType::Zero;
|
||||||
|
|
||||||
|
if ((inf1 && zero2) || (zero1 && inf2)) {
|
||||||
|
return FPInfo<FPT>::OnePointFive(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (inf1 || inf2) {
|
||||||
|
return FPInfo<FPT>::Infinity(sign1 != sign2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// result_value = (3.0 + (value1 * value2)) / 2.0
|
||||||
|
FPUnpacked result_value = FusedMulAdd({false, 0, 3}, value1, value2);
|
||||||
|
result_value.exponent--;
|
||||||
|
|
||||||
|
if (result_value.mantissa == 0) {
|
||||||
|
return FPInfo<FPT>::Zero(fpcr.RMode() == RoundingMode::TowardsMinusInfinity);
|
||||||
|
}
|
||||||
|
return FPRound<FPT>(result_value, fpcr, fpsr);
|
||||||
|
}
|
||||||
|
|
||||||
|
template u32 FPRSqrtStepFused<u32>(u32 op1, u32 op2, FPCR fpcr, FPSR& fpsr);
|
||||||
|
template u64 FPRSqrtStepFused<u64>(u64 op1, u64 op2, FPCR fpcr, FPSR& fpsr);
|
||||||
|
|
||||||
|
} // namespace Dynarmic::FP
|
17
src/common/fp/op/FPRSqrtStepFused.h
Normal file
17
src/common/fp/op/FPRSqrtStepFused.h
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
/* This file is part of the dynarmic project.
|
||||||
|
* Copyright (c) 2018 MerryMage
|
||||||
|
* This software may be used and distributed according to the terms of the GNU
|
||||||
|
* General Public License version 2 or any later version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
namespace Dynarmic::FP {
|
||||||
|
|
||||||
|
class FPCR;
|
||||||
|
class FPSR;
|
||||||
|
|
||||||
|
/// Fused reciprocal-square-root step on raw floating-point bit patterns.
///
/// The definition negates `op1` and then evaluates (3.0 + op1 * op2) / 2.0
/// via a fused multiply-add followed by rounding. It is explicitly
/// instantiated for u32 and u64 in FPRSqrtStepFused.cpp, so only those
/// widths are available to callers that include just this header.
///
/// @param op1  First operand (negated inside the function).
/// @param op2  Second operand.
/// @param fpcr Control register value, passed by value.
/// @param fpsr Status register, passed by reference to the helpers the
///             definition calls.
/// @return The result encoded in the same bit width as the operands.
template<typename FPT>
|
||||||
|
FPT FPRSqrtStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr);
|
||||||
|
|
||||||
|
} // namespace Dynarmic::FP
|
Loading…
Reference in a new issue