fp: Implement FPRSqrtStepFused

This commit is contained in:
MerryMage 2018-07-23 22:02:12 +01:00
parent b0ff35fcd1
commit 6eb069e80d
5 changed files with 78 additions and 0 deletions

View file

@ -29,6 +29,8 @@ add_library(dynarmic
common/fp/op/FPRoundInt.h common/fp/op/FPRoundInt.h
common/fp/op/FPRSqrtEstimate.cpp common/fp/op/FPRSqrtEstimate.cpp
common/fp/op/FPRSqrtEstimate.h common/fp/op/FPRSqrtEstimate.h
common/fp/op/FPRSqrtStepFused.cpp
common/fp/op/FPRSqrtStepFused.h
common/fp/op/FPToFixed.cpp common/fp/op/FPToFixed.cpp
common/fp/op/FPToFixed.h common/fp/op/FPToFixed.h
common/fp/process_exception.cpp common/fp/process_exception.cpp

View file

@ -33,6 +33,7 @@ struct FPInfo<u32> {
static constexpr u32 Infinity(bool sign) { return exponent_mask | Zero(sign); } static constexpr u32 Infinity(bool sign) { return exponent_mask | Zero(sign); }
static constexpr u32 MaxNormal(bool sign) { return (exponent_mask - 1) | Zero(sign); } static constexpr u32 MaxNormal(bool sign) { return (exponent_mask - 1) | Zero(sign); }
static constexpr u32 DefaultNaN() { return exponent_mask | (u32(1) << (explicit_mantissa_width - 1)); } static constexpr u32 DefaultNaN() { return exponent_mask | (u32(1) << (explicit_mantissa_width - 1)); }
// Bit pattern for +/-1.5: the exponent field holds exactly the bias and only
// the most significant explicit mantissa bit is set; Zero(sign) supplies the sign bit.
static constexpr u32 OnePointFive(bool sign) {
    const u32 biased_exponent_field = u32(exponent_bias) << explicit_mantissa_width;
    const u32 top_mantissa_bit = u32(1) << (explicit_mantissa_width - 1);
    return Zero(sign) | top_mantissa_bit | biased_exponent_field;
}
}; };
template<> template<>
@ -55,6 +56,7 @@ struct FPInfo<u64> {
static constexpr u64 Infinity(bool sign) { return exponent_mask | Zero(sign); } static constexpr u64 Infinity(bool sign) { return exponent_mask | Zero(sign); }
static constexpr u64 MaxNormal(bool sign) { return (exponent_mask - 1) | Zero(sign); } static constexpr u64 MaxNormal(bool sign) { return (exponent_mask - 1) | Zero(sign); }
static constexpr u64 DefaultNaN() { return exponent_mask | (u64(1) << (explicit_mantissa_width - 1)); } static constexpr u64 DefaultNaN() { return exponent_mask | (u64(1) << (explicit_mantissa_width - 1)); }
// Bit pattern for +/-1.5: the exponent field holds exactly the bias and only
// the most significant explicit mantissa bit is set; Zero(sign) supplies the sign bit.
static constexpr u64 OnePointFive(bool sign) {
    const u64 biased_exponent_field = u64(exponent_bias) << explicit_mantissa_width;
    const u64 top_mantissa_bit = u64(1) << (explicit_mantissa_width - 1);
    return Zero(sign) | top_mantissa_bit | biased_exponent_field;
}
}; };
} // namespace Dynarmic::FP } // namespace Dynarmic::FP

View file

@ -9,4 +9,5 @@
#include "common/fp/op/FPMulAdd.h" #include "common/fp/op/FPMulAdd.h"
#include "common/fp/op/FPRoundInt.h" #include "common/fp/op/FPRoundInt.h"
#include "common/fp/op/FPRSqrtEstimate.h" #include "common/fp/op/FPRSqrtEstimate.h"
#include "common/fp/op/FPRSqrtStepFused.h"
#include "common/fp/op/FPToFixed.h" #include "common/fp/op/FPToFixed.h"

View file

@ -0,0 +1,56 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "common/fp/fpcr.h"
#include "common/fp/fpsr.h"
#include "common/fp/fused.h"
#include "common/fp/info.h"
#include "common/fp/op/FPNeg.h"
#include "common/fp/op/FPRSqrtStepFused.h"
#include "common/fp/process_exception.h"
#include "common/fp/process_nan.h"
#include "common/fp/unpacked.h"
namespace Dynarmic::FP {
template<typename FPT>
FPT FPRSqrtStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) {
op1 = FPNeg(op1);
const auto [type1, sign1, value1] = FPUnpack<FPT>(op1, fpcr, fpsr);
const auto [type2, sign2, value2] = FPUnpack<FPT>(op2, fpcr, fpsr);
if (const auto maybe_nan = FPProcessNaNs(type1, type2, op1, op2, fpcr, fpsr)) {
return *maybe_nan;
}
const bool inf1 = type1 == FPType::Infinity;
const bool inf2 = type2 == FPType::Infinity;
const bool zero1 = type1 == FPType::Zero;
const bool zero2 = type2 == FPType::Zero;
if ((inf1 && zero2) || (zero1 && inf2)) {
return FPInfo<FPT>::OnePointFive(false);
}
if (inf1 || inf2) {
return FPInfo<FPT>::Infinity(sign1 != sign2);
}
// result_value = (3.0 + (value1 * value2)) / 2.0
FPUnpacked result_value = FusedMulAdd({false, 0, 3}, value1, value2);
result_value.exponent--;
if (result_value.mantissa == 0) {
return FPInfo<FPT>::Zero(fpcr.RMode() == RoundingMode::TowardsMinusInfinity);
}
return FPRound<FPT>(result_value, fpcr, fpsr);
}
template u32 FPRSqrtStepFused<u32>(u32 op1, u32 op2, FPCR fpcr, FPSR& fpsr);
template u64 FPRSqrtStepFused<u64>(u64 op1, u64 op2, FPCR fpcr, FPSR& fpsr);
} // namespace Dynarmic::FP

View file

@ -0,0 +1,17 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
namespace Dynarmic::FP {
// Forward declarations: the prototype below only needs these names, not the
// full class definitions.
class FPCR;
class FPSR;
/// Fused reciprocal square root step on raw floating-point bit patterns.
///
/// Only the declaration lives here; FPRSqrtStepFused.cpp holds the definition
/// and explicitly instantiates it for u32 and u64.
///
/// @param op1  First operand, as a raw bit pattern of width FPT.
/// @param op2  Second operand, as a raw bit pattern of width FPT.
/// @param fpcr Floating-point control register value, taken by value.
/// @param fpsr Floating-point status register, taken by non-const reference.
/// @return The result as a raw bit pattern of width FPT.
template<typename FPT>
FPT FPRSqrtStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr);
} // namespace Dynarmic::FP