fp: Implement FPRSqrtStepFused

2018-07-23 22:02:12 +01:00 · 2018-07-23 22:02:12 +01:00 · 6eb069e80d
commit 6eb069e80d
parent b0ff35fcd1
5 changed files with 78 additions and 0 deletions
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -29,6 +29,8 @@ add_library(dynarmic
    common/fp/op/FPRoundInt.h
    common/fp/op/FPRSqrtEstimate.cpp
    common/fp/op/FPRSqrtEstimate.h
    common/fp/op/FPRSqrtStepFused.cpp
    common/fp/op/FPRSqrtStepFused.h
    common/fp/op/FPToFixed.cpp
    common/fp/op/FPToFixed.h
    common/fp/process_exception.cpp
--- a/src/common/fp/info.h
+++ b/src/common/fp/info.h
@ -33,6 +33,7 @@ struct FPInfo<u32> {
    // Encoding of infinity with the requested sign: exponent_mask OR-ed with
    // whatever Zero(sign) yields (presumably just the sign bit -- confirm
    // against Zero's definition).
    static constexpr u32 Infinity(bool sign) {
        return Zero(sign) | exponent_mask;
    }
    // Encoding one below exponent_mask, combined with Zero(sign). Subtracting
    // one from the mask presumably clears the lowest exponent bit and sets
    // every bit beneath it, giving the largest finite magnitude.
    static constexpr u32 MaxNormal(bool sign) {
        const u32 magnitude = exponent_mask - 1;
        return magnitude | Zero(sign);
    }
    // Default NaN encoding: exponent_mask plus the single bit at position
    // explicit_mantissa_width - 1 (presumably the most significant stored
    // mantissa bit).
    static constexpr u32 DefaultNaN() {
        const u32 top_mantissa_bit = u32(1) << (explicit_mantissa_width - 1);
        return exponent_mask | top_mantissa_bit;
    }
    // Encoding of 1.5 with the requested sign. exponent_bias is shifted into
    // the bits starting at explicit_mantissa_width and the bit at
    // explicit_mantissa_width - 1 is set; presumably that is an unbiased
    // exponent of zero with a fraction of one half -- confirm against the
    // field definitions of this struct.
    static constexpr u32 OnePointFive(bool sign) {
        const u32 exponent_field = u32(exponent_bias) << explicit_mantissa_width;
        const u32 half_fraction = u32(1) << (explicit_mantissa_width - 1);
        return Zero(sign) | half_fraction | exponent_field;
    }
 };
 template<>
@ -55,6 +56,7 @@ struct FPInfo<u64> {
    // Encoding of infinity with the requested sign: exponent_mask OR-ed with
    // whatever Zero(sign) yields (presumably just the sign bit -- confirm
    // against Zero's definition).
    static constexpr u64 Infinity(bool sign) {
        return Zero(sign) | exponent_mask;
    }
    // Encoding one below exponent_mask, combined with Zero(sign). Subtracting
    // one from the mask presumably clears the lowest exponent bit and sets
    // every bit beneath it, giving the largest finite magnitude.
    static constexpr u64 MaxNormal(bool sign) {
        const u64 magnitude = exponent_mask - 1;
        return magnitude | Zero(sign);
    }
    // Default NaN encoding: exponent_mask plus the single bit at position
    // explicit_mantissa_width - 1 (presumably the most significant stored
    // mantissa bit).
    static constexpr u64 DefaultNaN() {
        const u64 top_mantissa_bit = u64(1) << (explicit_mantissa_width - 1);
        return exponent_mask | top_mantissa_bit;
    }
    // Encoding of 1.5 with the requested sign. exponent_bias is shifted into
    // the bits starting at explicit_mantissa_width and the bit at
    // explicit_mantissa_width - 1 is set; presumably that is an unbiased
    // exponent of zero with a fraction of one half -- confirm against the
    // field definitions of this struct.
    static constexpr u64 OnePointFive(bool sign) {
        const u64 exponent_field = u64(exponent_bias) << explicit_mantissa_width;
        const u64 half_fraction = u64(1) << (explicit_mantissa_width - 1);
        return Zero(sign) | half_fraction | exponent_field;
    }
 };
 } // namespace Dynarmic::FP 
--- a/src/common/fp/op.h
+++ b/src/common/fp/op.h
@ -9,4 +9,5 @@
 #include "common/fp/op/FPMulAdd.h"
 #include "common/fp/op/FPRoundInt.h"
 #include "common/fp/op/FPRSqrtEstimate.h"
 #include "common/fp/op/FPRSqrtStepFused.h"
 #include "common/fp/op/FPToFixed.h"
--- a/src/common/fp/op/FPRSqrtStepFused.cpp
+++ b/src/common/fp/op/FPRSqrtStepFused.cpp
@ -0,0 +1,56 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #include "common/fp/fpcr.h"
 #include "common/fp/fpsr.h"
 #include "common/fp/fused.h"
 #include "common/fp/info.h"
 #include "common/fp/op/FPNeg.h"
 #include "common/fp/op/FPRSqrtStepFused.h"
 #include "common/fp/process_exception.h"
 #include "common/fp/process_nan.h"
 #include "common/fp/unpacked.h"
 namespace Dynarmic::FP {
 template<typename FPT>
 FPT FPRSqrtStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) {
    op1 = FPNeg(op1);
    const auto [type1, sign1, value1] = FPUnpack<FPT>(op1, fpcr, fpsr);
    const auto [type2, sign2, value2] = FPUnpack<FPT>(op2, fpcr, fpsr);
    if (const auto maybe_nan = FPProcessNaNs(type1, type2, op1, op2, fpcr, fpsr)) {
        return *maybe_nan;
    }
    const bool inf1 = type1 == FPType::Infinity;
    const bool inf2 = type2 == FPType::Infinity;
    const bool zero1 = type1 == FPType::Zero;
    const bool zero2 = type2 == FPType::Zero;
    if ((inf1 && zero2) || (zero1 && inf2)) {
        return FPInfo<FPT>::OnePointFive(false);
    }
    if (inf1 || inf2) {
        return FPInfo<FPT>::Infinity(sign1 != sign2);
    }
    // result_value = (3.0 + (value1 * value2)) / 2.0
    FPUnpacked result_value = FusedMulAdd({false, 0, 3}, value1, value2);
    result_value.exponent--;
    if (result_value.mantissa == 0) {
        return FPInfo<FPT>::Zero(fpcr.RMode() == RoundingMode::TowardsMinusInfinity);
    }
    return FPRound<FPT>(result_value, fpcr, fpsr);
 }
 // Explicit instantiations for u32 and u64 operands. The template body lives
 // in this source file, so these are what make the definitions available to
 // code that only sees the declaration.
 template u32 FPRSqrtStepFused<u32>(u32 op1, u32 op2, FPCR fpcr, FPSR& fpsr);
 template u64 FPRSqrtStepFused<u64>(u64 op1, u64 op2, FPCR fpcr, FPSR& fpsr);
 } // namespace Dynarmic::FP 
--- a/src/common/fp/op/FPRSqrtStepFused.h
+++ b/src/common/fp/op/FPRSqrtStepFused.h
@ -0,0 +1,17 @@
 /* This file is part of the dynarmic project.
 * Copyright (c) 2018 MerryMage
 * This software may be used and distributed according to the terms of the GNU
 * General Public License version 2 or any later version.
 */
 #pragma once
 namespace Dynarmic::FP {
 // Forward declarations only: this header just names these types in a
 // function declaration and does not include their definitions.
 class FPCR;
 class FPSR;
 // Fused reciprocal-square-root step over operand encodings of type FPT.
 // Declaration only -- the definition and its explicit instantiations are in
 // FPRSqrtStepFused.cpp, so FPT is limited to the types instantiated there.
 //   op1, op2 : operand encodings
 //   fpcr     : control state, passed by value
 //   fpsr     : status object, passed by non-const reference
 template<typename FPT>
 FPT FPRSqrtStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr);
 } // namespace Dynarmic::FP