From 901bd9b4e29290d1159f21584d8465d902dff28e Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Wed, 25 Jul 2018 19:11:20 +0100
Subject: [PATCH] IR: Implement FPRecipStepFused, FPVectorRecipStepFused

---
 src/CMakeLists.txt                            |  2 +
 src/backend_x64/emit_x64_floating_point.cpp   | 17 ++++++
 .../emit_x64_vector_floating_point.cpp        | 17 ++++++
 src/common/fp/info.h                          |  2 +
 src/common/fp/op.h                            |  1 +
 src/common/fp/op/FPRecipStepFused.cpp         | 55 +++++++++++++++++++
 src/common/fp/op/FPRecipStepFused.h           | 17 ++++++
 src/frontend/ir/ir_emitter.cpp                | 18 ++++++
 src/frontend/ir/ir_emitter.h                  |  2 +
 src/frontend/ir/opcodes.inc                   |  4 ++
 10 files changed, 135 insertions(+)
 create mode 100644 src/common/fp/op/FPRecipStepFused.cpp
 create mode 100644 src/common/fp/op/FPRecipStepFused.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index f9782fb2..2e18b2cf 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -27,6 +27,8 @@ add_library(dynarmic
     common/fp/op/FPMulAdd.h
     common/fp/op/FPRecipEstimate.cpp
     common/fp/op/FPRecipEstimate.h
+    common/fp/op/FPRecipStepFused.cpp
+    common/fp/op/FPRecipStepFused.h
     common/fp/op/FPRoundInt.cpp
     common/fp/op/FPRoundInt.h
     common/fp/op/FPRSqrtEstimate.cpp
diff --git a/src/backend_x64/emit_x64_floating_point.cpp b/src/backend_x64/emit_x64_floating_point.cpp
index 85235399..c5455217 100644
--- a/src/backend_x64/emit_x64_floating_point.cpp
+++ b/src/backend_x64/emit_x64_floating_point.cpp
@@ -830,6 +830,23 @@ void EmitX64::EmitFPRecipEstimate64(EmitContext& ctx, IR::Inst* inst) {
     EmitFPRecipEstimate<u64>(code, ctx, inst);
 }
 
+template<typename FPT>
+static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ctx.reg_alloc.HostCall(inst, args[0], args[1]);
+    code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR());
+    code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
+    code.CallFunction(&FP::FPRecipStepFused<FPT>);
+}
+
+void EmitX64::EmitFPRecipStepFused32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPRecipStepFused<u32>(code, ctx, inst);
+}
+
+void EmitX64::EmitFPRecipStepFused64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPRecipStepFused<u64>(code, ctx, inst);
+}
+
 static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, size_t fsize) {
     const auto rounding = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());
     const bool exact = inst->GetArg(2).GetU1();
diff --git a/src/backend_x64/emit_x64_vector_floating_point.cpp b/src/backend_x64/emit_x64_vector_floating_point.cpp
index a16f2d91..53a638c0 100644
--- a/src/backend_x64/emit_x64_vector_floating_point.cpp
+++ b/src/backend_x64/emit_x64_vector_floating_point.cpp
@@ -631,6 +631,23 @@ void EmitX64::EmitFPVectorRecipEstimate64(EmitContext& ctx, IR::Inst* inst) {
     EmitRecipEstimate<u64>(code, ctx, inst);
 }
 
+template<typename FPT>
+static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
+    EmitThreeOpFallback(code, ctx, inst, [](VectorArray<FPT>& result, const VectorArray<FPT>& op1, const VectorArray<FPT>& op2, FP::FPCR fpcr, FP::FPSR& fpsr) {
+        for (size_t i = 0; i < result.size(); i++) {
+            result[i] = FP::FPRecipStepFused<FPT>(op1[i], op2[i], fpcr, fpsr);
+        }
+    });
+}
+
+void EmitX64::EmitFPVectorRecipStepFused32(EmitContext& ctx, IR::Inst* inst) {
+    EmitRecipStepFused<u32>(code, ctx, inst);
+}
+
+void EmitX64::EmitFPVectorRecipStepFused64(EmitContext& ctx, IR::Inst* inst) {
+    EmitRecipStepFused<u64>(code, ctx, inst);
+}
+
 template<typename FPT>
 static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     EmitTwoOpFallback(code, ctx, inst, [](VectorArray<FPT>& result, const VectorArray<FPT>& operand, FP::FPCR fpcr, FP::FPSR& fpsr) {
diff --git a/src/common/fp/info.h b/src/common/fp/info.h
index a0f265fd..b78f1382 100644
--- a/src/common/fp/info.h
+++ b/src/common/fp/info.h
@@ -34,6 +34,7 @@ struct FPInfo<u32> {
     static constexpr u32 MaxNormal(bool sign) { return (exponent_mask - 1) | Zero(sign); }
     static constexpr u32 DefaultNaN() { return exponent_mask | (u32(1) << (explicit_mantissa_width - 1)); }
     static constexpr u32 OnePointFive(bool sign) { return Zero(sign) | (u32(1) << (explicit_mantissa_width - 1)) | (u32(exponent_bias) << explicit_mantissa_width); }
+    static constexpr u32 Two(bool sign) { return Zero(sign) | (u32(exponent_bias + 1) << explicit_mantissa_width); }
 };
 
 template<>
@@ -57,6 +58,7 @@ struct FPInfo<u64> {
     static constexpr u64 MaxNormal(bool sign) { return (exponent_mask - 1) | Zero(sign); }
     static constexpr u64 DefaultNaN() { return exponent_mask | (u64(1) << (explicit_mantissa_width - 1)); }
     static constexpr u64 OnePointFive(bool sign) { return Zero(sign) | (u64(1) << (explicit_mantissa_width - 1)) | (u64(exponent_bias) << explicit_mantissa_width); }
+    static constexpr u64 Two(bool sign) { return Zero(sign) | (u64(exponent_bias + 1) << explicit_mantissa_width); }
 };
 
 } // namespace Dynarmic::FP
diff --git a/src/common/fp/op.h b/src/common/fp/op.h
index 6f8749ff..f7232407 100644
--- a/src/common/fp/op.h
+++ b/src/common/fp/op.h
@@ -8,6 +8,7 @@
 
 #include "common/fp/op/FPMulAdd.h"
 #include "common/fp/op/FPRecipEstimate.h"
+#include "common/fp/op/FPRecipStepFused.h"
 #include "common/fp/op/FPRoundInt.h"
 #include "common/fp/op/FPRSqrtEstimate.h"
 #include "common/fp/op/FPRSqrtStepFused.h"
diff --git a/src/common/fp/op/FPRecipStepFused.cpp b/src/common/fp/op/FPRecipStepFused.cpp
new file mode 100644
index 00000000..da849c0a
--- /dev/null
+++ b/src/common/fp/op/FPRecipStepFused.cpp
@@ -0,0 +1,55 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include "common/fp/fpcr.h"
+#include "common/fp/fpsr.h"
+#include "common/fp/fused.h"
+#include "common/fp/info.h"
+#include "common/fp/op/FPNeg.h"
+#include "common/fp/op/FPRecipStepFused.h"
+#include "common/fp/process_exception.h"
+#include "common/fp/process_nan.h"
+#include "common/fp/unpacked.h"
+
+namespace Dynarmic::FP {
+
+template<typename FPT>
+FPT FPRecipStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) {
+    op1 = FPNeg(op1);
+
+    const auto [type1, sign1, value1] = FPUnpack<FPT>(op1, fpcr, fpsr);
+    const auto [type2, sign2, value2] = FPUnpack<FPT>(op2, fpcr, fpsr);
+
+    if (const auto maybe_nan = FPProcessNaNs(type1, type2, op1, op2, fpcr, fpsr)) {
+        return *maybe_nan;
+    }
+
+    const bool inf1 = type1 == FPType::Infinity;
+    const bool inf2 = type2 == FPType::Infinity;
+    const bool zero1 = type1 == FPType::Zero;
+    const bool zero2 = type2 == FPType::Zero;
+
+    if ((inf1 && zero2) || (zero1 && inf2)) {
+        return FPInfo<FPT>::Two(false);
+    }
+
+    if (inf1 || inf2) {
+        return FPInfo<FPT>::Infinity(sign1 != sign2);
+    }
+
+    // result_value = 2.0 + (value1 * value2)
+    FPUnpacked<u64> result_value = FusedMulAdd(ToNormalized(false, 0, 2), value1, value2);
+
+    if (result_value.mantissa == 0) {
+        return FPInfo<FPT>::Zero(fpcr.RMode() == RoundingMode::TowardsMinusInfinity);
+    }
+    return FPRound<FPT>(result_value, fpcr, fpsr);
+}
+
+template u32 FPRecipStepFused<u32>(u32 op1, u32 op2, FPCR fpcr, FPSR& fpsr);
+template u64 FPRecipStepFused<u64>(u64 op1, u64 op2, FPCR fpcr, FPSR& fpsr);
+
+} // namespace Dynarmic::FP
diff --git a/src/common/fp/op/FPRecipStepFused.h b/src/common/fp/op/FPRecipStepFused.h
new file mode 100644
index 00000000..09222638
--- /dev/null
+++ b/src/common/fp/op/FPRecipStepFused.h
@@ -0,0 +1,17 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+namespace Dynarmic::FP {
+
+class FPCR;
+class FPSR;
+
+template<typename FPT>
+FPT FPRecipStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr);
+
+} // namespace Dynarmic::FP
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 9a060056..41676fe1 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1495,6 +1495,13 @@ U32U64 IREmitter::FPRecipEstimate(const U32U64& a) {
     return Inst<U64>(Opcode::FPRecipEstimate64, a);
 }
 
+U32U64 IREmitter::FPRecipStepFused(const U32U64& a, const U32U64& b) {
+    if (a.GetType() == Type::U32) {
+        return Inst<U32>(Opcode::FPRecipStepFused32, a, b);
+    }
+    return Inst<U64>(Opcode::FPRecipStepFused64, a, b);
+}
+
 U32U64 IREmitter::FPRoundInt(const U32U64& a, FP::RoundingMode rounding, bool exact) {
     if (a.GetType() == Type::U32) {
         return Inst<U32>(Opcode::FPRoundInt32, a, static_cast<u8>(rounding), Imm1(exact));
@@ -1760,6 +1767,17 @@ U128 IREmitter::FPVectorRecipEstimate(size_t esize, const U128& a) {
     return {};
 }
 
+U128 IREmitter::FPVectorRecipStepFused(size_t esize, const U128& a, const U128& b) {
+    switch (esize) {
+    case 32:
+        return Inst<U128>(Opcode::FPVectorRecipStepFused32, a, b);
+    case 64:
+        return Inst<U128>(Opcode::FPVectorRecipStepFused64, a, b);
+    }
+    UNREACHABLE();
+    return {};
+}
+
 U128 IREmitter::FPVectorRSqrtEstimate(size_t esize, const U128& a) {
     switch (esize) {
     case 32:
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index da013cd2..fef3a97a 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -270,6 +270,7 @@ public:
     U32U64 FPMulAdd(const U32U64& addend, const U32U64& op1, const U32U64& op2, bool fpscr_controlled);
     U32U64 FPNeg(const U32U64& a);
     U32U64 FPRecipEstimate(const U32U64& a);
+    U32U64 FPRecipStepFused(const U32U64& a, const U32U64& b);
     U32U64 FPRoundInt(const U32U64& a, FP::RoundingMode rounding, bool exact);
     U32U64 FPRSqrtEstimate(const U32U64& a);
     U32U64 FPRSqrtStepFused(const U32U64& a, const U32U64& b);
@@ -306,6 +307,7 @@ public:
     U128 FPVectorPairedAdd(size_t esize, const U128& a, const U128& b);
     U128 FPVectorPairedAddLower(size_t esize, const U128& a, const U128& b);
     U128 FPVectorRecipEstimate(size_t esize, const U128& a);
+    U128 FPVectorRecipStepFused(size_t esize, const U128& a, const U128& b);
     U128 FPVectorRSqrtEstimate(size_t esize, const U128& a);
     U128 FPVectorRSqrtStepFused(size_t esize, const U128& a, const U128& b);
     U128 FPVectorSub(size_t esize, const U128& a, const U128& b);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 7ec26325..8ee80f97 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -396,6 +396,8 @@ OPCODE(FPNeg32,                                             T::U32,         T::U32                                          )
 OPCODE(FPNeg64,                                             T::U64,         T::U64                                          )
 OPCODE(FPRecipEstimate32,                                   T::U32,         T::U32                                          )
 OPCODE(FPRecipEstimate64,                                   T::U64,         T::U64                                          )
+OPCODE(FPRecipStepFused32,                                  T::U32,         T::U32,         T::U32                          )
+OPCODE(FPRecipStepFused64,                                  T::U64,         T::U64,         T::U64                          )
 OPCODE(FPRoundInt32,                                        T::U32,         T::U32,         T::U8,          T::U1           )
 OPCODE(FPRoundInt64,                                        T::U64,         T::U64,         T::U8,          T::U1           )
 OPCODE(FPRSqrtEstimate32,                                   T::U32,         T::U32                                          )
@@ -454,6 +456,8 @@ OPCODE(FPVectorPairedAdd32,                                 T::U128,        T::U128,        T::U128                         )
 OPCODE(FPVectorPairedAdd64,                                 T::U128,        T::U128,        T::U128                         )
 OPCODE(FPVectorRecipEstimate32,                             T::U128,        T::U128                                         )
 OPCODE(FPVectorRecipEstimate64,                             T::U128,        T::U128                                         )
+OPCODE(FPVectorRecipStepFused32,                            T::U128,        T::U128,        T::U128                         )
+OPCODE(FPVectorRecipStepFused64,                            T::U128,        T::U128,        T::U128                         )
 OPCODE(FPVectorRSqrtEstimate32,                             T::U128,        T::U128                                         )
 OPCODE(FPVectorRSqrtEstimate64,                             T::U128,        T::U128                                         )
 OPCODE(FPVectorRSqrtStepFused32,                            T::U128,        T::U128,        T::U128                         )