From bde58b04d4dcda18818a7c6ace77821954ed0ebd Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Sun, 22 Jul 2018 18:18:22 +0100
Subject: [PATCH] IR: Implement FPRSqrtEstimate

---
 src/CMakeLists.txt                          |  2 +
 src/backend_x64/emit_x64_floating_point.cpp | 17 +++++
 src/common/fp/op.h                          |  1 +
 src/common/fp/op/FPRSqrtEstimate.cpp        | 83 +++++++++++++++++++++
 src/common/fp/op/FPRSqrtEstimate.h          | 19 +++++
 src/frontend/ir/ir_emitter.cpp              |  7 ++
 src/frontend/ir/ir_emitter.h                |  1 +
 src/frontend/ir/opcodes.inc                 |  2 +
 8 files changed, 132 insertions(+)
 create mode 100644 src/common/fp/op/FPRSqrtEstimate.cpp
 create mode 100644 src/common/fp/op/FPRSqrtEstimate.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 3d29151d..8ea66698 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -23,6 +23,8 @@ add_library(dynarmic
     common/fp/op.h
     common/fp/op/FPRoundInt.cpp
     common/fp/op/FPRoundInt.h
+    common/fp/op/FPRSqrtEstimate.cpp
+    common/fp/op/FPRSqrtEstimate.h
     common/fp/op/FPToFixed.cpp
     common/fp/op/FPToFixed.h
     common/fp/process_exception.cpp
diff --git a/src/backend_x64/emit_x64_floating_point.cpp b/src/backend_x64/emit_x64_floating_point.cpp
index 7c033f45..88b8bdc3 100644
--- a/src/backend_x64/emit_x64_floating_point.cpp
+++ b/src/backend_x64/emit_x64_floating_point.cpp
@@ -878,6 +878,23 @@ void EmitX64::EmitFPRoundInt64(EmitContext& ctx, IR::Inst* inst) {
     EmitFPRound(code, ctx, inst, 64);
 }
 
+template<typename FPT>
+static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    ctx.reg_alloc.HostCall(inst, args[0]);
+    code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR());
+    code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
+    code.CallFunction(&FP::FPRSqrtEstimate<FPT>);
+}
+
+void EmitX64::EmitFPRSqrtEstimate32(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPRSqrtEstimate<u32>(code, ctx, inst);
+}
+
+void EmitX64::EmitFPRSqrtEstimate64(EmitContext& ctx, IR::Inst* inst) {
+    EmitFPRSqrtEstimate<u64>(code, ctx, inst);
+}
+
 void EmitX64::EmitFPSqrt32(EmitContext& ctx, IR::Inst* inst) {
     FPTwoOp32(code, ctx, inst, &Xbyak::CodeGenerator::sqrtss);
 }
diff --git a/src/common/fp/op.h b/src/common/fp/op.h
index 25d199a1..a2ef9dc4 100644
--- a/src/common/fp/op.h
+++ b/src/common/fp/op.h
@@ -7,4 +7,5 @@
 #pragma once
 
 #include "common/fp/op/FPRoundInt.h"
+#include "common/fp/op/FPRSqrtEstimate.h"
 #include "common/fp/op/FPToFixed.h"
diff --git a/src/common/fp/op/FPRSqrtEstimate.cpp b/src/common/fp/op/FPRSqrtEstimate.cpp
new file mode 100644
index 00000000..97f04fdc
--- /dev/null
+++ b/src/common/fp/op/FPRSqrtEstimate.cpp
@@ -0,0 +1,83 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include <array>
+
+#include "common/assert.h"
+#include "common/bit_util.h"
+#include "common/common_types.h"
+#include "common/fp/fpcr.h"
+#include "common/fp/fpsr.h"
+#include "common/fp/info.h"
+#include "common/fp/op/FPRSqrtEstimate.h"
+#include "common/fp/process_exception.h"
+#include "common/fp/process_nan.h"
+#include "common/fp/unpacked.h"
+#include "common/safe_ops.h"
+
+namespace Dynarmic::FP {
+
+/// Input is a u0.9 fixed point number in [0.25, 1.0); returns the 8 fraction bits of the estimate (integer bit implied).
+static u8 RecipSqrtEstimate(u64 a) {
+    static const std::array<u8, 512> lut = []{
+        std::array<u8, 512> result{};
+        for (u64 i = 128; i < 512; i++) {
+            u64 a = i;
+            if (a < 256) {
+                a = a * 2 + 1;
+            } else {
+                a = (a | 1) * 2;
+            }
+            u64 b = 512;
+            while (a * (b + 1) * (b + 1) < (1u << 28)) {
+                b++;
+            }
+            result[i] = static_cast<u8>((b + 1) / 2);
+        }
+        return result;
+    }();
+
+    return lut[a & 0x1FF];
+}
+
+template<typename FPT>
+FPT FPRSqrtEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
+    auto [type, sign, value] = FPUnpack<FPT>(op, fpcr, fpsr);
+
+    if (type == FPType::SNaN || type == FPType::QNaN) {
+        return FPProcessNaN(type, op, fpcr, fpsr);
+    }
+
+    if (type == FPType::Zero) {
+        FPProcessException(FPExc::DivideByZero, fpcr, fpsr);
+        return FPInfo<FPT>::Infinity(sign);
+    }
+
+    if (sign) {
+        FPProcessException(FPExc::InvalidOp, fpcr, fpsr);
+        return FPInfo<FPT>::DefaultNaN();
+    }
+
+    if (type == FPType::Infinity) {
+        return FPInfo<FPT>::Zero(false);
+    }
+
+    const int highest_bit = Common::HighestSetBit(value.mantissa);
+    const int result_exponent = (-(value.exponent + highest_bit + 1)) >> 1;
+    const bool was_exponent_odd = (value.exponent + highest_bit) % 2 == 0;
+
+    const u64 scaled = Safe::LogicalShiftRight(value.mantissa, highest_bit - (was_exponent_odd ? 7 : 8));
+    const u64 estimate = RecipSqrtEstimate(scaled);
+
+    const FPT bits_exponent = static_cast<FPT>(result_exponent + FPInfo<FPT>::exponent_bias);
+    const FPT bits_mantissa = static_cast<FPT>(estimate << (FPInfo<FPT>::explicit_mantissa_width - 8));
+    return (bits_exponent << FPInfo<FPT>::explicit_mantissa_width) | (bits_mantissa & FPInfo<FPT>::mantissa_mask);
+}
+
+template u32 FPRSqrtEstimate<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
+template u64 FPRSqrtEstimate<u64>(u64 op, FPCR fpcr, FPSR& fpsr);
+
+} // namespace Dynarmic::FP
diff --git a/src/common/fp/op/FPRSqrtEstimate.h b/src/common/fp/op/FPRSqrtEstimate.h
new file mode 100644
index 00000000..ae2c9649
--- /dev/null
+++ b/src/common/fp/op/FPRSqrtEstimate.h
@@ -0,0 +1,19 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+#include "common/common_types.h"
+#include "common/fp/fpcr.h"
+#include "common/fp/fpsr.h"
+#include "common/fp/rounding_mode.h"
+
+namespace Dynarmic::FP {
+
+template<typename FPT>
+FPT FPRSqrtEstimate(FPT op, FPCR fpcr, FPSR& fpsr);
+
+} // namespace Dynarmic::FP
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index cc46c63b..df822fe4 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1456,6 +1456,13 @@ U32U64 IREmitter::FPRoundInt(const U32U64& a, FP::RoundingMode rounding, bool ex
     return Inst<U64>(Opcode::FPRoundInt64, a, static_cast<u8>(rounding), Imm1(exact));
 }
 
+U32U64 IREmitter::FPRSqrtEstimate(const U32U64& a) {
+    if (a.GetType() == Type::U32) {
+        return Inst<U32>(Opcode::FPRSqrtEstimate32, a);
+    }
+    return Inst<U64>(Opcode::FPRSqrtEstimate64, a);
+}
+
 U32U64 IREmitter::FPSqrt(const U32U64& a) {
     if (a.GetType() == Type::U32) {
         return Inst<U32>(Opcode::FPSqrt32, a);
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 6986af67..2a4c240c 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -267,6 +267,7 @@ public:
     U32U64 FPMulAdd(const U32U64& a, const U32U64& b, const U32U64& c, bool fpscr_controlled);
     U32U64 FPNeg(const U32U64& a);
     U32U64 FPRoundInt(const U32U64& a, FP::RoundingMode rounding, bool exact);
+    U32U64 FPRSqrtEstimate(const U32U64& a);
     U32U64 FPSqrt(const U32U64& a);
     U32U64 FPSub(const U32U64& a, const U32U64& b, bool fpscr_controlled);
     U32 FPDoubleToSingle(const U64& a, bool fpscr_controlled);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 7a3ca289..9e613abb 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -386,6 +386,8 @@ OPCODE(FPNeg32, T::U32, T::U32
 OPCODE(FPNeg64, T::U64, T::U64 )
 OPCODE(FPRoundInt32, T::U32, T::U32, T::U8, T::U1 )
 OPCODE(FPRoundInt64, T::U64, T::U64, T::U8, T::U1 )
+OPCODE(FPRSqrtEstimate32, T::U32, T::U32 )
+OPCODE(FPRSqrtEstimate64, T::U64, T::U64 )
 OPCODE(FPSqrt32, T::U32, T::U32 )
 OPCODE(FPSqrt64, T::U64, T::U64 )
 OPCODE(FPSub32, T::U32, T::U32, T::U32 )