diff --git a/src/backend/x64/emit_x64_floating_point.cpp b/src/backend/x64/emit_x64_floating_point.cpp index 88ceb413..028c281d 100644 --- a/src/backend/x64/emit_x64_floating_point.cpp +++ b/src/backend/x64/emit_x64_floating_point.cpp @@ -728,6 +728,10 @@ static void EmitFPRecipExponent(BlockOfCode& code, EmitContext& ctx, IR::Inst* i code.CallFunction(&FP::FPRecipExponent); } +void EmitX64::EmitFPRecipExponent16(EmitContext& ctx, IR::Inst* inst) { + EmitFPRecipExponent(code, ctx, inst); +} + void EmitX64::EmitFPRecipExponent32(EmitContext& ctx, IR::Inst* inst) { EmitFPRecipExponent(code, ctx, inst); } diff --git a/src/common/fp/op/FPRecipExponent.cpp b/src/common/fp/op/FPRecipExponent.cpp index eaa7addb..4e0224e7 100644 --- a/src/common/fp/op/FPRecipExponent.cpp +++ b/src/common/fp/op/FPRecipExponent.cpp @@ -17,28 +17,16 @@ namespace Dynarmic::FP { namespace { -// We don't care about unreachable code warnings here -// TODO: Remove this warning disabling of warnings when -// half-float support is added. -#ifdef _MSC_VER -#pragma warning(disable:4702) -#endif template FPT DetermineExponentValue(size_t value) { if constexpr (sizeof(FPT) == sizeof(u32)) { return static_cast(Common::Bits<23, 30>(value)); - } - - if constexpr (sizeof(FPT) == sizeof(u64)) { + } else if constexpr (sizeof(FPT) == sizeof(u64)) { return static_cast(Common::Bits<52, 62>(value)); + } else { + return static_cast(Common::Bits<10, 14>(value)); } - - // Half-float - return static_cast(Common::Bits<10, 14>(value)); } -#ifdef _MSC_VER -#pragma warning(default:4702) -#endif } // Anonymous namespace template @@ -50,7 +38,7 @@ FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr) { return FPProcessNaN(type, op, fpcr, fpsr); } - const FPT sign_bits = FPInfo::Zero(sign); + const FPT sign_bits = FPT(FPInfo::Zero(sign)); const FPT exponent = DetermineExponentValue(op); // Zero and denormals @@ -64,6 +52,7 @@ FPT FPRecipExponent(FPT op, FPCR fpcr, FPSR& fpsr) { return FPT(sign_bits | negated_exponent); } +template u16 FPRecipExponent(u16 op, FPCR fpcr, FPSR& fpsr); template u32 FPRecipExponent(u32 op, FPCR fpcr, FPSR& fpsr); template u64 FPRecipExponent(u64 op, FPCR fpcr, FPSR& fpsr); diff --git a/src/common/fp/unpacked.cpp b/src/common/fp/unpacked.cpp index ad6374a1..96251779 100644 --- a/src/common/fp/unpacked.cpp +++ b/src/common/fp/unpacked.cpp @@ -33,7 +33,9 @@ std::tuple FPUnpackBase(FPT op, FPCR fpcr, FPSR& fpsr) if (frac_raw == 0 || fpcr.FZ16()) { return {FPType::Zero, sign, {sign, 0, 0}}; } + return {FPType::Nonzero, sign, ToNormalized(sign, denormal_exponent, frac_raw)}; } + if (frac_raw == 0 || fpcr.FZ()) { if (frac_raw != 0) { FPProcessException(FPExc::InputDenorm, fpcr, fpsr); @@ -46,7 +48,7 @@ std::tuple FPUnpackBase(FPT op, FPCR fpcr, FPSR& fpsr) const bool exp_all_ones = exp_raw == Common::Ones(FPInfo::exponent_width); const bool ahp_disabled = is_half_precision && !fpcr.AHP(); - if (exp_all_ones || ahp_disabled) { + if ((exp_all_ones && !is_half_precision) || (exp_all_ones && ahp_disabled)) { if (frac_raw == 0) { return {FPType::Infinity, sign, ToNormalized(sign, 1000000, 1)}; } diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index 6bef2c70..707f1a0d 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -420,7 +420,7 @@ INST(FCVTPS_2, "FCVTPS (vector)", "01011 INST(FCVTZS_int_2, "FCVTZS (vector, integer)", "010111101z100001101110nnnnnddddd") //INST(FRECPE_1, "FRECPE", "0101111011111001110110nnnnnddddd") INST(FRECPE_2, "FRECPE", "010111101z100001110110nnnnnddddd") -//INST(FRECPX_1, "FRECPX", "0101111011111001111110nnnnnddddd") +INST(FRECPX_1, "FRECPX", "0101111011111001111110nnnnnddddd") INST(FRECPX_2, "FRECPX", "010111101z100001111110nnnnnddddd") //INST(FCVTNU_1, "FCVTNU (vector)", "0111111001111001101010nnnnnddddd") INST(FCVTNU_2, "FCVTNU (vector)", "011111100z100001101010nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/simd_scalar_two_register_misc.cpp b/src/frontend/A64/translate/impl/simd_scalar_two_register_misc.cpp index 7dcc89d3..d9b3e29c 100644 --- a/src/frontend/A64/translate/impl/simd_scalar_two_register_misc.cpp +++ b/src/frontend/A64/translate/impl/simd_scalar_two_register_misc.cpp @@ -182,6 +182,14 @@ bool TranslatorVisitor::FRECPE_2(bool sz, Vec Vn, Vec Vd) { return true; } +bool TranslatorVisitor::FRECPX_1(Vec Vn, Vec Vd) { + const IR::U16 operand = V_scalar(16, Vn); + const IR::U16 result = ir.FPRecipExponent(operand); + + V_scalar(16, Vd, result); + return true; +} + bool TranslatorVisitor::FRECPX_2(bool sz, Vec Vn, Vec Vd) { const size_t esize = sz ? 64 : 32; diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index 07b54308..35c8d07b 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -1895,11 +1895,18 @@ U32U64 IREmitter::FPRecipEstimate(const U32U64& a) { return Inst(Opcode::FPRecipEstimate64, a); } -U32U64 IREmitter::FPRecipExponent(const U32U64& a) { - if (a.GetType() == Type::U32) { +U16U32U64 IREmitter::FPRecipExponent(const U16U32U64& a) { + switch (a.GetType()) { + case Type::U16: + return Inst(Opcode::FPRecipExponent16, a); + case Type::U32: return Inst(Opcode::FPRecipExponent32, a); + case Type::U64: + return Inst(Opcode::FPRecipExponent64, a); + default: + UNREACHABLE(); + return U16U32U64{}; } - return Inst(Opcode::FPRecipExponent64, a); } U32U64 IREmitter::FPRecipStepFused(const U32U64& a, const U32U64& b) { diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index 8fb3cd57..0e29d716 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -305,7 +305,7 @@ public: U32U64 FPMulX(const U32U64& a, const U32U64& b); U32U64 FPNeg(const U32U64& a); U32U64 FPRecipEstimate(const U32U64& a); - U32U64 FPRecipExponent(const U32U64& a); + U16U32U64 FPRecipExponent(const U16U32U64& a); U32U64 FPRecipStepFused(const U32U64& a, const U32U64& b); U32U64 FPRoundInt(const U32U64& a, FP::RoundingMode rounding, bool exact); U32U64 FPRSqrtEstimate(const U32U64& a); diff --git a/src/frontend/ir/microinstruction.cpp b/src/frontend/ir/microinstruction.cpp index 10257e54..3055d2fd 100644 --- a/src/frontend/ir/microinstruction.cpp +++ b/src/frontend/ir/microinstruction.cpp @@ -273,6 +273,7 @@ bool Inst::ReadsFromAndWritesToFPSRCumulativeExceptionBits() const { case Opcode::FPMulAdd64: case Opcode::FPRecipEstimate32: case Opcode::FPRecipEstimate64: + case Opcode::FPRecipExponent16: case Opcode::FPRecipExponent32: case Opcode::FPRecipExponent64: case Opcode::FPRecipStepFused32: diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index 06b06e2c..ca87994a 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -486,6 +486,7 @@ OPCODE(FPNeg32, U32, U32 OPCODE(FPNeg64, U64, U64 ) OPCODE(FPRecipEstimate32, U32, U32 ) OPCODE(FPRecipEstimate64, U64, U64 ) +OPCODE(FPRecipExponent16, U16, U16 ) OPCODE(FPRecipExponent32, U32, U32 ) OPCODE(FPRecipExponent64, U64, U64 ) OPCODE(FPRecipStepFused32, U32, U32, U32 ) diff --git a/src/frontend/ir/value.h b/src/frontend/ir/value.h index 5beaf9e8..f4d010e5 100644 --- a/src/frontend/ir/value.h +++ b/src/frontend/ir/value.h @@ -163,6 +163,7 @@ using U32 = TypedValue; using U64 = TypedValue; using U128 = TypedValue; using U32U64 = TypedValue; +using U16U32U64 = TypedValue; using UAny = TypedValue; using UAnyU128 = TypedValue; using NZCV = TypedValue;