From 726b9914c53f7acf6ffa32b34c662ce0cfce0f2b Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sat, 13 Apr 2019 18:30:17 -0400 Subject: [PATCH 1/5] common/fp/op/FPRSqrtEstimate: Add half-precision specialization for FPRSqrtEstimate --- src/common/fp/op/FPRSqrtEstimate.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/common/fp/op/FPRSqrtEstimate.cpp b/src/common/fp/op/FPRSqrtEstimate.cpp index 895fa418..969ade1c 100644 --- a/src/common/fp/op/FPRSqrtEstimate.cpp +++ b/src/common/fp/op/FPRSqrtEstimate.cpp @@ -19,7 +19,7 @@ namespace Dynarmic::FP { template FPT FPRSqrtEstimate(FPT op, FPCR fpcr, FPSR& fpsr) { - auto [type, sign, value] = FPUnpack(op, fpcr, fpsr); + const auto [type, sign, value] = FPUnpack(op, fpcr, fpsr); if (type == FPType::SNaN || type == FPType::QNaN) { return FPProcessNaN(type, op, fpcr, fpsr); @@ -27,16 +27,16 @@ FPT FPRSqrtEstimate(FPT op, FPCR fpcr, FPSR& fpsr) { if (type == FPType::Zero) { FPProcessException(FPExc::DivideByZero, fpcr, fpsr); - return FPInfo::Infinity(sign); + return FPT(FPInfo::Infinity(sign)); } if (sign) { FPProcessException(FPExc::InvalidOp, fpcr, fpsr); - return FPInfo::DefaultNaN(); + return FPT(FPInfo::DefaultNaN()); } if (type == FPType::Infinity) { - return FPInfo::Zero(false); + return FPT(FPInfo::Zero(false)); } const int result_exponent = (-(value.exponent + 1)) >> 1; @@ -50,6 +50,7 @@ FPT FPRSqrtEstimate(FPT op, FPCR fpcr, FPSR& fpsr) { return (bits_exponent << FPInfo::explicit_mantissa_width) | (bits_mantissa & FPInfo::mantissa_mask); } +template u16 FPRSqrtEstimate(u16 op, FPCR fpcr, FPSR& fpsr); template u32 FPRSqrtEstimate(u32 op, FPCR fpcr, FPSR& fpsr); template u64 FPRSqrtEstimate(u64 op, FPCR fpcr, FPSR& fpsr); From 5dba99b4f4a0c3c9ad8f8736ec7e5af97e4525bb Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sat, 13 Apr 2019 18:34:35 -0400 Subject: [PATCH 2/5] frontend/ir_emitter: Add half-precision opcode variant for FPRSqrtEstimate --- src/backend/x64/emit_x64_floating_point.cpp | 4 ++++ src/frontend/ir/ir_emitter.cpp | 13 ++++++++++--- src/frontend/ir/ir_emitter.h | 2 +- src/frontend/ir/microinstruction.cpp | 1 + src/frontend/ir/opcodes.inc | 1 + 5 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/backend/x64/emit_x64_floating_point.cpp b/src/backend/x64/emit_x64_floating_point.cpp index 2321b77b..b710a989 100644 --- a/src/backend/x64/emit_x64_floating_point.cpp +++ b/src/backend/x64/emit_x64_floating_point.cpp @@ -920,6 +920,10 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i code.CallFunction(&FP::FPRSqrtEstimate); } +void EmitX64::EmitFPRSqrtEstimate16(EmitContext& ctx, IR::Inst* inst) { + EmitFPRSqrtEstimate(code, ctx, inst); +} + void EmitX64::EmitFPRSqrtEstimate32(EmitContext& ctx, IR::Inst* inst) { EmitFPRSqrtEstimate(code, ctx, inst); } diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index 27527f6c..86d9923c 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -1967,11 +1967,18 @@ U16U32U64 IREmitter::FPRoundInt(const U16U32U64& a, FP::RoundingMode rounding, b } } -U32U64 IREmitter::FPRSqrtEstimate(const U32U64& a) { - if (a.GetType() == Type::U32) { +U16U32U64 IREmitter::FPRSqrtEstimate(const U16U32U64& a) { + switch (a.GetType()) { + case Type::U16: + return Inst(Opcode::FPRSqrtEstimate16, a); + case Type::U32: return Inst(Opcode::FPRSqrtEstimate32, a); + case Type::U64: + return Inst(Opcode::FPRSqrtEstimate64, a); + default: + UNREACHABLE(); + return U16U32U64{}; } - return Inst(Opcode::FPRSqrtEstimate64, a); } U32U64 IREmitter::FPRSqrtStepFused(const U32U64& a, const U32U64& b) { diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index 09935cf6..27b02776 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -309,7 +309,7 @@ public: U16U32U64 FPRecipExponent(const U16U32U64& a); U32U64 FPRecipStepFused(const U32U64& a, const U32U64& b); U16U32U64 FPRoundInt(const U16U32U64& a, FP::RoundingMode rounding, bool exact); - U32U64 FPRSqrtEstimate(const U32U64& a); + U16U32U64 FPRSqrtEstimate(const U16U32U64& a); U32U64 FPRSqrtStepFused(const U32U64& a, const U32U64& b); U32U64 FPSqrt(const U32U64& a); U32U64 FPSub(const U32U64& a, const U32U64& b, bool fpcr_controlled); diff --git a/src/frontend/ir/microinstruction.cpp b/src/frontend/ir/microinstruction.cpp index a7c3386e..f7545b3d 100644 --- a/src/frontend/ir/microinstruction.cpp +++ b/src/frontend/ir/microinstruction.cpp @@ -282,6 +282,7 @@ bool Inst::ReadsFromAndWritesToFPSRCumulativeExceptionBits() const { case Opcode::FPRoundInt16: case Opcode::FPRoundInt32: case Opcode::FPRoundInt64: + case Opcode::FPRSqrtEstimate16: case Opcode::FPRSqrtEstimate32: case Opcode::FPRSqrtEstimate64: case Opcode::FPRSqrtStepFused32: diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index c7a4c227..0da852cb 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -501,6 +501,7 @@ OPCODE(FPRecipStepFused64, U64, U64, OPCODE(FPRoundInt16, U16, U16, U8, U1 ) OPCODE(FPRoundInt32, U32, U32, U8, U1 ) OPCODE(FPRoundInt64, U64, U64, U8, U1 ) +OPCODE(FPRSqrtEstimate16, U16, U16 ) OPCODE(FPRSqrtEstimate32, U32, U32 ) OPCODE(FPRSqrtEstimate64, U64, U64 ) OPCODE(FPRSqrtStepFused32, U32, U32, U32 ) From 037acb17b92bb924fb88f991ba7df411199a8517 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sat, 13 Apr 2019 18:36:24 -0400 Subject: [PATCH 3/5] frontend/ir_emitter: Add half-precision opcode variant for FPVectorRSqrtEstimate --- src/backend/x64/emit_x64_vector_floating_point.cpp | 4 ++++ src/frontend/ir/ir_emitter.cpp | 2 ++ src/frontend/ir/microinstruction.cpp | 1 + src/frontend/ir/opcodes.inc | 1 + 4 files changed, 8 insertions(+) diff --git a/src/backend/x64/emit_x64_vector_floating_point.cpp b/src/backend/x64/emit_x64_vector_floating_point.cpp index 0bf4f619..faa30161 100644 --- a/src/backend/x64/emit_x64_vector_floating_point.cpp +++ b/src/backend/x64/emit_x64_vector_floating_point.cpp @@ -1241,6 +1241,10 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins }); } +void EmitX64::EmitFPVectorRSqrtEstimate16(EmitContext& ctx, IR::Inst* inst) { + EmitRSqrtEstimate(code, ctx, inst); +} + void EmitX64::EmitFPVectorRSqrtEstimate32(EmitContext& ctx, IR::Inst* inst) { EmitRSqrtEstimate(code, ctx, inst); } diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index 86d9923c..f5a28853 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -2302,6 +2302,8 @@ U128 IREmitter::FPVectorRoundInt(size_t esize, const U128& operand, FP::Rounding U128 IREmitter::FPVectorRSqrtEstimate(size_t esize, const U128& a) { switch (esize) { + case 16: + return Inst(Opcode::FPVectorRSqrtEstimate16, a); case 32: return Inst(Opcode::FPVectorRSqrtEstimate32, a); case 64: diff --git a/src/frontend/ir/microinstruction.cpp b/src/frontend/ir/microinstruction.cpp index f7545b3d..bd4bb02c 100644 --- a/src/frontend/ir/microinstruction.cpp +++ b/src/frontend/ir/microinstruction.cpp @@ -343,6 +343,7 @@ bool Inst::ReadsFromAndWritesToFPSRCumulativeExceptionBits() const { case Opcode::FPVectorRoundInt16: case Opcode::FPVectorRoundInt32: case Opcode::FPVectorRoundInt64: + case Opcode::FPVectorRSqrtEstimate16: case Opcode::FPVectorRSqrtEstimate32: case Opcode::FPVectorRSqrtEstimate64: case Opcode::FPVectorRSqrtStepFused32: diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index 0da852cb..e1aeec4a 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -578,6 +578,7 @@ OPCODE(FPVectorRecipStepFused64, U128, U128 OPCODE(FPVectorRoundInt16, U128, U128, U8, U1 ) OPCODE(FPVectorRoundInt32, U128, U128, U8, U1 ) OPCODE(FPVectorRoundInt64, U128, U128, U8, U1 ) +OPCODE(FPVectorRSqrtEstimate16, U128, U128 ) OPCODE(FPVectorRSqrtEstimate32, U128, U128 ) OPCODE(FPVectorRSqrtEstimate64, U128, U128 ) OPCODE(FPVectorRSqrtStepFused32, U128, U128, U128 ) From 77c84bcf9bde598c9e51ce7ab865abbf5ab47305 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sat, 13 Apr 2019 18:38:39 -0400 Subject: [PATCH 4/5] A64: Implement half-precision variant of FRSQRTE's scalar variant --- src/frontend/A64/decoder/a64.inc | 2 +- .../translate/impl/simd_scalar_two_register_misc.cpp | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index 523c20a3..9b13b634 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -438,7 +438,7 @@ INST(FCMLE_2, "FCMLE (zero)", "01111 INST(FCVTPU_2, "FCVTPU (vector)", "011111101z100001101010nnnnnddddd") //INST(FCVTZU_int_1, "FCVTZU (vector, integer)", "0111111011111001101110nnnnnddddd") INST(FCVTZU_int_2, "FCVTZU (vector, integer)", "011111101z100001101110nnnnnddddd") -//INST(FRSQRTE_1, "FRSQRTE", "0111111011111001110110nnnnnddddd") +INST(FRSQRTE_1, "FRSQRTE", "0111111011111001110110nnnnnddddd") INST(FRSQRTE_2, "FRSQRTE", "011111101z100001110110nnnnnddddd") // Data Processing - FP and SIMD - Scalar three same extra diff --git a/src/frontend/A64/translate/impl/simd_scalar_two_register_misc.cpp b/src/frontend/A64/translate/impl/simd_scalar_two_register_misc.cpp index 623a705c..5e898caf 100644 --- a/src/frontend/A64/translate/impl/simd_scalar_two_register_misc.cpp +++ b/src/frontend/A64/translate/impl/simd_scalar_two_register_misc.cpp @@ -200,6 +200,16 @@ bool TranslatorVisitor::FRECPX_2(bool sz, Vec Vn, Vec Vd) { return true; } +bool TranslatorVisitor::FRSQRTE_1(Vec Vn, Vec Vd) { + const size_t esize = 16; + + const IR::U16 operand = V_scalar(esize, Vn); + const IR::U16 result = ir.FPRSqrtEstimate(operand); + + V_scalar(esize, Vd, result); + return true; +} + bool TranslatorVisitor::FRSQRTE_2(bool sz, Vec Vn, Vec Vd) { const size_t esize = sz ? 64 : 32; From 7b212ec8aed5ddbd92b6011170e1117c3a1e97e1 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sat, 13 Apr 2019 18:39:54 -0400 Subject: [PATCH 5/5] A64: Implement half-precision variant of FRSQRTE's vector variant --- src/frontend/A64/decoder/a64.inc | 2 +- .../A64/translate/impl/simd_two_register_misc.cpp | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index 9b13b634..865ac8ac 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -692,7 +692,7 @@ INST(FCVTPU_4, "FCVTPU (vector)", "0Q101 //INST(FCVTZU_int_3, "FCVTZU (vector, integer)", "0Q10111011111001101110nnnnnddddd") INST(FCVTZU_int_4, "FCVTZU (vector, integer)", "0Q1011101z100001101110nnnnnddddd") INST(URSQRTE, "URSQRTE", "0Q1011101z100001110010nnnnnddddd") -//INST(FRSQRTE_3, "FRSQRTE", "0Q10111011111001110110nnnnnddddd") +INST(FRSQRTE_3, "FRSQRTE", "0Q10111011111001110110nnnnnddddd") INST(FRSQRTE_4, "FRSQRTE", "0Q1011101z100001110110nnnnnddddd") //INST(FSQRT_1, "FSQRT (vector)", "0Q10111011111001111110nnnnnddddd") INST(FSQRT_2, "FSQRT (vector)", "0Q1011101z100001111110nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/simd_two_register_misc.cpp b/src/frontend/A64/translate/impl/simd_two_register_misc.cpp index ef0180f9..0610347c 100644 --- a/src/frontend/A64/translate/impl/simd_two_register_misc.cpp +++ b/src/frontend/A64/translate/impl/simd_two_register_misc.cpp @@ -548,6 +548,17 @@ bool TranslatorVisitor::FSQRT_2(bool Q, bool sz, Vec Vn, Vec Vd) { return true; } +bool TranslatorVisitor::FRSQRTE_3(bool Q, Vec Vn, Vec Vd) { + const size_t datasize = Q ? 128 : 64; + const size_t esize = 16; + + const IR::U128 operand = V(datasize, Vn); + const IR::U128 result = ir.FPVectorRSqrtEstimate(esize, operand); + + V(datasize, Vd, result); + return true; +} + bool TranslatorVisitor::FRSQRTE_4(bool Q, bool sz, Vec Vn, Vec Vd) { if (sz && !Q) { return ReservedValue();