From f9b28622170f43ebaecf117923ceea17e4c835d9 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sun, 14 Apr 2019 21:04:38 -0400 Subject: [PATCH] A64: Implement FRSQRTS' half-precision scalar variant With the necessary machinery in place, we can now handle the half-precision variant. --- src/frontend/A64/decoder/a64.inc | 2 +- .../A64/translate/impl/simd_scalar_three_same.cpp | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index 6d44f970..2a19207b 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -386,7 +386,7 @@ INST(FMULX_vec_2, "FMULX", "01011 INST(FCMEQ_reg_2, "FCMEQ (register)", "010111100z1mmmmm111001nnnnnddddd") INST(FRECPS_1, "FRECPS", "01011110010mmmmm001111nnnnnddddd") INST(FRECPS_2, "FRECPS", "010111100z1mmmmm111111nnnnnddddd") -//INST(FRSQRTS_1, "FRSQRTS", "01011110110mmmmm001111nnnnnddddd") +INST(FRSQRTS_1, "FRSQRTS", "01011110110mmmmm001111nnnnnddddd") INST(FRSQRTS_2, "FRSQRTS", "010111101z1mmmmm111111nnnnnddddd") //INST(FCMGE_reg_1, "FCMGE (register)", "01111110010mmmmm001001nnnnnddddd") INST(FCMGE_reg_2, "FCMGE (register)", "011111100z1mmmmm111001nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/simd_scalar_three_same.cpp b/src/frontend/A64/translate/impl/simd_scalar_three_same.cpp index f4eaa6e1..68c15735 100644 --- a/src/frontend/A64/translate/impl/simd_scalar_three_same.cpp +++ b/src/frontend/A64/translate/impl/simd_scalar_three_same.cpp @@ -316,6 +316,17 @@ bool TranslatorVisitor::FRECPS_2(bool sz, Vec Vm, Vec Vn, Vec Vd) { return true; } +bool TranslatorVisitor::FRSQRTS_1(Vec Vm, Vec Vn, Vec Vd) { + const size_t esize = 16; + + const IR::U16 operand1 = V_scalar(esize, Vn); + const IR::U16 operand2 = V_scalar(esize, Vm); + const IR::U16 result = ir.FPRSqrtStepFused(operand1, operand2); + + V_scalar(esize, Vd, result); + return true; +} + bool TranslatorVisitor::FRSQRTS_2(bool sz, Vec Vm, Vec Vn, Vec Vd) { const size_t esize = sz ? 64 : 32;