From 9a3d38d2eef309a2e014c76500b140f7d9959a75 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sat, 8 Sep 2018 15:18:30 -0400 Subject: [PATCH] A64: Implement SMLAL{2}, SMLSL{2}, UMLAL{2}, and UMLSL{2}'s vector by-element variants We can simply modify the general function made for SMULL{2} and UMULL{2}'s by-element variants to also handle the other multiply-based by-element variants. --- src/frontend/A64/decoder/a64.inc | 8 ++-- src/frontend/A64/translate/impl/impl.h | 8 ++-- .../impl/simd_vector_x_indexed_element.cpp | 38 ++++++++++++++++--- 3 files changed, 41 insertions(+), 13 deletions(-) diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index 6d22bd03..c7c0f4f6 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -820,9 +820,9 @@ INST(USHLL, "USHLL, USHLL2", "0Q101 //INST(FCVTZU_fix_2, "FCVTZU (vector, fixed-point)", "0Q1011110IIIIiii111111nnnnnddddd") // Data Processing - FP and SIMD - SIMD vector x indexed element -//INST(SMLAL_elt, "SMLAL, SMLAL2 (by element)", "0Q001111zzLMmmmm0010H0nnnnnddddd") +INST(SMLAL_elt, "SMLAL, SMLAL2 (by element)", "0Q001111zzLMmmmm0010H0nnnnnddddd") //INST(SQDMLAL_elt_2, "SQDMLAL, SQDMLAL2 (by element)", "0Q001111zzLMmmmm0011H0nnnnnddddd") -//INST(SMLSL_elt, "SMLSL, SMLSL2 (by element)", "0Q001111zzLMmmmm0110H0nnnnnddddd") +INST(SMLSL_elt, "SMLSL, SMLSL2 (by element)", "0Q001111zzLMmmmm0110H0nnnnnddddd") //INST(SQDMLSL_elt_2, "SQDMLSL, SQDMLSL2 (by element)", "0Q001111zzLMmmmm0111H0nnnnnddddd") INST(MUL_elt, "MUL (by element)", "0Q001111zzLMmmmm1000H0nnnnnddddd") INST(SMULL_elt, "SMULL, SMULL2 (by element)", "0Q001111zzLMmmmm1010H0nnnnnddddd") @@ -841,9 +841,9 @@ INST(FMUL_elt_4, "FMUL (by element)", "0Q001 //INST(FMLSL_elt_1, "FMLSL, FMLSL2 (by element)", "0Q0011111zLMmmmm0100H0nnnnnddddd") //INST(FMLSL_elt_2, "FMLSL, FMLSL2 (by element)", "0Q1011111zLMmmmm1100H0nnnnnddddd") INST(MLA_elt, "MLA (by element)", "0Q101111zzLMmmmm0000H0nnnnnddddd") -//INST(UMLAL_elt, "UMLAL, UMLAL2 (by element)", "0Q101111zzLMmmmm0010H0nnnnnddddd") +INST(UMLAL_elt, "UMLAL, UMLAL2 (by element)", "0Q101111zzLMmmmm0010H0nnnnnddddd") INST(MLS_elt, "MLS (by element)", "0Q101111zzLMmmmm0100H0nnnnnddddd") -//INST(UMLSL_elt, "UMLSL, UMLSL2 (by element)", "0Q101111zzLMmmmm0110H0nnnnnddddd") +INST(UMLSL_elt, "UMLSL, UMLSL2 (by element)", "0Q101111zzLMmmmm0110H0nnnnnddddd") INST(UMULL_elt, "UMULL, UMULL2 (by element)", "0Q101111zzLMmmmm1010H0nnnnnddddd") //INST(SQRDMLAH_elt_2, "SQRDMLAH (by element)", "0Q101111zzLMmmmm1101H0nnnnnddddd") INST(UDOT_elt, "UDOT (by element)", "0Q101111zzLMmmmm1110H0nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/impl.h b/src/frontend/A64/translate/impl/impl.h index fc596da4..bcef7905 100644 --- a/src/frontend/A64/translate/impl/impl.h +++ b/src/frontend/A64/translate/impl/impl.h @@ -890,9 +890,9 @@ struct TranslatorVisitor final { bool FCVTZU_fix_2(bool Q, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd); // Data Processing - FP and SIMD - SIMD vector x indexed element - bool SMLAL_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd); + bool SMLAL_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd); bool SQDMLAL_elt_2(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Reg Rn, Vec Vd); - bool SMLSL_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd); + bool SMLSL_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd); bool SQDMLSL_elt_2(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Reg Rn, Vec Vd); bool MUL_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd); bool SMULL_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vm, Imm<1> H, Vec Vn, Vec Vd); @@ -911,9 +911,9 @@ struct TranslatorVisitor final { bool FMLSL_elt_1(bool Q, bool sz, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd); bool FMLSL_elt_2(bool Q, bool sz, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd); bool MLA_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd); - bool UMLAL_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd); + bool UMLAL_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd); bool MLS_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd); - bool UMLSL_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd); + bool UMLSL_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd); bool UMULL_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd); bool SQRDMLAH_elt_2(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd); bool UDOT_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd); diff --git a/src/frontend/A64/translate/impl/simd_vector_x_indexed_element.cpp b/src/frontend/A64/translate/impl/simd_vector_x_indexed_element.cpp index df6979df..eb28013b 100644 --- a/src/frontend/A64/translate/impl/simd_vector_x_indexed_element.cpp +++ b/src/frontend/A64/translate/impl/simd_vector_x_indexed_element.cpp @@ -128,7 +128,7 @@ enum class Signedness { }; bool MultiplyLong(TranslatorVisitor& v, bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, - Imm<1> H, Vec Vn, Vec Vd, Signedness sign) { + Imm<1> H, Vec Vn, Vec Vd, ExtraBehavior extra_behavior, Signedness sign) { if (size == 0b00 || size == 0b11) { return v.UnallocatedEncoding(); } @@ -158,8 +158,20 @@ bool MultiplyLong(TranslatorVisitor& v, bool Q, Imm<2> size, Imm<1> L, Imm<1> M, const IR::U128 operand2 = v.V(idxsize, concatenate(Vmhi, Vmlo).ZeroExtend()); const IR::U128 index_vector = v.ir.VectorBroadcast(esize, v.ir.VectorGetElement(esize, operand2, index)); - const auto [extended_op1, extended_index] = extend_operands(operand1, index_vector); - const IR::U128 result = v.ir.VectorMultiply(2 * esize, extended_op1, extended_index); + const IR::U128 result = [&] { + const auto [extended_op1, extended_index] = extend_operands(operand1, index_vector); + const IR::U128 product = v.ir.VectorMultiply(2 * esize, extended_op1, extended_index); + if (extra_behavior == ExtraBehavior::None) { + return product; + } + + const IR::U128 operand3 = v.V(2 * datasize, Vd); + if (extra_behavior == ExtraBehavior::Accumulate) { + return v.ir.VectorAdd(2 * esize, operand3, product); + } + + return v.ir.VectorSub(2 * esize, operand3, product); + }(); v.V(2 * datasize, Vd, result); return true; @@ -190,8 +202,16 @@ bool TranslatorVisitor::FMUL_elt_4(bool Q, bool sz, Imm<1> L, Imm<1> M, Imm<4> V return FPMultiplyByElement(*this, Q, sz, L, M, Vmlo, H, Vn, Vd, ExtraBehavior::None); } +bool TranslatorVisitor::SMLAL_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd) { + return MultiplyLong(*this, Q, size, L, M, Vmlo, H, Vn, Vd, ExtraBehavior::Accumulate, Signedness::Signed); +} + +bool TranslatorVisitor::SMLSL_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd) { + return MultiplyLong(*this, Q, size, L, M, Vmlo, H, Vn, Vd, ExtraBehavior::Subtract, Signedness::Signed); +} + bool TranslatorVisitor::SMULL_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd) { - return MultiplyLong(*this, Q, size, L, M, Vmlo, H, Vn, Vd, Signedness::Signed); + return MultiplyLong(*this, Q, size, L, M, Vmlo, H, Vn, Vd, ExtraBehavior::None, Signedness::Signed); } bool TranslatorVisitor::SQDMULH_elt_2(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd) { @@ -227,8 +247,16 @@ bool TranslatorVisitor::UDOT_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> return DotProduct(*this, Q, size, L, M, Vmlo, H, Vn, Vd, &IREmitter::ZeroExtendToWord); } +bool TranslatorVisitor::UMLAL_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd) { + return MultiplyLong(*this, Q, size, L, M, Vmlo, H, Vn, Vd, ExtraBehavior::Accumulate, Signedness::Unsigned); +} + +bool TranslatorVisitor::UMLSL_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd) { + return MultiplyLong(*this, Q, size, L, M, Vmlo, H, Vn, Vd, ExtraBehavior::Subtract, Signedness::Unsigned); +} + bool TranslatorVisitor::UMULL_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd) { - return MultiplyLong(*this, Q, size, L, M, Vmlo, H, Vn, Vd, Signedness::Unsigned); + return MultiplyLong(*this, Q, size, L, M, Vmlo, H, Vn, Vd, ExtraBehavior::None, Signedness::Unsigned); } } // namespace Dynarmic::A64