From 1651e604623118b94f5b69b442f0ca3a4abe64fa Mon Sep 17 00:00:00 2001 From: Lioncash Date: Wed, 18 Apr 2018 19:30:30 -0400 Subject: [PATCH] A64: Implement MUL (by element) --- src/frontend/A64/decoder/a64.inc | 2 +- src/frontend/A64/translate/impl/impl.h | 2 +- .../impl/simd_vector_x_indexed_element.cpp | 67 ++++++++++++------- 3 files changed, 45 insertions(+), 26 deletions(-) diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index 27bc527c..dba77293 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -823,7 +823,7 @@ INST(USHLL, "USHLL, USHLL2", "0Q101 //INST(SQDMLAL_elt_2, "SQDMLAL, SQDMLAL2 (by element)", "0Q001111zzLMmmmm0011H0nnnnnddddd") //INST(SMLSL_elt, "SMLSL, SMLSL2 (by element)", "0Q001111zzLMmmmm0110H0nnnnnddddd") //INST(SQDMLSL_elt_2, "SQDMLSL, SQDMLSL2 (by element)", "0Q001111zzLMmmmm0111H0nnnnnddddd") -//INST(MUL_elt, "MUL (by element)", "0Q001111zzLMmmmm1000H0nnnnnddddd") +INST(MUL_elt, "MUL (by element)", "0Q001111zzLMmmmm1000H0nnnnnddddd") //INST(SMULL_elt, "SMULL, SMULL2 (by element)", "0Q001111zzLMmmmm1010H0nnnnnddddd") //INST(SQDMULL_elt_2, "SQDMULL, SQDMULL2 (by element)", "0Q001111zzLMmmmm1011H0nnnnnddddd") //INST(SQDMULH_elt_2, "SQDMULH (by element)", "0Q001111zzLMmmmm1100H0nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/impl.h b/src/frontend/A64/translate/impl/impl.h index 190d951c..31aa6e65 100644 --- a/src/frontend/A64/translate/impl/impl.h +++ b/src/frontend/A64/translate/impl/impl.h @@ -901,7 +901,7 @@ struct TranslatorVisitor final { // Data Processing - FP and SIMD - SIMD vector x indexed element bool SMLAL_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd); bool SMLSL_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd); - bool MUL_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd); + bool MUL_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd); bool 
SMULL_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd); bool SDOT_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd); bool FMLAL_elt_1(bool Q, bool sz, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd); diff --git a/src/frontend/A64/translate/impl/simd_vector_x_indexed_element.cpp b/src/frontend/A64/translate/impl/simd_vector_x_indexed_element.cpp index 3048a937..a633c569 100644 --- a/src/frontend/A64/translate/impl/simd_vector_x_indexed_element.cpp +++ b/src/frontend/A64/translate/impl/simd_vector_x_indexed_element.cpp @@ -4,40 +4,59 @@ * General Public License version 2 or any later version. */ +#include <utility> #include "frontend/A64/translate/impl/impl.h" namespace Dynarmic::A64 { +namespace { +std::pair<size_t, Vec> Combine(Imm<2> size, Imm<1> H, Imm<1> L, Imm<1> M, Imm<4> Vmlo) { + if (size == 0b01) { + return {concatenate(H, L, M).ZeroExtend(), Vmlo.ZeroExtend()}; + } + + return {concatenate(H, L).ZeroExtend(), concatenate(M, Vmlo).ZeroExtend()}; +} + +enum class ExtraBehavior { + None, + Accumulate, +}; + +void MultiplyByElement(TranslatorVisitor& v, bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd, + ExtraBehavior extra_behavior) { + const auto [index, Vm] = Combine(size, H, L, M, Vmlo); + const size_t idxdsize = H == 1 ? 128 : 64; + const size_t esize = 8 << size.ZeroExtend(); + const size_t datasize = Q ? 128 : 64; + + const IR::U128 operand1 = v.V(datasize, Vn); + const IR::U128 operand2 = v.ir.VectorBroadcast(esize, v.ir.VectorGetElement(esize, v.V(idxdsize, Vm), index)); + const IR::U128 operand3 = v.V(datasize, Vd); + + IR::U128 result = v.ir.VectorMultiply(esize, operand1, operand2); + if (extra_behavior == ExtraBehavior::Accumulate) { + result = v.ir.VectorAdd(esize, operand3, result); + } + + v.V(datasize, Vd, result); +} +} // Anonymous namespace bool TranslatorVisitor::MLA_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd) { - const size_t idxdsize = H == 1 ? 
128 : 64; - - size_t index; - Imm<1> Vmhi{0}; - switch (size.ZeroExtend()) { - case 0b01: - index = concatenate(H, L, M).ZeroExtend(); - break; - case 0b10: - index = concatenate(H, L).ZeroExtend(); - Vmhi = M; - break; - default: + if (size != 0b01 && size != 0b10) { return UnallocatedEncoding(); } - const Vec Vm = concatenate(Vmhi, Vmlo).ZeroExtend(); + MultiplyByElement(*this, Q, size, L, M, Vmlo, H, Vn, Vd, ExtraBehavior::Accumulate); + return true; +} - const size_t esize = 8 << size.ZeroExtend(); - const size_t datasize = Q ? 128 : 64; - - const IR::U128 operand1 = V(datasize, Vn); - const IR::U128 operand2 = ir.VectorBroadcast(esize, ir.VectorGetElement(esize, V(idxdsize, Vm), index)); - const IR::U128 operand3 = V(datasize, Vd); - - const IR::U128 product = ir.VectorMultiply(esize, operand1, operand2); - const IR::U128 result = ir.VectorAdd(esize, operand3, product); - V(datasize, Vd, result); +bool TranslatorVisitor::MUL_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd) { + if (size != 0b01 && size != 0b10) { + return UnallocatedEncoding(); + } + MultiplyByElement(*this, Q, size, L, M, Vmlo, H, Vn, Vd, ExtraBehavior::None); return true; }