From a86d4093cda9b8fe520e290ce35a135d859ecf46 Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Wed, 18 Apr 2018 21:39:03 +0100
Subject: [PATCH] A64: Implement MLA (by element)

---
 src/CMakeLists.txt                            |  1 +
 src/frontend/A64/decoder/a64.inc              |  4 +-
 src/frontend/A64/translate/impl/impl.h        |  4 +-
 .../impl/simd_vector_x_indexed_element.cpp    | 44 +++++++++++++++++++
 4 files changed, 49 insertions(+), 4 deletions(-)
 create mode 100644 src/frontend/A64/translate/impl/simd_vector_x_indexed_element.cpp

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 945d2f35..200f48d0 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -115,6 +115,7 @@ add_library(dynarmic
     frontend/A64/translate/impl/simd_three_different.cpp
     frontend/A64/translate/impl/simd_three_same.cpp
     frontend/A64/translate/impl/simd_two_register_misc.cpp
+    frontend/A64/translate/impl/simd_vector_x_indexed_element.cpp
     frontend/A64/translate/impl/sys_dc.cpp
     frontend/A64/translate/impl/system.cpp
     frontend/A64/translate/translate.cpp
diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc
index 5ce2c2e7..27bc527c 100644
--- a/src/frontend/A64/decoder/a64.inc
+++ b/src/frontend/A64/decoder/a64.inc
@@ -818,7 +818,7 @@ INST(USHLL,                  "USHLL, USHLL2",                             "0Q101
 //INST(UCVTF_fix_2,            "UCVTF (vector, fixed-point)",               "0Q1011110IIIIiii111001nnnnnddddd")
 //INST(FCVTZU_fix_2,           "FCVTZU (vector, fixed-point)",              "0Q1011110IIIIiii111111nnnnnddddd")
 
-// Data Processing - FP and SIMD - SIMD x indexed element
+// Data Processing - FP and SIMD - SIMD vector x indexed element
 //INST(SMLAL_elt,              "SMLAL, SMLAL2 (by element)",                "0Q001111zzLMmmmm0010H0nnnnnddddd")
 //INST(SQDMLAL_elt_2,          "SQDMLAL, SQDMLAL2 (by element)",            "0Q001111zzLMmmmm0011H0nnnnnddddd")
 //INST(SMLSL_elt,              "SMLSL, SMLSL2 (by element)",                "0Q001111zzLMmmmm0110H0nnnnnddddd")
@@ -839,7 +839,7 @@ INST(USHLL,                  "USHLL, USHLL2",                             "0Q101
 //INST(FMLAL_elt_2,            "FMLAL, FMLAL2 (by element)",                "0Q1011111zLMmmmm1000H0nnnnnddddd")
 //INST(FMLSL_elt_1,            "FMLSL, FMLSL2 (by element)",                "0Q0011111zLMmmmm0100H0nnnnnddddd")
 //INST(FMLSL_elt_2,            "FMLSL, FMLSL2 (by element)",                "0Q1011111zLMmmmm1100H0nnnnnddddd")
-//INST(MLA_elt,                "MLA (by element)",                          "0Q101111zzLMmmmm0000H0nnnnnddddd")
+INST(MLA_elt,                "MLA (by element)",                          "0Q101111zzLMmmmm0000H0nnnnnddddd")
 //INST(UMLAL_elt,              "UMLAL, UMLAL2 (by element)",                "0Q101111zzLMmmmm0010H0nnnnnddddd")
 //INST(MLS_elt,                "MLS (by element)",                          "0Q101111zzLMmmmm0100H0nnnnnddddd")
 //INST(UMLSL_elt,              "UMLSL, UMLSL2 (by element)",                "0Q101111zzLMmmmm0110H0nnnnnddddd")
diff --git a/src/frontend/A64/translate/impl/impl.h b/src/frontend/A64/translate/impl/impl.h
index 7ed461aa..190d951c 100644
--- a/src/frontend/A64/translate/impl/impl.h
+++ b/src/frontend/A64/translate/impl/impl.h
@@ -898,7 +898,7 @@ struct TranslatorVisitor final {
     bool UCVTF_fix_2(bool Q, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
     bool FCVTZU_fix_2(bool Q, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd);
 
-    // Data Processing - FP and SIMD - SIMD x indexed element
+    // Data Processing - FP and SIMD - SIMD vector x indexed element
     bool SMLAL_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
     bool SMLSL_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
     bool MUL_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
@@ -908,7 +908,7 @@ struct TranslatorVisitor final {
     bool FMLAL_elt_2(bool Q, bool sz, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
     bool FMLSL_elt_1(bool Q, bool sz, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
     bool FMLSL_elt_2(bool Q, bool sz, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
-    bool MLA_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
+    bool MLA_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd);
     bool UMLAL_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
     bool MLS_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
     bool UMLSL_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
diff --git a/src/frontend/A64/translate/impl/simd_vector_x_indexed_element.cpp b/src/frontend/A64/translate/impl/simd_vector_x_indexed_element.cpp
new file mode 100644
index 00000000..3048a937
--- /dev/null
+++ b/src/frontend/A64/translate/impl/simd_vector_x_indexed_element.cpp
@@ -0,0 +1,44 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include "frontend/A64/translate/impl/impl.h"
+
+namespace Dynarmic::A64 {
+// MLA (by element): multiply Vn by one broadcast element of Vm and add the product to Vd; sizes other than 0b01/0b10 are unallocated.
+bool TranslatorVisitor::MLA_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd) {
+    const size_t idxdsize = H == 1 ? 128 : 64; // size argument for the V() read of Vm further down
+    // Which bits form the element index, and where M goes, depends on size.
+    size_t index; // assigned in every switch arm that does not return
+    Imm<1> Vmhi{0}; // stays 0 unless the size == 0b10 arm replaces it with M
+    switch (size.ZeroExtend()) {
+    case 0b01: // esize == 16: index is formed from H, L and M; Vmhi keeps its 0 default
+        index = concatenate(H, L, M).ZeroExtend(); // NOTE(review): presumably H is the most significant bit - confirm against concatenate()
+        break;
+    case 0b10: // esize == 32: index is formed from H and L only; M is used as Vmhi instead
+        index = concatenate(H, L).ZeroExtend();
+        Vmhi = M;
+        break;
+    default: // size == 0b00 or 0b11
+        return UnallocatedEncoding();
+    }
+    // Build the Vm register number from the 1-bit Vmhi and the 4-bit Vmlo field.
+    const Vec Vm = concatenate(Vmhi, Vmlo).ZeroExtend<Vec>();
+
+    const size_t esize = 8 << size.ZeroExtend(); // 16 or 32 here; every other size returned above
+    const size_t datasize = Q ? 128 : 64;
+
+    const IR::U128 operand1 = V(datasize, Vn);
+    const IR::U128 operand2 = ir.VectorBroadcast(esize, ir.VectorGetElement(esize, V(idxdsize, Vm), index)); // element `index` of Vm, broadcast
+    const IR::U128 operand3 = V(datasize, Vd); // addend for the final VectorAdd
+    // result = operand3 + operand1 * operand2, using esize-wide vector multiply and add.
+    const IR::U128 product = ir.VectorMultiply(esize, operand1, operand2);
+    const IR::U128 result = ir.VectorAdd(esize, operand3, product);
+    V(datasize, Vd, result);
+
+    return true;
+}
+
+} // namespace Dynarmic::A64