From 3202e4c539fbb31216c13ae1d931026cd1e4129c Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Sun, 21 Jun 2020 15:25:26 +0100
Subject: [PATCH] A32: Implement ASIMD VLD{1,2,3,4} (single n-element structure to all lanes)

---
 src/frontend/A32/decoder/asimd.inc            |  4 +-
 .../impl/asimd_load_store_structures.cpp      | 89 ++++++++++++++++++-
 .../A32/translate/impl/translate_arm.h        |  1 +
 3 files changed, 91 insertions(+), 3 deletions(-)

diff --git a/src/frontend/A32/decoder/asimd.inc b/src/frontend/A32/decoder/asimd.inc
index e3344810..5638b23e 100644
--- a/src/frontend/A32/decoder/asimd.inc
+++ b/src/frontend/A32/decoder/asimd.inc
@@ -124,8 +124,8 @@ INST(v8_VST_multiple,       "VST{1-4} (multiple)",      "111101000D00nnnnddddxxx
 INST(v8_VLD_multiple,       "VLD{1-4} (multiple)",      "111101000D10nnnnddddxxxxzzaammmm") // v8
 INST(arm_UDF,               "UNALLOCATED",              "111101000--0--------1011--------") // v8
 INST(arm_UDF,               "UNALLOCATED",              "111101000--0--------11----------") // v8
-//INST(arm_UDF,             "UNALLOCATED",              "111101001-00--------11----------") // v8
-//INST(v8_VLD_all_lanes,    "VLD{1-4} (all lanes)",     "111101001D10nnnndddd11nnzzTammmm") // v8
+INST(arm_UDF,               "UNALLOCATED",              "111101001-00--------11----------") // v8
+INST(v8_VLD_all_lanes,      "VLD{1-4} (all lanes)",     "111101001D10nnnndddd11nnzzTammmm") // v8
 //INST(arm_UDF,             "UNALLOCATED",              "111101001-10--------1110---1----") // v8
 //INST(v8_VST_single,       "VST{1-4} (single)",        "111101001D00nnnnddddzzNNaaaammmm") // v8
 //INST(v8_VLD_single,       "VLD{1-4} (single)",        "111101001D10nnnnddddzzNNaaaammmm") // v8
diff --git a/src/frontend/A32/translate/impl/asimd_load_store_structures.cpp b/src/frontend/A32/translate/impl/asimd_load_store_structures.cpp
index 541c24f2..284c1a92 100644
--- a/src/frontend/A32/translate/impl/asimd_load_store_structures.cpp
+++ b/src/frontend/A32/translate/impl/asimd_load_store_structures.cpp
@@ -148,9 +148,10 @@ bool ArmTranslatorVisitor::v8_VLD_multiple(bool D, Reg n, size_t Vd, Imm<4> type
     for (size_t r = 0; r < regs; r++) {
         for (size_t e = 0; e < elements; e++) {
             for (size_t i = 0; i < nelem; i++) {
-                const ExtReg ext_reg = d + i * inc + r;
                 const IR::U64 element = ir.ZeroExtendToLong(ir.ReadMemory(ebytes * 8, address));
                 const IR::U64 shifted_element = ir.LogicalShiftLeft(element, ir.Imm8(static_cast<u8>(e * ebytes * 8)));
+
+                const ExtReg ext_reg = d + i * inc + r;
                 ir.SetExtendedRegister(ext_reg, ir.Or(ir.GetExtendedRegister(ext_reg), shifted_element));
 
                 address = ir.Add(address, ir.Imm32(static_cast<u32>(ebytes)));
@@ -169,4 +170,90 @@ bool ArmTranslatorVisitor::v8_VLD_multiple(bool D, Reg n, size_t Vd, Imm<4> type
     return true;
 }
 
+// VLD{1,2,3,4} (single n-element structure to all lanes).
+//
+// Reads nn + 1 consecutive elements starting at the address held in Rn and
+// broadcasts each of them across every lane of its destination register(s).
+//
+//   D, Vd - combined by ToExtRegD into the first destination register
+//   nn    - number of elements in the structure, minus one
+//   sz    - element size field; 0b11 is only accepted for the 4-element form,
+//           where it selects 4-byte elements (and requires a to be set)
+//   T     - 1-element form: fill two consecutive registers instead of one;
+//           other forms: space the destination registers two apart
+//   a     - alignment hint; it only feeds the (currently unused) alignment value
+//   m     - R15: no writeback; R13: Rn += bytes transferred; otherwise Rn += Rm
+//
+// Returns the result of UndefinedInstruction()/UnpredictableInstruction() for
+// rejected encodings, and true once the IR has been emitted.
+bool ArmTranslatorVisitor::v8_VLD_all_lanes(bool D, Reg n, size_t Vd, size_t nn, size_t sz, bool T, bool a, Reg m) {
+    const size_t nelem = nn + 1;
+
+    // Size/alignment combinations that are undefined for each element count.
+    if (nelem == 1 && (sz == 0b11 || (sz == 0b00 && a))) {
+        return UndefinedInstruction();
+    }
+    if (nelem == 2 && sz == 0b11) {
+        return UndefinedInstruction();
+    }
+    if (nelem == 3 && (sz == 0b11 || a)) {
+        return UndefinedInstruction();
+    }
+    if (nelem == 4 && (sz == 0b11 && !a)) {
+        return UndefinedInstruction();
+    }
+
+    const size_t ebytes = sz == 0b11 ? 4 : (1 << sz);
+    const size_t inc = T ? 2 : 1;
+    const size_t regs = nelem == 1 ? inc : 1;
+    [[maybe_unused]] const size_t alignment = [&]() -> size_t {
+        if (a && nelem == 1) {
+            return ebytes;
+        }
+        if (a && nelem == 2) {
+            return ebytes * 2;
+        }
+        if (a && nelem == 4) {
+            // Both sz == 0b10 and sz == 0b11 have ebytes == 4: the former asks
+            // for 8-byte alignment, the latter for 16-byte alignment.
+            return sz == 0b10 ? 2 * ebytes : 4 * ebytes;
+        }
+        return 1;
+    }();
+
+    // d_last is the number of the register that receives the final element.
+    const ExtReg d = ToExtRegD(Vd, D);
+    const size_t d_last = RegNumber(d) + inc * (nelem - 1);
+    if (n == Reg::R15 || d_last + regs > 32) {
+        return UnpredictableInstruction();
+    }
+
+    const bool wback = m != Reg::R15;
+    const bool register_index = m != Reg::R15 && m != Reg::R13;
+
+    // One memory read per element; the value is then replicated into every lane.
+    auto address = ir.GetRegister(n);
+    for (size_t i = 0; i < nelem; i++) {
+        const auto element = ir.ReadMemory(ebytes * 8, address);
+        const auto replicated_element = ir.VectorBroadcast(ebytes * 8, element);
+
+        for (size_t r = 0; r < regs; r++) {
+            const ExtReg ext_reg = d + i * inc + r;
+            ir.SetVector(ext_reg, replicated_element);
+        }
+
+        address = ir.Add(address, ir.Imm32(static_cast<u32>(ebytes)));
+    }
+
+    if (wback) {
+        if (register_index) {
+            ir.SetRegister(n, ir.Add(ir.GetRegister(n), ir.GetRegister(m)));
+        } else {
+            ir.SetRegister(n, ir.Add(ir.GetRegister(n), ir.Imm32(static_cast<u32>(nelem * ebytes))));
+        }
+    }
+
+    return true;
+}
+
 } // namespace Dynarmic::A32
diff --git a/src/frontend/A32/translate/impl/translate_arm.h b/src/frontend/A32/translate/impl/translate_arm.h
index c6b433e0..96f1ac50 100644
--- a/src/frontend/A32/translate/impl/translate_arm.h
+++ b/src/frontend/A32/translate/impl/translate_arm.h
@@ -544,6 +544,7 @@ struct ArmTranslatorVisitor final {
     // Advanced SIMD load/store structures
     bool v8_VST_multiple(bool D, Reg n, size_t Vd, Imm<4> type, size_t sz, size_t align, Reg m);
     bool v8_VLD_multiple(bool D, Reg n, size_t Vd, Imm<4> type, size_t sz, size_t align, Reg m);
+    bool v8_VLD_all_lanes(bool D, Reg n, size_t Vd, size_t nn, size_t sz, bool T, bool a, Reg m);
 };
 
 } // namespace Dynarmic::A32