diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 597698bb..62e81528 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -77,6 +77,7 @@ add_library(dynarmic
     frontend/A64/translate/impl/impl.cpp
     frontend/A64/translate/impl/impl.h
     frontend/A64/translate/impl/load_store_load_literal.cpp
+    frontend/A64/translate/impl/load_store_multiple_structures.cpp
     frontend/A64/translate/impl/load_store_register_immediate.cpp
     frontend/A64/translate/impl/load_store_register_pair.cpp
     frontend/A64/translate/impl/load_store_register_unprivileged.cpp
diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc
index 68d01fc1..c7ff429e 100644
--- a/src/frontend/A64/decoder/a64.inc
+++ b/src/frontend/A64/decoder/a64.inc
@@ -92,22 +92,10 @@ INST(TBZ, "TBZ", "b0110
 INST(TBNZ, "TBNZ", "b0110111bbbbbiiiiiiiiiiiiiittttt")
 
 // Loads and stores - Advanced SIMD Load/Store multiple structures
-//INST(ST4_mult_1, "ST4 (multiple structures)", "0Q001100000000000000zznnnnnttttt")
-//INST(ST4_mult_2, "ST4 (multiple structures)", "0Q001100100mmmmm0000zznnnnnttttt")
-//INST(ST1_mult_1, "ST1 (multiple structures)", "0Q00110000000000--1-zznnnnnttttt")
-//INST(ST1_mult_2, "ST1 (multiple structures)", "0Q001100100mmmmm--1-zznnnnnttttt")
-//INST(ST3_mult_1, "ST3 (multiple structures)", "0Q001100000000000100zznnnnnttttt")
-//INST(ST3_mult_2, "ST3 (multiple structures)", "0Q001100100mmmmm0100zznnnnnttttt")
-//INST(ST2_mult_1, "ST2 (multiple structures)", "0Q001100000000001000zznnnnnttttt")
-//INST(ST2_mult_2, "ST2 (multiple structures)", "0Q001100100mmmmm1000zznnnnnttttt")
-//INST(LD4_mult_1, "LD4 (multiple structures)", "0Q001100010000000000zznnnnnttttt")
-//INST(LD4_mult_2, "LD4 (multiple structures)", "0Q001100110mmmmm0000zznnnnnttttt")
-//INST(LD1_mult_1, "LD1 (multiple structures)", "0Q00110001000000--1-zznnnnnttttt")
-//INST(LD1_mult_2, "LD1 (multiple structures)", "0Q001100110mmmmm--1-zznnnnnttttt")
-//INST(LD3_mult_1, "LD3 (multiple structures)", "0Q001100010000000100zznnnnnttttt")
-//INST(LD3_mult_2, "LD3 (multiple structures)", "0Q001100110mmmmm0100zznnnnnttttt")
-//INST(LD2_mult_1, "LD2 (multiple structures)", "0Q001100010000001000zznnnnnttttt")
-//INST(LD2_mult_2, "LD2 (multiple structures)", "0Q001100110mmmmm1000zznnnnnttttt")
+INST(STx_mult_1, "STx (multiple structures)", "0Q00110000000000oooozznnnnnttttt")
+INST(STx_mult_2, "STx (multiple structures)", "0Q001100100mmmmmoooozznnnnnttttt")
+INST(LDx_mult_1, "LDx (multiple structures)", "0Q00110001000000oooozznnnnnttttt")
+INST(LDx_mult_2, "LDx (multiple structures)", "0Q001100110mmmmmoooozznnnnnttttt")
 
 // Loads and stores - Advanced SIMD Load/Store single structures
 //INST(ST1_sngl_1, "ST1 (single structure)", "0Q00110100000000--0Szznnnnnttttt")
diff --git a/src/frontend/A64/translate/impl/impl.h b/src/frontend/A64/translate/impl/impl.h
index e94d251e..59ee16ac 100644
--- a/src/frontend/A64/translate/impl/impl.h
+++ b/src/frontend/A64/translate/impl/impl.h
@@ -152,22 +152,10 @@ struct TranslatorVisitor final {
     bool TBNZ(Imm<1> b5, Imm<5> b40, Imm<14> imm14, Reg Rt);
 
     // Loads and stores - Advanced SIMD Load/Store multiple structures
-    bool ST4_mult_1(bool Q, Imm<2> size, Reg Rn, Reg Rt);
-    bool ST4_mult_2(bool Q, Reg Rm, Imm<2> size, Reg Rn, Reg Rt);
-    bool ST1_mult_1(bool Q, Imm<2> size, Reg Rn, Reg Rt);
-    bool ST1_mult_2(bool Q, Reg Rm, Imm<2> size, Reg Rn, Reg Rt);
-    bool ST3_mult_1(bool Q, Imm<2> size, Reg Rn, Reg Rt);
-    bool ST3_mult_2(bool Q, Reg Rm, Imm<2> size, Reg Rn, Reg Rt);
-    bool ST2_mult_1(bool Q, Imm<2> size, Reg Rn, Reg Rt);
-    bool ST2_mult_2(bool Q, Reg Rm, Imm<2> size, Reg Rn, Reg Rt);
-    bool LD4_mult_1(bool Q, Imm<2> size, Reg Rn, Reg Rt);
-    bool LD4_mult_2(bool Q, Reg Rm, Imm<2> size, Reg Rn, Reg Rt);
-    bool LD1_mult_1(bool Q, Imm<2> size, Reg Rn, Reg Rt);
-    bool LD1_mult_2(bool Q, Reg Rm, Imm<2> size, Reg Rn, Reg Rt);
-    bool LD3_mult_1(bool Q, Imm<2> size, Reg Rn, Reg Rt);
-    bool LD3_mult_2(bool Q, Reg Rm, Imm<2> size, Reg Rn, Reg Rt);
-    bool LD2_mult_1(bool Q, Imm<2> size, Reg Rn, Reg Rt);
-    bool LD2_mult_2(bool Q, Reg Rm, Imm<2> size, Reg Rn, Reg Rt);
+    bool STx_mult_1(bool Q, Imm<4> opcode, Imm<2> size, Reg Rn, Vec Vt);
+    bool STx_mult_2(bool Q, Reg Rm, Imm<4> opcode, Imm<2> size, Reg Rn, Vec Vt);
+    bool LDx_mult_1(bool Q, Imm<4> opcode, Imm<2> size, Reg Rn, Vec Vt);
+    bool LDx_mult_2(bool Q, Reg Rm, Imm<4> opcode, Imm<2> size, Reg Rn, Vec Vt);
 
     // Loads and stores - Advanced SIMD Load/Store single structures
     bool ST1_sngl_1(bool Q, bool S, Imm<2> size, Reg Rn, Vec Vt);
diff --git a/src/frontend/A64/translate/impl/load_store_multiple_structures.cpp b/src/frontend/A64/translate/impl/load_store_multiple_structures.cpp
new file mode 100644
index 00000000..5586fe62
--- /dev/null
+++ b/src/frontend/A64/translate/impl/load_store_multiple_structures.cpp
@@ -0,0 +1,142 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include <tuple>
+
+#include <boost/optional.hpp>
+
+#include "frontend/A64/translate/impl/impl.h"
+
+namespace Dynarmic::A64 {
+
+// Shared decode and IR emission for the STx/LDx (multiple structures) encodings.
+//
+// `opcode` selects `rpt` (outer repetition count) and `selem` (number of
+// consecutive registers visited per element index). For every repetition,
+// element index and structure member, one `esize`-bit element is moved between
+// memory and vector register `(Vt + r + s) % 32`; the byte offset grows by `ebytes`.
+//
+// wback: when true the base register is updated after the transfer; `Rm` must
+//        then hold a value because it is dereferenced unconditionally.
+// memop: MemOp::LOAD reads memory into registers; any other value stores.
+// Q:     selects a 128-bit (true) or 64-bit (false) datasize.
+// Rm:    writeback register; when it equals Reg::SP the accumulated byte count
+//        is used as the increment instead of the value of X[Rm].
+// Returns the result of UnallocatedEncoding()/ReservedValue() for rejected
+// encodings, otherwise true.
+static bool SharedDecodeAndOperation(TranslatorVisitor& tv, IREmitter& ir, bool wback, MemOp memop, bool Q, boost::optional<Reg> Rm, Imm<4> opcode, Imm<2> size, Reg Rn, Vec Vt) {
+    const size_t datasize = Q ? 128 : 64;
+    const size_t esize = 8 << size.ZeroExtend();
+    const size_t elements = datasize / esize;
+    const size_t ebytes = esize / 8;
+
+    size_t rpt, selem;
+    switch (opcode.ZeroExtend()) {
+    case 0b0000:
+        rpt = 1;
+        selem = 4;
+        break;
+    case 0b0010:
+        rpt = 4;
+        selem = 1;
+        break;
+    case 0b0100:
+        rpt = 1;
+        selem = 3;
+        break;
+    case 0b0110:
+        rpt = 3;
+        selem = 1;
+        break;
+    case 0b0111:
+        rpt = 1;
+        selem = 1;
+        break;
+    case 0b1000:
+        rpt = 1;
+        selem = 2;
+        break;
+    case 0b1010:
+        rpt = 2;
+        selem = 1;
+        break;
+    default:
+        return tv.UnallocatedEncoding();
+    }
+
+    // size == 0b11 with Q clear is only accepted when selem == 1.
+    if ((size == 0b11 && !Q) && selem != 1) {
+        return tv.ReservedValue();
+    }
+
+    IR::U64 address;
+    if (Rn == Reg::SP)
+        // TODO: Check SP Alignment
+        address = tv.SP(64);
+    else
+        address = tv.X(64, Rn);
+
+    // offs is the running byte offset from the base address.
+    IR::U64 offs = ir.Imm64(0);
+    for (size_t r = 0; r < rpt; r++) {
+        for (size_t e = 0; e < elements; e++) {
+            for (size_t s = 0; s < selem; s++) {
+                const Vec tt = static_cast<Vec>((VecNumber(Vt) + r + s) % 32);
+                if (memop == MemOp::LOAD) {
+                    const IR::UAny elem = tv.Mem(ir.Add(address, offs), ebytes, AccType::VEC);
+                    const IR::U128 vec = ir.VectorSetElement(esize, tv.V(datasize, tt), e, elem);
+                    tv.V(datasize, tt, vec);
+                } else {
+                    const IR::UAny elem = ir.VectorGetElement(esize, tv.V(datasize, tt), e);
+                    tv.Mem(ir.Add(address, offs), ebytes, AccType::VEC, elem);
+                }
+                offs = ir.Add(offs, ir.Imm64(ebytes));
+            }
+        }
+    }
+
+    if (wback) {
+        // A register other than Reg::SP overrides the accumulated byte count.
+        if (*Rm != Reg::SP)
+            offs = tv.X(64, *Rm);
+        if (Rn == Reg::SP)
+            tv.SP(64, ir.Add(address, offs));
+        else
+            tv.X(64, Rn, ir.Add(address, offs));
+    }
+
+    return true;
+}
+
+// STx (multiple structures): store, no writeback, no Rm.
+bool TranslatorVisitor::STx_mult_1(bool Q, Imm<4> opcode, Imm<2> size, Reg Rn, Vec Vt) {
+    const bool wback = false;
+    const MemOp memop = MemOp::STORE;
+    return SharedDecodeAndOperation(*this, ir, wback, memop, Q, {}, opcode, size, Rn, Vt);
+}
+
+// STx (multiple structures): store with writeback governed by Rm.
+bool TranslatorVisitor::STx_mult_2(bool Q, Reg Rm, Imm<4> opcode, Imm<2> size, Reg Rn, Vec Vt) {
+    const bool wback = true;
+    const MemOp memop = MemOp::STORE;
+    return SharedDecodeAndOperation(*this, ir, wback, memop, Q, Rm, opcode, size, Rn, Vt);
+}
+
+// LDx (multiple structures): load, no writeback, no Rm.
+bool TranslatorVisitor::LDx_mult_1(bool Q, Imm<4> opcode, Imm<2> size, Reg Rn, Vec Vt) {
+    const bool wback = false;
+    const MemOp memop = MemOp::LOAD;
+    return SharedDecodeAndOperation(*this, ir, wback, memop, Q, {}, opcode, size, Rn, Vt);
+}
+
+// LDx (multiple structures): load with writeback governed by Rm.
+bool TranslatorVisitor::LDx_mult_2(bool Q, Reg Rm, Imm<4> opcode, Imm<2> size, Reg Rn, Vec Vt) {
+    const bool wback = true;
+    const MemOp memop = MemOp::LOAD;
+    return SharedDecodeAndOperation(*this, ir, wback, memop, Q, Rm, opcode, size, Rn, Vt);
+}
+
+} // namespace Dynarmic::A64
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index bd105669..8d1eac5e 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -187,10 +187,10 @@ OPCODE(VectorGetElement8, T::U8, T::U128, T::U8
 OPCODE(VectorGetElement16, T::U16, T::U128, T::U8 )
 OPCODE(VectorGetElement32, T::U32, T::U128, T::U8 )
 OPCODE(VectorGetElement64, T::U64, T::U128, T::U8 )
-OPCODE(VectorSetElement8, T::U128, T::U128, T::U8, T::U8 )
-OPCODE(VectorSetElement16, T::U128, T::U128, T::U8, T::U16 )
-OPCODE(VectorSetElement32, T::U128, T::U128, T::U8, T::U32 )
-OPCODE(VectorSetElement64, T::U128, T::U128, T::U8, T::U64 )
+OPCODE(VectorSetElement8, T::U128, T::U128, T::U8, T::U8 )
+OPCODE(VectorSetElement16, T::U128, T::U128, T::U8, T::U16 )
+OPCODE(VectorSetElement32, T::U128, T::U128, T::U8, T::U32 )
+OPCODE(VectorSetElement64, T::U128, T::U128, T::U8, T::U64 )
 OPCODE(VectorAdd8, T::U128, T::U128, T::U128 )
 OPCODE(VectorAdd16, T::U128, T::U128, T::U128 )
 OPCODE(VectorAdd32, T::U128, T::U128, T::U128 )