// Summary of the patch below (two hunks):
//
// 1. src/frontend/A64/decoder/a64.inc
//    The previously commented-out decoder entry `//INST(TRN2, ...)` is replaced by an
//    active `INST(TRN2, "TRN2", "0Q001110zz0mmmmm011010nnnnnddddd")` entry.
//
// 2. src/frontend/A64/translate/impl/simd_permute.cpp
//    The body of TranslatorVisitor::TRN1 is moved into a file-local helper,
//    VectorTranspose(v, Q, size, Vm, Vn, Vd, type), selected by the new
//    `enum class Transposition { TRN1, TRN2 }`. TRN1 and the new TRN2 both keep the
//    `!Q && size == 0b11` -> ReservedValue() check and then call the helper.
//
//    Helper behaviour as written in the added lines:
//      * datasize is 128 when Q is set, otherwise 64; esize is 8 << size.
//      * esize 8/16/32: a 64-bit mask is built from Common::Ones(esize) shifted left by
//        0 (TRN1) or by esize (TRN2) and replicated every 2*esize bits; both inputs are
//        ANDed with it. TRN1 returns (anded_m << esize) | anded_n and TRN2 returns
//        (anded_n >> esize) | anded_m, with the shifts done on 2*esize-wide lanes.
//      * esize 64 (also the `default:` label): one element is copied with
//        VectorGetElement/VectorSetElement. TRN1 writes m[0] into element 1 of n;
//        TRN2 writes n[1] into element 0 of m.
//      * The result is written back with v.V(datasize, Vd, result).
//
// NOTE(review): this copy of the diff has lost its line breaks, and some angle-bracket
// tokens look stripped (a bare `#include` with no header name, `static_cast(` with no
// target type, `Common::Ones(` with no template argument). Presumably an extraction
// artifact -- confirm against the upstream commit before applying or compiling.
diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index 3fdb39fa..b4875e1d 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -518,7 +518,7 @@ INST(USRA_1, "USRA", "01111 INST(TRN1, "TRN1", "0Q001110zz0mmmmm001010nnnnnddddd") INST(ZIP1, "ZIP1", "0Q001110zz0mmmmm001110nnnnnddddd") //INST(UZP2, "UZP2", "0Q001110zz0mmmmm010110nnnnnddddd") -//INST(TRN2, "TRN2", "0Q001110zz0mmmmm011010nnnnnddddd") +INST(TRN2, "TRN2", "0Q001110zz0mmmmm011010nnnnnddddd") INST(ZIP2, "ZIP2", "0Q001110zz0mmmmm011110nnnnnddddd") // Data Processing - FP and SIMD - SIMD Extract diff --git a/src/frontend/A64/translate/impl/simd_permute.cpp b/src/frontend/A64/translate/impl/simd_permute.cpp index 921b06b1..5fe34298 100644 --- a/src/frontend/A64/translate/impl/simd_permute.cpp +++ b/src/frontend/A64/translate/impl/simd_permute.cpp @@ -4,43 +4,80 @@ * General Public License version 2 or any later version. */ +#include #include "frontend/A64/translate/impl/impl.h" namespace Dynarmic::A64 { -bool TranslatorVisitor::TRN1(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) { - if (!Q && size == 0b11) { - return ReservedValue(); - } +enum class Transposition { + TRN1, + TRN2, +}; +static void VectorTranspose(TranslatorVisitor& v, bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd, + Transposition type) { const size_t datasize = Q ? 128 : 64; - const size_t esize = 8 << size.ZeroExtend(); + const u8 esize = static_cast(8 << size.ZeroExtend()); - const IR::U128 m = V(datasize, Vm); - const IR::U128 n = V(datasize, Vn); + const IR::U128 m = v.V(datasize, Vm); + const IR::U128 n = v.V(datasize, Vn); const IR::U128 result = [&] { switch (esize) { case 8: case 16: case 32: { - // Create a mask of elements we care about (e.g. for 8-bit 0x00FF00FF00FF00FF) - const u64 mask_element = Common::Ones(esize); - const u64 mask_value = Common::Replicate(mask_element, esize * 2); + // Create a mask of elements we care about (e.g. 
for 8-bit: 0x00FF00FF00FF00FF for TRN1 + // and 0xFF00FF00FF00FF00 for TRN2) + const u64 mask_element = [&] { + const size_t shift = type == Transposition::TRN1 ? 0 : esize; + return Common::Ones(esize) << shift; + }(); + const size_t doubled_esize = esize * 2; + const u64 mask_value = Common::Replicate(mask_element, doubled_esize); - const IR::U128 mask = ir.VectorBroadcast(64, I(64, mask_value)); - const IR::U128 anded_m = ir.VectorAnd(m, mask); - const IR::U128 anded_n = ir.VectorAnd(n, mask); - return ir.VectorOr(ir.VectorLogicalShiftLeft(esize * 2, anded_m, esize), anded_n); + const IR::U128 mask = v.ir.VectorBroadcast(64, v.I(64, mask_value)); + const IR::U128 anded_m = v.ir.VectorAnd(m, mask); + const IR::U128 anded_n = v.ir.VectorAnd(n, mask); + + if (type == Transposition::TRN1) { + return v.ir.VectorOr(v.ir.VectorLogicalShiftLeft(doubled_esize, anded_m, esize), anded_n); + } + + return v.ir.VectorOr(v.ir.VectorLogicalShiftRight(doubled_esize, anded_n, esize), anded_m); } case 64: { default: - return ir.VectorSetElement(esize, n, 1, ir.VectorGetElement(esize, m, 0)); + const auto [src, src_idx, dst, dst_idx] = [type, m, n] { + if (type == Transposition::TRN1) { + return std::make_tuple(m, 0, n, 1); + } + return std::make_tuple(n, 1, m, 0); + }(); + + return v.ir.VectorSetElement(esize, dst, dst_idx, v.ir.VectorGetElement(esize, src, src_idx)); } } }(); - V(datasize, Vd, result); + v.V(datasize, Vd, result); +} + +bool TranslatorVisitor::TRN1(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) { + if (!Q && size == 0b11) { + return ReservedValue(); + } + + VectorTranspose(*this, Q, size, Vm, Vn, Vd, Transposition::TRN1); + return true; +} + +bool TranslatorVisitor::TRN2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) { + if (!Q && size == 0b11) { + return ReservedValue(); + } + + VectorTranspose(*this, Q, size, Vm, Vn, Vd, Transposition::TRN2); return true; }