Merge pull request #532 from lioncash/shift
A32: Implement several ASIMD shift instructions
This commit is contained in:
commit
687c604197
5 changed files with 205 additions and 7 deletions
|
@ -127,6 +127,7 @@ if ("A32" IN_LIST DYNARMIC_FRONTENDS)
|
||||||
frontend/A32/translate/impl/asimd_one_reg_modified_immediate.cpp
|
frontend/A32/translate/impl/asimd_one_reg_modified_immediate.cpp
|
||||||
frontend/A32/translate/impl/asimd_three_same.cpp
|
frontend/A32/translate/impl/asimd_three_same.cpp
|
||||||
frontend/A32/translate/impl/asimd_two_regs_misc.cpp
|
frontend/A32/translate/impl/asimd_two_regs_misc.cpp
|
||||||
|
frontend/A32/translate/impl/asimd_two_regs_shift.cpp
|
||||||
frontend/A32/translate/impl/barrier.cpp
|
frontend/A32/translate/impl/barrier.cpp
|
||||||
frontend/A32/translate/impl/branch.cpp
|
frontend/A32/translate/impl/branch.cpp
|
||||||
frontend/A32/translate/impl/coprocessor.cpp
|
frontend/A32/translate/impl/coprocessor.cpp
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
|
#include <set>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "common/bit_util.h"
|
#include "common/bit_util.h"
|
||||||
|
@ -35,6 +36,15 @@ std::vector<ASIMDMatcher<V>> GetASIMDDecodeTable() {
|
||||||
return Common::BitCount(matcher1.GetMask()) > Common::BitCount(matcher2.GetMask());
|
return Common::BitCount(matcher1.GetMask()) > Common::BitCount(matcher2.GetMask());
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Exceptions to the above rule of thumb.
|
||||||
|
const std::set<std::string> comes_first{
|
||||||
|
"VBIC, VMOV, VMVN, VORR (immediate)"
|
||||||
|
};
|
||||||
|
|
||||||
|
std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) {
|
||||||
|
return comes_first.count(matcher.GetName()) > 0;
|
||||||
|
});
|
||||||
|
|
||||||
return table;
|
return table;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -58,13 +58,13 @@ INST(asimd_VTST, "VTST", "111100100Dzznnnndddd100
|
||||||
//INST(asimd_VQRDMULH, "VQRDMULH", "1111001U1-BB--------1101-1-0----") // ASIMD
|
//INST(asimd_VQRDMULH, "VQRDMULH", "1111001U1-BB--------1101-1-0----") // ASIMD
|
||||||
|
|
||||||
// Two registers and a shift amount
|
// Two registers and a shift amount
|
||||||
//INST(asimd_SHR, "SHR", "1111001U1-vvv-------0000LB-1----") // ASIMD
|
INST(asimd_SHR, "SHR", "1111001U1Diiiiiidddd0000LQM1mmmm") // ASIMD
|
||||||
//INST(asimd_SRA, "SRA", "1111001U1-vvv-------0001LB-1----") // ASIMD
|
INST(asimd_SRA, "SRA", "1111001U1Diiiiiidddd0001LQM1mmmm") // ASIMD
|
||||||
//INST(asimd_VRSHR, "VRSHR", "1111001U1-vvv-------0010LB-1----") // ASIMD
|
INST(asimd_VRSHR, "VRSHR", "1111001U1Diiiiiidddd0010LQM1mmmm") // ASIMD
|
||||||
//INST(asimd_VRSRA, "VRSRA", "1111001U1-vvv-------0011LB-1----") // ASIMD
|
INST(asimd_VRSRA, "VRSRA", "1111001U1Diiiiiidddd0011LQM1mmmm") // ASIMD
|
||||||
//INST(asimd_VSRI, "VSRI", "111100111-vvv-------0100LB-1----") // ASIMD
|
INST(asimd_VSRI, "VSRI", "111100111Diiiiiidddd0100LQM1mmmm") // ASIMD
|
||||||
//INST(asimd_VSHL, "VSHL", "111100101-vvv-------0101LB-1----") // ASIMD
|
INST(asimd_VSHL, "VSHL", "111100101Diiiiiidddd0101LQM1mmmm") // ASIMD
|
||||||
//INST(asimd_VSLI, "VSLI", "111100111-vvv-------0101LB-1----") // ASIMD
|
INST(asimd_VSLI, "VSLI", "111100111Diiiiiidddd0101LQM1mmmm") // ASIMD
|
||||||
//INST(asimd_VQSHL, "VQSHL" , "1111001U1-vvv-------011xLB-1----") // ASIMD
|
//INST(asimd_VQSHL, "VQSHL" , "1111001U1-vvv-------011xLB-1----") // ASIMD
|
||||||
//INST(asimd_VSHRN, "VSHRN", "111100101-vvv-------100000-1----") // ASIMD
|
//INST(asimd_VSHRN, "VSHRN", "111100101-vvv-------100000-1----") // ASIMD
|
||||||
//INST(asimd_VRSHRN, "VRSHRN", "111100101-vvv-------100001-1----") // ASIMD
|
//INST(asimd_VRSHRN, "VRSHRN", "111100101-vvv-------100001-1----") // ASIMD
|
||||||
|
|
178
src/frontend/A32/translate/impl/asimd_two_regs_shift.cpp
Normal file
178
src/frontend/A32/translate/impl/asimd_two_regs_shift.cpp
Normal file
|
@ -0,0 +1,178 @@
|
||||||
|
/* This file is part of the dynarmic project.
|
||||||
|
* Copyright (c) 2020 MerryMage
|
||||||
|
* SPDX-License-Identifier: 0BSD
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "common/assert.h"
|
||||||
|
#include "common/bit_util.h"
|
||||||
|
|
||||||
|
#include "frontend/A32/translate/impl/translate_arm.h"
|
||||||
|
|
||||||
|
namespace Dynarmic::A32 {
|
||||||
|
namespace {
|
||||||
|
// Selects what happens to the shifted value before it is stored in the destination.
enum class Accumulating {
    // The shifted value is stored as-is.
    None,
    // The shifted value is added to the destination's current contents first.
    Accumulate,
};
|
||||||
|
|
||||||
|
// Selects whether a right shift applies a rounding correction to its result.
enum class Rounding {
    // Plain shift; no correction is applied.
    None,
    // The result is passed through PerformRoundingCorrection.
    Round,
};
|
||||||
|
|
||||||
|
// Applies a rounding adjustment to an already-shifted vector.
//
// round_value is broadcast to every esize-bit lane and used as a bit mask on the
// unshifted input. Lanes of `original` that have all of those bits set are located
// with VectorEqual, and that comparison result is subtracted from `shifted`.
//
// NOTE(review): this presumably relies on VectorEqual yielding all-ones (-1) in
// matching lanes, so the subtraction adds one there -- confirm against the IR emitter.
IR::U128 PerformRoundingCorrection(ArmTranslatorVisitor& v, size_t esize, u64 round_value, IR::U128 original, IR::U128 shifted) {
    const auto round_imm = v.I(esize, round_value);
    const auto round_mask = v.ir.VectorBroadcast(esize, round_imm);
    const auto masked_input = v.ir.VectorAnd(original, round_mask);
    const auto needs_round_up = v.ir.VectorEqual(esize, masked_input, round_mask);
    return v.ir.VectorSub(esize, shifted, needs_round_up);
}
|
||||||
|
|
||||||
|
std::pair<size_t, size_t> ElementSizeAndShiftAmount(bool right_shift, bool L, size_t imm6) {
|
||||||
|
if (right_shift) {
|
||||||
|
if (L) {
|
||||||
|
return {64, 64 - imm6};
|
||||||
|
}
|
||||||
|
|
||||||
|
const size_t esize = 8U << Common::HighestSetBit(imm6 >> 3);
|
||||||
|
const size_t shift_amount = (esize * 2) - imm6;
|
||||||
|
return {esize, shift_amount};
|
||||||
|
} else {
|
||||||
|
if (L) {
|
||||||
|
return {64, imm6};
|
||||||
|
}
|
||||||
|
|
||||||
|
const size_t esize = 8U << Common::HighestSetBit(imm6 >> 3);
|
||||||
|
const size_t shift_amount = imm6 - esize;
|
||||||
|
return {esize, shift_amount};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Common translation for the immediate right-shift handlers in this file
// (asimd_SHR, asimd_SRA, asimd_VRSHR and asimd_VRSRA).
//
// U selects a logical (true) or arithmetic (false) shift of the source vector.
// `rounding` optionally applies PerformRoundingCorrection to the shifted value and
// `accumulate` optionally adds the destination's current contents before the store.
// Returns the result of UndefinedInstruction() when Q is set and either register
// index is odd; otherwise emits the IR and returns true.
bool ShiftRight(ArmTranslatorVisitor& v, bool U, bool D, size_t imm6, size_t Vd, bool L, bool Q, bool M, size_t Vm,
                Accumulating accumulate, Rounding rounding) {
    const bool odd_register = Common::Bit<0>(Vd) || Common::Bit<0>(Vm);
    if (Q && odd_register) {
        return v.UndefinedInstruction();
    }

    // Technically just a related encoding (One register and modified immediate instructions)
    const bool related_encoding = !L && Common::Bits<3, 5>(imm6) == 0;
    if (related_encoding) {
        ASSERT_FALSE();
    }

    const auto [esize, amount] = ElementSizeAndShiftAmount(true, L, imm6);
    const auto dest = ToVector(Q, Vd, D);
    const auto source = ToVector(Q, Vm, M);
    const auto shift = static_cast<u8>(amount);

    const auto operand = v.ir.GetVector(source);
    auto result = U ? v.ir.VectorLogicalShiftRight(esize, operand, shift)
                    : v.ir.VectorArithmeticShiftRight(esize, operand, shift);

    if (rounding == Rounding::Round) {
        // Highest bit that the shift discards.
        const u64 round_bit = 1ULL << (amount - 1);
        result = PerformRoundingCorrection(v, esize, round_bit, operand, result);
    }

    if (accumulate == Accumulating::Accumulate) {
        result = v.ir.VectorAdd(esize, result, v.ir.GetVector(dest));
    }

    v.ir.SetVector(dest, result);
    return true;
}
|
||||||
|
} // Anonymous namespace
|
||||||
|
|
||||||
|
bool ArmTranslatorVisitor::asimd_SHR(bool U, bool D, size_t imm6, size_t Vd, bool L, bool Q, bool M, size_t Vm) {
|
||||||
|
return ShiftRight(*this, U, D, imm6, Vd, L, Q, M, Vm,
|
||||||
|
Accumulating::None, Rounding::None);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ArmTranslatorVisitor::asimd_SRA(bool U, bool D, size_t imm6, size_t Vd, bool L, bool Q, bool M, size_t Vm) {
|
||||||
|
return ShiftRight(*this, U, D, imm6, Vd, L, Q, M, Vm,
|
||||||
|
Accumulating::Accumulate, Rounding::None);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ArmTranslatorVisitor::asimd_VRSHR(bool U, bool D, size_t imm6, size_t Vd, bool L, bool Q, bool M, size_t Vm) {
|
||||||
|
return ShiftRight(*this, U, D, imm6, Vd, L, Q, M, Vm,
|
||||||
|
Accumulating::None, Rounding::Round);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ArmTranslatorVisitor::asimd_VRSRA(bool U, bool D, size_t imm6, size_t Vd, bool L, bool Q, bool M, size_t Vm) {
|
||||||
|
return ShiftRight(*this, U, D, imm6, Vd, L, Q, M, Vm,
|
||||||
|
Accumulating::Accumulate, Rounding::Round);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ArmTranslatorVisitor::asimd_VSRI(bool D, size_t imm6, size_t Vd, bool L, bool Q, bool M, size_t Vm) {
|
||||||
|
if (Q && (Common::Bit<0>(Vd) || Common::Bit<0>(Vm))) {
|
||||||
|
return UndefinedInstruction();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Technically just a related encoding (One register and modified immediate instructions)
|
||||||
|
if (!L && Common::Bits<3, 5>(imm6) == 0) {
|
||||||
|
ASSERT_FALSE();
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto [esize, shift_amount] = ElementSizeAndShiftAmount(true, L, imm6);
|
||||||
|
const u64 mask = shift_amount == esize ? 0 : Common::Ones<u64>(esize) >> shift_amount;
|
||||||
|
|
||||||
|
const auto d = ToVector(Q, Vd, D);
|
||||||
|
const auto m = ToVector(Q, Vm, M);
|
||||||
|
|
||||||
|
const auto reg_m = ir.GetVector(m);
|
||||||
|
const auto reg_d = ir.GetVector(d);
|
||||||
|
|
||||||
|
const auto shifted = ir.VectorLogicalShiftRight(esize, reg_m, static_cast<u8>(shift_amount));
|
||||||
|
const auto mask_vec = ir.VectorBroadcast(esize, I(esize, mask));
|
||||||
|
const auto result = ir.VectorOr(ir.VectorAnd(reg_d, ir.VectorNot(mask_vec)), shifted);
|
||||||
|
|
||||||
|
ir.SetVector(d, result);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ArmTranslatorVisitor::asimd_VSLI(bool D, size_t imm6, size_t Vd, bool L, bool Q, bool M, size_t Vm) {
|
||||||
|
if (Q && (Common::Bit<0>(Vd) || Common::Bit<0>(Vm))) {
|
||||||
|
return UndefinedInstruction();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Technically just a related encoding (One register and modified immediate instructions)
|
||||||
|
if (!L && Common::Bits<3, 5>(imm6) == 0) {
|
||||||
|
return UndefinedInstruction();
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto [esize, shift_amount] = ElementSizeAndShiftAmount(false, L, imm6);
|
||||||
|
const u64 mask = Common::Ones<u64>(esize) << shift_amount;
|
||||||
|
|
||||||
|
const auto d = ToVector(Q, Vd, D);
|
||||||
|
const auto m = ToVector(Q, Vm, M);
|
||||||
|
|
||||||
|
const auto reg_m = ir.GetVector(m);
|
||||||
|
const auto reg_d = ir.GetVector(d);
|
||||||
|
|
||||||
|
const auto shifted = ir.VectorLogicalShiftLeft(esize, reg_m, static_cast<u8>(shift_amount));
|
||||||
|
const auto mask_vec = ir.VectorBroadcast(esize, I(esize, mask));
|
||||||
|
const auto result = ir.VectorOr(ir.VectorAnd(reg_d, ir.VectorNot(mask_vec)), shifted);
|
||||||
|
|
||||||
|
ir.SetVector(d, result);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ArmTranslatorVisitor::asimd_VSHL(bool D, size_t imm6, size_t Vd, bool L, bool Q, bool M, size_t Vm) {
|
||||||
|
if (Q && (Common::Bit<0>(Vd) || Common::Bit<0>(Vm))) {
|
||||||
|
return UndefinedInstruction();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Technically just a related encoding (One register and modified immediate instructions)
|
||||||
|
if (!L && Common::Bits<3, 5>(imm6) == 0) {
|
||||||
|
ASSERT_FALSE();
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto [esize, shift_amount] = ElementSizeAndShiftAmount(false, L, imm6);
|
||||||
|
const auto d = ToVector(Q, Vd, D);
|
||||||
|
const auto m = ToVector(Q, Vm, M);
|
||||||
|
|
||||||
|
const auto reg_m = ir.GetVector(m);
|
||||||
|
const auto result = ir.VectorLogicalShiftLeft(esize, reg_m, static_cast<u8>(shift_amount));
|
||||||
|
|
||||||
|
ir.SetVector(d, result);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Dynarmic::A32
|
|
@ -452,6 +452,15 @@ struct ArmTranslatorVisitor final {
|
||||||
bool asimd_VQSUB(bool U, bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
|
bool asimd_VQSUB(bool U, bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
|
||||||
bool asimd_VTST(bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
|
bool asimd_VTST(bool D, size_t sz, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm);
|
||||||
|
|
||||||
|
// Two registers and a shift amount
|
||||||
|
bool asimd_SHR(bool U, bool D, size_t imm6, size_t Vd, bool L, bool Q, bool M, size_t Vm);
|
||||||
|
bool asimd_SRA(bool U, bool D, size_t imm6, size_t Vd, bool L, bool Q, bool M, size_t Vm);
|
||||||
|
bool asimd_VRSHR(bool U, bool D, size_t imm6, size_t Vd, bool L, bool Q, bool M, size_t Vm);
|
||||||
|
bool asimd_VRSRA(bool U, bool D, size_t imm6, size_t Vd, bool L, bool Q, bool M, size_t Vm);
|
||||||
|
bool asimd_VSRI(bool D, size_t imm6, size_t Vd, bool L, bool Q, bool M, size_t Vm);
|
||||||
|
bool asimd_VSLI(bool D, size_t imm6, size_t Vd, bool L, bool Q, bool M, size_t Vm);
|
||||||
|
bool asimd_VSHL(bool D, size_t imm6, size_t Vd, bool L, bool Q, bool M, size_t Vm);
|
||||||
|
|
||||||
// Advanced SIMD two register, miscellaneous
|
// Advanced SIMD two register, miscellaneous
|
||||||
bool asimd_VREV(bool D, size_t sz, size_t Vd, size_t op, bool Q, bool M, size_t Vm);
|
bool asimd_VREV(bool D, size_t sz, size_t Vd, size_t op, bool Q, bool M, size_t Vm);
|
||||||
bool asimd_VCLS(bool D, size_t sz, size_t Vd, bool Q, bool M, size_t Vm);
|
bool asimd_VCLS(bool D, size_t sz, size_t Vd, bool Q, bool M, size_t Vm);
|
||||||
|
|
Loading…
Reference in a new issue