A64: Implement MUL (by element)
This commit is contained in:
parent
a86d4093cd
commit
1651e60462
3 changed files with 45 additions and 26 deletions
|
@ -823,7 +823,7 @@ INST(USHLL, "USHLL, USHLL2", "0Q101
|
|||
//INST(SQDMLAL_elt_2, "SQDMLAL, SQDMLAL2 (by element)", "0Q001111zzLMmmmm0011H0nnnnnddddd")
|
||||
//INST(SMLSL_elt, "SMLSL, SMLSL2 (by element)", "0Q001111zzLMmmmm0110H0nnnnnddddd")
|
||||
//INST(SQDMLSL_elt_2, "SQDMLSL, SQDMLSL2 (by element)", "0Q001111zzLMmmmm0111H0nnnnnddddd")
|
||||
//INST(MUL_elt, "MUL (by element)", "0Q001111zzLMmmmm1000H0nnnnnddddd")
|
||||
INST(MUL_elt, "MUL (by element)", "0Q001111zzLMmmmm1000H0nnnnnddddd")
|
||||
//INST(SMULL_elt, "SMULL, SMULL2 (by element)", "0Q001111zzLMmmmm1010H0nnnnnddddd")
|
||||
//INST(SQDMULL_elt_2, "SQDMULL, SQDMULL2 (by element)", "0Q001111zzLMmmmm1011H0nnnnnddddd")
|
||||
//INST(SQDMULH_elt_2, "SQDMULH (by element)", "0Q001111zzLMmmmm1100H0nnnnnddddd")
|
||||
|
|
|
@ -901,7 +901,7 @@ struct TranslatorVisitor final {
|
|||
// Data Processing - FP and SIMD - SIMD vector x indexed element
|
||||
bool SMLAL_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
|
||||
bool SMLSL_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
|
||||
bool MUL_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
|
||||
bool MUL_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd);
|
||||
bool SMULL_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
|
||||
bool SDOT_elt(bool Q, Imm<2> size, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
|
||||
bool FMLAL_elt_1(bool Q, bool sz, bool L, bool M, Vec Vm, bool H, Vec Vn, Vec Vd);
|
||||
|
|
|
@ -4,40 +4,59 @@
|
|||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include <utility>
|
||||
#include "frontend/A64/translate/impl/impl.h"
|
||||
|
||||
namespace Dynarmic::A64 {
|
||||
namespace {
|
||||
// Splits the by-element encoding fields into the element index and the Vm
// register number.
//
// When size == 0b01 the index is the three-bit value H:L:M and the register
// is Vmlo zero-extended. For every other size value the index is the two-bit
// value H:L and the register is M:Vmlo.
//
// Returns {index, Vm}. The caller is expected to have validated `size`; this
// function does not reject any encoding itself.
std::pair<size_t, Vec> Combine(Imm<2> size, Imm<1> H, Imm<1> L, Imm<1> M, Imm<4> Vmlo) {
    if (size == 0b01) {
        return {concatenate(H, L, M).ZeroExtend(), Vmlo.ZeroExtend<Vec>()};
    }

    return {concatenate(H, L).ZeroExtend(), concatenate(M, Vmlo).ZeroExtend<Vec>()};
}
|
||||
|
||||
// Selects what MultiplyByElement does with the product before writing it back
// to the destination register.
enum class ExtraBehavior {
    // Write the product as-is.
    None,
    // Add the product to the current contents of the destination register.
    Accumulate,
};
|
||||
|
||||
// Shared translation for the vector "multiply by element" instructions.
//
// Multiplies every element of Vn by a single element of Vm (selected by the
// index decoded from H:L:M / Vmlo via Combine) and writes the result to Vd.
// With ExtraBehavior::Accumulate the product is added to the previous value
// of Vd first; with ExtraBehavior::None the product is written unchanged.
//
// `size` is not validated here; callers reject unsupported encodings before
// calling this helper.
void MultiplyByElement(TranslatorVisitor& v, bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd,
                       ExtraBehavior extra_behavior) {
    const auto [index, Vm] = Combine(size, H, L, M, Vmlo);
    // Width used when reading the indexed register: 128 bits when H is set,
    // otherwise 64 bits.
    const size_t idxdsize = H == 1 ? 128 : 64;
    const size_t esize = 8 << size.ZeroExtend();
    const size_t datasize = Q ? 128 : 64;

    const IR::U128 operand1 = v.V(datasize, Vn);
    // Replicate the selected element of Vm across the vector so an ordinary
    // element-wise multiply can be used.
    const IR::U128 operand2 = v.ir.VectorBroadcast(esize, v.ir.VectorGetElement(esize, v.V(idxdsize, Vm), index));
    const IR::U128 operand3 = v.V(datasize, Vd);

    IR::U128 result = v.ir.VectorMultiply(esize, operand1, operand2);
    if (extra_behavior == ExtraBehavior::Accumulate) {
        result = v.ir.VectorAdd(esize, operand3, result);
    }

    v.V(datasize, Vd, result);
}
|
||||
} // Anonymous namespace
|
||||
|
||||
bool TranslatorVisitor::MLA_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd) {
|
||||
const size_t idxdsize = H == 1 ? 128 : 64;
|
||||
|
||||
size_t index;
|
||||
Imm<1> Vmhi{0};
|
||||
switch (size.ZeroExtend()) {
|
||||
case 0b01:
|
||||
index = concatenate(H, L, M).ZeroExtend();
|
||||
break;
|
||||
case 0b10:
|
||||
index = concatenate(H, L).ZeroExtend();
|
||||
Vmhi = M;
|
||||
break;
|
||||
default:
|
||||
if (size != 0b01 && size != 0b10) {
|
||||
return UnallocatedEncoding();
|
||||
}
|
||||
|
||||
const Vec Vm = concatenate(Vmhi, Vmlo).ZeroExtend<Vec>();
|
||||
MultiplyByElement(*this, Q, size, L, M, Vmlo, H, Vn, Vd, ExtraBehavior::Accumulate);
|
||||
return true;
|
||||
}
|
||||
|
||||
const size_t esize = 8 << size.ZeroExtend();
|
||||
const size_t datasize = Q ? 128 : 64;
|
||||
|
||||
const IR::U128 operand1 = V(datasize, Vn);
|
||||
const IR::U128 operand2 = ir.VectorBroadcast(esize, ir.VectorGetElement(esize, V(idxdsize, Vm), index));
|
||||
const IR::U128 operand3 = V(datasize, Vd);
|
||||
|
||||
const IR::U128 product = ir.VectorMultiply(esize, operand1, operand2);
|
||||
const IR::U128 result = ir.VectorAdd(esize, operand3, product);
|
||||
V(datasize, Vd, result);
|
||||
bool TranslatorVisitor::MUL_elt(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, Imm<4> Vmlo, Imm<1> H, Vec Vn, Vec Vd) {
|
||||
if (size != 0b01 && size != 0b10) {
|
||||
return UnallocatedEncoding();
|
||||
}
|
||||
|
||||
MultiplyByElement(*this, Q, size, L, M, Vmlo, H, Vn, Vd, ExtraBehavior::None);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue