A64: Implement ZIP1

commit 35a29a9665 (parent 586854117b)
7 changed files with 105 additions and 1 deletion
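For context, ZIP1 reads the elements in the lower half of each source vector and interleaves them into the destination: even result lanes come from Vn, odd result lanes from Vm. A minimal reference model of the byte-element case (illustrative only; these names are not dynarmic's):

    #include <array>
    #include <cstddef>
    #include <cstdint>

    // Reference model of ZIP1 on 16 byte lanes:
    //   result[2*i]   = n[i]   (lower half of first source)
    //   result[2*i+1] = m[i]   (lower half of second source)
    std::array<std::uint8_t, 16> zip1_bytes(const std::array<std::uint8_t, 16>& n,
                                            const std::array<std::uint8_t, 16>& m) {
        std::array<std::uint8_t, 16> d{};
        for (std::size_t i = 0; i < 8; ++i) {
            d[2 * i] = n[i];
            d[2 * i + 1] = m[i];
        }
        return d;
    }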
src/CMakeLists.txt
@@ -94,6 +94,7 @@ add_library(dynarmic
     frontend/A64/translate/impl/simd_aes.cpp
     frontend/A64/translate/impl/simd_copy.cpp
     frontend/A64/translate/impl/simd_modified_immediate.cpp
+    frontend/A64/translate/impl/simd_permute.cpp
     frontend/A64/translate/impl/simd_scalar_three_same.cpp
     frontend/A64/translate/impl/simd_three_same.cpp
     frontend/A64/translate/impl/system.cpp
src/backend_x64/emit_x64_vector.cpp
@@ -390,6 +390,46 @@ void EmitX64::EmitVectorEqual128(EmitContext& ctx, IR::Inst* inst) {
     }
 }
 
+static void EmitVectorInterleaveLower(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int size) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
+
+    switch (size) {
+    case 8:
+        code.punpcklbw(a, b);
+        break;
+    case 16:
+        code.punpcklwd(a, b);
+        break;
+    case 32:
+        code.punpckldq(a, b);
+        break;
+    case 64:
+        code.punpcklqdq(a, b);
+        break;
+    }
+
+    ctx.reg_alloc.DefineValue(inst, a);
+}
+
+void EmitX64::EmitVectorInterleaveLower8(EmitContext& ctx, IR::Inst* inst) {
+    EmitVectorInterleaveLower(code, ctx, inst, 8);
+}
+
+void EmitX64::EmitVectorInterleaveLower16(EmitContext& ctx, IR::Inst* inst) {
+    EmitVectorInterleaveLower(code, ctx, inst, 16);
+}
+
+void EmitX64::EmitVectorInterleaveLower32(EmitContext& ctx, IR::Inst* inst) {
+    EmitVectorInterleaveLower(code, ctx, inst, 32);
+}
+
+void EmitX64::EmitVectorInterleaveLower64(EmitContext& ctx, IR::Inst* inst) {
+    EmitVectorInterleaveLower(code, ctx, inst, 64);
+}
+
 void EmitX64::EmitVectorLowerPairedAdd8(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
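The backend maps each element size onto the matching SSE2 unpack-low instruction: punpcklbw, punpcklwd, punpckldq, and punpcklqdq each interleave the low elements of their two operands, which is exactly the interleave-lower semantics the IR requests. As an illustration (not dynarmic code), these are the intrinsic equivalents of the instructions emitted above:

    #include <emmintrin.h>  // SSE2

    // _mm_unpacklo_epi8 compiles to punpcklbw, and so on down the list.
    __m128i interleave_lower_8(__m128i a, __m128i b)  { return _mm_unpacklo_epi8(a, b); }
    __m128i interleave_lower_16(__m128i a, __m128i b) { return _mm_unpacklo_epi16(a, b); }
    __m128i interleave_lower_32(__m128i a, __m128i b) { return _mm_unpacklo_epi32(a, b); }
    __m128i interleave_lower_64(__m128i a, __m128i b) { return _mm_unpacklo_epi64(a, b); }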
src/frontend/A64/decoder/a64.inc
@@ -583,7 +583,7 @@ INST(SUB_1, "SUB (vector)", "01111
 // Data Processing - FP and SIMD - SIMD Permute
 //INST(UZP1,      "UZP1",      "0Q001110zz0mmmmm000110nnnnnddddd")
 //INST(TRN1,      "TRN1",      "0Q001110zz0mmmmm001010nnnnnddddd")
-//INST(ZIP1,      "ZIP1",      "0Q001110zz0mmmmm001110nnnnnddddd")
+INST(ZIP1,        "ZIP1",      "0Q001110zz0mmmmm001110nnnnnddddd")
 //INST(UZP2,      "UZP2",      "0Q001110zz0mmmmm010110nnnnnddddd")
 //INST(TRN2,      "TRN2",      "0Q001110zz0mmmmm011010nnnnnddddd")
 //INST(ZIP2,      "ZIP2",      "0Q001110zz0mmmmm011110nnnnnddddd")
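In these decoder rows, each character of the bit string covers one bit of the 32-bit instruction word, from bit 31 down to bit 0: Q sits at bit 30, zz (the element size) at bits 23:22, and mmmmm/nnnnn/ddddd are the Vm/Vn/Vd register fields. A sketch of the field extraction (hypothetical helper, not dynarmic's generated decoder):

    #include <cstdint>

    // Fields of the ZIP1 encoding 0Q001110zz0mmmmm001110nnnnnddddd.
    struct Zip1Fields {
        bool Q;
        std::uint32_t size, Vm, Vn, Vd;
    };

    Zip1Fields decode_zip1(std::uint32_t inst) {
        return {
            ((inst >> 30) & 1) != 0,  // Q: 64-bit (0) or 128-bit (1) operation
            (inst >> 22) & 0b11,      // zz: element size
            (inst >> 16) & 0b11111,   // mmmmm: second source register Vm
            (inst >> 5) & 0b11111,    // nnnnn: first source register Vn
            inst & 0b11111,           // ddddd: destination register Vd
        };
    }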
src/frontend/A64/translate/impl/simd_permute.cpp (new file, 39 lines)
@@ -0,0 +1,39 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include "frontend/A64/translate/impl/impl.h"
+
+namespace Dynarmic::A64 {
+
+bool TranslatorVisitor::ZIP1(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
+    if (size == 0b11 && !Q) {
+        return ReservedValue();
+    }
+
+    const size_t datasize = Q ? 128 : 64;
+
+    const IR::U128 result = [&] {
+        const IR::U128 operand1 = V(datasize, Vn);
+        const IR::U128 operand2 = V(datasize, Vm);
+
+        switch (size.ZeroExtend()) {
+        case 0b00:
+            return ir.VectorInterleaveLower8(operand1, operand2);
+        case 0b01:
+            return ir.VectorInterleaveLower16(operand1, operand2);
+        case 0b10:
+            return ir.VectorInterleaveLower32(operand1, operand2);
+        case 0b11:
+        default:
+            return ir.VectorInterleaveLower64(operand1, operand2);
+        }
+    }();
+
+    V(datasize, Vd, result);
+    return true;
+}
+
+} // namespace Dynarmic::A64
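A worked example of the expected behaviour for the 64-bit byte case (values chosen for illustration): with size = 0b00 and Q = 0, ZIP1 interleaves the lower four bytes of each source.

    #include <cassert>
    #include <cstdint>

    // ZIP1 V2.8B, V0.8B, V1.8B -- reference computation with sample values.
    int main() {
        const std::uint8_t v0[8] = {0, 1, 2, 3, 4, 5, 6, 7};
        const std::uint8_t v1[8] = {10, 11, 12, 13, 14, 15, 16, 17};
        std::uint8_t v2[8];
        for (int i = 0; i < 4; ++i) {
            v2[2 * i] = v0[i];      // even lanes come from Vn
            v2[2 * i + 1] = v1[i];  // odd lanes come from Vm
        }
        // Expected: V2.8B = {0, 10, 1, 11, 2, 12, 3, 13}
        assert(v2[0] == 0 && v2[1] == 10 && v2[2] == 1 && v2[3] == 11);
        return 0;
    }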
src/frontend/ir/ir_emitter.cpp
@@ -865,6 +865,22 @@ U128 IREmitter::VectorEqual128(const U128& a, const U128& b) {
     return Inst<U128>(Opcode::VectorEqual128, a, b);
 }
 
+U128 IREmitter::VectorInterleaveLower8(const U128& a, const U128& b) {
+    return Inst<U128>(Opcode::VectorInterleaveLower8, a, b);
+}
+
+U128 IREmitter::VectorInterleaveLower16(const U128& a, const U128& b) {
+    return Inst<U128>(Opcode::VectorInterleaveLower16, a, b);
+}
+
+U128 IREmitter::VectorInterleaveLower32(const U128& a, const U128& b) {
+    return Inst<U128>(Opcode::VectorInterleaveLower32, a, b);
+}
+
+U128 IREmitter::VectorInterleaveLower64(const U128& a, const U128& b) {
+    return Inst<U128>(Opcode::VectorInterleaveLower64, a, b);
+}
+
 U128 IREmitter::VectorLowerPairedAdd8(const U128& a, const U128& b) {
     return Inst<U128>(Opcode::VectorLowerPairedAdd8, a, b);
 }
src/frontend/ir/ir_emitter.h
@@ -227,6 +227,10 @@ public:
     U128 VectorEqual32(const U128& a, const U128& b);
     U128 VectorEqual64(const U128& a, const U128& b);
    U128 VectorEqual128(const U128& a, const U128& b);
+    U128 VectorInterleaveLower8(const U128& a, const U128& b);
+    U128 VectorInterleaveLower16(const U128& a, const U128& b);
+    U128 VectorInterleaveLower32(const U128& a, const U128& b);
+    U128 VectorInterleaveLower64(const U128& a, const U128& b);
     U128 VectorLowerPairedAdd8(const U128& a, const U128& b);
     U128 VectorLowerPairedAdd16(const U128& a, const U128& b);
     U128 VectorLowerPairedAdd32(const U128& a, const U128& b);
src/frontend/ir/opcodes.inc
@@ -218,6 +218,10 @@ OPCODE(VectorEqual16,           T::U128, T::U128, T::U128       )
 OPCODE(VectorEqual32,           T::U128, T::U128, T::U128       )
 OPCODE(VectorEqual64,           T::U128, T::U128, T::U128       )
 OPCODE(VectorEqual128,          T::U128, T::U128, T::U128       )
+OPCODE(VectorInterleaveLower8,  T::U128, T::U128, T::U128       )
+OPCODE(VectorInterleaveLower16, T::U128, T::U128, T::U128       )
+OPCODE(VectorInterleaveLower32, T::U128, T::U128, T::U128       )
+OPCODE(VectorInterleaveLower64, T::U128, T::U128, T::U128       )
 OPCODE(VectorLowerPairedAdd8,   T::U128, T::U128, T::U128       )
 OPCODE(VectorLowerPairedAdd16,  T::U128, T::U128, T::U128       )
 OPCODE(VectorLowerPairedAdd32,  T::U128, T::U128, T::U128       )
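Each OPCODE row lists the opcode name, its result type, and its argument types; .inc tables like this are typically consumed via an X-macro include. A self-contained sketch of that assumed pattern (the exact consumer in dynarmic may differ):

    // Illustrative X-macro pattern (assumed shape, not dynarmic's exact code):
    // in the real tree the rows live in opcodes.inc and are pulled in with
    // #include; here two rows are inlined so the sketch stands alone.
    #define OPCODE_LIST(OPCODE)                             \
        OPCODE(VectorInterleaveLower8,  U128, U128, U128)   \
        OPCODE(VectorInterleaveLower16, U128, U128, U128)

    #define OPCODE(name, ret, ...) name,
    enum class Opcode {
        OPCODE_LIST(OPCODE)
    };
    #undef OPCODE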