A64: Implement SADDLP

This commit is contained in:
MerryMage 2018-07-15 18:48:11 +01:00
parent 70ff2d73b5
commit 9dba273a8c
6 changed files with 91 additions and 8 deletions

View file

@ -1735,18 +1735,65 @@ void EmitX64::EmitVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, a);
}
// Emits the x64 sequence for VectorPairedAddSignedWiden8: every 16-bit lane of
// the result holds the sum of the two sign-extended bytes that occupied that
// lane in the operand.
void EmitX64::EmitVectorPairedAddSignedWiden8(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Xmm low_bytes = ctx.reg_alloc.UseScratchXmm(operands[0]);
    const Xbyak::Xmm high_bytes = ctx.reg_alloc.ScratchXmm();

    // Keep a second copy so the two bytes of each lane can be isolated separately.
    code.movdqa(high_bytes, low_bytes);

    // Move the low byte of each lane into the high position ...
    code.psllw(low_bytes, 8);
    // ... sign-extend the high byte via an arithmetic shift ...
    code.psraw(high_bytes, 8);
    // ... and shift the low byte back down, now sign-extended as well.
    code.psraw(low_bytes, 8);

    // Lane-wise 16-bit sum of the two sign-extended halves.
    code.paddw(low_bytes, high_bytes);

    ctx.reg_alloc.DefineValue(inst, low_bytes);
}
// Emits the x64 sequence for VectorPairedAddSignedWiden16: every 32-bit lane of
// the result holds the sum of the two sign-extended 16-bit values that occupied
// that lane in the operand.
void EmitX64::EmitVectorPairedAddSignedWiden16(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    const Xbyak::Xmm low_halves = ctx.reg_alloc.UseScratchXmm(operands[0]);
    const Xbyak::Xmm high_halves = ctx.reg_alloc.ScratchXmm();

    // Keep a second copy so the two halves of each lane can be isolated separately.
    code.movdqa(high_halves, low_halves);

    // Move the low half of each lane into the high position ...
    code.pslld(low_halves, 16);
    // ... sign-extend the high half via an arithmetic shift ...
    code.psrad(high_halves, 16);
    // ... and shift the low half back down, now sign-extended as well.
    code.psrad(low_halves, 16);

    // Lane-wise 32-bit sum of the two sign-extended halves.
    code.paddd(low_halves, high_halves);

    ctx.reg_alloc.DefineValue(inst, low_halves);
}
// Emits the x64 sequence for VectorPairedAddSignedWiden32: every 64-bit lane of
// the result holds the sum of the two sign-extended 32-bit values that occupied
// that lane in the operand.
//
// A 64-bit arithmetic right shift (vpsraq) is only emitted when the CPU reports
// AVX-512VL; otherwise the sign extension is emulated with logical shifts plus
// an explicitly built sign-fill mask.
void EmitX64::EmitVectorPairedAddSignedWiden32(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    // Declared exactly once; the operand registers must not be touched before
    // the feature-specific paths below, or the widening would be applied twice.
    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
    const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();

    if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512VL)) {
        // c = high 32 bits of each lane, sign-extended to 64 bits.
        code.vpsraq(c, a, 32);
        // a = low 32 bits of each lane, sign-extended to 64 bits.
        code.vpsllq(a, a, 32);
        code.vpsraq(a, a, 32);
        code.vpaddq(a, a, c);
    } else {
        const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm();
        const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm();

        // c keeps the original lanes; a gets the low half moved into the high half.
        code.movdqa(c, a);
        code.psllq(a, 32);

        // Isolate bit 63 of each lane: the sign of the low half (tmp1, taken from
        // the shifted a) and the sign of the high half (tmp2, taken from c).
        code.movdqa(tmp1, code.MConst(xword, 0x80000000'00000000, 0x80000000'00000000));
        code.movdqa(tmp2, tmp1);
        code.pand(tmp1, a);
        code.pand(tmp2, c);

        // Zero-extend both halves down into the low 32 bits of their lanes.
        code.psrlq(a, 32);
        code.psrlq(c, 32);

        // Smear each isolated sign bit across the upper dword of its lane; the
        // lower dword of the mask stays zero because its bit 31 was cleared.
        code.psrad(tmp1, 31);
        code.psrad(tmp2, 31);

        // Combine zero-extended value and sign fill to obtain the sign extension.
        code.por(a, tmp1);
        code.por(c, tmp2);

        code.paddq(a, c);
    }

    ctx.reg_alloc.DefineValue(inst, a);
}

View file

@ -568,7 +568,7 @@ INST(INS_elt, "INS (element)", "01101
// Data Processing - FP and SIMD - SIMD Two-register misc
INST(REV64_asimd, "REV64", "0Q001110zz100000000010nnnnnddddd")
INST(REV16_asimd, "REV16 (vector)", "0Q001110zz100000000110nnnnnddddd")
INST(SADDLP, "SADDLP", "0Q001110zz100000001010nnnnnddddd")
//INST(SUQADD_2, "SUQADD", "0Q001110zz100000001110nnnnnddddd")
//INST(CLS_asimd, "CLS (vector)", "0Q001110zz100000010010nnnnnddddd")
INST(CNT, "CNT", "0Q001110zz100000010110nnnnnddddd")

View file

@ -384,6 +384,25 @@ bool TranslatorVisitor::UADDLP(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
return true;
}
bool TranslatorVisitor::SADDLP(bool Q, Imm<2> size, Vec Vn, Vec Vd) {
if (size == 0b11) {
return ReservedValue();
}
const size_t esize = 8 << size.ZeroExtend();
const size_t datasize = Q ? 128 : 64;
const IR::U128 operand = V(datasize, Vn);
IR::U128 result = ir.VectorPairedAddSignedWiden(esize, operand);
if (datasize == 64) {
result = ir.VectorZeroUpper(result);
}
V(datasize, Vd, result);
return true;
}
// Translates SCVTF (vector, integer) by delegating to the shared
// integer-to-float helper with signed interpretation selected.
bool TranslatorVisitor::SCVTF_int_4(bool Q, bool sz, Vec Vn, Vec Vd) {
    const auto signedness = Signedness::Signed;
    return IntegerConvertToFloat(*this, Q, sz, Vn, Vd, signedness);
}

View file

@ -1168,6 +1168,19 @@ U128 IREmitter::VectorPairedAddLower(size_t esize, const U128& a, const U128& b)
return {};
}
// Emits the signed widening pairwise-add opcode matching the width of the
// source elements (8, 16 or 32 bits). Any other width is treated as
// unreachable.
U128 IREmitter::VectorPairedAddSignedWiden(size_t original_esize, const U128& a) {
    if (original_esize == 8) {
        return Inst<U128>(Opcode::VectorPairedAddSignedWiden8, a);
    }
    if (original_esize == 16) {
        return Inst<U128>(Opcode::VectorPairedAddSignedWiden16, a);
    }
    if (original_esize == 32) {
        return Inst<U128>(Opcode::VectorPairedAddSignedWiden32, a);
    }

    UNREACHABLE();
    return {};
}
U128 IREmitter::VectorPairedAddUnsignedWiden(size_t original_esize, const U128& a) {
switch (original_esize) {
case 8:

View file

@ -236,6 +236,7 @@ public:
U128 VectorOr(const U128& a, const U128& b);
U128 VectorPairedAdd(size_t esize, const U128& a, const U128& b);
U128 VectorPairedAddLower(size_t esize, const U128& a, const U128& b);
/// Emits VectorPairedAddSignedWiden{8,16,32} selected by original_esize (the
/// element width, in bits, before widening). Only 8, 16 and 32 are handled;
/// any other value reaches UNREACHABLE().
U128 VectorPairedAddSignedWiden(size_t original_esize, const U128& a);
U128 VectorPairedAddUnsignedWiden(size_t original_esize, const U128& a);
U128 VectorPopulationCount(const U128& a);
U128 VectorReverseBits(const U128& a);

View file

@ -319,6 +319,9 @@ OPCODE(VectorOr, T::U128, T::U128, T::U
// NOTE(review): the first type column appears to be the result type and the
// remaining columns the operand types (the two-operand PairedAddLower entries
// list three types, the one-operand Widen entries list two) - confirm against
// the OPCODE macro definition.
OPCODE(VectorPairedAddLower8, T::U128, T::U128, T::U128 )
OPCODE(VectorPairedAddLower16, T::U128, T::U128, T::U128 )
OPCODE(VectorPairedAddLower32, T::U128, T::U128, T::U128 )
// Widening pairwise adds; the numeric suffix is the element width before widening.
OPCODE(VectorPairedAddSignedWiden8, T::U128, T::U128 )
OPCODE(VectorPairedAddSignedWiden16, T::U128, T::U128 )
OPCODE(VectorPairedAddSignedWiden32, T::U128, T::U128 )
OPCODE(VectorPairedAddUnsignedWiden8, T::U128, T::U128 )
OPCODE(VectorPairedAddUnsignedWiden16, T::U128, T::U128 )
OPCODE(VectorPairedAddUnsignedWiden32, T::U128, T::U128 )