IR: Add VectorSignedSaturatedDoublingMultiplyLong
This commit is contained in:
parent
06b31448aa
commit
3e447614c6
5 changed files with 97 additions and 0 deletions
|
@ -3262,6 +3262,85 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiply32(EmitContext& ctx, IR::
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
|
const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||||
|
|
||||||
|
code.punpcklwd(x, x);
|
||||||
|
code.punpcklwd(y, y);
|
||||||
|
code.pmaddwd(x, y);
|
||||||
|
|
||||||
|
if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX)) {
|
||||||
|
code.vpcmpeqd(y, x, code.MConst(xword, 0x8000000080000000, 0x8000000080000000));
|
||||||
|
code.vpxor(x, x, y);
|
||||||
|
} else {
|
||||||
|
code.movdqa(y, code.MConst(xword, 0x8000000080000000, 0x8000000080000000));
|
||||||
|
code.pcmpeqd(y, x);
|
||||||
|
code.pxor(x, y);
|
||||||
|
}
|
||||||
|
|
||||||
|
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||||
|
code.pmovmskb(bit, y);
|
||||||
|
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, x);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
|
const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||||
|
|
||||||
|
if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX)) {
|
||||||
|
code.vpmovsxdq(x, x);
|
||||||
|
code.vpmovsxdq(y, y);
|
||||||
|
code.vpmuldq(x, x, y);
|
||||||
|
code.vpaddq(x, x, x);
|
||||||
|
} else {
|
||||||
|
const Xbyak::Reg64 a = ctx.reg_alloc.ScratchGpr();
|
||||||
|
const Xbyak::Reg64 b = ctx.reg_alloc.ScratchGpr();
|
||||||
|
const Xbyak::Reg64 c = ctx.reg_alloc.ScratchGpr();
|
||||||
|
const Xbyak::Reg64 d = ctx.reg_alloc.ScratchGpr();
|
||||||
|
|
||||||
|
code.movq(c, x);
|
||||||
|
code.movq(d, y);
|
||||||
|
code.movsxd(a, c.cvt32());
|
||||||
|
code.movsxd(b, d.cvt32());
|
||||||
|
code.sar(c, 32);
|
||||||
|
code.sar(d, 32);
|
||||||
|
code.imul(a, b);
|
||||||
|
code.imul(c, d);
|
||||||
|
code.movq(x, a);
|
||||||
|
code.movq(y, c);
|
||||||
|
code.punpcklqdq(x, y);
|
||||||
|
code.paddq(x, x);
|
||||||
|
|
||||||
|
ctx.reg_alloc.Release(a);
|
||||||
|
ctx.reg_alloc.Release(b);
|
||||||
|
ctx.reg_alloc.Release(c);
|
||||||
|
ctx.reg_alloc.Release(d);
|
||||||
|
}
|
||||||
|
|
||||||
|
const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||||
|
if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX)) {
|
||||||
|
code.vpcmpeqq(y, x, code.MConst(xword, 0x8000000000000000, 0x8000000000000000));
|
||||||
|
code.vpxor(x, x, y);
|
||||||
|
code.vpmovmskb(bit, y);
|
||||||
|
} else {
|
||||||
|
code.movdqa(y, code.MConst(xword, 0x8000000000000000, 0x8000000000000000));
|
||||||
|
code.pcmpeqd(y, x);
|
||||||
|
code.shufps(y, y, 0b11110101);
|
||||||
|
code.pxor(x, y);
|
||||||
|
code.pmovmskb(bit, y);
|
||||||
|
}
|
||||||
|
code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, x);
|
||||||
|
}
|
||||||
|
|
||||||
static void EmitVectorSignedSaturatedNarrowToSigned(size_t original_esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
static void EmitVectorSignedSaturatedNarrowToSigned(size_t original_esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
const Xbyak::Xmm src = ctx.reg_alloc.UseXmm(args[0]);
|
const Xbyak::Xmm src = ctx.reg_alloc.UseXmm(args[0]);
|
||||||
|
|
|
@ -1594,6 +1594,17 @@ UpperAndLower IREmitter::VectorSignedSaturatedDoublingMultiply(size_t esize, con
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
U128 IREmitter::VectorSignedSaturatedDoublingMultiplyLong(size_t esize, const U128& a, const U128& b) {
|
||||||
|
switch (esize) {
|
||||||
|
case 16:
|
||||||
|
return Inst<U128>(Opcode::VectorSignedSaturatedDoublingMultiplyLong16, a, b);
|
||||||
|
case 32:
|
||||||
|
return Inst<U128>(Opcode::VectorSignedSaturatedDoublingMultiplyLong32, a, b);
|
||||||
|
}
|
||||||
|
UNREACHABLE();
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
U128 IREmitter::VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a) {
|
U128 IREmitter::VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a) {
|
||||||
switch (original_esize) {
|
switch (original_esize) {
|
||||||
case 16:
|
case 16:
|
||||||
|
|
|
@ -274,6 +274,7 @@ public:
|
||||||
U128 VectorSignedSaturatedAbs(size_t esize, const U128& a);
|
U128 VectorSignedSaturatedAbs(size_t esize, const U128& a);
|
||||||
U128 VectorSignedSaturatedAccumulateUnsigned(size_t esize, const U128& a, const U128& b);
|
U128 VectorSignedSaturatedAccumulateUnsigned(size_t esize, const U128& a, const U128& b);
|
||||||
UpperAndLower VectorSignedSaturatedDoublingMultiply(size_t esize, const U128& a, const U128& b);
|
UpperAndLower VectorSignedSaturatedDoublingMultiply(size_t esize, const U128& a, const U128& b);
|
||||||
|
U128 VectorSignedSaturatedDoublingMultiplyLong(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a);
|
U128 VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a);
|
||||||
U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a);
|
U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a);
|
||||||
U128 VectorSignedSaturatedNeg(size_t esize, const U128& a);
|
U128 VectorSignedSaturatedNeg(size_t esize, const U128& a);
|
||||||
|
|
|
@ -355,6 +355,10 @@ bool Inst::WritesToFPSRCumulativeSaturationBit() const {
|
||||||
case Opcode::VectorSignedSaturatedAccumulateUnsigned16:
|
case Opcode::VectorSignedSaturatedAccumulateUnsigned16:
|
||||||
case Opcode::VectorSignedSaturatedAccumulateUnsigned32:
|
case Opcode::VectorSignedSaturatedAccumulateUnsigned32:
|
||||||
case Opcode::VectorSignedSaturatedAccumulateUnsigned64:
|
case Opcode::VectorSignedSaturatedAccumulateUnsigned64:
|
||||||
|
case Opcode::VectorSignedSaturatedDoublingMultiply16:
|
||||||
|
case Opcode::VectorSignedSaturatedDoublingMultiply32:
|
||||||
|
case Opcode::VectorSignedSaturatedDoublingMultiplyLong16:
|
||||||
|
case Opcode::VectorSignedSaturatedDoublingMultiplyLong32:
|
||||||
case Opcode::VectorSignedSaturatedNarrowToSigned16:
|
case Opcode::VectorSignedSaturatedNarrowToSigned16:
|
||||||
case Opcode::VectorSignedSaturatedNarrowToSigned32:
|
case Opcode::VectorSignedSaturatedNarrowToSigned32:
|
||||||
case Opcode::VectorSignedSaturatedNarrowToSigned64:
|
case Opcode::VectorSignedSaturatedNarrowToSigned64:
|
||||||
|
|
|
@ -410,6 +410,8 @@ OPCODE(VectorSignedSaturatedAccumulateUnsigned32, U128, U128,
|
||||||
OPCODE(VectorSignedSaturatedAccumulateUnsigned64, U128, U128, U128 )
|
OPCODE(VectorSignedSaturatedAccumulateUnsigned64, U128, U128, U128 )
|
||||||
OPCODE(VectorSignedSaturatedDoublingMultiply16, Void, U128, U128 )
|
OPCODE(VectorSignedSaturatedDoublingMultiply16, Void, U128, U128 )
|
||||||
OPCODE(VectorSignedSaturatedDoublingMultiply32, Void, U128, U128 )
|
OPCODE(VectorSignedSaturatedDoublingMultiply32, Void, U128, U128 )
|
||||||
|
OPCODE(VectorSignedSaturatedDoublingMultiplyLong16, U128, U128, U128 )
|
||||||
|
OPCODE(VectorSignedSaturatedDoublingMultiplyLong32, U128, U128, U128 )
|
||||||
OPCODE(VectorSignedSaturatedNarrowToSigned16, U128, U128 )
|
OPCODE(VectorSignedSaturatedNarrowToSigned16, U128, U128 )
|
||||||
OPCODE(VectorSignedSaturatedNarrowToSigned32, U128, U128 )
|
OPCODE(VectorSignedSaturatedNarrowToSigned32, U128, U128 )
|
||||||
OPCODE(VectorSignedSaturatedNarrowToSigned64, U128, U128 )
|
OPCODE(VectorSignedSaturatedNarrowToSigned64, U128, U128 )
|
||||||
|
|
Loading…
Reference in a new issue