emit_x64_vector: Add SSSE3 implementation of VUZP{1,2}.8B
This commit is contained in:
parent
521bf64ef2
commit
8ef0f2b54f
1 changed file with 30 additions and 14 deletions
|
@ -1122,15 +1122,23 @@ void EmitX64::EmitVectorDeinterleaveEven64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
// EmitVectorDeinterleaveEvenLower8, rendered as a side-by-side diff: each row
// appears to list the pre-commit line first and the post-commit line second,
// separated by the bar marker lines. The trailing comments below annotate the
// post-commit column.
//
// Post-commit behaviour: lhs starts as args[0] and ends up holding
//   result bytes 0-3  = bytes 0, 2, 4, 6 of args[0]
//   result bytes 4-7  = bytes 0, 2, 4, 6 of args[1]
//   result bytes 8-15 = zero
// and lhs is then defined as the value of inst.
//
// SSSE3 path: punpcklbw interleaves the low 8 bytes of both operands, then a
// single pshufb gathers the wanted bytes. Its control constant selects
// interleaved bytes 0, 4, 8, 12 (even bytes of lhs) followed by 1, 5, 9, 13
// (even bytes of rhs); the 0x80 control bytes in the upper half produce zeros.
//
// Fallback path (identical to the whole pre-commit body): mask, pack, reorder
// and clear, as annotated line by line below.
void EmitX64::EmitVectorDeinterleaveEvenLower8(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitVectorDeinterleaveEvenLower8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]); // modified in place and returned as the result
|
||||||
const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]);
|
|
||||||
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
|
||||||
|
|
||||||
code.movdqa(tmp, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
|
if (code.HasHostFeature(HostFeature::SSSE3)) { // pshufb requires SSSE3
|
||||||
code.pand(lhs, tmp);
|
const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]); // only read on this path, so no scratch copy is requested
|
||||||
code.pand(rhs, tmp);
|
|
||||||
code.packuswb(lhs, rhs);
|
code.punpcklbw(lhs, rhs); // interleave low halves: byte 2i = lhs[i], byte 2i+1 = rhs[i]
|
||||||
code.pshufd(lhs, lhs, 0b11011000);
|
code.pshufb(lhs, code.MConst(xword, 0x0D'09'05'01'0C'08'04'00, 0x8080808080808080));
|
||||||
code.movq(lhs, lhs);
|
} else { // host without SSSE3: same sequence as the pre-commit body
|
||||||
|
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||||
|
const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]); // written by pand below, hence a scratch register
|
||||||
|
|
||||||
|
code.movdqa(tmp, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); // mask keeping the low (even) byte of every 16-bit word
|
||||||
|
code.pand(lhs, tmp);
|
||||||
|
code.pand(rhs, tmp);
|
||||||
|
code.packuswb(lhs, rhs); // words are now 0..255, so the unsigned saturation never changes a value; lhs bytes land in the low half, rhs bytes in the high half
|
||||||
|
code.pshufd(lhs, lhs, 0b11011000); // dword order 0, 2, 1, 3: brings the rhs bytes (dword 2) next to the lhs bytes (dword 0)
|
||||||
|
code.movq(lhs, lhs); // clears the upper 64 bits
|
||||||
|
}
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, lhs);
|
ctx.reg_alloc.DefineValue(inst, lhs);
|
||||||
}
|
}
|
||||||
|
@ -1224,13 +1232,21 @@ void EmitX64::EmitVectorDeinterleaveOdd64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
// EmitVectorDeinterleaveOddLower8, rendered as a side-by-side diff: each row
// appears to list the pre-commit line first and the post-commit line second,
// separated by the bar marker lines. The trailing comments below annotate the
// post-commit column.
//
// Post-commit behaviour: lhs starts as args[0] and ends up holding
//   result bytes 0-3  = bytes 1, 3, 5, 7 of args[0]
//   result bytes 4-7  = bytes 1, 3, 5, 7 of args[1]
//   result bytes 8-15 = zero
// and lhs is then defined as the value of inst.
//
// SSSE3 path: punpcklbw interleaves the low 8 bytes of both operands, then a
// single pshufb gathers the wanted bytes. Its control constant selects
// interleaved bytes 2, 6, 10, 14 (odd bytes of lhs) followed by 3, 7, 11, 15
// (odd bytes of rhs); the 0x80 control bytes in the upper half produce zeros.
//
// Fallback path (identical to the whole pre-commit body): shift, pack, reorder
// and clear, as annotated line by line below.
void EmitX64::EmitVectorDeinterleaveOddLower8(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitVectorDeinterleaveOddLower8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]); // modified in place and returned as the result
|
||||||
const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]);
|
|
||||||
|
|
||||||
code.psraw(lhs, 8);
|
if (code.HasHostFeature(HostFeature::SSSE3)) { // pshufb requires SSSE3
|
||||||
code.psraw(rhs, 8);
|
const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]); // only read on this path, so no scratch copy is requested
|
||||||
code.packsswb(lhs, rhs);
|
|
||||||
code.pshufd(lhs, lhs, 0b11011000);
|
code.punpcklbw(lhs, rhs); // interleave low halves: byte 2i = lhs[i], byte 2i+1 = rhs[i]
|
||||||
code.movq(lhs, lhs);
|
code.pshufb(lhs, code.MConst(xword, 0x0F'0B'07'03'0E'0A'06'02, 0x8080808080808080));
|
||||||
|
} else { // host without SSSE3: same sequence as the pre-commit body
|
||||||
|
const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]); // written by psraw below, hence a scratch register
|
||||||
|
|
||||||
|
code.psraw(lhs, 8); // arithmetic shift moves the high (odd) byte of every 16-bit word into the low byte, sign-extended
|
||||||
|
code.psraw(rhs, 8);
|
||||||
|
code.packsswb(lhs, rhs); // words are now -128..127, so the signed saturation never changes a value; lhs bytes land in the low half, rhs bytes in the high half
|
||||||
|
code.pshufd(lhs, lhs, 0b11011000); // dword order 0, 2, 1, 3: brings the rhs bytes (dword 2) next to the lhs bytes (dword 0)
|
||||||
|
code.movq(lhs, lhs); // clears the upper 64 bits
|
||||||
|
}
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, lhs);
|
ctx.reg_alloc.DefineValue(inst, lhs);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue