emit_x64_vector: Add SSE4.1 implementation of VUZP1.8H
This commit is contained in:
parent
a43c176fc3
commit
73a75b5034
1 changed file with 14 additions and 5 deletions
|
@@ -1079,6 +1079,14 @@ void EmitX64::EmitVectorDeinterleaveEven16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]);
|
const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||||
|
|
||||||
|
if (code.HasHostFeature(HostFeature::SSE41)) {
|
||||||
|
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
|
||||||
|
code.pxor(zero, zero);
|
||||||
|
|
||||||
|
code.pblendw(lhs, zero, 0b10101010);
|
||||||
|
code.pblendw(rhs, zero, 0b10101010);
|
||||||
|
code.packusdw(lhs, rhs);
|
||||||
|
} else {
|
||||||
code.pslld(lhs, 16);
|
code.pslld(lhs, 16);
|
||||||
code.psrad(lhs, 16);
|
code.psrad(lhs, 16);
|
||||||
|
|
||||||
|
@@ -1086,6 +1094,7 @@ void EmitX64::EmitVectorDeinterleaveEven16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
code.psrad(rhs, 16);
|
code.psrad(rhs, 16);
|
||||||
|
|
||||||
code.packssdw(lhs, rhs);
|
code.packssdw(lhs, rhs);
|
||||||
|
}
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, lhs);
|
ctx.reg_alloc.DefineValue(inst, lhs);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue