emit_x64_vector: Add SSE4.1 implementation of VUZP{1,2}.2S
This commit is contained in:
parent
d68b916f57
commit
4c2bd4ed29
1 changed file with 25 additions and 9 deletions
|
@ -1149,8 +1149,13 @@ void EmitX64::EmitVectorDeinterleaveEvenLower32(EmitContext& ctx, IR::Inst* inst
|
|||
const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
if (code.HasHostFeature(HostFeature::SSE41)) {
|
||||
// copy bytes 0:3 of rhs to bytes 4:7 of lhs, zero out upper 8 bytes
|
||||
code.insertps(lhs, rhs, 0b00011100);
|
||||
} else {
|
||||
code.unpcklps(lhs, rhs);
|
||||
code.movq(lhs, lhs);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, lhs);
|
||||
}
|
||||
|
@ -1229,6 +1234,16 @@ void EmitX64::EmitVectorDeinterleaveOddLower16(EmitContext& ctx, IR::Inst* inst)
|
|||
|
||||
void EmitX64::EmitVectorDeinterleaveOddLower32(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
if (code.HasHostFeature(HostFeature::SSE41)) {
|
||||
const Xbyak::Xmm lhs = ctx.reg_alloc.UseXmm(args[0]);
|
||||
const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||
|
||||
// copy bytes 4:7 of lhs to bytes 0:3 of rhs, zero out upper 8 bytes
|
||||
code.insertps(rhs, lhs, 0b01001100);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, rhs);
|
||||
} else {
|
||||
const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]);
|
||||
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
|
||||
|
@ -1239,6 +1254,7 @@ void EmitX64::EmitVectorDeinterleaveOddLower32(EmitContext& ctx, IR::Inst* inst)
|
|||
|
||||
ctx.reg_alloc.DefineValue(inst, lhs);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitX64::EmitVectorEor(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pxor);
|
||||
|
|
Loading…
Reference in a new issue