emit_x64_vector: Add SSE4.1 implementation of VUZP{1,2}.2S
This commit is contained in:
parent
d68b916f57
commit
4c2bd4ed29
1 changed file with 25 additions and 9 deletions
|
@ -1149,8 +1149,13 @@ void EmitX64::EmitVectorDeinterleaveEvenLower32(EmitContext& ctx, IR::Inst* inst
|
||||||
const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
|
|
||||||
|
if (code.HasHostFeature(HostFeature::SSE41)) {
|
||||||
|
// copy bytes 0:3 of rhs to bytes 4:7 of lhs, zero out upper 8 bytes
|
||||||
|
code.insertps(lhs, rhs, 0b00011100);
|
||||||
|
} else {
|
||||||
code.unpcklps(lhs, rhs);
|
code.unpcklps(lhs, rhs);
|
||||||
code.movq(lhs, lhs);
|
code.movq(lhs, lhs);
|
||||||
|
}
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, lhs);
|
ctx.reg_alloc.DefineValue(inst, lhs);
|
||||||
}
|
}
|
||||||
|
@ -1229,6 +1234,16 @@ void EmitX64::EmitVectorDeinterleaveOddLower16(EmitContext& ctx, IR::Inst* inst)
|
||||||
|
|
||||||
void EmitX64::EmitVectorDeinterleaveOddLower32(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitVectorDeinterleaveOddLower32(EmitContext& ctx, IR::Inst* inst) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
|
|
||||||
|
if (code.HasHostFeature(HostFeature::SSE41)) {
|
||||||
|
const Xbyak::Xmm lhs = ctx.reg_alloc.UseXmm(args[0]);
|
||||||
|
const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||||
|
|
||||||
|
// copy bytes 4:7 of lhs to bytes 0:3 of rhs, zero out upper 8 bytes
|
||||||
|
code.insertps(rhs, lhs, 0b01001100);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, rhs);
|
||||||
|
} else {
|
||||||
const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
|
||||||
|
@ -1238,6 +1253,7 @@ void EmitX64::EmitVectorDeinterleaveOddLower32(EmitContext& ctx, IR::Inst* inst)
|
||||||
code.unpckhpd(lhs, zero);
|
code.unpckhpd(lhs, zero);
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, lhs);
|
ctx.reg_alloc.DefineValue(inst, lhs);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitVectorEor(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitVectorEor(EmitContext& ctx, IR::Inst* inst) {
|
||||||
|
|
Loading…
Reference in a new issue