emit_x64_vector: Optimize VectorSignedAbsoluteDifference
This commit is contained in:
parent
7e66e082fd
commit
ba9009abd8
1 changed file with 42 additions and 26 deletions
|
@@ -3744,35 +3744,51 @@ static void EmitVectorSignedAbsoluteDifference(size_t esize, EmitContext& ctx, I
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
|
const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]);
|
const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm();
|
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||||
const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm();
|
|
||||||
const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm();
|
|
||||||
|
|
||||||
code.movdqa(mask, x);
|
// only signed 16-bit min/max are available below SSE4.1
|
||||||
code.movdqa(tmp1, y);
|
if (code.HasHostFeature(HostFeature::SSE41) || esize == 16) {
|
||||||
|
code.movdqa(tmp, x);
|
||||||
|
|
||||||
switch (esize) {
|
switch (esize) {
|
||||||
case 8:
|
case 8:
|
||||||
code.pcmpgtb(mask, y);
|
code.pminsb(tmp, y);
|
||||||
code.psubb(tmp1, x);
|
code.pmaxsb(x, y);
|
||||||
code.psubb(x, y);
|
code.psubb(x, tmp);
|
||||||
break;
|
break;
|
||||||
case 16:
|
case 16:
|
||||||
code.pcmpgtw(mask, y);
|
code.pminsw(tmp, y);
|
||||||
code.psubw(tmp1, x);
|
code.pmaxsw(x, y);
|
||||||
code.psubw(x, y);
|
code.psubw(x, tmp);
|
||||||
break;
|
break;
|
||||||
case 32:
|
case 32:
|
||||||
code.pcmpgtd(mask, y);
|
code.pminsd(tmp, y);
|
||||||
code.psubd(tmp1, x);
|
code.pmaxsd(x, y);
|
||||||
code.psubd(x, y);
|
code.psubd(x, tmp);
|
||||||
break;
|
break;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
code.movdqa(tmp, y);
|
||||||
|
|
||||||
code.movdqa(tmp2, mask);
|
switch (esize) {
|
||||||
code.pand(x, mask);
|
case 8:
|
||||||
code.pandn(tmp2, tmp1);
|
code.pcmpgtb(tmp, x);
|
||||||
code.por(x, tmp2);
|
code.psubb(x, y);
|
||||||
|
code.pxor(x, tmp);
|
||||||
|
code.psubb(x, tmp);
|
||||||
|
break;
|
||||||
|
case 32:
|
||||||
|
code.pcmpgtd(tmp, x);
|
||||||
|
code.psubd(x, y);
|
||||||
|
code.pxor(x, tmp);
|
||||||
|
code.psubd(x, tmp);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(inst, x);
|
ctx.reg_alloc.DefineValue(inst, x);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue