emit_x64_packed: Optimize GE flag generation for signed packed add/sub
sum >= 0 is equivalent to sum > -1
This commit is contained in:
parent
b224fad171
commit
2779f24862
1 changed files with 20 additions and 28 deletions
|
@ -47,15 +47,13 @@ void EmitX64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) {
|
|||
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
if (ge_inst) {
|
||||
const Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
|
||||
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.pxor(xmm_ge, xmm_ge);
|
||||
code.movdqa(saturated_sum, xmm_a);
|
||||
code.paddsb(saturated_sum, xmm_b);
|
||||
code.pcmpgtb(xmm_ge, saturated_sum);
|
||||
code.pcmpeqb(saturated_sum, saturated_sum);
|
||||
code.pxor(xmm_ge, saturated_sum);
|
||||
code.pcmpeqb(xmm0, xmm0);
|
||||
|
||||
code.movdqa(xmm_ge, xmm_a);
|
||||
code.paddsb(xmm_ge, xmm_b);
|
||||
code.pcmpgtb(xmm_ge, xmm0);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
|
@ -116,15 +114,13 @@ void EmitX64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) {
|
|||
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
if (ge_inst) {
|
||||
const Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
|
||||
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.pxor(xmm_ge, xmm_ge);
|
||||
code.movdqa(saturated_sum, xmm_a);
|
||||
code.paddsw(saturated_sum, xmm_b);
|
||||
code.pcmpgtw(xmm_ge, saturated_sum);
|
||||
code.pcmpeqw(saturated_sum, saturated_sum);
|
||||
code.pxor(xmm_ge, saturated_sum);
|
||||
code.pcmpeqw(xmm0, xmm0);
|
||||
|
||||
code.movdqa(xmm_ge, xmm_a);
|
||||
code.paddsw(xmm_ge, xmm_b);
|
||||
code.pcmpgtw(xmm_ge, xmm0);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
|
@ -166,15 +162,13 @@ void EmitX64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) {
|
|||
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
if (ge_inst) {
|
||||
const Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
|
||||
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.pxor(xmm_ge, xmm_ge);
|
||||
code.movdqa(saturated_sum, xmm_a);
|
||||
code.psubsb(saturated_sum, xmm_b);
|
||||
code.pcmpgtb(xmm_ge, saturated_sum);
|
||||
code.pcmpeqb(saturated_sum, saturated_sum);
|
||||
code.pxor(xmm_ge, saturated_sum);
|
||||
code.pcmpeqb(xmm0, xmm0);
|
||||
|
||||
code.movdqa(xmm_ge, xmm_a);
|
||||
code.psubsb(xmm_ge, xmm_b);
|
||||
code.pcmpgtb(xmm_ge, xmm0);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
|
@ -244,15 +238,13 @@ void EmitX64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
|
|||
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
if (ge_inst) {
|
||||
const Xbyak::Xmm saturated_diff = ctx.reg_alloc.ScratchXmm();
|
||||
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.pxor(xmm_ge, xmm_ge);
|
||||
code.movdqa(saturated_diff, xmm_a);
|
||||
code.psubsw(saturated_diff, xmm_b);
|
||||
code.pcmpgtw(xmm_ge, saturated_diff);
|
||||
code.pcmpeqw(saturated_diff, saturated_diff);
|
||||
code.pxor(xmm_ge, saturated_diff);
|
||||
code.pcmpeqw(xmm0, xmm0);
|
||||
|
||||
code.movdqa(xmm_ge, xmm_a);
|
||||
code.psubsw(xmm_ge, xmm_b);
|
||||
code.pcmpgtw(xmm_ge, xmm0);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
|
|
Loading…
Reference in a new issue