diff --git a/src/dynarmic/backend/x64/emit_x64_packed.cpp b/src/dynarmic/backend/x64/emit_x64_packed.cpp index 3354df32..841cabea 100644 --- a/src/dynarmic/backend/x64/emit_x64_packed.cpp +++ b/src/dynarmic/backend/x64/emit_x64_packed.cpp @@ -47,15 +47,13 @@ void EmitX64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); if (ge_inst) { - const Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(); - code.pxor(xmm_ge, xmm_ge); - code.movdqa(saturated_sum, xmm_a); - code.paddsb(saturated_sum, xmm_b); - code.pcmpgtb(xmm_ge, saturated_sum); - code.pcmpeqb(saturated_sum, saturated_sum); - code.pxor(xmm_ge, saturated_sum); + code.pcmpeqb(xmm0, xmm0); + + code.movdqa(xmm_ge, xmm_a); + code.paddsb(xmm_ge, xmm_b); + code.pcmpgtb(xmm_ge, xmm0); ctx.reg_alloc.DefineValue(ge_inst, xmm_ge); ctx.EraseInstruction(ge_inst); @@ -116,15 +114,13 @@ void EmitX64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); if (ge_inst) { - const Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(); - code.pxor(xmm_ge, xmm_ge); - code.movdqa(saturated_sum, xmm_a); - code.paddsw(saturated_sum, xmm_b); - code.pcmpgtw(xmm_ge, saturated_sum); - code.pcmpeqw(saturated_sum, saturated_sum); - code.pxor(xmm_ge, saturated_sum); + code.pcmpeqw(xmm0, xmm0); + + code.movdqa(xmm_ge, xmm_a); + code.paddsw(xmm_ge, xmm_b); + code.pcmpgtw(xmm_ge, xmm0); ctx.reg_alloc.DefineValue(ge_inst, xmm_ge); ctx.EraseInstruction(ge_inst); @@ -166,15 +162,13 @@ void EmitX64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); if (ge_inst) { - const Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(); - code.pxor(xmm_ge, xmm_ge); - code.movdqa(saturated_sum, xmm_a); - code.psubsb(saturated_sum, xmm_b); - code.pcmpgtb(xmm_ge, saturated_sum); - code.pcmpeqb(saturated_sum, saturated_sum); - code.pxor(xmm_ge, saturated_sum); + code.pcmpeqb(xmm0, xmm0); + + code.movdqa(xmm_ge, xmm_a); + code.psubsb(xmm_ge, xmm_b); + code.pcmpgtb(xmm_ge, xmm0); ctx.reg_alloc.DefineValue(ge_inst, xmm_ge); ctx.EraseInstruction(ge_inst); @@ -244,15 +238,13 @@ void EmitX64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); if (ge_inst) { - const Xbyak::Xmm saturated_diff = ctx.reg_alloc.ScratchXmm(); const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(); - code.pxor(xmm_ge, xmm_ge); - code.movdqa(saturated_diff, xmm_a); - code.psubsw(saturated_diff, xmm_b); - code.pcmpgtw(xmm_ge, saturated_diff); - code.pcmpeqw(saturated_diff, saturated_diff); - code.pxor(xmm_ge, saturated_diff); + code.pcmpeqw(xmm0, xmm0); + + code.movdqa(xmm_ge, xmm_a); + code.psubsw(xmm_ge, xmm_b); + code.pcmpgtw(xmm_ge, xmm0); ctx.reg_alloc.DefineValue(ge_inst, xmm_ge); ctx.EraseInstruction(ge_inst);