emit_x64: pmaxuw and pminuw require SSE 4.1
This commit is intended to close citra-emu/citra#3137. pmaxuw and pminuw were used to perform unsigned 16-bit comparisons, but both instructions require SSE 4.1. On CPUs that do not support SSE 4.1, we now emulate the unsigned comparison with a signed one (pcmpgtw) after offsetting both inputs by 0x8000.
This commit is contained in:
parent
c1495ca5da
commit
08f638d447
1 changed file with 30 additions and 9 deletions
|
@ -1554,11 +1554,22 @@ void EmitX64::EmitPackedAddU16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst*
|
||||||
Xbyak::Reg32 reg_ge = reg_alloc.ScratchGpr().cvt32();
|
Xbyak::Reg32 reg_ge = reg_alloc.ScratchGpr().cvt32();
|
||||||
Xbyak::Xmm tmp = reg_alloc.ScratchXmm();
|
Xbyak::Xmm tmp = reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
code->movdqa(tmp, xmm_a);
|
if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
|
||||||
code->pminuw(tmp, xmm_b);
|
code->movdqa(tmp, xmm_a);
|
||||||
code->pcmpeqw(tmp, xmm_b);
|
code->pminuw(tmp, xmm_b);
|
||||||
code->movd(reg_ge, tmp);
|
code->pcmpeqw(tmp, xmm_b);
|
||||||
code->not_(reg_ge);
|
code->movd(reg_ge, tmp);
|
||||||
|
code->not_(reg_ge);
|
||||||
|
} else {
|
||||||
|
// !(b <= a+b) == b > a+b
|
||||||
|
Xbyak::Xmm tmp_b = reg_alloc.ScratchXmm();
|
||||||
|
code->movdqa(tmp, xmm_a);
|
||||||
|
code->movdqa(tmp_b, xmm_b);
|
||||||
|
code->paddw(tmp, code->MConst(0x80008000));
|
||||||
|
code->paddw(tmp_b, code->MConst(0x80008000));
|
||||||
|
code->pcmpgtw(tmp_b, tmp); // *Signed* comparison!
|
||||||
|
code->movd(reg_ge, tmp_b);
|
||||||
|
}
|
||||||
|
|
||||||
ExtractMostSignificantBitFromPackedBytes(code, reg_alloc, reg_ge);
|
ExtractMostSignificantBitFromPackedBytes(code, reg_alloc, reg_ge);
|
||||||
reg_alloc.DefineValue(ge_inst, reg_ge);
|
reg_alloc.DefineValue(ge_inst, reg_ge);
|
||||||
|
@ -1672,10 +1683,20 @@ void EmitX64::EmitPackedSubU16(RegAlloc& reg_alloc, IR::Block& block, IR::Inst*
|
||||||
reg_ge = reg_alloc.ScratchGpr().cvt32();
|
reg_ge = reg_alloc.ScratchGpr().cvt32();
|
||||||
Xbyak::Xmm xmm_ge = reg_alloc.ScratchXmm();
|
Xbyak::Xmm xmm_ge = reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
code->movdqa(xmm_ge, xmm_a);
|
if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
|
||||||
code->pmaxuw(xmm_ge, xmm_b);
|
code->movdqa(xmm_ge, xmm_a);
|
||||||
code->pcmpeqw(xmm_ge, xmm_a);
|
code->pmaxuw(xmm_ge, xmm_b); // Requires SSE 4.1
|
||||||
code->movd(reg_ge, xmm_ge);
|
code->pcmpeqw(xmm_ge, xmm_a);
|
||||||
|
code->movd(reg_ge, xmm_ge);
|
||||||
|
} else {
|
||||||
|
// (a >= b) == !(b > a)
|
||||||
|
code->paddw(xmm_a, code->MConst(0x80008000));
|
||||||
|
code->paddw(xmm_b, code->MConst(0x80008000));
|
||||||
|
code->movdqa(xmm_ge, xmm_b);
|
||||||
|
code->pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison!
|
||||||
|
code->movd(reg_ge, xmm_ge);
|
||||||
|
code->not_(reg_ge);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
code->psubw(xmm_a, xmm_b);
|
code->psubw(xmm_a, xmm_b);
|
||||||
|
|
Loading…
Reference in a new issue