emit_x64_packed: EmitPackedSubU16 modified xmm_b, which wasn't writeable
For CPUs that didn't support SSE4.1, this was a bug.
This commit is contained in:
parent
f1057aa362
commit
d124a1d761
1 changed file with 38 additions and 26 deletions
|
@@ -194,20 +194,35 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||||
auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||||
|
|
||||||
|
if (!ge_inst) {
|
||||||
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
|
|
||||||
if (ge_inst) {
|
code->psubw(xmm_a, xmm_b);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(inst, xmm_a);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
|
if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
|
||||||
|
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
|
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||||
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
code->movdqa(xmm_ge, xmm_a);
|
code->movdqa(xmm_ge, xmm_a);
|
||||||
code->pmaxuw(xmm_ge, xmm_b); // Requires SSE 4.1
|
code->pmaxuw(xmm_ge, xmm_b); // Requires SSE 4.1
|
||||||
code->pcmpeqw(xmm_ge, xmm_a);
|
code->pcmpeqw(xmm_ge, xmm_a);
|
||||||
|
|
||||||
|
code->psubw(xmm_a, xmm_b);
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||||
ctx.EraseInstruction(ge_inst);
|
ctx.EraseInstruction(ge_inst);
|
||||||
} else {
|
ctx.reg_alloc.DefineValue(inst, xmm_a);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||||
|
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||||
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||||
Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
|
Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
|
||||||
|
|
||||||
|
@@ -219,13 +234,10 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
|
||||||
code->pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison!
|
code->pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison!
|
||||||
code->pxor(xmm_ge, ones);
|
code->pxor(xmm_ge, ones);
|
||||||
|
|
||||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
|
||||||
ctx.EraseInstruction(ge_inst);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
code->psubw(xmm_a, xmm_b);
|
code->psubw(xmm_a, xmm_b);
|
||||||
|
|
||||||
|
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||||
|
ctx.EraseInstruction(ge_inst);
|
||||||
ctx.reg_alloc.DefineValue(inst, xmm_a);
|
ctx.reg_alloc.DefineValue(inst, xmm_a);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue