emit_x64_packed: EmitPackedSubU16 modified xmm_b, which wasn't writeable

This was a bug on CPUs that don't support SSE4.1: the fallback path modifies xmm_b, so it now obtains it with UseScratchXmm instead of UseXmm.
This commit is contained in:
MerryMage 2018-01-25 18:37:03 +00:00
parent f1057aa362
commit d124a1d761

View file

@ -194,20 +194,35 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
if (!ge_inst) {
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
if (ge_inst) {
code->psubw(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
return;
}
if (code->DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
code->movdqa(xmm_ge, xmm_a);
code->pmaxuw(xmm_ge, xmm_b); // Requires SSE 4.1
code->pcmpeqw(xmm_ge, xmm_a);
code->psubw(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.EraseInstruction(ge_inst);
} else {
ctx.reg_alloc.DefineValue(inst, xmm_a);
return;
}
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
@ -219,13 +234,10 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
code->pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison!
code->pxor(xmm_ge, ones);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.EraseInstruction(ge_inst);
}
}
code->psubw(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.EraseInstruction(ge_inst);
ctx.reg_alloc.DefineValue(inst, xmm_a);
}