Implement USUB8

This commit is contained in:
MerryMage 2016-12-05 00:27:59 +00:00
parent 5c1aab1666
commit 52e1445f43
5 changed files with 62 additions and 1 deletion

View file

@ -1289,6 +1289,52 @@ void EmitX64::EmitPackedAddU8(IR::Block& block, IR::Inst* inst) {
}
}
/**
 * Emits host code for the PackedSubU8 IR instruction.
 *
 * Both 32-bit arguments are moved into XMM registers and subtracted lane-wise with
 * `psubb` (four unsigned bytes, wrapping). The result is written back to the GPR that
 * was allocated as both the use of argument 0 and the definition of `inst`.
 *
 * If a GetGEFromOp pseudo-operation is associated with `inst`, that pseudo-operation is
 * erased from `block` and its value is produced here too: a 4-bit mask in which bit i is
 * set when byte i of argument 0 is >= byte i of argument 1 (unsigned comparison).
 *
 * @param block Block that contains `inst` (needed to erase the pseudo-operation).
 * @param inst  The PackedSubU8 instruction being emitted.
 */
void EmitX64::EmitPackedSubU8(IR::Block& block, IR::Inst* inst) {
    // Top bit of each of the four byte lanes.
    constexpr auto lane_msb_mask = 0x80808080;
    // Multiplying by this moves bits 7/15/23/31 into bits 28..31 without overlapping
    // partial products, so a following shift by 28 leaves one bit per lane.
    constexpr auto gather_multiplier = 0x0204081;

    auto ge_op = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);

    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    Xbyak::Reg32 gpr_result = reg_alloc.UseDefGpr(lhs, inst).cvt32();
    Xbyak::Reg32 gpr_rhs = reg_alloc.UseGpr(rhs).cvt32();
    Xbyak::Reg32 gpr_ge;

    Xbyak::Xmm vec_lhs = reg_alloc.ScratchXmm();
    Xbyak::Xmm vec_rhs = reg_alloc.ScratchXmm();
    Xbyak::Xmm vec_ge;

    if (ge_op) {
        // The GE value is computed as part of this instruction, so the separate
        // pseudo-operation is removed and its use of `inst` is released.
        EraseInstruction(block, ge_op);
        inst->DecrementRemainingUses();

        gpr_ge = reg_alloc.DefGpr(ge_op).cvt32();
        vec_ge = reg_alloc.ScratchXmm();
    }

    code->movd(vec_lhs, gpr_result);
    code->movd(vec_rhs, gpr_rhs);

    if (ge_op) {
        // max(lhs, rhs) == lhs  <=>  lhs >= rhs (per unsigned byte); pcmpeqb turns that
        // into an all-ones / all-zeros lane mask. This must happen before psubb
        // overwrites vec_lhs.
        code->movaps(vec_ge, vec_lhs);
        code->pmaxub(vec_ge, vec_rhs);
        code->pcmpeqb(vec_ge, vec_lhs);
        code->movd(gpr_ge, vec_ge);
    }

    code->psubb(vec_lhs, vec_rhs);
    code->movd(gpr_result, vec_lhs);

    if (!ge_op) {
        return;
    }

    // Compress the four lane masks down to a 4-bit value.
    if (!cpu_info.has(Xbyak::util::Cpu::tBMI2)) {
        code->and_(gpr_ge, lane_msb_mask);
        code->imul(gpr_ge, gpr_ge, gather_multiplier);
        code->shr(gpr_ge, 28);
        return;
    }

    // With BMI2, pext gathers the four lane MSBs directly.
    Xbyak::Reg32 gpr_mask = reg_alloc.ScratchGpr().cvt32();
    code->mov(gpr_mask, lane_msb_mask);
    code->pext(gpr_ge, gpr_ge, gpr_mask);
}
void EmitX64::EmitPackedHalvingAddU8(IR::Block& block, IR::Inst* inst) {
IR::Value a = inst->GetArg(0);
IR::Value b = inst->GetArg(1);

View file

@ -330,6 +330,12 @@ IREmitter::ResultAndGE IREmitter::PackedAddU8(const Value& a, const Value& b) {
return {result, ge};
}
/// Creates a PackedSubU8 instruction on `a` and `b`, followed by a GetGEFromOp
/// instruction that takes the subtraction result as its argument.
/// @return Both values, in the order {result, ge}.
IREmitter::ResultAndGE IREmitter::PackedSubU8(const Value& a, const Value& b) {
    auto difference = Inst(Opcode::PackedSubU8, {a, b});
    // Braced-list elements are evaluated left to right, so the GE instruction is
    // still created after the subtraction it refers to.
    return {difference, Inst(Opcode::GetGEFromOp, {difference})};
}
/// Creates a PackedHalvingAddU8 instruction with arguments `a` and `b` and returns
/// the value it produces. Unlike PackedSubU8, no GetGEFromOp is created here.
Value IREmitter::PackedHalvingAddU8(const Value& a, const Value& b) {
    auto halved_sum = Inst(Opcode::PackedHalvingAddU8, {a, b});
    return halved_sum;
}

View file

@ -128,6 +128,7 @@ public:
// Byte-reversal helpers; each takes one value and returns one value.
Value ByteReverseHalf(const Value& a);
Value ByteReverseDual(const Value& a);
// Packed 8-bit add/subtract: return the operation's result together with a value
// obtained from a GetGEFromOp instruction applied to that result (ResultAndGE).
ResultAndGE PackedAddU8(const Value& a, const Value& b);
ResultAndGE PackedSubU8(const Value& a, const Value& b);
// Packed halving operations: return only the result value, no GE value.
Value PackedHalvingAddU8(const Value& a, const Value& b);
Value PackedHalvingAddS8(const Value& a, const Value& b);
Value PackedHalvingSubU8(const Value& a, const Value& b);

View file

@ -73,6 +73,7 @@ OPCODE(ByteReverseWord, T::U32, T::U32
// NOTE(review): entries appear to be OPCODE(name, result type, argument types...);
// confirm against the OPCODE macro definition at the include site.
OPCODE(ByteReverseHalf, T::U16, T::U16 )
OPCODE(ByteReverseDual, T::U64, T::U64 )
// Packed 8-bit operations on 32-bit values.
OPCODE(PackedAddU8, T::U32, T::U32, T::U32 )
OPCODE(PackedSubU8, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingAddU8, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingAddS8, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingSubU8, T::U32, T::U32, T::U32 )

View file

@ -58,7 +58,14 @@ bool ArmTranslatorVisitor::arm_USAX(Cond cond, Reg n, Reg d, Reg m) {
}
/**
 * Translates the ARM USUB8 instruction into IR.
 *
 * Reads registers `n` and `m`, performs a packed unsigned 8-bit subtraction (n - m) via
 * IREmitter::PackedSubU8, writes the result to register `d` and updates the GE flags
 * from the accompanying GE value. Nothing is emitted when the condition does not pass.
 *
 * Previously this function began with an unconditional
 * `return InterpretThisInstruction();`, which made the translation below unreachable
 * and sent every USUB8 to the interpreter fallback. That stale early return is removed.
 *
 * @param cond Condition code of the instruction.
 * @param n    First operand register (minuend).
 * @param d    Destination register.
 * @param m    Second operand register (subtrahend).
 * @return The result of UnpredictableInstruction() if any register is PC; otherwise
 *         true (NOTE(review): presumably "continue translating" - confirm against the
 *         visitor's contract).
 */
bool ArmTranslatorVisitor::arm_USUB8(Cond cond, Reg n, Reg d, Reg m) {
    // PC as any operand is rejected as unpredictable before anything is emitted.
    if (d == Reg::PC || n == Reg::PC || m == Reg::PC)
        return UnpredictableInstruction();
    if (ConditionPassed(cond)) {
        auto result = ir.PackedSubU8(ir.GetRegister(n), ir.GetRegister(m));
        ir.SetRegister(d, result.result);
        ir.SetGEFlags(result.ge);
    }
    return true;
}
bool ArmTranslatorVisitor::arm_USUB16(Cond cond, Reg n, Reg d, Reg m) {