Implement UHSUB8 and UHSUB16 (#48)
This commit is contained in:
parent
cb17f9a3ed
commit
0ff8c375af
5 changed files with 86 additions and 2 deletions
|
@ -1397,6 +1397,66 @@ void EmitX64::EmitPackedHalvingAddS16(IR::Block& block, IR::Inst* inst) {
|
||||||
code->xor(result, carry);
|
code->xor(result, carry);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits x64 code that computes, for each 8-bit lane of the two 32-bit arguments of inst, the halved difference (a - b) / 2, as derived in the comments below.
// NOTE(review): presumably UseDefGpr(a, inst) makes the register holding a also hold the result of inst, and UseScratchGpr(b) yields a clobberable copy of b - confirm against the register allocator.
void EmitX64::EmitPackedHalvingSubU8(IR::Block& block, IR::Inst* inst) {
|
||||||
|
IR::Value a = inst->GetArg(0);
|
||||||
|
IR::Value b = inst->GetArg(1);
|
||||||
|
|
||||||
|
Xbyak::Reg32 minuend = reg_alloc.UseDefGpr(a, inst).cvt32();
|
||||||
|
Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(b).cvt32();
|
||||||
|
|
||||||
|
// This relies on the identity x-y == (x^y) - (((x^y)&y) << 1).
|
||||||
|
// Note that bit 0 of x^y equals bit 0 of x-y, so shifting x^y right by one drops exactly the bit that halving drops.
|
||||||
|
// Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).
|
||||||
|
|
||||||
|
code->xor(minuend, subtrahend);
|
||||||
|
code->and(subtrahend, minuend);
|
||||||
|
code->shr(minuend, 1);
|
||||||
|
|
||||||
|
// At this point,
|
||||||
|
// minuend := (a^b) >> 1 (a 32-bit shift, so bit 7 of each lower lane holds a bit from the lane above; it is overwritten below)
|
||||||
|
// subtrahend := (a^b) & b
|
||||||
|
|
||||||
|
// We must now perform a partitioned subtraction (one independent subtraction per 8-bit lane).
|
||||||
|
// We can do this because each lane of minuend carries only a 7-bit value.
|
||||||
|
// We use the spare top bit of each lane (mask 0x80808080) as a bit to borrow from; we set this bit.
|
||||||
|
// We invert this bit at the end, as it tells us whether that bit was borrowed from.
|
||||||
|
code->or(minuend, 0x80808080);
|
||||||
|
code->sub(minuend, subtrahend);
|
||||||
|
code->xor(minuend, 0x80808080);
|
||||||
|
|
||||||
|
// minuend now contains the desired result.
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits x64 code that computes, for each 16-bit lane of the two 32-bit arguments of inst, the halved difference (a - b) / 2, as derived in the comments below.
// NOTE(review): presumably UseDefGpr(a, inst) makes the register holding a also hold the result of inst, and UseScratchGpr(b) yields a clobberable copy of b - confirm against the register allocator.
void EmitX64::EmitPackedHalvingSubU16(IR::Block& block, IR::Inst* inst) {
|
||||||
|
IR::Value a = inst->GetArg(0);
|
||||||
|
IR::Value b = inst->GetArg(1);
|
||||||
|
|
||||||
|
Xbyak::Reg32 minuend = reg_alloc.UseDefGpr(a, inst).cvt32();
|
||||||
|
Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(b).cvt32();
|
||||||
|
|
||||||
|
// This relies on the identity x-y == (x^y) - (((x^y)&y) << 1).
|
||||||
|
// Note that bit 0 of x^y equals bit 0 of x-y, so shifting x^y right by one drops exactly the bit that halving drops.
|
||||||
|
// Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).
|
||||||
|
|
||||||
|
code->xor(minuend, subtrahend);
|
||||||
|
code->and(subtrahend, minuend);
|
||||||
|
code->shr(minuend, 1);
|
||||||
|
|
||||||
|
// At this point,
|
||||||
|
// minuend := (a^b) >> 1 (a 32-bit shift, so bit 15 of the lower lane holds a bit from the upper lane; it is overwritten below)
|
||||||
|
// subtrahend := (a^b) & b
|
||||||
|
|
||||||
|
// We must now perform a partitioned subtraction (one independent subtraction per 16-bit lane).
|
||||||
|
// We can do this because each lane of minuend carries only a 15-bit value.
|
||||||
|
// We use the spare top bit of each lane (mask 0x80008000) as a bit to borrow from; we set this bit.
|
||||||
|
// We invert this bit at the end, as it tells us whether that bit was borrowed from.
|
||||||
|
code->or(minuend, 0x80008000);
|
||||||
|
code->sub(minuend, subtrahend);
|
||||||
|
code->xor(minuend, 0x80008000);
|
||||||
|
|
||||||
|
// minuend now contains the desired result.
|
||||||
|
}
|
||||||
|
|
||||||
void EmitX64::EmitPackedSaturatedAddU8(IR::Block& block, IR::Inst* inst) {
|
void EmitX64::EmitPackedSaturatedAddU8(IR::Block& block, IR::Inst* inst) {
|
||||||
EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::paddusb);
|
EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::paddusb);
|
||||||
}
|
}
|
||||||
|
|
|
@ -328,6 +328,10 @@ Value IREmitter::PackedHalvingAddS8(const Value& a, const Value& b) {
|
||||||
return Inst(Opcode::PackedHalvingAddS8, {a, b});
|
return Inst(Opcode::PackedHalvingAddS8, {a, b});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds an IR instruction with opcode PackedHalvingSubU8 and operands {a, b}, returning whatever Inst() yields for it.
Value IREmitter::PackedHalvingSubU8(const Value& a, const Value& b) {
|
||||||
|
return Inst(Opcode::PackedHalvingSubU8, {a, b});
|
||||||
|
}
|
||||||
|
|
||||||
Value IREmitter::PackedHalvingAddU16(const Value& a, const Value& b) {
|
Value IREmitter::PackedHalvingAddU16(const Value& a, const Value& b) {
|
||||||
return Inst(Opcode::PackedHalvingAddU16, {a, b});
|
return Inst(Opcode::PackedHalvingAddU16, {a, b});
|
||||||
}
|
}
|
||||||
|
@ -336,6 +340,10 @@ Value IREmitter::PackedHalvingAddS16(const Value& a, const Value& b) {
|
||||||
return Inst(Opcode::PackedHalvingAddS16, {a, b});
|
return Inst(Opcode::PackedHalvingAddS16, {a, b});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds an IR instruction with opcode PackedHalvingSubU16 and operands {a, b}, returning whatever Inst() yields for it.
Value IREmitter::PackedHalvingSubU16(const Value& a, const Value& b) {
|
||||||
|
return Inst(Opcode::PackedHalvingSubU16, {a, b});
|
||||||
|
}
|
||||||
|
|
||||||
Value IREmitter::PackedSaturatedAddU8(const Value& a, const Value& b) {
|
Value IREmitter::PackedSaturatedAddU8(const Value& a, const Value& b) {
|
||||||
return Inst(Opcode::PackedSaturatedAddU8, {a, b});
|
return Inst(Opcode::PackedSaturatedAddU8, {a, b});
|
||||||
}
|
}
|
||||||
|
|
|
@ -123,8 +123,10 @@ public:
|
||||||
Value ByteReverseDual(const Value& a);
|
Value ByteReverseDual(const Value& a);
|
||||||
Value PackedHalvingAddU8(const Value& a, const Value& b);
|
Value PackedHalvingAddU8(const Value& a, const Value& b);
|
||||||
Value PackedHalvingAddS8(const Value& a, const Value& b);
|
Value PackedHalvingAddS8(const Value& a, const Value& b);
|
||||||
|
Value PackedHalvingSubU8(const Value& a, const Value& b);
|
||||||
Value PackedHalvingAddU16(const Value& a, const Value& b);
|
Value PackedHalvingAddU16(const Value& a, const Value& b);
|
||||||
Value PackedHalvingAddS16(const Value& a, const Value& b);
|
Value PackedHalvingAddS16(const Value& a, const Value& b);
|
||||||
|
Value PackedHalvingSubU16(const Value& a, const Value& b);
|
||||||
Value PackedSaturatedAddU8(const Value& a, const Value& b);
|
Value PackedSaturatedAddU8(const Value& a, const Value& b);
|
||||||
Value PackedSaturatedAddS8(const Value& a, const Value& b);
|
Value PackedSaturatedAddS8(const Value& a, const Value& b);
|
||||||
Value PackedSaturatedSubU8(const Value& a, const Value& b);
|
Value PackedSaturatedSubU8(const Value& a, const Value& b);
|
||||||
|
|
|
@ -73,8 +73,10 @@ OPCODE(ByteReverseHalf, T::U16, T::U16
|
||||||
OPCODE(ByteReverseDual, T::U64, T::U64 )
|
OPCODE(ByteReverseDual, T::U64, T::U64 )
|
||||||
OPCODE(PackedHalvingAddU8, T::U32, T::U32, T::U32 )
|
OPCODE(PackedHalvingAddU8, T::U32, T::U32, T::U32 )
|
||||||
OPCODE(PackedHalvingAddS8, T::U32, T::U32, T::U32 )
|
OPCODE(PackedHalvingAddS8, T::U32, T::U32, T::U32 )
|
||||||
|
OPCODE(PackedHalvingSubU8, T::U32, T::U32, T::U32 )
|
||||||
OPCODE(PackedHalvingAddU16, T::U32, T::U32, T::U32 )
|
OPCODE(PackedHalvingAddU16, T::U32, T::U32, T::U32 )
|
||||||
OPCODE(PackedHalvingAddS16, T::U32, T::U32, T::U32 )
|
OPCODE(PackedHalvingAddS16, T::U32, T::U32, T::U32 )
|
||||||
|
OPCODE(PackedHalvingSubU16, T::U32, T::U32, T::U32 )
|
||||||
OPCODE(PackedSaturatedAddU8, T::U32, T::U32, T::U32 )
|
OPCODE(PackedSaturatedAddU8, T::U32, T::U32, T::U32 )
|
||||||
OPCODE(PackedSaturatedAddS8, T::U32, T::U32, T::U32 )
|
OPCODE(PackedSaturatedAddS8, T::U32, T::U32, T::U32 )
|
||||||
OPCODE(PackedSaturatedSubU8, T::U32, T::U32, T::U32 )
|
OPCODE(PackedSaturatedSubU8, T::U32, T::U32, T::U32 )
|
||||||
|
|
|
@ -219,11 +219,23 @@ bool ArmTranslatorVisitor::arm_UHSAX(Cond cond, Reg n, Reg d, Reg m) {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ArmTranslatorVisitor::arm_UHSUB8(Cond cond, Reg n, Reg d, Reg m) {
|
bool ArmTranslatorVisitor::arm_UHSUB8(Cond cond, Reg n, Reg d, Reg m) {
|
||||||
return InterpretThisInstruction();
|
if (d == Reg::PC || n == Reg::PC || m == Reg::PC)
|
||||||
|
return UnpredictableInstruction();
|
||||||
|
if (ConditionPassed(cond)) {
|
||||||
|
auto result = ir.PackedHalvingSubU8(ir.GetRegister(n), ir.GetRegister(m));
|
||||||
|
ir.SetRegister(d, result);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ArmTranslatorVisitor::arm_UHSUB16(Cond cond, Reg n, Reg d, Reg m) {
|
bool ArmTranslatorVisitor::arm_UHSUB16(Cond cond, Reg n, Reg d, Reg m) {
|
||||||
return InterpretThisInstruction();
|
if (d == Reg::PC || n == Reg::PC || m == Reg::PC)
|
||||||
|
return UnpredictableInstruction();
|
||||||
|
if (ConditionPassed(cond)) {
|
||||||
|
auto result = ir.PackedHalvingSubU16(ir.GetRegister(n), ir.GetRegister(m));
|
||||||
|
ir.SetRegister(d, result);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Arm
|
} // namespace Arm
|
||||||
|
|
Loading…
Reference in a new issue