Implemented UHADD16
This commit is contained in:
parent
4f7dc81492
commit
c0c1bb1094
5 changed files with 36 additions and 1 deletion
|
@ -1318,6 +1318,29 @@ void EmitX64::EmitPackedHalvingAddU8(IR::Block& block, IR::Inst* inst) {
|
||||||
code->add(result, xor_a_b);
|
code->add(result, xor_a_b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void EmitX64::EmitPackedHalvingAddU16(IR::Block& block, IR::Inst* inst) {
    // Emits x64 code that treats each 32-bit operand as two 16-bit lanes and
    // produces, per lane, the sum of the two inputs halved.
    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    // `acc` is used as the first operand on entry, then reused to hold
    // (a & b) and finally the result.
    const Xbyak::Reg32 acc = reg_alloc.UseDefGpr(lhs, inst).cvt32();
    const Xbyak::Reg32 other = reg_alloc.UseGpr(rhs).cvt32();
    const Xbyak::Reg32 half_xor = reg_alloc.ScratchGpr().cvt32();

    // Identity: x + y == ((x & y) << 1) + (x ^ y).
    // (x ^ y) carries the least significant bit of the sum, so halving gives
    //     (x + y) / 2 == (x & y) + ((x ^ y) >> 1).
    // The shift is applied to the whole 32-bit register, which moves the low
    // bit of the upper lane into the top bit of the lower lane. Masking each
    // lane with 0x7FFF discards that stray bit before the final add.

    code->mov(half_xor, acc);           // half_xor = a (copied before acc is overwritten)
    code->and(acc, other);              // acc      = a & b
    code->xor(half_xor, other);         // half_xor = a ^ b
    code->shr(half_xor, 1);             // half_xor = (a ^ b) >> 1, across both lanes
    code->and(half_xor, 0x7FFF7FFF);    // clear the bit that crossed the lane boundary
    code->add(acc, half_xor);           // acc      = (a & b) + ((a ^ b) >> 1)
}
|
||||||
|
|
||||||
void EmitX64::EmitPackedSaturatedAddU8(IR::Block& block, IR::Inst* inst) {
    // Delegates to the shared packed-operation emitter, selecting PADDUSB
    // (packed add of unsigned bytes with unsigned saturation) as the
    // instruction to generate.
    EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::paddusb);
}
|
||||||
|
|
|
@ -324,6 +324,10 @@ Value IREmitter::PackedHalvingAddU8(const Value& a, const Value& b) {
|
||||||
return Inst(Opcode::PackedHalvingAddU8, { a, b });
|
return Inst(Opcode::PackedHalvingAddU8, { a, b });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Value IREmitter::PackedHalvingAddU16(const Value& a, const Value& b) {
    // Appends a PackedHalvingAddU16 instruction taking `a` and `b` as its
    // operands and returns the value it produces.
    return Inst(Opcode::PackedHalvingAddU16, {a, b});
}
|
||||||
|
|
||||||
Value IREmitter::PackedSaturatedAddU8(const Value& a, const Value& b) {
    // Appends a PackedSaturatedAddU8 instruction taking `a` and `b` as its
    // operands and returns the value it produces.
    return Inst(Opcode::PackedSaturatedAddU8, {a, b});
}
|
||||||
|
|
|
@ -122,6 +122,7 @@ public:
|
||||||
Value ByteReverseHalf(const Value& a);
|
Value ByteReverseHalf(const Value& a);
|
||||||
Value ByteReverseDual(const Value& a);
|
Value ByteReverseDual(const Value& a);
|
||||||
Value PackedHalvingAddU8(const Value& a, const Value& b);
|
Value PackedHalvingAddU8(const Value& a, const Value& b);
|
||||||
|
Value PackedHalvingAddU16(const Value& a, const Value& b);
|
||||||
Value PackedSaturatedAddU8(const Value& a, const Value& b);
|
Value PackedSaturatedAddU8(const Value& a, const Value& b);
|
||||||
Value PackedSaturatedAddS8(const Value& a, const Value& b);
|
Value PackedSaturatedAddS8(const Value& a, const Value& b);
|
||||||
Value PackedSaturatedSubU8(const Value& a, const Value& b);
|
Value PackedSaturatedSubU8(const Value& a, const Value& b);
|
||||||
|
|
|
@ -72,6 +72,7 @@ OPCODE(ByteReverseWord, T::U32, T::U32
|
||||||
OPCODE(ByteReverseHalf, T::U16, T::U16 )
|
OPCODE(ByteReverseHalf, T::U16, T::U16 )
|
||||||
OPCODE(ByteReverseDual, T::U64, T::U64 )
|
OPCODE(ByteReverseDual, T::U64, T::U64 )
|
||||||
OPCODE(PackedHalvingAddU8, T::U32, T::U32, T::U32 )
|
OPCODE(PackedHalvingAddU8, T::U32, T::U32, T::U32 )
|
||||||
|
OPCODE(PackedHalvingAddU16, T::U32, T::U32, T::U32 )
|
||||||
OPCODE(PackedSaturatedAddU8, T::U32, T::U32, T::U32 )
|
OPCODE(PackedSaturatedAddU8, T::U32, T::U32, T::U32 )
|
||||||
OPCODE(PackedSaturatedAddS8, T::U32, T::U32, T::U32 )
|
OPCODE(PackedSaturatedAddS8, T::U32, T::U32, T::U32 )
|
||||||
OPCODE(PackedSaturatedSubU8, T::U32, T::U32, T::U32 )
|
OPCODE(PackedSaturatedSubU8, T::U32, T::U32, T::U32 )
|
||||||
|
|
|
@ -189,7 +189,13 @@ bool ArmTranslatorVisitor::arm_UHADD8(Cond cond, Reg n, Reg d, Reg m) {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ArmTranslatorVisitor::arm_UHADD16(Cond cond, Reg n, Reg d, Reg m) {
    // Translates UHADD16 into IR: Rd = PackedHalvingAddU16(Rn, Rm).
    // Using the PC as the destination or as either source is rejected as
    // unpredictable before anything is emitted.
    const bool uses_pc = d == Reg::PC || n == Reg::PC || m == Reg::PC;
    if (uses_pc) {
        return UnpredictableInstruction();
    }

    // Nothing to emit when the condition check fails; translation continues.
    if (!ConditionPassed(cond)) {
        return true;
    }

    const auto halved_sums = ir.PackedHalvingAddU16(ir.GetRegister(n), ir.GetRegister(m));
    ir.SetRegister(d, halved_sums);
    return true;
}
|
||||||
|
|
||||||
bool ArmTranslatorVisitor::arm_UHASX(Cond cond, Reg n, Reg d, Reg m) {
|
bool ArmTranslatorVisitor::arm_UHASX(Cond cond, Reg n, Reg d, Reg m) {
|
||||||
|
|
Loading…
Reference in a new issue