Implement UHASX, UHSAX, SHASX and SHSAX (#75)
This commit is contained in:
parent
e9df248d56
commit
d5610eb26c
5 changed files with 122 additions and 8 deletions
|
@ -2012,6 +2012,88 @@ void EmitX64::EmitPackedHalvingSubS16(IR::Block&, IR::Inst* inst) {
|
||||||
code->xor(minuend, carry);
|
code->xor(minuend, carry);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Emits x64 code for the unsigned packed halving add/subtract-with-exchange operation.
///
/// Arg 0 and arg 1 of `inst` are the two packed operands; arg 2 is a 1-bit immediate:
///   - true:  the high result word holds the halved sum, the low word the halved difference.
///   - false: the high result word holds the halved difference, the low word the halved sum.
/// The result is produced in the register defined for `inst`.
void EmitX64::EmitPackedHalvingSubAddU16(IR::Block&, IR::Inst* inst) {
    IR::Value operand_a = inst->GetArg(0);
    IR::Value operand_b = inst->GetArg(1);
    const bool hi_is_sum = inst->GetArg(2).GetU1();

    // Allocation order: result/a first, then b, then the two scratch registers.
    const Xbyak::Reg32 hi_a = reg_alloc.UseDefGpr(operand_a, inst).cvt32();
    const Xbyak::Reg32 hi_b = reg_alloc.UseScratchGpr(operand_b).cvt32();
    const Xbyak::Reg32 lo_a = reg_alloc.ScratchGpr().cvt32();
    const Xbyak::Reg32 lo_b = reg_alloc.ScratchGpr().cvt32();

    // Split both operands into zero-extended halfwords:
    // lo_* receives bits <15:0>, hi_* is shifted down so it holds bits <31:16>.
    code->movzx(lo_a, hi_a.cvt16());
    code->movzx(lo_b, hi_b.cvt16());
    code->shr(hi_a, 16);
    code->shr(hi_b, 16);

    // Low result word: a.lo (-/+) b.hi, then place bits <16:1> of that value in lo_a<31:16>.
    if (hi_is_sum) {
        code->sub(lo_a, hi_b);
    } else {
        code->add(lo_a, hi_b);
    }
    code->shl(lo_a, 15);

    // High result word: a.hi (+/-) b.lo, then place bits <16:1> of that value in hi_a<15:0>.
    if (hi_is_sum) {
        code->add(hi_a, lo_b);
    } else {
        code->sub(hi_a, lo_b);
    }
    code->shr(hi_a, 1);

    // Merge: shld moves hi_a<15:0> up into <31:16> and fills <15:0> from lo_a<31:16>.
    code->shld(hi_a, lo_a, 16);
}
|
||||||
|
|
||||||
|
/// Emits x64 code for the signed packed halving add/subtract-with-exchange operation.
///
/// Arg 0 and arg 1 of `inst` are the two packed operands; arg 2 is a 1-bit immediate:
///   - true:  the high result word holds the halved sum, the low word the halved difference.
///   - false: the high result word holds the halved difference, the low word the halved sum.
/// The result is produced in the register defined for `inst`.
void EmitX64::EmitPackedHalvingSubAddS16(IR::Block&, IR::Inst* inst) {
    IR::Value operand_a = inst->GetArg(0);
    IR::Value operand_b = inst->GetArg(1);
    const bool hi_is_sum = inst->GetArg(2).GetU1();

    // Allocation order: result/a first, then b, then the two scratch registers.
    const Xbyak::Reg32 hi_a = reg_alloc.UseDefGpr(operand_a, inst).cvt32();
    const Xbyak::Reg32 hi_b = reg_alloc.UseScratchGpr(operand_b).cvt32();
    const Xbyak::Reg32 lo_a = reg_alloc.ScratchGpr().cvt32();
    const Xbyak::Reg32 lo_b = reg_alloc.ScratchGpr().cvt32();

    // Split both operands into sign-extended halfwords:
    // lo_* receives bits <15:0>, hi_* is arithmetically shifted down so it holds bits <31:16>.
    code->movsx(lo_a, hi_a.cvt16());
    code->movsx(lo_b, hi_b.cvt16());
    code->sar(hi_a, 16);
    code->sar(hi_b, 16);

    // Low result word: a.lo (-/+) b.hi, then place bits <16:1> of that value in lo_a<31:16>.
    if (hi_is_sum) {
        code->sub(lo_a, hi_b);
    } else {
        code->add(lo_a, hi_b);
    }
    code->shl(lo_a, 15);

    // High result word: a.hi (+/-) b.lo, then place bits <16:1> of that value in hi_a<15:0>.
    // A logical shift is used here; only hi_a<15:0> survives the merge below.
    if (hi_is_sum) {
        code->add(hi_a, lo_b);
    } else {
        code->sub(hi_a, lo_b);
    }
    code->shr(hi_a, 1);

    // Merge: shld moves hi_a<15:0> up into <31:16> and fills <15:0> from lo_a<31:16>.
    code->shld(hi_a, lo_a, 16);
}
|
||||||
|
|
||||||
static void EmitPackedOperation(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
|
static void EmitPackedOperation(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
|
||||||
IR::Value a = inst->GetArg(0);
|
IR::Value a = inst->GetArg(0);
|
||||||
IR::Value b = inst->GetArg(1);
|
IR::Value b = inst->GetArg(1);
|
||||||
|
|
|
@ -442,6 +442,14 @@ Value IREmitter::PackedHalvingSubS16(const Value& a, const Value& b) {
|
||||||
return Inst(Opcode::PackedHalvingSubS16, {a, b});
|
return Inst(Opcode::PackedHalvingSubS16, {a, b});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Builds a PackedHalvingSubAddU16 IR instruction from packed operands `a` and `b`.
/// `asx` is passed along as a 1-bit immediate third argument.
Value IREmitter::PackedHalvingSubAddU16(const Value& a, const Value& b, bool asx) {
    const auto exchange_flag = Imm1(asx);
    return Inst(Opcode::PackedHalvingSubAddU16, {a, b, exchange_flag});
}
|
||||||
|
|
||||||
|
/// Builds a PackedHalvingSubAddS16 IR instruction from packed operands `a` and `b`.
/// `asx` is passed along as a 1-bit immediate third argument.
Value IREmitter::PackedHalvingSubAddS16(const Value& a, const Value& b, bool asx) {
    const auto exchange_flag = Imm1(asx);
    return Inst(Opcode::PackedHalvingSubAddS16, {a, b, exchange_flag});
}
|
||||||
|
|
||||||
Value IREmitter::PackedSaturatedAddU8(const Value& a, const Value& b) {
|
Value IREmitter::PackedSaturatedAddU8(const Value& a, const Value& b) {
|
||||||
return Inst(Opcode::PackedSaturatedAddU8, {a, b});
|
return Inst(Opcode::PackedSaturatedAddU8, {a, b});
|
||||||
}
|
}
|
||||||
|
|
|
@ -157,6 +157,8 @@ public:
|
||||||
Value PackedHalvingAddS16(const Value& a, const Value& b);
|
Value PackedHalvingAddS16(const Value& a, const Value& b);
|
||||||
Value PackedHalvingSubU16(const Value& a, const Value& b);
|
Value PackedHalvingSubU16(const Value& a, const Value& b);
|
||||||
Value PackedHalvingSubS16(const Value& a, const Value& b);
|
Value PackedHalvingSubS16(const Value& a, const Value& b);
|
||||||
|
// Unsigned packed halving sub/add with exchange. asx == true: high word = sum, low word = difference; false: the reverse.
Value PackedHalvingSubAddU16(const Value& a, const Value& b, bool asx);
|
||||||
|
// Signed packed halving sub/add with exchange. asx == true: high word = sum, low word = difference; false: the reverse.
Value PackedHalvingSubAddS16(const Value& a, const Value& b, bool asx);
|
||||||
Value PackedSaturatedAddU8(const Value& a, const Value& b);
|
Value PackedSaturatedAddU8(const Value& a, const Value& b);
|
||||||
Value PackedSaturatedAddS8(const Value& a, const Value& b);
|
Value PackedSaturatedAddS8(const Value& a, const Value& b);
|
||||||
Value PackedSaturatedSubU8(const Value& a, const Value& b);
|
Value PackedSaturatedSubU8(const Value& a, const Value& b);
|
||||||
|
|
|
@ -99,6 +99,8 @@ OPCODE(PackedHalvingAddU16, T::U32, T::U32, T::U32
|
||||||
OPCODE(PackedHalvingAddS16, T::U32, T::U32, T::U32 )
|
OPCODE(PackedHalvingAddS16, T::U32, T::U32, T::U32 )
|
||||||
OPCODE(PackedHalvingSubU16, T::U32, T::U32, T::U32 )
|
OPCODE(PackedHalvingSubU16, T::U32, T::U32, T::U32 )
|
||||||
OPCODE(PackedHalvingSubS16, T::U32, T::U32, T::U32 )
|
OPCODE(PackedHalvingSubS16, T::U32, T::U32, T::U32 )
|
||||||
|
// Operands: a (U32), b (U32), asx flag (U1). NOTE(review): the first type is presumably the result type — confirm against the OPCODE macro.
OPCODE(PackedHalvingSubAddU16, T::U32, T::U32, T::U32, T::U1 )
|
||||||
|
// Operands: a (U32), b (U32), asx flag (U1). NOTE(review): the first type is presumably the result type — confirm against the OPCODE macro.
OPCODE(PackedHalvingSubAddS16, T::U32, T::U32, T::U32, T::U1 )
|
||||||
OPCODE(PackedSaturatedAddU8, T::U32, T::U32, T::U32 )
|
OPCODE(PackedSaturatedAddU8, T::U32, T::U32, T::U32 )
|
||||||
OPCODE(PackedSaturatedAddS8, T::U32, T::U32, T::U32 )
|
OPCODE(PackedSaturatedAddS8, T::U32, T::U32, T::U32 )
|
||||||
OPCODE(PackedSaturatedSubU8, T::U32, T::U32, T::U32 )
|
OPCODE(PackedSaturatedSubU8, T::U32, T::U32, T::U32 )
|
||||||
|
|
|
@ -258,13 +258,23 @@ bool ArmTranslatorVisitor::arm_SHADD16(Cond cond, Reg n, Reg d, Reg m) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Translator for SHASX, shown as a side-by-side diff: where two code lines are paired,
// the first is the old stub line (UNUSED / InterpretThisInstruction) and the second is its replacement.
// The new version emits ir.PackedHalvingSubAddS16(Rn, Rm, true) and stores the result in Rd.
bool ArmTranslatorVisitor::arm_SHASX(Cond cond, Reg n, Reg d, Reg m) {
|
bool ArmTranslatorVisitor::arm_SHASX(Cond cond, Reg n, Reg d, Reg m) {
|
||||||
UNUSED(cond, n, d, m);
|
// New: using PC as d, n or m is reported through UnpredictableInstruction().
if (d == Reg::PC || n == Reg::PC || m == Reg::PC)
|
||||||
return InterpretThisInstruction();
|
return UnpredictableInstruction();
|
||||||
|
// New: the IR is only emitted when the condition check passes.
if (ConditionPassed(cond)) {
|
||||||
|
// asx == true: high word = halved sum, low word = halved difference.
auto result = ir.PackedHalvingSubAddS16(ir.GetRegister(n), ir.GetRegister(m), true);
|
||||||
|
ir.SetRegister(d, result);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Translator for SHSAX, shown as a side-by-side diff: where two code lines are paired,
// the first is the old stub line (UNUSED / InterpretThisInstruction) and the second is its replacement.
// The new version emits ir.PackedHalvingSubAddS16(Rn, Rm, false) and stores the result in Rd.
bool ArmTranslatorVisitor::arm_SHSAX(Cond cond, Reg n, Reg d, Reg m) {
|
bool ArmTranslatorVisitor::arm_SHSAX(Cond cond, Reg n, Reg d, Reg m) {
|
||||||
UNUSED(cond, n, d, m);
|
// New: using PC as d, n or m is reported through UnpredictableInstruction().
if (d == Reg::PC || n == Reg::PC || m == Reg::PC)
|
||||||
return InterpretThisInstruction();
|
return UnpredictableInstruction();
|
||||||
|
// New: the IR is only emitted when the condition check passes.
if (ConditionPassed(cond)) {
|
||||||
|
// asx == false: high word = halved difference, low word = halved sum.
auto result = ir.PackedHalvingSubAddS16(ir.GetRegister(n), ir.GetRegister(m), false);
|
||||||
|
ir.SetRegister(d, result);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ArmTranslatorVisitor::arm_SHSUB8(Cond cond, Reg n, Reg d, Reg m) {
|
bool ArmTranslatorVisitor::arm_SHSUB8(Cond cond, Reg n, Reg d, Reg m) {
|
||||||
|
@ -308,13 +318,23 @@ bool ArmTranslatorVisitor::arm_UHADD16(Cond cond, Reg n, Reg d, Reg m) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Translator for UHASX, shown as a side-by-side diff: where two code lines are paired,
// the first is the old stub line (UNUSED / InterpretThisInstruction) and the second is its replacement.
// The new version emits ir.PackedHalvingSubAddU16(Rn, Rm, true) and stores the result in Rd.
bool ArmTranslatorVisitor::arm_UHASX(Cond cond, Reg n, Reg d, Reg m) {
|
bool ArmTranslatorVisitor::arm_UHASX(Cond cond, Reg n, Reg d, Reg m) {
|
||||||
UNUSED(cond, n, d, m);
|
// New: using PC as d, n or m is reported through UnpredictableInstruction().
if (d == Reg::PC || n == Reg::PC || m == Reg::PC)
|
||||||
return InterpretThisInstruction();
|
return UnpredictableInstruction();
|
||||||
|
// New: the IR is only emitted when the condition check passes.
if (ConditionPassed(cond)) {
|
||||||
|
// asx == true: high word = halved sum, low word = halved difference.
auto result = ir.PackedHalvingSubAddU16(ir.GetRegister(n), ir.GetRegister(m), true);
|
||||||
|
ir.SetRegister(d, result);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Translator for UHSAX, shown as a side-by-side diff: where two code lines are paired,
// the first is the old stub line (UNUSED / InterpretThisInstruction) and the second is its replacement.
// The new version emits ir.PackedHalvingSubAddU16(Rn, Rm, false) and stores the result in Rd.
bool ArmTranslatorVisitor::arm_UHSAX(Cond cond, Reg n, Reg d, Reg m) {
|
bool ArmTranslatorVisitor::arm_UHSAX(Cond cond, Reg n, Reg d, Reg m) {
|
||||||
UNUSED(cond, n, d, m);
|
// New: using PC as d, n or m is reported through UnpredictableInstruction().
if (d == Reg::PC || n == Reg::PC || m == Reg::PC)
|
||||||
return InterpretThisInstruction();
|
return UnpredictableInstruction();
|
||||||
|
// New: the IR is only emitted when the condition check passes.
if (ConditionPassed(cond)) {
|
||||||
|
// asx == false: high word = halved difference, low word = halved sum.
auto result = ir.PackedHalvingSubAddU16(ir.GetRegister(n), ir.GetRegister(m), false);
|
||||||
|
ir.SetRegister(d, result);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ArmTranslatorVisitor::arm_UHSUB8(Cond cond, Reg n, Reg d, Reg m) {
|
bool ArmTranslatorVisitor::arm_UHSUB8(Cond cond, Reg n, Reg d, Reg m) {
|
||||||
|
|
Loading…
Reference in a new issue