A64: Implement SQ{ADD, SUB}, and UQ{ADD, SUB}'s vector variants

Currently we implement these in terms of the scalar variants, operating on
one element at a time. Falling back to the interpreter is slow enough that
even this element-wise approach is more effective than doing that.
This commit is contained in:
Lioncash 2018-08-13 14:50:29 -04:00 committed by MerryMage
parent a4b0e2ace6
commit 7ef7def661
2 changed files with 69 additions and 11 deletions

View file

@ -702,10 +702,10 @@ INST(UMULL_vec, "UMULL, UMULL2 (vector)", "0Q101
// Data Processing - FP and SIMD - SIMD three same // Data Processing - FP and SIMD - SIMD three same
INST(SHADD, "SHADD", "0Q001110zz1mmmmm000001nnnnnddddd") INST(SHADD, "SHADD", "0Q001110zz1mmmmm000001nnnnnddddd")
//INST(SQADD_2, "SQADD", "0Q001110zz1mmmmm000011nnnnnddddd") INST(SQADD_2, "SQADD", "0Q001110zz1mmmmm000011nnnnnddddd")
INST(SRHADD, "SRHADD", "0Q001110zz1mmmmm000101nnnnnddddd") INST(SRHADD, "SRHADD", "0Q001110zz1mmmmm000101nnnnnddddd")
INST(SHSUB, "SHSUB", "0Q001110zz1mmmmm001001nnnnnddddd") INST(SHSUB, "SHSUB", "0Q001110zz1mmmmm001001nnnnnddddd")
//INST(SQSUB_2, "SQSUB", "0Q001110zz1mmmmm001011nnnnnddddd") INST(SQSUB_2, "SQSUB", "0Q001110zz1mmmmm001011nnnnnddddd")
INST(CMGT_reg_2, "CMGT (register)", "0Q001110zz1mmmmm001101nnnnnddddd") INST(CMGT_reg_2, "CMGT (register)", "0Q001110zz1mmmmm001101nnnnnddddd")
INST(CMGE_reg_2, "CMGE (register)", "0Q001110zz1mmmmm001111nnnnnddddd") INST(CMGE_reg_2, "CMGE (register)", "0Q001110zz1mmmmm001111nnnnnddddd")
INST(SSHL_2, "SSHL", "0Q001110zz1mmmmm010001nnnnnddddd") INST(SSHL_2, "SSHL", "0Q001110zz1mmmmm010001nnnnnddddd")
@ -743,10 +743,10 @@ INST(FRSQRTS_4, "FRSQRTS", "0Q001
INST(ORR_asimd_reg, "ORR (vector, register)", "0Q001110101mmmmm000111nnnnnddddd") INST(ORR_asimd_reg, "ORR (vector, register)", "0Q001110101mmmmm000111nnnnnddddd")
INST(ORN_asimd, "ORN (vector)", "0Q001110111mmmmm000111nnnnnddddd") INST(ORN_asimd, "ORN (vector)", "0Q001110111mmmmm000111nnnnnddddd")
INST(UHADD, "UHADD", "0Q101110zz1mmmmm000001nnnnnddddd") INST(UHADD, "UHADD", "0Q101110zz1mmmmm000001nnnnnddddd")
//INST(UQADD_2, "UQADD", "0Q101110zz1mmmmm000011nnnnnddddd") INST(UQADD_2, "UQADD", "0Q101110zz1mmmmm000011nnnnnddddd")
INST(URHADD, "URHADD", "0Q101110zz1mmmmm000101nnnnnddddd") INST(URHADD, "URHADD", "0Q101110zz1mmmmm000101nnnnnddddd")
INST(UHSUB, "UHSUB", "0Q101110zz1mmmmm001001nnnnnddddd") INST(UHSUB, "UHSUB", "0Q101110zz1mmmmm001001nnnnnddddd")
//INST(UQSUB_2, "UQSUB", "0Q101110zz1mmmmm001011nnnnnddddd") INST(UQSUB_2, "UQSUB", "0Q101110zz1mmmmm001011nnnnnddddd")
INST(CMHI_2, "CMHI (register)", "0Q101110zz1mmmmm001101nnnnnddddd") INST(CMHI_2, "CMHI (register)", "0Q101110zz1mmmmm001101nnnnnddddd")
INST(CMHS_2, "CMHS (register)", "0Q101110zz1mmmmm001111nnnnnddddd") INST(CMHS_2, "CMHS (register)", "0Q101110zz1mmmmm001111nnnnnddddd")
INST(USHL_2, "USHL", "0Q101110zz1mmmmm010001nnnnnddddd") INST(USHL_2, "USHL", "0Q101110zz1mmmmm010001nnnnnddddd")

View file

@ -8,7 +8,7 @@
namespace Dynarmic::A64 { namespace Dynarmic::A64 {
namespace { namespace {
enum class HighNarrowingOp { enum class Operation {
Add, Add,
Subtract, Subtract,
}; };
@ -19,7 +19,7 @@ enum class ExtraBehavior {
}; };
bool HighNarrowingOperation(TranslatorVisitor& v, bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd, bool HighNarrowingOperation(TranslatorVisitor& v, bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd,
HighNarrowingOp op, ExtraBehavior behavior) { Operation op, ExtraBehavior behavior) {
if (size == 0b11) { if (size == 0b11) {
return v.ReservedValue(); return v.ReservedValue();
} }
@ -31,7 +31,7 @@ bool HighNarrowingOperation(TranslatorVisitor& v, bool Q, Imm<2> size, Vec Vm, V
const IR::U128 operand1 = v.ir.GetQ(Vn); const IR::U128 operand1 = v.ir.GetQ(Vn);
const IR::U128 operand2 = v.ir.GetQ(Vm); const IR::U128 operand2 = v.ir.GetQ(Vm);
IR::U128 wide = [&] { IR::U128 wide = [&] {
if (op == HighNarrowingOp::Add) { if (op == Operation::Add) {
return v.ir.VectorAdd(doubled_esize, operand1, operand2); return v.ir.VectorAdd(doubled_esize, operand1, operand2);
} }
return v.ir.VectorSub(doubled_esize, operand1, operand2); return v.ir.VectorSub(doubled_esize, operand1, operand2);
@ -247,6 +247,48 @@ bool PairedMinMaxOperation(TranslatorVisitor& v, bool Q, Imm<2> size, Vec Vm, Ve
return true; return true;
} }
// Shared translation for the vector forms of the saturating add/subtract
// instructions (SQADD, SQSUB, UQADD, UQSUB).
//
// The vector is processed one element at a time: each pair of elements from
// Vn and Vm is combined with the scalar saturating operation chosen by
// `op` (add or subtract) and `sign` (signed or unsigned). The per-element
// overflow indication is handed to OrQC, and the per-element result is
// inserted into the destination vector.
//
// Returns the result of v.ReservedValue() for the size == 0b11, Q == 0
// encoding; otherwise emits the IR, writes Vd and returns true.
bool SaturatingArithmeticOperation(TranslatorVisitor& v, bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd,
                                   Operation op, Signedness sign) {
    // A 64-bit element (size == 0b11) in a 64-bit vector (Q == 0) is rejected.
    if (!Q && size == 0b11) {
        return v.ReservedValue();
    }

    const size_t datasize = Q ? 128 : 64;
    const size_t esize = 8 << size.ZeroExtend();
    const size_t elements = datasize / esize;

    const IR::U128 lhs = v.V(datasize, Vn);
    const IR::U128 rhs = v.V(datasize, Vm);
    IR::U128 result = v.ir.ZeroVector();

    // The selection of the scalar operation does not vary between elements.
    const bool is_signed = sign == Signedness::Signed;
    const bool is_add = op == Operation::Add;

    for (size_t index = 0; index < elements; ++index) {
        const IR::UAny lhs_elem = v.ir.VectorGetElement(esize, lhs, index);
        const IR::UAny rhs_elem = v.ir.VectorGetElement(esize, rhs, index);

        const auto saturated = [&] {
            if (is_signed) {
                return is_add ? v.ir.SignedSaturatedAdd(lhs_elem, rhs_elem)
                              : v.ir.SignedSaturatedSub(lhs_elem, rhs_elem);
            }
            return is_add ? v.ir.UnsignedSaturatedAdd(lhs_elem, rhs_elem)
                          : v.ir.UnsignedSaturatedSub(lhs_elem, rhs_elem);
        }();

        // Record this element's overflow before storing its result.
        v.ir.OrQC(saturated.overflow);
        result = v.ir.VectorSetElement(esize, result, index, saturated.result);
    }

    v.V(datasize, Vd, result);
    return true;
}
} // Anonymous namespace } // Anonymous namespace
bool TranslatorVisitor::CMGT_reg_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) { bool TranslatorVisitor::CMGT_reg_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
@ -347,19 +389,19 @@ bool TranslatorVisitor::MUL_vec(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
} }
bool TranslatorVisitor::ADDHN(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) { bool TranslatorVisitor::ADDHN(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
return HighNarrowingOperation(*this, Q, size, Vm, Vn, Vd, HighNarrowingOp::Add, ExtraBehavior::None); return HighNarrowingOperation(*this, Q, size, Vm, Vn, Vd, Operation::Add, ExtraBehavior::None);
} }
bool TranslatorVisitor::RADDHN(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) { bool TranslatorVisitor::RADDHN(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
return HighNarrowingOperation(*this, Q, size, Vm, Vn, Vd, HighNarrowingOp::Add, ExtraBehavior::Round); return HighNarrowingOperation(*this, Q, size, Vm, Vn, Vd, Operation::Add, ExtraBehavior::Round);
} }
bool TranslatorVisitor::SUBHN(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) { bool TranslatorVisitor::SUBHN(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
return HighNarrowingOperation(*this, Q, size, Vm, Vn, Vd, HighNarrowingOp::Subtract, ExtraBehavior::None); return HighNarrowingOperation(*this, Q, size, Vm, Vn, Vd, Operation::Subtract, ExtraBehavior::None);
} }
bool TranslatorVisitor::RSUBHN(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) { bool TranslatorVisitor::RSUBHN(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
return HighNarrowingOperation(*this, Q, size, Vm, Vn, Vd, HighNarrowingOp::Subtract, ExtraBehavior::Round); return HighNarrowingOperation(*this, Q, size, Vm, Vn, Vd, Operation::Subtract, ExtraBehavior::Round);
} }
bool TranslatorVisitor::SHADD(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) { bool TranslatorVisitor::SHADD(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
@ -394,6 +436,14 @@ bool TranslatorVisitor::SHSUB(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
return true; return true;
} }
bool TranslatorVisitor::SQADD_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
return SaturatingArithmeticOperation(*this, Q, size, Vm, Vn, Vd, Operation::Add, Signedness::Signed);
}
bool TranslatorVisitor::SQSUB_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
return SaturatingArithmeticOperation(*this, Q, size, Vm, Vn, Vd, Operation::Subtract, Signedness::Signed);
}
bool TranslatorVisitor::SRHADD(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) { bool TranslatorVisitor::SRHADD(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
return RoundingHalvingAdd(*this, Q, size, Vm, Vn, Vd, Signedness::Signed); return RoundingHalvingAdd(*this, Q, size, Vm, Vn, Vd, Signedness::Signed);
} }
@ -430,6 +480,14 @@ bool TranslatorVisitor::UHSUB(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
return true; return true;
} }
bool TranslatorVisitor::UQADD_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
return SaturatingArithmeticOperation(*this, Q, size, Vm, Vn, Vd, Operation::Add, Signedness::Unsigned);
}
bool TranslatorVisitor::UQSUB_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
return SaturatingArithmeticOperation(*this, Q, size, Vm, Vn, Vd, Operation::Subtract, Signedness::Unsigned);
}
bool TranslatorVisitor::URHADD(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) { bool TranslatorVisitor::URHADD(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
return RoundingHalvingAdd(*this, Q, size, Vm, Vn, Vd, Signedness::Unsigned); return RoundingHalvingAdd(*this, Q, size, Vm, Vn, Vd, Signedness::Unsigned);
} }