A64: Implement FCMLA

This commit is contained in:
Lioncash 2019-03-02 22:26:55 -05:00 committed by MerryMage
parent 5ce17574f9
commit ed29ef8cca
2 changed files with 74 additions and 1 deletions

View file

@ -605,7 +605,7 @@ INST(INS_elt, "INS (element)", "01101
// Data Processing - FP and SIMD - SIMD Three same extra // Data Processing - FP and SIMD - SIMD Three same extra
INST(SDOT_vec, "SDOT (vector)", "0Q001110zz0mmmmm100101nnnnnddddd") INST(SDOT_vec, "SDOT (vector)", "0Q001110zz0mmmmm100101nnnnnddddd")
INST(UDOT_vec, "UDOT (vector)", "0Q101110zz0mmmmm100101nnnnnddddd") INST(UDOT_vec, "UDOT (vector)", "0Q101110zz0mmmmm100101nnnnnddddd")
//INST(FCMLA_vec, "FCMLA", "0Q101110zz0mmmmm110rr1nnnnnddddd") INST(FCMLA_vec, "FCMLA", "0Q101110zz0mmmmm110rr1nnnnnddddd")
INST(FCADD_vec, "FCADD", "0Q101110zz0mmmmm111r01nnnnnddddd") INST(FCADD_vec, "FCADD", "0Q101110zz0mmmmm111r01nnnnnddddd")
// Data Processing - FP and SIMD - SIMD Two-register misc // Data Processing - FP and SIMD - SIMD Two-register misc

View file

@ -53,6 +53,79 @@ bool TranslatorVisitor::UDOT_vec(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
return DotProduct(*this, Q, size, Vm, Vn, Vd, &IREmitter::ZeroExtendToWord); return DotProduct(*this, Q, size, Vm, Vn, Vd, &IREmitter::ZeroExtendToWord);
} }
bool TranslatorVisitor::FCMLA_vec(bool Q, Imm<2> size, Vec Vm, Imm<2> rot, Vec Vn, Vec Vd) {
if (size == 0) {
return UnallocatedEncoding();
}
if (!Q && size == 0b11) {
return UnallocatedEncoding();
}
const size_t esize = 8U << size.ZeroExtend();
// TODO: Currently we don't support half-precision floating point
if (esize == 16) {
return UnallocatedEncoding();
}
const size_t datasize = Q ? 128 : 64;
const size_t num_elements = datasize / esize;
const size_t num_iterations = num_elements / 2;
const IR::U128 operand1 = V(datasize, Vn);
const IR::U128 operand2 = V(datasize, Vm);
const IR::U128 operand3 = V(datasize, Vd);
IR::U128 result = ir.ZeroVector();
IR::U32U64 element1;
IR::U32U64 element2;
IR::U32U64 element3;
IR::U32U64 element4;
for (size_t e = 0; e < num_iterations; ++e) {
const size_t first = e * 2;
const size_t second = first + 1;
switch (rot.ZeroExtend()) {
case 0b00: // 0 degrees
element1 = ir.VectorGetElement(esize, operand2, first);
element2 = ir.VectorGetElement(esize, operand1, first);
element3 = ir.VectorGetElement(esize, operand2, second);
element4 = ir.VectorGetElement(esize, operand1, first);
break;
case 0b01: // 90 degrees
element1 = ir.FPNeg(ir.VectorGetElement(esize, operand2, second));
element2 = ir.VectorGetElement(esize, operand1, second);
element3 = ir.VectorGetElement(esize, operand2, first);
element4 = ir.VectorGetElement(esize, operand1, second);
break;
case 0b10: // 180 degrees
element1 = ir.FPNeg(ir.VectorGetElement(esize, operand2, first));
element2 = ir.VectorGetElement(esize, operand1, first);
element3 = ir.FPNeg(ir.VectorGetElement(esize, operand2, second));
element4 = ir.VectorGetElement(esize, operand1, first);
break;
case 0b11: // 270 degrees
element1 = ir.VectorGetElement(esize, operand2, second);
element2 = ir.VectorGetElement(esize, operand1, second);
element3 = ir.FPNeg(ir.VectorGetElement(esize, operand2, first));
element4 = ir.VectorGetElement(esize, operand1, second);
break;
}
const IR::U32U64 operand3_elem1 = ir.VectorGetElement(esize, operand3, first);
const IR::U32U64 operand3_elem2 = ir.VectorGetElement(esize, operand3, second);
result = ir.VectorSetElement(esize, result, first,
ir.FPMulAdd(operand3_elem1, element2, element1, true));
result = ir.VectorSetElement(esize, result, second,
ir.FPMulAdd(operand3_elem2, element4, element3, true));
}
ir.SetQ(Vd, result);
return true;
}
bool TranslatorVisitor::FCADD_vec(bool Q, Imm<2> size, Vec Vm, Imm<1> rot, Vec Vn, Vec Vd) { bool TranslatorVisitor::FCADD_vec(bool Q, Imm<2> size, Vec Vm, Imm<1> rot, Vec Vn, Vec Vd) {
if (size == 0) { if (size == 0) {
return UnallocatedEncoding(); return UnallocatedEncoding();