// Patch summary (explanatory preamble; patch tools skip text that precedes the
// first "diff --git" header).
//
// NOTE(review): the patch text below appears to have had its line breaks
// collapsed into spaces; the original newlines presumably need restoring
// before a patch tool will accept it -- confirm against the upstream commit.
//
// src/frontend/A64/decoder/a64.inc
//   Enables four decoder table entries that were previously commented out:
//   FMAXNMV_2, FMAXV_2, FMINNMV_2 and FMINV_2. The corresponding *_1 entries
//   stay commented out.
//
// src/frontend/A64/translate/impl/simd_across_lanes.cpp
//   - Adds enum class MinMaxOperation { Max, MaxNumeric, Min, MinNumeric }.
//   - Adds the helper FPMinMax(v, Q, sz, Vn, Vd, operation) ahead of the
//     closing brace of the anonymous namespace:
//       * returns v.ReservedValue() when Q is false or sz is true;
//       * otherwise works with fixed esize = 32 and datasize = 128, so
//         elements = 4;
//       * reads the source operand with v.V(datasize, Vn);
//       * `op` switches on `operation` and calls v.ir.FPMax, FPMaxNumeric,
//         FPMin or FPMinNumeric, always passing `true` as the third argument
//         (the meaning of that flag is not visible here -- TODO confirm);
//       * `reduce(start, end)` folds elements [start, end) left to right
//         with `op`, starting from element `start`;
//       * the upper half is reduced first (hi), then the lower half (lo),
//         and the final value is op(lo, hi);
//       * stores the result with v.V_scalar(esize, Vd, result) and returns true.
//   - Adds TranslatorVisitor::FMAXNMV_2, FMAXV_2, FMINNMV_2 and FMINV_2, each
//     forwarding to FPMinMax with MaxNumeric, Max, MinNumeric and Min
//     respectively.
diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index e22bcde4..27f6db1d 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -664,13 +664,13 @@ INST(SADDLV, "SADDLV", "0Q001 //INST(SMINV, "SMINV", "0Q001110zz110001101010nnnnnddddd") INST(ADDV, "ADDV", "0Q001110zz110001101110nnnnnddddd") //INST(FMAXNMV_1, "FMAXNMV", "0Q00111000110000110010nnnnnddddd") -//INST(FMAXNMV_2, "FMAXNMV", "0Q1011100z110000110010nnnnnddddd") +INST(FMAXNMV_2, "FMAXNMV", "0Q1011100z110000110010nnnnnddddd") //INST(FMAXV_1, "FMAXV", "0Q00111000110000111110nnnnnddddd") -//INST(FMAXV_2, "FMAXV", "0Q1011100z110000111110nnnnnddddd") +INST(FMAXV_2, "FMAXV", "0Q1011100z110000111110nnnnnddddd") //INST(FMINNMV_1, "FMINNMV", "0Q00111010110000110010nnnnnddddd") -//INST(FMINNMV_2, "FMINNMV", "0Q1011101z110000110010nnnnnddddd") +INST(FMINNMV_2, "FMINNMV", "0Q1011101z110000110010nnnnnddddd") //INST(FMINV_1, "FMINV", "0Q00111010110000111110nnnnnddddd") -//INST(FMINV_2, "FMINV", "0Q1011101z110000111110nnnnnddddd") +INST(FMINV_2, "FMINV", "0Q1011101z110000111110nnnnnddddd") INST(UADDLV, "UADDLV", "0Q101110zz110000001110nnnnnddddd") //INST(UMAXV, "UMAXV", "0Q101110zz110000101010nnnnnddddd") //INST(UMINV, "UMINV", "0Q101110zz110001101010nnnnnddddd") diff --git a/src/frontend/A64/translate/impl/simd_across_lanes.cpp b/src/frontend/A64/translate/impl/simd_across_lanes.cpp index 559adee2..771c1f57 100644 --- a/src/frontend/A64/translate/impl/simd_across_lanes.cpp +++ b/src/frontend/A64/translate/impl/simd_across_lanes.cpp @@ -49,6 +49,60 @@ bool LongAdd(TranslatorVisitor& v, bool Q, Imm<2> size, Vec Vn, Vec Vd, Signedne return true; } + +enum class MinMaxOperation { + Max, + MaxNumeric, + Min, + MinNumeric, +}; + +bool FPMinMax(TranslatorVisitor& v, bool Q, bool sz, Vec Vn, Vec Vd, MinMaxOperation operation) { + if (!Q || sz) { + return v.ReservedValue(); + } + + const size_t esize = 32; + const size_t datasize = 128; + const size_t elements = 
datasize / esize; + + const IR::U128 operand = v.V(datasize, Vn); + + const auto op = [&](const IR::U32U64& lhs, const IR::U32U64& rhs) { + switch (operation) { + case MinMaxOperation::Max: + return v.ir.FPMax(lhs, rhs, true); + case MinMaxOperation::MaxNumeric: + return v.ir.FPMaxNumeric(lhs, rhs, true); + case MinMaxOperation::Min: + return v.ir.FPMin(lhs, rhs, true); + case MinMaxOperation::MinNumeric: + return v.ir.FPMinNumeric(lhs, rhs, true); + default: + UNREACHABLE(); + return IR::U32U64{}; + } + }; + + const auto reduce = [&](size_t start, size_t end) { + IR::U32U64 result = v.ir.VectorGetElement(esize, operand, start); + + for (size_t i = start + 1; i < end; i++) { + const IR::U32U64 element = v.ir.VectorGetElement(esize, operand, i); + + result = op(result, element); + } + + return result; + }; + + const IR::U32U64 hi = reduce(elements / 2, elements); + const IR::U32U64 lo = reduce(0, elements / 2); + const IR::U32U64 result = op(lo, hi); + + v.V_scalar(esize, Vd, result); + return true; +} } // Anonymous namespace bool TranslatorVisitor::ADDV(bool Q, Imm<2> size, Vec Vn, Vec Vd) { @@ -82,6 +136,22 @@ bool TranslatorVisitor::ADDV(bool Q, Imm<2> size, Vec Vn, Vec Vd) { return true; } +bool TranslatorVisitor::FMAXNMV_2(bool Q, bool sz, Vec Vn, Vec Vd) { + return FPMinMax(*this, Q, sz, Vn, Vd, MinMaxOperation::MaxNumeric); +} + +bool TranslatorVisitor::FMAXV_2(bool Q, bool sz, Vec Vn, Vec Vd) { + return FPMinMax(*this, Q, sz, Vn, Vd, MinMaxOperation::Max); +} + +bool TranslatorVisitor::FMINNMV_2(bool Q, bool sz, Vec Vn, Vec Vd) { + return FPMinMax(*this, Q, sz, Vn, Vd, MinMaxOperation::MinNumeric); +} + +bool TranslatorVisitor::FMINV_2(bool Q, bool sz, Vec Vn, Vec Vd) { + return FPMinMax(*this, Q, sz, Vn, Vd, MinMaxOperation::Min); +} + bool TranslatorVisitor::SADDLV(bool Q, Imm<2> size, Vec Vn, Vec Vd) { return LongAdd(*this, Q, size, Vn, Vd, Signedness::Signed); }