From 8fb37e0e4f77cd860c2ffa33fb87409031c68593 Mon Sep 17 00:00:00 2001 From: Merry Date: Sat, 6 Aug 2022 18:00:14 +0100 Subject: [PATCH] IR: Introduce VectorPaired{Min,Max}Lower --- .../backend/arm64/emit_arm64_vector.cpp | 96 +++++++++++++++++++ src/dynarmic/backend/x64/emit_x64_vector.cpp | 95 ++++++++++++++++++ .../A64/translate/impl/simd_three_same.cpp | 12 +-- src/dynarmic/ir/ir_emitter.cpp | 52 ++++++++++ src/dynarmic/ir/ir_emitter.h | 4 + src/dynarmic/ir/opcodes.inc | 12 +++ 6 files changed, 263 insertions(+), 8 deletions(-) diff --git a/src/dynarmic/backend/arm64/emit_arm64_vector.cpp b/src/dynarmic/backend/arm64/emit_arm64_vector.cpp index bfb17053..770bdc51 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_vector.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_vector.cpp @@ -1150,6 +1150,102 @@ void EmitIR(oaknut::CodeGenerator& code, EmitCon EmitThreeOpArranged<32>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.UMINP(Vresult, Va, Vb); }); } +template<> +void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { + (void)code; + (void)ctx; + (void)inst; + ASSERT_FALSE("Unimplemented"); +} + +template<> +void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { + (void)code; + (void)ctx; + (void)inst; + ASSERT_FALSE("Unimplemented"); +} + +template<> +void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { + (void)code; + (void)ctx; + (void)inst; + ASSERT_FALSE("Unimplemented"); +} + +template<> +void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { + (void)code; + (void)ctx; + (void)inst; + ASSERT_FALSE("Unimplemented"); +} + +template<> +void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { + (void)code; + (void)ctx; + (void)inst; + ASSERT_FALSE("Unimplemented"); +} + +template<> +void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { + (void)code; + (void)ctx; + (void)inst; + ASSERT_FALSE("Unimplemented"); +} + +template<> +void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { + (void)code; + (void)ctx; + (void)inst; + ASSERT_FALSE("Unimplemented"); +} + +template<> +void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { + (void)code; + (void)ctx; + (void)inst; + ASSERT_FALSE("Unimplemented"); +} + +template<> +void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { + (void)code; + (void)ctx; + (void)inst; + ASSERT_FALSE("Unimplemented"); +} + +template<> +void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { + (void)code; + (void)ctx; + (void)inst; + ASSERT_FALSE("Unimplemented"); +} + +template<> +void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { + (void)code; + (void)ctx; + (void)inst; + ASSERT_FALSE("Unimplemented"); +} + +template<> +void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { + (void)code; + (void)ctx; + (void)inst; + ASSERT_FALSE("Unimplemented"); +} + template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { EmitThreeOpArranged<8>(code, ctx, inst, [&](auto Vresult, auto Va, auto Vb) { code.PMUL(Vresult, Va, Vb); }); diff --git a/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/backend/x64/emit_x64_vector.cpp index b402b217..156ee13d 100644 --- a/src/dynarmic/backend/x64/emit_x64_vector.cpp +++ b/src/dynarmic/backend/x64/emit_x64_vector.cpp @@ -2646,6 +2646,19 @@ static void PairedOperation(VectorArray& result, const VectorArray& x, con } } +template +static void LowerPairedOperation(VectorArray& result, const VectorArray& x, const VectorArray& y, Function fn) { + const size_t range = x.size() / 4; + + for (size_t i = 0; i < range; i++) { + result[i] = fn(x[2 * i], x[2 * i + 1]); + } + + for (size_t i = 0; i < range; i++) { + result[range + i] = fn(y[2 * i], y[2 * i + 1]); + } +} + template static void PairedMax(VectorArray& result, const VectorArray& x, const VectorArray& y) { PairedOperation(result, x, y, [](auto a, auto b) { return std::max(a, b); }); @@ -2656,6 +2669,16 @@ static void PairedMin(VectorArray& result, const VectorArray& x, const Vec PairedOperation(result, x, y, [](auto a, auto b) { return std::min(a, b); }); } +template +static void LowerPairedMax(VectorArray& result, const VectorArray& x, const VectorArray& y) { + LowerPairedOperation(result, x, y, [](auto a, auto b) { return std::max(a, b); }); +} + +template +static void LowerPairedMin(VectorArray& result, const VectorArray& x, const VectorArray& y) { + LowerPairedOperation(result, x, y, [](auto a, auto b) { return std::min(a, b); }); +} + void EmitX64::EmitVectorPairedMaxS8(EmitContext& ctx, IR::Inst* inst) { EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { PairedMax(result, a, b); @@ -2826,6 +2849,78 @@ void EmitX64::EmitVectorPairedMinU32(EmitContext& ctx, IR::Inst* inst) { } } +void EmitX64::EmitVectorPairedMaxLowerS8(EmitContext& ctx, IR::Inst* inst) { + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { + LowerPairedMax(result, a, b); + }); +} + +void EmitX64::EmitVectorPairedMaxLowerS16(EmitContext& ctx, IR::Inst* inst) { + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { + LowerPairedMax(result, a, b); + }); +} + +void EmitX64::EmitVectorPairedMaxLowerS32(EmitContext& ctx, IR::Inst* inst) { + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { + LowerPairedMax(result, a, b); + }); +} + +void EmitX64::EmitVectorPairedMaxLowerU8(EmitContext& ctx, IR::Inst* inst) { + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { + LowerPairedMax(result, a, b); + }); +} + +void EmitX64::EmitVectorPairedMaxLowerU16(EmitContext& ctx, IR::Inst* inst) { + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { + LowerPairedMax(result, a, b); + }); +} + +void EmitX64::EmitVectorPairedMaxLowerU32(EmitContext& ctx, IR::Inst* inst) { + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { + LowerPairedMax(result, a, b); + }); +} + +void EmitX64::EmitVectorPairedMinLowerS8(EmitContext& ctx, IR::Inst* inst) { + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { + LowerPairedMin(result, a, b); + }); +} + +void EmitX64::EmitVectorPairedMinLowerS16(EmitContext& ctx, IR::Inst* inst) { + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { + LowerPairedMin(result, a, b); + }); +} + +void EmitX64::EmitVectorPairedMinLowerS32(EmitContext& ctx, IR::Inst* inst) { + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { + LowerPairedMin(result, a, b); + }); +} + +void EmitX64::EmitVectorPairedMinLowerU8(EmitContext& ctx, IR::Inst* inst) { + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { + LowerPairedMin(result, a, b); + }); +} + +void EmitX64::EmitVectorPairedMinLowerU16(EmitContext& ctx, IR::Inst* inst) { + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { + LowerPairedMin(result, a, b); + }); +} + +void EmitX64::EmitVectorPairedMinLowerU32(EmitContext& ctx, IR::Inst* inst) { + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { + LowerPairedMin(result, a, b); + }); +} + template static D PolynomialMultiply(T lhs, T rhs) { constexpr size_t bit_size = mcl::bitsizeof; diff --git a/src/dynarmic/frontend/A64/translate/impl/simd_three_same.cpp b/src/dynarmic/frontend/A64/translate/impl/simd_three_same.cpp index 6d78c537..1b4fcfad 100644 --- a/src/dynarmic/frontend/A64/translate/impl/simd_three_same.cpp +++ b/src/dynarmic/frontend/A64/translate/impl/simd_three_same.cpp @@ -264,25 +264,21 @@ bool PairedMinMaxOperation(TranslatorVisitor& v, bool Q, Imm<2> size, Vec Vm, Ve switch (operation) { case MinMaxOperation::Max: if (sign == Signedness::Signed) { - return v.ir.VectorPairedMaxSigned(esize, operand1, operand2); + return Q ? v.ir.VectorPairedMaxSigned(esize, operand1, operand2) : v.ir.VectorPairedMaxSignedLower(esize, operand1, operand2); } - return v.ir.VectorPairedMaxUnsigned(esize, operand1, operand2); + return Q ? v.ir.VectorPairedMaxUnsigned(esize, operand1, operand2) : v.ir.VectorPairedMaxUnsignedLower(esize, operand1, operand2); case MinMaxOperation::Min: if (sign == Signedness::Signed) { - return v.ir.VectorPairedMinSigned(esize, operand1, operand2); + return Q ? v.ir.VectorPairedMinSigned(esize, operand1, operand2) : v.ir.VectorPairedMinSignedLower(esize, operand1, operand2); } - return v.ir.VectorPairedMinUnsigned(esize, operand1, operand2); + return Q ? v.ir.VectorPairedMinUnsigned(esize, operand1, operand2) : v.ir.VectorPairedMinUnsignedLower(esize, operand1, operand2); default: UNREACHABLE(); } }(); - if (datasize == 64) { - result = v.ir.VectorShuffleWords(result, 0b11011000); - } - v.V(datasize, Vd, result); return true; } diff --git a/src/dynarmic/ir/ir_emitter.cpp b/src/dynarmic/ir/ir_emitter.cpp index 9d1746b3..525a78a3 100644 --- a/src/dynarmic/ir/ir_emitter.cpp +++ b/src/dynarmic/ir/ir_emitter.cpp @@ -1550,6 +1550,58 @@ U128 IREmitter::VectorPairedMinUnsigned(size_t esize, const U128& a, const U128& } } +U128 IREmitter::VectorPairedMaxSignedLower(size_t esize, const U128& a, const U128& b) { + switch (esize) { + case 8: + return Inst(Opcode::VectorPairedMaxLowerS8, a, b); + case 16: + return Inst(Opcode::VectorPairedMaxLowerS16, a, b); + case 32: + return Inst(Opcode::VectorPairedMaxLowerS32, a, b); + default: + UNREACHABLE(); + } +} + +U128 IREmitter::VectorPairedMaxUnsignedLower(size_t esize, const U128& a, const U128& b) { + switch (esize) { + case 8: + return Inst(Opcode::VectorPairedMaxLowerU8, a, b); + case 16: + return Inst(Opcode::VectorPairedMaxLowerU16, a, b); + case 32: + return Inst(Opcode::VectorPairedMaxLowerU32, a, b); + default: + UNREACHABLE(); + } +} + +U128 IREmitter::VectorPairedMinSignedLower(size_t esize, const U128& a, const U128& b) { + switch (esize) { + case 8: + return Inst(Opcode::VectorPairedMinLowerS8, a, b); + case 16: + return Inst(Opcode::VectorPairedMinLowerS16, a, b); + case 32: + return Inst(Opcode::VectorPairedMinLowerS32, a, b); + default: + UNREACHABLE(); + } +} + +U128 IREmitter::VectorPairedMinUnsignedLower(size_t esize, const U128& a, const U128& b) { + switch (esize) { + case 8: + return Inst(Opcode::VectorPairedMinLowerU8, a, b); + case 16: + return Inst(Opcode::VectorPairedMinLowerU16, a, b); + case 32: + return Inst(Opcode::VectorPairedMinLowerU32, a, b); + default: + UNREACHABLE(); + } +} + U128 IREmitter::VectorPolynomialMultiply(const U128& a, const U128& b) { return Inst(Opcode::VectorPolynomialMultiply8, a, b); } diff --git a/src/dynarmic/ir/ir_emitter.h b/src/dynarmic/ir/ir_emitter.h index 6f6b499c..f679a344 100644 --- a/src/dynarmic/ir/ir_emitter.h +++ b/src/dynarmic/ir/ir_emitter.h @@ -277,6 +277,10 @@ public: U128 VectorPairedMaxUnsigned(size_t esize, const U128& a, const U128& b); U128 VectorPairedMinSigned(size_t esize, const U128& a, const U128& b); U128 VectorPairedMinUnsigned(size_t esize, const U128& a, const U128& b); + U128 VectorPairedMaxSignedLower(size_t esize, const U128& a, const U128& b); + U128 VectorPairedMaxUnsignedLower(size_t esize, const U128& a, const U128& b); + U128 VectorPairedMinSignedLower(size_t esize, const U128& a, const U128& b); + U128 VectorPairedMinUnsignedLower(size_t esize, const U128& a, const U128& b); U128 VectorPolynomialMultiply(const U128& a, const U128& b); U128 VectorPolynomialMultiplyLong(size_t esize, const U128& a, const U128& b); U128 VectorPopulationCount(const U128& a); diff --git a/src/dynarmic/ir/opcodes.inc b/src/dynarmic/ir/opcodes.inc index 1af47b0a..add9aa93 100644 --- a/src/dynarmic/ir/opcodes.inc +++ b/src/dynarmic/ir/opcodes.inc @@ -438,6 +438,18 @@ OPCODE(VectorPairedMinS32, U128, U128 OPCODE(VectorPairedMinU8, U128, U128, U128 ) OPCODE(VectorPairedMinU16, U128, U128, U128 ) OPCODE(VectorPairedMinU32, U128, U128, U128 ) +OPCODE(VectorPairedMaxLowerS8, U128, U128, U128 ) +OPCODE(VectorPairedMaxLowerS16, U128, U128, U128 ) +OPCODE(VectorPairedMaxLowerS32, U128, U128, U128 ) +OPCODE(VectorPairedMaxLowerU8, U128, U128, U128 ) +OPCODE(VectorPairedMaxLowerU16, U128, U128, U128 ) +OPCODE(VectorPairedMaxLowerU32, U128, U128, U128 ) +OPCODE(VectorPairedMinLowerS8, U128, U128, U128 ) +OPCODE(VectorPairedMinLowerS16, U128, U128, U128 ) +OPCODE(VectorPairedMinLowerS32, U128, U128, U128 ) +OPCODE(VectorPairedMinLowerU8, U128, U128, U128 ) +OPCODE(VectorPairedMinLowerU16, U128, U128, U128 ) +OPCODE(VectorPairedMinLowerU32, U128, U128, U128 ) OPCODE(VectorPolynomialMultiply8, U128, U128, U128 ) OPCODE(VectorPolynomialMultiplyLong8, U128, U128, U128 ) OPCODE(VectorPolynomialMultiplyLong64, U128, U128, U128 )