Add paired (pairwise) maximum/minimum vector operations.

Introduces the IR opcodes VectorPairedMax{S,U}{8,16,32} and
VectorPairedMin{S,U}{8,16,32}, the IREmitter helpers that select an opcode
from the element size, and x64 backend emitters that route each opcode through
EmitTwoArgumentFallback. The fallback helper PairedOperation reduces adjacent
element pairs: the lower half of the result comes from pairs of the first
operand, the upper half from pairs of the second operand.

NOTE(review): template argument lists (<...>) and line breaks were
reconstructed; the exact whitespace of context lines and of the opcode table
columns should be confirmed against the tree before applying.

diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp
index 2b0d4252..aa099831 100644
--- a/src/backend_x64/emit_x64_vector.cpp
+++ b/src/backend_x64/emit_x64_vector.cpp
@@ -1866,6 +1866,101 @@ void EmitX64::EmitVectorPairedAddUnsignedWiden32(EmitContext& ctx, IR::Inst* ins
     ctx.reg_alloc.DefineValue(inst, a);
 }
 
+template <typename T, typename Function>
+static void PairedOperation(VectorArray<T>& result, const VectorArray<T>& x, const VectorArray<T>& y, Function fn) {
+    const size_t range = x.size() / 2;
+
+    for (size_t i = 0; i < range; i++) {
+        result[i] = fn(x[2 * i], x[2 * i + 1]);
+    }
+
+    for (size_t i = 0; i < range; i++) {
+        result[range + i] = fn(y[2 * i], y[2 * i + 1]);
+    }
+}
+
+template <typename T>
+static void PairedMax(VectorArray<T>& result, const VectorArray<T>& x, const VectorArray<T>& y) {
+    PairedOperation(result, x, y, [](auto a, auto b) { return std::max(a, b); });
+}
+
+template <typename T>
+static void PairedMin(VectorArray<T>& result, const VectorArray<T>& x, const VectorArray<T>& y) {
+    PairedOperation(result, x, y, [](auto a, auto b) { return std::min(a, b); });
+}
+
+void EmitX64::EmitVectorPairedMaxS8(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<s8>& result, const VectorArray<s8>& a, const VectorArray<s8>& b) {
+        PairedMax(result, a, b);
+    });
+}
+
+void EmitX64::EmitVectorPairedMaxS16(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<s16>& result, const VectorArray<s16>& a, const VectorArray<s16>& b) {
+        PairedMax(result, a, b);
+    });
+}
+
+void EmitX64::EmitVectorPairedMaxS32(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<s32>& result, const VectorArray<s32>& a, const VectorArray<s32>& b) {
+        PairedMax(result, a, b);
+    });
+}
+
+void EmitX64::EmitVectorPairedMaxU8(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<u8>& result, const VectorArray<u8>& a, const VectorArray<u8>& b) {
+        PairedMax(result, a, b);
+    });
+}
+
+void EmitX64::EmitVectorPairedMaxU16(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<u16>& result, const VectorArray<u16>& a, const VectorArray<u16>& b) {
+        PairedMax(result, a, b);
+    });
+}
+
+void EmitX64::EmitVectorPairedMaxU32(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<u32>& result, const VectorArray<u32>& a, const VectorArray<u32>& b) {
+        PairedMax(result, a, b);
+    });
+}
+
+void EmitX64::EmitVectorPairedMinS8(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<s8>& result, const VectorArray<s8>& a, const VectorArray<s8>& b) {
+        PairedMin(result, a, b);
+    });
+}
+
+void EmitX64::EmitVectorPairedMinS16(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<s16>& result, const VectorArray<s16>& a, const VectorArray<s16>& b) {
+        PairedMin(result, a, b);
+    });
+}
+
+void EmitX64::EmitVectorPairedMinS32(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<s32>& result, const VectorArray<s32>& a, const VectorArray<s32>& b) {
+        PairedMin(result, a, b);
+    });
+}
+
+void EmitX64::EmitVectorPairedMinU8(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<u8>& result, const VectorArray<u8>& a, const VectorArray<u8>& b) {
+        PairedMin(result, a, b);
+    });
+}
+
+void EmitX64::EmitVectorPairedMinU16(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<u16>& result, const VectorArray<u16>& a, const VectorArray<u16>& b) {
+        PairedMin(result, a, b);
+    });
+}
+
+void EmitX64::EmitVectorPairedMinU32(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray<u32>& result, const VectorArray<u32>& a, const VectorArray<u32>& b) {
+        PairedMin(result, a, b);
+    });
+}
+
 template <typename D, typename T>
 static D PolynomialMultiply(T lhs, T rhs) {
     constexpr size_t bit_size = Common::BitSize<T>();
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 212ab255..7322d57f 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1226,6 +1226,62 @@ U128 IREmitter::VectorPairedAddUnsignedWiden(size_t original_esize, const U128&
     return {};
 }
 
+U128 IREmitter::VectorPairedMaxSigned(size_t esize, const U128& a, const U128& b) {
+    switch (esize) {
+    case 8:
+        return Inst<U128>(Opcode::VectorPairedMaxS8, a, b);
+    case 16:
+        return Inst<U128>(Opcode::VectorPairedMaxS16, a, b);
+    case 32:
+        return Inst<U128>(Opcode::VectorPairedMaxS32, a, b);
+    default:
+        UNREACHABLE();
+        return {};
+    }
+}
+
+U128 IREmitter::VectorPairedMaxUnsigned(size_t esize, const U128& a, const U128& b) {
+    switch (esize) {
+    case 8:
+        return Inst<U128>(Opcode::VectorPairedMaxU8, a, b);
+    case 16:
+        return Inst<U128>(Opcode::VectorPairedMaxU16, a, b);
+    case 32:
+        return Inst<U128>(Opcode::VectorPairedMaxU32, a, b);
+    default:
+        UNREACHABLE();
+        return {};
+    }
+}
+
+U128 IREmitter::VectorPairedMinSigned(size_t esize, const U128& a, const U128& b) {
+    switch (esize) {
+    case 8:
+        return Inst<U128>(Opcode::VectorPairedMinS8, a, b);
+    case 16:
+        return Inst<U128>(Opcode::VectorPairedMinS16, a, b);
+    case 32:
+        return Inst<U128>(Opcode::VectorPairedMinS32, a, b);
+    default:
+        UNREACHABLE();
+        return {};
+    }
+}
+
+U128 IREmitter::VectorPairedMinUnsigned(size_t esize, const U128& a, const U128& b) {
+    switch (esize) {
+    case 8:
+        return Inst<U128>(Opcode::VectorPairedMinU8, a, b);
+    case 16:
+        return Inst<U128>(Opcode::VectorPairedMinU16, a, b);
+    case 32:
+        return Inst<U128>(Opcode::VectorPairedMinU32, a, b);
+    default:
+        UNREACHABLE();
+        return {};
+    }
+}
+
 U128 IREmitter::VectorPolynomialMultiply(const U128& a, const U128& b) {
     return Inst<U128>(Opcode::VectorPolynomialMultiply8, a, b);
 }
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index ffd83710..10a00f5d 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -242,6 +242,10 @@ public:
     U128 VectorPairedAddLower(size_t esize, const U128& a, const U128& b);
     U128 VectorPairedAddSignedWiden(size_t original_esize, const U128& a);
     U128 VectorPairedAddUnsignedWiden(size_t original_esize, const U128& a);
+    U128 VectorPairedMaxSigned(size_t esize, const U128& a, const U128& b);
+    U128 VectorPairedMaxUnsigned(size_t esize, const U128& a, const U128& b);
+    U128 VectorPairedMinSigned(size_t esize, const U128& a, const U128& b);
+    U128 VectorPairedMinUnsigned(size_t esize, const U128& a, const U128& b);
     U128 VectorPolynomialMultiply(const U128& a, const U128& b);
     U128 VectorPolynomialMultiplyLong(size_t esize, const U128& a, const U128& b);
     U128 VectorPopulationCount(const U128& a);
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 3bb1400a..2c4f1b77 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -338,6 +338,18 @@ OPCODE(VectorPairedAdd8, T::U128, T::U128,
 OPCODE(VectorPairedAdd16,                                   T::U128,        T::U128,        T::U128                                         )
 OPCODE(VectorPairedAdd32,                                   T::U128,        T::U128,        T::U128                                         )
 OPCODE(VectorPairedAdd64,                                   T::U128,        T::U128,        T::U128                                         )
+OPCODE(VectorPairedMaxS8,                                   T::U128,        T::U128,        T::U128                                         )
+OPCODE(VectorPairedMaxS16,                                  T::U128,        T::U128,        T::U128                                         )
+OPCODE(VectorPairedMaxS32,                                  T::U128,        T::U128,        T::U128                                         )
+OPCODE(VectorPairedMaxU8,                                   T::U128,        T::U128,        T::U128                                         )
+OPCODE(VectorPairedMaxU16,                                  T::U128,        T::U128,        T::U128                                         )
+OPCODE(VectorPairedMaxU32,                                  T::U128,        T::U128,        T::U128                                         )
+OPCODE(VectorPairedMinS8,                                   T::U128,        T::U128,        T::U128                                         )
+OPCODE(VectorPairedMinS16,                                  T::U128,        T::U128,        T::U128                                         )
+OPCODE(VectorPairedMinS32,                                  T::U128,        T::U128,        T::U128                                         )
+OPCODE(VectorPairedMinU8,                                   T::U128,        T::U128,        T::U128                                         )
+OPCODE(VectorPairedMinU16,                                  T::U128,        T::U128,        T::U128                                         )
+OPCODE(VectorPairedMinU32,                                  T::U128,        T::U128,        T::U128                                         )
 OPCODE(VectorPolynomialMultiply8,                           T::U128,        T::U128,        T::U128                                         )
 OPCODE(VectorPolynomialMultiplyLong8,                       T::U128,        T::U128,        T::U128                                         )
 OPCODE(VectorPolynomialMultiplyLong64,                      T::U128,        T::U128,        T::U128                                         )