ir: Add opcodes for vector paired maximums and minimums
For the time being, we can just do a naive implementation which avoids falling back to the interpreter a bit. Horizontal operations aren't necessarily x86 SIMD's forte anyways.
This commit is contained in:
parent
43344c5400
commit
463b9a3d02
4 changed files with 167 additions and 0 deletions
|
@ -1866,6 +1866,101 @@ void EmitX64::EmitVectorPairedAddUnsignedWiden32(EmitContext& ctx, IR::Inst* ins
|
|||
ctx.reg_alloc.DefineValue(inst, a);
|
||||
}
|
||||
|
||||
template <typename T, typename Function>
|
||||
static void PairedOperation(VectorArray<T>& result, const VectorArray<T>& x, const VectorArray<T>& y, Function fn) {
|
||||
const size_t range = x.size() / 2;
|
||||
|
||||
for (size_t i = 0; i < range; i++) {
|
||||
result[i] = fn(x[2 * i], x[2 * i + 1]);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < range; i++) {
|
||||
result[range + i] = fn(y[2 * i], y[2 * i + 1]);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void PairedMax(VectorArray<T>& result, const VectorArray<T>& x, const VectorArray<T>& y) {
|
||||
PairedOperation(result, x, y, [](auto a, auto b) { return std::max(a, b); });
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void PairedMin(VectorArray<T>& result, const VectorArray<T>& x, const VectorArray<T>& y) {
|
||||
PairedOperation(result, x, y, [](auto a, auto b) { return std::min(a, b); });
|
||||
}
|
||||
|
||||
// Lowers VectorPairedMaxS8 by handing EmitTwoArgumentFallback a callback that
// runs PairedMax over s8 lanes.
void EmitX64::EmitVectorPairedMaxS8(EmitContext& ctx, IR::Inst* inst) {
    using Lanes = VectorArray<s8>;

    EmitTwoArgumentFallback(code, ctx, inst, [](Lanes& out, const Lanes& lhs, const Lanes& rhs) {
        PairedMax<s8>(out, lhs, rhs);
    });
}
|
||||
|
||||
// Lowers VectorPairedMaxS16 by handing EmitTwoArgumentFallback a callback that
// runs PairedMax over s16 lanes.
void EmitX64::EmitVectorPairedMaxS16(EmitContext& ctx, IR::Inst* inst) {
    using Lanes = VectorArray<s16>;

    EmitTwoArgumentFallback(code, ctx, inst, [](Lanes& out, const Lanes& lhs, const Lanes& rhs) {
        PairedMax<s16>(out, lhs, rhs);
    });
}
|
||||
|
||||
// Lowers VectorPairedMaxS32 by handing EmitTwoArgumentFallback a callback that
// runs PairedMax over s32 lanes.
void EmitX64::EmitVectorPairedMaxS32(EmitContext& ctx, IR::Inst* inst) {
    using Lanes = VectorArray<s32>;

    EmitTwoArgumentFallback(code, ctx, inst, [](Lanes& out, const Lanes& lhs, const Lanes& rhs) {
        PairedMax<s32>(out, lhs, rhs);
    });
}
|
||||
|
||||
// Lowers VectorPairedMaxU8 by handing EmitTwoArgumentFallback a callback that
// runs PairedMax over u8 lanes.
void EmitX64::EmitVectorPairedMaxU8(EmitContext& ctx, IR::Inst* inst) {
    using Lanes = VectorArray<u8>;

    EmitTwoArgumentFallback(code, ctx, inst, [](Lanes& out, const Lanes& lhs, const Lanes& rhs) {
        PairedMax<u8>(out, lhs, rhs);
    });
}
|
||||
|
||||
// Lowers VectorPairedMaxU16 by handing EmitTwoArgumentFallback a callback that
// runs PairedMax over u16 lanes.
void EmitX64::EmitVectorPairedMaxU16(EmitContext& ctx, IR::Inst* inst) {
    using Lanes = VectorArray<u16>;

    EmitTwoArgumentFallback(code, ctx, inst, [](Lanes& out, const Lanes& lhs, const Lanes& rhs) {
        PairedMax<u16>(out, lhs, rhs);
    });
}
|
||||
|
||||
// Lowers VectorPairedMaxU32 by handing EmitTwoArgumentFallback a callback that
// runs PairedMax over u32 lanes.
void EmitX64::EmitVectorPairedMaxU32(EmitContext& ctx, IR::Inst* inst) {
    using Lanes = VectorArray<u32>;

    EmitTwoArgumentFallback(code, ctx, inst, [](Lanes& out, const Lanes& lhs, const Lanes& rhs) {
        PairedMax<u32>(out, lhs, rhs);
    });
}
|
||||
|
||||
// Lowers VectorPairedMinS8 by handing EmitTwoArgumentFallback a callback that
// runs PairedMin over s8 lanes.
void EmitX64::EmitVectorPairedMinS8(EmitContext& ctx, IR::Inst* inst) {
    using Lanes = VectorArray<s8>;

    EmitTwoArgumentFallback(code, ctx, inst, [](Lanes& out, const Lanes& lhs, const Lanes& rhs) {
        PairedMin<s8>(out, lhs, rhs);
    });
}
|
||||
|
||||
// Lowers VectorPairedMinS16 by handing EmitTwoArgumentFallback a callback that
// runs PairedMin over s16 lanes.
void EmitX64::EmitVectorPairedMinS16(EmitContext& ctx, IR::Inst* inst) {
    using Lanes = VectorArray<s16>;

    EmitTwoArgumentFallback(code, ctx, inst, [](Lanes& out, const Lanes& lhs, const Lanes& rhs) {
        PairedMin<s16>(out, lhs, rhs);
    });
}
|
||||
|
||||
// Lowers VectorPairedMinS32 by handing EmitTwoArgumentFallback a callback that
// runs PairedMin over s32 lanes.
void EmitX64::EmitVectorPairedMinS32(EmitContext& ctx, IR::Inst* inst) {
    using Lanes = VectorArray<s32>;

    EmitTwoArgumentFallback(code, ctx, inst, [](Lanes& out, const Lanes& lhs, const Lanes& rhs) {
        PairedMin<s32>(out, lhs, rhs);
    });
}
|
||||
|
||||
// Lowers VectorPairedMinU8 by handing EmitTwoArgumentFallback a callback that
// runs PairedMin over u8 lanes.
void EmitX64::EmitVectorPairedMinU8(EmitContext& ctx, IR::Inst* inst) {
    using Lanes = VectorArray<u8>;

    EmitTwoArgumentFallback(code, ctx, inst, [](Lanes& out, const Lanes& lhs, const Lanes& rhs) {
        PairedMin<u8>(out, lhs, rhs);
    });
}
|
||||
|
||||
// Lowers VectorPairedMinU16 by handing EmitTwoArgumentFallback a callback that
// runs PairedMin over u16 lanes.
void EmitX64::EmitVectorPairedMinU16(EmitContext& ctx, IR::Inst* inst) {
    using Lanes = VectorArray<u16>;

    EmitTwoArgumentFallback(code, ctx, inst, [](Lanes& out, const Lanes& lhs, const Lanes& rhs) {
        PairedMin<u16>(out, lhs, rhs);
    });
}
|
||||
|
||||
// Lowers VectorPairedMinU32 by handing EmitTwoArgumentFallback a callback that
// runs PairedMin over u32 lanes.
void EmitX64::EmitVectorPairedMinU32(EmitContext& ctx, IR::Inst* inst) {
    using Lanes = VectorArray<u32>;

    EmitTwoArgumentFallback(code, ctx, inst, [](Lanes& out, const Lanes& lhs, const Lanes& rhs) {
        PairedMin<u32>(out, lhs, rhs);
    });
}
|
||||
|
||||
template <typename D, typename T>
|
||||
static D PolynomialMultiply(T lhs, T rhs) {
|
||||
constexpr size_t bit_size = Common::BitSize<T>();
|
||||
|
|
|
@ -1226,6 +1226,62 @@ U128 IREmitter::VectorPairedAddUnsignedWiden(size_t original_esize, const U128&
|
|||
return {};
|
||||
}
|
||||
|
||||
// Emits the signed paired-maximum instruction matching the element size.
// esize must be 8, 16 or 32; any other value reaches UNREACHABLE().
U128 IREmitter::VectorPairedMaxSigned(size_t esize, const U128& a, const U128& b) {
    if (esize == 8) {
        return Inst<U128>(Opcode::VectorPairedMaxS8, a, b);
    }
    if (esize == 16) {
        return Inst<U128>(Opcode::VectorPairedMaxS16, a, b);
    }
    if (esize == 32) {
        return Inst<U128>(Opcode::VectorPairedMaxS32, a, b);
    }

    UNREACHABLE();
    return {};
}
|
||||
|
||||
// Emits the unsigned paired-maximum instruction matching the element size.
// esize must be 8, 16 or 32; any other value reaches UNREACHABLE().
U128 IREmitter::VectorPairedMaxUnsigned(size_t esize, const U128& a, const U128& b) {
    if (esize == 8) {
        return Inst<U128>(Opcode::VectorPairedMaxU8, a, b);
    }
    if (esize == 16) {
        return Inst<U128>(Opcode::VectorPairedMaxU16, a, b);
    }
    if (esize == 32) {
        return Inst<U128>(Opcode::VectorPairedMaxU32, a, b);
    }

    UNREACHABLE();
    return {};
}
|
||||
|
||||
// Emits the signed paired-minimum instruction matching the element size.
// esize must be 8, 16 or 32; any other value reaches UNREACHABLE().
U128 IREmitter::VectorPairedMinSigned(size_t esize, const U128& a, const U128& b) {
    if (esize == 8) {
        return Inst<U128>(Opcode::VectorPairedMinS8, a, b);
    }
    if (esize == 16) {
        return Inst<U128>(Opcode::VectorPairedMinS16, a, b);
    }
    if (esize == 32) {
        return Inst<U128>(Opcode::VectorPairedMinS32, a, b);
    }

    UNREACHABLE();
    return {};
}
|
||||
|
||||
// Emits the unsigned paired-minimum instruction matching the element size.
// esize must be 8, 16 or 32; any other value reaches UNREACHABLE().
U128 IREmitter::VectorPairedMinUnsigned(size_t esize, const U128& a, const U128& b) {
    if (esize == 8) {
        return Inst<U128>(Opcode::VectorPairedMinU8, a, b);
    }
    if (esize == 16) {
        return Inst<U128>(Opcode::VectorPairedMinU16, a, b);
    }
    if (esize == 32) {
        return Inst<U128>(Opcode::VectorPairedMinU32, a, b);
    }

    UNREACHABLE();
    return {};
}
|
||||
|
||||
// Emits a VectorPolynomialMultiply8 instruction with operands a and b and
// returns its U128 result. Only the 8-bit opcode is emitted here; there is no
// element-size parameter.
U128 IREmitter::VectorPolynomialMultiply(const U128& a, const U128& b) {
|
||||
return Inst<U128>(Opcode::VectorPolynomialMultiply8, a, b);
|
||||
}
|
||||
|
|
|
@ -242,6 +242,10 @@ public:
|
|||
U128 VectorPairedAddLower(size_t esize, const U128& a, const U128& b);
|
||||
U128 VectorPairedAddSignedWiden(size_t original_esize, const U128& a);
|
||||
U128 VectorPairedAddUnsignedWiden(size_t original_esize, const U128& a);
|
||||
// Emits VectorPairedMaxS8/S16/S32 according to esize (8, 16 or 32); any other esize reaches UNREACHABLE().
U128 VectorPairedMaxSigned(size_t esize, const U128& a, const U128& b);
|
||||
// Emits VectorPairedMaxU8/U16/U32 according to esize (8, 16 or 32); any other esize reaches UNREACHABLE().
U128 VectorPairedMaxUnsigned(size_t esize, const U128& a, const U128& b);
|
||||
// Emits VectorPairedMinS8/S16/S32 according to esize (8, 16 or 32); any other esize reaches UNREACHABLE().
U128 VectorPairedMinSigned(size_t esize, const U128& a, const U128& b);
|
||||
// Emits VectorPairedMinU8/U16/U32 according to esize (8, 16 or 32); any other esize reaches UNREACHABLE().
U128 VectorPairedMinUnsigned(size_t esize, const U128& a, const U128& b);
|
||||
U128 VectorPolynomialMultiply(const U128& a, const U128& b);
|
||||
U128 VectorPolynomialMultiplyLong(size_t esize, const U128& a, const U128& b);
|
||||
U128 VectorPopulationCount(const U128& a);
|
||||
|
|
|
@ -338,6 +338,18 @@ OPCODE(VectorPairedAdd8, T::U128, T::U128,
|
|||
OPCODE(VectorPairedAdd16, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorPairedAdd32, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorPairedAdd64, T::U128, T::U128, T::U128 )
|
||||
// Paired maximum/minimum opcodes for signed (S) and unsigned (U) elements of 8, 16 and 32 bits.
// NOTE(review): the three T::U128 columns are presumably the result type followed by the two
// operand types -- confirm against the OPCODE macro definition.
OPCODE(VectorPairedMaxS8, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorPairedMaxS16, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorPairedMaxS32, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorPairedMaxU8, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorPairedMaxU16, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorPairedMaxU32, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorPairedMinS8, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorPairedMinS16, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorPairedMinS32, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorPairedMinU8, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorPairedMinU16, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorPairedMinU32, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorPolynomialMultiply8, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorPolynomialMultiplyLong8, T::U128, T::U128, T::U128 )
|
||||
OPCODE(VectorPolynomialMultiplyLong64, T::U128, T::U128, T::U128 )
|
||||
|
|
Loading…
Reference in a new issue