ir: Add opcodes for unsigned saturating left shifts
parent ab60720418
commit d426dfe942

5 changed files with 76 additions and 0 deletions
@@ -4100,6 +4100,58 @@ void EmitX64::EmitVectorUnsignedSaturatedNarrow64(EmitContext& ctx, IR::Inst* in
     });
 }
 
+template <typename T, typename S = std::make_signed_t<T>>
+static bool VectorUnsignedSaturatedShiftLeft(VectorArray<T>& dst, const VectorArray<T>& data, const VectorArray<T>& shift_values) {
+    static_assert(std::is_unsigned_v<T>, "T must be an unsigned type.");
+
+    bool qc_flag = false;
+
+    constexpr size_t bit_size = Common::BitSize<T>();
+    constexpr S negative_bit_size = -static_cast<S>(bit_size);
+
+    for (size_t i = 0; i < dst.size(); i++) {
+        const T element = data[i];
+        const S shift = std::clamp(static_cast<S>(Common::SignExtend<8>(shift_values[i] & 0xFF)),
+                                   negative_bit_size, std::numeric_limits<S>::max());
+
+        if (element == 0 || shift <= negative_bit_size) {
+            dst[i] = 0;
+        } else if (shift < 0) {
+            dst[i] = static_cast<T>(element >> -shift);
+        } else if (shift >= static_cast<S>(bit_size)) {
+            dst[i] = std::numeric_limits<T>::max();
+            qc_flag = true;
+        } else {
+            const T shifted = element << shift;
+
+            if ((shifted >> shift) != element) {
+                dst[i] = std::numeric_limits<T>::max();
+                qc_flag = true;
+            } else {
+                dst[i] = shifted;
+            }
+        }
+    }
+
+    return qc_flag;
+}
+
+void EmitX64::EmitVectorUnsignedSaturatedShiftLeft8(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorUnsignedSaturatedShiftLeft<u8>);
+}
+
+void EmitX64::EmitVectorUnsignedSaturatedShiftLeft16(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorUnsignedSaturatedShiftLeft<u16>);
+}
+
+void EmitX64::EmitVectorUnsignedSaturatedShiftLeft32(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorUnsignedSaturatedShiftLeft<u32>);
+}
+
+void EmitX64::EmitVectorUnsignedSaturatedShiftLeft64(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorUnsignedSaturatedShiftLeft<u64>);
+}
+
 void EmitX64::EmitVectorZeroExtend8(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
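Note (illustrative, not part of the diff): the fallback above detects per-lane overflow by checking whether shifting left and then back right round-trips the original value; only then does it clamp the lane and report saturation. A minimal standalone C++17 sketch of that check for a single u8 lane:

#include <cstdint>
#include <cstdio>
#include <limits>

// Minimal sketch of the round-trip saturation check used by the fallback above,
// applied to one u8 lane with a positive, in-range shift amount.
int main() {
    const std::uint8_t element = 0x40; // 64
    const int shift = 2;               // 64 << 2 == 256, which does not fit in u8

    const std::uint8_t shifted = static_cast<std::uint8_t>(element << shift);
    const bool saturated = static_cast<std::uint8_t>(shifted >> shift) != element;

    const std::uint8_t result =
        saturated ? std::numeric_limits<std::uint8_t>::max() : shifted;

    // Prints "result=255 saturated=1": the lane clamps to the type maximum,
    // which is also when the emitted code would set FPSR.QC.
    std::printf("result=%u saturated=%d\n", static_cast<unsigned>(result), saturated ? 1 : 0);
    return 0;
}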
@@ -1735,6 +1735,21 @@ U128 IREmitter::VectorUnsignedSaturatedNarrow(size_t esize, const U128& a) {
     return {};
 }
 
+U128 IREmitter::VectorUnsignedSaturatedShiftLeft(size_t esize, const U128& a, const U128& b) {
+    switch (esize) {
+    case 8:
+        return Inst<U128>(Opcode::VectorUnsignedSaturatedShiftLeft8, a, b);
+    case 16:
+        return Inst<U128>(Opcode::VectorUnsignedSaturatedShiftLeft16, a, b);
+    case 32:
+        return Inst<U128>(Opcode::VectorUnsignedSaturatedShiftLeft32, a, b);
+    case 64:
+        return Inst<U128>(Opcode::VectorUnsignedSaturatedShiftLeft64, a, b);
+    }
+    UNREACHABLE();
+    return {};
+}
+
 U128 IREmitter::VectorZeroExtend(size_t original_esize, const U128& a) {
     switch (original_esize) {
     case 8:
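A note on the semantics the new opcodes expose (illustrative, not part of the diff): the low byte of each lane of the second operand is sign-extended, so negative amounts shift the lane right and amounts at or below -esize clear it to zero, as in this minimal standalone C++17 sketch for a u16 lane:

#include <cstdint>
#include <cstdio>

// Illustrative only: sketches how a zero or negative per-lane shift amount behaves.
// Positive amounts saturate as in the fallback shown earlier in this diff.
static std::uint16_t ShiftLaneNonPositive(std::uint16_t element, int shift) {
    if (element == 0 || shift <= -16) {
        return 0;                                          // element shifted out entirely
    }
    return static_cast<std::uint16_t>(element >> -shift);  // negative amount => right shift
}

int main() {
    std::printf("%u\n", static_cast<unsigned>(ShiftLaneNonPositive(0x0F00, -4)));  // prints 240 (0x00F0)
    std::printf("%u\n", static_cast<unsigned>(ShiftLaneNonPositive(0x0F00, -16))); // prints 0
    return 0;
}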
@@ -287,6 +287,7 @@ public:
     U128 VectorUnsignedRecipSqrtEstimate(const U128& a);
     U128 VectorUnsignedSaturatedAccumulateSigned(size_t esize, const U128& a, const U128& b);
     U128 VectorUnsignedSaturatedNarrow(size_t esize, const U128& a);
+    U128 VectorUnsignedSaturatedShiftLeft(size_t esize, const U128& a, const U128& b);
     U128 VectorZeroExtend(size_t original_esize, const U128& a);
     U128 VectorZeroUpper(const U128& a);
     U128 ZeroVector();
@@ -380,6 +380,10 @@ bool Inst::WritesToFPSRCumulativeSaturationBit() const {
     case Opcode::VectorUnsignedSaturatedNarrow16:
     case Opcode::VectorUnsignedSaturatedNarrow32:
     case Opcode::VectorUnsignedSaturatedNarrow64:
+    case Opcode::VectorUnsignedSaturatedShiftLeft8:
+    case Opcode::VectorUnsignedSaturatedShiftLeft16:
+    case Opcode::VectorUnsignedSaturatedShiftLeft32:
+    case Opcode::VectorUnsignedSaturatedShiftLeft64:
         return true;
 
     default:
@@ -446,6 +446,10 @@ OPCODE(VectorUnsignedSaturatedAccumulateSigned64, U128, U128
 OPCODE(VectorUnsignedSaturatedNarrow16,       U128, U128             )
 OPCODE(VectorUnsignedSaturatedNarrow32,       U128, U128             )
 OPCODE(VectorUnsignedSaturatedNarrow64,       U128, U128             )
+OPCODE(VectorUnsignedSaturatedShiftLeft8,     U128, U128, U128       )
+OPCODE(VectorUnsignedSaturatedShiftLeft16,    U128, U128, U128       )
+OPCODE(VectorUnsignedSaturatedShiftLeft32,    U128, U128, U128       )
+OPCODE(VectorUnsignedSaturatedShiftLeft64,    U128, U128, U128       )
 OPCODE(VectorZeroExtend8,                     U128, U128             )
 OPCODE(VectorZeroExtend16,                    U128, U128             )
 OPCODE(VectorZeroExtend32,                    U128, U128             )