ir: Add opcodes for left signed saturated shifts
This commit is contained in:
parent
da55ed7b31
commit
b14eaaec46
5 changed files with 88 additions and 0 deletions
|
@ -3572,6 +3572,70 @@ void EmitX64::EmitVectorSignedSaturatedNeg64(EmitContext& ctx, IR::Inst* inst) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
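// MSVC requires the saturate lambda below to capture bit_size_minus_one explicitly,
// whereas clang and GCC consider that capture unnecessary; clang's resulting
// unused-lambda-capture warning is therefore silenced around the function.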
|
||||||
|
#ifdef __clang__
|
||||||
|
#pragma clang diagnostic push
|
||||||
|
#pragma clang diagnostic ignored "-Wunused-lambda-capture"
|
||||||
|
#endif
|
||||||
|
template <typename T, typename U = std::make_unsigned_t<T>>
|
||||||
|
static bool VectorSignedSaturatedShiftLeft(VectorArray<T>& dst, const VectorArray<T>& data, const VectorArray<T>& shift_values) {
|
||||||
|
static_assert(std::is_signed_v<T>, "T must be signed.");
|
||||||
|
|
||||||
|
bool qc_flag = false;
|
||||||
|
|
||||||
|
constexpr size_t bit_size_minus_one = Common::BitSize<T>() - 1;
|
||||||
|
|
||||||
|
const auto saturate = [bit_size_minus_one](T value) {
|
||||||
|
return static_cast<T>((static_cast<U>(value) >> bit_size_minus_one) + (U{1} << bit_size_minus_one) - 1);
|
||||||
|
};
|
||||||
|
|
||||||
|
for (size_t i = 0; i < dst.size(); i++) {
|
||||||
|
const T element = data[i];
|
||||||
|
const T shift = std::clamp<T>(static_cast<T>(Common::SignExtend<8>(shift_values[i] & 0xFF)),
|
||||||
|
-static_cast<T>(bit_size_minus_one), std::numeric_limits<T>::max());
|
||||||
|
|
||||||
|
if (element == 0) {
|
||||||
|
dst[i] = 0;
|
||||||
|
} else if (shift < 0) {
|
||||||
|
dst[i] = static_cast<T>(element >> -shift);
|
||||||
|
} else if (static_cast<U>(shift) > bit_size_minus_one) {
|
||||||
|
dst[i] = saturate(element);
|
||||||
|
qc_flag = true;
|
||||||
|
} else {
|
||||||
|
const T shifted = element << shift;
|
||||||
|
|
||||||
|
if ((shifted >> shift) != element) {
|
||||||
|
dst[i] = saturate(element);
|
||||||
|
qc_flag = true;
|
||||||
|
} else {
|
||||||
|
dst[i] = shifted;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return qc_flag;
|
||||||
|
}
|
||||||
|
#ifdef __clang__
|
||||||
|
#pragma clang diagnostic pop
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Handles VectorSignedSaturatedShiftLeft8 by handing the s8 instantiation of the
// VectorSignedSaturatedShiftLeft helper to EmitTwoArgumentFallbackWithSaturation.
void EmitX64::EmitVectorSignedSaturatedShiftLeft8(EmitContext& ctx, IR::Inst* inst) {
    const auto fallback = &VectorSignedSaturatedShiftLeft<s8>;
    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, fallback);
}
|
||||||
|
|
||||||
|
// Handles VectorSignedSaturatedShiftLeft16 by handing the s16 instantiation of the
// VectorSignedSaturatedShiftLeft helper to EmitTwoArgumentFallbackWithSaturation.
void EmitX64::EmitVectorSignedSaturatedShiftLeft16(EmitContext& ctx, IR::Inst* inst) {
    const auto fallback = &VectorSignedSaturatedShiftLeft<s16>;
    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, fallback);
}
|
||||||
|
|
||||||
|
// Handles VectorSignedSaturatedShiftLeft32 by handing the s32 instantiation of the
// VectorSignedSaturatedShiftLeft helper to EmitTwoArgumentFallbackWithSaturation.
void EmitX64::EmitVectorSignedSaturatedShiftLeft32(EmitContext& ctx, IR::Inst* inst) {
    const auto fallback = &VectorSignedSaturatedShiftLeft<s32>;
    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, fallback);
}
|
||||||
|
|
||||||
|
// Handles VectorSignedSaturatedShiftLeft64 by handing the s64 instantiation of the
// VectorSignedSaturatedShiftLeft helper to EmitTwoArgumentFallbackWithSaturation.
void EmitX64::EmitVectorSignedSaturatedShiftLeft64(EmitContext& ctx, IR::Inst* inst) {
    const auto fallback = &VectorSignedSaturatedShiftLeft<s64>;
    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, fallback);
}
|
||||||
|
|
||||||
void EmitX64::EmitVectorSub8(EmitContext& ctx, IR::Inst* inst) {
|
void EmitX64::EmitVectorSub8(EmitContext& ctx, IR::Inst* inst) {
|
||||||
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubb);
|
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubb);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1646,6 +1646,21 @@ U128 IREmitter::VectorSignedSaturatedNeg(size_t esize, const U128& a) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the VectorSignedSaturatedShiftLeft opcode that matches the requested
// element size, with `a` and `b` as its two operands.
//
// esize - element size selector; 8, 16, 32 and 64 are handled.
// a, b  - operands forwarded unchanged to the emitted instruction.
//
// Any other esize reaches UNREACHABLE() and falls back to a default-constructed U128.
U128 IREmitter::VectorSignedSaturatedShiftLeft(size_t esize, const U128& a, const U128& b) {
    if (esize == 8) {
        return Inst<U128>(Opcode::VectorSignedSaturatedShiftLeft8, a, b);
    }
    if (esize == 16) {
        return Inst<U128>(Opcode::VectorSignedSaturatedShiftLeft16, a, b);
    }
    if (esize == 32) {
        return Inst<U128>(Opcode::VectorSignedSaturatedShiftLeft32, a, b);
    }
    if (esize == 64) {
        return Inst<U128>(Opcode::VectorSignedSaturatedShiftLeft64, a, b);
    }

    UNREACHABLE();
    return {};
}
|
||||||
|
|
||||||
U128 IREmitter::VectorSub(size_t esize, const U128& a, const U128& b) {
|
U128 IREmitter::VectorSub(size_t esize, const U128& a, const U128& b) {
|
||||||
switch (esize) {
|
switch (esize) {
|
||||||
case 8:
|
case 8:
|
||||||
|
|
|
@ -278,6 +278,7 @@ public:
|
||||||
U128 VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a);
|
U128 VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a);
|
||||||
U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a);
|
U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a);
|
||||||
U128 VectorSignedSaturatedNeg(size_t esize, const U128& a);
|
U128 VectorSignedSaturatedNeg(size_t esize, const U128& a);
|
||||||
|
U128 VectorSignedSaturatedShiftLeft(size_t esize, const U128& a, const U128& b);
|
||||||
U128 VectorSub(size_t esize, const U128& a, const U128& b);
|
U128 VectorSub(size_t esize, const U128& a, const U128& b);
|
||||||
Table VectorTable(std::vector<U128> values);
|
Table VectorTable(std::vector<U128> values);
|
||||||
U128 VectorTableLookup(const U128& defaults, const Table& table, const U128& indices);
|
U128 VectorTableLookup(const U128& defaults, const Table& table, const U128& indices);
|
||||||
|
|
|
@ -369,6 +369,10 @@ bool Inst::WritesToFPSRCumulativeSaturationBit() const {
|
||||||
case Opcode::VectorSignedSaturatedNeg16:
|
case Opcode::VectorSignedSaturatedNeg16:
|
||||||
case Opcode::VectorSignedSaturatedNeg32:
|
case Opcode::VectorSignedSaturatedNeg32:
|
||||||
case Opcode::VectorSignedSaturatedNeg64:
|
case Opcode::VectorSignedSaturatedNeg64:
|
||||||
|
case Opcode::VectorSignedSaturatedShiftLeft8:
|
||||||
|
case Opcode::VectorSignedSaturatedShiftLeft16:
|
||||||
|
case Opcode::VectorSignedSaturatedShiftLeft32:
|
||||||
|
case Opcode::VectorSignedSaturatedShiftLeft64:
|
||||||
case Opcode::VectorUnsignedSaturatedAccumulateSigned8:
|
case Opcode::VectorUnsignedSaturatedAccumulateSigned8:
|
||||||
case Opcode::VectorUnsignedSaturatedAccumulateSigned16:
|
case Opcode::VectorUnsignedSaturatedAccumulateSigned16:
|
||||||
case Opcode::VectorUnsignedSaturatedAccumulateSigned32:
|
case Opcode::VectorUnsignedSaturatedAccumulateSigned32:
|
||||||
|
|
|
@ -422,6 +422,10 @@ OPCODE(VectorSignedSaturatedNeg8, U128, U128
|
||||||
OPCODE(VectorSignedSaturatedNeg16, U128, U128 )
|
OPCODE(VectorSignedSaturatedNeg16, U128, U128 )
|
||||||
OPCODE(VectorSignedSaturatedNeg32, U128, U128 )
|
OPCODE(VectorSignedSaturatedNeg32, U128, U128 )
|
||||||
OPCODE(VectorSignedSaturatedNeg64, U128, U128 )
|
OPCODE(VectorSignedSaturatedNeg64, U128, U128 )
|
||||||
|
OPCODE(VectorSignedSaturatedShiftLeft8, U128, U128, U128 )
|
||||||
|
OPCODE(VectorSignedSaturatedShiftLeft16, U128, U128, U128 )
|
||||||
|
OPCODE(VectorSignedSaturatedShiftLeft32, U128, U128, U128 )
|
||||||
|
OPCODE(VectorSignedSaturatedShiftLeft64, U128, U128, U128 )
|
||||||
OPCODE(VectorSub8, U128, U128, U128 )
|
OPCODE(VectorSub8, U128, U128, U128 )
|
||||||
OPCODE(VectorSub16, U128, U128, U128 )
|
OPCODE(VectorSub16, U128, U128, U128 )
|
||||||
OPCODE(VectorSub32, U128, U128, U128 )
|
OPCODE(VectorSub32, U128, U128, U128 )
|
||||||
|
|
Loading…
Reference in a new issue