ir: Add opcodes for unsigned saturated accumulations of signed values

This commit is contained in:
Lioncash 2018-09-09 02:01:09 -04:00 committed by MerryMage
parent 18ad7f237d
commit d4a76aaa04
5 changed files with 91 additions and 0 deletions

View file

@ -3445,6 +3445,73 @@ void EmitX64::EmitVectorUnsignedRecipSqrtEstimate(EmitContext& ctx, IR::Inst* in
}); });
} }
// Simple generic case for 8, 16, and 32-bit values. 64-bit values
// will need to be special-cased as we can't simply use a larger integral size.
//
// For every element, adds the signed value lhs[i] to rhs[i] (whose bits are
// reinterpreted as unsigned) and stores the sum in result[i], saturated to the
// range of U. Returns true if at least one element had to be saturated.
template <typename T, typename U = std::make_unsigned_t<T>>
bool EmitVectorUnsignedSaturatedAccumulateSigned(VectorArray<U>& result, const VectorArray<T>& lhs, const VectorArray<T>& rhs) {
    static_assert(std::is_signed_v<T>, "T must be signed.");
    // sizeof() yields a size in bytes, so comparing it against 64 would never fail.
    // Check the width in bits instead, so a 64-bit T is rejected at compile time.
    static_assert(std::numeric_limits<std::make_unsigned_t<T>>::digits < 64, "T must be less than 64 bits in size.");

    bool qc_flag = false;

    for (size_t i = 0; i < result.size(); i++) {
        // lhs is sign-extended into the 64-bit intermediate.
        const s64 x = s64{lhs[i]};
        // We treat rhs' members as unsigned, so cast to the unsigned equivalent of T
        // before widening: this forces zero-extension rather than sign-extension.
        const s64 y = static_cast<s64>(static_cast<U>(rhs[i]));
        // Cannot overflow: both operands are narrower than 64 bits.
        const s64 sum = x + y;

        if (sum > std::numeric_limits<U>::max()) {
            result[i] = std::numeric_limits<U>::max();
            qc_flag = true;
        } else if (sum < 0) {
            result[i] = std::numeric_limits<U>::min();
            qc_flag = true;
        } else {
            result[i] = static_cast<U>(sum);
        }
    }

    return qc_flag;
}
// 8-bit element variant: passes the generic routine instantiated with s8 to
// EmitTwoArgumentFallbackWithSaturation, together with code, ctx and inst.
void EmitX64::EmitVectorUnsignedSaturatedAccumulateSigned8(EmitContext& ctx, IR::Inst* inst) {
EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, EmitVectorUnsignedSaturatedAccumulateSigned<s8>);
}
// 16-bit element variant: passes the generic routine instantiated with s16 to
// EmitTwoArgumentFallbackWithSaturation, together with code, ctx and inst.
void EmitX64::EmitVectorUnsignedSaturatedAccumulateSigned16(EmitContext& ctx, IR::Inst* inst) {
EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, EmitVectorUnsignedSaturatedAccumulateSigned<s16>);
}
// 32-bit element variant: passes the generic routine instantiated with s32 to
// EmitTwoArgumentFallbackWithSaturation, together with code, ctx and inst.
void EmitX64::EmitVectorUnsignedSaturatedAccumulateSigned32(EmitContext& ctx, IR::Inst* inst) {
EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, EmitVectorUnsignedSaturatedAccumulateSigned<s32>);
}
void EmitX64::EmitVectorUnsignedSaturatedAccumulateSigned64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst* inst) {
EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, [](VectorArray<u64>& result, const VectorArray<u64>& lhs, const VectorArray<u64>& rhs) {
bool qc_flag = false;
for (size_t i = 0; i < result.size(); i++) {
const u64 x = lhs[i];
const u64 y = rhs[i];
const u64 res = x + y;
// Check sign bits to determine if an overflow occurred.
if ((~x & y & ~res) & 0x8000000000000000) {
result[i] = UINT64_MAX;
qc_flag = true;
} else if ((x & ~y & res) & 0x8000000000000000) {
result[i] = 0;
qc_flag = true;
} else {
result[i] = res;
}
}
return qc_flag;
});
}
void EmitX64::EmitVectorUnsignedSaturatedNarrow16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorUnsignedSaturatedNarrow16(EmitContext& ctx, IR::Inst* inst) {
EmitOneArgumentFallbackWithSaturation(code, ctx, inst, [](VectorArray<u8>& result, const VectorArray<u16>& a) { EmitOneArgumentFallbackWithSaturation(code, ctx, inst, [](VectorArray<u8>& result, const VectorArray<u16>& a) {
bool qc_flag = false; bool qc_flag = false;

View file

@ -1642,6 +1642,21 @@ U128 IREmitter::VectorUnsignedRecipSqrtEstimate(const U128& a) {
return Inst<U128>(Opcode::VectorUnsignedRecipSqrtEstimate, a); return Inst<U128>(Opcode::VectorUnsignedRecipSqrtEstimate, a);
} }
// Emits the unsigned-saturated-accumulate-of-signed opcode that matches the
// requested element size. esize is expected to be 8, 16, 32 or 64; any other
// value falls through to UNREACHABLE().
U128 IREmitter::VectorUnsignedSaturatedAccumulateSigned(size_t esize, const U128& a, const U128& b) {
    if (esize == 8) {
        return Inst<U128>(Opcode::VectorUnsignedSaturatedAccumulateSigned8, a, b);
    }
    if (esize == 16) {
        return Inst<U128>(Opcode::VectorUnsignedSaturatedAccumulateSigned16, a, b);
    }
    if (esize == 32) {
        return Inst<U128>(Opcode::VectorUnsignedSaturatedAccumulateSigned32, a, b);
    }
    if (esize == 64) {
        return Inst<U128>(Opcode::VectorUnsignedSaturatedAccumulateSigned64, a, b);
    }

    // No opcode exists for any other element size.
    UNREACHABLE();
    return {};
}
U128 IREmitter::VectorUnsignedSaturatedNarrow(size_t esize, const U128& a) { U128 IREmitter::VectorUnsignedSaturatedNarrow(size_t esize, const U128& a) {
switch (esize) { switch (esize) {
case 16: case 16:

View file

@ -276,6 +276,7 @@ public:
U128 VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b); U128 VectorUnsignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
U128 VectorUnsignedRecipEstimate(const U128& a); U128 VectorUnsignedRecipEstimate(const U128& a);
U128 VectorUnsignedRecipSqrtEstimate(const U128& a); U128 VectorUnsignedRecipSqrtEstimate(const U128& a);
// Emits the VectorUnsignedSaturatedAccumulateSigned{8,16,32,64} opcode selected by esize, with a and b as operands.
U128 VectorUnsignedSaturatedAccumulateSigned(size_t esize, const U128& a, const U128& b);
U128 VectorUnsignedSaturatedNarrow(size_t esize, const U128& a); U128 VectorUnsignedSaturatedNarrow(size_t esize, const U128& a);
U128 VectorZeroExtend(size_t original_esize, const U128& a); U128 VectorZeroExtend(size_t original_esize, const U128& a);
U128 VectorZeroUpper(const U128& a); U128 VectorZeroUpper(const U128& a);

View file

@ -367,6 +367,10 @@ bool Inst::WritesToFPSRCumulativeSaturationBit() const {
case Opcode::VectorSignedSaturatedNeg16: case Opcode::VectorSignedSaturatedNeg16:
case Opcode::VectorSignedSaturatedNeg32: case Opcode::VectorSignedSaturatedNeg32:
case Opcode::VectorSignedSaturatedNeg64: case Opcode::VectorSignedSaturatedNeg64:
case Opcode::VectorUnsignedSaturatedAccumulateSigned8:
case Opcode::VectorUnsignedSaturatedAccumulateSigned16:
case Opcode::VectorUnsignedSaturatedAccumulateSigned32:
case Opcode::VectorUnsignedSaturatedAccumulateSigned64:
case Opcode::VectorUnsignedSaturatedNarrow16: case Opcode::VectorUnsignedSaturatedNarrow16:
case Opcode::VectorUnsignedSaturatedNarrow32: case Opcode::VectorUnsignedSaturatedNarrow32:
case Opcode::VectorUnsignedSaturatedNarrow64: case Opcode::VectorUnsignedSaturatedNarrow64:

View file

@ -424,6 +424,10 @@ OPCODE(VectorUnsignedAbsoluteDifference16, U128, U128,
OPCODE(VectorUnsignedAbsoluteDifference32, U128, U128, U128 ) OPCODE(VectorUnsignedAbsoluteDifference32, U128, U128, U128 )
OPCODE(VectorUnsignedRecipEstimate, U128, U128 ) OPCODE(VectorUnsignedRecipEstimate, U128, U128 )
OPCODE(VectorUnsignedRecipSqrtEstimate, U128, U128 ) OPCODE(VectorUnsignedRecipSqrtEstimate, U128, U128 )
// Unsigned saturated accumulation of signed values, one opcode per element size (8, 16, 32 and 64 bits).
OPCODE(VectorUnsignedSaturatedAccumulateSigned8, U128, U128, U128 )
OPCODE(VectorUnsignedSaturatedAccumulateSigned16, U128, U128, U128 )
OPCODE(VectorUnsignedSaturatedAccumulateSigned32, U128, U128, U128 )
OPCODE(VectorUnsignedSaturatedAccumulateSigned64, U128, U128, U128 )
OPCODE(VectorUnsignedSaturatedNarrow16, U128, U128 ) OPCODE(VectorUnsignedSaturatedNarrow16, U128, U128 )
OPCODE(VectorUnsignedSaturatedNarrow32, U128, U128 ) OPCODE(VectorUnsignedSaturatedNarrow32, U128, U128 )
OPCODE(VectorUnsignedSaturatedNarrow64, U128, U128 ) OPCODE(VectorUnsignedSaturatedNarrow64, U128, U128 )