diff --git a/src/backend/x64/emit_x64_vector.cpp b/src/backend/x64/emit_x64_vector.cpp
index 3ca545a3..a95c97e7 100644
--- a/src/backend/x64/emit_x64_vector.cpp
+++ b/src/backend/x64/emit_x64_vector.cpp
@@ -3572,6 +3572,70 @@ void EmitX64::EmitVectorSignedSaturatedNeg64(EmitContext& ctx, IR::Inst* inst) {
     });
 }
 
+// MSVC requires the capture within the saturate lambda, but clang and
+// GCC determine it to be unnecessary and warn about it.
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-lambda-capture"
+#endif
+template <typename T, typename U = std::make_unsigned_t<T>>
+static bool VectorSignedSaturatedShiftLeft(VectorArray<T>& dst, const VectorArray<T>& data, const VectorArray<T>& shift_values) {
+    static_assert(std::is_signed_v<T>, "T must be signed.");
+
+    bool qc_flag = false;
+
+    constexpr size_t bit_size_minus_one = Common::BitSize<T>() - 1;
+
+    const auto saturate = [bit_size_minus_one](T value) {
+        return static_cast<T>((static_cast<U>(value) >> bit_size_minus_one) + (U{1} << bit_size_minus_one) - 1);
+    };
+
+    for (size_t i = 0; i < dst.size(); i++) {
+        const T element = data[i];
+        const T shift = std::clamp<T>(static_cast<T>(Common::SignExtend<8>(shift_values[i] & 0xFF)),
+                                      -static_cast<T>(bit_size_minus_one), std::numeric_limits<T>::max());
+
+        if (element == 0) {
+            dst[i] = 0;
+        } else if (shift < 0) {
+            dst[i] = static_cast<T>(element >> -shift);
+        } else if (static_cast<U>(shift) > bit_size_minus_one) {
+            dst[i] = saturate(element);
+            qc_flag = true;
+        } else {
+            const T shifted = static_cast<T>(static_cast<U>(element) << static_cast<U>(shift));
+
+            if ((shifted >> shift) != element) {
+                dst[i] = saturate(element);
+                qc_flag = true;
+            } else {
+                dst[i] = shifted;
+            }
+        }
+    }
+
+    return qc_flag;
+}
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+void EmitX64::EmitVectorSignedSaturatedShiftLeft8(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorSignedSaturatedShiftLeft<s8>);
+}
+
+void EmitX64::EmitVectorSignedSaturatedShiftLeft16(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorSignedSaturatedShiftLeft<s16>);
+}
+
+void EmitX64::EmitVectorSignedSaturatedShiftLeft32(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorSignedSaturatedShiftLeft<s32>);
+}
+
+void EmitX64::EmitVectorSignedSaturatedShiftLeft64(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorSignedSaturatedShiftLeft<s64>);
+}
+
 void EmitX64::EmitVectorSub8(EmitContext& ctx, IR::Inst* inst) {
     EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubb);
 }
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index e81b528b..b4bd3617 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1646,6 +1646,21 @@ U128 IREmitter::VectorSignedSaturatedNeg(size_t esize, const U128& a) {
     return {};
 }
 
+U128 IREmitter::VectorSignedSaturatedShiftLeft(size_t esize, const U128& a, const U128& b) {
+    switch (esize) {
+    case 8:
+        return Inst<U128>(Opcode::VectorSignedSaturatedShiftLeft8, a, b);
+    case 16:
+        return Inst<U128>(Opcode::VectorSignedSaturatedShiftLeft16, a, b);
+    case 32:
+        return Inst<U128>(Opcode::VectorSignedSaturatedShiftLeft32, a, b);
+    case 64:
+        return Inst<U128>(Opcode::VectorSignedSaturatedShiftLeft64, a, b);
+    }
+    UNREACHABLE();
+    return {};
+}
+
 U128 IREmitter::VectorSub(size_t esize, const U128& a, const U128& b) {
     switch (esize) {
     case 8:
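For reference while reading the remaining plumbing, here is a minimal, self-contained sketch of the per-element semantics the VectorSignedSaturatedShiftLeft fallback above implements, specialized to 32-bit lanes. The helper name sqshl32 and the main() driver are illustrative only, not part of this patch; the int8_t parameter models the patch's sign-extension of the low byte of each shift element.

// sqshl32: illustrative scalar model of one 32-bit lane of the fallback above.
// Returns the saturated result and sets qc_flag (sticky) on saturation.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <limits>

static int32_t sqshl32(int32_t element, int8_t shift_byte, bool& qc_flag) {
    using U = uint32_t;

    // Same trick as the patch's saturate lambda: shifting the sign bit down to
    // bit 0 gives 0 (non-negative) or 1 (negative); adding 2^31 - 1 then yields
    // 0x7FFFFFFF (INT32_MAX) or 0x80000000 (INT32_MIN) respectively.
    const auto saturate = [](int32_t value) {
        return static_cast<int32_t>((static_cast<U>(value) >> 31) + (U{1} << 31) - 1);
    };

    // Clamp the lower bound so a very negative shift behaves like "shift right
    // by 31", which already yields 0 or -1 for every input.
    const int32_t shift = std::clamp<int32_t>(shift_byte, -31, std::numeric_limits<int32_t>::max());

    if (element == 0) {
        return 0;                              // zero never saturates
    }
    if (shift < 0) {
        return element >> -shift;              // negative shift: arithmetic shift right
    }
    if (static_cast<U>(shift) > 31) {
        qc_flag = true;                        // nonzero value shifted by >= 32 always saturates
        return saturate(element);
    }
    const int32_t shifted = static_cast<int32_t>(static_cast<U>(element) << shift);
    if ((shifted >> shift) != element) {       // overflow iff the round-trip loses information
        qc_flag = true;
        return saturate(element);
    }
    return shifted;
}

int main() {
    bool qc = false;
    std::cout << sqshl32(1, 30, qc) << ' ' << qc << '\n';   // 1073741824 0
    std::cout << sqshl32(1, 31, qc) << ' ' << qc << '\n';   // 2147483647 1 (saturated to INT32_MAX)
    std::cout << sqshl32(-1, 40, qc) << ' ' << qc << '\n';  // -2147483648 1 (shift >= 32)
    std::cout << sqshl32(-8, -2, qc) << ' ' << qc << '\n';  // -2 1 (qc stays sticky, like FPSR.QC)
}

The sticky qc flag mirrors what EmitTwoArgumentFallbackWithSaturation accumulates into FPSR.QC: once any lane saturates, the bit stays set.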
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 016cda79..a6523d23 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -278,6 +278,7 @@ public:
     U128 VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a);
     U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a);
     U128 VectorSignedSaturatedNeg(size_t esize, const U128& a);
+    U128 VectorSignedSaturatedShiftLeft(size_t esize, const U128& a, const U128& b);
     U128 VectorSub(size_t esize, const U128& a, const U128& b);
     Table VectorTable(std::vector<U64> values);
     U128 VectorTableLookup(const U128& defaults, const Table& table, const U128& indices);
diff --git a/src/frontend/ir/microinstruction.cpp b/src/frontend/ir/microinstruction.cpp
index 5a91ab92..9994d920 100644
--- a/src/frontend/ir/microinstruction.cpp
+++ b/src/frontend/ir/microinstruction.cpp
@@ -369,6 +369,10 @@ bool Inst::WritesToFPSRCumulativeSaturationBit() const {
     case Opcode::VectorSignedSaturatedNeg16:
     case Opcode::VectorSignedSaturatedNeg32:
     case Opcode::VectorSignedSaturatedNeg64:
+    case Opcode::VectorSignedSaturatedShiftLeft8:
+    case Opcode::VectorSignedSaturatedShiftLeft16:
+    case Opcode::VectorSignedSaturatedShiftLeft32:
+    case Opcode::VectorSignedSaturatedShiftLeft64:
     case Opcode::VectorUnsignedSaturatedAccumulateSigned8:
     case Opcode::VectorUnsignedSaturatedAccumulateSigned16:
     case Opcode::VectorUnsignedSaturatedAccumulateSigned32:
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 504f5bd7..29cee190 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -422,6 +422,10 @@ OPCODE(VectorSignedSaturatedNeg8,                  U128,           U128                            )
 OPCODE(VectorSignedSaturatedNeg16,                 U128,           U128                            )
 OPCODE(VectorSignedSaturatedNeg32,                 U128,           U128                            )
 OPCODE(VectorSignedSaturatedNeg64,                 U128,           U128                            )
+OPCODE(VectorSignedSaturatedShiftLeft8,            U128,           U128,           U128            )
+OPCODE(VectorSignedSaturatedShiftLeft16,           U128,           U128,           U128            )
+OPCODE(VectorSignedSaturatedShiftLeft32,           U128,           U128,           U128            )
+OPCODE(VectorSignedSaturatedShiftLeft64,           U128,           U128,           U128            )
 OPCODE(VectorSub8,                                 U128,           U128,           U128            )
 OPCODE(VectorSub16,                                U128,           U128,           U128            )
 OPCODE(VectorSub32,                                U128,           U128,           U128            )
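Nothing in this patch wires the new IR helper into a frontend yet. As a sketch of how an A64 translator could consume it for SQSHL (vector, register), assuming dynarmic's usual visitor conventions (V, ReservedValue, Imm<2>::ZeroExtend); the function below is illustrative, not part of this change.

// Hypothetical A64 translator glue (not part of this patch): per-element
// signed saturating left shift, Vd = sat(Vn << Vm), with FPSR.QC set by the
// backend fallback when any lane saturates.
bool TranslatorVisitor::SQSHL_vec(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) {
    if (size == 0b11 && !Q) {
        return ReservedValue();  // 64-bit lanes only exist in the 128-bit variant
    }

    const size_t esize = 8 << size.ZeroExtend();
    const size_t datasize = Q ? 128 : 64;

    const IR::U128 data = V(datasize, Vn);           // elements to shift
    const IR::U128 shift_amounts = V(datasize, Vm);  // signed per-element shift amounts
    const IR::U128 result = ir.VectorSignedSaturatedShiftLeft(esize, data, shift_amounts);

    V(datasize, Vd, result);
    return true;
}

Note the operand order: the value being shifted comes first and the shift vector second, matching the fallback's (dst, data, shift_values) parameters and the VectorSignedSaturatedShiftLeft* opcode signatures above.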