diff --git a/src/backend/x64/emit_x64_vector.cpp b/src/backend/x64/emit_x64_vector.cpp
index a95c97e7..0888ad07 100644
--- a/src/backend/x64/emit_x64_vector.cpp
+++ b/src/backend/x64/emit_x64_vector.cpp
@@ -4100,6 +4100,60 @@ void EmitX64::EmitVectorUnsignedSaturatedNarrow64(EmitContext& ctx, IR::Inst* in
     });
 }
 
+// Fallback for unsigned saturating left shifts: shifts each unsigned lane left
+// by a per-lane signed amount (negative amounts shift right); returns the QC flag.
+template <typename T, typename S = std::make_signed_t<T>>
+static bool VectorUnsignedSaturatedShiftLeft(VectorArray<T>& dst, const VectorArray<T>& data, const VectorArray<T>& shift_values) {
+    static_assert(std::is_unsigned_v<T>, "T must be an unsigned type.");
+
+    bool qc_flag = false;
+
+    constexpr size_t bit_size = Common::BitSize<T>();
+    constexpr S negative_bit_size = -static_cast<S>(bit_size);
+
+    for (size_t i = 0; i < dst.size(); i++) {
+        const T element = data[i];
+        // Only the low byte of each shift lane is significant.
+        const S shift = std::clamp(static_cast<S>(Common::SignExtend<8>(shift_values[i] & 0xFF)),
+                                   negative_bit_size, std::numeric_limits<S>::max());
+
+        if (element == 0 || shift <= negative_bit_size) {
+            dst[i] = 0;
+        } else if (shift < 0) {
+            dst[i] = static_cast<T>(element >> -shift);
+        } else if (shift >= static_cast<S>(bit_size)) {
+            dst[i] = std::numeric_limits<T>::max();
+            qc_flag = true;
+        } else {
+            const T shifted = element << shift;
+
+            if ((shifted >> shift) != element) {
+                dst[i] = std::numeric_limits<T>::max();
+                qc_flag = true;
+            } else {
+                dst[i] = shifted;
+            }
+        }
+    }
+
+    return qc_flag;
+}
+
+void EmitX64::EmitVectorUnsignedSaturatedShiftLeft8(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorUnsignedSaturatedShiftLeft<u8>);
+}
+
+void EmitX64::EmitVectorUnsignedSaturatedShiftLeft16(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorUnsignedSaturatedShiftLeft<u16>);
+}
+
+void EmitX64::EmitVectorUnsignedSaturatedShiftLeft32(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorUnsignedSaturatedShiftLeft<u32>);
+}
+
+void EmitX64::EmitVectorUnsignedSaturatedShiftLeft64(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorUnsignedSaturatedShiftLeft<u64>);
+}
+
 void EmitX64::EmitVectorZeroExtend8(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index b2c2ff07..9ae1fc21 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1735,6 +1735,21 @@ U128 IREmitter::VectorUnsignedSaturatedNarrow(size_t esize, const U128& a) {
     return {};
 }
 
+U128 IREmitter::VectorUnsignedSaturatedShiftLeft(size_t esize, const U128& a, const U128& b) {
+    switch (esize) {
+    case 8:
+        return Inst<U128>(Opcode::VectorUnsignedSaturatedShiftLeft8, a, b);
+    case 16:
+        return Inst<U128>(Opcode::VectorUnsignedSaturatedShiftLeft16, a, b);
+    case 32:
+        return Inst<U128>(Opcode::VectorUnsignedSaturatedShiftLeft32, a, b);
+    case 64:
+        return Inst<U128>(Opcode::VectorUnsignedSaturatedShiftLeft64, a, b);
+    }
+    UNREACHABLE();
+    return {};
+}
+
 U128 IREmitter::VectorZeroExtend(size_t original_esize, const U128& a) {
     switch (original_esize) {
     case 8:
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 24527321..22f252de 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -287,6 +287,7 @@ public:
     U128 VectorUnsignedRecipSqrtEstimate(const U128& a);
     U128 VectorUnsignedSaturatedAccumulateSigned(size_t esize, const U128& a, const U128& b);
     U128 VectorUnsignedSaturatedNarrow(size_t esize, const U128& a);
+    U128 VectorUnsignedSaturatedShiftLeft(size_t esize, const U128& a, const U128& b);
     U128 VectorZeroExtend(size_t original_esize, const U128& a);
     U128 VectorZeroUpper(const U128& a);
     U128 ZeroVector();
diff --git a/src/frontend/ir/microinstruction.cpp b/src/frontend/ir/microinstruction.cpp
index 02d3dd4c..a2994e5a 100644
--- a/src/frontend/ir/microinstruction.cpp
+++ b/src/frontend/ir/microinstruction.cpp
@@ -380,6 +380,10 @@ bool Inst::WritesToFPSRCumulativeSaturationBit() const {
     case Opcode::VectorUnsignedSaturatedNarrow16:
     case Opcode::VectorUnsignedSaturatedNarrow32:
     case Opcode::VectorUnsignedSaturatedNarrow64:
+    case Opcode::VectorUnsignedSaturatedShiftLeft8:
+    case Opcode::VectorUnsignedSaturatedShiftLeft16:
+    case Opcode::VectorUnsignedSaturatedShiftLeft32:
+    case Opcode::VectorUnsignedSaturatedShiftLeft64:
         return true;
 
     default:
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index 2aae4d44..6c5774fe 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -446,6 +446,10 @@ OPCODE(VectorUnsignedSaturatedAccumulateSigned64,           U128,           U128
 OPCODE(VectorUnsignedSaturatedNarrow16,                     U128,           U128                                            )
 OPCODE(VectorUnsignedSaturatedNarrow32,                     U128,           U128                                            )
 OPCODE(VectorUnsignedSaturatedNarrow64,                     U128,           U128                                            )
+OPCODE(VectorUnsignedSaturatedShiftLeft8,                   U128,           U128,           U128                            )
+OPCODE(VectorUnsignedSaturatedShiftLeft16,                  U128,           U128,           U128                            )
+OPCODE(VectorUnsignedSaturatedShiftLeft32,                  U128,           U128,           U128                            )
+OPCODE(VectorUnsignedSaturatedShiftLeft64,                  U128,           U128,           U128                            )
 OPCODE(VectorZeroExtend8,                                   U128,           U128                                            )
 OPCODE(VectorZeroExtend16,                                  U128,           U128                                            )
 OPCODE(VectorZeroExtend32,                                  U128,           U128                                            )