diff --git a/src/backend/x64/emit_x64_vector.cpp b/src/backend/x64/emit_x64_vector.cpp
index 03d47af0..48618dae 100644
--- a/src/backend/x64/emit_x64_vector.cpp
+++ b/src/backend/x64/emit_x64_vector.cpp
@@ -3871,6 +3871,60 @@ void EmitX64::EmitVectorSignedSaturatedShiftLeft64(EmitContext& ctx, IR::Inst* i
     EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorSignedSaturatedShiftLeft<s64>);
 }
 
+template <typename T, typename U = std::make_unsigned_t<T>>
+static bool VectorSignedSaturatedShiftLeftUnsigned(VectorArray<T>& dst, const VectorArray<T>& data, const VectorArray<T>& shift_values) {
+    static_assert(std::is_signed_v<T>, "T must be signed.");
+
+    constexpr size_t bit_size_minus_one = Common::BitSize<T>() - 1;
+
+    bool qc_flag = false;
+    for (size_t i = 0; i < dst.size(); i++) {
+        const T element = data[i];
+        const T shift = std::clamp<T>(static_cast<T>(Common::SignExtend<8>(shift_values[i] & 0xFF)),
+                                      -static_cast<T>(bit_size_minus_one), std::numeric_limits<T>::max());
+
+        if (element == 0) {
+            dst[i] = 0;
+        } else if (element < 0) {
+            dst[i] = 0;
+            qc_flag = true;
+        } else if (shift < 0) {
+            dst[i] = static_cast<T>(element >> -shift);
+        } else if (static_cast<U>(shift) > bit_size_minus_one) {
+            dst[i] = static_cast<T>(std::numeric_limits<U>::max());
+            qc_flag = true;
+        } else {
+            const U shifted = static_cast<U>(element) << static_cast<U>(shift);
+            const U shifted_test = shifted >> static_cast<U>(shift);
+
+            if (shifted_test != static_cast<U>(element)) {
+                dst[i] = static_cast<T>(std::numeric_limits<U>::max());
+                qc_flag = true;
+            } else {
+                dst[i] = shifted;
+            }
+        }
+    }
+
+    return qc_flag;
+}
+
+void EmitX64::EmitVectorSignedSaturatedShiftLeftUnsigned8(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorSignedSaturatedShiftLeftUnsigned<s8>);
+}
+
+void EmitX64::EmitVectorSignedSaturatedShiftLeftUnsigned16(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorSignedSaturatedShiftLeftUnsigned<s16>);
+}
+
+void EmitX64::EmitVectorSignedSaturatedShiftLeftUnsigned32(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorSignedSaturatedShiftLeftUnsigned<s32>);
+}
+
+void EmitX64::EmitVectorSignedSaturatedShiftLeftUnsigned64(EmitContext& ctx, IR::Inst* inst) {
+    EmitTwoArgumentFallbackWithSaturation(code, ctx, inst, VectorSignedSaturatedShiftLeftUnsigned<s64>);
+}
+
 void EmitX64::EmitVectorSub8(EmitContext& ctx, IR::Inst* inst) {
     EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubb);
 }
diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc
index e02bb2fa..50867ee8 100644
--- a/src/frontend/A64/decoder/a64.inc
+++ b/src/frontend/A64/decoder/a64.inc
@@ -523,7 +523,7 @@ INST(URSHR_1,                "URSHR",                                     "01111
 INST(URSRA_1,                "URSRA",                                     "011111110IIIIiii001101nnnnnddddd")
 INST(SRI_1,                  "SRI",                                       "011111110IIIIiii010001nnnnnddddd")
 INST(SLI_1,                  "SLI",                                       "011111110IIIIiii010101nnnnnddddd")
-//INST(SQSHLU_1,             "SQSHLU",                                    "011111110IIIIiii011001nnnnnddddd")
+INST(SQSHLU_1,               "SQSHLU",                                    "011111110IIIIiii011001nnnnnddddd")
 INST(UQSHL_imm_1,            "UQSHL (immediate)",                         "011111110IIIIiii011101nnnnnddddd")
 INST(SQSHRUN_1,              "SQSHRUN, SQSHRUN2",                         "011111110IIIIiii100001nnnnnddddd")
 //INST(SQRSHRUN_1,           "SQRSHRUN, SQRSHRUN2",                       "011111110IIIIiii100011nnnnnddddd")
@@ -855,7 +855,7 @@ INST(URSHR_2,                "URSHR",                                     "0Q101
 INST(URSRA_2,                "URSRA",                                     "0Q1011110IIIIiii001101nnnnnddddd")
 INST(SRI_2,                  "SRI",                                       "0Q1011110IIIIiii010001nnnnnddddd")
 INST(SLI_2,                  "SLI",                                       "0Q1011110IIIIiii010101nnnnnddddd")
-//INST(SQSHLU_2,             "SQSHLU",                                    "0Q1011110IIIIiii011001nnnnnddddd")
+INST(SQSHLU_2,               "SQSHLU",                                    "0Q1011110IIIIiii011001nnnnnddddd")
 INST(UQSHL_imm_2,            "UQSHL (immediate)",                         "0Q1011110IIIIiii011101nnnnnddddd")
 INST(SQSHRUN_2,              "SQSHRUN, SQSHRUN2",                         "0Q1011110IIIIiii100001nnnnnddddd")
 INST(SQRSHRUN_2,             "SQRSHRUN, SQRSHRUN2",                       "0Q1011110IIIIiii100011nnnnnddddd")
diff --git a/src/frontend/A64/translate/impl/simd_scalar_shift_by_immediate.cpp b/src/frontend/A64/translate/impl/simd_scalar_shift_by_immediate.cpp
index 8553bf80..5797d5df 100644
--- a/src/frontend/A64/translate/impl/simd_scalar_shift_by_immediate.cpp
+++ b/src/frontend/A64/translate/impl/simd_scalar_shift_by_immediate.cpp
@@ -15,6 +15,12 @@ enum class Narrowing {
     SaturateToSigned,
 };
 
+enum class SaturatingShiftLeftType {
+    Signed,
+    Unsigned,
+    SignedWithUnsignedSaturation,
+};
+
 enum class ShiftExtraBehavior {
     None,
     Accumulate,
@@ -30,7 +36,7 @@ enum class FloatConversionDirection {
     FloatToFixed,
 };
 
-bool SaturatingShiftLeft(TranslatorVisitor& v, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd, Signedness sign) {
+bool SaturatingShiftLeft(TranslatorVisitor& v, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd, SaturatingShiftLeftType type) {
     if (immh == 0b0000) {
         return v.ReservedValue();
     }
@@ -40,11 +46,16 @@ bool SaturatingShiftLeft(TranslatorVisitor& v, Imm<4> immh, Imm<3> immb, Vec Vn,
 
     const IR::U128 operand = v.ir.ZeroExtendToQuad(v.V_scalar(esize, Vn));
     const IR::U128 shift = v.ir.ZeroExtendToQuad(v.I(esize, shift_amount));
-    const IR::U128 result = [&v, esize, operand, shift, sign] {
-        if (sign == Signedness::Signed) {
+    const IR::U128 result = [&v, esize, operand, shift, type] {
+        if (type == SaturatingShiftLeftType::Signed) {
             return v.ir.VectorSignedSaturatedShiftLeft(esize, operand, shift);
         }
-        return v.ir.VectorUnsignedSaturatedShiftLeft(esize, operand, shift);
+
+        if (type == SaturatingShiftLeftType::Unsigned) {
+            return v.ir.VectorUnsignedSaturatedShiftLeft(esize, operand, shift);
+        }
+
+        return v.ir.VectorSignedSaturatedShiftLeftUnsigned(esize, operand, shift);
     }();
 
     v.ir.SetQ(Vd, result);
@@ -275,7 +286,11 @@ bool TranslatorVisitor::SRI_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) {
 }
 
 bool TranslatorVisitor::SQSHL_imm_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) {
-    return SaturatingShiftLeft(*this, immh, immb, Vn, Vd, Signedness::Signed);
+    return SaturatingShiftLeft(*this, immh, immb, Vn, Vd, SaturatingShiftLeftType::Signed);
+}
+
+bool TranslatorVisitor::SQSHLU_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) {
+    return SaturatingShiftLeft(*this, immh, immb, Vn, Vd, SaturatingShiftLeftType::SignedWithUnsignedSaturation);
 }
 
 bool TranslatorVisitor::SQSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) {
@@ -318,7 +333,7 @@ bool TranslatorVisitor::SHL_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) {
 }
 
 bool TranslatorVisitor::UQSHL_imm_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) {
-    return SaturatingShiftLeft(*this, immh, immb, Vn, Vd, Signedness::Unsigned);
+    return SaturatingShiftLeft(*this, immh, immb, Vn, Vd, SaturatingShiftLeftType::Unsigned);
 }
 
 bool TranslatorVisitor::UQSHRN_1(Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) {
diff --git a/src/frontend/A64/translate/impl/simd_shift_by_immediate.cpp b/src/frontend/A64/translate/impl/simd_shift_by_immediate.cpp
index 197740b0..3097da5e 100644
--- a/src/frontend/A64/translate/impl/simd_shift_by_immediate.cpp
+++ b/src/frontend/A64/translate/impl/simd_shift_by_immediate.cpp
@@ -31,7 +31,13 @@ enum class Narrowing {
     SaturateToSigned,
 };
 
+enum class SaturatingShiftLeftType {
+    Signed,
+    Unsigned,
+    SignedWithUnsignedSaturation,
+};
+
 enum class FloatConversionDirection {
     FixedToFloat,
     FloatToFixed,
 };
@@ -160,7 +166,7 @@ bool ShiftLeftLong(TranslatorVisitor& v, bool Q, Imm<4> immh, Imm<3> immb, Vec V
     return true;
 }
 
-bool SaturatingShiftLeft(TranslatorVisitor& v, bool Q, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd, Signedness sign) {
+bool SaturatingShiftLeft(TranslatorVisitor& v, bool Q, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd, SaturatingShiftLeftType type) {
     if (!Q && immh.Bit<3>()) {
         return v.ReservedValue();
     }
@@ -172,11 +178,15 @@ bool SaturatingShiftLeft(TranslatorVisitor& v, bool Q, Imm<4> immh, Imm<3> immb,
 
     const IR::U128 operand = v.V(datasize, Vn);
     const IR::U128 shift_vec = v.ir.VectorBroadcast(esize, v.I(esize, shift));
     const IR::U128 result = [&] {
-        if (sign == Signedness::Signed) {
+        if (type == SaturatingShiftLeftType::Signed) {
             return v.ir.VectorSignedSaturatedShiftLeft(esize, operand, shift_vec);
         }
-        return v.ir.VectorUnsignedSaturatedShiftLeft(esize, operand, shift_vec);
+        if (type == SaturatingShiftLeftType::Unsigned) {
+            return v.ir.VectorUnsignedSaturatedShiftLeft(esize, operand, shift_vec);
+        }
+
+        return v.ir.VectorSignedSaturatedShiftLeftUnsigned(esize, operand, shift_vec);
     }();
 
     v.V(datasize, Vd, result);
@@ -267,7 +277,11 @@ bool TranslatorVisitor::RSHRN(bool Q, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd)
 }
 
 bool TranslatorVisitor::SQSHL_imm_2(bool Q, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) {
-    return SaturatingShiftLeft(*this, Q, immh, immb, Vn, Vd, Signedness::Signed);
+    return SaturatingShiftLeft(*this, Q, immh, immb, Vn, Vd, SaturatingShiftLeftType::Signed);
+}
+
+bool TranslatorVisitor::SQSHLU_2(bool Q, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) {
+    return SaturatingShiftLeft(*this, Q, immh, immb, Vn, Vd, SaturatingShiftLeftType::SignedWithUnsignedSaturation);
 }
 
 bool TranslatorVisitor::SQSHRN_2(bool Q, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) {
@@ -287,7 +301,7 @@ bool TranslatorVisitor::SQRSHRUN_2(bool Q, Imm<4> immh, Imm<3> immb, Vec Vn, Vec
 }
 
 bool TranslatorVisitor::UQSHL_imm_2(bool Q, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) {
-    return SaturatingShiftLeft(*this, Q, immh, immb, Vn, Vd, Signedness::Unsigned);
+    return SaturatingShiftLeft(*this, Q, immh, immb, Vn, Vd, SaturatingShiftLeftType::Unsigned);
 }
 
 bool TranslatorVisitor::UQSHRN_2(bool Q, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) {
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index 53b546c0..e774f25d 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1661,6 +1661,21 @@ U128 IREmitter::VectorSignedSaturatedShiftLeft(size_t esize, const U128& a, cons
     return {};
 }
 
+U128 IREmitter::VectorSignedSaturatedShiftLeftUnsigned(size_t esize, const U128& a, const U128& b) {
+    switch (esize) {
+    case 8:
+        return Inst<U128>(Opcode::VectorSignedSaturatedShiftLeftUnsigned8, a, b);
+    case 16:
+        return Inst<U128>(Opcode::VectorSignedSaturatedShiftLeftUnsigned16, a, b);
+    case 32:
+        return Inst<U128>(Opcode::VectorSignedSaturatedShiftLeftUnsigned32, a, b);
+    case 64:
+        return Inst<U128>(Opcode::VectorSignedSaturatedShiftLeftUnsigned64, a, b);
+    }
+    UNREACHABLE();
+    return {};
+}
+
 U128 IREmitter::VectorSub(size_t esize, const U128& a, const U128& b) {
     switch (esize) {
     case 8:
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 291712ee..a77089fe 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -279,6 +279,7 @@ public:
     U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a);
     U128 VectorSignedSaturatedNeg(size_t esize, const U128& a);
     U128 VectorSignedSaturatedShiftLeft(size_t esize, const U128& a, const U128& b);
+    U128 VectorSignedSaturatedShiftLeftUnsigned(size_t esize, const U128& a, const U128& b);
     U128 VectorSub(size_t esize, const U128& a, const U128& b);
     Table VectorTable(std::vector<U128> values);
     U128 VectorTableLookup(const U128& defaults, const Table& table, const U128& indices);
diff --git a/src/frontend/ir/microinstruction.cpp b/src/frontend/ir/microinstruction.cpp
index 46d5fb87..b34b989d 100644
--- a/src/frontend/ir/microinstruction.cpp
+++ b/src/frontend/ir/microinstruction.cpp
@@ -384,6 +384,10 @@ bool Inst::WritesToFPSRCumulativeSaturationBit() const {
     case Opcode::VectorSignedSaturatedShiftLeft16:
     case Opcode::VectorSignedSaturatedShiftLeft32:
     case Opcode::VectorSignedSaturatedShiftLeft64:
+    case Opcode::VectorSignedSaturatedShiftLeftUnsigned8:
+    case Opcode::VectorSignedSaturatedShiftLeftUnsigned16:
+    case Opcode::VectorSignedSaturatedShiftLeftUnsigned32:
+    case Opcode::VectorSignedSaturatedShiftLeftUnsigned64:
    case Opcode::VectorUnsignedSaturatedAccumulateSigned8:
     case Opcode::VectorUnsignedSaturatedAccumulateSigned16:
     case Opcode::VectorUnsignedSaturatedAccumulateSigned32:
diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc
index b5e8f691..7723b03e 100644
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -428,6 +428,10 @@ OPCODE(VectorSignedSaturatedShiftLeft8,                     U128,           U128
 OPCODE(VectorSignedSaturatedShiftLeft16,                    U128,           U128,           U128            )
 OPCODE(VectorSignedSaturatedShiftLeft32,                    U128,           U128,           U128            )
 OPCODE(VectorSignedSaturatedShiftLeft64,                    U128,           U128,           U128            )
+OPCODE(VectorSignedSaturatedShiftLeftUnsigned8,             U128,           U128,           U128            )
+OPCODE(VectorSignedSaturatedShiftLeftUnsigned16,            U128,           U128,           U128            )
+OPCODE(VectorSignedSaturatedShiftLeftUnsigned32,            U128,           U128,           U128            )
+OPCODE(VectorSignedSaturatedShiftLeftUnsigned64,            U128,           U128,           U128            )
 OPCODE(VectorSub8,                                          U128,           U128,           U128            )
 OPCODE(VectorSub16,                                         U128,           U128,           U128            )
 OPCODE(VectorSub32,                                         U128,           U128,           U128            )
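
For readers skimming the patch, the sketch below (illustrative only, not part of the diff; the helper name sqshlu8 and its scalar signature are my own) models the per-element behaviour the new fallback implements for 8-bit lanes with an immediate shift of 0..7: non-negative signed inputs are shifted left and saturate to the unsigned maximum, negative inputs clamp to zero, and either saturation sets the cumulative saturation (QC) flag. The register-operand case handled by the fallback also accepts negative shift amounts (a right shift), which this sketch omits.

// ---------------------------------------------------------------------------
// Illustrative sketch -- not part of the patch above. Scalar model of one
// 8-bit SQSHLU lane with an immediate (non-negative) shift amount.
// ---------------------------------------------------------------------------
#include <cstdint>
#include <cstdio>
#include <limits>

// Shift a signed 8-bit element left, saturating the result to the unsigned
// 8-bit range; `qc` is set whenever saturation occurred.
static std::uint8_t sqshlu8(std::int8_t element, int shift, bool& qc) {
    constexpr std::uint8_t umax = std::numeric_limits<std::uint8_t>::max();

    if (element == 0) {
        return 0;                                   // zero stays zero, no saturation
    }
    if (element < 0) {
        qc = true;                                  // negative inputs saturate to zero
        return 0;
    }
    if (shift > 7) {
        qc = true;                                  // every set bit would be shifted out
        return umax;
    }
    const unsigned shifted = static_cast<unsigned>(element) << shift;
    if (shifted > umax) {
        qc = true;                                  // overflowed the unsigned range
        return umax;
    }
    return static_cast<std::uint8_t>(shifted);      // fits: no saturation
}

int main() {
    bool qc = false;
    std::printf("%u\n", static_cast<unsigned>(sqshlu8(0x21, 2, qc)));   // 132 -- fits u8, would not fit s8
    std::printf("%u\n", static_cast<unsigned>(sqshlu8(0x40, 2, qc)));   // 255 -- saturated
    std::printf("qc=%d\n", qc);                                         // qc=1
}

The first call is the case that distinguishes SQSHLU from SQSHL: 0x21 << 2 = 0x84 fits the unsigned 8-bit range but not the signed one, so SQSHL would saturate to 0x7F and set QC, while SQSHLU returns 0x84 without saturating.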