diff --git a/src/backend/x64/emit_x64_vector.cpp b/src/backend/x64/emit_x64_vector.cpp index c9024d8a..a519cd09 100644 --- a/src/backend/x64/emit_x64_vector.cpp +++ b/src/backend/x64/emit_x64_vector.cpp @@ -510,6 +510,134 @@ void EmitX64::EmitVectorArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst) ctx.reg_alloc.DefineValue(inst, result); } +template +static constexpr T VShift(T x, T y) { + const s8 shift_amount = static_cast(static_cast(y)); + const s64 bit_size = static_cast(Common::BitSize()); + + if constexpr (std::is_signed_v) { + if (shift_amount >= bit_size) { + return 0; + } + + if (shift_amount <= -bit_size) { + // Parentheses necessary, as MSVC doesn't appear to consider cast parentheses + // as a grouping in terms of precedence, causing warning C4554 to fire. See: + // https://developercommunity.visualstudio.com/content/problem/144783/msvc-2017-does-not-understand-that-static-cast-cou.html + return x >> (T(bit_size - 1)); + } + } else if (shift_amount <= -bit_size || shift_amount >= bit_size) { + return 0; + } + + if (shift_amount < 0) { + return x >> T(-shift_amount); + } + + using unsigned_type = std::make_unsigned_t; + return static_cast(static_cast(x) << static_cast(shift_amount)); +} + +void EmitX64::EmitVectorArithmeticVShift8(EmitContext& ctx, IR::Inst* inst) { + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { + std::transform(a.begin(), a.end(), b.begin(), result.begin(), VShift); + }); +} + +void EmitX64::EmitVectorArithmeticVShift16(EmitContext& ctx, IR::Inst* inst) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512VL) && code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512BW)) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + + code.vmovdqa(tmp, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); + code.vpxor(right_shift, right_shift, right_shift); + code.vpsubw(right_shift, right_shift, left_shift); + + code.vpsllw(xmm0, left_shift, 8); + code.vpsraw(xmm0, xmm0, 15); + + code.vpand(right_shift, right_shift, tmp); + code.vpand(left_shift, left_shift, tmp); + + code.vpsravw(tmp, result, right_shift); + code.vpsllvw(result, result, left_shift); + code.pblendvb(result, tmp); + + ctx.reg_alloc.DefineValue(inst, result); + return; + } + + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { + std::transform(a.begin(), a.end(), b.begin(), result.begin(), VShift); + }); +} + +void EmitX64::EmitVectorArithmeticVShift32(EmitContext& ctx, IR::Inst* inst) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX2)) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + + code.vmovdqa(tmp, code.MConst(xword, 0x000000FF000000FF, 0x000000FF000000FF)); + code.vpxor(right_shift, right_shift, right_shift); + code.vpsubd(right_shift, right_shift, left_shift); + + code.vpslld(xmm0, left_shift, 24); + + code.vpand(right_shift, right_shift, tmp); + code.vpand(left_shift, left_shift, tmp); + + code.vpsravd(tmp, result, right_shift); + code.vpsllvd(result, result, left_shift); + code.blendvps(result, tmp); + + ctx.reg_alloc.DefineValue(inst, result); + return; + } + + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { + std::transform(a.begin(), a.end(), b.begin(), result.begin(), VShift); + }); +} + +void EmitX64::EmitVectorArithmeticVShift64(EmitContext& ctx, IR::Inst* inst) { + if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512F) && code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512VL)) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + + code.vmovdqa(tmp, code.MConst(xword, 0x00000000000000FF, 0x00000000000000FF)); + code.vpxor(right_shift, right_shift, right_shift); + code.vpsubq(right_shift, right_shift, left_shift); + + code.vpsllq(xmm0, left_shift, 56); + + code.vpand(right_shift, right_shift, tmp); + code.vpand(left_shift, left_shift, tmp); + + code.vpsravq(tmp, result, right_shift); + code.vpsllvq(result, result, left_shift); + code.blendvpd(result, tmp); + + ctx.reg_alloc.DefineValue(inst, result); + return; + } + + EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { + std::transform(a.begin(), a.end(), b.begin(), result.begin(), VShift); + }); +} + void EmitX64::EmitVectorBroadcastLower8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -1336,141 +1464,13 @@ void EmitX64::EmitVectorLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } -template -static constexpr T LogicalVShift(T x, T y) { - const s8 shift_amount = static_cast(static_cast(y)); - const s64 bit_size = static_cast(Common::BitSize()); - - if constexpr (std::is_signed_v) { - if (shift_amount >= bit_size) { - return 0; - } - - if (shift_amount <= -bit_size) { - // Parentheses necessary, as MSVC doesn't appear to consider cast parentheses - // as a grouping in terms of precedence, causing warning C4554 to fire. See: - // https://developercommunity.visualstudio.com/content/problem/144783/msvc-2017-does-not-understand-that-static-cast-cou.html - return x >> (T(bit_size - 1)); - } - } else if (shift_amount <= -bit_size || shift_amount >= bit_size) { - return 0; - } - - if (shift_amount < 0) { - return x >> T(-shift_amount); - } - - using unsigned_type = std::make_unsigned_t; - return static_cast(static_cast(x) << static_cast(shift_amount)); -} - -void EmitX64::EmitVectorLogicalVShiftS8(EmitContext& ctx, IR::Inst* inst) { - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift); - }); -} - -void EmitX64::EmitVectorLogicalVShiftS16(EmitContext& ctx, IR::Inst* inst) { - if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512VL) && code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512BW)) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - - code.vmovdqa(tmp, code.MConst(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); - code.vpxor(right_shift, right_shift, right_shift); - code.vpsubw(right_shift, right_shift, left_shift); - - code.vpsllw(xmm0, left_shift, 8); - code.vpsraw(xmm0, xmm0, 15); - - code.vpand(right_shift, right_shift, tmp); - code.vpand(left_shift, left_shift, tmp); - - code.vpsravw(tmp, result, right_shift); - code.vpsllvw(result, result, left_shift); - code.pblendvb(result, tmp); - - ctx.reg_alloc.DefineValue(inst, result); - return; - } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift); - }); -} - -void EmitX64::EmitVectorLogicalVShiftS32(EmitContext& ctx, IR::Inst* inst) { - if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX2)) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - - code.vmovdqa(tmp, code.MConst(xword, 0x000000FF000000FF, 0x000000FF000000FF)); - code.vpxor(right_shift, right_shift, right_shift); - code.vpsubd(right_shift, right_shift, left_shift); - - code.vpslld(xmm0, left_shift, 24); - - code.vpand(right_shift, right_shift, tmp); - code.vpand(left_shift, left_shift, tmp); - - code.vpsravd(tmp, result, right_shift); - code.vpsllvd(result, result, left_shift); - code.blendvps(result, tmp); - - ctx.reg_alloc.DefineValue(inst, result); - return; - } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift); - }); -} - -void EmitX64::EmitVectorLogicalVShiftS64(EmitContext& ctx, IR::Inst* inst) { - if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512F) && code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512VL)) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - - code.vmovdqa(tmp, code.MConst(xword, 0x00000000000000FF, 0x00000000000000FF)); - code.vpxor(right_shift, right_shift, right_shift); - code.vpsubq(right_shift, right_shift, left_shift); - - code.vpsllq(xmm0, left_shift, 56); - - code.vpand(right_shift, right_shift, tmp); - code.vpand(left_shift, left_shift, tmp); - - code.vpsravq(tmp, result, right_shift); - code.vpsllvq(result, result, left_shift); - code.blendvpd(result, tmp); - - ctx.reg_alloc.DefineValue(inst, result); - return; - } - - EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift); - }); -} - -void EmitX64::EmitVectorLogicalVShiftU8(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitVectorLogicalVShift8(EmitContext& ctx, IR::Inst* inst) { EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift); + std::transform(a.begin(), a.end(), b.begin(), result.begin(), VShift); }); } -void EmitX64::EmitVectorLogicalVShiftU16(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitVectorLogicalVShift16(EmitContext& ctx, IR::Inst* inst) { if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512VL) && code.DoesCpuSupport(Xbyak::util::Cpu::tAVX512BW)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -1494,11 +1494,11 @@ void EmitX64::EmitVectorLogicalVShiftU16(EmitContext& ctx, IR::Inst* inst) { } EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift); + std::transform(a.begin(), a.end(), b.begin(), result.begin(), VShift); }); } -void EmitX64::EmitVectorLogicalVShiftU32(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitVectorLogicalVShift32(EmitContext& ctx, IR::Inst* inst) { if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX2)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -1522,11 +1522,11 @@ void EmitX64::EmitVectorLogicalVShiftU32(EmitContext& ctx, IR::Inst* inst) { } EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift); + std::transform(a.begin(), a.end(), b.begin(), result.begin(), VShift); }); } -void EmitX64::EmitVectorLogicalVShiftU64(EmitContext& ctx, IR::Inst* inst) { +void EmitX64::EmitVectorLogicalVShift64(EmitContext& ctx, IR::Inst* inst) { if (code.DoesCpuSupport(Xbyak::util::Cpu::tAVX2)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -1550,7 +1550,7 @@ void EmitX64::EmitVectorLogicalVShiftU64(EmitContext& ctx, IR::Inst* inst) { } EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - std::transform(a.begin(), a.end(), b.begin(), result.begin(), LogicalVShift); + std::transform(a.begin(), a.end(), b.begin(), result.begin(), VShift); }); } diff --git a/src/frontend/A64/translate/impl/simd_scalar_three_same.cpp b/src/frontend/A64/translate/impl/simd_scalar_three_same.cpp index c512e2e5..16c28539 100644 --- a/src/frontend/A64/translate/impl/simd_scalar_three_same.cpp +++ b/src/frontend/A64/translate/impl/simd_scalar_three_same.cpp @@ -330,7 +330,7 @@ bool TranslatorVisitor::SSHL_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd) { const IR::U128 operand1 = V(64, Vn); const IR::U128 operand2 = V(64, Vm); - const IR::U128 result = ir.VectorLogicalVShiftSigned(64, operand1, operand2); + const IR::U128 result = ir.VectorArithmeticVShift(64, operand1, operand2); V(64, Vd, result); return true; @@ -361,7 +361,7 @@ bool TranslatorVisitor::USHL_1(Imm<2> size, Vec Vm, Vec Vn, Vec Vd) { const IR::U128 operand1 = V(64, Vn); const IR::U128 operand2 = V(64, Vm); - const IR::U128 result = ir.VectorLogicalVShiftUnsigned(64, operand1, operand2); + const IR::U128 result = ir.VectorLogicalVShift(64, operand1, operand2); V(64, Vd, result); return true; diff --git a/src/frontend/A64/translate/impl/simd_three_same.cpp b/src/frontend/A64/translate/impl/simd_three_same.cpp index 486b292c..04faeb3c 100644 --- a/src/frontend/A64/translate/impl/simd_three_same.cpp +++ b/src/frontend/A64/translate/impl/simd_three_same.cpp @@ -817,7 +817,7 @@ bool TranslatorVisitor::SSHL_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) { const IR::U128 operand1 = V(datasize, Vn); const IR::U128 operand2 = V(datasize, Vm); - const IR::U128 result = ir.VectorLogicalVShiftSigned(esize, operand1, operand2); + const IR::U128 result = ir.VectorArithmeticVShift(esize, operand1, operand2); V(datasize, Vd, result); return true; } @@ -839,7 +839,7 @@ bool TranslatorVisitor::USHL_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec Vd) { const IR::U128 operand1 = V(datasize, Vn); const IR::U128 operand2 = V(datasize, Vm); - const IR::U128 result = ir.VectorLogicalVShiftUnsigned(esize, operand1, operand2); + const IR::U128 result = ir.VectorLogicalVShift(esize, operand1, operand2); V(datasize, Vd, result); return true; } diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index 9ae1fc21..ab708013 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -888,6 +888,21 @@ U128 IREmitter::VectorArithmeticShiftRight(size_t esize, const U128& a, u8 shift return {}; } +U128 IREmitter::VectorArithmeticVShift(size_t esize, const U128& a, const U128& b) { + switch (esize) { + case 8: + return Inst(Opcode::VectorArithmeticVShift8, a, b); + case 16: + return Inst(Opcode::VectorArithmeticVShift16, a, b); + case 32: + return Inst(Opcode::VectorArithmeticVShift32, a, b); + case 64: + return Inst(Opcode::VectorArithmeticVShift64, a, b); + } + UNREACHABLE(); + return {}; +} + U128 IREmitter::VectorBroadcastLower(size_t esize, const UAny& a) { switch (esize) { case 8: @@ -1145,31 +1160,16 @@ U128 IREmitter::VectorLogicalShiftRight(size_t esize, const U128& a, u8 shift_am return {}; } -U128 IREmitter::VectorLogicalVShiftSigned(size_t esize, const U128& a, const U128& b) { +U128 IREmitter::VectorLogicalVShift(size_t esize, const U128& a, const U128& b) { switch (esize) { case 8: - return Inst(Opcode::VectorLogicalVShiftS8, a, b); + return Inst(Opcode::VectorLogicalVShift8, a, b); case 16: - return Inst(Opcode::VectorLogicalVShiftS16, a, b); + return Inst(Opcode::VectorLogicalVShift16, a, b); case 32: - return Inst(Opcode::VectorLogicalVShiftS32, a, b); + return Inst(Opcode::VectorLogicalVShift32, a, b); case 64: - return Inst(Opcode::VectorLogicalVShiftS64, a, b); - } - UNREACHABLE(); - return {}; -} - -U128 IREmitter::VectorLogicalVShiftUnsigned(size_t esize, const U128& a, const U128& b) { - switch (esize) { - case 8: - return Inst(Opcode::VectorLogicalVShiftU8, a, b); - case 16: - return Inst(Opcode::VectorLogicalVShiftU16, a, b); - case 32: - return Inst(Opcode::VectorLogicalVShiftU32, a, b); - case 64: - return Inst(Opcode::VectorLogicalVShiftU64, a, b); + return Inst(Opcode::VectorLogicalVShift64, a, b); } UNREACHABLE(); return {}; diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index 22f252de..0f49600f 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -212,6 +212,7 @@ public: U128 VectorAdd(size_t esize, const U128& a, const U128& b); U128 VectorAnd(const U128& a, const U128& b); U128 VectorArithmeticShiftRight(size_t esize, const U128& a, u8 shift_amount); + U128 VectorArithmeticVShift(size_t esize, const U128& a, const U128& b); U128 VectorBroadcast(size_t esize, const UAny& a); U128 VectorBroadcastLower(size_t esize, const UAny& a); U128 VectorCountLeadingZeros(size_t esize, const U128& a); @@ -237,8 +238,7 @@ public: U128 VectorLessUnsigned(size_t esize, const U128& a, const U128& b); U128 VectorLogicalShiftLeft(size_t esize, const U128& a, u8 shift_amount); U128 VectorLogicalShiftRight(size_t esize, const U128& a, u8 shift_amount); - U128 VectorLogicalVShiftSigned(size_t esize, const U128& a, const U128& b); - U128 VectorLogicalVShiftUnsigned(size_t esize, const U128& a, const U128& b); + U128 VectorLogicalVShift(size_t esize, const U128& a, const U128& b); U128 VectorMaxSigned(size_t esize, const U128& a, const U128& b); U128 VectorMaxUnsigned(size_t esize, const U128& a, const U128& b); U128 VectorMinSigned(size_t esize, const U128& a, const U128& b); diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index 6c5774fe..4bcf3930 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -253,6 +253,10 @@ OPCODE(VectorArithmeticShiftRight8, U128, U128 OPCODE(VectorArithmeticShiftRight16, U128, U128, U8 ) OPCODE(VectorArithmeticShiftRight32, U128, U128, U8 ) OPCODE(VectorArithmeticShiftRight64, U128, U128, U8 ) +OPCODE(VectorArithmeticVShift8, U128, U128, U128 ) +OPCODE(VectorArithmeticVShift16, U128, U128, U128 ) +OPCODE(VectorArithmeticVShift32, U128, U128, U128 ) +OPCODE(VectorArithmeticVShift64, U128, U128, U128 ) OPCODE(VectorBroadcastLower8, U128, U8 ) OPCODE(VectorBroadcastLower16, U128, U16 ) OPCODE(VectorBroadcastLower32, U128, U32 ) @@ -311,14 +315,10 @@ OPCODE(VectorLogicalShiftRight8, U128, U128 OPCODE(VectorLogicalShiftRight16, U128, U128, U8 ) OPCODE(VectorLogicalShiftRight32, U128, U128, U8 ) OPCODE(VectorLogicalShiftRight64, U128, U128, U8 ) -OPCODE(VectorLogicalVShiftS8, U128, U128, U128 ) -OPCODE(VectorLogicalVShiftS16, U128, U128, U128 ) -OPCODE(VectorLogicalVShiftS32, U128, U128, U128 ) -OPCODE(VectorLogicalVShiftS64, U128, U128, U128 ) -OPCODE(VectorLogicalVShiftU8, U128, U128, U128 ) -OPCODE(VectorLogicalVShiftU16, U128, U128, U128 ) -OPCODE(VectorLogicalVShiftU32, U128, U128, U128 ) -OPCODE(VectorLogicalVShiftU64, U128, U128, U128 ) +OPCODE(VectorLogicalVShift8, U128, U128, U128 ) +OPCODE(VectorLogicalVShift16, U128, U128, U128 ) +OPCODE(VectorLogicalVShift32, U128, U128, U128 ) +OPCODE(VectorLogicalVShift64, U128, U128, U128 ) OPCODE(VectorMaxS8, U128, U128, U128 ) OPCODE(VectorMaxS16, U128, U128, U128 ) OPCODE(VectorMaxS32, U128, U128, U128 )