diff --git a/src/backend/x64/emit_x64_vector.cpp b/src/backend/x64/emit_x64_vector.cpp
index 3b98af67..4b960b29 100644
--- a/src/backend/x64/emit_x64_vector.cpp
+++ b/src/backend/x64/emit_x64_vector.cpp
@@ -2757,6 +2757,80 @@ void EmitX64::EmitVectorSignedSaturatedAbs64(EmitContext& ctx, IR::Inst* inst) {
     });
 }
 
+void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    // Per signed 16-bit lane: result = (2*x*y) >> 16; a wrapped 0x8000 becomes 0x7FFF and fpsr_qc is set.
+    const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
+    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+    // Assemble bits [30:15] of each 32-bit product from its high and low 16-bit halves.
+    code.movdqa(tmp, x);
+    code.pmulhw(tmp, y);   // tmp = high 16 bits of the signed product x*y
+    code.paddw(tmp, tmp);  // tmp <<= 1, leaving bit 0 free for the bit carried up from the low half
+    code.pmullw(y, x);     // y = low 16 bits of x*y
+    code.psrlw(y, 15);     // y = bit 15 of the low half, moved down to bit 0
+    code.por(y, tmp);      // y = unsaturated result
+    // Lanes equal to 0x8000 are flagged with an all-ones mask and flipped to 0x7FFF by the xor.
+    code.movdqa(x, code.MConst(xword, 0x8000800080008000, 0x8000800080008000));  // 0x8000 in every halfword
+    code.pcmpeqw(x, y);    // x = 0xFFFF where y == 0x8000, otherwise 0
+    code.movdqa(tmp, x);   // keep the mask for the QC check below
+    code.pxor(x, y);       // 0xFFFF ^ 0x8000 = 0x7FFF; 0 ^ y leaves the other lanes unchanged
+
+    // Check if any saturation occurred (i.e. if any halfwords in x were
+    // 0x8000 before saturating).
+    const Xbyak::Reg64 mask = ctx.reg_alloc.ScratchGpr();
+    code.pmovmskb(mask, tmp);                           // one bit per byte of the lane mask
+    code.test(mask.cvt32(), 0b1010'1010'1010'1010);     // odd bits = upper byte of each halfword
+    code.setnz(mask.cvt8());                            // 1 if any lane matched, else 0
+    code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], mask.cvt8());  // OR into fpsr_qc; presumably r15 is the JIT state base -- confirm
+
+    ctx.reg_alloc.DefineValue(inst, x);
+}
+
+void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    // Per signed 32-bit lane: result = (2*x*y) >> 32; a wrapped 0x80000000 becomes 0x7FFFFFFF and fpsr_qc is set.
+    const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
+    const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm();
+    const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm();
+    // NOTE(review): pmuldq below is an SSE4.1 instruction and no CPU feature check is visible here -- confirm.
+    code.movdqa(tmp1, x);
+    code.punpckldq(tmp1, y);  // tmp1 = [x0, y0, x1, y1]
+
+    code.movdqa(tmp2, y);
+    code.punpckldq(tmp2, x);  // tmp2 = [y0, x0, y1, x1]
+
+    code.pmuldq(tmp2, tmp1);  // signed 64-bit products of dwords 0 and 2: [x0*y0, x1*y1]
+    code.paddq(tmp2, tmp2);   // double each 64-bit product
+
+    code.movdqa(tmp1, x);
+    code.punpckhdq(tmp1, y);  // tmp1 = [x2, y2, x3, y3]
+    code.punpckhdq(y, x);     // y = [y2, x2, y3, x3]
+
+    code.pmuldq(y, tmp1);     // [x2*y2, x3*y3]
+    code.paddq(y, y);         // double each 64-bit product
+    // Gather the upper dword of each doubled product back into lane order.
+    code.pshufd(tmp1, tmp2, 0b11101101);  // low qword = source dwords 1 and 3 (upper halves, lanes 0 and 1)
+    code.pshufd(x, y, 0b11101101);        // low qword = upper halves for lanes 2 and 3
+    code.punpcklqdq(tmp1, x);             // tmp1 = unsaturated results for lanes 0..3
+    // Lanes equal to 0x80000000 are flagged with an all-ones mask and flipped to 0x7FFFFFFF by the xor.
+    code.movdqa(x, code.MConst(xword, 0x8000000080000000, 0x8000000080000000));  // 0x80000000 in every dword
+    code.pcmpeqd(x, tmp1);    // x = 0xFFFFFFFF where the result == 0x80000000, otherwise 0
+    code.movdqa(tmp2, x);     // keep the mask for the QC check below
+    code.pxor(x, tmp1);       // 0xFFFFFFFF ^ 0x80000000 = 0x7FFFFFFF; other lanes pass through
+
+    // Check if any saturation occurred (i.e. if any words in x were
+    // 0x80000000 before saturating).
+    const Xbyak::Reg64 mask = ctx.reg_alloc.ScratchGpr();
+    code.pmovmskb(mask, tmp2);                          // one bit per byte of the lane mask
+    code.test(mask.cvt32(), 0b1000'1000'1000'1000);     // bits 3, 7, 11, 15 = upper byte of each dword
+    code.setnz(mask.cvt8());                            // 1 if any lane matched, else 0
+    code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], mask.cvt8());  // OR into fpsr_qc; presumably r15 is the JIT state base -- confirm
+
+    ctx.reg_alloc.DefineValue(inst, x);
+}
+
 static void EmitVectorSignedSaturatedNarrowToSigned(size_t original_esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const Xbyak::Xmm src = ctx.reg_alloc.UseXmm(args[0]);
diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp
index a1680af0..62bf0685 100644
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1511,6 +1511,17 @@ U128 IREmitter::VectorSignedSaturatedAbs(size_t esize, const U128& a) {
     return {};
 }
 
+U128 IREmitter::VectorSignedSaturatedDoublingMultiplyReturnHigh(size_t esize, const U128& a, const U128& b) {
+    switch (esize) {  // esize picks the opcode variant; only 16 and 32 are handled
+    case 16:
+        return Inst(Opcode::VectorSignedSaturatedDoublingMultiplyReturnHigh16, a, b);
+    case 32:
+        return Inst(Opcode::VectorSignedSaturatedDoublingMultiplyReturnHigh32, a, b);
+    }
+    UNREACHABLE();  // any other esize falls out of the switch and lands here
+    return {};
+}
+
 U128 IREmitter::VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a) {
     switch (original_esize) {
     case 16:
diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h
index 46beaf9f..d500f749 100644
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -264,6 +264,7 @@ public:
     U128 VectorSignExtend(size_t original_esize, const U128& a);
     U128 VectorSignedAbsoluteDifference(size_t esize, const U128& a, const U128& b);
     U128
VectorSignedSaturatedAbs(size_t esize, const U128& a); + U128 VectorSignedSaturatedDoublingMultiplyReturnHigh(size_t esize, const U128& a, const U128& b); U128 VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a); U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a); U128 VectorSub(size_t esize, const U128& a, const U128& b); diff --git a/src/frontend/ir/microinstruction.cpp b/src/frontend/ir/microinstruction.cpp index c0da1c25..fe1cac6c 100644 --- a/src/frontend/ir/microinstruction.cpp +++ b/src/frontend/ir/microinstruction.cpp @@ -357,6 +357,8 @@ bool Inst::WritesToFPSRCumulativeSaturationBit() const { case Opcode::VectorSignedSaturatedNarrowToUnsigned16: case Opcode::VectorSignedSaturatedNarrowToUnsigned32: case Opcode::VectorSignedSaturatedNarrowToUnsigned64: + case Opcode::VectorSignedSaturatedDoublingMultiplyReturnHigh16: + case Opcode::VectorSignedSaturatedDoublingMultiplyReturnHigh32: case Opcode::VectorUnsignedSaturatedNarrow16: case Opcode::VectorUnsignedSaturatedNarrow32: case Opcode::VectorUnsignedSaturatedNarrow64: diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index 7b93cbcd..8f01b7d5 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -1,575 +1,577 @@ -// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ... +// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ... 
-OPCODE(Void, Void, ) -OPCODE(Identity, Opaque, Opaque ) -OPCODE(Breakpoint, Void, ) +OPCODE(Void, Void, ) +OPCODE(Identity, Opaque, Opaque ) +OPCODE(Breakpoint, Void, ) // A32 Context getters/setters -A32OPC(GetRegister, U32, A32Reg ) -A32OPC(GetExtendedRegister32, U32, A32ExtReg ) -A32OPC(GetExtendedRegister64, U64, A32ExtReg ) -A32OPC(SetRegister, Void, A32Reg, U32 ) -A32OPC(SetExtendedRegister32, Void, A32ExtReg, U32 ) -A32OPC(SetExtendedRegister64, Void, A32ExtReg, U64 ) -A32OPC(GetCpsr, U32, ) -A32OPC(SetCpsr, Void, U32 ) -A32OPC(SetCpsrNZCV, Void, U32 ) -A32OPC(SetCpsrNZCVQ, Void, U32 ) -A32OPC(GetNFlag, U1, ) -A32OPC(SetNFlag, Void, U1 ) -A32OPC(GetZFlag, U1, ) -A32OPC(SetZFlag, Void, U1 ) -A32OPC(GetCFlag, U1, ) -A32OPC(SetCFlag, Void, U1 ) -A32OPC(GetVFlag, U1, ) -A32OPC(SetVFlag, Void, U1 ) -A32OPC(OrQFlag, Void, U1 ) -A32OPC(GetGEFlags, U32, ) -A32OPC(SetGEFlags, Void, U32 ) -A32OPC(SetGEFlagsCompressed, Void, U32 ) -A32OPC(BXWritePC, Void, U32 ) -A32OPC(CallSupervisor, Void, U32 ) -A32OPC(ExceptionRaised, Void, U32, U64 ) -A32OPC(GetFpscr, U32, ) -A32OPC(SetFpscr, Void, U32, ) -A32OPC(GetFpscrNZCV, U32, ) -A32OPC(SetFpscrNZCV, Void, NZCV ) +A32OPC(GetRegister, U32, A32Reg ) +A32OPC(GetExtendedRegister32, U32, A32ExtReg ) +A32OPC(GetExtendedRegister64, U64, A32ExtReg ) +A32OPC(SetRegister, Void, A32Reg, U32 ) +A32OPC(SetExtendedRegister32, Void, A32ExtReg, U32 ) +A32OPC(SetExtendedRegister64, Void, A32ExtReg, U64 ) +A32OPC(GetCpsr, U32, ) +A32OPC(SetCpsr, Void, U32 ) +A32OPC(SetCpsrNZCV, Void, U32 ) +A32OPC(SetCpsrNZCVQ, Void, U32 ) +A32OPC(GetNFlag, U1, ) +A32OPC(SetNFlag, Void, U1 ) +A32OPC(GetZFlag, U1, ) +A32OPC(SetZFlag, Void, U1 ) +A32OPC(GetCFlag, U1, ) +A32OPC(SetCFlag, Void, U1 ) +A32OPC(GetVFlag, U1, ) +A32OPC(SetVFlag, Void, U1 ) +A32OPC(OrQFlag, Void, U1 ) +A32OPC(GetGEFlags, U32, ) +A32OPC(SetGEFlags, Void, U32 ) +A32OPC(SetGEFlagsCompressed, Void, U32 ) +A32OPC(BXWritePC, Void, U32 ) +A32OPC(CallSupervisor, Void, U32 ) 
+A32OPC(ExceptionRaised, Void, U32, U64 ) +A32OPC(GetFpscr, U32, ) +A32OPC(SetFpscr, Void, U32, ) +A32OPC(GetFpscrNZCV, U32, ) +A32OPC(SetFpscrNZCV, Void, NZCV ) // A64 Context getters/setters -A64OPC(SetCheckBit, Void, U1 ) -A64OPC(GetCFlag, U1, ) -A64OPC(SetNZCV, Void, NZCV ) -A64OPC(GetW, U32, A64Reg ) -A64OPC(GetX, U64, A64Reg ) -A64OPC(GetS, U128, A64Vec ) -A64OPC(GetD, U128, A64Vec ) -A64OPC(GetQ, U128, A64Vec ) -A64OPC(GetSP, U64, ) -A64OPC(GetFPCR, U32, ) -A64OPC(GetFPSR, U32, ) -A64OPC(SetW, Void, A64Reg, U32 ) -A64OPC(SetX, Void, A64Reg, U64 ) -A64OPC(SetS, Void, A64Vec, U128 ) -A64OPC(SetD, Void, A64Vec, U128 ) -A64OPC(SetQ, Void, A64Vec, U128 ) -A64OPC(SetSP, Void, U64 ) -A64OPC(SetFPCR, Void, U32 ) -A64OPC(SetFPSR, Void, U32 ) -A64OPC(OrQC, Void, U1 ) -A64OPC(SetPC, Void, U64 ) -A64OPC(CallSupervisor, Void, U32 ) -A64OPC(ExceptionRaised, Void, U64, U64 ) -A64OPC(DataCacheOperationRaised, Void, U64, U64 ) -A64OPC(DataSynchronizationBarrier, Void, ) -A64OPC(DataMemoryBarrier, Void, ) -A64OPC(InstructionSynchronizationBarrier, Void, ) -A64OPC(GetCNTFRQ, U32, ) -A64OPC(GetCNTPCT, U64, ) -A64OPC(GetCTR, U32, ) -A64OPC(GetDCZID, U32, ) -A64OPC(GetTPIDR, U64, ) -A64OPC(GetTPIDRRO, U64, ) -A64OPC(SetTPIDR, Void, U64 ) +A64OPC(SetCheckBit, Void, U1 ) +A64OPC(GetCFlag, U1, ) +A64OPC(SetNZCV, Void, NZCV ) +A64OPC(GetW, U32, A64Reg ) +A64OPC(GetX, U64, A64Reg ) +A64OPC(GetS, U128, A64Vec ) +A64OPC(GetD, U128, A64Vec ) +A64OPC(GetQ, U128, A64Vec ) +A64OPC(GetSP, U64, ) +A64OPC(GetFPCR, U32, ) +A64OPC(GetFPSR, U32, ) +A64OPC(SetW, Void, A64Reg, U32 ) +A64OPC(SetX, Void, A64Reg, U64 ) +A64OPC(SetS, Void, A64Vec, U128 ) +A64OPC(SetD, Void, A64Vec, U128 ) +A64OPC(SetQ, Void, A64Vec, U128 ) +A64OPC(SetSP, Void, U64 ) +A64OPC(SetFPCR, Void, U32 ) +A64OPC(SetFPSR, Void, U32 ) +A64OPC(OrQC, Void, U1 ) +A64OPC(SetPC, Void, U64 ) +A64OPC(CallSupervisor, Void, U32 ) +A64OPC(ExceptionRaised, Void, U64, U64 ) +A64OPC(DataCacheOperationRaised, Void, U64, U64 ) 
+A64OPC(DataSynchronizationBarrier, Void, ) +A64OPC(DataMemoryBarrier, Void, ) +A64OPC(InstructionSynchronizationBarrier, Void, ) +A64OPC(GetCNTFRQ, U32, ) +A64OPC(GetCNTPCT, U64, ) +A64OPC(GetCTR, U32, ) +A64OPC(GetDCZID, U32, ) +A64OPC(GetTPIDR, U64, ) +A64OPC(GetTPIDRRO, U64, ) +A64OPC(SetTPIDR, Void, U64 ) // Hints -OPCODE(PushRSB, Void, U64 ) +OPCODE(PushRSB, Void, U64 ) // Pseudo-operation, handled specially at final emit -OPCODE(GetCarryFromOp, U1, Opaque ) -OPCODE(GetOverflowFromOp, U1, Opaque ) -OPCODE(GetGEFromOp, U32, Opaque ) -OPCODE(GetNZCVFromOp, NZCV, Opaque ) +OPCODE(GetCarryFromOp, U1, Opaque ) +OPCODE(GetOverflowFromOp, U1, Opaque ) +OPCODE(GetGEFromOp, U32, Opaque ) +OPCODE(GetNZCVFromOp, NZCV, Opaque ) -OPCODE(NZCVFromPackedFlags, NZCV, U32 ) +OPCODE(NZCVFromPackedFlags, NZCV, U32 ) // Calculations -OPCODE(Pack2x32To1x64, U64, U32, U32 ) -OPCODE(Pack2x64To1x128, U128, U64, U64 ) -OPCODE(LeastSignificantWord, U32, U64 ) -OPCODE(MostSignificantWord, U32, U64 ) -OPCODE(LeastSignificantHalf, U16, U32 ) -OPCODE(LeastSignificantByte, U8, U32 ) -OPCODE(MostSignificantBit, U1, U32 ) -OPCODE(IsZero32, U1, U32 ) -OPCODE(IsZero64, U1, U64 ) -OPCODE(TestBit, U1, U64, U8 ) -OPCODE(ConditionalSelect32, U32, Cond, U32, U32 ) -OPCODE(ConditionalSelect64, U64, Cond, U64, U64 ) -OPCODE(ConditionalSelectNZCV, NZCV, Cond, NZCV, NZCV ) -OPCODE(LogicalShiftLeft32, U32, U32, U8, U1 ) -OPCODE(LogicalShiftLeft64, U64, U64, U8 ) -OPCODE(LogicalShiftRight32, U32, U32, U8, U1 ) -OPCODE(LogicalShiftRight64, U64, U64, U8 ) -OPCODE(ArithmeticShiftRight32, U32, U32, U8, U1 ) -OPCODE(ArithmeticShiftRight64, U64, U64, U8 ) -OPCODE(RotateRight32, U32, U32, U8, U1 ) -OPCODE(RotateRight64, U64, U64, U8 ) -OPCODE(RotateRightExtended, U32, U32, U1 ) -OPCODE(Add32, U32, U32, U32, U1 ) -OPCODE(Add64, U64, U64, U64, U1 ) -OPCODE(Sub32, U32, U32, U32, U1 ) -OPCODE(Sub64, U64, U64, U64, U1 ) -OPCODE(Mul32, U32, U32, U32 ) -OPCODE(Mul64, U64, U64, U64 ) -OPCODE(SignedMultiplyHigh64, U64, 
U64, U64 ) -OPCODE(UnsignedMultiplyHigh64, U64, U64, U64 ) -OPCODE(UnsignedDiv32, U32, U32, U32 ) -OPCODE(UnsignedDiv64, U64, U64, U64 ) -OPCODE(SignedDiv32, U32, U32, U32 ) -OPCODE(SignedDiv64, U64, U64, U64 ) -OPCODE(And32, U32, U32, U32 ) -OPCODE(And64, U64, U64, U64 ) -OPCODE(Eor32, U32, U32, U32 ) -OPCODE(Eor64, U64, U64, U64 ) -OPCODE(Or32, U32, U32, U32 ) -OPCODE(Or64, U64, U64, U64 ) -OPCODE(Not32, U32, U32 ) -OPCODE(Not64, U64, U64 ) -OPCODE(SignExtendByteToWord, U32, U8 ) -OPCODE(SignExtendHalfToWord, U32, U16 ) -OPCODE(SignExtendByteToLong, U64, U8 ) -OPCODE(SignExtendHalfToLong, U64, U16 ) -OPCODE(SignExtendWordToLong, U64, U32 ) -OPCODE(ZeroExtendByteToWord, U32, U8 ) -OPCODE(ZeroExtendHalfToWord, U32, U16 ) -OPCODE(ZeroExtendByteToLong, U64, U8 ) -OPCODE(ZeroExtendHalfToLong, U64, U16 ) -OPCODE(ZeroExtendWordToLong, U64, U32 ) -OPCODE(ZeroExtendLongToQuad, U128, U64 ) -OPCODE(ByteReverseWord, U32, U32 ) -OPCODE(ByteReverseHalf, U16, U16 ) -OPCODE(ByteReverseDual, U64, U64 ) -OPCODE(CountLeadingZeros32, U32, U32 ) -OPCODE(CountLeadingZeros64, U64, U64 ) -OPCODE(ExtractRegister32, U32, U32, U32, U8 ) -OPCODE(ExtractRegister64, U64, U64, U64, U8 ) -OPCODE(MaxSigned32, U32, U32, U32 ) -OPCODE(MaxSigned64, U64, U64, U64 ) -OPCODE(MaxUnsigned32, U32, U32, U32 ) -OPCODE(MaxUnsigned64, U64, U64, U64 ) -OPCODE(MinSigned32, U32, U32, U32 ) -OPCODE(MinSigned64, U64, U64, U64 ) -OPCODE(MinUnsigned32, U32, U32, U32 ) -OPCODE(MinUnsigned64, U64, U64, U64 ) +OPCODE(Pack2x32To1x64, U64, U32, U32 ) +OPCODE(Pack2x64To1x128, U128, U64, U64 ) +OPCODE(LeastSignificantWord, U32, U64 ) +OPCODE(MostSignificantWord, U32, U64 ) +OPCODE(LeastSignificantHalf, U16, U32 ) +OPCODE(LeastSignificantByte, U8, U32 ) +OPCODE(MostSignificantBit, U1, U32 ) +OPCODE(IsZero32, U1, U32 ) +OPCODE(IsZero64, U1, U64 ) +OPCODE(TestBit, U1, U64, U8 ) +OPCODE(ConditionalSelect32, U32, Cond, U32, U32 ) +OPCODE(ConditionalSelect64, U64, Cond, U64, U64 ) +OPCODE(ConditionalSelectNZCV, NZCV, Cond, 
NZCV, NZCV ) +OPCODE(LogicalShiftLeft32, U32, U32, U8, U1 ) +OPCODE(LogicalShiftLeft64, U64, U64, U8 ) +OPCODE(LogicalShiftRight32, U32, U32, U8, U1 ) +OPCODE(LogicalShiftRight64, U64, U64, U8 ) +OPCODE(ArithmeticShiftRight32, U32, U32, U8, U1 ) +OPCODE(ArithmeticShiftRight64, U64, U64, U8 ) +OPCODE(RotateRight32, U32, U32, U8, U1 ) +OPCODE(RotateRight64, U64, U64, U8 ) +OPCODE(RotateRightExtended, U32, U32, U1 ) +OPCODE(Add32, U32, U32, U32, U1 ) +OPCODE(Add64, U64, U64, U64, U1 ) +OPCODE(Sub32, U32, U32, U32, U1 ) +OPCODE(Sub64, U64, U64, U64, U1 ) +OPCODE(Mul32, U32, U32, U32 ) +OPCODE(Mul64, U64, U64, U64 ) +OPCODE(SignedMultiplyHigh64, U64, U64, U64 ) +OPCODE(UnsignedMultiplyHigh64, U64, U64, U64 ) +OPCODE(UnsignedDiv32, U32, U32, U32 ) +OPCODE(UnsignedDiv64, U64, U64, U64 ) +OPCODE(SignedDiv32, U32, U32, U32 ) +OPCODE(SignedDiv64, U64, U64, U64 ) +OPCODE(And32, U32, U32, U32 ) +OPCODE(And64, U64, U64, U64 ) +OPCODE(Eor32, U32, U32, U32 ) +OPCODE(Eor64, U64, U64, U64 ) +OPCODE(Or32, U32, U32, U32 ) +OPCODE(Or64, U64, U64, U64 ) +OPCODE(Not32, U32, U32 ) +OPCODE(Not64, U64, U64 ) +OPCODE(SignExtendByteToWord, U32, U8 ) +OPCODE(SignExtendHalfToWord, U32, U16 ) +OPCODE(SignExtendByteToLong, U64, U8 ) +OPCODE(SignExtendHalfToLong, U64, U16 ) +OPCODE(SignExtendWordToLong, U64, U32 ) +OPCODE(ZeroExtendByteToWord, U32, U8 ) +OPCODE(ZeroExtendHalfToWord, U32, U16 ) +OPCODE(ZeroExtendByteToLong, U64, U8 ) +OPCODE(ZeroExtendHalfToLong, U64, U16 ) +OPCODE(ZeroExtendWordToLong, U64, U32 ) +OPCODE(ZeroExtendLongToQuad, U128, U64 ) +OPCODE(ByteReverseWord, U32, U32 ) +OPCODE(ByteReverseHalf, U16, U16 ) +OPCODE(ByteReverseDual, U64, U64 ) +OPCODE(CountLeadingZeros32, U32, U32 ) +OPCODE(CountLeadingZeros64, U64, U64 ) +OPCODE(ExtractRegister32, U32, U32, U32, U8 ) +OPCODE(ExtractRegister64, U64, U64, U64, U8 ) +OPCODE(MaxSigned32, U32, U32, U32 ) +OPCODE(MaxSigned64, U64, U64, U64 ) +OPCODE(MaxUnsigned32, U32, U32, U32 ) +OPCODE(MaxUnsigned64, U64, U64, U64 ) 
+OPCODE(MinSigned32, U32, U32, U32 ) +OPCODE(MinSigned64, U64, U64, U64 ) +OPCODE(MinUnsigned32, U32, U32, U32 ) +OPCODE(MinUnsigned64, U64, U64, U64 ) // Saturated instructions -OPCODE(SignedSaturatedAdd8, U8, U8, U8 ) -OPCODE(SignedSaturatedAdd16, U16, U16, U16 ) -OPCODE(SignedSaturatedAdd32, U32, U32, U32 ) -OPCODE(SignedSaturatedAdd64, U64, U64, U64 ) -OPCODE(SignedSaturatedSub8, U8, U8, U8 ) -OPCODE(SignedSaturatedSub16, U16, U16, U16 ) -OPCODE(SignedSaturatedSub32, U32, U32, U32 ) -OPCODE(SignedSaturatedSub64, U64, U64, U64 ) -OPCODE(SignedSaturation, U32, U32, U8 ) -OPCODE(UnsignedSaturatedAdd8, U8, U8, U8 ) -OPCODE(UnsignedSaturatedAdd16, U16, U16, U16 ) -OPCODE(UnsignedSaturatedAdd32, U32, U32, U32 ) -OPCODE(UnsignedSaturatedAdd64, U64, U64, U64 ) -OPCODE(UnsignedSaturatedSub8, U8, U8, U8 ) -OPCODE(UnsignedSaturatedSub16, U16, U16, U16 ) -OPCODE(UnsignedSaturatedSub32, U32, U32, U32 ) -OPCODE(UnsignedSaturatedSub64, U64, U64, U64 ) -OPCODE(UnsignedSaturation, U32, U32, U8 ) +OPCODE(SignedSaturatedAdd8, U8, U8, U8 ) +OPCODE(SignedSaturatedAdd16, U16, U16, U16 ) +OPCODE(SignedSaturatedAdd32, U32, U32, U32 ) +OPCODE(SignedSaturatedAdd64, U64, U64, U64 ) +OPCODE(SignedSaturatedSub8, U8, U8, U8 ) +OPCODE(SignedSaturatedSub16, U16, U16, U16 ) +OPCODE(SignedSaturatedSub32, U32, U32, U32 ) +OPCODE(SignedSaturatedSub64, U64, U64, U64 ) +OPCODE(SignedSaturation, U32, U32, U8 ) +OPCODE(UnsignedSaturatedAdd8, U8, U8, U8 ) +OPCODE(UnsignedSaturatedAdd16, U16, U16, U16 ) +OPCODE(UnsignedSaturatedAdd32, U32, U32, U32 ) +OPCODE(UnsignedSaturatedAdd64, U64, U64, U64 ) +OPCODE(UnsignedSaturatedSub8, U8, U8, U8 ) +OPCODE(UnsignedSaturatedSub16, U16, U16, U16 ) +OPCODE(UnsignedSaturatedSub32, U32, U32, U32 ) +OPCODE(UnsignedSaturatedSub64, U64, U64, U64 ) +OPCODE(UnsignedSaturation, U32, U32, U8 ) // Packed instructions -OPCODE(PackedAddU8, U32, U32, U32 ) -OPCODE(PackedAddS8, U32, U32, U32 ) -OPCODE(PackedSubU8, U32, U32, U32 ) -OPCODE(PackedSubS8, U32, U32, U32 ) 
-OPCODE(PackedAddU16, U32, U32, U32 ) -OPCODE(PackedAddS16, U32, U32, U32 ) -OPCODE(PackedSubU16, U32, U32, U32 ) -OPCODE(PackedSubS16, U32, U32, U32 ) -OPCODE(PackedAddSubU16, U32, U32, U32 ) -OPCODE(PackedAddSubS16, U32, U32, U32 ) -OPCODE(PackedSubAddU16, U32, U32, U32 ) -OPCODE(PackedSubAddS16, U32, U32, U32 ) -OPCODE(PackedHalvingAddU8, U32, U32, U32 ) -OPCODE(PackedHalvingAddS8, U32, U32, U32 ) -OPCODE(PackedHalvingSubU8, U32, U32, U32 ) -OPCODE(PackedHalvingSubS8, U32, U32, U32 ) -OPCODE(PackedHalvingAddU16, U32, U32, U32 ) -OPCODE(PackedHalvingAddS16, U32, U32, U32 ) -OPCODE(PackedHalvingSubU16, U32, U32, U32 ) -OPCODE(PackedHalvingSubS16, U32, U32, U32 ) -OPCODE(PackedHalvingAddSubU16, U32, U32, U32 ) -OPCODE(PackedHalvingAddSubS16, U32, U32, U32 ) -OPCODE(PackedHalvingSubAddU16, U32, U32, U32 ) -OPCODE(PackedHalvingSubAddS16, U32, U32, U32 ) -OPCODE(PackedSaturatedAddU8, U32, U32, U32 ) -OPCODE(PackedSaturatedAddS8, U32, U32, U32 ) -OPCODE(PackedSaturatedSubU8, U32, U32, U32 ) -OPCODE(PackedSaturatedSubS8, U32, U32, U32 ) -OPCODE(PackedSaturatedAddU16, U32, U32, U32 ) -OPCODE(PackedSaturatedAddS16, U32, U32, U32 ) -OPCODE(PackedSaturatedSubU16, U32, U32, U32 ) -OPCODE(PackedSaturatedSubS16, U32, U32, U32 ) -OPCODE(PackedAbsDiffSumS8, U32, U32, U32 ) -OPCODE(PackedSelect, U32, U32, U32, U32 ) +OPCODE(PackedAddU8, U32, U32, U32 ) +OPCODE(PackedAddS8, U32, U32, U32 ) +OPCODE(PackedSubU8, U32, U32, U32 ) +OPCODE(PackedSubS8, U32, U32, U32 ) +OPCODE(PackedAddU16, U32, U32, U32 ) +OPCODE(PackedAddS16, U32, U32, U32 ) +OPCODE(PackedSubU16, U32, U32, U32 ) +OPCODE(PackedSubS16, U32, U32, U32 ) +OPCODE(PackedAddSubU16, U32, U32, U32 ) +OPCODE(PackedAddSubS16, U32, U32, U32 ) +OPCODE(PackedSubAddU16, U32, U32, U32 ) +OPCODE(PackedSubAddS16, U32, U32, U32 ) +OPCODE(PackedHalvingAddU8, U32, U32, U32 ) +OPCODE(PackedHalvingAddS8, U32, U32, U32 ) +OPCODE(PackedHalvingSubU8, U32, U32, U32 ) +OPCODE(PackedHalvingSubS8, U32, U32, U32 ) +OPCODE(PackedHalvingAddU16, U32, 
U32, U32 ) +OPCODE(PackedHalvingAddS16, U32, U32, U32 ) +OPCODE(PackedHalvingSubU16, U32, U32, U32 ) +OPCODE(PackedHalvingSubS16, U32, U32, U32 ) +OPCODE(PackedHalvingAddSubU16, U32, U32, U32 ) +OPCODE(PackedHalvingAddSubS16, U32, U32, U32 ) +OPCODE(PackedHalvingSubAddU16, U32, U32, U32 ) +OPCODE(PackedHalvingSubAddS16, U32, U32, U32 ) +OPCODE(PackedSaturatedAddU8, U32, U32, U32 ) +OPCODE(PackedSaturatedAddS8, U32, U32, U32 ) +OPCODE(PackedSaturatedSubU8, U32, U32, U32 ) +OPCODE(PackedSaturatedSubS8, U32, U32, U32 ) +OPCODE(PackedSaturatedAddU16, U32, U32, U32 ) +OPCODE(PackedSaturatedAddS16, U32, U32, U32 ) +OPCODE(PackedSaturatedSubU16, U32, U32, U32 ) +OPCODE(PackedSaturatedSubS16, U32, U32, U32 ) +OPCODE(PackedAbsDiffSumS8, U32, U32, U32 ) +OPCODE(PackedSelect, U32, U32, U32, U32 ) // CRC instructions -OPCODE(CRC32Castagnoli8, U32, U32, U32 ) -OPCODE(CRC32Castagnoli16, U32, U32, U32 ) -OPCODE(CRC32Castagnoli32, U32, U32, U32 ) -OPCODE(CRC32Castagnoli64, U32, U32, U64 ) -OPCODE(CRC32ISO8, U32, U32, U32 ) -OPCODE(CRC32ISO16, U32, U32, U32 ) -OPCODE(CRC32ISO32, U32, U32, U32 ) -OPCODE(CRC32ISO64, U32, U32, U64 ) +OPCODE(CRC32Castagnoli8, U32, U32, U32 ) +OPCODE(CRC32Castagnoli16, U32, U32, U32 ) +OPCODE(CRC32Castagnoli32, U32, U32, U32 ) +OPCODE(CRC32Castagnoli64, U32, U32, U64 ) +OPCODE(CRC32ISO8, U32, U32, U32 ) +OPCODE(CRC32ISO16, U32, U32, U32 ) +OPCODE(CRC32ISO32, U32, U32, U32 ) +OPCODE(CRC32ISO64, U32, U32, U64 ) // AES instructions -OPCODE(AESDecryptSingleRound, U128, U128 ) -OPCODE(AESEncryptSingleRound, U128, U128 ) -OPCODE(AESInverseMixColumns, U128, U128 ) -OPCODE(AESMixColumns, U128, U128 ) +OPCODE(AESDecryptSingleRound, U128, U128 ) +OPCODE(AESEncryptSingleRound, U128, U128 ) +OPCODE(AESInverseMixColumns, U128, U128 ) +OPCODE(AESMixColumns, U128, U128 ) // SM4 instructions -OPCODE(SM4AccessSubstitutionBox, U8, U8 ) +OPCODE(SM4AccessSubstitutionBox, U8, U8 ) // Vector instructions -OPCODE(VectorGetElement8, U8, U128, U8 ) -OPCODE(VectorGetElement16, 
U16, U128, U8 ) -OPCODE(VectorGetElement32, U32, U128, U8 ) -OPCODE(VectorGetElement64, U64, U128, U8 ) -OPCODE(VectorSetElement8, U128, U128, U8, U8 ) -OPCODE(VectorSetElement16, U128, U128, U8, U16 ) -OPCODE(VectorSetElement32, U128, U128, U8, U32 ) -OPCODE(VectorSetElement64, U128, U128, U8, U64 ) -OPCODE(VectorAbs8, U128, U128 ) -OPCODE(VectorAbs16, U128, U128 ) -OPCODE(VectorAbs32, U128, U128 ) -OPCODE(VectorAbs64, U128, U128 ) -OPCODE(VectorAdd8, U128, U128, U128 ) -OPCODE(VectorAdd16, U128, U128, U128 ) -OPCODE(VectorAdd32, U128, U128, U128 ) -OPCODE(VectorAdd64, U128, U128, U128 ) -OPCODE(VectorAnd, U128, U128, U128 ) -OPCODE(VectorArithmeticShiftRight8, U128, U128, U8 ) -OPCODE(VectorArithmeticShiftRight16, U128, U128, U8 ) -OPCODE(VectorArithmeticShiftRight32, U128, U128, U8 ) -OPCODE(VectorArithmeticShiftRight64, U128, U128, U8 ) -OPCODE(VectorBroadcastLower8, U128, U8 ) -OPCODE(VectorBroadcastLower16, U128, U16 ) -OPCODE(VectorBroadcastLower32, U128, U32 ) -OPCODE(VectorBroadcast8, U128, U8 ) -OPCODE(VectorBroadcast16, U128, U16 ) -OPCODE(VectorBroadcast32, U128, U32 ) -OPCODE(VectorBroadcast64, U128, U64 ) -OPCODE(VectorDeinterleaveEven8, U128, U128, U128 ) -OPCODE(VectorDeinterleaveEven16, U128, U128, U128 ) -OPCODE(VectorDeinterleaveEven32, U128, U128, U128 ) -OPCODE(VectorDeinterleaveEven64, U128, U128, U128 ) -OPCODE(VectorDeinterleaveOdd8, U128, U128, U128 ) -OPCODE(VectorDeinterleaveOdd16, U128, U128, U128 ) -OPCODE(VectorDeinterleaveOdd32, U128, U128, U128 ) -OPCODE(VectorDeinterleaveOdd64, U128, U128, U128 ) -OPCODE(VectorEor, U128, U128, U128 ) -OPCODE(VectorEqual8, U128, U128, U128 ) -OPCODE(VectorEqual16, U128, U128, U128 ) -OPCODE(VectorEqual32, U128, U128, U128 ) -OPCODE(VectorEqual64, U128, U128, U128 ) -OPCODE(VectorEqual128, U128, U128, U128 ) -OPCODE(VectorExtract, U128, U128, U128, U8 ) -OPCODE(VectorExtractLower, U128, U128, U128, U8 ) -OPCODE(VectorGreaterS8, U128, U128, U128 ) -OPCODE(VectorGreaterS16, U128, U128, U128 ) 
-OPCODE(VectorGreaterS32, U128, U128, U128 ) -OPCODE(VectorGreaterS64, U128, U128, U128 ) -OPCODE(VectorHalvingAddS8, U128, U128, U128 ) -OPCODE(VectorHalvingAddS16, U128, U128, U128 ) -OPCODE(VectorHalvingAddS32, U128, U128, U128 ) -OPCODE(VectorHalvingAddU8, U128, U128, U128 ) -OPCODE(VectorHalvingAddU16, U128, U128, U128 ) -OPCODE(VectorHalvingAddU32, U128, U128, U128 ) -OPCODE(VectorHalvingSubS8, U128, U128, U128 ) -OPCODE(VectorHalvingSubS16, U128, U128, U128 ) -OPCODE(VectorHalvingSubS32, U128, U128, U128 ) -OPCODE(VectorHalvingSubU8, U128, U128, U128 ) -OPCODE(VectorHalvingSubU16, U128, U128, U128 ) -OPCODE(VectorHalvingSubU32, U128, U128, U128 ) -OPCODE(VectorInterleaveLower8, U128, U128, U128 ) -OPCODE(VectorInterleaveLower16, U128, U128, U128 ) -OPCODE(VectorInterleaveLower32, U128, U128, U128 ) -OPCODE(VectorInterleaveLower64, U128, U128, U128 ) -OPCODE(VectorInterleaveUpper8, U128, U128, U128 ) -OPCODE(VectorInterleaveUpper16, U128, U128, U128 ) -OPCODE(VectorInterleaveUpper32, U128, U128, U128 ) -OPCODE(VectorInterleaveUpper64, U128, U128, U128 ) -OPCODE(VectorLogicalShiftLeft8, U128, U128, U8 ) -OPCODE(VectorLogicalShiftLeft16, U128, U128, U8 ) -OPCODE(VectorLogicalShiftLeft32, U128, U128, U8 ) -OPCODE(VectorLogicalShiftLeft64, U128, U128, U8 ) -OPCODE(VectorLogicalShiftRight8, U128, U128, U8 ) -OPCODE(VectorLogicalShiftRight16, U128, U128, U8 ) -OPCODE(VectorLogicalShiftRight32, U128, U128, U8 ) -OPCODE(VectorLogicalShiftRight64, U128, U128, U8 ) -OPCODE(VectorLogicalVShiftS8, U128, U128, U128 ) -OPCODE(VectorLogicalVShiftS16, U128, U128, U128 ) -OPCODE(VectorLogicalVShiftS32, U128, U128, U128 ) -OPCODE(VectorLogicalVShiftS64, U128, U128, U128 ) -OPCODE(VectorLogicalVShiftU8, U128, U128, U128 ) -OPCODE(VectorLogicalVShiftU16, U128, U128, U128 ) -OPCODE(VectorLogicalVShiftU32, U128, U128, U128 ) -OPCODE(VectorLogicalVShiftU64, U128, U128, U128 ) -OPCODE(VectorMaxS8, U128, U128, U128 ) -OPCODE(VectorMaxS16, U128, U128, U128 ) -OPCODE(VectorMaxS32, 
U128, U128, U128 ) -OPCODE(VectorMaxS64, U128, U128, U128 ) -OPCODE(VectorMaxU8, U128, U128, U128 ) -OPCODE(VectorMaxU16, U128, U128, U128 ) -OPCODE(VectorMaxU32, U128, U128, U128 ) -OPCODE(VectorMaxU64, U128, U128, U128 ) -OPCODE(VectorMinS8, U128, U128, U128 ) -OPCODE(VectorMinS16, U128, U128, U128 ) -OPCODE(VectorMinS32, U128, U128, U128 ) -OPCODE(VectorMinS64, U128, U128, U128 ) -OPCODE(VectorMinU8, U128, U128, U128 ) -OPCODE(VectorMinU16, U128, U128, U128 ) -OPCODE(VectorMinU32, U128, U128, U128 ) -OPCODE(VectorMinU64, U128, U128, U128 ) -OPCODE(VectorMultiply8, U128, U128, U128 ) -OPCODE(VectorMultiply16, U128, U128, U128 ) -OPCODE(VectorMultiply32, U128, U128, U128 ) -OPCODE(VectorMultiply64, U128, U128, U128 ) -OPCODE(VectorNarrow16, U128, U128 ) -OPCODE(VectorNarrow32, U128, U128 ) -OPCODE(VectorNarrow64, U128, U128 ) -OPCODE(VectorNot, U128, U128 ) -OPCODE(VectorOr, U128, U128, U128 ) -OPCODE(VectorPairedAddLower8, U128, U128, U128 ) -OPCODE(VectorPairedAddLower16, U128, U128, U128 ) -OPCODE(VectorPairedAddLower32, U128, U128, U128 ) -OPCODE(VectorPairedAddSignedWiden8, U128, U128 ) -OPCODE(VectorPairedAddSignedWiden16, U128, U128 ) -OPCODE(VectorPairedAddSignedWiden32, U128, U128 ) -OPCODE(VectorPairedAddUnsignedWiden8, U128, U128 ) -OPCODE(VectorPairedAddUnsignedWiden16, U128, U128 ) -OPCODE(VectorPairedAddUnsignedWiden32, U128, U128 ) -OPCODE(VectorPairedAdd8, U128, U128, U128 ) -OPCODE(VectorPairedAdd16, U128, U128, U128 ) -OPCODE(VectorPairedAdd32, U128, U128, U128 ) -OPCODE(VectorPairedAdd64, U128, U128, U128 ) -OPCODE(VectorPairedMaxS8, U128, U128, U128 ) -OPCODE(VectorPairedMaxS16, U128, U128, U128 ) -OPCODE(VectorPairedMaxS32, U128, U128, U128 ) -OPCODE(VectorPairedMaxU8, U128, U128, U128 ) -OPCODE(VectorPairedMaxU16, U128, U128, U128 ) -OPCODE(VectorPairedMaxU32, U128, U128, U128 ) -OPCODE(VectorPairedMinS8, U128, U128, U128 ) -OPCODE(VectorPairedMinS16, U128, U128, U128 ) -OPCODE(VectorPairedMinS32, U128, U128, U128 ) -OPCODE(VectorPairedMinU8, 
U128, U128, U128 ) -OPCODE(VectorPairedMinU16, U128, U128, U128 ) -OPCODE(VectorPairedMinU32, U128, U128, U128 ) -OPCODE(VectorPolynomialMultiply8, U128, U128, U128 ) -OPCODE(VectorPolynomialMultiplyLong8, U128, U128, U128 ) -OPCODE(VectorPolynomialMultiplyLong64, U128, U128, U128 ) -OPCODE(VectorPopulationCount, U128, U128 ) -OPCODE(VectorReverseBits, U128, U128 ) -OPCODE(VectorRoundingHalvingAddS8, U128, U128, U128 ) -OPCODE(VectorRoundingHalvingAddS16, U128, U128, U128 ) -OPCODE(VectorRoundingHalvingAddS32, U128, U128, U128 ) -OPCODE(VectorRoundingHalvingAddU8, U128, U128, U128 ) -OPCODE(VectorRoundingHalvingAddU16, U128, U128, U128 ) -OPCODE(VectorRoundingHalvingAddU32, U128, U128, U128 ) -OPCODE(VectorRoundingShiftLeftS8, U128, U128, U128 ) -OPCODE(VectorRoundingShiftLeftS16, U128, U128, U128 ) -OPCODE(VectorRoundingShiftLeftS32, U128, U128, U128 ) -OPCODE(VectorRoundingShiftLeftS64, U128, U128, U128 ) -OPCODE(VectorRoundingShiftLeftU8, U128, U128, U128 ) -OPCODE(VectorRoundingShiftLeftU16, U128, U128, U128 ) -OPCODE(VectorRoundingShiftLeftU32, U128, U128, U128 ) -OPCODE(VectorRoundingShiftLeftU64, U128, U128, U128 ) -OPCODE(VectorShuffleHighHalfwords, U128, U128, U8 ) -OPCODE(VectorShuffleLowHalfwords, U128, U128, U8 ) -OPCODE(VectorShuffleWords, U128, U128, U8 ) -OPCODE(VectorSignExtend8, U128, U128 ) -OPCODE(VectorSignExtend16, U128, U128 ) -OPCODE(VectorSignExtend32, U128, U128 ) -OPCODE(VectorSignExtend64, U128, U128 ) -OPCODE(VectorSignedAbsoluteDifference8, U128, U128, U128 ) -OPCODE(VectorSignedAbsoluteDifference16, U128, U128, U128 ) -OPCODE(VectorSignedAbsoluteDifference32, U128, U128, U128 ) -OPCODE(VectorSignedSaturatedAbs8, U128, U128 ) -OPCODE(VectorSignedSaturatedAbs16, U128, U128 ) -OPCODE(VectorSignedSaturatedAbs32, U128, U128 ) -OPCODE(VectorSignedSaturatedAbs64, U128, U128 ) -OPCODE(VectorSignedSaturatedNarrowToSigned16, U128, U128 ) -OPCODE(VectorSignedSaturatedNarrowToSigned32, U128, U128 ) -OPCODE(VectorSignedSaturatedNarrowToSigned64, 
U128, U128 ) -OPCODE(VectorSignedSaturatedNarrowToUnsigned16, U128, U128 ) -OPCODE(VectorSignedSaturatedNarrowToUnsigned32, U128, U128 ) -OPCODE(VectorSignedSaturatedNarrowToUnsigned64, U128, U128 ) -OPCODE(VectorSub8, U128, U128, U128 ) -OPCODE(VectorSub16, U128, U128, U128 ) -OPCODE(VectorSub32, U128, U128, U128 ) -OPCODE(VectorSub64, U128, U128, U128 ) -OPCODE(VectorTable, Table, U128, Opaque, Opaque, Opaque ) -OPCODE(VectorTableLookup, U128, U128, Table, U128 ) -OPCODE(VectorUnsignedAbsoluteDifference8, U128, U128, U128 ) -OPCODE(VectorUnsignedAbsoluteDifference16, U128, U128, U128 ) -OPCODE(VectorUnsignedAbsoluteDifference32, U128, U128, U128 ) -OPCODE(VectorUnsignedSaturatedNarrow16, U128, U128 ) -OPCODE(VectorUnsignedSaturatedNarrow32, U128, U128 ) -OPCODE(VectorUnsignedSaturatedNarrow64, U128, U128 ) -OPCODE(VectorZeroExtend8, U128, U128 ) -OPCODE(VectorZeroExtend16, U128, U128 ) -OPCODE(VectorZeroExtend32, U128, U128 ) -OPCODE(VectorZeroExtend64, U128, U128 ) -OPCODE(VectorZeroUpper, U128, U128 ) -OPCODE(ZeroVector, U128, ) +OPCODE(VectorGetElement8, U8, U128, U8 ) +OPCODE(VectorGetElement16, U16, U128, U8 ) +OPCODE(VectorGetElement32, U32, U128, U8 ) +OPCODE(VectorGetElement64, U64, U128, U8 ) +OPCODE(VectorSetElement8, U128, U128, U8, U8 ) +OPCODE(VectorSetElement16, U128, U128, U8, U16 ) +OPCODE(VectorSetElement32, U128, U128, U8, U32 ) +OPCODE(VectorSetElement64, U128, U128, U8, U64 ) +OPCODE(VectorAbs8, U128, U128 ) +OPCODE(VectorAbs16, U128, U128 ) +OPCODE(VectorAbs32, U128, U128 ) +OPCODE(VectorAbs64, U128, U128 ) +OPCODE(VectorAdd8, U128, U128, U128 ) +OPCODE(VectorAdd16, U128, U128, U128 ) +OPCODE(VectorAdd32, U128, U128, U128 ) +OPCODE(VectorAdd64, U128, U128, U128 ) +OPCODE(VectorAnd, U128, U128, U128 ) +OPCODE(VectorArithmeticShiftRight8, U128, U128, U8 ) +OPCODE(VectorArithmeticShiftRight16, U128, U128, U8 ) +OPCODE(VectorArithmeticShiftRight32, U128, U128, U8 ) +OPCODE(VectorArithmeticShiftRight64, U128, U128, U8 ) 
+OPCODE(VectorBroadcastLower8, U128, U8 ) +OPCODE(VectorBroadcastLower16, U128, U16 ) +OPCODE(VectorBroadcastLower32, U128, U32 ) +OPCODE(VectorBroadcast8, U128, U8 ) +OPCODE(VectorBroadcast16, U128, U16 ) +OPCODE(VectorBroadcast32, U128, U32 ) +OPCODE(VectorBroadcast64, U128, U64 ) +OPCODE(VectorDeinterleaveEven8, U128, U128, U128 ) +OPCODE(VectorDeinterleaveEven16, U128, U128, U128 ) +OPCODE(VectorDeinterleaveEven32, U128, U128, U128 ) +OPCODE(VectorDeinterleaveEven64, U128, U128, U128 ) +OPCODE(VectorDeinterleaveOdd8, U128, U128, U128 ) +OPCODE(VectorDeinterleaveOdd16, U128, U128, U128 ) +OPCODE(VectorDeinterleaveOdd32, U128, U128, U128 ) +OPCODE(VectorDeinterleaveOdd64, U128, U128, U128 ) +OPCODE(VectorEor, U128, U128, U128 ) +OPCODE(VectorEqual8, U128, U128, U128 ) +OPCODE(VectorEqual16, U128, U128, U128 ) +OPCODE(VectorEqual32, U128, U128, U128 ) +OPCODE(VectorEqual64, U128, U128, U128 ) +OPCODE(VectorEqual128, U128, U128, U128 ) +OPCODE(VectorExtract, U128, U128, U128, U8 ) +OPCODE(VectorExtractLower, U128, U128, U128, U8 ) +OPCODE(VectorGreaterS8, U128, U128, U128 ) +OPCODE(VectorGreaterS16, U128, U128, U128 ) +OPCODE(VectorGreaterS32, U128, U128, U128 ) +OPCODE(VectorGreaterS64, U128, U128, U128 ) +OPCODE(VectorHalvingAddS8, U128, U128, U128 ) +OPCODE(VectorHalvingAddS16, U128, U128, U128 ) +OPCODE(VectorHalvingAddS32, U128, U128, U128 ) +OPCODE(VectorHalvingAddU8, U128, U128, U128 ) +OPCODE(VectorHalvingAddU16, U128, U128, U128 ) +OPCODE(VectorHalvingAddU32, U128, U128, U128 ) +OPCODE(VectorHalvingSubS8, U128, U128, U128 ) +OPCODE(VectorHalvingSubS16, U128, U128, U128 ) +OPCODE(VectorHalvingSubS32, U128, U128, U128 ) +OPCODE(VectorHalvingSubU8, U128, U128, U128 ) +OPCODE(VectorHalvingSubU16, U128, U128, U128 ) +OPCODE(VectorHalvingSubU32, U128, U128, U128 ) +OPCODE(VectorInterleaveLower8, U128, U128, U128 ) +OPCODE(VectorInterleaveLower16, U128, U128, U128 ) +OPCODE(VectorInterleaveLower32, U128, U128, U128 ) +OPCODE(VectorInterleaveLower64, U128, U128, 
U128 ) +OPCODE(VectorInterleaveUpper8, U128, U128, U128 ) +OPCODE(VectorInterleaveUpper16, U128, U128, U128 ) +OPCODE(VectorInterleaveUpper32, U128, U128, U128 ) +OPCODE(VectorInterleaveUpper64, U128, U128, U128 ) +OPCODE(VectorLogicalShiftLeft8, U128, U128, U8 ) +OPCODE(VectorLogicalShiftLeft16, U128, U128, U8 ) +OPCODE(VectorLogicalShiftLeft32, U128, U128, U8 ) +OPCODE(VectorLogicalShiftLeft64, U128, U128, U8 ) +OPCODE(VectorLogicalShiftRight8, U128, U128, U8 ) +OPCODE(VectorLogicalShiftRight16, U128, U128, U8 ) +OPCODE(VectorLogicalShiftRight32, U128, U128, U8 ) +OPCODE(VectorLogicalShiftRight64, U128, U128, U8 ) +OPCODE(VectorLogicalVShiftS8, U128, U128, U128 ) +OPCODE(VectorLogicalVShiftS16, U128, U128, U128 ) +OPCODE(VectorLogicalVShiftS32, U128, U128, U128 ) +OPCODE(VectorLogicalVShiftS64, U128, U128, U128 ) +OPCODE(VectorLogicalVShiftU8, U128, U128, U128 ) +OPCODE(VectorLogicalVShiftU16, U128, U128, U128 ) +OPCODE(VectorLogicalVShiftU32, U128, U128, U128 ) +OPCODE(VectorLogicalVShiftU64, U128, U128, U128 ) +OPCODE(VectorMaxS8, U128, U128, U128 ) +OPCODE(VectorMaxS16, U128, U128, U128 ) +OPCODE(VectorMaxS32, U128, U128, U128 ) +OPCODE(VectorMaxS64, U128, U128, U128 ) +OPCODE(VectorMaxU8, U128, U128, U128 ) +OPCODE(VectorMaxU16, U128, U128, U128 ) +OPCODE(VectorMaxU32, U128, U128, U128 ) +OPCODE(VectorMaxU64, U128, U128, U128 ) +OPCODE(VectorMinS8, U128, U128, U128 ) +OPCODE(VectorMinS16, U128, U128, U128 ) +OPCODE(VectorMinS32, U128, U128, U128 ) +OPCODE(VectorMinS64, U128, U128, U128 ) +OPCODE(VectorMinU8, U128, U128, U128 ) +OPCODE(VectorMinU16, U128, U128, U128 ) +OPCODE(VectorMinU32, U128, U128, U128 ) +OPCODE(VectorMinU64, U128, U128, U128 ) +OPCODE(VectorMultiply8, U128, U128, U128 ) +OPCODE(VectorMultiply16, U128, U128, U128 ) +OPCODE(VectorMultiply32, U128, U128, U128 ) +OPCODE(VectorMultiply64, U128, U128, U128 ) +OPCODE(VectorNarrow16, U128, U128 ) +OPCODE(VectorNarrow32, U128, U128 ) +OPCODE(VectorNarrow64, U128, U128 ) +OPCODE(VectorNot, U128, 
U128 ) +OPCODE(VectorOr, U128, U128, U128 ) +OPCODE(VectorPairedAddLower8, U128, U128, U128 ) +OPCODE(VectorPairedAddLower16, U128, U128, U128 ) +OPCODE(VectorPairedAddLower32, U128, U128, U128 ) +OPCODE(VectorPairedAddSignedWiden8, U128, U128 ) +OPCODE(VectorPairedAddSignedWiden16, U128, U128 ) +OPCODE(VectorPairedAddSignedWiden32, U128, U128 ) +OPCODE(VectorPairedAddUnsignedWiden8, U128, U128 ) +OPCODE(VectorPairedAddUnsignedWiden16, U128, U128 ) +OPCODE(VectorPairedAddUnsignedWiden32, U128, U128 ) +OPCODE(VectorPairedAdd8, U128, U128, U128 ) +OPCODE(VectorPairedAdd16, U128, U128, U128 ) +OPCODE(VectorPairedAdd32, U128, U128, U128 ) +OPCODE(VectorPairedAdd64, U128, U128, U128 ) +OPCODE(VectorPairedMaxS8, U128, U128, U128 ) +OPCODE(VectorPairedMaxS16, U128, U128, U128 ) +OPCODE(VectorPairedMaxS32, U128, U128, U128 ) +OPCODE(VectorPairedMaxU8, U128, U128, U128 ) +OPCODE(VectorPairedMaxU16, U128, U128, U128 ) +OPCODE(VectorPairedMaxU32, U128, U128, U128 ) +OPCODE(VectorPairedMinS8, U128, U128, U128 ) +OPCODE(VectorPairedMinS16, U128, U128, U128 ) +OPCODE(VectorPairedMinS32, U128, U128, U128 ) +OPCODE(VectorPairedMinU8, U128, U128, U128 ) +OPCODE(VectorPairedMinU16, U128, U128, U128 ) +OPCODE(VectorPairedMinU32, U128, U128, U128 ) +OPCODE(VectorPolynomialMultiply8, U128, U128, U128 ) +OPCODE(VectorPolynomialMultiplyLong8, U128, U128, U128 ) +OPCODE(VectorPolynomialMultiplyLong64, U128, U128, U128 ) +OPCODE(VectorPopulationCount, U128, U128 ) +OPCODE(VectorReverseBits, U128, U128 ) +OPCODE(VectorRoundingHalvingAddS8, U128, U128, U128 ) +OPCODE(VectorRoundingHalvingAddS16, U128, U128, U128 ) +OPCODE(VectorRoundingHalvingAddS32, U128, U128, U128 ) +OPCODE(VectorRoundingHalvingAddU8, U128, U128, U128 ) +OPCODE(VectorRoundingHalvingAddU16, U128, U128, U128 ) +OPCODE(VectorRoundingHalvingAddU32, U128, U128, U128 ) +OPCODE(VectorRoundingShiftLeftS8, U128, U128, U128 ) +OPCODE(VectorRoundingShiftLeftS16, U128, U128, U128 ) +OPCODE(VectorRoundingShiftLeftS32, U128, U128, U128 
) +OPCODE(VectorRoundingShiftLeftS64, U128, U128, U128 ) +OPCODE(VectorRoundingShiftLeftU8, U128, U128, U128 ) +OPCODE(VectorRoundingShiftLeftU16, U128, U128, U128 ) +OPCODE(VectorRoundingShiftLeftU32, U128, U128, U128 ) +OPCODE(VectorRoundingShiftLeftU64, U128, U128, U128 ) +OPCODE(VectorShuffleHighHalfwords, U128, U128, U8 ) +OPCODE(VectorShuffleLowHalfwords, U128, U128, U8 ) +OPCODE(VectorShuffleWords, U128, U128, U8 ) +OPCODE(VectorSignExtend8, U128, U128 ) +OPCODE(VectorSignExtend16, U128, U128 ) +OPCODE(VectorSignExtend32, U128, U128 ) +OPCODE(VectorSignExtend64, U128, U128 ) +OPCODE(VectorSignedAbsoluteDifference8, U128, U128, U128 ) +OPCODE(VectorSignedAbsoluteDifference16, U128, U128, U128 ) +OPCODE(VectorSignedAbsoluteDifference32, U128, U128, U128 ) +OPCODE(VectorSignedSaturatedAbs8, U128, U128 ) +OPCODE(VectorSignedSaturatedAbs16, U128, U128 ) +OPCODE(VectorSignedSaturatedAbs32, U128, U128 ) +OPCODE(VectorSignedSaturatedAbs64, U128, U128 ) +OPCODE(VectorSignedSaturatedDoublingMultiplyReturnHigh16, U128, U128, U128 ) +OPCODE(VectorSignedSaturatedDoublingMultiplyReturnHigh32, U128, U128, U128 ) +OPCODE(VectorSignedSaturatedNarrowToSigned16, U128, U128 ) +OPCODE(VectorSignedSaturatedNarrowToSigned32, U128, U128 ) +OPCODE(VectorSignedSaturatedNarrowToSigned64, U128, U128 ) +OPCODE(VectorSignedSaturatedNarrowToUnsigned16, U128, U128 ) +OPCODE(VectorSignedSaturatedNarrowToUnsigned32, U128, U128 ) +OPCODE(VectorSignedSaturatedNarrowToUnsigned64, U128, U128 ) +OPCODE(VectorSub8, U128, U128, U128 ) +OPCODE(VectorSub16, U128, U128, U128 ) +OPCODE(VectorSub32, U128, U128, U128 ) +OPCODE(VectorSub64, U128, U128, U128 ) +OPCODE(VectorTable, Table, U128, Opaque, Opaque, Opaque ) +OPCODE(VectorTableLookup, U128, U128, Table, U128 ) +OPCODE(VectorUnsignedAbsoluteDifference8, U128, U128, U128 ) +OPCODE(VectorUnsignedAbsoluteDifference16, U128, U128, U128 ) +OPCODE(VectorUnsignedAbsoluteDifference32, U128, U128, U128 ) +OPCODE(VectorUnsignedSaturatedNarrow16, U128, U128 
) +OPCODE(VectorUnsignedSaturatedNarrow32, U128, U128 ) +OPCODE(VectorUnsignedSaturatedNarrow64, U128, U128 ) +OPCODE(VectorZeroExtend8, U128, U128 ) +OPCODE(VectorZeroExtend16, U128, U128 ) +OPCODE(VectorZeroExtend32, U128, U128 ) +OPCODE(VectorZeroExtend64, U128, U128 ) +OPCODE(VectorZeroUpper, U128, U128 ) +OPCODE(ZeroVector, U128, ) // Floating-point operations -OPCODE(FPAbs32, U32, U32 ) -OPCODE(FPAbs64, U64, U64 ) -OPCODE(FPAdd32, U32, U32, U32 ) -OPCODE(FPAdd64, U64, U64, U64 ) -OPCODE(FPCompare32, NZCV, U32, U32, U1 ) -OPCODE(FPCompare64, NZCV, U64, U64, U1 ) -OPCODE(FPDiv32, U32, U32, U32 ) -OPCODE(FPDiv64, U64, U64, U64 ) -OPCODE(FPMax32, U32, U32, U32 ) -OPCODE(FPMax64, U64, U64, U64 ) -OPCODE(FPMaxNumeric32, U32, U32, U32 ) -OPCODE(FPMaxNumeric64, U64, U64, U64 ) -OPCODE(FPMin32, U32, U32, U32 ) -OPCODE(FPMin64, U64, U64, U64 ) -OPCODE(FPMinNumeric32, U32, U32, U32 ) -OPCODE(FPMinNumeric64, U64, U64, U64 ) -OPCODE(FPMul32, U32, U32, U32 ) -OPCODE(FPMul64, U64, U64, U64 ) -OPCODE(FPMulAdd32, U32, U32, U32, U32 ) -OPCODE(FPMulAdd64, U64, U64, U64, U64 ) -OPCODE(FPMulX32, U32, U32, U32 ) -OPCODE(FPMulX64, U64, U64, U64 ) -OPCODE(FPNeg32, U32, U32 ) -OPCODE(FPNeg64, U64, U64 ) -OPCODE(FPRecipEstimate32, U32, U32 ) -OPCODE(FPRecipEstimate64, U64, U64 ) -OPCODE(FPRecipStepFused32, U32, U32, U32 ) -OPCODE(FPRecipStepFused64, U64, U64, U64 ) -OPCODE(FPRoundInt32, U32, U32, U8, U1 ) -OPCODE(FPRoundInt64, U64, U64, U8, U1 ) -OPCODE(FPRSqrtEstimate32, U32, U32 ) -OPCODE(FPRSqrtEstimate64, U64, U64 ) -OPCODE(FPRSqrtStepFused32, U32, U32, U32 ) -OPCODE(FPRSqrtStepFused64, U64, U64, U64 ) -OPCODE(FPSqrt32, U32, U32 ) -OPCODE(FPSqrt64, U64, U64 ) -OPCODE(FPSub32, U32, U32, U32 ) -OPCODE(FPSub64, U64, U64, U64 ) +OPCODE(FPAbs32, U32, U32 ) +OPCODE(FPAbs64, U64, U64 ) +OPCODE(FPAdd32, U32, U32, U32 ) +OPCODE(FPAdd64, U64, U64, U64 ) +OPCODE(FPCompare32, NZCV, U32, U32, U1 ) +OPCODE(FPCompare64, NZCV, U64, U64, U1 ) +OPCODE(FPDiv32, U32, U32, U32 ) +OPCODE(FPDiv64, U64, 
U64, U64 ) +OPCODE(FPMax32, U32, U32, U32 ) +OPCODE(FPMax64, U64, U64, U64 ) +OPCODE(FPMaxNumeric32, U32, U32, U32 ) +OPCODE(FPMaxNumeric64, U64, U64, U64 ) +OPCODE(FPMin32, U32, U32, U32 ) +OPCODE(FPMin64, U64, U64, U64 ) +OPCODE(FPMinNumeric32, U32, U32, U32 ) +OPCODE(FPMinNumeric64, U64, U64, U64 ) +OPCODE(FPMul32, U32, U32, U32 ) +OPCODE(FPMul64, U64, U64, U64 ) +OPCODE(FPMulAdd32, U32, U32, U32, U32 ) +OPCODE(FPMulAdd64, U64, U64, U64, U64 ) +OPCODE(FPMulX32, U32, U32, U32 ) +OPCODE(FPMulX64, U64, U64, U64 ) +OPCODE(FPNeg32, U32, U32 ) +OPCODE(FPNeg64, U64, U64 ) +OPCODE(FPRecipEstimate32, U32, U32 ) +OPCODE(FPRecipEstimate64, U64, U64 ) +OPCODE(FPRecipStepFused32, U32, U32, U32 ) +OPCODE(FPRecipStepFused64, U64, U64, U64 ) +OPCODE(FPRoundInt32, U32, U32, U8, U1 ) +OPCODE(FPRoundInt64, U64, U64, U8, U1 ) +OPCODE(FPRSqrtEstimate32, U32, U32 ) +OPCODE(FPRSqrtEstimate64, U64, U64 ) +OPCODE(FPRSqrtStepFused32, U32, U32, U32 ) +OPCODE(FPRSqrtStepFused64, U64, U64, U64 ) +OPCODE(FPSqrt32, U32, U32 ) +OPCODE(FPSqrt64, U64, U64 ) +OPCODE(FPSub32, U32, U32, U32 ) +OPCODE(FPSub64, U64, U64, U64 ) // Floating-point conversions -OPCODE(FPSingleToDouble, U64, U32 ) -OPCODE(FPDoubleToSingle, U32, U64 ) -OPCODE(FPDoubleToFixedS32, U32, U64, U8, U8 ) -OPCODE(FPDoubleToFixedS64, U64, U64, U8, U8 ) -OPCODE(FPDoubleToFixedU32, U32, U64, U8, U8 ) -OPCODE(FPDoubleToFixedU64, U64, U64, U8, U8 ) -OPCODE(FPSingleToFixedS32, U32, U32, U8, U8 ) -OPCODE(FPSingleToFixedS64, U64, U32, U8, U8 ) -OPCODE(FPSingleToFixedU32, U32, U32, U8, U8 ) -OPCODE(FPSingleToFixedU64, U64, U32, U8, U8 ) -OPCODE(FPU32ToSingle, U32, U32, U1 ) -OPCODE(FPS32ToSingle, U32, U32, U1 ) -OPCODE(FPU32ToDouble, U64, U32, U1 ) -OPCODE(FPU64ToDouble, U64, U64, U1 ) -OPCODE(FPU64ToSingle, U32, U64, U1 ) -OPCODE(FPS32ToDouble, U64, U32, U1 ) -OPCODE(FPS64ToDouble, U64, U64, U1 ) -OPCODE(FPS64ToSingle, U32, U64, U1 ) +OPCODE(FPSingleToDouble, U64, U32 ) +OPCODE(FPDoubleToSingle, U32, U64 ) +OPCODE(FPDoubleToFixedS32, U32, 
U64, U8, U8 ) +OPCODE(FPDoubleToFixedS64, U64, U64, U8, U8 ) +OPCODE(FPDoubleToFixedU32, U32, U64, U8, U8 ) +OPCODE(FPDoubleToFixedU64, U64, U64, U8, U8 ) +OPCODE(FPSingleToFixedS32, U32, U32, U8, U8 ) +OPCODE(FPSingleToFixedS64, U64, U32, U8, U8 ) +OPCODE(FPSingleToFixedU32, U32, U32, U8, U8 ) +OPCODE(FPSingleToFixedU64, U64, U32, U8, U8 ) +OPCODE(FPU32ToSingle, U32, U32, U1 ) +OPCODE(FPS32ToSingle, U32, U32, U1 ) +OPCODE(FPU32ToDouble, U64, U32, U1 ) +OPCODE(FPU64ToDouble, U64, U64, U1 ) +OPCODE(FPU64ToSingle, U32, U64, U1 ) +OPCODE(FPS32ToDouble, U64, U32, U1 ) +OPCODE(FPS64ToDouble, U64, U64, U1 ) +OPCODE(FPS64ToSingle, U32, U64, U1 ) // Floating-point vector instructions -OPCODE(FPVectorAbs16, U128, U128 ) -OPCODE(FPVectorAbs32, U128, U128 ) -OPCODE(FPVectorAbs64, U128, U128 ) -OPCODE(FPVectorAdd32, U128, U128, U128 ) -OPCODE(FPVectorAdd64, U128, U128, U128 ) -OPCODE(FPVectorDiv32, U128, U128, U128 ) -OPCODE(FPVectorDiv64, U128, U128, U128 ) -OPCODE(FPVectorEqual32, U128, U128, U128 ) -OPCODE(FPVectorEqual64, U128, U128, U128 ) -OPCODE(FPVectorGreater32, U128, U128, U128 ) -OPCODE(FPVectorGreater64, U128, U128, U128 ) -OPCODE(FPVectorGreaterEqual32, U128, U128, U128 ) -OPCODE(FPVectorGreaterEqual64, U128, U128, U128 ) -OPCODE(FPVectorMax32, U128, U128, U128 ) -OPCODE(FPVectorMax64, U128, U128, U128 ) -OPCODE(FPVectorMin32, U128, U128, U128 ) -OPCODE(FPVectorMin64, U128, U128, U128 ) -OPCODE(FPVectorMul32, U128, U128, U128 ) -OPCODE(FPVectorMul64, U128, U128, U128 ) -OPCODE(FPVectorMulAdd32, U128, U128, U128, U128 ) -OPCODE(FPVectorMulAdd64, U128, U128, U128, U128 ) -OPCODE(FPVectorNeg16, U128, U128 ) -OPCODE(FPVectorNeg32, U128, U128 ) -OPCODE(FPVectorNeg64, U128, U128 ) -OPCODE(FPVectorPairedAdd32, U128, U128, U128 ) -OPCODE(FPVectorPairedAdd64, U128, U128, U128 ) -OPCODE(FPVectorPairedAddLower32, U128, U128, U128 ) -OPCODE(FPVectorPairedAddLower64, U128, U128, U128 ) -OPCODE(FPVectorRecipEstimate32, U128, U128 ) -OPCODE(FPVectorRecipEstimate64, U128, U128 ) 
-OPCODE(FPVectorRecipStepFused32, U128, U128, U128 ) -OPCODE(FPVectorRecipStepFused64, U128, U128, U128 ) -OPCODE(FPVectorRoundInt32, U128, U128, U8, U1 ) -OPCODE(FPVectorRoundInt64, U128, U128, U8, U1 ) -OPCODE(FPVectorRSqrtEstimate32, U128, U128 ) -OPCODE(FPVectorRSqrtEstimate64, U128, U128 ) -OPCODE(FPVectorRSqrtStepFused32, U128, U128, U128 ) -OPCODE(FPVectorRSqrtStepFused64, U128, U128, U128 ) -OPCODE(FPVectorS32ToSingle, U128, U128 ) -OPCODE(FPVectorS64ToDouble, U128, U128 ) -OPCODE(FPVectorSub32, U128, U128, U128 ) -OPCODE(FPVectorSub64, U128, U128, U128 ) -OPCODE(FPVectorToSignedFixed32, U128, U128, U8, U8 ) -OPCODE(FPVectorToSignedFixed64, U128, U128, U8, U8 ) -OPCODE(FPVectorToUnsignedFixed32, U128, U128, U8, U8 ) -OPCODE(FPVectorToUnsignedFixed64, U128, U128, U8, U8 ) -OPCODE(FPVectorU32ToSingle, U128, U128 ) -OPCODE(FPVectorU64ToDouble, U128, U128 ) +OPCODE(FPVectorAbs16, U128, U128 ) +OPCODE(FPVectorAbs32, U128, U128 ) +OPCODE(FPVectorAbs64, U128, U128 ) +OPCODE(FPVectorAdd32, U128, U128, U128 ) +OPCODE(FPVectorAdd64, U128, U128, U128 ) +OPCODE(FPVectorDiv32, U128, U128, U128 ) +OPCODE(FPVectorDiv64, U128, U128, U128 ) +OPCODE(FPVectorEqual32, U128, U128, U128 ) +OPCODE(FPVectorEqual64, U128, U128, U128 ) +OPCODE(FPVectorGreater32, U128, U128, U128 ) +OPCODE(FPVectorGreater64, U128, U128, U128 ) +OPCODE(FPVectorGreaterEqual32, U128, U128, U128 ) +OPCODE(FPVectorGreaterEqual64, U128, U128, U128 ) +OPCODE(FPVectorMax32, U128, U128, U128 ) +OPCODE(FPVectorMax64, U128, U128, U128 ) +OPCODE(FPVectorMin32, U128, U128, U128 ) +OPCODE(FPVectorMin64, U128, U128, U128 ) +OPCODE(FPVectorMul32, U128, U128, U128 ) +OPCODE(FPVectorMul64, U128, U128, U128 ) +OPCODE(FPVectorMulAdd32, U128, U128, U128, U128 ) +OPCODE(FPVectorMulAdd64, U128, U128, U128, U128 ) +OPCODE(FPVectorNeg16, U128, U128 ) +OPCODE(FPVectorNeg32, U128, U128 ) +OPCODE(FPVectorNeg64, U128, U128 ) +OPCODE(FPVectorPairedAdd32, U128, U128, U128 ) +OPCODE(FPVectorPairedAdd64, U128, U128, U128 ) 
+OPCODE(FPVectorPairedAddLower32, U128, U128, U128 ) +OPCODE(FPVectorPairedAddLower64, U128, U128, U128 ) +OPCODE(FPVectorRecipEstimate32, U128, U128 ) +OPCODE(FPVectorRecipEstimate64, U128, U128 ) +OPCODE(FPVectorRecipStepFused32, U128, U128, U128 ) +OPCODE(FPVectorRecipStepFused64, U128, U128, U128 ) +OPCODE(FPVectorRoundInt32, U128, U128, U8, U1 ) +OPCODE(FPVectorRoundInt64, U128, U128, U8, U1 ) +OPCODE(FPVectorRSqrtEstimate32, U128, U128 ) +OPCODE(FPVectorRSqrtEstimate64, U128, U128 ) +OPCODE(FPVectorRSqrtStepFused32, U128, U128, U128 ) +OPCODE(FPVectorRSqrtStepFused64, U128, U128, U128 ) +OPCODE(FPVectorS32ToSingle, U128, U128 ) +OPCODE(FPVectorS64ToDouble, U128, U128 ) +OPCODE(FPVectorSub32, U128, U128, U128 ) +OPCODE(FPVectorSub64, U128, U128, U128 ) +OPCODE(FPVectorToSignedFixed32, U128, U128, U8, U8 ) +OPCODE(FPVectorToSignedFixed64, U128, U128, U8, U8 ) +OPCODE(FPVectorToUnsignedFixed32, U128, U128, U8, U8 ) +OPCODE(FPVectorToUnsignedFixed64, U128, U128, U8, U8 ) +OPCODE(FPVectorU32ToSingle, U128, U128 ) +OPCODE(FPVectorU64ToDouble, U128, U128 ) // A32 Memory access -A32OPC(ClearExclusive, Void, ) -A32OPC(SetExclusive, Void, U32, U8 ) -A32OPC(ReadMemory8, U8, U32 ) -A32OPC(ReadMemory16, U16, U32 ) -A32OPC(ReadMemory32, U32, U32 ) -A32OPC(ReadMemory64, U64, U32 ) -A32OPC(WriteMemory8, Void, U32, U8 ) -A32OPC(WriteMemory16, Void, U32, U16 ) -A32OPC(WriteMemory32, Void, U32, U32 ) -A32OPC(WriteMemory64, Void, U32, U64 ) -A32OPC(ExclusiveWriteMemory8, U32, U32, U8 ) -A32OPC(ExclusiveWriteMemory16, U32, U32, U16 ) -A32OPC(ExclusiveWriteMemory32, U32, U32, U32 ) -A32OPC(ExclusiveWriteMemory64, U32, U32, U32, U32 ) +A32OPC(ClearExclusive, Void, ) +A32OPC(SetExclusive, Void, U32, U8 ) +A32OPC(ReadMemory8, U8, U32 ) +A32OPC(ReadMemory16, U16, U32 ) +A32OPC(ReadMemory32, U32, U32 ) +A32OPC(ReadMemory64, U64, U32 ) +A32OPC(WriteMemory8, Void, U32, U8 ) +A32OPC(WriteMemory16, Void, U32, U16 ) +A32OPC(WriteMemory32, Void, U32, U32 ) +A32OPC(WriteMemory64, Void, U32, 
U64 ) +A32OPC(ExclusiveWriteMemory8, U32, U32, U8 ) +A32OPC(ExclusiveWriteMemory16, U32, U32, U16 ) +A32OPC(ExclusiveWriteMemory32, U32, U32, U32 ) +A32OPC(ExclusiveWriteMemory64, U32, U32, U32, U32 ) // A64 Memory access -A64OPC(ClearExclusive, Void, ) -A64OPC(SetExclusive, Void, U64, U8 ) -A64OPC(ReadMemory8, U8, U64 ) -A64OPC(ReadMemory16, U16, U64 ) -A64OPC(ReadMemory32, U32, U64 ) -A64OPC(ReadMemory64, U64, U64 ) -A64OPC(ReadMemory128, U128, U64 ) -A64OPC(WriteMemory8, Void, U64, U8 ) -A64OPC(WriteMemory16, Void, U64, U16 ) -A64OPC(WriteMemory32, Void, U64, U32 ) -A64OPC(WriteMemory64, Void, U64, U64 ) -A64OPC(WriteMemory128, Void, U64, U128 ) -A64OPC(ExclusiveWriteMemory8, U32, U64, U8 ) -A64OPC(ExclusiveWriteMemory16, U32, U64, U16 ) -A64OPC(ExclusiveWriteMemory32, U32, U64, U32 ) -A64OPC(ExclusiveWriteMemory64, U32, U64, U64 ) -A64OPC(ExclusiveWriteMemory128, U32, U64, U128 ) +A64OPC(ClearExclusive, Void, ) +A64OPC(SetExclusive, Void, U64, U8 ) +A64OPC(ReadMemory8, U8, U64 ) +A64OPC(ReadMemory16, U16, U64 ) +A64OPC(ReadMemory32, U32, U64 ) +A64OPC(ReadMemory64, U64, U64 ) +A64OPC(ReadMemory128, U128, U64 ) +A64OPC(WriteMemory8, Void, U64, U8 ) +A64OPC(WriteMemory16, Void, U64, U16 ) +A64OPC(WriteMemory32, Void, U64, U32 ) +A64OPC(WriteMemory64, Void, U64, U64 ) +A64OPC(WriteMemory128, Void, U64, U128 ) +A64OPC(ExclusiveWriteMemory8, U32, U64, U8 ) +A64OPC(ExclusiveWriteMemory16, U32, U64, U16 ) +A64OPC(ExclusiveWriteMemory32, U32, U64, U32 ) +A64OPC(ExclusiveWriteMemory64, U32, U64, U64 ) +A64OPC(ExclusiveWriteMemory128, U32, U64, U128 ) // Coprocessor -A32OPC(CoprocInternalOperation, Void, CoprocInfo ) -A32OPC(CoprocSendOneWord, Void, CoprocInfo, U32 ) -A32OPC(CoprocSendTwoWords, Void, CoprocInfo, U32, U32 ) -A32OPC(CoprocGetOneWord, U32, CoprocInfo ) -A32OPC(CoprocGetTwoWords, U64, CoprocInfo ) -A32OPC(CoprocLoadWords, Void, CoprocInfo, U32 ) -A32OPC(CoprocStoreWords, Void, CoprocInfo, U32 ) +A32OPC(CoprocInternalOperation, Void, CoprocInfo ) 
+A32OPC(CoprocSendOneWord, Void, CoprocInfo, U32 ) +A32OPC(CoprocSendTwoWords, Void, CoprocInfo, U32, U32 ) +A32OPC(CoprocGetOneWord, U32, CoprocInfo ) +A32OPC(CoprocGetTwoWords, U64, CoprocInfo ) +A32OPC(CoprocLoadWords, Void, CoprocInfo, U32 ) +A32OPC(CoprocStoreWords, Void, CoprocInfo, U32 )