IR: Implement VectorLogicalShiftLeft{8,16,32,64}

This commit is contained in:
MerryMage 2018-02-10 09:31:50 +00:00
parent 15e8231f24
commit f6247125c0
4 changed files with 338 additions and 267 deletions

View file

@ -587,6 +587,53 @@ void EmitX64::EmitVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, a);
}
// Emits x64 code that shifts each byte lane of a 128-bit vector left by an
// immediate amount.
//
// args[0] is the vector operand; args[1] must be an 8-bit immediate shift count.
// x64 SSE provides no byte-granular shift, so a one-bit shift is emitted as
// PADDB of the register with itself (x + x == x << 1 within each byte lane).
void EmitX64::EmitVectorLogicalShiftLeft8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
    const u8 shift_amount = args[1].GetImmediateU8();

    if (shift_amount >= 8) {
        // Every bit has left its byte lane, so the result is all zeroes.
        // A single PXOR replaces what would otherwise be up to 255 PADDBs.
        code.pxor(result, result);
    } else {
        // TODO: Optimize
        for (size_t i = 0; i < shift_amount; ++i) {
            code.paddb(result, result);
        }
    }

    ctx.reg_alloc.DefineValue(inst, result);
}
// Emits a PSLLW on the vector operand (args[0]) using the 8-bit immediate in
// args[1] as the shift count, and defines the shifted register as the result.
void EmitX64::EmitVectorLogicalShiftLeft16(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    // Claimed as a scratch register because the shift overwrites it in place.
    Xbyak::Xmm vec = ctx.reg_alloc.UseScratchXmm(operands[0]);
    code.psllw(vec, operands[1].GetImmediateU8());

    ctx.reg_alloc.DefineValue(inst, vec);
}
// Emits a PSLLD on the vector operand (args[0]) using the 8-bit immediate in
// args[1] as the shift count, and defines the shifted register as the result.
void EmitX64::EmitVectorLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    // Claimed as a scratch register because the shift overwrites it in place.
    Xbyak::Xmm vec = ctx.reg_alloc.UseScratchXmm(operands[0]);
    code.pslld(vec, operands[1].GetImmediateU8());

    ctx.reg_alloc.DefineValue(inst, vec);
}
// Emits a PSLLQ on the vector operand (args[0]) using the 8-bit immediate in
// args[1] as the shift count, and defines the shifted register as the result.
void EmitX64::EmitVectorLogicalShiftLeft64(EmitContext& ctx, IR::Inst* inst) {
    auto operands = ctx.reg_alloc.GetArgumentInfo(inst);

    // Claimed as a scratch register because the shift overwrites it in place.
    Xbyak::Xmm vec = ctx.reg_alloc.UseScratchXmm(operands[0]);
    code.psllq(vec, operands[1].GetImmediateU8());

    ctx.reg_alloc.DefineValue(inst, vec);
}
void EmitX64::EmitVectorZeroUpper(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);

View file

@ -873,6 +873,22 @@ U128 IREmitter::VectorInterleaveLower64(const U128& a, const U128& b) {
return Inst<U128>(Opcode::VectorInterleaveLower64, a, b);
}
// Returns the result of a VectorLogicalShiftLeft8 instruction on `a`; the shift
// amount is passed to the instruction as an Imm8 operand.
U128 IREmitter::VectorLogicalShiftLeft8(const U128& a, u8 shift_amount) {
    const auto amount = Imm8(shift_amount);
    return Inst<U128>(Opcode::VectorLogicalShiftLeft8, a, amount);
}
// Returns the result of a VectorLogicalShiftLeft16 instruction on `a`; the shift
// amount is passed to the instruction as an Imm8 operand.
U128 IREmitter::VectorLogicalShiftLeft16(const U128& a, u8 shift_amount) {
    const auto amount = Imm8(shift_amount);
    return Inst<U128>(Opcode::VectorLogicalShiftLeft16, a, amount);
}
// Returns the result of a VectorLogicalShiftLeft32 instruction on `a`; the shift
// amount is passed to the instruction as an Imm8 operand.
U128 IREmitter::VectorLogicalShiftLeft32(const U128& a, u8 shift_amount) {
    const auto amount = Imm8(shift_amount);
    return Inst<U128>(Opcode::VectorLogicalShiftLeft32, a, amount);
}
// Returns the result of a VectorLogicalShiftLeft64 instruction on `a`; the shift
// amount is passed to the instruction as an Imm8 operand.
U128 IREmitter::VectorLogicalShiftLeft64(const U128& a, u8 shift_amount) {
    const auto amount = Imm8(shift_amount);
    return Inst<U128>(Opcode::VectorLogicalShiftLeft64, a, amount);
}
// Returns the U128 result of an Opcode::VectorNot instruction applied to `a`.
U128 IREmitter::VectorNot(const U128& a) {
    constexpr Opcode op = Opcode::VectorNot;
    return Inst<U128>(op, a);
}

View file

@ -229,6 +229,10 @@ public:
U128 VectorInterleaveLower16(const U128& a, const U128& b);
U128 VectorInterleaveLower32(const U128& a, const U128& b);
U128 VectorInterleaveLower64(const U128& a, const U128& b);
// Vector left shifts by an immediate. Each builds the identically named
// VectorLogicalShiftLeft{8,16,32,64} opcode, with shift_amount passed as an
// 8-bit immediate operand.
U128 VectorLogicalShiftLeft8(const U128& a, u8 shift_amount);
U128 VectorLogicalShiftLeft16(const U128& a, u8 shift_amount);
U128 VectorLogicalShiftLeft32(const U128& a, u8 shift_amount);
U128 VectorLogicalShiftLeft64(const U128& a, u8 shift_amount);
U128 VectorNot(const U128& a);
U128 VectorOr(const U128& a, const U128& b);
U128 VectorPairedAdd8(const U128& a, const U128& b);

View file

@ -1,299 +1,303 @@
// opcode name, return type, arg1 type, arg2 type, arg3 type, ...
// opcode name, return type, arg1 type, arg2 type, arg3 type, ...
OPCODE(Void, T::Void, )
OPCODE(Identity, T::Opaque, T::Opaque )
OPCODE(Breakpoint, T::Void, )
OPCODE(Void, T::Void, )
OPCODE(Identity, T::Opaque, T::Opaque )
OPCODE(Breakpoint, T::Void, )
// A32 Context getters/setters
A32OPC(GetRegister, T::U32, T::A32Reg )
A32OPC(GetExtendedRegister32, T::U32, T::A32ExtReg )
A32OPC(GetExtendedRegister64, T::U64, T::A32ExtReg )
A32OPC(SetRegister, T::Void, T::A32Reg, T::U32 )
A32OPC(SetExtendedRegister32, T::Void, T::A32ExtReg, T::U32 )
A32OPC(SetExtendedRegister64, T::Void, T::A32ExtReg, T::U64 )
A32OPC(GetCpsr, T::U32, )
A32OPC(SetCpsr, T::Void, T::U32 )
A32OPC(SetCpsrNZCV, T::Void, T::U32 )
A32OPC(SetCpsrNZCVQ, T::Void, T::U32 )
A32OPC(GetNFlag, T::U1, )
A32OPC(SetNFlag, T::Void, T::U1 )
A32OPC(GetZFlag, T::U1, )
A32OPC(SetZFlag, T::Void, T::U1 )
A32OPC(GetCFlag, T::U1, )
A32OPC(SetCFlag, T::Void, T::U1 )
A32OPC(GetVFlag, T::U1, )
A32OPC(SetVFlag, T::Void, T::U1 )
A32OPC(OrQFlag, T::Void, T::U1 )
A32OPC(GetGEFlags, T::U32, )
A32OPC(SetGEFlags, T::Void, T::U32 )
A32OPC(SetGEFlagsCompressed, T::Void, T::U32 )
A32OPC(BXWritePC, T::Void, T::U32 )
A32OPC(CallSupervisor, T::Void, T::U32 )
A32OPC(ExceptionRaised, T::Void, T::U32, T::U64 )
A32OPC(GetFpscr, T::U32, )
A32OPC(SetFpscr, T::Void, T::U32, )
A32OPC(GetFpscrNZCV, T::U32, )
A32OPC(SetFpscrNZCV, T::Void, T::NZCVFlags )
A32OPC(GetRegister, T::U32, T::A32Reg )
A32OPC(GetExtendedRegister32, T::U32, T::A32ExtReg )
A32OPC(GetExtendedRegister64, T::U64, T::A32ExtReg )
A32OPC(SetRegister, T::Void, T::A32Reg, T::U32 )
A32OPC(SetExtendedRegister32, T::Void, T::A32ExtReg, T::U32 )
A32OPC(SetExtendedRegister64, T::Void, T::A32ExtReg, T::U64 )
A32OPC(GetCpsr, T::U32, )
A32OPC(SetCpsr, T::Void, T::U32 )
A32OPC(SetCpsrNZCV, T::Void, T::U32 )
A32OPC(SetCpsrNZCVQ, T::Void, T::U32 )
A32OPC(GetNFlag, T::U1, )
A32OPC(SetNFlag, T::Void, T::U1 )
A32OPC(GetZFlag, T::U1, )
A32OPC(SetZFlag, T::Void, T::U1 )
A32OPC(GetCFlag, T::U1, )
A32OPC(SetCFlag, T::Void, T::U1 )
A32OPC(GetVFlag, T::U1, )
A32OPC(SetVFlag, T::Void, T::U1 )
A32OPC(OrQFlag, T::Void, T::U1 )
A32OPC(GetGEFlags, T::U32, )
A32OPC(SetGEFlags, T::Void, T::U32 )
A32OPC(SetGEFlagsCompressed, T::Void, T::U32 )
A32OPC(BXWritePC, T::Void, T::U32 )
A32OPC(CallSupervisor, T::Void, T::U32 )
A32OPC(ExceptionRaised, T::Void, T::U32, T::U64 )
A32OPC(GetFpscr, T::U32, )
A32OPC(SetFpscr, T::Void, T::U32, )
A32OPC(GetFpscrNZCV, T::U32, )
A32OPC(SetFpscrNZCV, T::Void, T::NZCVFlags )
// A64 Context getters/setters
A64OPC(SetCheckBit, T::Void, T::U1 )
A64OPC(GetCFlag, T::U1, )
A64OPC(SetNZCV, T::Void, T::NZCVFlags )
A64OPC(GetW, T::U32, T::A64Reg )
A64OPC(GetX, T::U64, T::A64Reg )
//A64OPC(GetB, T::U128, T::A64Vec )
//A64OPC(GetH, T::U128, T::A64Vec )
A64OPC(GetS, T::U128, T::A64Vec )
A64OPC(GetD, T::U128, T::A64Vec )
A64OPC(GetQ, T::U128, T::A64Vec )
A64OPC(GetSP, T::U64, )
A64OPC(SetW, T::Void, T::A64Reg, T::U32 )
A64OPC(SetX, T::Void, T::A64Reg, T::U64 )
//A64OPC(SetB, T::Void, T::A64Vec, T::U8 )
//A64OPC(SetH, T::Void, T::A64Vec, T::U16 )
A64OPC(SetS, T::Void, T::A64Vec, T::U128 )
A64OPC(SetD, T::Void, T::A64Vec, T::U128 )
A64OPC(SetQ, T::Void, T::A64Vec, T::U128 )
A64OPC(SetSP, T::Void, T::U64 )
A64OPC(SetPC, T::Void, T::U64 )
A64OPC(CallSupervisor, T::Void, T::U32 )
A64OPC(ExceptionRaised, T::Void, T::U64, T::U64 )
A64OPC(SetCheckBit, T::Void, T::U1 )
A64OPC(GetCFlag, T::U1, )
A64OPC(SetNZCV, T::Void, T::NZCVFlags )
A64OPC(GetW, T::U32, T::A64Reg )
A64OPC(GetX, T::U64, T::A64Reg )
//A64OPC(GetB, T::U128, T::A64Vec )
//A64OPC(GetH, T::U128, T::A64Vec )
A64OPC(GetS, T::U128, T::A64Vec )
A64OPC(GetD, T::U128, T::A64Vec )
A64OPC(GetQ, T::U128, T::A64Vec )
A64OPC(GetSP, T::U64, )
A64OPC(SetW, T::Void, T::A64Reg, T::U32 )
A64OPC(SetX, T::Void, T::A64Reg, T::U64 )
//A64OPC(SetB, T::Void, T::A64Vec, T::U8 )
//A64OPC(SetH, T::Void, T::A64Vec, T::U16 )
A64OPC(SetS, T::Void, T::A64Vec, T::U128 )
A64OPC(SetD, T::Void, T::A64Vec, T::U128 )
A64OPC(SetQ, T::Void, T::A64Vec, T::U128 )
A64OPC(SetSP, T::Void, T::U64 )
A64OPC(SetPC, T::Void, T::U64 )
A64OPC(CallSupervisor, T::Void, T::U32 )
A64OPC(ExceptionRaised, T::Void, T::U64, T::U64 )
// Hints
OPCODE(PushRSB, T::Void, T::U64 )
OPCODE(PushRSB, T::Void, T::U64 )
// Pseudo-operation, handled specially at final emit
OPCODE(GetCarryFromOp, T::U1, T::U32 )
OPCODE(GetOverflowFromOp, T::U1, T::U32 )
OPCODE(GetGEFromOp, T::U32, T::U32 )
OPCODE(GetNZCVFromOp, T::NZCVFlags, T::Opaque )
OPCODE(GetCarryFromOp, T::U1, T::U32 )
OPCODE(GetOverflowFromOp, T::U1, T::U32 )
OPCODE(GetGEFromOp, T::U32, T::U32 )
OPCODE(GetNZCVFromOp, T::NZCVFlags, T::Opaque )
OPCODE(NZCVFromPackedFlags, T::NZCVFlags, T::U32 )
OPCODE(NZCVFromPackedFlags, T::NZCVFlags, T::U32 )
// Calculations
OPCODE(Pack2x32To1x64, T::U64, T::U32, T::U32 )
OPCODE(LeastSignificantWord, T::U32, T::U64 )
OPCODE(MostSignificantWord, T::U32, T::U64 )
OPCODE(LeastSignificantHalf, T::U16, T::U32 )
OPCODE(LeastSignificantByte, T::U8, T::U32 )
OPCODE(MostSignificantBit, T::U1, T::U32 )
OPCODE(IsZero32, T::U1, T::U32 )
OPCODE(IsZero64, T::U1, T::U64 )
OPCODE(TestBit, T::U1, T::U64, T::U8 )
OPCODE(ConditionalSelect32, T::U32, T::Cond, T::U32, T::U32 )
OPCODE(ConditionalSelect64, T::U64, T::Cond, T::U64, T::U64 )
OPCODE(ConditionalSelectNZCV, T::NZCVFlags, T::Cond, T::NZCVFlags, T::NZCVFlags )
OPCODE(LogicalShiftLeft32, T::U32, T::U32, T::U8, T::U1 )
OPCODE(LogicalShiftLeft64, T::U64, T::U64, T::U8 )
OPCODE(LogicalShiftRight32, T::U32, T::U32, T::U8, T::U1 )
OPCODE(LogicalShiftRight64, T::U64, T::U64, T::U8 )
OPCODE(ArithmeticShiftRight32, T::U32, T::U32, T::U8, T::U1 )
OPCODE(ArithmeticShiftRight64, T::U64, T::U64, T::U8 )
OPCODE(RotateRight32, T::U32, T::U32, T::U8, T::U1 )
OPCODE(RotateRight64, T::U64, T::U64, T::U8 )
OPCODE(RotateRightExtended, T::U32, T::U32, T::U1 )
OPCODE(Add32, T::U32, T::U32, T::U32, T::U1 )
OPCODE(Add64, T::U64, T::U64, T::U64, T::U1 )
OPCODE(Sub32, T::U32, T::U32, T::U32, T::U1 )
OPCODE(Sub64, T::U64, T::U64, T::U64, T::U1 )
OPCODE(Mul32, T::U32, T::U32, T::U32 )
OPCODE(Mul64, T::U64, T::U64, T::U64 )
OPCODE(SignedMultiplyHigh64, T::U64, T::U64, T::U64 )
OPCODE(UnsignedMultiplyHigh64, T::U64, T::U64, T::U64 )
OPCODE(UnsignedDiv32, T::U32, T::U32, T::U32 )
OPCODE(UnsignedDiv64, T::U64, T::U64, T::U64 )
OPCODE(SignedDiv32, T::U32, T::U32, T::U32 )
OPCODE(SignedDiv64, T::U64, T::U64, T::U64 )
OPCODE(And32, T::U32, T::U32, T::U32 )
OPCODE(And64, T::U64, T::U64, T::U64 )
OPCODE(Eor32, T::U32, T::U32, T::U32 )
OPCODE(Eor64, T::U64, T::U64, T::U64 )
OPCODE(Or32, T::U32, T::U32, T::U32 )
OPCODE(Or64, T::U64, T::U64, T::U64 )
OPCODE(Not32, T::U32, T::U32 )
OPCODE(Not64, T::U64, T::U64 )
OPCODE(SignExtendByteToWord, T::U32, T::U8 )
OPCODE(SignExtendHalfToWord, T::U32, T::U16 )
OPCODE(SignExtendByteToLong, T::U64, T::U8 )
OPCODE(SignExtendHalfToLong, T::U64, T::U16 )
OPCODE(SignExtendWordToLong, T::U64, T::U32 )
OPCODE(ZeroExtendByteToWord, T::U32, T::U8 )
OPCODE(ZeroExtendHalfToWord, T::U32, T::U16 )
OPCODE(ZeroExtendByteToLong, T::U64, T::U8 )
OPCODE(ZeroExtendHalfToLong, T::U64, T::U16 )
OPCODE(ZeroExtendWordToLong, T::U64, T::U32 )
OPCODE(ZeroExtendLongToQuad, T::U128, T::U64 )
OPCODE(ByteReverseWord, T::U32, T::U32 )
OPCODE(ByteReverseHalf, T::U16, T::U16 )
OPCODE(ByteReverseDual, T::U64, T::U64 )
OPCODE(CountLeadingZeros32, T::U32, T::U32 )
OPCODE(CountLeadingZeros64, T::U64, T::U64 )
OPCODE(ExtractRegister32, T::U32, T::U32, T::U32, T::U8 )
OPCODE(ExtractRegister64, T::U64, T::U64, T::U64, T::U8 )
OPCODE(Pack2x32To1x64, T::U64, T::U32, T::U32 )
OPCODE(LeastSignificantWord, T::U32, T::U64 )
OPCODE(MostSignificantWord, T::U32, T::U64 )
OPCODE(LeastSignificantHalf, T::U16, T::U32 )
OPCODE(LeastSignificantByte, T::U8, T::U32 )
OPCODE(MostSignificantBit, T::U1, T::U32 )
OPCODE(IsZero32, T::U1, T::U32 )
OPCODE(IsZero64, T::U1, T::U64 )
OPCODE(TestBit, T::U1, T::U64, T::U8 )
OPCODE(ConditionalSelect32, T::U32, T::Cond, T::U32, T::U32 )
OPCODE(ConditionalSelect64, T::U64, T::Cond, T::U64, T::U64 )
OPCODE(ConditionalSelectNZCV, T::NZCVFlags, T::Cond, T::NZCVFlags, T::NZCVFlags )
OPCODE(LogicalShiftLeft32, T::U32, T::U32, T::U8, T::U1 )
OPCODE(LogicalShiftLeft64, T::U64, T::U64, T::U8 )
OPCODE(LogicalShiftRight32, T::U32, T::U32, T::U8, T::U1 )
OPCODE(LogicalShiftRight64, T::U64, T::U64, T::U8 )
OPCODE(ArithmeticShiftRight32, T::U32, T::U32, T::U8, T::U1 )
OPCODE(ArithmeticShiftRight64, T::U64, T::U64, T::U8 )
OPCODE(RotateRight32, T::U32, T::U32, T::U8, T::U1 )
OPCODE(RotateRight64, T::U64, T::U64, T::U8 )
OPCODE(RotateRightExtended, T::U32, T::U32, T::U1 )
OPCODE(Add32, T::U32, T::U32, T::U32, T::U1 )
OPCODE(Add64, T::U64, T::U64, T::U64, T::U1 )
OPCODE(Sub32, T::U32, T::U32, T::U32, T::U1 )
OPCODE(Sub64, T::U64, T::U64, T::U64, T::U1 )
OPCODE(Mul32, T::U32, T::U32, T::U32 )
OPCODE(Mul64, T::U64, T::U64, T::U64 )
OPCODE(SignedMultiplyHigh64, T::U64, T::U64, T::U64 )
OPCODE(UnsignedMultiplyHigh64, T::U64, T::U64, T::U64 )
OPCODE(UnsignedDiv32, T::U32, T::U32, T::U32 )
OPCODE(UnsignedDiv64, T::U64, T::U64, T::U64 )
OPCODE(SignedDiv32, T::U32, T::U32, T::U32 )
OPCODE(SignedDiv64, T::U64, T::U64, T::U64 )
OPCODE(And32, T::U32, T::U32, T::U32 )
OPCODE(And64, T::U64, T::U64, T::U64 )
OPCODE(Eor32, T::U32, T::U32, T::U32 )
OPCODE(Eor64, T::U64, T::U64, T::U64 )
OPCODE(Or32, T::U32, T::U32, T::U32 )
OPCODE(Or64, T::U64, T::U64, T::U64 )
OPCODE(Not32, T::U32, T::U32 )
OPCODE(Not64, T::U64, T::U64 )
OPCODE(SignExtendByteToWord, T::U32, T::U8 )
OPCODE(SignExtendHalfToWord, T::U32, T::U16 )
OPCODE(SignExtendByteToLong, T::U64, T::U8 )
OPCODE(SignExtendHalfToLong, T::U64, T::U16 )
OPCODE(SignExtendWordToLong, T::U64, T::U32 )
OPCODE(ZeroExtendByteToWord, T::U32, T::U8 )
OPCODE(ZeroExtendHalfToWord, T::U32, T::U16 )
OPCODE(ZeroExtendByteToLong, T::U64, T::U8 )
OPCODE(ZeroExtendHalfToLong, T::U64, T::U16 )
OPCODE(ZeroExtendWordToLong, T::U64, T::U32 )
OPCODE(ZeroExtendLongToQuad, T::U128, T::U64 )
OPCODE(ByteReverseWord, T::U32, T::U32 )
OPCODE(ByteReverseHalf, T::U16, T::U16 )
OPCODE(ByteReverseDual, T::U64, T::U64 )
OPCODE(CountLeadingZeros32, T::U32, T::U32 )
OPCODE(CountLeadingZeros64, T::U64, T::U64 )
OPCODE(ExtractRegister32, T::U32, T::U32, T::U32, T::U8 )
OPCODE(ExtractRegister64, T::U64, T::U64, T::U64, T::U8 )
// Saturated instructions
OPCODE(SignedSaturatedAdd, T::U32, T::U32, T::U32 )
OPCODE(SignedSaturatedSub, T::U32, T::U32, T::U32 )
OPCODE(UnsignedSaturation, T::U32, T::U32, T::U8 )
OPCODE(SignedSaturation, T::U32, T::U32, T::U8 )
OPCODE(SignedSaturatedAdd, T::U32, T::U32, T::U32 )
OPCODE(SignedSaturatedSub, T::U32, T::U32, T::U32 )
OPCODE(UnsignedSaturation, T::U32, T::U32, T::U8 )
OPCODE(SignedSaturation, T::U32, T::U32, T::U8 )
// Packed instructions
OPCODE(PackedAddU8, T::U32, T::U32, T::U32 )
OPCODE(PackedAddS8, T::U32, T::U32, T::U32 )
OPCODE(PackedSubU8, T::U32, T::U32, T::U32 )
OPCODE(PackedSubS8, T::U32, T::U32, T::U32 )
OPCODE(PackedAddU16, T::U32, T::U32, T::U32 )
OPCODE(PackedAddS16, T::U32, T::U32, T::U32 )
OPCODE(PackedSubU16, T::U32, T::U32, T::U32 )
OPCODE(PackedSubS16, T::U32, T::U32, T::U32 )
OPCODE(PackedAddSubU16, T::U32, T::U32, T::U32 )
OPCODE(PackedAddSubS16, T::U32, T::U32, T::U32 )
OPCODE(PackedSubAddU16, T::U32, T::U32, T::U32 )
OPCODE(PackedSubAddS16, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingAddU8, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingAddS8, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingSubU8, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingSubS8, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingAddU16, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingAddS16, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingSubU16, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingSubS16, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingAddSubU16, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingAddSubS16, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingSubAddU16, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingSubAddS16, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedAddU8, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedAddS8, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedSubU8, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedSubS8, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedAddU16, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedAddS16, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedSubU16, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedSubS16, T::U32, T::U32, T::U32 )
OPCODE(PackedAbsDiffSumS8, T::U32, T::U32, T::U32 )
OPCODE(PackedSelect, T::U32, T::U32, T::U32, T::U32 )
OPCODE(PackedAddU8, T::U32, T::U32, T::U32 )
OPCODE(PackedAddS8, T::U32, T::U32, T::U32 )
OPCODE(PackedSubU8, T::U32, T::U32, T::U32 )
OPCODE(PackedSubS8, T::U32, T::U32, T::U32 )
OPCODE(PackedAddU16, T::U32, T::U32, T::U32 )
OPCODE(PackedAddS16, T::U32, T::U32, T::U32 )
OPCODE(PackedSubU16, T::U32, T::U32, T::U32 )
OPCODE(PackedSubS16, T::U32, T::U32, T::U32 )
OPCODE(PackedAddSubU16, T::U32, T::U32, T::U32 )
OPCODE(PackedAddSubS16, T::U32, T::U32, T::U32 )
OPCODE(PackedSubAddU16, T::U32, T::U32, T::U32 )
OPCODE(PackedSubAddS16, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingAddU8, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingAddS8, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingSubU8, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingSubS8, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingAddU16, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingAddS16, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingSubU16, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingSubS16, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingAddSubU16, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingAddSubS16, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingSubAddU16, T::U32, T::U32, T::U32 )
OPCODE(PackedHalvingSubAddS16, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedAddU8, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedAddS8, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedSubU8, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedSubS8, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedAddU16, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedAddS16, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedSubU16, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedSubS16, T::U32, T::U32, T::U32 )
OPCODE(PackedAbsDiffSumS8, T::U32, T::U32, T::U32 )
OPCODE(PackedSelect, T::U32, T::U32, T::U32, T::U32 )
// CRC instructions
OPCODE(CRC32Castagnoli8, T::U32, T::U32, T::U32 )
OPCODE(CRC32Castagnoli16, T::U32, T::U32, T::U32 )
OPCODE(CRC32Castagnoli32, T::U32, T::U32, T::U32 )
OPCODE(CRC32Castagnoli64, T::U32, T::U32, T::U64 )
OPCODE(CRC32ISO8, T::U32, T::U32, T::U32 )
OPCODE(CRC32ISO16, T::U32, T::U32, T::U32 )
OPCODE(CRC32ISO32, T::U32, T::U32, T::U32 )
OPCODE(CRC32ISO64, T::U32, T::U32, T::U64 )
OPCODE(CRC32Castagnoli8, T::U32, T::U32, T::U32 )
OPCODE(CRC32Castagnoli16, T::U32, T::U32, T::U32 )
OPCODE(CRC32Castagnoli32, T::U32, T::U32, T::U32 )
OPCODE(CRC32Castagnoli64, T::U32, T::U32, T::U64 )
OPCODE(CRC32ISO8, T::U32, T::U32, T::U32 )
OPCODE(CRC32ISO16, T::U32, T::U32, T::U32 )
OPCODE(CRC32ISO32, T::U32, T::U32, T::U32 )
OPCODE(CRC32ISO64, T::U32, T::U32, T::U64 )
// AES instructions
OPCODE(AESDecryptSingleRound, T::U128, T::U128 )
OPCODE(AESEncryptSingleRound, T::U128, T::U128 )
OPCODE(AESInverseMixColumns, T::U128, T::U128 )
OPCODE(AESMixColumns, T::U128, T::U128 )
OPCODE(AESDecryptSingleRound, T::U128, T::U128 )
OPCODE(AESEncryptSingleRound, T::U128, T::U128 )
OPCODE(AESInverseMixColumns, T::U128, T::U128 )
OPCODE(AESMixColumns, T::U128, T::U128 )
// Vector instructions
OPCODE(VectorGetElement8, T::U8, T::U128, T::U8 )
OPCODE(VectorGetElement16, T::U16, T::U128, T::U8 )
OPCODE(VectorGetElement32, T::U32, T::U128, T::U8 )
OPCODE(VectorGetElement64, T::U64, T::U128, T::U8 )
OPCODE(VectorSetElement8, T::U128, T::U128, T::U8, T::U8 )
OPCODE(VectorSetElement16, T::U128, T::U128, T::U8, T::U16 )
OPCODE(VectorSetElement32, T::U128, T::U128, T::U8, T::U32 )
OPCODE(VectorSetElement64, T::U128, T::U128, T::U8, T::U64 )
OPCODE(VectorAdd8, T::U128, T::U128, T::U128 )
OPCODE(VectorAdd16, T::U128, T::U128, T::U128 )
OPCODE(VectorAdd32, T::U128, T::U128, T::U128 )
OPCODE(VectorAdd64, T::U128, T::U128, T::U128 )
OPCODE(VectorAnd, T::U128, T::U128, T::U128 )
OPCODE(VectorBroadcastLower8, T::U128, T::U8 )
OPCODE(VectorBroadcastLower16, T::U128, T::U16 )
OPCODE(VectorBroadcastLower32, T::U128, T::U32 )
OPCODE(VectorBroadcast8, T::U128, T::U8 )
OPCODE(VectorBroadcast16, T::U128, T::U16 )
OPCODE(VectorBroadcast32, T::U128, T::U32 )
OPCODE(VectorBroadcast64, T::U128, T::U64 )
OPCODE(VectorEor, T::U128, T::U128, T::U128 )
OPCODE(VectorEqual8, T::U128, T::U128, T::U128 )
OPCODE(VectorEqual16, T::U128, T::U128, T::U128 )
OPCODE(VectorEqual32, T::U128, T::U128, T::U128 )
OPCODE(VectorEqual64, T::U128, T::U128, T::U128 )
OPCODE(VectorEqual128, T::U128, T::U128, T::U128 )
OPCODE(VectorInterleaveLower8, T::U128, T::U128, T::U128 )
OPCODE(VectorInterleaveLower16, T::U128, T::U128, T::U128 )
OPCODE(VectorInterleaveLower32, T::U128, T::U128, T::U128 )
OPCODE(VectorInterleaveLower64, T::U128, T::U128, T::U128 )
OPCODE(VectorNot, T::U128, T::U128 )
OPCODE(VectorOr, T::U128, T::U128, T::U128 )
OPCODE(VectorPairedAddLower8, T::U128, T::U128, T::U128 )
OPCODE(VectorPairedAddLower16, T::U128, T::U128, T::U128 )
OPCODE(VectorPairedAddLower32, T::U128, T::U128, T::U128 )
OPCODE(VectorPairedAdd8, T::U128, T::U128, T::U128 )
OPCODE(VectorPairedAdd16, T::U128, T::U128, T::U128 )
OPCODE(VectorPairedAdd32, T::U128, T::U128, T::U128 )
OPCODE(VectorPairedAdd64, T::U128, T::U128, T::U128 )
OPCODE(VectorZeroUpper, T::U128, T::U128 )
OPCODE(VectorGetElement8, T::U8, T::U128, T::U8 )
OPCODE(VectorGetElement16, T::U16, T::U128, T::U8 )
OPCODE(VectorGetElement32, T::U32, T::U128, T::U8 )
OPCODE(VectorGetElement64, T::U64, T::U128, T::U8 )
OPCODE(VectorSetElement8, T::U128, T::U128, T::U8, T::U8 )
OPCODE(VectorSetElement16, T::U128, T::U128, T::U8, T::U16 )
OPCODE(VectorSetElement32, T::U128, T::U128, T::U8, T::U32 )
OPCODE(VectorSetElement64, T::U128, T::U128, T::U8, T::U64 )
OPCODE(VectorAdd8, T::U128, T::U128, T::U128 )
OPCODE(VectorAdd16, T::U128, T::U128, T::U128 )
OPCODE(VectorAdd32, T::U128, T::U128, T::U128 )
OPCODE(VectorAdd64, T::U128, T::U128, T::U128 )
OPCODE(VectorAnd, T::U128, T::U128, T::U128 )
OPCODE(VectorBroadcastLower8, T::U128, T::U8 )
OPCODE(VectorBroadcastLower16, T::U128, T::U16 )
OPCODE(VectorBroadcastLower32, T::U128, T::U32 )
OPCODE(VectorBroadcast8, T::U128, T::U8 )
OPCODE(VectorBroadcast16, T::U128, T::U16 )
OPCODE(VectorBroadcast32, T::U128, T::U32 )
OPCODE(VectorBroadcast64, T::U128, T::U64 )
OPCODE(VectorEor, T::U128, T::U128, T::U128 )
OPCODE(VectorEqual8, T::U128, T::U128, T::U128 )
OPCODE(VectorEqual16, T::U128, T::U128, T::U128 )
OPCODE(VectorEqual32, T::U128, T::U128, T::U128 )
OPCODE(VectorEqual64, T::U128, T::U128, T::U128 )
OPCODE(VectorEqual128, T::U128, T::U128, T::U128 )
OPCODE(VectorInterleaveLower8, T::U128, T::U128, T::U128 )
OPCODE(VectorInterleaveLower16, T::U128, T::U128, T::U128 )
OPCODE(VectorInterleaveLower32, T::U128, T::U128, T::U128 )
OPCODE(VectorInterleaveLower64, T::U128, T::U128, T::U128 )
// Vector left shifts: arg1 is the U128 vector, arg2 is the U8 shift amount.
OPCODE(VectorLogicalShiftLeft8, T::U128, T::U128, T::U8 )
OPCODE(VectorLogicalShiftLeft16, T::U128, T::U128, T::U8 )
OPCODE(VectorLogicalShiftLeft32, T::U128, T::U128, T::U8 )
OPCODE(VectorLogicalShiftLeft64, T::U128, T::U128, T::U8 )
OPCODE(VectorNot, T::U128, T::U128 )
OPCODE(VectorOr, T::U128, T::U128, T::U128 )
OPCODE(VectorPairedAddLower8, T::U128, T::U128, T::U128 )
OPCODE(VectorPairedAddLower16, T::U128, T::U128, T::U128 )
OPCODE(VectorPairedAddLower32, T::U128, T::U128, T::U128 )
OPCODE(VectorPairedAdd8, T::U128, T::U128, T::U128 )
OPCODE(VectorPairedAdd16, T::U128, T::U128, T::U128 )
OPCODE(VectorPairedAdd32, T::U128, T::U128, T::U128 )
OPCODE(VectorPairedAdd64, T::U128, T::U128, T::U128 )
OPCODE(VectorZeroUpper, T::U128, T::U128 )
// Floating-point operations
OPCODE(FPAbs32, T::U32, T::U32 )
OPCODE(FPAbs64, T::U64, T::U64 )
OPCODE(FPAdd32, T::U32, T::U32, T::U32 )
OPCODE(FPAdd64, T::U64, T::U64, T::U64 )
OPCODE(FPCompare32, T::NZCVFlags, T::U32, T::U32, T::U1 )
OPCODE(FPCompare64, T::NZCVFlags, T::U64, T::U64, T::U1 )
OPCODE(FPDiv32, T::U32, T::U32, T::U32 )
OPCODE(FPDiv64, T::U64, T::U64, T::U64 )
OPCODE(FPMul32, T::U32, T::U32, T::U32 )
OPCODE(FPMul64, T::U64, T::U64, T::U64 )
OPCODE(FPNeg32, T::U32, T::U32 )
OPCODE(FPNeg64, T::U64, T::U64 )
OPCODE(FPSqrt32, T::U32, T::U32 )
OPCODE(FPSqrt64, T::U64, T::U64 )
OPCODE(FPSub32, T::U32, T::U32, T::U32 )
OPCODE(FPSub64, T::U64, T::U64, T::U64 )
OPCODE(FPAbs32, T::U32, T::U32 )
OPCODE(FPAbs64, T::U64, T::U64 )
OPCODE(FPAdd32, T::U32, T::U32, T::U32 )
OPCODE(FPAdd64, T::U64, T::U64, T::U64 )
OPCODE(FPCompare32, T::NZCVFlags, T::U32, T::U32, T::U1 )
OPCODE(FPCompare64, T::NZCVFlags, T::U64, T::U64, T::U1 )
OPCODE(FPDiv32, T::U32, T::U32, T::U32 )
OPCODE(FPDiv64, T::U64, T::U64, T::U64 )
OPCODE(FPMul32, T::U32, T::U32, T::U32 )
OPCODE(FPMul64, T::U64, T::U64, T::U64 )
OPCODE(FPNeg32, T::U32, T::U32 )
OPCODE(FPNeg64, T::U64, T::U64 )
OPCODE(FPSqrt32, T::U32, T::U32 )
OPCODE(FPSqrt64, T::U64, T::U64 )
OPCODE(FPSub32, T::U32, T::U32, T::U32 )
OPCODE(FPSub64, T::U64, T::U64, T::U64 )
// Floating-point conversions
OPCODE(FPSingleToDouble, T::U64, T::U32 )
OPCODE(FPDoubleToSingle, T::U32, T::U64 )
OPCODE(FPSingleToU32, T::U32, T::U32, T::U1 )
OPCODE(FPSingleToS32, T::U32, T::U32, T::U1 )
OPCODE(FPDoubleToU32, T::U32, T::U64, T::U1 )
OPCODE(FPDoubleToS32, T::U32, T::U64, T::U1 )
OPCODE(FPU32ToSingle, T::U32, T::U32, T::U1 )
OPCODE(FPS32ToSingle, T::U32, T::U32, T::U1 )
OPCODE(FPU32ToDouble, T::U64, T::U32, T::U1 )
OPCODE(FPS32ToDouble, T::U64, T::U32, T::U1 )
OPCODE(FPSingleToDouble, T::U64, T::U32 )
OPCODE(FPDoubleToSingle, T::U32, T::U64 )
OPCODE(FPSingleToU32, T::U32, T::U32, T::U1 )
OPCODE(FPSingleToS32, T::U32, T::U32, T::U1 )
OPCODE(FPDoubleToU32, T::U32, T::U64, T::U1 )
OPCODE(FPDoubleToS32, T::U32, T::U64, T::U1 )
OPCODE(FPU32ToSingle, T::U32, T::U32, T::U1 )
OPCODE(FPS32ToSingle, T::U32, T::U32, T::U1 )
OPCODE(FPU32ToDouble, T::U64, T::U32, T::U1 )
OPCODE(FPS32ToDouble, T::U64, T::U32, T::U1 )
// A32 Memory access
A32OPC(ClearExclusive, T::Void, )
A32OPC(SetExclusive, T::Void, T::U32, T::U8 )
A32OPC(ReadMemory8, T::U8, T::U32 )
A32OPC(ReadMemory16, T::U16, T::U32 )
A32OPC(ReadMemory32, T::U32, T::U32 )
A32OPC(ReadMemory64, T::U64, T::U32 )
A32OPC(WriteMemory8, T::Void, T::U32, T::U8 )
A32OPC(WriteMemory16, T::Void, T::U32, T::U16 )
A32OPC(WriteMemory32, T::Void, T::U32, T::U32 )
A32OPC(WriteMemory64, T::Void, T::U32, T::U64 )
A32OPC(ExclusiveWriteMemory8, T::U32, T::U32, T::U8 )
A32OPC(ExclusiveWriteMemory16, T::U32, T::U32, T::U16 )
A32OPC(ExclusiveWriteMemory32, T::U32, T::U32, T::U32 )
A32OPC(ExclusiveWriteMemory64, T::U32, T::U32, T::U32, T::U32 )
A32OPC(ClearExclusive, T::Void, )
A32OPC(SetExclusive, T::Void, T::U32, T::U8 )
A32OPC(ReadMemory8, T::U8, T::U32 )
A32OPC(ReadMemory16, T::U16, T::U32 )
A32OPC(ReadMemory32, T::U32, T::U32 )
A32OPC(ReadMemory64, T::U64, T::U32 )
A32OPC(WriteMemory8, T::Void, T::U32, T::U8 )
A32OPC(WriteMemory16, T::Void, T::U32, T::U16 )
A32OPC(WriteMemory32, T::Void, T::U32, T::U32 )
A32OPC(WriteMemory64, T::Void, T::U32, T::U64 )
A32OPC(ExclusiveWriteMemory8, T::U32, T::U32, T::U8 )
A32OPC(ExclusiveWriteMemory16, T::U32, T::U32, T::U16 )
A32OPC(ExclusiveWriteMemory32, T::U32, T::U32, T::U32 )
A32OPC(ExclusiveWriteMemory64, T::U32, T::U32, T::U32, T::U32 )
// A64 Memory access
A64OPC(ReadMemory8, T::U8, T::U64 )
A64OPC(ReadMemory16, T::U16, T::U64 )
A64OPC(ReadMemory32, T::U32, T::U64 )
A64OPC(ReadMemory64, T::U64, T::U64 )
A64OPC(ReadMemory128, T::U128, T::U64 )
A64OPC(WriteMemory8, T::Void, T::U64, T::U8 )
A64OPC(WriteMemory16, T::Void, T::U64, T::U16 )
A64OPC(WriteMemory32, T::Void, T::U64, T::U32 )
A64OPC(WriteMemory64, T::Void, T::U64, T::U64 )
A64OPC(WriteMemory128, T::Void, T::U64, T::U128 )
A64OPC(ReadMemory8, T::U8, T::U64 )
A64OPC(ReadMemory16, T::U16, T::U64 )
A64OPC(ReadMemory32, T::U32, T::U64 )
A64OPC(ReadMemory64, T::U64, T::U64 )
A64OPC(ReadMemory128, T::U128, T::U64 )
A64OPC(WriteMemory8, T::Void, T::U64, T::U8 )
A64OPC(WriteMemory16, T::Void, T::U64, T::U16 )
A64OPC(WriteMemory32, T::Void, T::U64, T::U32 )
A64OPC(WriteMemory64, T::Void, T::U64, T::U64 )
A64OPC(WriteMemory128, T::Void, T::U64, T::U128 )
// Coprocessor
A32OPC(CoprocInternalOperation, T::Void, T::CoprocInfo )
A32OPC(CoprocSendOneWord, T::Void, T::CoprocInfo, T::U32 )
A32OPC(CoprocSendTwoWords, T::Void, T::CoprocInfo, T::U32, T::U32 )
A32OPC(CoprocGetOneWord, T::U32, T::CoprocInfo )
A32OPC(CoprocGetTwoWords, T::U64, T::CoprocInfo )
A32OPC(CoprocLoadWords, T::Void, T::CoprocInfo, T::U32 )
A32OPC(CoprocStoreWords, T::Void, T::CoprocInfo, T::U32 )
A32OPC(CoprocInternalOperation, T::Void, T::CoprocInfo )
A32OPC(CoprocSendOneWord, T::Void, T::CoprocInfo, T::U32 )
A32OPC(CoprocSendTwoWords, T::Void, T::CoprocInfo, T::U32, T::U32 )
A32OPC(CoprocGetOneWord, T::U32, T::CoprocInfo )
A32OPC(CoprocGetTwoWords, T::U64, T::CoprocInfo )
A32OPC(CoprocLoadWords, T::Void, T::CoprocInfo, T::U32 )
A32OPC(CoprocStoreWords, T::Void, T::CoprocInfo, T::U32 )