diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..4de8772d --- /dev/null +++ b/.travis.yml @@ -0,0 +1,12 @@ +sudo: true +dist: bionic +language: cpp +compiler: + - gcc + - clang +addons: + apt: + packages: + - nasm yasm g++-multilib tcsh +script: + - make test diff --git a/Makefile b/Makefile index a7850a22..f91f6261 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -PREFIX=/usr/local +PREFIX?=/usr/local INSTALL_DIR=$(PREFIX)/include/xbyak all: diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index 069ca540..250c8d4b 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -23,7 +23,7 @@ void putOpmask(bool only64bit) { const struct Tbl { const char *name; - uint8 code; + uint8_t code; } tbl[] = { { "kadd", 0x4A }, { "kand", 0x41 }, @@ -46,7 +46,7 @@ void putOpmask(bool only64bit) { const struct Tbl { const char *name; - uint8 code; + uint8_t code; } tbl[] = { { "knot", 0x44 }, { "kortest", 0x98 }, @@ -63,23 +63,23 @@ void putOpmask(bool only64bit) { const struct Tbl { const char *name; - uint8 code; + uint8_t code; } tbl[] = { { "kshiftl", 0x32 }, { "kshiftr", 0x30 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; - printf("void %sw(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x%02X, imm); }\n", p.name, p.code); - printf("void %sq(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x%02X, imm); }\n", p.name, p.code + 1); - printf("void %sb(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code); - printf("void %sd(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code + 1); + printf("void %sw(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x%02X, imm); }\n", p.name, p.code); + printf("void %sq(const Opmask& r1, const Opmask& r2, uint8_t imm) { 
opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x%02X, imm); }\n", p.name, p.code + 1); + printf("void %sb(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code); + printf("void %sd(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code + 1); } } - puts("void kmovw(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); }"); - puts("void kmovq(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); }"); - puts("void kmovb(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); }"); - puts("void kmovd(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); }"); + puts("void kmovw(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); }"); + puts("void kmovq(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); }"); + puts("void kmovb(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); }"); + puts("void kmovd(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); }"); puts("void kmovw(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W0, 0x91); }"); puts("void kmovq(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W1, 0x91); }"); @@ -98,13 +98,13 @@ void putOpmask(bool only64bit) void putVcmp() { const struct Tbl { - uint8 code; + uint8_t code; const char *name; int type; bool hasIMM; } tbl[] = { - { 0xC2, "vcmppd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_YMM | T_66, true }, - { 0xC2, 
"vcmpps", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_YMM, true }, + { 0xC2, "vcmppd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_YMM | T_66 | T_B64, true }, + { 0xC2, "vcmpps", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_YMM | T_B32, true }, { 0xC2, "vcmpsd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_F2 | T_N8, true }, { 0xC2, "vcmpss", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_F3 | T_N4, true }, @@ -142,7 +142,7 @@ void putVcmp() const Tbl *p = &tbl[i]; std::string type = type2String(p->type); printf("void %s(const Opmask& k, const Xmm& x, const Operand& op%s) { opAVX_K_X_XM(k, x, op, %s, 0x%02X%s); }\n" - , p->name, p->hasIMM ? ", uint8 imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); + , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); } } @@ -150,7 +150,7 @@ void putVcmp() void putX_XM() { const struct Tbl { - uint8 code; + uint8_t code; const char *name; int type; } tbl[] = { @@ -198,7 +198,7 @@ void putX_XM() void putM_X() { const struct Tbl { - uint8 code; + uint8_t code; const char *name; int type; } tbl[] = { @@ -219,7 +219,7 @@ void putM_X() void putXM_X() { const struct Tbl { - uint8 code; + uint8_t code; const char *name; int type; } tbl[] = { @@ -242,7 +242,7 @@ void putXM_X() void putX_X_XM_IMM() { const struct Tbl { - uint8 code; + uint8_t code; const char *name; int type; bool hasIMM; @@ -368,12 +368,15 @@ void putX_X_XM_IMM() { 0x52, "vpdpwssd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, { 0x53, "vpdpwssds", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, + + { 0x72, "vcvtne2ps2bf16", T_F2 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, + { 0x52, "vdpbf16ps", T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; std::string type = type2String(p->type); printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, 
x2, op, %s, 0x%02X%s); }\n" - , p->name, p->hasIMM ? ", uint8 imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); + , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); } } @@ -381,7 +384,7 @@ void putShift() { const struct Tbl { const char *name; - uint8 code; + uint8_t code; int idx; int type; } tbl[] = { @@ -394,7 +397,7 @@ void putShift() for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; std::string type = type2String(p.type); - printf("void %s(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, type.c_str(), p.code); + printf("void %s(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, type.c_str(), p.code); } } @@ -403,7 +406,7 @@ void putExtractInsert() { const struct Tbl { const char *name; - uint8 code; + uint8_t code; int type; bool isZMM; } tbl[] = { @@ -421,13 +424,13 @@ void putExtractInsert() const Tbl& p = tbl[i]; std::string type = type2String(p.type); const char *kind = p.isZMM ? "Operand::MEM | Operand::YMM" : "Operand::MEM | Operand::XMM"; - printf("void %s(const Operand& op, const %s& r, uint8 imm) { if (!op.is(%s)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, %s, 0x%2X, imm); }\n", p.name, p.isZMM ? "Zmm" : "Ymm", kind, type.c_str(), p.code); + printf("void %s(const Operand& op, const %s& r, uint8_t imm) { if (!op.is(%s)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, %s, 0x%2X, imm); }\n", p.name, p.isZMM ? "Zmm" : "Ymm", kind, type.c_str(), p.code); } } { const struct Tbl { const char *name; - uint8 code; + uint8_t code; int type; bool isZMM; } tbl[] = { @@ -446,8 +449,8 @@ void putExtractInsert() std::string type = type2String(p.type); const char *x = p.isZMM ? "Zmm" : "Ymm"; const char *cond = p.isZMM ? 
"op.is(Operand::MEM | Operand::YMM)" : "(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))"; - printf("void %s(const %s& r1, const %s& r2, const Operand& op, uint8 imm) {" - "if (!%s) throw Error(ERR_BAD_COMBINATION); " + printf("void %s(const %s& r1, const %s& r2, const Operand& op, uint8_t imm) {" + "if (!%s) XBYAK_THROW(ERR_BAD_COMBINATION) " "opVex(r1, &r2, op, %s, 0x%2X, imm); }\n", p.name, x, x, cond, type.c_str(), p.code); } } @@ -457,7 +460,7 @@ void putBroadcast(bool only64bit) { { const struct Tbl { - uint8 code; + uint8_t code; const char *name; int type; int reg; @@ -519,7 +522,7 @@ void putGather() const struct Tbl { const char *name; int type; - uint8 code; + uint8_t code; int mode; } tbl[] = { { "vpgatherdd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x90, xx_yy_zz }, @@ -542,7 +545,7 @@ void putScatter() const struct Tbl { const char *name; int type; - uint8 code; + uint8_t code; int mode; // reverse of gather } tbl[] = { { "vpscatterdd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA0, xx_yy_zz }, @@ -564,10 +567,10 @@ void putScatter() void putShuff() { - puts("void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); }"); - puts("void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }"); - puts("void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }"); - puts("void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }"); + puts("void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | 
T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); }"); + puts("void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }"); + puts("void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }"); + puts("void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }"); } void putMov() @@ -624,7 +627,7 @@ void putMov() void putX_XM_IMM() { const struct Tbl { - uint8 code; + uint8_t code; const char *name; int type; bool hasIMM; @@ -661,7 +664,7 @@ void putX_XM_IMM() const Tbl *p = &tbl[i]; std::string type = type2String(p->type); printf("void %s(const Xmm& x, const Operand& op%s) { opAVX_X_XM_IMM(x, op, %s, 0x%02X%s); }\n" - , p->name, p->hasIMM ? ", uint8 imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); + , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? 
", imm" : ""); } } @@ -674,7 +677,7 @@ void putMisc() const char *name; int zm; int type; - uint8 code; + uint8_t code; bool isZmm; } tbl[] = { { "vgatherpf0dps", 1, T_EW0 | T_N4, 0xC6, true }, @@ -705,12 +708,16 @@ void putMisc() } } - puts("void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }"); - puts("void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }"); - puts("void vfpclasssd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }"); - puts("void vfpclassss(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }"); + puts("void vfpclasspd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }"); + puts("void vfpclassps(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }"); + puts("void vfpclasssd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }"); + puts("void vfpclassss(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }"); 
puts("void vpshufbitqmb(const Opmask& k, const Xmm& x, const Operand& op) { opVex(k, &x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8F); }"); + puts("void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }"); + + puts("void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }"); + puts("void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); }"); } void putV4FMA() @@ -728,7 +735,9 @@ int main(int argc, char *[]) bool only64bit = argc == 2; putOpmask(only64bit); putBroadcast(only64bit); - if (only64bit) return 0; + if (only64bit) { + return 0; + } putVcmp(); putX_XM(); putM_X(); diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp index 43984c0c..ba7dbf61 100644 --- a/gen/gen_code.cpp +++ b/gen/gen_code.cpp @@ -23,9 +23,9 @@ void put_jREGz(const char *reg, bool prefix) struct GenericTbl { const char *name; - uint8 code1; - uint8 code2; - uint8 code3; + uint8_t code1; + uint8_t code2; + uint8_t code3; }; void putGeneric(const GenericTbl *p, size_t n) @@ -44,7 +44,7 @@ void putX_X_XM(bool omitOnly) // (x, x, x/m[, imm]) or (y, y, y/m[, imm]) { const struct Tbl { - uint8 code; + uint8_t code; const char *name; int type; bool hasIMM; @@ -212,25 +212,37 @@ void putX_X_XM(bool omitOnly) std::string type = type2String(p->type); if (omitOnly) { if (p->enableOmit) { - printf("void v%s(const Xmm& x, const Operand& op%s) { v%s(x, x, op%s); }\n", p->name, p->hasIMM ? ", uint8 imm" : "", p->name, p->hasIMM ? ", imm" : ""); + printf("void v%s(const Xmm& x, const Operand& op%s) { v%s(x, x, op%s); }\n", p->name, p->hasIMM ? ", uint8_t imm" : "", p->name, p->hasIMM ? 
", imm" : ""); } } else { if (p->mode & 1) { if (p->hasIMM) { - printf("void %s(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x%02X, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); }\n", p->name, p->code); + printf("void %s(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x%02X, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); }\n", p->name, p->code); } else { printf("void %s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%02X, 0x66, isXMM_XMMorMEM, NONE, 0x38); }\n", p->name, p->code); } } if (p->mode & 2) { printf("void v%s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X%s); }\n" - , p->name, p->hasIMM ? ", uint8 imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); + , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); } } } } } +void putMemOp(const char *name, uint8_t prefix, uint8_t ext, uint8_t code1, int code2, int bit = 32) +{ + printf("void %s(const Address& addr) { ", name); + if (prefix) printf("db(0x%02X); ", prefix); + printf("opModM(addr, Reg%d(%d), 0x%02X, 0x%02X); }\n", bit, ext, code1, code2); +} + +void putLoadSeg(const char *name, uint8_t code1, int code2 = NONE) +{ + printf("void %s(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x%02X, 0x%02X); }\n", name, code1, code2); +} + void put() { const int NO = CodeGenerator::NONE; @@ -250,7 +262,7 @@ void put() const int Q = 1 << 3; { const struct Tbl { - uint8 code; + uint8_t code; const char *name; } tbl[] = { // MMX @@ -300,7 +312,7 @@ void put() { const struct Tbl { - uint8 code; + uint8_t code; int mode; const char *name; } tbl[] = { @@ -334,7 +346,7 @@ void put() { const struct Tbl { - uint8 code; + uint8_t code; int ext; int mode; const char *name; @@ -364,8 +376,8 @@ void put() { const struct Tbl { - uint8 code; - uint8 pref; + uint8_t code; + uint8_t pref; const char *name; } tbl[] = { { 0x70, 0, "pshufw" }, @@ -375,13 +387,13 @@ void put() }; for (size_t i = 
0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - printf("void %s(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x%02X, 0x%02X, imm8); }\n", p->name, p->code, p->pref); + printf("void %s(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x%02X, 0x%02X, imm8); }\n", p->name, p->code, p->pref); } } { const struct MmxTbl6 { - uint8 code; // for (reg, reg/[mem]) - uint8 code2; // for ([mem], reg) + uint8_t code; // for (reg, reg/[mem]) + uint8_t code2; // for ([mem], reg) int pref; const char *name; } mmxTbl6[] = { @@ -420,7 +432,7 @@ void put() { 0xF2, "sd" }, }; const struct Tbl { - uint8 code; + uint8_t code; int mode; const char *name; bool hasImm; @@ -449,8 +461,8 @@ void put() for (size_t j = 0; j < NUM_OF_ARRAY(sufTbl); j++) { if (!(p->mode & (1 << j))) continue; if (p->hasImm) { - // don't change uint8 to int because NO is not in byte - printf("void %s%s(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0x%2X, 0x%02X, isXMM_XMMorMEM, imm8); }\n", p->name, sufTbl[j].name, p->code, sufTbl[j].code); + // don't change uint8_t to int because NO is not in byte + printf("void %s%s(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0x%2X, 0x%02X, isXMM_XMMorMEM, imm8); }\n", p->name, sufTbl[j].name, p->code, sufTbl[j].code); } else { printf("void %s%s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%2X, 0x%02X, isXMM_XMMorMEM); }\n", p->name, sufTbl[j].name, p->code, sufTbl[j].code); } @@ -460,8 +472,8 @@ void put() { // (XMM, XMM) const struct Tbl { - uint8 code; - uint8 pref; + uint8_t code; + uint8_t pref; const char *name; } tbl[] = { { 0xF7, 0x66, "maskmovdqu" }, @@ -478,7 +490,7 @@ void put() { // (XMM, XMM|MEM) const struct Tbl { - uint8 code; + uint8_t code; int pref; const char *name; } tbl[] = { @@ -510,7 +522,7 @@ void put() { // special type const struct Tbl { - uint8 code; + uint8_t code; int pref; const char *name; const char *cond; @@ -554,7 +566,7 @@ void put() } { const 
struct Tbl { - uint8 code; + uint8_t code; int pref; const char *name; } tbl[] = { @@ -571,7 +583,7 @@ void put() { // cmov const struct Tbl { - uint8 ext; + uint8_t ext; const char *name; } tbl[] = { { 0, "o" }, @@ -616,6 +628,22 @@ void put() printf("void set%s(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | %d); }%s\n", p->name, p->ext, msg); } } + { + const struct Tbl { + const char *name; + uint8_t code; + } tbl[] = { + { "loop", 0xE2 }, + { "loope", 0xE1 }, + { "loopne", 0xE0 }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + printf("void %s(std::string label) { opJmp(label, T_SHORT, 0x%02X, 0, 0); }\n", p->name, p->code); + printf("void %s(const Label& label) { opJmp(label, T_SHORT, 0x%02X, 0, 0); }\n", p->name, p->code); + printf("void %s(const char *label) { %s(std::string(label)); }\n", p->name, p->name); + } + } //////////////////////////////////////////////////////////////// { const GenericTbl tbl[] = { @@ -633,16 +661,28 @@ void put() { "cmpsb", 0xA6 }, { "cmpsw", 0x66, 0xA7 }, { "cmpsd", 0xA7 }, + { "int3", 0xCC }, { "scasb", 0xAE }, { "scasw", 0x66, 0xAF }, { "scasd", 0xAF }, { "movsb", 0xA4 }, + { "leave", 0xC9 }, + { "lodsb", 0xAC }, + { "lodsw", 0x66, 0xAD }, + { "lodsd", 0xAD }, { "movsw", 0x66, 0xA5 }, { "movsd", 0xA5 }, + { "outsb", 0x6E }, + { "outsw", 0x66, 0x6F }, + { "outsd", 0x6F }, { "stosb", 0xAA }, { "stosw", 0x66, 0xAB }, { "stosd", 0xAB }, { "rep", 0xF3 }, + { "repe", 0xF3 }, + { "repz", 0xF3 }, + { "repne", 0xF2 }, + { "repnz", 0xF2 }, { "lahf", 0x9F }, { "lock", 0xF0 }, @@ -651,6 +691,8 @@ void put() { "stc", 0xF9 }, { "std", 0xFD }, { "sti", 0xFB }, + { "sysenter", 0x0F, 0x34 }, + { "sysexit", 0x0F, 0x35 }, { "emms", 0x0F, 0x77 }, { "pause", 0xF3, 0x90 }, @@ -684,7 +726,8 @@ void put() { "fabs", 0xD9, 0xE1 }, { "faddp", 0xDE, 0xC1 }, { "fchs", 0xD9, 0xE0 }, - + { "fclex", 0x9B, 0xDB, 0xE2 }, + { "fnclex", 0xDB, 0xE2 }, { "fcom", 0xD8, 0xD1 }, { "fcomp", 0xD8, 0xD9 }, { "fcompp", 0xDE, 0xD9 
}, @@ -724,13 +767,23 @@ void put() { "fxtract", 0xD9, 0xF4 }, { "fyl2x", 0xD9, 0xF1 }, { "fyl2xp1", 0xD9, 0xF9 }, + + // AMD Zen + { "monitorx", 0x0F, 0x01, 0xFA }, + { "mwaitx", 0x0F, 0x01, 0xFB }, + { "clzero", 0x0F, 0x01, 0xFC }, }; putGeneric(tbl, NUM_OF_ARRAY(tbl)); + puts("void enter(uint16_t x, uint8_t y) { db(0xC8); dw(x); db(y); }"); + puts("void int_(uint8_t x) { db(0xCD); db(x); }"); + putLoadSeg("lss", 0x0F, 0xB2); + putLoadSeg("lfs", 0x0F, 0xB4); + putLoadSeg("lgs", 0x0F, 0xB5); } { const struct Tbl { - uint8 code; // (reg, reg) - uint8 ext; // (reg, imm) + uint8_t code; // (reg, reg) + uint8_t ext; // (reg, imm) const char *name; } tbl[] = { { 0x10, 2, "adc" }, @@ -745,14 +798,14 @@ void put() for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; printf("void %s(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x%02X); }\n", p->name, p->code); - printf("void %s(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x%02X, %d); }\n", p->name, p->code, p->ext); + printf("void %s(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x%02X, %d); }\n", p->name, p->code, p->ext); } } { const struct Tbl { - uint8 code; - uint8 ext; + uint8_t code; + uint8_t ext; const char *name; } tbl[] = { { 0x48, 1, "dec" }, @@ -765,8 +818,8 @@ void put() } { const struct Tbl { - uint8 code; - uint8 ext; + uint8_t code; + uint8_t ext; const char *name; } tbl[] = { { 0xa3, 4, "bt" }, @@ -777,13 +830,13 @@ void put() for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; printf("void %s(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0x%02X); }\n", p->name, p->code); - printf("void %s(const Operand& op, uint8 imm) { opR_ModM(op, 16|32|64, %d, 0x0f, 0xba, NONE, false, 1); db(imm); }\n", p->name, p->ext); + printf("void %s(const Operand& op, uint8_t imm) { opR_ModM(op, 16|32|64, %d, 0x0f, 0xba, NONE, false, 1); db(imm); }\n", p->name, p->ext); } } { const 
struct Tbl { - uint8 code; - uint8 ext; + uint8_t code; + uint8_t ext; const char *name; } tbl[] = { { 0xF6, 6, "div" }, @@ -802,7 +855,7 @@ void put() { const struct Tbl { const char *name; - uint8 ext; + uint8_t ext; } tbl[] = { { "rcl", 2 }, { "rcr", 3 }, @@ -823,21 +876,21 @@ void put() { const struct Tbl { const char *name; - uint8 code; + uint8_t code; } tbl[] = { { "shld", 0xA4 }, { "shrd", 0xAC }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - printf("void %s(const Operand& op, const Reg& reg, uint8 imm) { opShxd(op, reg, imm, 0x%02X); }\n", p->name, p->code); + printf("void %s(const Operand& op, const Reg& reg, uint8_t imm) { opShxd(op, reg, imm, 0x%02X); }\n", p->name, p->code); printf("void %s(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(op, reg, 0, 0x%02X, &_cl); }\n", p->name, p->code); } } { const struct Tbl { const char *name; - uint8 code; + uint8_t code; } tbl[] = { { "bsf", 0xBC }, { "bsr", 0xBD }, @@ -850,7 +903,7 @@ void put() { const struct Tbl { const char *name; - uint8 code; + uint8_t code; } tbl[] = { { "popcnt", 0xB8 }, { "tzcnt", 0xBC }, @@ -864,7 +917,7 @@ void put() // SSSE3 { const struct Tbl { - uint8 code; + uint8_t code; const char *name; } tbl[] = { { 0x00, "pshufb" }, @@ -887,12 +940,12 @@ void put() const Tbl *p = &tbl[i]; printf("void %s(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x%02X, 0x66, NONE, 0x38); }\n", p->name, p->code); } - printf("void palignr(const Mmx& mmx, const Operand& op, int imm) { opMMX(mmx, op, 0x0f, 0x66, static_cast(imm), 0x3a); }\n"); + printf("void palignr(const Mmx& mmx, const Operand& op, int imm) { opMMX(mmx, op, 0x0f, 0x66, static_cast(imm), 0x3a); }\n"); } { const struct Tbl { const char *name; - uint8 code; + uint8_t code; } tbl[] = { { "pclmullqlqdq", 0 }, { "pclmulhqlqdq", 1 }, @@ -906,26 +959,41 @@ void put() } { const struct Tbl { - uint8 code1; + uint8_t code1; int code2; - uint8 ext; + uint8_t ext; const char *name; + uint8_t prefix; } 
tbl[] = { - { 0x0F, 0xAE, 2, "ldmxcsr" }, - { 0x0F, 0xAE, 3, "stmxcsr" }, - { 0x0F, 0xAE, 7, "clflush" }, // 0x80 is bug of nasm ? - { 0xD9, NONE, 5, "fldcw" }, -// { 0x9B, 0xD9, 7, "fstcw" }, // not correct order for fstcw [eax] on 64bit OS + { 0x0F, 0xAE, 2, "ldmxcsr", 0 }, + { 0x0F, 0xAE, 3, "stmxcsr", 0 }, + { 0x0F, 0xAE, 7, "clflush", 0 }, + { 0x0F, 0xAE, 7, "clflushopt", 0x66 }, + { 0xDF, NONE, 4, "fbld", 0 }, + { 0xDF, NONE, 6, "fbstp", 0 }, + { 0xD9, NONE, 5, "fldcw", 0 }, + { 0xD9, NONE, 4, "fldenv", 0 }, + { 0xDD, NONE, 4, "frstor", 0 }, + { 0xDD, NONE, 6, "fsave", 0x9B }, + { 0xDD, NONE, 6, "fnsave", 0 }, + { 0xD9, NONE, 7, "fstcw", 0x9B }, + { 0xD9, NONE, 7, "fnstcw", 0 }, + { 0xD9, NONE, 6, "fstenv", 0x9B }, + { 0xD9, NONE, 6, "fnstenv", 0 }, + { 0xDD, NONE, 7, "fstsw", 0x9B }, + { 0xDD, NONE, 7, "fnstsw", 0 }, + { 0x0F, 0xAE, 1, "fxrstor", 0 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - printf("void %s(const Address& addr) { opModM(addr, Reg32(%d), 0x%02X, 0x%02X); }\n", p->name, p->ext, p->code1, p->code2); + putMemOp(p->name, p->prefix, p->ext, p->code1, p->code2); } - printf("void fstcw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xD9, NONE); }\n"); + puts("void fstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) XBYAK_THROW(ERR_BAD_PARAMETER) db(0x9B); db(0xDF); db(0xE0); }"); + puts("void fnstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) XBYAK_THROW(ERR_BAD_PARAMETER) db(0xDF); db(0xE0); }"); } { const struct Tbl { - uint8 code; + uint8_t code; const char *name; } tbl[] = { { 0x2B, "movntpd" }, @@ -939,7 +1007,7 @@ void put() } { const struct Tbl { - uint8 code; + uint8_t code; const char *name; } tbl[] = { { 0xBE, "movsx" }, @@ -950,6 +1018,12 @@ void put() printf("void %s(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0x%02X); }\n", p->name, p->code); } } + { // in/out + puts("void in_(const Reg& a, uint8_t v) { opInOut(a, 0xE4, v); }"); + puts("void in_(const Reg& a, const Reg& d) 
{ opInOut(a, d, 0xEC); }"); + puts("void out_(uint8_t v, const Reg& a) { opInOut(a, 0xE6, v); }"); + puts("void out_(const Reg& d, const Reg& a) { opInOut(a, d, 0xEE); }"); + } // mpx { puts("void bndcl(const BoundsReg& bnd, const Operand& op) { db(0xF3); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1A, NONE, !op.isMEM()); }"); @@ -963,7 +1037,7 @@ void put() } // misc { - puts("void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModM(addr, reg, 0x8D); }"); + puts("void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModM(addr, reg, 0x8D); }"); puts("void bswap(const Reg32e& reg) { opModR(Reg32(1), reg, 0x0F); }"); puts("void ret(int imm = 0) { if (imm) { db(0xC2); dw(imm); } else { db(0xC3); } }"); @@ -975,24 +1049,24 @@ void put() puts("void adox(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0xF3, isREG32_REG32orMEM, NONE, 0x38); }"); puts("void cmpxchg8b(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0xC7); }"); - puts("void pextrw(const Operand& op, const Mmx& xmm, uint8 imm) { opExt(op, xmm, 0x15, imm, true); }"); - puts("void pextrb(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x14, imm); }"); - puts("void pextrd(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x16, imm); }"); - puts("void extractps(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x17, imm); }"); - puts("void pinsrw(const Mmx& mmx, const Operand& op, int imm) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opGen(mmx, op, 0xC4, mmx.isXMM() ? 
0x66 : NONE, 0, imm); }"); - puts("void insertps(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, imm, 0x3A); }"); - puts("void pinsrb(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x20, 0x66, isXMM_REG32orMEM, imm, 0x3A); }"); - puts("void pinsrd(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x22, 0x66, isXMM_REG32orMEM, imm, 0x3A); }"); + puts("void pextrw(const Operand& op, const Mmx& xmm, uint8_t imm) { opExt(op, xmm, 0x15, imm, true); }"); + puts("void pextrb(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x14, imm); }"); + puts("void pextrd(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x16, imm); }"); + puts("void extractps(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x17, imm); }"); + puts("void pinsrw(const Mmx& mmx, const Operand& op, int imm) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(mmx, op, 0xC4, mmx.isXMM() ? 
0x66 : NONE, 0, imm); }"); + puts("void insertps(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, imm, 0x3A); }"); + puts("void pinsrb(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x20, 0x66, isXMM_REG32orMEM, imm, 0x3A); }"); + puts("void pinsrd(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x22, 0x66, isXMM_REG32orMEM, imm, 0x3A); }"); puts("void pmovmskb(const Reg32e& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(reg, mmx, 0x0F, 0xD7); }"); - puts("void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) throw Error(ERR_BAD_COMBINATION); opModR(reg1, reg2, 0x0F, 0xF7); }"); + puts("void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opModR(reg1, reg2, 0x0F, 0xF7); }"); puts("void movmskps(const Reg32e& reg, const Xmm& xmm) { opModR(reg, xmm, 0x0F, 0x50); }"); puts("void movmskpd(const Reg32e& reg, const Xmm& xmm) { db(0x66); movmskps(reg, xmm); }"); puts("void movntps(const Address& addr, const Xmm& xmm) { opModM(addr, Mmx(xmm.getIdx()), 0x0F, 0x2B); }"); puts("void movntdqa(const Xmm& xmm, const Address& addr) { db(0x66); opModM(addr, xmm, 0x0F, 0x38, 0x2A); }"); puts("void lddqu(const Xmm& xmm, const Address& addr) { db(0xF2); opModM(addr, xmm, 0x0F, 0xF0); }"); puts("void movnti(const Address& addr, const Reg32e& reg) { opModM(addr, reg, 0x0F, 0xC3); }"); - puts("void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) throw Error(ERR_BAD_COMBINATION); opModM(addr, mmx, 0x0F, 0xE7); }"); + puts("void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opModM(addr, mmx, 0x0F, 0xE7); }"); puts("void movd(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModM(addr, mmx, 0x0F, 0x7E); }"); puts("void movd(const Reg32& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }"); @@ 
-1002,18 +1076,18 @@ void put() puts("void movdq2q(const Mmx& mmx, const Xmm& xmm) { db(0xF2); opModR(mmx, xmm, 0x0F, 0xD6); }"); puts("void movq(const Mmx& mmx, const Operand& op) { if (mmx.isXMM()) db(0xF3); opModRM(mmx, op, (mmx.getKind() == op.getKind()), op.isMEM(), 0x0F, mmx.isXMM() ? 0x7E : 0x6F); }"); puts("void movq(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModM(addr, mmx, 0x0F, mmx.isXMM() ? 0xD6 : 0x7F); }"); - puts("void rdrand(const Reg& r) { if (r.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModR(Reg(6, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }"); - puts("void rdseed(const Reg& r) { if (r.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModR(Reg(7, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }"); + puts("void rdrand(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModR(Reg(6, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }"); + puts("void rdseed(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModR(Reg(7, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }"); puts("void crc32(const Reg32e& reg, const Operand& op) { if (reg.isBit(32) && op.isBit(16)) db(0x66); db(0xF2); opModRM(reg, op, op.isREG(), op.isMEM(), 0x0F, 0x38, 0xF0 | (op.isBit(8) ? 
0 : 1)); }"); } { const struct Tbl { - uint8 m16; - uint8 m32; - uint8 m64; - uint8 ext; + uint8_t m16; + uint8_t m32; + uint8_t m64; + uint8_t ext; const char *name; - uint8 m64ext; + uint8_t m64ext; } tbl[] = { { 0x00, 0xD8, 0xDC, 0, "fadd" }, { 0xDE, 0xDA, 0x00, 0, "fiadd" }, @@ -1046,8 +1120,8 @@ void put() } { const struct Tbl { - uint32 code1; - uint32 code2; + uint32_t code1; + uint32_t code2; const char *name; } tbl[] = { { 0xD8C0, 0xDCC0, "fadd" }, @@ -1091,8 +1165,8 @@ void put() } { const struct Tbl { - uint8 code1; - uint8 code2; + uint8_t code1; + uint8_t code2; const char *name; } tbl[] = { { 0xD8, 0xD0, "fcom" }, @@ -1113,7 +1187,7 @@ void put() // AVX { // pd, ps, sd, ss const struct Tbl { - uint8 code; + uint8_t code; const char *name; bool only_pd_ps; } tbl[] = { @@ -1142,7 +1216,7 @@ void put() // (x, x/m[, imm]) or (y, y/m[, imm]) { const struct Tbl { - uint8 code; + uint8_t code; const char *name; int type; bool hasIMM; @@ -1223,7 +1297,7 @@ void put() const Tbl *p = &tbl[i]; std::string type = type2String(p->type); if (p->mode & 1) { - const char *immS1 = p->hasIMM ? ", uint8 imm" : ""; + const char *immS1 = p->hasIMM ? ", uint8_t imm" : ""; const char *immS2 = p->hasIMM ? ", imm" : ", NONE"; const char *pref = p->type & T_66 ? "0x66" : p->type & T_F2 ? "0xF2" : p->type & T_F3 ? "0xF3" : "NONE"; const char *suf = p->type & T_0F38 ? "0x38" : p->type & T_0F3A ? "0x3A" : "NONE"; @@ -1231,14 +1305,14 @@ void put() } if (p->mode & 2) { printf("void v%s(const Xmm& xm, const Operand& op%s) { opAVX_X_XM_IMM(xm, op, %s, 0x%02X%s); }\n" - , p->name, p->hasIMM ? ", uint8 imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); + , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? 
", imm" : ""); } } } // (m, x), (m, y) { const struct Tbl { - uint8 code; + uint8_t code; const char *name; int type; } tbl[] = { @@ -1259,7 +1333,7 @@ void put() // (x, x/m), (y, y/m), (x, x, x/m), (y, y, y/m) { const struct Tbl { - uint8 code; + uint8_t code; const char *name; int type; int mode; // 1 : sse, 2 : avx, 3 : sse + avx @@ -1280,7 +1354,7 @@ void put() const Tbl *p = &tbl[i]; std::string type = type2String(p->type); if (p->mode & 1) { - uint8 pref = p->type & T_66 ? 0x66 : p->type & T_F2 ? 0xF2 : p->type & T_F3 ? 0xF3 : 0; + uint8_t pref = p->type & T_66 ? 0x66 : p->type & T_F2 ? 0xF2 : p->type & T_F3 ? 0xF3 : 0; printf("void %s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%02X, 0x%02X, isXMM_XMMorMEM%s); }\n", p->name, p->code, pref, p->type & T_0F38 ? ", NONE, 0x38" : ""); } if (p->mode & 2) { @@ -1308,7 +1382,7 @@ void put() // vpermd, vpermps { const struct Tbl { - uint8 code; + uint8_t code; const char *name; int type; } tbl[] = { @@ -1326,7 +1400,7 @@ void put() // vpermq, vpermpd { const struct Tbl { - uint8 code; + uint8_t code; const char *name; int type; } tbl[] = { @@ -1336,7 +1410,7 @@ void put() for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; std::string type = type2String(p.type); - printf("void %s(const Ymm& y, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(y, op, %s, 0x%02X, imm); }\n", p.name, type.c_str(), p.code); + printf("void %s(const Ymm& y, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(y, op, %s, 0x%02X, imm); }\n", p.name, type.c_str(), p.code); } } // vcmpeqps @@ -1363,7 +1437,7 @@ void put() const struct Tbl { bool isH; bool isPd; - uint8 code; + uint8_t code; } tbl[] = { { true, true, 0x16 }, { true, false, 0x16 }, @@ -1375,7 +1449,7 @@ void put() char c = p.isH ? 'h' : 'l'; const char *suf = p.isPd ? "pd" : "ps"; const char *type = p.isPd ? 
"T_0F | T_66 | T_EVEX | T_EW1 | T_N8" : "T_0F | T_EVEX | T_EW0 | T_N8"; - printf("void vmov%c%s(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, op1, op2, %s, 0x%02X); }\n" + printf("void vmov%c%s(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, %s, 0x%02X); }\n" , c, suf, type, p.code); printf("void vmov%c%s(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, %s, 0x%02X); }\n" , c, suf, type, p.code + 1); @@ -1384,7 +1458,7 @@ void put() // FMA { const struct Tbl { - uint8 code; + uint8_t code; const char *name; bool supportYMM; } tbl[] = { @@ -1408,7 +1482,7 @@ void put() for (int k = 0; k < 3; k++) { const struct Ord { const char *str; - uint8 code; + uint8_t code; } ord[] = { { "132", 0x90 }, { "213", 0xA0 }, @@ -1438,10 +1512,10 @@ void put() { printf("void vbroadcastf128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x1A); }\n"); printf("void vbroadcasti128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x5A); }\n"); - printf("void vbroadcastsd(const Ymm& y, const Operand& op) { if (!op.isMEM() && !(y.isYMM() && op.isXMM()) && !(y.isZMM() && op.isXMM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(y, op, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_N8, 0x19); }\n"); + printf("void vbroadcastsd(const Ymm& y, const Operand& op) { if (!op.isMEM() && !(y.isYMM() && op.isXMM()) && !(y.isZMM() && op.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(y, op, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_N8, 0x19); }\n"); const struct Tbl { const char *name; - uint8 code; + uint8_t code; int type; bool ew1; } tbl[] = { @@ -1454,40 +1528,40 @@ void put() for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; std::string 
type = type2String(p.type); - printf("void %s(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code); + printf("void %s(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code); } - puts("void vextractf128(const Operand& op, const Ymm& y, uint8 imm) { if (!(op.isXMEM() && y.isYMM())) throw Error(ERR_BAD_COMBINATION); opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x19, imm); }"); - puts("void vextracti128(const Operand& op, const Ymm& y, uint8 imm) { if (!(op.isXMEM() && y.isYMM())) throw Error(ERR_BAD_COMBINATION); opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x39, imm); }"); - puts("void vextractps(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_N4, 0x17, imm); }"); - puts("void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); }"); - puts("void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); }"); - puts("void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }"); - puts("void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }"); + puts("void 
vextractf128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x19, imm); }"); + puts("void vextracti128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x39, imm); }"); + puts("void vextractps(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_N4, 0x17, imm); }"); + puts("void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); }"); + puts("void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); }"); + puts("void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }"); + puts("void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }"); puts("void vlddqu(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_F2 | T_W0 | T_YMM, 0xF0); }"); puts("void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, T_0F, 0xAE); }"); puts("void vstmxcsr(const Address& addr) { opAVX_X_X_XM(xm3, xm0, addr, T_0F, 0xAE); }"); puts("void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_66, 0xF7); }"); - puts("void 
vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(8|16|i32e) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x14, imm); }"); - puts("void vpextrw(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(16|i32e) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); if (op.isREG() && x.getIdx() < 16) { opAVX_X_X_XM(Xmm(op.getIdx()), xm0, x, T_0F | T_66, 0xC5, imm); } else { opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N2, 0x15, imm); } }"); - puts("void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x16, imm); }"); - puts("void vpextrq(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x16, imm); }"); + puts("void vpextrb(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(8|16|i32e) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x14, imm); }"); + puts("void vpextrw(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(16|i32e) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) if (op.isREG() && x.getIdx() < 16) { opAVX_X_X_XM(Xmm(op.getIdx()), xm0, x, T_0F | T_66, 0xC5, imm); } else { opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N2, 0x15, imm); } }"); + puts("void vpextrd(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x16, imm); }"); + puts("void vpextrq(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | 
T_N8, 0x16, imm); }"); - puts("void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); opVex(x1, &x2, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x20, imm); }"); - puts("void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); opVex(x1, &x2, op, T_0F | T_66 | T_EVEX | T_N2, 0xC4, imm); }"); - puts("void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); opVex(x1, &x2, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x22, imm); }"); - puts("void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(64) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); opVex(x1, &x2, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x22, imm); }"); + puts("void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x20, imm); }"); + puts("void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F | T_66 | T_EVEX | T_N2, 0xC4, imm); }"); + puts("void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x22, imm); }"); + puts("void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(64) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) 
opVex(x1, &x2, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x22, imm); }"); - puts("void vpmovmskb(const Reg32e& r, const Xmm& x) { if (!x.is(Operand::XMM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(x.isYMM() ? Ymm(r.getIdx()) : Xmm(r.getIdx()), 0, x, T_0F | T_66 | T_YMM, 0xD7); }"); + puts("void vpmovmskb(const Reg32e& r, const Xmm& x) { if (!x.is(Operand::XMM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x.isYMM() ? Ymm(r.getIdx()) : Xmm(r.getIdx()), 0, x, T_0F | T_66 | T_YMM, 0xD7); }"); } // (x, x, imm), (x, imm) { const struct Tbl { const char *name; - uint8 code; + uint8_t code; int idx; int type; } tbl[] = { @@ -1505,14 +1579,14 @@ void put() for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; std::string type = type2String(p.type); - printf("void v%s(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, type.c_str(), p.code); + printf("void v%s(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, type.c_str(), p.code); } } // 4-op { const struct Tbl { const char *name; - uint8 code; + uint8_t code; } tbl[] = { { "vblendvpd", 0x4B }, { "vblendvps", 0x4A }, @@ -1525,18 +1599,18 @@ void put() } // mov { - printf("void vmovd(const Xmm& x, const Operand& op) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x6E); }\n"); - printf("void vmovd(const Operand& op, const Xmm& x) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x7E); }\n"); + printf("void vmovd(const Xmm& x, const Operand& op) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x6E); }\n"); + printf("void vmovd(const Operand& op, const Xmm& x) { if (!op.isREG(32) && 
!op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x7E); }\n"); printf("void vmovq(const Xmm& x, const Address& addr) { int type, code; if (x.getIdx() < 16) { type = T_0F | T_F3; code = 0x7E; } else { type = T_0F | T_66 | T_EVEX | T_EW1 | T_N8; code = 0x6E; } opAVX_X_X_XM(x, xm0, addr, type, code); }\n"); printf("void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, x.getIdx() < 16 ? 0xD6 : 0x7E); }\n"); printf("void vmovq(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_F3 | T_EVEX | T_EW1 | T_N8, 0x7E); }\n"); - printf("void vmovhlps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x12); }\n"); - printf("void vmovlhps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x16); }\n"); + printf("void vmovhlps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x12); }\n"); + printf("void vmovlhps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x16); }\n"); - printf("void vmovmskpd(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_66 | T_W0 | T_YMM, 0x50); }\n"); - printf("void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x.isXMM() ? 
Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_W0 | T_YMM, 0x50); }\n"); + printf("void vmovmskpd(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_66 | T_W0 | T_YMM, 0x50); }\n"); + printf("void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_W0 | T_YMM, 0x50); }\n"); puts("void vmovntdq(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW0, 0xE7); }"); puts("void vmovntpd(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW1, 0x2B); }"); @@ -1549,7 +1623,7 @@ void put() int type = T_0F | T_EVEX; type |= i == 0 ? (T_F2 | T_EW1 | T_N8) : (T_F3 | T_EW0 | T_N4); std::string s = type2String(type); - printf("void vmovs%c(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, %s, 0x10); }\n", c1, s.c_str()); + printf("void vmovs%c(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, %s, 0x10); }\n", c1, s.c_str()); printf("void vmovs%c(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", c1, s.c_str()); printf("void vmovs%c(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, %s | T_M_K, 0x11); }\n", c1, s.c_str()); } @@ -1574,7 +1648,7 @@ void put() puts("void vcvttpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_66 | T_0F | T_YMM | T_EVEX |T_EW1 | T_B64 | T_ER_Z, 0xE6); }"); puts("void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }"); - puts("void vcvtps2ph(const Operand& op, const Xmm& x, 
uint8 imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x1D, imm); }"); + puts("void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x1D, imm); }"); } // haswell gpr(reg, reg, r/m) @@ -1582,7 +1656,7 @@ void put() const struct Tbl { const char *name; int type; - uint8 code; + uint8_t code; } tbl[] = { { "andn", T_0F38, 0xF2 }, { "mulx", T_F2 | T_0F38, 0xF6 }, @@ -1599,7 +1673,7 @@ void put() const struct Tbl { const char *name; int type; - uint8 code; + uint8_t code; } tbl[] = { { "bextr", T_0F38, 0xF7 }, { "bzhi", T_0F38, 0xF5 }, @@ -1611,15 +1685,15 @@ void put() const Tbl& p = tbl[i]; printf("void %s(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, %s, 0x%x, false); }\n", p.name, type2String(p.type).c_str(), p.code); } - puts("void rorx(const Reg32e& r, const Operand& op, uint8 imm) { opGpr(r, op, Reg32e(0, r.getBit()), T_0F3A | T_F2, 0xF0, false, imm); }"); + puts("void rorx(const Reg32e& r, const Operand& op, uint8_t imm) { opGpr(r, op, Reg32e(0, r.getBit()), T_0F3A | T_F2, 0xF0, false, imm); }"); } // gpr(reg, r/m) { const struct Tbl { const char *name; int type; - uint8 code; - uint8 idx; + uint8_t code; + uint8_t idx; } tbl[] = { { "blsi", T_0F38, 0xF3, 3 }, { "blsmsk", T_0F38, 0xF3, 2 }, @@ -1637,7 +1711,7 @@ void put() const int x_vy_x = 2; const struct Tbl { const char *name; - uint8 code; + uint8_t code; int w; int mode; } tbl[] = { @@ -1669,6 +1743,7 @@ void put32() { "aas", 0x3F }, { "daa", 0x27 }, { "das", 0x2F }, + { "into", 0xCE }, { "popad", 0x61 }, { "popfd", 0x9D }, { "pusha", 0x60 }, @@ -1677,6 +1752,8 @@ void put32() { "popa", 0x61 }, }; putGeneric(tbl, NUM_OF_ARRAY(tbl)); + putLoadSeg("lds", 0xC5, NONE); + putLoadSeg("les", 0xC4, NONE); } void put64() @@ -1688,18 +1765,24 @@ void put64() { "cdqe", 0x48, 0x98 }, { "cqo", 0x48, 0x99 }, { "cmpsq", 0x48, 
0xA7 }, + { "popfq", 0x9D }, + { "pushfq", 0x9C }, + { "lodsq", 0x48, 0xAD }, { "movsq", 0x48, 0xA5 }, { "scasq", 0x48, 0xAF }, { "stosq", 0x48, 0xAB }, + { "syscall", 0x0F, 0x05 }, + { "sysret", 0x0F, 0x07 }, }; putGeneric(tbl, NUM_OF_ARRAY(tbl)); - puts("void cmpxchg16b(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xC7); }"); + putMemOp("cmpxchg16b", 0, 1, 0x0F, 0xC7, 64); + putMemOp("fxrstor64", 0, 1, 0x0F, 0xAE, 64); puts("void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }"); puts("void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }"); - puts("void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) throw Error(ERR_BAD_COMBINATION); opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); }"); - puts("void pextrq(const Operand& op, const Xmm& xmm, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opGen(Reg64(xmm.getIdx()), op, 0x16, 0x66, 0, imm, 0x3A); }"); - puts("void pinsrq(const Xmm& xmm, const Operand& op, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opGen(Reg64(xmm.getIdx()), op, 0x22, 0x66, 0, imm, 0x3A); }"); + puts("void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); }"); + puts("void pextrq(const Operand& op, const Xmm& xmm, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(Reg64(xmm.getIdx()), op, 0x16, 0x66, 0, imm, 0x3A); }"); + puts("void pinsrq(const Xmm& xmm, const Operand& op, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(Reg64(xmm.getIdx()), op, 0x22, 0x66, 0, imm, 0x3A); }"); puts("void vcvtss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_ER_X | T_N8, 0x2D); }"); puts("void vcvttss2si(const Reg64& r, const 
Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_SAE_X | T_N8, 0x2C); }"); @@ -1710,10 +1793,35 @@ void put64() puts("void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); }"); } +void putAMX_TILE() +{ + puts("void ldtilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_0F38 | T_W0, 0x49); }"); + puts("void sttilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); }"); + puts("void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2 | T_0F38 | T_W0, 0x4b); }"); + puts("void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66 | T_0F38 | T_W0, 0x4b); }"); + puts("void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }"); + puts("void tilestored(const Address& addr, const Tmm& tm) { opVex(tm, &tmm0, addr, T_F3 | T_0F38 | T_W0, 0x4b); }"); + puts("void tilezero(const Tmm& Tmm) { opVex(Tmm, &tmm0, tmm0, T_F2 | T_0F38 | T_W0, 0x49); }"); +} +void putAMX_INT8() +{ + puts("void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F2 | T_0F38 | T_W0, 0x5e); }"); + puts("void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }"); + puts("void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }"); + puts("void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38 | T_W0, 0x5e); }"); +} +void putAMX_BF16() +{ + puts("void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5c); }"); +} + void putFixed() { puts("#ifdef XBYAK64"); put64(); + putAMX_TILE(); + putAMX_INT8(); + putAMX_BF16(); puts("#else"); put32(); puts("#endif"); @@ -1724,7 +1832,7 @@ void putFixed() for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const char *name = tbl[i]; printf("void %s(const Operand& op1, 
const Operand& op2) { %s_(op1, op2); }\n", name, name); - printf("void %s(const Operand& op, uint32 imm) { %s_(op, imm); }\n", name, name); + printf("void %s(const Operand& op, uint32_t imm) { %s_(op, imm); }\n", name, name); } puts("void not(const Operand& op) { not_(op); }"); puts("#endif"); @@ -1732,10 +1840,10 @@ void putFixed() void putOmit() { - puts("void vpinsrb(const Xmm& x, const Operand& op, uint8 imm) { vpinsrb(x, x, op, imm); }"); - puts("void vpinsrd(const Xmm& x, const Operand& op, uint8 imm) { vpinsrd(x, x, op, imm); }"); - puts("void vpinsrq(const Xmm& x, const Operand& op, uint8 imm) { vpinsrq(x, x, op, imm); }"); - puts("void vpinsrw(const Xmm& x, const Operand& op, uint8 imm) { vpinsrw(x, x, op, imm); }"); + puts("void vpinsrb(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrb(x, x, op, imm); }"); + puts("void vpinsrd(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrd(x, x, op, imm); }"); + puts("void vpinsrq(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrq(x, x, op, imm); }"); + puts("void vpinsrw(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrw(x, x, op, imm); }"); puts("void vcvtsi2sd(const Xmm& x, const Operand& op) { vcvtsi2sd(x, x, op); }"); puts("void vcvtsi2ss(const Xmm& x, const Operand& op) { vcvtsi2ss(x, x, op); }"); @@ -1769,7 +1877,7 @@ void putOmit() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const char *name = tbl[i]; - printf("void v%s(const Xmm& x, uint8 imm) { v%s(x, x, imm); }\n", name, name); + printf("void v%s(const Xmm& x, uint8_t imm) { v%s(x, x, imm); }\n", name, name); } } { diff --git a/readme.md b/readme.md index 421674d7..6caaa51a 100644 --- a/readme.md +++ b/readme.md @@ -1,22 +1,25 @@ +[![Build Status](https://travis-ci.org/herumi/xbyak.png)](https://travis-ci.org/herumi/xbyak) -# Xbyak 5.78 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++ +# Xbyak 5.97 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++ ## Abstract -This is a header file which enables dynamically to 
assemble x86(IA32), x64(AMD64, x86-64) mnemonic. +Xbyak is a C++ header library that enables dynamic assembly of x86(IA32), x64(AMD64, x86-64) mnemonics. ## Feature * header file only * Intel/MASM like syntax * fully support AVX-512 -**Note**: Xbyak uses and(), or(), xor(), not() functions, so `-fno-operator-names` option is necessary for gcc/clang. +**Note**: +Use `and_()`, `or_()`, ... instead of `and()`, `or()`. +If you want to use them, then specify the `-fno-operator-names` option to gcc/clang. -Or define `XBYAK_NO_OP_NAMES` before including `xbyak.h` and use and_(), or_(), xor_(), not_() instead of them. - -and_(), or_(), xor_(), not_() are always available. - -`XBYAK_NO_OP_NAMES` will be defined in the feature version. +### News +- (breaks backward compatibility) `push(byte, imm)` (resp. `push(word, imm)`) forces a cast of `imm` to 8 (resp. 16) bits. +- (Windows) `#include <winsock2.h>` has been removed from xbyak.h, so add it explicitly if you need it. +- supports exception-less mode; see [Exception-less mode](#exception-less-mode) +- `XBYAK_USE_MMAP_ALLOCATOR` will be defined on Linux/macOS unless `XBYAK_DONT_USE_MMAP_ALLOCATOR` is defined. ### Supported OS @@ -47,7 +50,6 @@ These files are copied into `/usr/local/include/xbyak`. Inherit `Xbyak::CodeGenerator` class and make the class method. ``` -#define XBYAK_NO_OP_NAMES #include <xbyak/xbyak.h> struct Code : Xbyak::CodeGenerator { @@ -58,6 +60,15 @@ struct Code : Xbyak::CodeGenerator { } }; ``` +Or you can pass an instance of CodeGenerator without inheriting. +``` +void genCode(Xbyak::CodeGenerator& code, int x) { + using namespace Xbyak::util; + code.mov(eax, x); + code.ret(); +} +``` + Make an instance of the class and get the function pointer by calling `getCode()` and call it. ``` @@ -146,6 +157,8 @@ vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], ``` ### Remark * `k1`, ..., `k7` are opmask registers. + - `k0` is treated as no mask. + - e.g. 
`vmovaps(zmm0|k0, ptr[rax]);` and `vmovaps(zmm0|T_z, ptr[rax]);` are the same as `vmovaps(zmm0, ptr[rax]);`. * use `| T_z`, `| T_sae`, `| T_rn_sae`, `| T_rd_sae`, `| T_ru_sae`, `| T_rz_sae` instead of `,{z}`, `,{sae}`, `,{rn-sae}`, `,{rd-sae}`, `,{ru-sae}`, `,{rz-sae}` respectively. * `k4 | k3` is different from `k3 | k4`. * use `ptr_b` for broadcast `{1toX}`. X is automatically determined. @@ -212,6 +225,32 @@ void func1() } ``` +### short and long jump +Xbyak deals with jump mnemonics of an undefined label as a short jump if no type is specified. +So if the size between jmp and label is larger than 127 bytes, then xbyak will cause an error. + +``` +jmp("short-jmp"); // short jmp +// small code +L("short-jmp"); + +jmp("long-jmp"); +// long code +L("long-jmp"); // throw exception +``` +Then specify T_NEAR for jmp. +``` +jmp("long-jmp", T_NEAR); // long jmp +// long code +L("long-jmp"); +``` +Or call `setDefaultJmpNEAR(true);` once, then the default type is set to T_NEAR. +``` +jmp("long-jmp"); // long jmp +// long code +L("long-jmp"); +``` + ### Label class `L()` and `jxx()` support Label class. @@ -369,15 +408,22 @@ c.setProtectModeRE(); Call `readyRE()` instead of `ready()` when using `AutoGrow` mode. See [protect-re.cpp](sample/protect-re.cpp). +## Exception-less mode +If `XBYAK_NO_EXCEPTION` is defined, then gcc/clang can compile xbyak with `-fno-exceptions`. +Instead of throwing an exception, `Xbyak::GetError()` returns a non-zero value (e.g. `ERR_BAD_ADDRESSING`) if there is something wrong. +The status will not be changed automatically, so you should reset it with `Xbyak::ClearError()`. +`CodeGenerator::reset()` calls `ClearError()`. + ## Macro * **XBYAK32** is defined on 32bit. * **XBYAK64** is defined on 64bit. 
-* **XBYAK64_WIN** is defined on 64bit Windows(VC) -* **XBYAK64_GCC** is defined on 64bit gcc, cygwin -* define **XBYAK_NO_OP_NAMES** on gcc without `-fno-operator-names` -* define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(deprecated in the future) -* define **XBYAK_UNDEF_JNL** if Bessel function jnl is defined as macro +* **XBYAK64_WIN** is defined on 64bit Windows(VC). +* **XBYAK64_GCC** is defined on 64bit gcc, cygwin. +* define **XBYAK_USE_OP_NAMES** on gcc with `-fno-operator-names` if you want to use `and()`, .... +* define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(deprecated in the future). +* define **XBYAK_UNDEF_JNL** if Bessel function jnl is defined as a macro. +* define **XBYAK_NO_EXCEPTION** for a compiler option `-fno-exceptions`. ## Sample @@ -392,6 +438,31 @@ modified new BSD License http://opensource.org/licenses/BSD-3-Clause ## History +* 2020/Sep/08 ver 5.97 replace uint32 with uint32_t etc. +* 2020/Aug/28 ver 5.95 some constructors of register classes support constexpr if C++14 or later +* 2020/Aug/04 ver 5.941 `CodeGenerator::reset()` calls `ClearError()`. +* 2020/Jul/28 ver 5.94 remove #include <winsock2.h> (only windows) +* 2020/Jul/21 ver 5.93 support exception-less mode +* 2020/Jun/30 ver 5.92 support Intel AMX instruction set (Thanks to nshustrov) +* 2020/Jun/22 ver 5.913 fix mov(r64, imm64) on 32-bit env with XBYAK64 +* 2020/Jun/19 ver 5.912 define MAP_JIT on macOS regardless of Xcode version (Thanks to rsdubtso) +* 2020/May/10 ver 5.911 XBYAK_USE_MMAP_ALLOCATOR is defined unless XBYAK_DONT_USE_MMAP_ALLOCATOR is defined. 
+* 2020/Apr/20 ver 5.91 accept mask register k0 (it means no mask) +* 2020/Apr/09 ver 5.90 kmov{b,d,w,q} throws exception for an unsupported register +* 2020/Feb/26 ver 5.891 fix typo of type +* 2020/Jan/03 ver 5.89 fix error of vfpclasspd +* 2019/Dec/20 ver 5.88 fix compile error on Windows +* 2019/Dec/19 ver 5.87 add setDefaultJmpNEAR(), which deals with `jmp` of an undefined label as T_NEAR if no type is specified. +* 2019/Dec/13 ver 5.86 [changed] revert to the behavior before v5.84 if -fno-operator-names is defined (and() is available) +* 2019/Dec/07 ver 5.85 append MAP_JIT flag to mmap for macOS mojave or later +* 2019/Nov/29 ver 5.84 [changed] XBYAK_NO_OP_NAMES is defined unless XBYAK_USE_OP_NAMES is defined +* 2019/Oct/12 ver 5.83 exit(1) was removed +* 2019/Sep/23 ver 5.82 support monitorx, mwaitx, clzero (thanks to @MagurosanTeam) +* 2019/Sep/14 ver 5.81 support some generic mnemonics. +* 2019/Aug/01 ver 5.802 fix detection of AVX512_BF16 (thanks to vpirogov) +* 2019/May/27 support vp2intersectd, vp2intersectq (not tested) +* 2019/May/26 ver 5.80 support vcvtne2ps2bf16, vcvtneps2bf16, vdpbf16ps +* 2019/Apr/27 ver 5.79 vcmppd/vcmpps supports ptr_b(thanks to jkopinsky) * 2019/Apr/15 ver 5.78 rewrite Reg::changeBit() (thanks to MerryMage) * 2019/Mar/06 ver 5.77 fix number of cores that share LLC cache by densamoilov * 2019/Jan/17 ver 5.76 add Cpu::getNumCores() by shelleygoel @@ -530,3 +601,5 @@ http://opensource.org/licenses/BSD-3-Clause ## Author MITSUNARI Shigeo(herumi@nifty.com) +## Sponsors welcome +[GitHub Sponsor](https://github.com/sponsors/herumi) diff --git a/readme.txt b/readme.txt index 20c77e95..dcbff9e7 100644 --- a/readme.txt +++ b/readme.txt @@ -1,5 +1,5 @@ - C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.78 + C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.97 ----------------------------------------------------------------------------- â—Žæ¦‚è¦ @@ -22,21 +22,21 @@ Intel Mac ãªã©ã§å‹•ä½œç¢ºèªã‚’ã—ã¦ã„ã¾ã™ã€‚ -※ 
Xbyakã¯ãƒ‡ãƒ•ã‚©ãƒ«ãƒˆã§and(), or(), xor(), not()関数を使ã„ã¾ã™ã€‚ -gccã§ã¯ãれらを演算å­ã¨ã—ã¦è§£é‡ˆã—ã¦ã—ã¾ã†ãŸã‚ã€-fno-operator-namesオプションを追加ã—ã¦ã‚³ãƒ³ãƒ‘イルã—ã¦ãã ã•ã„。 -ã‚ã‚‹ã„ã¯XBYAK_NO_OP_NAMESを定義ã—ã¦and_(), or_(), xor_(), not_()を使ã£ã¦ãã ã•ã„。 -and_(), or_(), xor_(), not_()ã¯XBYAK_NO_OP_NAMESã•ã‚Œã¦ã„ãªã„ã¨ãã§ã‚‚使ãˆã¾ã™ã€‚ +※ and, orãªã©ã®ä»£ã‚ã‚Šã«and_, or_を使用ã—ã¦ãã ã•ã„。 +and, orãªã©ã‚’使ã„ãŸã„å ´åˆã¯-fno-operator-namesã‚’gcc/clangã«æŒ‡å®šã—ã¦ãã ã•ã„。 ----------------------------------------------------------------------------- ◎準備 xbyak.h xbyak_bin2hex.h -xbyak_mnemonic.h ã“れらをåŒä¸€ã®ãƒ‘スã«å…¥ã‚Œã¦ã‚¤ãƒ³ã‚¯ãƒ«ãƒ¼ãƒ‰ãƒ‘スã«è¿½åŠ ã—ã¦ãã ã•ã„。 Linuxã§ã¯make installã§/usr/local/include/xbyakã«ã‚³ãƒ”ーã•ã‚Œã¾ã™ã€‚ ----------------------------------------------------------------------------- ◎下ä½äº’æ›æ€§ã®ç ´ã‚Œ +* push byte, immã¾ãŸã¯push word, immãŒä¸‹ä½8bit, 16bitã«ã‚­ãƒ£ã‚¹ãƒˆã—ãŸå€¤ã‚’使ã†ã‚ˆã†ã«å¤‰æ›´ã€‚ +* (Windows) ``ã‚’includeã—ãªããªã£ãŸã®ã§å¿…è¦ãªã‚‰æ˜Žç¤ºçš„ã«includeã—ã¦ãã ã•ã„。 +* XBYAK_USE_MMAP_ALLOCATORãŒãƒ‡ãƒ•ã‚©ãƒ«ãƒˆã§æœ‰åŠ¹ã«ãªã‚Šã¾ã—ãŸã€‚従æ¥ã®æ–¹å¼ã«ã™ã‚‹å ´åˆã¯XBYAK_DONT_USE_MMAP_ALLOCATORを定義ã—ã¦ãã ã•ã„。 * Xbyak::Errorã®åž‹ã‚’enumã‹ã‚‰classã«å¤‰æ›´ ** 従æ¥ã®enumã®å€¤ã‚’ã¨ã‚‹ã«ã¯intã«ã‚­ãƒ£ã‚¹ãƒˆã—ã¦ãã ã•ã„。 * (å¤ã„)Reg32eクラスを(æ–°ã—ã„)Reg32eã¨RegExpã«åˆ†ã‘る。 @@ -46,6 +46,13 @@ Linuxã§ã¯make installã§/usr/local/include/xbyakã«ã‚³ãƒ”ーã•ã‚Œã¾ã™ã€‚ ----------------------------------------------------------------------------- ◎新機能 +例外ãªã—モード追加 +XBYAK_NO_EXCEPTIONを定義ã—ã¦ã‚³ãƒ³ãƒ‘イルã™ã‚‹ã¨gcc/clangã§-fno-exceptionsオプションã§ã‚³ãƒ³ãƒ‘イルã§ãã¾ã™ã€‚ +エラーã¯ä¾‹å¤–ã®ä»£ã‚ã‚Šã«`Xbyak::GetError()`ã§é€šé”ã•ã‚Œã¾ã™ã€‚ +ã“ã®å€¤ãŒ0ã§ãªã‘ã‚Œã°ä½•ã‹å•é¡ŒãŒç™ºç”Ÿã—ã¦ã„ã¾ã™ã€‚ +ã“ã®å€¤ã¯è‡ªå‹•çš„ã«å¤‰æ›´ã•ã‚Œãªã„ã®ã§`Xbyak::ClearError()`ã§ãƒªã‚»ãƒƒãƒˆã—ã¦ãã ã•ã„。 +`CodeGenerator::reset()`ã¯`ClearError()`を呼ã³ã¾ã™ã€‚ + MmapAllocator追加 ã“ã‚Œã¯Unixç³»OSã§ã®ã¿ã®ä»•æ§˜ã§ã™ã€‚XBYAK_USE_MMAP_ALLOCATORを使ã†ã¨åˆ©ç”¨ã§ãã¾ã™ã€‚ デフォルトã®Allocatorã¯ãƒ¡ãƒ¢ãƒªç¢ºä¿æ™‚ã«posix_memalignを使ã„ã¾ã™ã€‚ @@ 
-54,7 +61,6 @@ map countã®æœ€å¤§å€¤ã¯/proc/sys/vm/max_map_countã«æ›¸ã‹ã‚Œã¦ã„ã¾ã™ã€‚ デフォルトã§ã¯3万個ã»ã©ã®Xbyak::CodeGeneratorインスタンスを生æˆã™ã‚‹ã¨ã‚¨ãƒ©ãƒ¼ã«ãªã‚Šã¾ã™ã€‚ test/mprotect_test.cppã§ç¢ºèªã§ãã¾ã™ã€‚ ã“れをé¿ã‘ã‚‹ãŸã‚ã«ã¯mmapを使ã†MmapAllocatorを使ã£ã¦ãã ã•ã„。 -å°†æ¥ã“ã®æŒ™å‹•ãŒãƒ‡ãƒ•ã‚©ãƒ«ãƒˆã«ãªã‚‹ã‹ã‚‚ã—ã‚Œã¾ã›ã‚“。 AutoGrowモード追加 @@ -373,6 +379,31 @@ sample/{echo,hello}.bf㯠http://www.kmonos.net/alang/etc/brainfuck.php ã‹ã‚‰ ----------------------------------------------------------------------------- ◎履歴 +2020/09/08 ver 5.97 uint32ãªã©ã‚’uint32_tã«ç½®æ› +2020/08/28 ver 5.95 レジスタクラスã®ã‚³ãƒ³ã‚¹ãƒˆãƒ©ã‚¯ã‚¿ãŒconstexprã«å¯¾å¿œ(C++14以é™) +2020/08/04 ver 5.941 `CodeGenerator::reset()`ãŒ`ClearError()`を呼ã¶ã‚ˆã†ã«å¤‰æ›´ +2020/07/28 ver 5.94 #include ã®å‰Šé™¤ (only windows) +2020/07/21 ver 5.93 例外ãªã—モード追加 +2020/06/30 ver 5.92 Intel AMX命令サãƒãƒ¼ãƒˆ (Thanks to nshustrov) +2020/06/19 ver 5.913 32ビット環境ã§XBYAK64を定義ã—ãŸã¨ãã®mov(r64, imm64)を修正 +2020/06/19 ver 5.912 macOSã®å¤ã„Xcodeã§ã‚‚MAP_JITを有効ã«ã™ã‚‹(Thanks to rsdubtso) +2020/05/10 ver 5.911 Linux/macOSã§XBYAK_USE_MMAP_ALLOCATORãŒãƒ‡ãƒ•ã‚©ãƒ«ãƒˆæœ‰åŠ¹ã«ãªã‚‹ +2020/04/20 ver 5.91 マスクレジスタk0ã‚’å—ã‘入れる(マスクをã—ãªã„) +2020/04/09 ver 5.90 kmov{b,w,d,q}ãŒã‚µãƒãƒ¼ãƒˆã•ã‚Œãªã„レジスタをå—ã‘ã‚‹ã¨ä¾‹å¤–を投ã’ã‚‹ +2020/02/26 ver 5.891 zm0ã®type修正 +2020/01/03 ver 5.89 vfpclasspdã®å‡¦ç†ã‚¨ãƒ©ãƒ¼ä¿®æ­£ +2019/12/20 ver 5.88 Windowsã§ã®ã‚³ãƒ³ãƒ‘イルエラー修正 +2019/12/19 ver 5.87 未定義ラベルã¸ã®jmp命令ã®ãƒ‡ãƒ•ã‚©ãƒ«ãƒˆæŒ™å‹•ã‚’T_NEARã«ã™ã‚‹setDefaultJmpNEAR()を追加 +2019/12/13 ver 5.86 [変更] -fno-operator-namesãŒæŒ‡å®šã•ã‚ŒãŸã¨ãã¯5.84以å‰ã®æŒ™å‹•ã«æˆ»ã™ +2019/12/07 ver 5.85 mmapã«MAP_JITフラグを追加(macOS mojave以上) +2019/11/29 ver 5.84 [変更] XBYAK_USE_OP_NAMESãŒå®šç¾©ã•ã‚Œã¦ã„ãªã„é™ã‚ŠXBYAK_NO_OP_NAMESãŒå®šç¾©ã•ã‚Œã‚‹ã‚ˆã†ã«å¤‰æ›´ +2019/10/12 ver 5.83 exit(1)ã®é™¤åŽ» +2019/09/23 ver 5.82 monitorx, mwaitx, clzero対応 (thanks to MagurosanTeam) +2019/09/14 ver 5.81 ã„ãã¤ã‹ã®ä¸€èˆ¬å‘½ä»¤ã‚’サãƒãƒ¼ãƒˆ +2019/08/01 ver 5.802 AVX512_BF16判定修正 (thanks to vpirogov) +2019/05/27 support 
vp2intersectd, vp2intersectq (not tested) +2019/05/26 ver 5.80 support vcvtne2ps2bf16, vcvtneps2bf16, vdpbf16ps +2019/04/27 ver 5.79 vcmppd/vcmppsã®ptr_b対応忘れ(thanks to jkopinsky) 2019/04/15 ver 5.78 Reg::changeBit()ã®ãƒªãƒ•ã‚¡ã‚¯ã‚¿ãƒªãƒ³ã‚°(thanks to MerryMage) 2019/03/06 ver 5.77 LLCキャッシュを共有数CPUæ•°ã®ä¿®æ•´(by densamoilov) 2019/01/17 ver 5.76 Cpu::getNumCores()追加(by shelleygoel) diff --git a/sample/Makefile b/sample/Makefile index da21f204..0c100a31 100644 --- a/sample/Makefile +++ b/sample/Makefile @@ -1,10 +1,12 @@ -TARGET = test quantize bf toyvm test_util memfunc static_buf jmp_table XBYAK_INC=../xbyak/xbyak.h BOOST_EXIST=$(shell echo "\#include " | (gcc -E - 2>/dev/null) | grep "boost/spirit/core.hpp" >/dev/null && echo "1") UNAME_M=$(shell uname -m) +ONLY_64BIT=0 ifeq ($(shell uname -s),Darwin) + ONLY_64BIT=1 + OS=mac ifeq ($(UNAME_M),x86_64) BIT=64 endif @@ -27,19 +29,27 @@ else endif ifeq ($(BIT),64) -TARGET += test64 bf64 memfunc64 test_util64 static_buf64 jmp_table64 +TARGET += test64 bf64 memfunc64 test_util64 jmp_table64 ifeq ($(BOOST_EXIST),1) TARGET += calc64 #calc2_64 endif endif +ifneq ($(OS),mac) +TARGET += static_buf64 +endif + + +ifneq ($(ONLY_64BIT),1) + TARGET += test quantize bf toyvm test_util memfunc static_buf jmp_table ifeq ($(BOOST_EXIST),1) -TARGET += calc #calc2 + TARGET += calc #calc2 +endif endif all: $(TARGET) -CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith -pedantic +CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith #-pedantic CFLAGS=-g -O2 -fomit-frame-pointer -Wall -I../ $(CFLAGS_WARN) @@ -85,9 +95,13 @@ jmp_table: $(CXX) $(CFLAGS) jmp_table.cpp -o $@ -m32 jmp_table64: $(CXX) $(CFLAGS) jmp_table.cpp -o $@ -m64 +profiler: profiler.cpp ../xbyak/xbyak_util.h + $(CXX) $(CFLAGS) profiler.cpp -o $@ +profiler-vtune: profiler.cpp ../xbyak/xbyak_util.h + $(CXX) $(CFLAGS) profiler.cpp -o $@ -DXBYAK_USE_VTUNE -I 
/opt/intel/vtune_amplifier/include/ -L /opt/intel/vtune_amplifier/lib64 -ljitprofiling -ldl clean: - rm -rf *.o $(TARGET) *.exe + rm -rf *.o $(TARGET) *.exe profiler profiler-vtune test : test0.cpp $(XBYAK_INC) test64: test0.cpp $(XBYAK_INC) diff --git a/sample/bf.cpp b/sample/bf.cpp index 20a0fd96..2abb8a54 100644 --- a/sample/bf.cpp +++ b/sample/bf.cpp @@ -148,7 +148,7 @@ public: } }; -void dump(const Xbyak::uint8 *code, size_t size) +void dump(const uint8_t *code, size_t size) { puts("#include \nstatic int stack[128 * 1024];"); #ifdef _MSC_VER diff --git a/sample/bf.vcproj b/sample/bf.vcproj deleted file mode 100644 index 968d2c85..00000000 --- a/sample/bf.vcproj +++ /dev/null @@ -1,427 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/sample/bf.vcxproj b/sample/bf.vcxproj new file mode 100644 index 00000000..88a74376 --- /dev/null +++ b/sample/bf.vcxproj @@ -0,0 +1,228 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {654BD79B-59D3-4B10-BBAA-158BAB272828} + 10.0.17763.0 + + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + + + + + + + + + + + + + + + + + + + + + <_ProjectFileVersion>15.0.27924.0 + + + false + $(ProjectName)\$(Configuration)\ + + + true + $(ProjectName)\$(Configuration)\ + + + false + $(ProjectName)\$(Platform)\$(Configuration)\ + + + true + $(ProjectName)\$(Platform)\$(Configuration)\ + + + + .\Release/bf.tlb + + + + MaxSpeed + OnlyExplicitInline + ../;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + MultiThreaded + true + .\Release/bf.pch + Level4 + true + + + NDEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + Console + false + + 
MachineX86 + + + true + + + + + .\Debug/bf.tlb + + + + Disabled + ../;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + .\Debug/bf.pch + Level4 + true + + + _DEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + true + .\Debug/bf.pdb + Console + false + + MachineX86 + + + true + + + + + X64 + .\Release/bf.tlb + + + + MaxSpeed + OnlyExplicitInline + ../;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + MultiThreaded + true + .\Release/bf.pch + Level4 + true + + + NDEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + Console + false + + MachineX64 + + + true + + + + + X64 + .\Debug/bf.tlb + + + + Disabled + ../;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + .\Debug/bf.pch + Level4 + true + ProgramDatabase + + + _DEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + true + .\Debug/bf.pdb + Console + false + + MachineX64 + + + true + + + + + + + + + \ No newline at end of file diff --git a/sample/calc.cpp b/sample/calc.cpp index 3f4a0f9c..29ad9c3a 100644 --- a/sample/calc.cpp +++ b/sample/calc.cpp @@ -155,9 +155,9 @@ struct Grammar : public boost::spirit::classic::grammar { void put(const std::vector& x) { - printf("%f", x[0]); - for (size_t i = 1, n = x.size(); i < n; i++) { - printf(", %f", x[i]); + for (size_t i = 0, n = x.size(); i < n; i++) { + if (i > 0) printf(", "); + printf("%f", x[i]); } } diff --git a/sample/calc.vcproj b/sample/calc.vcproj deleted file mode 100644 index 3c6148aa..00000000 --- a/sample/calc.vcproj +++ /dev/null @@ -1,423 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/sample/calc.vcxproj b/sample/calc.vcxproj new file mode 100644 index 
00000000..2846bb3d --- /dev/null +++ b/sample/calc.vcxproj @@ -0,0 +1,228 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {5FDDFAA6-B947-491D-A17E-BBD863846579} + 10.0.17763.0 + + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + + + + + + + + + + + + + + + + + + + + + <_ProjectFileVersion>15.0.27924.0 + + + false + $(ProjectName)\$(Configuration)\ + + + true + $(ProjectName)\$(Configuration)\ + + + false + $(ProjectName)\$(Platform)\$(Configuration)\ + + + true + $(ProjectName)\$(Platform)\$(Configuration)\ + + + + .\Release/calc.tlb + + + + MaxSpeed + OnlyExplicitInline + ../;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + MultiThreaded + true + .\Release/calc.pch + Level4 + true + + + NDEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + Console + false + + MachineX86 + + + true + + + + + .\Debug/calc.tlb + + + + Disabled + ../;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + .\Debug/calc.pch + Level4 + true + + + _DEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + true + .\Debug/calc.pdb + Console + false + + MachineX86 + + + true + + + + + X64 + .\Release/calc.tlb + + + + MaxSpeed + OnlyExplicitInline + ../;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + MultiThreaded + true + .\Release/calc.pch + Level4 + true + + + NDEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + Console + false + + MachineX64 + + + true + + + + + X64 + .\Debug/calc.tlb + + + + Disabled + ../;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + .\Debug/calc.pch + Level4 + true + ProgramDatabase + + + _DEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + 
true + .\Debug/calc.pdb + Console + false + + MachineX64 + + + true + + + + + + + + + \ No newline at end of file diff --git a/sample/calc2.cpp b/sample/calc2.cpp index 74b0494a..a13d2cf5 100644 --- a/sample/calc2.cpp +++ b/sample/calc2.cpp @@ -102,7 +102,7 @@ private: MAX_CONST_NUM = 32 }; MIE_ALIGN(16) double constTbl_[MAX_CONST_NUM]; - Xbyak::uint64 negConst_; + Xbyak::uint64_t negConst_; size_t constTblPos_; #ifdef XBYAK32 const Xbyak::Reg32& varTbl_; @@ -118,7 +118,7 @@ public: 64bit: x [rcx](win), xmm0(gcc), return xmm0 */ Jit() - : negConst_(Xbyak::uint64(1) << 63) + : negConst_(Xbyak::uint64_t(1) << 63) , constTblPos_(0) #ifdef XBYAK32 , varTbl_(eax) diff --git a/sample/profiler.cpp b/sample/profiler.cpp new file mode 100644 index 00000000..dc15d9bf --- /dev/null +++ b/sample/profiler.cpp @@ -0,0 +1,90 @@ +/* + How to profile JIT-code with perf or VTune + sudo perf record ./profiler 1 + amplxe-cl -collect hotspots -result-dir r001hs -quiet ./profiler-vtune 2 +*/ +#include +#include +#include +#include + +const int N = 3000000; +struct Code : public Xbyak::CodeGenerator { + Code() + { + mov(eax, N); + Xbyak::Label lp = L(); + for (int i = 0; i < 10; i++) { + sub(eax, 1); + } + jg(lp); + mov(eax, 1); + ret(); + } +}; + +struct Code2 : public Xbyak::CodeGenerator { + Code2() + { + mov(eax, N); + Xbyak::Label lp = L(); + for (int i = 0; i < 10; i++) { + xorps(xm0, xm0); + } + sub(eax, 1); + jg(lp); + mov(eax, 1); + ret(); + } +}; + +double s1(int n) +{ + double r = 0; + for (int i = 0; i < n; i++) { + r += 1.0 / (i + 1); + } + return r; +} + +double s2(int n) +{ + double r = 0; + for (int i = 0; i < n; i++) { + r += 1.0 / (i * i + 1) + 2.0 / (i + 3); + } + return r; +} + +int main(int argc, char *argv[]) +{ + int mode = argc == 1 ? 
0 : atoi(argv[1]); + Code c; + Code2 c2; + int (*f)() = (int (*)())c.getCode(); + int (*g)() = (int (*)())c2.getCode(); + + printf("f:%p, %d\n", (const void*)f, (int)c.getSize()); + printf("g:%p, %d\n", (const void*)g, (int)c2.getSize()); + Xbyak::util::Profiler prof; + printf("mode=%d\n", mode); + prof.init(mode); + prof.set("f", (const void*)f, c.getSize()); + prof.set("g", (const void*)g, c2.getSize()); + + double sum = 0; + for (int i = 0; i < 20000; i++) { + sum += s1(i); + sum += s2(i); + } + printf("sum=%f\n", sum); + for (int i = 0; i < 2000; i++) { + sum += f(); + } + printf("f=%f\n", sum); + for (int i = 0; i < 2000; i++) { + sum += g(); + } + printf("g=%f\n", sum); + puts("end"); +} diff --git a/sample/quantize.cpp b/sample/quantize.cpp index c3eeafb3..6bdf0d00 100644 --- a/sample/quantize.cpp +++ b/sample/quantize.cpp @@ -5,12 +5,12 @@ This program generates a quantization routine by using fast division algorithm in run-time. time(sec) - quality 1(low) 10 50 100(high) + quality 1(high) 10 50 100(low) VC2005 8.0 8.0 8.0 8.0 Xbyak 1.6 0.8 0.5 0.5 -; generated code at q = 100 +; generated code at q = 1 push esi push edi mov edi,dword ptr [esp+0Ch] @@ -48,9 +48,6 @@ #pragma warning(disable : 4996) // scanf #endif -typedef Xbyak::uint64 uint64; -typedef Xbyak::uint32 uint32; - const int N = 64; class Quantize : public Xbyak::CodeGenerator { @@ -66,7 +63,7 @@ public: output : eax = [esi+offset] / dividend destroy : edx */ - void udiv(uint32 dividend, int offset) + void udiv(uint32_t dividend, int offset) { mov(eax, ptr[esi + offset]); @@ -83,11 +80,11 @@ public: return; } - uint64 mLow, mHigh; + uint64_t mLow, mHigh; int len = ilog2(odd) + 1; { - uint64 roundUp = uint64(1) << (32 + len); - uint64 k = roundUp / (0xFFFFFFFFL - (0xFFFFFFFFL % odd)); + uint64_t roundUp = uint64_t(1) << (32 + len); + uint64_t k = roundUp / (0xFFFFFFFFL - (0xFFFFFFFFL % odd)); mLow = roundUp / odd; mHigh = (roundUp + k) / odd; } @@ -96,12 +93,12 @@ public: mLow >>= 1; mHigh >>= 1; 
len--; } - uint64 m; int a; + uint64_t m; int a; if ((mHigh >> 32) == 0) { m = mHigh; a = 0; } else { len = ilog2(odd); - uint64 roundDown = uint64(1) << (32 + len); + uint64_t roundDown = uint64_t(1) << (32 + len); mLow = roundDown / odd; int r = (int)(roundDown % odd); m = (r <= (odd >> 1)) ? mLow : mLow + 1; @@ -124,9 +121,9 @@ public: mov(eax, edx); } /* - quantize(uint32 dest[64], const uint32 src[64]); + quantize(uint32_t dest[64], const uint32_t src[64]); */ - Quantize(const uint32 qTbl[64]) + Quantize(const uint32_t qTbl[64]) { push(esi); push(edi); @@ -143,7 +140,7 @@ public: } }; -void quantize(uint32 dest[64], const uint32 src[64], const uint32 qTbl[64]) +void quantize(uint32_t dest[64], const uint32_t src[64], const uint32_t qTbl[64]) { for (int i = 0; i < N; i++) { dest[i] = src[i] / qTbl[i]; @@ -170,7 +167,7 @@ int main(int argc, char *argv[]) } } printf("q=%d\n", q); - uint32 qTbl[] = { + uint32_t qTbl[] = { 16, 11, 10, 16, 24, 40, 51, 61, 12, 12, 14, 19, 26, 58, 60, 55, 14, 13, 16, 24, 40, 57, 69, 56, @@ -187,16 +184,16 @@ int main(int argc, char *argv[]) } try { - uint32 src[N]; - uint32 dest[N]; - uint32 dest2[N]; + uint32_t src[N]; + uint32_t dest[N]; + uint32_t dest2[N]; for (int i = 0; i < N; i++) { src[i] = rand() % 2048; } Quantize jit(qTbl); //printf("jit size=%d, ptr=%p\n", jit.getSize(), jit.getCode()); - void (*quantize2)(uint32*, const uint32*, const uint32 *) = jit.getCode(); + void (*quantize2)(uint32_t*, const uint32_t*, const uint32_t *) = jit.getCode(); quantize(dest, src, qTbl); quantize2(dest2, src, qTbl); diff --git a/sample/quantize.vcproj b/sample/quantize.vcproj deleted file mode 100644 index a197e63c..00000000 --- a/sample/quantize.vcproj +++ /dev/null @@ -1,427 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/sample/quantize.vcxproj 
b/sample/quantize.vcxproj new file mode 100644 index 00000000..5d073e74 --- /dev/null +++ b/sample/quantize.vcxproj @@ -0,0 +1,228 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {D06753BF-E1F3-4578-9B18-08673327F77C} + 10.0.17763.0 + + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + + + + + + + + + + + + + + + + + + + + + <_ProjectFileVersion>15.0.27924.0 + + + true + $(ProjectName)\$(Configuration)\ + + + false + $(ProjectName)\$(Configuration)\ + + + true + $(ProjectName)\$(Platform)\$(Configuration)\ + + + false + $(ProjectName)\$(Platform)\$(Configuration)\ + + + + .\Debug/quantize.tlb + + + + Disabled + ../;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + .\Debug/quantize.pch + Level4 + true + + + _DEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + true + .\Debug/quantize.pdb + Console + false + + MachineX86 + + + true + + + + + .\Release/quantize.tlb + + + + MaxSpeed + OnlyExplicitInline + ../;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + MultiThreaded + true + .\Release/quantize.pch + Level4 + true + + + NDEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + Console + false + + MachineX86 + + + true + + + + + X64 + .\Debug/quantize.tlb + + + + Disabled + ../;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + .\Debug/quantize.pch + Level4 + true + ProgramDatabase + + + _DEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + true + .\Debug/quantize.pdb + Console + false + + MachineX64 + + + true + + + + + X64 + .\Release/quantize.tlb + + + + MaxSpeed + OnlyExplicitInline + ../;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + 
MultiThreaded + true + .\Release/quantize.pch + Level4 + true + + + NDEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + Console + false + + MachineX64 + + + true + + + + + + + + + \ No newline at end of file diff --git a/sample/test0.cpp b/sample/test0.cpp index 5a4d91ba..afbaf83f 100644 --- a/sample/test0.cpp +++ b/sample/test0.cpp @@ -163,15 +163,15 @@ int main() // use memory allocated by user using namespace Xbyak; const size_t codeSize = 4096; - uint8 buf[codeSize + 16]; - uint8 *p = CodeArray::getAlignedAddress(buf); + uint8_t buf[codeSize + 16]; + uint8_t *p = CodeArray::getAlignedAddress(buf); Sample s(p, codeSize); if (!CodeArray::protect(p, codeSize, CodeArray::PROTECT_RWE)) { fprintf(stderr, "can't protect\n"); return 1; } int (*func)(int) = s.getCode(); - const uint8 *funcp = reinterpret_cast(func); + const uint8_t *funcp = reinterpret_cast(func); if (funcp != p) { fprintf(stderr, "internal error %p %p\n", p, funcp); return 1; diff --git a/sample/test0.vcproj b/sample/test0.vcproj deleted file mode 100644 index 7ed55712..00000000 --- a/sample/test0.vcproj +++ /dev/null @@ -1,427 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/sample/test0.vcxproj b/sample/test0.vcxproj new file mode 100644 index 00000000..847db6cf --- /dev/null +++ b/sample/test0.vcxproj @@ -0,0 +1,228 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E} + 10.0.17763.0 + + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + + + + + + + + + + + + + + + + + + + + + <_ProjectFileVersion>15.0.27924.0 + + + true + $(ProjectName)\$(Configuration)\ + + + false + $(ProjectName)\$(Configuration)\ + 
+ + true + $(ProjectName)\$(Platform)\$(Configuration)\ + + + false + $(ProjectName)\$(Platform)\$(Configuration)\ + + + + .\Debug/test0.tlb + + + + Disabled + ../;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + .\Debug/test0.pch + Level4 + true + + + _DEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + true + .\Debug/test0.pdb + Console + false + + MachineX86 + + + true + + + + + .\Release/test0.tlb + + + + MaxSpeed + OnlyExplicitInline + ../;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + MultiThreaded + true + .\Release/test0.pch + Level4 + true + + + NDEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + Console + false + + MachineX86 + + + true + + + + + X64 + .\Debug/test0.tlb + + + + Disabled + ../;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + .\Debug/test0.pch + Level4 + true + ProgramDatabase + + + _DEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + true + .\Debug/test0.pdb + Console + false + + MachineX64 + + + true + + + + + X64 + .\Release/test0.tlb + + + + MaxSpeed + OnlyExplicitInline + ../;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + MultiThreaded + true + .\Release/test0.pch + Level4 + true + + + NDEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + Console + false + + MachineX64 + + + true + + + + + + + + + \ No newline at end of file diff --git a/sample/test_util.cpp b/sample/test_util.cpp index d75a5e06..afb6e5a4 100644 --- a/sample/test_util.cpp +++ b/sample/test_util.cpp @@ -78,6 +78,8 @@ void putCPUinfo() { Cpu::tAVX512_VNNI, "avx512_vnni" }, { Cpu::tAVX512_BITALG, "avx512_bitalg" }, { Cpu::tAVX512_VPOPCNTDQ, "avx512_vpopcntdq" }, + { Cpu::tAVX512_BF16, "avx512_bf16" }, + { Cpu::tAVX512_VP2INTERSECT, "avx512_vp2intersect" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { 
if (cpu.has(tbl[i].type)) printf(" %s", tbl[i].str); diff --git a/sample/test_util.vcproj b/sample/test_util.vcproj deleted file mode 100644 index 88de7d9c..00000000 --- a/sample/test_util.vcproj +++ /dev/null @@ -1,427 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/sample/test_util.vcxproj b/sample/test_util.vcxproj new file mode 100644 index 00000000..0ed75e1d --- /dev/null +++ b/sample/test_util.vcxproj @@ -0,0 +1,228 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD} + 10.0.17763.0 + + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + + + + + + + + + + + + + + + + + + + + + <_ProjectFileVersion>15.0.27924.0 + + + true + $(ProjectName)\$(Configuration)\ + + + true + $(ProjectName)\$(Platform)\$(Configuration)\ + + + false + $(ProjectName)\$(Configuration)\ + + + false + $(ProjectName)\$(Platform)\$(Configuration)\ + + + + .\Debug/test_util.tlb + + + + Disabled + ../;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + .\Debug/test_util.pch + Level4 + true + + + _DEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + true + .\Debug/test_util.pdb + Console + false + + MachineX86 + + + true + + + + + X64 + .\Debug/test_util.tlb + + + + Disabled + ../;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + .\Debug/test_util.pch + Level4 + true + ProgramDatabase + + + _DEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + true + .\Debug/test_util.pdb + Console + false + + MachineX64 + + + true + + + + + 
.\Release/test_util.tlb + + + + MaxSpeed + OnlyExplicitInline + ../;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + MultiThreaded + true + .\Release/test_util.pch + Level4 + true + + + NDEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + Console + false + + MachineX86 + + + true + + + + + X64 + .\Release/test_util.tlb + + + + MaxSpeed + OnlyExplicitInline + ../;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + MultiThreaded + true + .\Release/test_util.pch + Level4 + true + + + NDEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + Console + false + + MachineX64 + + + true + + + + + + + + + \ No newline at end of file diff --git a/sample/toyvm.cpp b/sample/toyvm.cpp index cd869ea3..1e558ff0 100644 --- a/sample/toyvm.cpp +++ b/sample/toyvm.cpp @@ -39,7 +39,7 @@ using namespace Xbyak; class ToyVm : public Xbyak::CodeGenerator { - typedef std::vector Buffer; + typedef std::vector Buffer; public: enum Reg { A, B @@ -53,14 +53,14 @@ public: { ::memset(mem_, 0, sizeof(mem_)); } - void vldi(Reg r, uint16 imm) { encode(LDI, r, imm); } - void vld(Reg r, uint16 idx) { encode(LD, r, idx); } - void vst(Reg r, uint16 idx) { encode(ST, r, idx); } - void vadd(Reg r, uint16 idx) { encode(ADD, r, idx); } - void vaddi(Reg r, uint16 imm) { encode(ADDI, r, imm); } - void vsub(Reg r, uint16 idx) { encode(SUB, r, idx); } - void vsubi(Reg r, uint16 imm) { encode(SUBI, r, imm); } - void vjnz(Reg r, int offset) { encode(JNZ, r, static_cast(offset)); } + void vldi(Reg r, uint16_t imm) { encode(LDI, r, imm); } + void vld(Reg r, uint16_t idx) { encode(LD, r, idx); } + void vst(Reg r, uint16_t idx) { encode(ST, r, idx); } + void vadd(Reg r, uint16_t idx) { encode(ADD, r, idx); } + void vaddi(Reg r, uint16_t imm) { encode(ADDI, r, imm); } + void vsub(Reg r, uint16_t idx) { encode(SUB, r, idx); } + void vsubi(Reg r, uint16_t imm) { encode(SUBI, r, imm); } + void vjnz(Reg r, int offset) { encode(JNZ, r, 
static_cast(offset)); } void vput(Reg r) { encode(PUT, r); } void setMark() { @@ -73,12 +73,12 @@ public: void run() { bool debug = false;//true; - uint32 reg[2] = { 0, 0 }; + uint32_t reg[2] = { 0, 0 }; const size_t end = code_.size(); - uint32 pc = 0; + uint32_t pc = 0; for (;;) { - uint32 x = code_[pc]; - uint32 code, r, imm; + uint32_t x = code_[pc]; + uint32_t code, r, imm; decode(code, r, imm, x); if (debug) { printf("---\n"); @@ -149,11 +149,11 @@ public: xor_(edi, edi); mov(mem, (size_t)mem_); const size_t end = code_.size(); - uint32 pc = 0; - uint32 labelNum = 0; + uint32_t pc = 0; + uint32_t labelNum = 0; for (;;) { - uint32 x = code_[pc]; - uint32 code, r, imm; + uint32_t x = code_[pc]; + uint32_t code, r, imm; decode(code, r, imm, x); L(Label::toStr(labelNum++)); switch (code) { @@ -229,18 +229,18 @@ public: ret(); } private: - uint32 mem_[65536]; + uint32_t mem_[65536]; Buffer code_; int mark_; - void decode(uint32& code, uint32& r, uint32& imm, uint32 x) + void decode(uint32_t& code, uint32_t& r, uint32_t& imm, uint32_t x) { code = x >> 24; r = (x >> 16) & 0xff; imm = x & 0xffff; } - void encode(Code code, Reg r, uint16 imm = 0) + void encode(Code code, Reg r, uint16_t imm = 0) { - uint32 x = (code << 24) | (r << 16) | imm; + uint32_t x = (code << 24) | (r << 16) | imm; code_.push_back(x); } }; @@ -262,7 +262,7 @@ public: */ vldi(A, 1); // c vst(A, 0); // p(1) - vldi(B, static_cast(n)); + vldi(B, static_cast(n)); vst(B, 2); // n // lp setMark(); @@ -283,9 +283,9 @@ public: } }; -void fibC(uint32 n) +void fibC(uint32_t n) { - uint32 p, c, t; + uint32_t p, c, t; p = 1; c = 1; lp: diff --git a/sample/toyvm.vcproj b/sample/toyvm.vcproj deleted file mode 100644 index 08d4f05e..00000000 --- a/sample/toyvm.vcproj +++ /dev/null @@ -1,427 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
diff --git a/sample/toyvm.vcxproj b/sample/toyvm.vcxproj new file mode 100644 index 00000000..16635168 --- /dev/null +++ b/sample/toyvm.vcxproj @@ -0,0 +1,228 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {2E41C7AF-39FF-454C-B081-37445378DCB3} + 10.0.17763.0 + + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + + + + + + + + + + + + + + + + + + + + + <_ProjectFileVersion>15.0.27924.0 + + + true + $(ProjectName)\$(Configuration)\ + + + false + $(ProjectName)\$(Configuration)\ + + + true + $(ProjectName)\$(Platform)\$(Configuration)\ + + + false + $(ProjectName)\$(Platform)\$(Configuration)\ + + + + .\Debug/toyvm.tlb + + + + Disabled + ../;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + .\Debug/toyvm.pch + Level4 + true + + + _DEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + true + .\Debug/toyvm.pdb + Console + false + + MachineX86 + + + true + + + + + .\Release/toyvm.tlb + + + + MaxSpeed + OnlyExplicitInline + ../;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + MultiThreaded + true + .\Release/toyvm.pch + Level4 + true + + + NDEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + Console + false + + MachineX86 + + + true + + + + + X64 + .\Debug/toyvm.tlb + + + + Disabled + ../;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + .\Debug/toyvm.pch + Level4 + true + ProgramDatabase + + + _DEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + true + .\Debug/toyvm.pdb + Console + false + + MachineX64 + + + true + + + + + X64 + .\Release/toyvm.tlb + + + + MaxSpeed + OnlyExplicitInline + ../;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + 
MultiThreaded + true + .\Release/toyvm.pch + Level4 + true + + + NDEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + Console + false + + MachineX64 + + + true + + + + + + + + + \ No newline at end of file diff --git a/test/Makefile b/test/Makefile index 37a678cb..ac69b3e4 100644 --- a/test/Makefile +++ b/test/Makefile @@ -1,9 +1,18 @@ -TARGET = make_nm normalize_prefix jmp address bad_address misc cvt_test cvt_test32 +TARGET = make_nm normalize_prefix bad_address misc cvt_test cvt_test32 noexception XBYAK_INC=../xbyak/xbyak.h +UNAME_S=$(shell uname -s) BIT=32 ifeq ($(shell uname -m),x86_64) BIT=64 endif +ONLY_64BIT=0 +ifeq ($(UNAME_S),Darwin) + # 32-bit binary is not supported + ONLY_64BIT=1 +endif +ifeq ($(ONLY_64BIT),0) + TARGET += jmp address +endif ifeq ($(BIT),64) TARGET += jmp64 address64 @@ -36,18 +45,24 @@ cvt_test: cvt_test.cpp ../xbyak/xbyak.h $(CXX) $(CFLAGS) $< -o $@ cvt_test32: cvt_test.cpp ../xbyak/xbyak.h $(CXX) $(CFLAGS) $< -o $@ -DXBYAK32 +noexception: noexception.cpp ../xbyak/xbyak.h + $(CXX) $(CFLAGS) $< -o $@ -fno-exceptions -test: normalize_prefix jmp bad_address $(TARGET) +test_nm: normalize_prefix $(TARGET) $(MAKE) -C ../gen +ifneq ($(ONLY_64BIT),1) ./test_nm.sh + ./test_nm.sh noexcept + ./noexception ./test_nm.sh Y ./test_nm.sh avx512 ./test_address.sh ./jmp + ./cvt_test32 +endif ./bad_address ./misc ./cvt_test - ./cvt_test32 ifeq ($(BIT),64) ./test_address.sh 64 ./test_nm.sh 64 @@ -56,8 +71,10 @@ ifeq ($(BIT),64) endif test_avx: normalize_prefix +ifneq ($(ONLY_64BIT),0) ./test_avx.sh ./test_avx.sh Y +endif ifeq ($(BIT),64) ./test_address.sh 64 ./test_avx.sh 64 @@ -65,10 +82,18 @@ ifeq ($(BIT),64) endif test_avx512: normalize_prefix +ifneq ($(ONLY_64BIT),0) ./test_avx512.sh +endif ifeq ($(BIT),64) ./test_avx512.sh 64 endif + +test: + $(MAKE) test_nm + $(MAKE) test_avx + $(MAKE) test_avx512 + clean: rm -rf *.o $(TARGET) lib_run nm.cpp nm_frame make_512 diff --git a/test/bad_address.cpp b/test/bad_address.cpp index a74dd993..6d74a751 
100644 --- a/test/bad_address.cpp +++ b/test/bad_address.cpp @@ -1,47 +1,28 @@ #include - -#define TEST_EXCEPTION(state) \ -{ \ - num++; \ - bool exception = false; \ - try { \ - state; \ - } catch (...) { \ - exception = true; \ - } \ - if (!exception) { \ - printf("exception should arise for %s\n", #state); \ - err++; \ - } \ -} +#include struct Code : Xbyak::CodeGenerator { Code() { - int err = 0; - int num = 0; - TEST_EXCEPTION(mov(eax, ptr [esp + esp])); - TEST_EXCEPTION(mov(eax, ptr [ax])); // not support - TEST_EXCEPTION(mov(eax, ptr [esp * 4])); - TEST_EXCEPTION(mov(eax, ptr [eax * 16])); - TEST_EXCEPTION(mov(eax, ptr [eax + eax + eax])); - TEST_EXCEPTION(mov(eax, ptr [eax * 2 + ecx * 4])); - TEST_EXCEPTION(mov(eax, ptr [eax * 2 + ecx * 4])); - TEST_EXCEPTION(mov(eax, ptr [xmm0])); - TEST_EXCEPTION(fld(dword [xmm0])); - TEST_EXCEPTION(vgatherdpd(xmm0, ptr [eax * 2], ymm3)); - TEST_EXCEPTION(vgatherdpd(xmm0, ptr [xmm0 + xmm1], ymm3)); + CYBOZU_TEST_EXCEPTION(mov(eax, ptr [esp + esp]), std::exception); + CYBOZU_TEST_EXCEPTION(mov(eax, ptr [ax]), std::exception); // not support + CYBOZU_TEST_EXCEPTION(mov(eax, ptr [esp * 4]), std::exception); + CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax * 16]), std::exception); + CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax + eax + eax]), std::exception); + CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax * 2 + ecx * 4]), std::exception); + CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax * 2 + ecx * 4]), std::exception); + CYBOZU_TEST_EXCEPTION(mov(eax, ptr [xmm0]), std::exception); + CYBOZU_TEST_EXCEPTION(fld(dword [xmm0]), std::exception); + CYBOZU_TEST_EXCEPTION(vgatherdpd(xmm0, ptr [eax * 2], ymm3), std::exception); + CYBOZU_TEST_EXCEPTION(vgatherdpd(xmm0, ptr [xmm0 + xmm1], ymm3), std::exception); #ifdef XBYAK64 - TEST_EXCEPTION(mov(eax, ptr [rax + eax])); - TEST_EXCEPTION(mov(eax, ptr [xmm0 + ymm0])); + CYBOZU_TEST_EXCEPTION(mov(eax, ptr [rax + eax]), std::exception); + CYBOZU_TEST_EXCEPTION(mov(eax, ptr [xmm0 + ymm0]), std::exception); #endif - 
if (!err) { - printf("bad_address test %d ok\n", num); - } } }; -int main() +CYBOZU_TEST_AUTO(exception) { Code c; } diff --git a/test/jmp.cpp b/test/jmp.cpp index 9fe8ff69..e9192b2f 100644 --- a/test/jmp.cpp +++ b/test/jmp.cpp @@ -117,7 +117,7 @@ CYBOZU_TEST_AUTO(test1) int offset; bool isBack; bool isShort; - uint8 result[6]; + uint8_t result[6]; int size; } tbl[] = { { 0, true, true, { 0xeb, 0xfe }, 2 }, @@ -133,7 +133,7 @@ CYBOZU_TEST_AUTO(test1) const Tbl *p = &tbl[i]; for (int k = 0; k < 2; k++) { TestJmp jmp(p->offset, p->isBack, p->isShort, k == 0); - const uint8 *q = (const uint8*)jmp.getCode(); + const uint8_t *q = (const uint8_t*)jmp.getCode(); if (p->isBack) q += p->offset; /* skip nop */ for (int j = 0; j < p->size; j++) { CYBOZU_TEST_EQUAL(q[j], p->result[j]); @@ -205,6 +205,41 @@ CYBOZU_TEST_AUTO(testJmpCx) } } +CYBOZU_TEST_AUTO(loop) +{ + const uint8_t ok[] = { + // lp: + 0x31, 0xC0, // xor eax, eax + 0xE2, 0xFC, // loop lp + 0xE0, 0xFA, // loopne lp + 0xE1, 0xF8, // loope lp + }; + struct Code : CodeGenerator { + Code(bool useLabel) + { + if (useLabel) { + Xbyak::Label lp = L(); + xor_(eax, eax); + loop(lp); + loopne(lp); + loope(lp); + } else { + L("@@"); + xor_(eax, eax); + loop("@b"); + loopne("@b"); + loope("@b"); + } + } + }; + Code code1(false); + CYBOZU_TEST_EQUAL(code1.getSize(), sizeof(ok)); + CYBOZU_TEST_EQUAL_ARRAY(code1.getCode(), ok, sizeof(ok)); + Code code2(true); + CYBOZU_TEST_EQUAL(code2.getSize(), sizeof(ok)); + CYBOZU_TEST_EQUAL_ARRAY(code2.getCode(), ok, sizeof(ok)); +} + #ifdef _MSC_VER #pragma warning(disable : 4310) #endif @@ -337,11 +372,11 @@ CYBOZU_TEST_AUTO(test3) } #endif -Xbyak::uint8 bufL[4096 * 32]; -Xbyak::uint8 bufS[4096 * 2]; +uint8_t bufL[4096 * 32]; +uint8_t bufS[4096 * 2]; struct MyAllocator : Xbyak::Allocator { - Xbyak::uint8 *alloc(size_t size) + uint8_t *alloc(size_t size) { if (size < sizeof(bufS)) { printf("test use bufS(%d)\n", (int)size); @@ -354,7 +389,7 @@ struct MyAllocator : Xbyak::Allocator { 
fprintf(stderr, "no memory %d\n", (int)size); exit(1); } - void free(Xbyak::uint8 *) + void free(uint8_t *) { } } myAlloc; @@ -393,6 +428,7 @@ CYBOZU_TEST_AUTO(test4) } } +#ifndef __APPLE__ CYBOZU_TEST_AUTO(test5) { struct Test5 : Xbyak::CodeGenerator { @@ -440,8 +476,9 @@ CYBOZU_TEST_AUTO(test5) gm.assign((const char*)gc.getCode(), gc.getSize()); CYBOZU_TEST_EQUAL(fm, gm); } +#endif -size_t getValue(const uint8* p) +size_t getValue(const uint8_t* p) { size_t v = 0; for (size_t i = 0; i < sizeof(size_t); i++) { @@ -450,7 +487,7 @@ size_t getValue(const uint8* p) return v; } -void checkAddr(const uint8 *p, size_t offset, size_t expect) +void checkAddr(const uint8_t *p, size_t offset, size_t expect) { size_t v = getValue(p + offset); CYBOZU_TEST_EQUAL(v, size_t(p) + expect); @@ -498,7 +535,7 @@ CYBOZU_TEST_AUTO(MovLabel) const struct { int pos; - uint8 ok; + uint8_t ok; } tbl[] = { #ifdef XBYAK32 { 0x00, 0x90 }, @@ -532,11 +569,11 @@ CYBOZU_TEST_AUTO(MovLabel) const bool useNewLabel = k == 0; MovLabelCode code(grow, useNewLabel); if (grow) code.ready(); - const uint8* const p = code.getCode(); + const uint8_t* const p = code.getCode(); for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { int pos = tbl[i].pos; - uint8 x = p[pos]; - uint8 ok = tbl[i].ok; + uint8_t x = p[pos]; + uint8_t ok = tbl[i].ok; CYBOZU_TEST_EQUAL(x, ok); } #ifdef XBYAK32 @@ -1182,11 +1219,11 @@ CYBOZU_TEST_AUTO(rip_jmp) CYBOZU_TEST_EQUAL(ret, ret1234() + ret9999()); } -#ifdef XBYAK64_GCC +#if 0 CYBOZU_TEST_AUTO(rip_addr) { /* - assume |&x - &code| < 2GiB + we can't assume |&x - &code| < 2GiB anymore */ static int x = 5; struct Code : Xbyak::CodeGenerator { @@ -1201,6 +1238,8 @@ CYBOZU_TEST_AUTO(rip_addr) CYBOZU_TEST_EQUAL(x, 123); } #endif + +#ifndef __APPLE__ CYBOZU_TEST_AUTO(rip_addr_with_fixed_buf) { MIE_ALIGN(4096) static char buf[8192]; @@ -1225,6 +1264,7 @@ CYBOZU_TEST_AUTO(rip_addr_with_fixed_buf) code.setProtectModeRW(); } #endif +#endif struct ReleaseTestCode : Xbyak::CodeGenerator { 
ReleaseTestCode(Label& L1, Label& L2, Label& L3) @@ -1270,3 +1310,76 @@ CYBOZU_TEST_AUTO(release_label_after_code) printf("id=%d %d %d %d %d\n", L1.getId(), L2.getId(), L3.getId(), L4.getId(), L5.getId()); } } + +struct JmpTypeCode : Xbyak::CodeGenerator { + void nops() + { + for (int i = 0; i < 130; i++) { + nop(); + } + } + // return jmp code size + size_t gen(bool pre, bool large, Xbyak::CodeGenerator::LabelType type) + { + Label label; + if (pre) { + L(label); + if (large) nops(); + size_t pos = getSize(); + jmp(label, type); + return getSize() - pos; + } else { + size_t pos = getSize(); + jmp(label, type); + size_t size = getSize() - pos; + if (large) nops(); + L(label); + return size; + } + } +}; + +CYBOZU_TEST_AUTO(setDefaultJmpNEAR) +{ + const Xbyak::CodeGenerator::LabelType T_SHORT = Xbyak::CodeGenerator::T_SHORT; + const Xbyak::CodeGenerator::LabelType T_NEAR = Xbyak::CodeGenerator::T_NEAR; + const Xbyak::CodeGenerator::LabelType T_AUTO = Xbyak::CodeGenerator::T_AUTO; + const struct { + bool pre; + bool large; + Xbyak::CodeGenerator::LabelType type; + size_t expect1; // 0 means exception + size_t expect2; + } tbl[] = { + { false, false, T_SHORT, 2, 2 }, + { false, false, T_NEAR, 5, 5 }, + { false, true, T_SHORT, 0, 0 }, + { false, true, T_NEAR, 5, 5 }, + + { true, false, T_SHORT, 2, 2 }, + { true, false, T_NEAR, 5, 5 }, + { true, true, T_SHORT, 0, 0 }, + { true, true, T_NEAR, 5, 5 }, + + { false, false, T_AUTO, 2, 5 }, + { false, true, T_AUTO, 0, 5 }, + { true, false, T_AUTO, 2, 2 }, + { true, true, T_AUTO, 5, 5 }, + }; + JmpTypeCode code1, code2; + code2.setDefaultJmpNEAR(true); + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + if (tbl[i].expect1) { + size_t size = code1.gen(tbl[i].pre, tbl[i].large, tbl[i].type); + CYBOZU_TEST_EQUAL(size, tbl[i].expect1); + } else { + CYBOZU_TEST_EXCEPTION(code1.gen(tbl[i].pre, tbl[i].large, tbl[i].type), std::exception); + } + if (tbl[i].expect2) { + size_t size = code2.gen(tbl[i].pre, tbl[i].large, 
tbl[i].type); + CYBOZU_TEST_EQUAL(size, tbl[i].expect2); + } else { + CYBOZU_TEST_EXCEPTION(code2.gen(tbl[i].pre, tbl[i].large, tbl[i].type), std::exception); + } + } +} diff --git a/test/make_512.cpp b/test/make_512.cpp index 49d082c4..7b2d7f3b 100644 --- a/test/make_512.cpp +++ b/test/make_512.cpp @@ -9,111 +9,111 @@ using namespace Xbyak; const int bitEnd = 64; -const uint64 YMM_SAE = 1ULL << 0; -const uint64 _XMM = 1ULL << 1; -const uint64 _MEM = 1ULL << 2; -const uint64 _REG32 = 1ULL << 3; -const uint64 EAX = 1ULL << 4; -const uint64 IMM32 = 1ULL << 5; -const uint64 IMM8 = 1ULL << 6; -const uint64 _REG8 = 1ULL << 7; -const uint64 _REG16 = 1ULL << 8; -const uint64 XMM_K = 1ULL << 9; -const uint64 YMM_K = 1ULL << 10; -const uint64 ZMM_K = 1ULL << 11; -const uint64 AX = 1ULL << 12; -const uint64 AL = 1ULL << 13; -const uint64 IMM_1 = 1ULL << 14; -const uint64 MEM8 = 1ULL << 15; -const uint64 MEM16 = 1ULL << 16; -const uint64 MEM32 = 1ULL << 17; -const uint64 VM32Z = 1ULL << 19; -const uint64 K_K = 1ULL << 20; -const uint64 MEM_ONLY_DISP = 1ULL << 21; -const uint64 VM32X_K = 1ULL << 23; -const uint64 _YMM = 1ULL << 24; -const uint64 VM32X_32 = 1ULL << 39; -const uint64 VM32X_64 = 1ULL << 40; -const uint64 VM32Y_32 = 1ULL << 41; -const uint64 VM32Y_64 = 1ULL << 42; -const uint64 VM32Z_K = 1ULL << 32; +const uint64_t YMM_SAE = 1ULL << 0; +const uint64_t _XMM = 1ULL << 1; +const uint64_t _MEM = 1ULL << 2; +const uint64_t _REG32 = 1ULL << 3; +const uint64_t EAX = 1ULL << 4; +const uint64_t IMM32 = 1ULL << 5; +const uint64_t IMM8 = 1ULL << 6; +const uint64_t _REG8 = 1ULL << 7; +const uint64_t _REG16 = 1ULL << 8; +const uint64_t XMM_K = 1ULL << 9; +const uint64_t YMM_K = 1ULL << 10; +const uint64_t ZMM_K = 1ULL << 11; +const uint64_t AX = 1ULL << 12; +const uint64_t AL = 1ULL << 13; +const uint64_t IMM_1 = 1ULL << 14; +const uint64_t MEM8 = 1ULL << 15; +const uint64_t MEM16 = 1ULL << 16; +const uint64_t MEM32 = 1ULL << 17; +const uint64_t VM32Z = 1ULL << 19; +const 
uint64_t K_K = 1ULL << 20; +const uint64_t MEM_ONLY_DISP = 1ULL << 21; +const uint64_t VM32X_K = 1ULL << 23; +const uint64_t _YMM = 1ULL << 24; +const uint64_t VM32X_32 = 1ULL << 39; +const uint64_t VM32X_64 = 1ULL << 40; +const uint64_t VM32Y_32 = 1ULL << 41; +const uint64_t VM32Y_64 = 1ULL << 42; +const uint64_t VM32Z_K = 1ULL << 32; #ifdef XBYAK64 -const uint64 _MEMe = 1ULL << 25; -const uint64 REG32_2 = 1ULL << 26; // r8d, ... -const uint64 REG16_2 = 1ULL << 27; // r8w, ... -const uint64 REG8_2 = 1ULL << 28; // r8b, ... -const uint64 REG8_3 = 1ULL << 29; // spl, ... -const uint64 _REG64 = 1ULL << 30; // rax, ... -const uint64 _REG64_2 = 1ULL << 31; // r8, ... -const uint64 _XMM2 = 1ULL << 33; -const uint64 _YMM2 = 1ULL << 34; -const uint64 VM32X = VM32X_32 | VM32X_64; -const uint64 VM32Y = VM32Y_32 | VM32Y_64; +const uint64_t _MEMe = 1ULL << 25; +const uint64_t REG32_2 = 1ULL << 26; // r8d, ... +const uint64_t REG16_2 = 1ULL << 27; // r8w, ... +const uint64_t REG8_2 = 1ULL << 28; // r8b, ... +const uint64_t REG8_3 = 1ULL << 29; // spl, ... +const uint64_t _REG64 = 1ULL << 30; // rax, ... +const uint64_t _REG64_2 = 1ULL << 31; // r8, ... 
+const uint64_t _XMM2 = 1ULL << 33; +const uint64_t _YMM2 = 1ULL << 34; +const uint64_t VM32X = VM32X_32 | VM32X_64; +const uint64_t VM32Y = VM32Y_32 | VM32Y_64; #else -const uint64 _MEMe = 0; -const uint64 REG32_2 = 0; -const uint64 REG16_2 = 0; -const uint64 REG8_2 = 0; -const uint64 REG8_3 = 0; -const uint64 _REG64 = 0; -const uint64 _REG64_2 = 0; -const uint64 _XMM2 = 0; -const uint64 _YMM2 = 0; -const uint64 VM32X = VM32X_32; -const uint64 VM32Y = VM32Y_32; +const uint64_t _MEMe = 0; +const uint64_t REG32_2 = 0; +const uint64_t REG16_2 = 0; +const uint64_t REG8_2 = 0; +const uint64_t REG8_3 = 0; +const uint64_t _REG64 = 0; +const uint64_t _REG64_2 = 0; +const uint64_t _XMM2 = 0; +const uint64_t _YMM2 = 0; +const uint64_t VM32X = VM32X_32; +const uint64_t VM32Y = VM32Y_32; #endif -const uint64 REG64 = _REG64 | _REG64_2; -const uint64 REG32 = _REG32 | REG32_2 | EAX; -const uint64 REG16 = _REG16 | REG16_2 | AX; -const uint64 REG32e = REG32 | REG64; -const uint64 REG8 = _REG8 | REG8_2|AL; -const uint64 MEM = _MEM | _MEMe; -const uint64 MEM64 = 1ULL << 35; -const uint64 YMM_ER = 1ULL << 36; -const uint64 VM32Y_K = 1ULL << 37; -const uint64 IMM_2 = 1ULL << 38; -const uint64 IMM = IMM_1 | IMM_2; -const uint64 YMM = _YMM | _YMM2; -const uint64 K = 1ULL << 43; -const uint64 _ZMM = 1ULL << 44; -const uint64 _ZMM2 = 1ULL << 45; +const uint64_t REG64 = _REG64 | _REG64_2; +const uint64_t REG32 = _REG32 | REG32_2 | EAX; +const uint64_t REG16 = _REG16 | REG16_2 | AX; +const uint64_t REG32e = REG32 | REG64; +const uint64_t REG8 = _REG8 | REG8_2|AL; +const uint64_t MEM = _MEM | _MEMe; +const uint64_t MEM64 = 1ULL << 35; +const uint64_t YMM_ER = 1ULL << 36; +const uint64_t VM32Y_K = 1ULL << 37; +const uint64_t IMM_2 = 1ULL << 38; +const uint64_t IMM = IMM_1 | IMM_2; +const uint64_t YMM = _YMM | _YMM2; +const uint64_t K = 1ULL << 43; +const uint64_t _ZMM = 1ULL << 44; +const uint64_t _ZMM2 = 1ULL << 45; #ifdef XBYAK64 -const uint64 ZMM = _ZMM | _ZMM2; -const uint64 _YMM3 = 1ULL 
<< 46; +const uint64_t ZMM = _ZMM | _ZMM2; +const uint64_t _YMM3 = 1ULL << 46; #else -const uint64 ZMM = _ZMM; -const uint64 _YMM3 = 0; +const uint64_t ZMM = _ZMM; +const uint64_t _YMM3 = 0; #endif -const uint64 K2 = 1ULL << 47; -const uint64 ZMM_SAE = 1ULL << 48; -const uint64 ZMM_ER = 1ULL << 49; +const uint64_t K2 = 1ULL << 47; +const uint64_t ZMM_SAE = 1ULL << 48; +const uint64_t ZMM_ER = 1ULL << 49; #ifdef XBYAK64 -const uint64 _XMM3 = 1ULL << 50; +const uint64_t _XMM3 = 1ULL << 50; #else -const uint64 _XMM3 = 0; +const uint64_t _XMM3 = 0; #endif -const uint64 XMM = _XMM | _XMM2 | _XMM3; -const uint64 XMM_SAE = 1ULL << 51; +const uint64_t XMM = _XMM | _XMM2 | _XMM3; +const uint64_t XMM_SAE = 1ULL << 51; #ifdef XBYAK64 -const uint64 XMM_KZ = 1ULL << 52; -const uint64 YMM_KZ = 1ULL << 53; -const uint64 ZMM_KZ = 1ULL << 54; +const uint64_t XMM_KZ = 1ULL << 52; +const uint64_t YMM_KZ = 1ULL << 53; +const uint64_t ZMM_KZ = 1ULL << 54; #else -const uint64 XMM_KZ = 0; -const uint64 YMM_KZ = 0; -const uint64 ZMM_KZ = 0; +const uint64_t XMM_KZ = 0; +const uint64_t YMM_KZ = 0; +const uint64_t ZMM_KZ = 0; #endif -const uint64 MEM_K = 1ULL << 55; -const uint64 M_1to2 = 1ULL << 56; -const uint64 M_1to4 = 1ULL << 57; -const uint64 M_1to8 = 1ULL << 58; -const uint64 M_1to16 = 1ULL << 59; -const uint64 XMM_ER = 1ULL << 60; -const uint64 M_xword = 1ULL << 61; -const uint64 M_yword = 1ULL << 62; -const uint64 MY_1to4 = 1ULL << 18; +const uint64_t MEM_K = 1ULL << 55; +const uint64_t M_1to2 = 1ULL << 56; +const uint64_t M_1to4 = 1ULL << 57; +const uint64_t M_1to8 = 1ULL << 58; +const uint64_t M_1to16 = 1ULL << 59; +const uint64_t XMM_ER = 1ULL << 60; +const uint64_t M_xword = 1ULL << 61; +const uint64_t M_yword = 1ULL << 62; +const uint64_t MY_1to4 = 1ULL << 18; -const uint64 NOPARA = 1ULL << (bitEnd - 1); +const uint64_t NOPARA = 1ULL << (bitEnd - 1); class Test { Test(const Test&); @@ -121,7 +121,7 @@ class Test { const bool isXbyak_; int funcNum_; // check all op1, op2, op3 - 
void put(const std::string& nm, uint64 op1 = NOPARA, uint64 op2 = NOPARA, uint64 op3 = NOPARA, uint64 op4 = NOPARA) const + void put(const std::string& nm, uint64_t op1 = NOPARA, uint64_t op2 = NOPARA, uint64_t op3 = NOPARA, uint64_t op4 = NOPARA) const { for (int i = 0; i < bitEnd; i++) { if ((op1 & (1ULL << i)) == 0) continue; @@ -144,7 +144,7 @@ class Test { } } } - void put(const char *nm, uint64 op, const char *xbyak, const char *nasm) const + void put(const char *nm, uint64_t op, const char *xbyak, const char *nasm) const { for (int i = 0; i < bitEnd; i++) { if ((op & (1ULL << i)) == 0) continue; @@ -156,7 +156,7 @@ class Test { printf("\n"); } } - void put(const char *nm, const char *xbyak, const char *nasm = 0, uint64 op = NOPARA) const + void put(const char *nm, const char *xbyak, const char *nasm = 0, uint64_t op = NOPARA) const { if (nasm == 0) nasm = xbyak; for (int i = 0; i < bitEnd; i++) { @@ -169,7 +169,7 @@ class Test { printf("\n"); } } - const char *get(uint64 type) const + const char *get(uint64_t type) const { int idx = (rand() / 31) & 7; switch (type) { @@ -537,7 +537,7 @@ public: printf("vaddpd(%s%s%s, %s, %s%s); dump();\n", r1, pk, pz, r2, r3, saeTblXbyak[sae]); } else { if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "{k%d}", kIdx); - if (z) pz = "{z}"; + if (z && kIdx) pz = "{z}"; printf("vaddpd %s%s%s, %s, %s%s\n", r1, pk, pz, r2, r3, saeTblNASM[sae]); } } @@ -574,9 +574,9 @@ public: for (size_t k = 0; k < N; k++) { #ifdef XBYAK64 for (int kIdx = 0; kIdx < 8; kIdx++) { + put_vaddpd(xTbl[i], xTbl[j], xTbl[k], kIdx); + put_vaddpd(yTbl[i], yTbl[j], yTbl[k], kIdx); for (int z = 0; z < 2; z++) { - put_vaddpd(xTbl[i], xTbl[j], xTbl[k], kIdx, z == 1); - put_vaddpd(yTbl[i], yTbl[j], yTbl[k], kIdx, z == 1); for (int sae = 0; sae < 5; sae++) { put_vaddpd(zTbl[i], zTbl[j], zTbl[k], kIdx, z == 1, sae); } @@ -615,6 +615,13 @@ public: put(p->name, K, _YMM, _YMM | MEM, IMM8); put(p->name, K, _ZMM, _ZMM | MEM, IMM8); } + put("vcmppd", K, XMM, M_1to2, IMM8); + 
put("vcmppd", K, YMM, M_1to4, IMM8); + put("vcmppd", K, ZMM, M_1to8, IMM8); + + put("vcmpps", K, XMM, M_1to4, IMM8); + put("vcmpps", K, YMM, M_1to8, IMM8); + put("vcmpps", K, ZMM, M_1to16, IMM8); } put("vcmppd", K2, ZMM, ZMM_SAE, IMM); #ifdef XBYAK64 diff --git a/test/make_nm.cpp b/test/make_nm.cpp index f109f1ec..47eb0237 100644 --- a/test/make_nm.cpp +++ b/test/make_nm.cpp @@ -1,5 +1,4 @@ #include -#define XBYAK_NO_OP_NAMES #include "xbyak/xbyak.h" #include "xbyak/xbyak_bin2hex.h" #include @@ -11,111 +10,111 @@ using namespace Xbyak; const int bitEnd = 64; -const uint64 MMX = 1ULL << 0; -const uint64 _XMM = 1ULL << 1; -const uint64 _MEM = 1ULL << 2; -const uint64 _REG32 = 1ULL << 3; -const uint64 EAX = 1ULL << 4; -const uint64 IMM32 = 1ULL << 5; -const uint64 IMM8 = 1ULL << 6; -const uint64 _REG8 = 1ULL << 7; -const uint64 _REG16 = 1ULL << 8; -const uint64 NEG8 = 1ULL << 9; -const uint64 IMM16 = 1ULL << 10; -const uint64 NEG16 = 1ULL << 11; -const uint64 AX = 1ULL << 12; -const uint64 AL = 1ULL << 13; -const uint64 IMM_1 = 1ULL << 14; -const uint64 MEM8 = 1ULL << 15; -const uint64 MEM16 = 1ULL << 16; -const uint64 MEM32 = 1ULL << 17; -const uint64 ONE = 1ULL << 19; -const uint64 CL = 1ULL << 20; -const uint64 MEM_ONLY_DISP = 1ULL << 21; -const uint64 NEG32 = 1ULL << 23; -const uint64 _YMM = 1ULL << 24; -const uint64 VM32X_32 = 1ULL << 39; -const uint64 VM32X_64 = 1ULL << 40; -const uint64 VM32Y_32 = 1ULL << 41; -const uint64 VM32Y_64 = 1ULL << 42; +const uint64_t MMX = 1ULL << 0; +const uint64_t _XMM = 1ULL << 1; +const uint64_t _MEM = 1ULL << 2; +const uint64_t _REG32 = 1ULL << 3; +const uint64_t EAX = 1ULL << 4; +const uint64_t IMM32 = 1ULL << 5; +const uint64_t IMM8 = 1ULL << 6; +const uint64_t _REG8 = 1ULL << 7; +const uint64_t _REG16 = 1ULL << 8; +const uint64_t NEG8 = 1ULL << 9; +const uint64_t IMM16 = 1ULL << 10; +const uint64_t NEG16 = 1ULL << 11; +const uint64_t AX = 1ULL << 12; +const uint64_t AL = 1ULL << 13; +const uint64_t IMM_1 = 1ULL << 14; +const 
uint64_t MEM8 = 1ULL << 15; +const uint64_t MEM16 = 1ULL << 16; +const uint64_t MEM32 = 1ULL << 17; +const uint64_t ONE = 1ULL << 19; +const uint64_t CL = 1ULL << 20; +const uint64_t MEM_ONLY_DISP = 1ULL << 21; +const uint64_t NEG32 = 1ULL << 23; +const uint64_t _YMM = 1ULL << 24; +const uint64_t VM32X_32 = 1ULL << 39; +const uint64_t VM32X_64 = 1ULL << 40; +const uint64_t VM32Y_32 = 1ULL << 41; +const uint64_t VM32Y_64 = 1ULL << 42; #ifdef XBYAK64 -const uint64 _MEMe = 1ULL << 25; -const uint64 REG32_2 = 1ULL << 26; // r8d, ... -const uint64 REG16_2 = 1ULL << 27; // r8w, ... -const uint64 REG8_2 = 1ULL << 28; // r8b, ... -const uint64 REG8_3 = 1ULL << 29; // spl, ... -const uint64 _REG64 = 1ULL << 30; // rax, ... -const uint64 _REG64_2 = 1ULL << 31; // r8, ... -const uint64 RAX = 1ULL << 32; -const uint64 _XMM2 = 1ULL << 33; -const uint64 _YMM2 = 1ULL << 34; -const uint64 VM32X = VM32X_32 | VM32X_64; -const uint64 VM32Y = VM32Y_32 | VM32Y_64; +const uint64_t _MEMe = 1ULL << 25; +const uint64_t REG32_2 = 1ULL << 26; // r8d, ... +const uint64_t REG16_2 = 1ULL << 27; // r8w, ... +const uint64_t REG8_2 = 1ULL << 28; // r8b, ... +const uint64_t REG8_3 = 1ULL << 29; // spl, ... +const uint64_t _REG64 = 1ULL << 30; // rax, ... +const uint64_t _REG64_2 = 1ULL << 31; // r8, ... 
+const uint64_t RAX = 1ULL << 32; +const uint64_t _XMM2 = 1ULL << 33; +const uint64_t _YMM2 = 1ULL << 34; +const uint64_t VM32X = VM32X_32 | VM32X_64; +const uint64_t VM32Y = VM32Y_32 | VM32Y_64; #else -const uint64 _MEMe = 0; -const uint64 REG32_2 = 0; -const uint64 REG16_2 = 0; -const uint64 REG8_2 = 0; -const uint64 REG8_3 = 0; -const uint64 _REG64 = 0; -const uint64 _REG64_2 = 0; -const uint64 RAX = 0; -const uint64 _XMM2 = 0; -const uint64 _YMM2 = 0; -const uint64 VM32X = VM32X_32; -const uint64 VM32Y = VM32Y_32; +const uint64_t _MEMe = 0; +const uint64_t REG32_2 = 0; +const uint64_t REG16_2 = 0; +const uint64_t REG8_2 = 0; +const uint64_t REG8_3 = 0; +const uint64_t _REG64 = 0; +const uint64_t _REG64_2 = 0; +const uint64_t RAX = 0; +const uint64_t _XMM2 = 0; +const uint64_t _YMM2 = 0; +const uint64_t VM32X = VM32X_32; +const uint64_t VM32Y = VM32Y_32; #endif -const uint64 REG64 = _REG64 | _REG64_2 | RAX; -const uint64 REG32 = _REG32 | REG32_2 | EAX; -const uint64 REG16 = _REG16 | REG16_2 | AX; -const uint64 REG32e = REG32 | REG64; -const uint64 REG8 = _REG8 | REG8_2|AL; -const uint64 MEM = _MEM | _MEMe; -const uint64 MEM64 = 1ULL << 35; -const uint64 ST0 = 1ULL << 36; -const uint64 STi = 1ULL << 37; -const uint64 IMM_2 = 1ULL << 38; -const uint64 IMM = IMM_1 | IMM_2; -const uint64 XMM = _XMM | _XMM2; -const uint64 YMM = _YMM | _YMM2; -const uint64 K = 1ULL << 43; -const uint64 _ZMM = 1ULL << 44; -const uint64 _ZMM2 = 1ULL << 45; +const uint64_t REG64 = _REG64 | _REG64_2 | RAX; +const uint64_t REG32 = _REG32 | REG32_2 | EAX; +const uint64_t REG16 = _REG16 | REG16_2 | AX; +const uint64_t REG32e = REG32 | REG64; +const uint64_t REG8 = _REG8 | REG8_2|AL; +const uint64_t MEM = _MEM | _MEMe; +const uint64_t MEM64 = 1ULL << 35; +const uint64_t ST0 = 1ULL << 36; +const uint64_t STi = 1ULL << 37; +const uint64_t IMM_2 = 1ULL << 38; +const uint64_t IMM = IMM_1 | IMM_2; +const uint64_t XMM = _XMM | _XMM2; +const uint64_t YMM = _YMM | _YMM2; +const uint64_t K = 1ULL << 
43; +const uint64_t _ZMM = 1ULL << 44; +const uint64_t _ZMM2 = 1ULL << 45; #ifdef XBYAK64 -const uint64 ZMM = _ZMM | _ZMM2; -const uint64 _YMM3 = 1ULL << 46; +const uint64_t ZMM = _ZMM | _ZMM2; +const uint64_t _YMM3 = 1ULL << 46; #else -const uint64 ZMM = _ZMM; -const uint64 _YMM3 = 0; +const uint64_t ZMM = _ZMM; +const uint64_t _YMM3 = 0; #endif -const uint64 K2 = 1ULL << 47; -const uint64 ZMM_SAE = 1ULL << 48; -const uint64 ZMM_ER = 1ULL << 49; +const uint64_t K2 = 1ULL << 47; +const uint64_t ZMM_SAE = 1ULL << 48; +const uint64_t ZMM_ER = 1ULL << 49; #ifdef XBYAK64 -const uint64 _XMM3 = 1ULL << 50; +const uint64_t _XMM3 = 1ULL << 50; #endif -const uint64 XMM_SAE = 1ULL << 51; +const uint64_t XMM_SAE = 1ULL << 51; #ifdef XBYAK64 -const uint64 XMM_KZ = 1ULL << 52; -const uint64 YMM_KZ = 1ULL << 53; -const uint64 ZMM_KZ = 1ULL << 54; +const uint64_t XMM_KZ = 1ULL << 52; +const uint64_t YMM_KZ = 1ULL << 53; +const uint64_t ZMM_KZ = 1ULL << 54; #else -const uint64 XMM_KZ = 0; -const uint64 YMM_KZ = 0; -const uint64 ZMM_KZ = 0; +const uint64_t XMM_KZ = 0; +const uint64_t YMM_KZ = 0; +const uint64_t ZMM_KZ = 0; #endif -const uint64 MEM_K = 1ULL << 55; -const uint64 M_1to2 = 1ULL << 56; -const uint64 M_1to4 = 1ULL << 57; -const uint64 M_1to8 = 1ULL << 58; -const uint64 M_1to16 = 1ULL << 59; -const uint64 XMM_ER = 1ULL << 60; -const uint64 M_xword = 1ULL << 61; -const uint64 M_yword = 1ULL << 62; -const uint64 MY_1to4 = 1ULL << 18; -const uint64 BNDREG = 1ULL << 22; +const uint64_t MEM_K = 1ULL << 55; +const uint64_t M_1to2 = 1ULL << 56; +const uint64_t M_1to4 = 1ULL << 57; +const uint64_t M_1to8 = 1ULL << 58; +const uint64_t M_1to16 = 1ULL << 59; +const uint64_t XMM_ER = 1ULL << 60; +const uint64_t M_xword = 1ULL << 61; +const uint64_t M_yword = 1ULL << 62; +const uint64_t MY_1to4 = 1ULL << 18; +const uint64_t BNDREG = 1ULL << 22; -const uint64 NOPARA = 1ULL << (bitEnd - 1); +const uint64_t NOPARA = 1ULL << (bitEnd - 1); class Test { Test(const Test&); @@ -132,7 +131,7 
@@ class Test { } // check all op1, op2, op3 - void put(const std::string& nm, uint64 op1 = NOPARA, uint64 op2 = NOPARA, uint64 op3 = NOPARA, uint64 op4 = NOPARA) const + void put(const std::string& nm, uint64_t op1 = NOPARA, uint64_t op2 = NOPARA, uint64_t op3 = NOPARA, uint64_t op4 = NOPARA) const { for (int i = 0; i < bitEnd; i++) { if ((op1 & (1ULL << i)) == 0) continue; @@ -155,7 +154,7 @@ class Test { } } } - void put(const char *nm, uint64 op, const char *xbyak, const char *nasm) const + void put(const char *nm, uint64_t op, const char *xbyak, const char *nasm) const { for (int i = 0; i < bitEnd; i++) { if ((op & (1ULL << i)) == 0) continue; @@ -167,7 +166,7 @@ class Test { printf("\n"); } } - void put(const char *nm, const char *xbyak, const char *nasm = 0, uint64 op = NOPARA) const + void put(const char *nm, const char *xbyak, const char *nasm = 0, uint64_t op = NOPARA) const { if (nasm == 0) nasm = xbyak; for (int i = 0; i < bitEnd; i++) { @@ -180,7 +179,7 @@ class Test { printf("\n"); } } - const char *get(uint64 type) const + const char *get(uint64_t type) const { int idx = (rand() / 31) & 7; if (type == ST0) { @@ -460,8 +459,14 @@ class Test { "cqo", "cmpsq", "movsq", + "popfq", + "pushfq", + "lodsq", + "movsq", "scasq", "stosq", + "syscall", + "sysret", #else "aaa", "aad", @@ -469,6 +474,7 @@ class Test { "aas", "daa", "das", + "into", "popad", "popfd", "pusha", @@ -493,9 +499,17 @@ class Test { "cmpsb", "cmpsw", "cmpsd", + "int3", + "leave", + "lodsb", + "lodsw", + "lodsd", "movsb", "movsw", "movsd", + "outsb", + "outsw", + "outsd", "scasb", "scasw", "scasd", @@ -508,6 +522,8 @@ class Test { "stc", "std", "sti", + "sysenter", + "sysexit", "emms", "pause", @@ -540,6 +556,8 @@ class Test { "fabs", "faddp", "fchs", + "fclex", + "fnclex", "fcom", "fcomp", "fcompp", @@ -579,15 +597,52 @@ class Test { "fxtract", "fyl2x", "fyl2xp1", + + "monitorx", + "mwaitx", + "clzero", }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { put(tbl[i]); } + { + const char 
memTbl[][16] = { + "clflush", + "clflushopt", + "fbld", + "fbstp", + "fldcw", + "fldenv", + "frstor", + "fsave", + "fnsave", + "fstcw", + "fnstcw", + "fstenv", + "fnstenv", + "fstsw", + "fnstsw", + "fxrstor", + }; + for (size_t i = 0; i < NUM_OF_ARRAY(memTbl); i++) { + put(memTbl[i], MEM); + } + put("fstsw", AX); + put("fnstsw", AX); + } put("bswap", REG32e); put("lea", REG32e|REG16, MEM); - put("fldcw", MEM); - put("fstcw", MEM); + put("enter", IMM, IMM); + put(isXbyak_ ? "int_" : "int", IMM8); + put(isXbyak_ ? "in_" : "in", AL|AX|EAX, IMM8); + puts(isXbyak_ ? "in_(al, dx); dump();" : "in al, dx"); + puts(isXbyak_ ? "in_(ax, dx); dump();" : "in ax, dx"); + puts(isXbyak_ ? "in_(eax, dx); dump();" : "in eax, dx"); + put(isXbyak_ ? "out_" : "out", IMM8, AL|AX|EAX); + puts(isXbyak_ ? "out_(dx, al); dump();" : "out dx, al"); + puts(isXbyak_ ? "out_(dx, ax); dump();" : "out dx, ax"); + puts(isXbyak_ ? "out_(dx, eax); dump();" : "out dx, eax"); } void putJmp() const { @@ -803,7 +858,7 @@ class Test { SD = 1 << 3 }; const struct { - uint8 code; + uint8_t code; const char *name; } sufTbl[] = { { 0, "ps" }, @@ -812,7 +867,7 @@ class Test { { 0xF2, "sd" }, }; static const struct XmmTbl1 { - uint8 code; + uint8_t code; int mode; const char *name; bool hasImm; @@ -841,7 +896,7 @@ class Test { for (size_t j = 0; j < NUM_OF_ARRAY(sufTbl); j++) { if (!(p->mode & (1 << j))) continue; char buf[16]; - sprintf(buf, "%s%s", p->name, sufTbl[j].name); + snprintf(buf, sizeof(buf), "%s%s", p->name, sufTbl[j].name); if (p->hasImm) { put(buf, XMM, XMM|MEM, IMM); } else { @@ -891,8 +946,8 @@ class Test { { static const struct Tbl { const char *name; - uint64 op1; - uint64 op2; + uint64_t op1; + uint64_t op2; } tbl[] = { { "cvtpi2ps", XMM, MMX|MEM }, { "cvtps2pi", MMX, XMM|MEM }, @@ -928,7 +983,9 @@ class Test { } void putCmov() const { - const char tbl[][4] = { + const struct { + const char *s; + } tbl[] = { "o", "no", "b", @@ -961,12 +1018,12 @@ class Test { "g", }; for (size_t i = 0; i < 
NUM_OF_ARRAY(tbl); i++) { - char buf[16]; - sprintf(buf, "cmov%s", tbl[i]); + char buf[32]; + snprintf(buf, sizeof(buf), "cmov%s", tbl[i].s); put(buf, REG16, REG16|MEM); put(buf, REG32, REG32|MEM); put(buf, REG64, REG64|MEM); - sprintf(buf, "set%s", tbl[i]); + snprintf(buf, sizeof(buf), "set%s", tbl[i].s); put(buf, REG8|REG8_3|MEM); } } @@ -1088,6 +1145,33 @@ class Test { put("pop", REG32|MEM32); #endif } + void putPushPop8_16() const + { + const struct { + int b; + uint32_t v; + } tbl[] = { + { 8, 0x7f }, + { 8, 0x80 }, + { 8, 0xff }, + { 8, 0x100 }, + { 8, 0x12345 }, + { 16, 0x7fff }, + { 16, 0x8000 }, + { 16, 0xffff }, + { 16, 0x10000 }, + { 16, 0x12345 }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const char *b = tbl[i].b == 8 ? "byte" : "word"; + uint32_t v = tbl[i].v; + if (isXbyak_) { + printf("push(%s, 0x%x);dump();\n", b, v); + } else { + printf("push %s 0x%x\n", b, v); + } + } + } void putTest() const { const char *p = "test"; @@ -1121,6 +1205,30 @@ class Test { put("mov", REG64, tbl[i].a, tbl[i].b); } } + void putLoadSeg() const + { + const struct Tbl { + const char *name; + bool support64Bit; + } tbl[] = { +#ifdef XBYAK32 + { "lds", false }, + { "les", false }, +#endif + { "lss", true }, + { "lfs", true }, + { "lgs", true }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + put(p->name, REG16|REG32, MEM); +#ifdef XBYAK64 + if (p->support64Bit) { + put(p->name, REG64, MEM); + } +#endif + } + } // only nasm void putMovImm64() const { @@ -1176,6 +1284,7 @@ class Test { put("cmpxchg8b", MEM); #ifdef XBYAK64 put("cmpxchg16b", MEM); + put("fxrstor64", MEM); #endif { const char tbl[][8] = { @@ -1384,9 +1493,9 @@ class Test { void putMPX() const { #ifdef XBYAK64 - const uint64 reg = REG64; + const uint64_t reg = REG64; #else - const uint64 reg = REG32; + const uint64_t reg = REG32; #endif put("bndcl", BNDREG, reg|MEM); put("bndcu", BNDREG, reg|MEM); @@ -2414,6 +2523,7 @@ public: separateFunc(); putSSE4_2(); 
putSeg(); // same behavior as yasm for mov rax, cx + putPushPop8_16(); #else putSIMPLE(); putReg1(); @@ -2423,6 +2533,7 @@ public: putPushPop(); putTest(); separateFunc(); + putLoadSeg(); putEtc(); putShift(); putShxd(); @@ -2447,7 +2558,6 @@ public: putFpuMem32_64(); separateFunc(); putFpuMem16_32_64(); - put("clflush", MEM); // current nasm is ok putFpu(); putFpuFpu(); putCmp(); @@ -2546,7 +2656,7 @@ public: printf("vaddpd(%s%s%s, %s, %s%s); dump();\n", r1, pk, pz, r2, r3, saeTblXbyak[sae]); } else { if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "{k%d}", kIdx); - if (z) pz = "{z}"; + if (z && kIdx) pz = "{z}"; printf("vaddpd %s%s%s, %s, %s%s\n", r1, pk, pz, r2, r3, saeTblNASM[sae]); } } @@ -2583,9 +2693,9 @@ public: for (size_t k = 0; k < N; k++) { #ifdef XBYAK64 for (int kIdx = 0; kIdx < 8; kIdx++) { + put_vaddpd(xTbl[i], xTbl[j], xTbl[k], kIdx); + put_vaddpd(yTbl[i], yTbl[j], yTbl[k], kIdx); for (int z = 0; z < 2; z++) { - put_vaddpd(xTbl[i], xTbl[j], xTbl[k], kIdx, z == 1); - put_vaddpd(yTbl[i], yTbl[j], yTbl[k], kIdx, z == 1); for (int sae = 0; sae < 5; sae++) { put_vaddpd(zTbl[i], zTbl[j], zTbl[k], kIdx, z == 1, sae); } diff --git a/test/misc.cpp b/test/misc.cpp index 3967fefc..2a55ec2e 100644 --- a/test/misc.cpp +++ b/test/misc.cpp @@ -97,13 +97,43 @@ CYBOZU_TEST_AUTO(align) CYBOZU_TEST_EQUAL(size_t(getCurr()) % alignSize, 0u); } align(alignSize); - const uint8 *p = getCurr(); + const uint8_t *p = getCurr(); // do nothing if aligned align(alignSize); CYBOZU_TEST_EQUAL(p, getCurr()); } } c; } +CYBOZU_TEST_AUTO(kmask) +{ + struct Code : Xbyak::CodeGenerator { + Code() + { + CYBOZU_TEST_EXCEPTION(kmovb(k1, ax), std::exception); + CYBOZU_TEST_EXCEPTION(kmovw(k1, ax), std::exception); + CYBOZU_TEST_EXCEPTION(kmovd(k1, ax), std::exception); + CYBOZU_TEST_EXCEPTION(kmovq(k1, eax), std::exception); +#ifdef XBYAK64 + CYBOZU_TEST_EXCEPTION(kmovb(k1, rax), std::exception); + CYBOZU_TEST_EXCEPTION(kmovw(k1, rax), std::exception); + CYBOZU_TEST_EXCEPTION(kmovd(k1, rax), 
std::exception); + CYBOZU_TEST_NO_EXCEPTION(kmovq(k1, rax)); +#endif + CYBOZU_TEST_NO_EXCEPTION(vmovaps(xm0|k0, ptr[eax])); + checkT_z(); + } + void checkT_z() + { + const uint8_t *p1 = getCurr(); + vmovaps(zm0, ptr[eax]); + const uint8_t *p2 = getCurr(); + vmovaps(zm0|T_z, ptr[eax]); + const uint8_t *end = getCurr(); + CYBOZU_TEST_EQUAL(p2 - p1, end - p2); + CYBOZU_TEST_EQUAL_ARRAY(p1, p2, end - p2); + } + } c; +} #ifdef XBYAK64 CYBOZU_TEST_AUTO(vfmaddps) @@ -683,4 +713,106 @@ CYBOZU_TEST_AUTO(gf2) CYBOZU_TEST_EQUAL(c.getSize(), n); CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); } + +CYBOZU_TEST_AUTO(bf16) +{ + struct Code : Xbyak::CodeGenerator { + Code() + { + vcvtne2ps2bf16(xmm0 | k1, xmm1, ptr [rax + 64]); + vcvtne2ps2bf16(ymm0 | k1 | T_z, ymm0, ptr [rax + 64]); + vcvtne2ps2bf16(zmm0 | k1, zmm1, ptr [rax + 64]); + + vcvtneps2bf16(xmm0, xword [rax + 64]); + vcvtneps2bf16(xmm0 | k1, yword [rax + 64]); + vcvtneps2bf16(ymm0 | k1, zword [rax + 64]); + vcvtneps2bf16(ymm0 | k1, ptr [rax + 64]); + + vdpbf16ps(xmm0 | k1, xmm1, ptr [rax + 64]); + vdpbf16ps(ymm0 | k1, ymm1, ptr [rax + 64]); + vdpbf16ps(zmm0 | k1, zmm1, ptr [rax + 64]); + } + } c; + const uint8_t tbl[] = { + 0x62, 0xf2, 0x77, 0x09, 0x72, 0x40, 0x04, + 0x62, 0xf2, 0x7f, 0xa9, 0x72, 0x40, 0x02, + 0x62, 0xf2, 0x77, 0x49, 0x72, 0x40, 0x01, + + 0x62, 0xf2, 0x7e, 0x08, 0x72, 0x40, 0x04, + 0x62, 0xf2, 0x7e, 0x29, 0x72, 0x40, 0x02, + 0x62, 0xf2, 0x7e, 0x49, 0x72, 0x40, 0x01, + 0x62, 0xf2, 0x7e, 0x49, 0x72, 0x40, 0x01, + + 0x62, 0xf2, 0x76, 0x09, 0x52, 0x40, 0x04, + 0x62, 0xf2, 0x76, 0x29, 0x52, 0x40, 0x02, + 0x62, 0xf2, 0x76, 0x49, 0x52, 0x40, 0x01, + }; + const size_t n = sizeof(tbl) / sizeof(tbl[0]); + CYBOZU_TEST_EQUAL(c.getSize(), n); + CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); +} + +CYBOZU_TEST_AUTO(AMX) +{ + struct Code : Xbyak::CodeGenerator { + Code() + { + ldtilecfg(ptr[rax + rcx * 4 + 64]); + sttilecfg(ptr[rsp + rax * 8 + 128]); + tileloadd(tmm3, ptr[rdi + rdx * 2 + 8]); + tileloaddt1(tmm4, ptr[r8 + 
r9 + 32]); + tilerelease(); + tilestored(ptr[r10 + r11 * 2 + 32], tmm2); + tilezero(tmm7); + tdpbssd(tmm1, tmm2, tmm3); + tdpbsud(tmm2, tmm3, tmm4); + tdpbusd(tmm3, tmm4, tmm5); + tdpbuud(tmm4, tmm5, tmm6); + tdpbf16ps(tmm5, tmm6, tmm7); + } + } c; + // generated code by patch + const uint8_t tbl[] = { + 0xc4, 0xe2, 0x78, 0x49, 0x44, 0x88, 0x40, 0xc4, 0xe2, 0x79, 0x49, 0x84, 0xc4, 0x80, 0x00, 0x00, + 0x00, 0xc4, 0xe2, 0x7b, 0x4b, 0x5c, 0x57, 0x08, 0xc4, 0x82, 0x79, 0x4b, 0x64, 0x08, 0x20, 0xc4, + 0xe2, 0x78, 0x49, 0xc0, 0xc4, 0x82, 0x7a, 0x4b, 0x54, 0x5a, 0x20, 0xc4, 0xe2, 0x7b, 0x49, 0xf8, + 0xc4, 0xe2, 0x63, 0x5e, 0xca, 0xc4, 0xe2, 0x5a, 0x5e, 0xd3, 0xc4, 0xe2, 0x51, 0x5e, 0xdc, 0xc4, + 0xe2, 0x48, 0x5e, 0xe5, 0xc4, 0xe2, 0x42, 0x5c, 0xee, + }; + const size_t n = sizeof(tbl) / sizeof(tbl[0]); + CYBOZU_TEST_EQUAL(c.getSize(), n); + CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); +} + +CYBOZU_TEST_AUTO(tileloadd) +{ + struct Code : Xbyak::CodeGenerator { + Code() + { + tileloadd(tmm1, ptr[r8+r8]); + tileloadd(tmm1, ptr[rax+rcx*4]); + tileloadd(tmm1, ptr[r8+r9*1+0x40]); + } + void notSupported() + { + tileloadd(tmm1, ptr[r8]); + } + void notSupported2() + { + tileloadd(tmm1, ptr[r8*2]); + } + } c; + const uint8_t tbl[] = { + 0xC4, 0x82, 0x7B, 0x4B, 0x0C, 0x00, + 0xC4, 0xE2, 0x7B, 0x4B, 0x0C, 0x88, + 0xC4, 0x82, 0x7B, 0x4B, 0x4C, 0x08, 0x40, + }; + const size_t n = sizeof(tbl) / sizeof(tbl[0]); + CYBOZU_TEST_EQUAL(c.getSize(), n); + CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); + + // current version does not support this sibmem format + CYBOZU_TEST_EXCEPTION(c.notSupported(), std::exception); + CYBOZU_TEST_EXCEPTION(c.notSupported2(), std::exception); +} #endif diff --git a/test/nm_frame.cpp b/test/nm_frame.cpp index 9deceba2..ffdcc978 100644 --- a/test/nm_frame.cpp +++ b/test/nm_frame.cpp @@ -1,7 +1,8 @@ #include -#define XBYAK_NO_OP_NAMES #define XBYAK_ENABLE_OMITTED_OPERAND #include "xbyak/xbyak.h" +#define CYBOZU_TEST_DISABLE_AUTO_RUN +#include "cybozu/test.hpp" 
using namespace Xbyak; @@ -15,39 +16,27 @@ public: #include "nm.cpp" }; -#define _STR(x) #x -#define TEST(syntax) err = true; try { syntax; err = false; } catch (Xbyak::Error) { } catch (...) { } if (!err) printf("should be err:%s;\n", _STR(syntax)) class ErrorSample : public CodeGenerator { void operator=(const ErrorSample&); public: void gen() { - bool err; - TEST(mov(ptr[eax],1)); - TEST(test(ptr[eax],1)); - TEST(adc(ptr[eax],1)); - TEST(setz(eax)); +#ifndef XBYAK_NO_EXCEPTION + CYBOZU_TEST_EXCEPTION(mov(ptr[eax],1), std::exception); + CYBOZU_TEST_EXCEPTION(test(ptr[eax],1), std::exception); + CYBOZU_TEST_EXCEPTION(adc(ptr[eax],1), std::exception); + CYBOZU_TEST_EXCEPTION(setz(eax), std::exception); +#endif } }; + int main() - try { - size_t size = sizeof(Xbyak::Operand); - if (size != 4) { - printf("sizeof Operand %d\n", (int)size); - } - try { - Sample s; - s.gen(); - } catch (std::exception& e) { - printf("ERR:%s\n", e.what()); - } catch (...) { - printf("unknown error\n"); - } + // the size of Operand exceeds 32 bit. 
+ CYBOZU_TEST_EQUAL(sizeof(Xbyak::Operand), 8u); + Sample s; + s.gen(); ErrorSample es; es.gen(); -} catch (std::exception& e) { - printf("err %s\n", e.what()); - return 1; } diff --git a/test/noexception.cpp b/test/noexception.cpp new file mode 100644 index 00000000..04a6dbc2 --- /dev/null +++ b/test/noexception.cpp @@ -0,0 +1,111 @@ +#define XBYAK_NO_EXCEPTION +#include + +using namespace Xbyak; + +int g_err = 0; +int g_test = 0; + +void assertEq(int x, int y) +{ + if (x != y) { + printf("ERR x=%d y=%d\n", x, y); + g_err++; + } + g_test++; +} + +void assertBool(bool b) +{ + if (!b) { + printf("ERR assertBool\n"); + g_err++; + } + g_test++; +} + +void test1() +{ + const int v = 123; + struct Code : CodeGenerator { + Code() + { + mov(eax, v); + ret(); + } + } c; + int (*f)() = c.getCode(); + assertEq(f(), v); + assertEq(Xbyak::GetError(), ERR_NONE); +} + +void test2() +{ + struct Code : CodeGenerator { + Code() + { + Label lp; + L(lp); + L(lp); + } + } c; + assertEq(Xbyak::GetError(), ERR_LABEL_IS_REDEFINED); + Xbyak::ClearError(); +} + +void test3() +{ + static struct EmptyAllocator : Xbyak::Allocator { + uint8_t *alloc() { return 0; } + } emptyAllocator; + struct Code : CodeGenerator { + Code() : CodeGenerator(8, 0, &emptyAllocator) + { + mov(eax, 3); + assertBool(Xbyak::GetError() == 0); + mov(eax, 3); + mov(eax, 3); + assertBool(Xbyak::GetError() != 0); + Xbyak::ClearError(); + assertBool(Xbyak::GetError() == 0); + } + } c; +} + +void test4() +{ + struct Code : CodeGenerator { + Code() + { + mov(ptr[eax], 1); + assertBool(Xbyak::GetError() != 0); + Xbyak::ClearError(); + + test(ptr[eax], 1); + assertBool(Xbyak::GetError() != 0); + Xbyak::ClearError(); + + adc(ptr[eax], 1); + assertBool(Xbyak::GetError() != 0); + Xbyak::ClearError(); + + setz(eax); + assertBool(Xbyak::GetError() != 0); + Xbyak::ClearError(); + } + }; +} + +int main() +{ + test1(); + test2(); + test3(); + test4(); + if (g_err) { + printf("err %d/%d\n", g_err, g_test); + } else { + printf("all ok 
%d\n", g_test); + } + return g_err != 0; +} diff --git a/test/normalize_prefix.cpp b/test/normalize_prefix.cpp index 53eae8dc..889d9253 100644 --- a/test/normalize_prefix.cpp +++ b/test/normalize_prefix.cpp @@ -6,7 +6,7 @@ #include #include -typedef unsigned char uint8; +typedef unsigned char uint8_t; std::string normalize(const std::string& line) { diff --git a/test/set_opt.bat b/test/set_opt.bat new file mode 100644 index 00000000..73a83f61 --- /dev/null +++ b/test/set_opt.bat @@ -0,0 +1,2 @@ +@echo off +set OPT=/EHsc -I../xbyak -I./ /W4 -D_CRT_SECURE_NO_WARNINGS /nologo \ No newline at end of file diff --git a/test/sf_test.cpp b/test/sf_test.cpp index 286ecd1a..038b3f0d 100644 --- a/test/sf_test.cpp +++ b/test/sf_test.cpp @@ -218,7 +218,7 @@ void check(int x, int y) } } -void verify(const Xbyak::uint8 *f, int pNum) +void verify(const Xbyak::uint8_t *f, int pNum) { switch (pNum) { case 0: @@ -264,7 +264,7 @@ void testAll() } for (int tNum = 0; tNum < maxNum; tNum++) { // printf("pNum=%d, tNum=%d, stackSize=%d\n", pNum, tNum | opt, stackSize); - const Xbyak::uint8 *f = code.getCurr(); + const Xbyak::uint8_t *f = code.getCurr(); code.gen(pNum, tNum | opt, stackSize); verify(f, pNum); /* diff --git a/test/test_address.sh b/test/test_address.sh index 8466cc24..d63a4ef5 100755 --- a/test/test_address.sh +++ b/test/test_address.sh @@ -20,7 +20,6 @@ echo "compile nm_frame.cpp" g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame ./nm_frame > x.lst diff ok.lst x.lst && echo "ok" -wc x.lst } diff --git a/test/test_avx.sh b/test/test_avx.sh index 0b42eebf..236f7aa5 100755 --- a/test/test_avx.sh +++ b/test/test_avx.sh @@ -33,12 +33,11 @@ g++ $CFLAGS make_nm.cpp -o make_nm ./make_nm > a.asm echo "asm" $EXE -f$OPT3 a.asm -l a.lst -awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER > ok.lst +awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; 
conti = "" }} ' < a.lst | $FILTER | grep -v "1+1" > ok.lst echo "xbyak" ./make_nm jit > nm.cpp echo "compile nm_frame.cpp" g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame ./nm_frame | $FILTER > x.lst -diff ok.lst x.lst && echo "ok" -exit 0 +diff -B ok.lst x.lst && echo "ok" diff --git a/test/test_avx512.sh b/test/test_avx512.sh index 0a03109f..cce5de0c 100755 --- a/test/test_avx512.sh +++ b/test/test_avx512.sh @@ -29,5 +29,4 @@ echo "xbyak" echo "compile nm_frame.cpp" g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame -DXBYAK_AVX512 ./nm_frame | $FILTER > x.lst -diff ok.lst x.lst && echo "ok" -exit 0 +diff -B ok.lst x.lst && echo "ok" diff --git a/test/test_avx_all.bat b/test/test_avx_all.bat index ec74d281..3bf79f2c 100644 --- a/test/test_avx_all.bat +++ b/test/test_avx_all.bat @@ -7,3 +7,7 @@ echo ** yasm-avx(32bit) *** call test_avx Y echo ** yasm-avx(64bit) *** call test_avx Y64 +echo ** nasm-avx512(32bit) *** +call test_avx512 +echo ** nasm-avx512(64bit) *** +call test_avx512 64 diff --git a/test/test_nm.bat b/test/test_nm.bat index 0d63b650..428eb741 100644 --- a/test/test_nm.bat +++ b/test/test_nm.bat @@ -17,6 +17,10 @@ if /i "%1"=="Y" ( set OPT2=-DUSE_YASM -DXBYAK64 set OPT3=win64 set FILTER=normalize_prefix +) else if /i "%1"=="noexcept" ( + set EXE=nasm.exe + set OPT2=-DXBYAK32 -DXBYAK_NO_EXCEPTION + set OPT3=win32 ) else ( set EXE=nasm.exe set OPT2=-DXBYAK32 @@ -27,7 +31,7 @@ bmake -f Makefile.win all echo cl -I../ make_nm.cpp %OPT% %OPT2% /EHs cl -I../ make_nm.cpp %OPT% %OPT2% /EHs make_nm > a.asm -rm a.lst +rm -rf a.lst echo %EXE% -f %OPT3% -l a.lst a.asm %EXE% -f %OPT3% -l a.lst a.asm rem connect "?????-" and "??" 
@@ -39,5 +43,4 @@ if /i "%Y%"=="1" ( make_nm jit > nm.cpp cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2% nm_frame |%FILTER% > x.lst -diff -w x.lst ok.lst -wc x.lst +diff -wb x.lst ok.lst && echo "ok" diff --git a/test/test_nm.sh b/test/test_nm.sh index 6001ace9..3e328012 100755 --- a/test/test_nm.sh +++ b/test/test_nm.sh @@ -25,6 +25,11 @@ else if ($1 == "avx512") then set OPT2="-DXBYAK64 -DUSE_AVX512" set OPT3=win64 set FILTER=./normalize_prefix +else if ($1 == "noexcept") then + echo "nasm(32bit) without exception" + set EXE=nasm + set OPT2="-DXBYAK32 -DXBYAK_NO_EXCEPTION" + set OPT3=win32 else echo "nasm(32bit)" set EXE=nasm @@ -33,18 +38,17 @@ else endif set CFLAGS="-Wall -fno-operator-names -I../ $OPT2" -echo "compile make_nm.cpp" +echo "compile make_nm.cpp with $CFLAGS" g++ $CFLAGS make_nm.cpp -o make_nm ./make_nm > a.asm echo "asm" $EXE -f$OPT3 a.asm -l a.lst -awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER > ok.lst +awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER | grep -v "1+1" > ok.lst echo "xbyak" ./make_nm jit > nm.cpp echo "compile nm_frame.cpp" g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame ./nm_frame | $FILTER > x.lst -diff ok.lst x.lst && echo "ok" -exit 0 +diff -B ok.lst x.lst && echo "ok" diff --git a/xbyak.sln b/xbyak.sln index 0f23415b..b20afd6f 100644 --- a/xbyak.sln +++ b/xbyak.sln @@ -1,19 +1,20 @@ -þ½Ž¿ -Microsoft Visual Studio Solution File, Format Version 10.00 -# Visual Studio 2008 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bf", "sample\bf.vcproj", "{654BD79B-59D3-4B10-BBAA-158BAB272828}" +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28010.2016 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bf", "sample\bf.vcxproj", 
"{654BD79B-59D3-4B10-BBAA-158BAB272828}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "calc", "sample\calc.vcproj", "{5FDDFAA6-B947-491D-A17E-BBD863846579}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "calc", "sample\calc.vcxproj", "{5FDDFAA6-B947-491D-A17E-BBD863846579}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "quantize", "sample\quantize.vcproj", "{D06753BF-E1F3-4578-9B18-08673327F77C}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "quantize", "sample\quantize.vcxproj", "{D06753BF-E1F3-4578-9B18-08673327F77C}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test0", "sample\test0.vcproj", "{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test0", "sample\test0.vcxproj", "{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "toyvm", "sample\toyvm.vcproj", "{2E41C7AF-39FF-454C-B081-37445378DCB3}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "toyvm", "sample\toyvm.vcxproj", "{2E41C7AF-39FF-454C-B081-37445378DCB3}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_util", "sample\test_util.vcproj", "{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_util", "sample\test_util.vcxproj", "{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "calc2", "sample\calc2.vcproj", "{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "calc2", "sample\calc2.vcxproj", "{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -83,4 +84,7 @@ Global GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {DAE0012B-DDCC-4614-9110-D52E351B2A80} + EndGlobalSection EndGlobal diff --git 
a/xbyak/xbyak.h b/xbyak/xbyak.h index f768927e..4310455b 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -9,10 +9,8 @@ @note modified new BSD license http://opensource.org/licenses/BSD-3-Clause */ -#ifndef XBYAK_NO_OP_NAMES - #if not +0 // trick to detect whether 'not' is operator or not - #error "use -fno-operator-names option if you want to use and(), or(), xor(), not() as function names, Or define XBYAK_NO_OP_NAMES and use and_(), or_(), xor_(), not_()." - #endif +#if (not +0) && !defined(XBYAK_NO_OP_NAMES) // trick to detect whether 'not' is operator or not + #define XBYAK_NO_OP_NAMES #endif #include // for debug print @@ -26,7 +24,9 @@ // #define XBYAK_DISABLE_AVX512 -//#define XBYAK_USE_MMAP_ALLOCATOR +#if !defined(XBYAK_USE_MMAP_ALLOCATOR) && !defined(XBYAK_DONT_USE_MMAP_ALLOCATOR) + #define XBYAK_USE_MMAP_ALLOCATOR +#endif #if !defined(__GNUC__) || defined(__MINGW32__) #undef XBYAK_USE_MMAP_ALLOCATOR #endif @@ -72,13 +72,24 @@ #define XBYAK_STD_UNORDERED_MULTIMAP std::multimap #endif #ifdef _WIN32 - #include + #ifndef WIN32_LEAN_AND_MEAN + #define WIN32_LEAN_AND_MEAN + #endif #include #include + #define XBYAK_TLS __declspec(thread) #elif defined(__GNUC__) #include #include #include + #define XBYAK_TLS __thread +#endif +#if defined(__APPLE__) && !defined(XBYAK_DONT_USE_MAP_JIT) + #define XBYAK_USE_MAP_JIT + #include + #ifndef MAP_JIT + #define MAP_JIT 0x800 + #endif #endif #if !defined(_MSC_VER) || (_MSC_VER >= 1600) #include @@ -98,7 +109,18 @@ #endif #if (__cplusplus >= 201103) || (_MSC_VER >= 1800) + #undef XBYAK_TLS + #define XBYAK_TLS thread_local #define XBYAK_VARIADIC_TEMPLATE + #define XBYAK_NOEXCEPT noexcept +#else + #define XBYAK_NOEXCEPT throw() +#endif + +#if (__cplusplus >= 201402L) || (_MSC_VER >= 1910) // Visual Studio 2017 version 15.0 + #define XBYAK_CONSTEXPR constexpr // require c++14 or later +#else + #define XBYAK_CONSTEXPR #endif #ifdef _MSC_VER @@ -113,21 +135,17 @@ namespace Xbyak { enum { DEFAULT_MAX_CODE_SIZE = 4096, - VERSION = 
0x5780 /* 0xABCD = A.BC(D) */ + VERSION = 0x5970 /* 0xABCD = A.BC(D) */ }; #ifndef MIE_INTEGER_TYPE_DEFINED #define MIE_INTEGER_TYPE_DEFINED -#ifdef _MSC_VER - typedef unsigned __int64 uint64; - typedef __int64 sint64; -#else - typedef uint64_t uint64; - typedef int64_t sint64; -#endif -typedef unsigned int uint32; -typedef unsigned short uint16; -typedef unsigned char uint8; +// for backward compatibility +typedef uint64_t uint64; +typedef int64_t sint64; +typedef uint32_t uint32; +typedef uint16_t uint16; +typedef uint8_t uint8; #endif #ifndef MIE_ALIGN @@ -186,81 +204,120 @@ enum { ERR_INVALID_ZERO, ERR_INVALID_RIP_IN_AUTO_GROW, ERR_INVALID_MIB_ADDRESS, - ERR_INTERNAL, - ERR_X2APIC_IS_NOT_SUPPORTED + ERR_X2APIC_IS_NOT_SUPPORTED, + ERR_NOT_SUPPORTED, + ERR_INTERNAL // Put it at last. }; +inline const char *ConvertErrorToString(int err) +{ + static const char *errTbl[] = { + "none", + "bad addressing", + "code is too big", + "bad scale", + "esp can't be index", + "bad combination", + "bad size of register", + "imm is too big", + "bad align", + "label is redefined", + "label is too far", + "label is not found", + "code is not copyable", + "bad parameter", + "can't protect", + "can't use 64bit disp(use (void*))", + "offset is too big", + "MEM size is not specified", + "bad mem size", + "bad st combination", + "over local label", + "under local label", + "can't alloc", + "T_SHORT is not supported in AutoGrow", + "bad protect mode", + "bad pNum", + "bad tNum", + "bad vsib addressing", + "can't convert", + "label is not set by L()", + "label is already set by L()", + "bad label string", + "err munmap", + "opmask is already set", + "rounding is already set", + "k0 is invalid", + "evex is invalid", + "sae(suppress all exceptions) is invalid", + "er(embedded rounding) is invalid", + "invalid broadcast", + "invalid opmask with memory", + "invalid zero", + "invalid rip in AutoGrow", + "invalid mib address", + "x2APIC is not supported", + "not supported", + "internal error" 
+ }; + assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl)); + return err <= ERR_INTERNAL ? errTbl[err] : "unknown err"; +} + +#ifdef XBYAK_NO_EXCEPTION +namespace local { + +inline int& GetErrorRef() { + static XBYAK_TLS int err = 0; + return err; +} + +inline void SetError(int err) { + if (local::GetErrorRef()) return; // keep the first err code + local::GetErrorRef() = err; +} + +} // local + +inline void ClearError() { + local::GetErrorRef() = 0; +} +inline int GetError() { return local::GetErrorRef(); } + +#define XBYAK_THROW(err) { local::SetError(err); return; } +#define XBYAK_THROW_RET(err, r) { local::SetError(err); return r; } + +#else class Error : public std::exception { int err_; public: explicit Error(int err) : err_(err) { if (err_ < 0 || err_ > ERR_INTERNAL) { - fprintf(stderr, "bad err=%d in Xbyak::Error\n", err_); - exit(1); + err_ = ERR_INTERNAL; } } operator int() const { return err_; } - const char *what() const throw() + const char *what() const XBYAK_NOEXCEPT { - static const char *errTbl[] = { - "none", - "bad addressing", - "code is too big", - "bad scale", - "esp can't be index", - "bad combination", - "bad size of register", - "imm is too big", - "bad align", - "label is redefined", - "label is too far", - "label is not found", - "code is not copyable", - "bad parameter", - "can't protect", - "can't use 64bit disp(use (void*))", - "offset is too big", - "MEM size is not specified", - "bad mem size", - "bad st combination", - "over local label", - "under local label", - "can't alloc", - "T_SHORT is not supported in AutoGrow", - "bad protect mode", - "bad pNum", - "bad tNum", - "bad vsib addressing", - "can't convert", - "label is not set by L()", - "label is already set by L()", - "bad label string", - "err munmap", - "opmask is already set", - "rounding is already set", - "k0 is invalid", - "evex is invalid", - "sae(suppress all exceptions) is invalid", - "er(embedded rounding) is invalid", - "invalid broadcast", - "invalid opmask 
with memory", - "invalid zero", - "invalid rip in AutoGrow", - "invalid mib address", - "internal error", - "x2APIC is not supported" - }; - assert((size_t)err_ < sizeof(errTbl) / sizeof(*errTbl)); - return errTbl[err_]; + return ConvertErrorToString(err_); } }; +// dummy functions +inline void ClearError() { } +inline int GetError() { return 0; } + inline const char *ConvertErrorToString(const Error& err) { return err.what(); } +#define XBYAK_THROW(err) { throw Error(err); } +#define XBYAK_THROW_RET(err, r) { throw Error(err); } + +#endif + inline void *AlignedMalloc(size_t size, size_t alignment) { #ifdef __MINGW32__ @@ -286,7 +343,7 @@ inline void AlignedFree(void *p) } template -inline const To CastTo(From p) throw() +inline const To CastTo(From p) XBYAK_NOEXCEPT { return (const To)(size_t)(p); } @@ -294,15 +351,15 @@ namespace inner { static const size_t ALIGN_PAGE_SIZE = 4096; -inline bool IsInDisp8(uint32 x) { return 0xFFFFFF80 <= x || x <= 0x7F; } -inline bool IsInInt32(uint64 x) { return ~uint64(0x7fffffffu) <= x || x <= 0x7FFFFFFFU; } +inline bool IsInDisp8(uint32_t x) { return 0xFFFFFF80 <= x || x <= 0x7F; } +inline bool IsInInt32(uint64_t x) { return ~uint64_t(0x7fffffffu) <= x || x <= 0x7FFFFFFFU; } -inline uint32 VerifyInInt32(uint64 x) +inline uint32_t VerifyInInt32(uint64_t x) { #ifdef XBYAK64 - if (!IsInInt32(x)) throw Error(ERR_OFFSET_IS_TOO_BIG); + if (!IsInInt32(x)) XBYAK_THROW_RET(ERR_OFFSET_IS_TOO_BIG, 0) #endif - return static_cast(x); + return static_cast(x); } enum LabelMode { @@ -317,23 +374,50 @@ enum LabelMode { custom allocator */ struct Allocator { - virtual uint8 *alloc(size_t size) { return reinterpret_cast(AlignedMalloc(size, inner::ALIGN_PAGE_SIZE)); } - virtual void free(uint8 *p) { AlignedFree(p); } + virtual uint8_t *alloc(size_t size) { return reinterpret_cast(AlignedMalloc(size, inner::ALIGN_PAGE_SIZE)); } + virtual void free(uint8_t *p) { AlignedFree(p); } virtual ~Allocator() {} /* override to return false if you call 
protect() manually */ virtual bool useProtect() const { return true; } }; #ifdef XBYAK_USE_MMAP_ALLOCATOR +#ifdef XBYAK_USE_MAP_JIT +namespace util { + +inline int getMacOsVersionPure() +{ + char buf[64]; + size_t size = sizeof(buf); + int err = sysctlbyname("kern.osrelease", buf, &size, NULL, 0); + if (err != 0) return 0; + char *endp; + int major = strtol(buf, &endp, 10); + if (*endp != '.') return 0; + return major; +} + +inline int getMacOsVersion() +{ + static const int version = getMacOsVersionPure(); + return version; +} + +} // util +#endif class MmapAllocator : Allocator { typedef XBYAK_STD_UNORDERED_MAP SizeList; SizeList sizeList_; public: - uint8 *alloc(size_t size) + uint8_t *alloc(size_t size) { const size_t alignedSizeM1 = inner::ALIGN_PAGE_SIZE - 1; size = (size + alignedSizeM1) & ~alignedSizeM1; -#ifdef MAP_ANONYMOUS +#if defined(XBYAK_USE_MAP_JIT) + int mode = MAP_PRIVATE | MAP_ANONYMOUS; + const int mojaveVersion = 18; + if (util::getMacOsVersion() >= mojaveVersion) mode |= MAP_JIT; +#elif defined(MAP_ANONYMOUS) const int mode = MAP_PRIVATE | MAP_ANONYMOUS; #elif defined(MAP_ANON) const int mode = MAP_PRIVATE | MAP_ANON; @@ -341,17 +425,17 @@ public: #error "not supported" #endif void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, mode, -1, 0); - if (p == MAP_FAILED) throw Error(ERR_CANT_ALLOC); + if (p == MAP_FAILED) XBYAK_THROW_RET(ERR_CANT_ALLOC, 0) assert(p); sizeList_[(uintptr_t)p] = size; - return (uint8*)p; + return (uint8_t*)p; } - void free(uint8 *p) + void free(uint8_t *p) { if (p == 0) return; SizeList::iterator i = sizeList_.find((uintptr_t)p); - if (i == sizeList_.end()) throw Error(ERR_BAD_PARAMETER); - if (munmap((void*)i->first, i->second) < 0) throw Error(ERR_MUNMAP); + if (i == sizeList_.end()) XBYAK_THROW(ERR_BAD_PARAMETER) + if (munmap((void*)i->first, i->second) < 0) XBYAK_THROW(ERR_MUNMAP) sizeList_.erase(i); } }; @@ -361,10 +445,10 @@ class Address; class Reg; class Operand { - static const uint8 EXT8BIT = 0x20; + static 
const uint8_t EXT8BIT = 0x20; unsigned int idx_:6; // 0..31 + EXT8BIT = 1 if spl/bpl/sil/dil - unsigned int kind_:9; - unsigned int bit_:10; + unsigned int kind_:10; + unsigned int bit_:14; protected: unsigned int zero_:1; unsigned int mask_:3; @@ -381,7 +465,8 @@ public: YMM = 1 << 5, ZMM = 1 << 6, OPMASK = 1 << 7, - BNDREG = 1 << 8 + BNDREG = 1 << 8, + TMM = 1 << 9 }; enum Code { #ifdef XBYAK64 @@ -395,55 +480,55 @@ public: AX = 0, CX, DX, BX, SP, BP, SI, DI, AL = 0, CL, DL, BL, AH, CH, DH, BH }; - Operand() : idx_(0), kind_(0), bit_(0), zero_(0), mask_(0), rounding_(0) { } - Operand(int idx, Kind kind, int bit, bool ext8bit = 0) - : idx_(static_cast(idx | (ext8bit ? EXT8BIT : 0))) + XBYAK_CONSTEXPR Operand() : idx_(0), kind_(0), bit_(0), zero_(0), mask_(0), rounding_(0) { } + XBYAK_CONSTEXPR Operand(int idx, Kind kind, int bit, bool ext8bit = 0) + : idx_(static_cast(idx | (ext8bit ? EXT8BIT : 0))) , kind_(kind) , bit_(bit) , zero_(0), mask_(0), rounding_(0) { assert((bit_ & (bit_ - 1)) == 0); // bit must be power of two } - Kind getKind() const { return static_cast(kind_); } - int getIdx() const { return idx_ & (EXT8BIT - 1); } - bool isNone() const { return kind_ == 0; } - bool isMMX() const { return is(MMX); } - bool isXMM() const { return is(XMM); } - bool isYMM() const { return is(YMM); } - bool isZMM() const { return is(ZMM); } - bool isXMEM() const { return is(XMM | MEM); } - bool isYMEM() const { return is(YMM | MEM); } - bool isZMEM() const { return is(ZMM | MEM); } - bool isOPMASK() const { return is(OPMASK); } - bool isBNDREG() const { return is(BNDREG); } - bool isREG(int bit = 0) const { return is(REG, bit); } - bool isMEM(int bit = 0) const { return is(MEM, bit); } - bool isFPU() const { return is(FPU); } - bool isExt8bit() const { return (idx_ & EXT8BIT) != 0; } - bool isExtIdx() const { return (getIdx() & 8) != 0; } - bool isExtIdx2() const { return (getIdx() & 16) != 0; } - bool hasEvex() const { return isZMM() || isExtIdx2() || getOpmaskIdx() || 
getRounding(); } - bool hasRex() const { return isExt8bit() || isREG(64) || isExtIdx(); } - bool hasZero() const { return zero_; } - int getOpmaskIdx() const { return mask_; } - int getRounding() const { return rounding_; } + XBYAK_CONSTEXPR Kind getKind() const { return static_cast(kind_); } + XBYAK_CONSTEXPR int getIdx() const { return idx_ & (EXT8BIT - 1); } + XBYAK_CONSTEXPR bool isNone() const { return kind_ == 0; } + XBYAK_CONSTEXPR bool isMMX() const { return is(MMX); } + XBYAK_CONSTEXPR bool isXMM() const { return is(XMM); } + XBYAK_CONSTEXPR bool isYMM() const { return is(YMM); } + XBYAK_CONSTEXPR bool isZMM() const { return is(ZMM); } + XBYAK_CONSTEXPR bool isTMM() const { return is(TMM); } + XBYAK_CONSTEXPR bool isXMEM() const { return is(XMM | MEM); } + XBYAK_CONSTEXPR bool isYMEM() const { return is(YMM | MEM); } + XBYAK_CONSTEXPR bool isZMEM() const { return is(ZMM | MEM); } + XBYAK_CONSTEXPR bool isOPMASK() const { return is(OPMASK); } + XBYAK_CONSTEXPR bool isBNDREG() const { return is(BNDREG); } + XBYAK_CONSTEXPR bool isREG(int bit = 0) const { return is(REG, bit); } + XBYAK_CONSTEXPR bool isMEM(int bit = 0) const { return is(MEM, bit); } + XBYAK_CONSTEXPR bool isFPU() const { return is(FPU); } + XBYAK_CONSTEXPR bool isExt8bit() const { return (idx_ & EXT8BIT) != 0; } + XBYAK_CONSTEXPR bool isExtIdx() const { return (getIdx() & 8) != 0; } + XBYAK_CONSTEXPR bool isExtIdx2() const { return (getIdx() & 16) != 0; } + XBYAK_CONSTEXPR bool hasEvex() const { return isZMM() || isExtIdx2() || getOpmaskIdx() || getRounding(); } + XBYAK_CONSTEXPR bool hasRex() const { return isExt8bit() || isREG(64) || isExtIdx(); } + XBYAK_CONSTEXPR bool hasZero() const { return zero_; } + XBYAK_CONSTEXPR int getOpmaskIdx() const { return mask_; } + XBYAK_CONSTEXPR int getRounding() const { return rounding_; } void setKind(Kind kind) { - if ((kind & (XMM|YMM|ZMM)) == 0) return; + if ((kind & (XMM|YMM|ZMM|TMM)) == 0) return; kind_ = kind; - bit_ = kind == XMM ? 
128 : kind == YMM ? 256 : 512; + bit_ = kind == XMM ? 128 : kind == YMM ? 256 : kind == ZMM ? 512 : 8192; } // err if MMX/FPU/OPMASK/BNDREG void setBit(int bit); - void setOpmaskIdx(int idx, bool ignore_idx0 = false) + void setOpmaskIdx(int idx, bool /*ignore_idx0*/ = true) { - if (!ignore_idx0 && idx == 0) throw Error(ERR_K0_IS_INVALID); - if (mask_) throw Error(ERR_OPMASK_IS_ALREADY_SET); + if (mask_) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) mask_ = idx; } void setRounding(int idx) { - if (rounding_) throw Error(ERR_ROUNDING_IS_ALREADY_SET); + if (rounding_) XBYAK_THROW(ERR_ROUNDING_IS_ALREADY_SET) rounding_ = idx; } void setZero() { zero_ = true; } @@ -456,12 +541,12 @@ public: return AH <= idx && idx <= BH; } // any bit is accetable if bit == 0 - bool is(int kind, uint32 bit = 0) const + XBYAK_CONSTEXPR bool is(int kind, uint32_t bit = 0) const { return (kind == 0 || (kind_ & kind)) && (bit == 0 || (bit_ & bit)); // cf. you can set (8|16) } - bool isBit(uint32 bit) const { return (bit_ & bit) != 0; } - uint32 getBit() const { return bit_; } + XBYAK_CONSTEXPR bool isBit(uint32_t bit) const { return (bit_ & bit) != 0; } + XBYAK_CONSTEXPR uint32_t getBit() const { return bit_; } const char *toString() const { const int idx = getIdx(); @@ -480,6 +565,11 @@ public: } else if (isOPMASK()) { static const char *tbl[8] = { "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7" }; return tbl[idx]; + } else if (isTMM()) { + static const char *tbl[8] = { + "tmm0", "tmm1", "tmm2", "tmm3", "tmm4", "tmm5", "tmm6", "tmm7" + }; + return tbl[idx]; } else if (isZMM()) { static const char *tbl[32] = { "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7", "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15", @@ -508,7 +598,7 @@ public: static const char *tbl[4] = { "bnd0", "bnd1", "bnd2", "bnd3" }; return tbl[idx]; } - throw Error(ERR_INTERNAL); + XBYAK_THROW_RET(ERR_INTERNAL, 0); } bool isEqualIfNotInherited(const Operand& rhs) const { return idx_ == rhs.idx_ && 
kind_ == rhs.kind_ && bit_ == rhs.bit_ && zero_ == rhs.zero_ && mask_ == rhs.mask_ && rounding_ == rhs.rounding_; } bool operator==(const Operand& rhs) const; @@ -519,13 +609,13 @@ public: inline void Operand::setBit(int bit) { - if (bit != 8 && bit != 16 && bit != 32 && bit != 64 && bit != 128 && bit != 256 && bit != 512) goto ERR; + if (bit != 8 && bit != 16 && bit != 32 && bit != 64 && bit != 128 && bit != 256 && bit != 512 && bit != 8192) goto ERR; if (isBit(bit)) return; - if (is(MEM)) { + if (is(MEM | OPMASK)) { bit_ = bit; return; } - if (is(REG | XMM | YMM | ZMM)) { + if (is(REG | XMM | YMM | ZMM | TMM)) { int idx = getIdx(); // err if converting ah, bh, ch, dh if (isREG(8) && (4 <= idx && idx < 8) && !isExt8bit()) goto ERR; @@ -547,16 +637,18 @@ inline void Operand::setBit(int bit) case 128: kind = XMM; break; case 256: kind = YMM; break; case 512: kind = ZMM; break; + case 8192: kind = TMM; break; } idx_ = idx; kind_ = kind; bit_ = bit; + if (bit >= 128) return; // keep mask_ and rounding_ mask_ = 0; rounding_ = 0; return; } ERR: - throw Error(ERR_CANT_CONVERT); + XBYAK_THROW(ERR_CANT_CONVERT) } class Label; @@ -569,17 +661,17 @@ struct Reg64; #endif class Reg : public Operand { public: - Reg() { } - Reg(int idx, Kind kind, int bit = 0, bool ext8bit = false) : Operand(idx, kind, bit, ext8bit) { } + XBYAK_CONSTEXPR Reg() { } + XBYAK_CONSTEXPR Reg(int idx, Kind kind, int bit = 0, bool ext8bit = false) : Operand(idx, kind, bit, ext8bit) { } // convert to Reg8/Reg16/Reg32/Reg64/XMM/YMM/ZMM Reg changeBit(int bit) const { Reg r(*this); r.setBit(bit); return r; } - uint8 getRexW() const { return isREG(64) ? 8 : 0; } - uint8 getRexR() const { return isExtIdx() ? 4 : 0; } - uint8 getRexX() const { return isExtIdx() ? 2 : 0; } - uint8 getRexB() const { return isExtIdx() ? 1 : 0; } - uint8 getRex(const Reg& base = Reg()) const + uint8_t getRexW() const { return isREG(64) ? 8 : 0; } + uint8_t getRexR() const { return isExtIdx() ? 
4 : 0; } + uint8_t getRexX() const { return isExtIdx() ? 2 : 0; } + uint8_t getRexB() const { return isExtIdx() ? 1 : 0; } + uint8_t getRex(const Reg& base = Reg()) const { - uint8 rex = getRexW() | getRexR() | base.getRexW() | base.getRexB(); + uint8_t rex = getRexW() | getRexR() | base.getRexW() | base.getRexB(); if (rex || isExt8bit() || base.isExt8bit()) rex |= 0x40; return rex; } @@ -598,15 +690,15 @@ inline const Reg& Operand::getReg() const } struct Reg8 : public Reg { - explicit Reg8(int idx = 0, bool ext8bit = false) : Reg(idx, Operand::REG, 8, ext8bit) { } + explicit XBYAK_CONSTEXPR Reg8(int idx = 0, bool ext8bit = false) : Reg(idx, Operand::REG, 8, ext8bit) { } }; struct Reg16 : public Reg { - explicit Reg16(int idx = 0) : Reg(idx, Operand::REG, 16) { } + explicit XBYAK_CONSTEXPR Reg16(int idx = 0) : Reg(idx, Operand::REG, 16) { } }; struct Mmx : public Reg { - explicit Mmx(int idx = 0, Kind kind = Operand::MMX, int bit = 64) : Reg(idx, kind, bit) { } + explicit XBYAK_CONSTEXPR Mmx(int idx = 0, Kind kind = Operand::MMX, int bit = 64) : Reg(idx, kind, bit) { } }; struct EvexModifierRounding { @@ -617,35 +709,41 @@ struct EvexModifierRounding { T_RZ_SAE = 4, T_SAE = 5 }; - explicit EvexModifierRounding(int rounding) : rounding(rounding) {} + explicit XBYAK_CONSTEXPR EvexModifierRounding(int rounding) : rounding(rounding) {} int rounding; }; -struct EvexModifierZero{EvexModifierZero() {}}; +struct EvexModifierZero{ XBYAK_CONSTEXPR EvexModifierZero() {}}; struct Xmm : public Mmx { - explicit Xmm(int idx = 0, Kind kind = Operand::XMM, int bit = 128) : Mmx(idx, kind, bit) { } - Xmm(Kind kind, int idx) : Mmx(idx, kind, kind == XMM ? 128 : kind == YMM ? 256 : 512) { } + explicit XBYAK_CONSTEXPR Xmm(int idx = 0, Kind kind = Operand::XMM, int bit = 128) : Mmx(idx, kind, bit) { } + XBYAK_CONSTEXPR Xmm(Kind kind, int idx) : Mmx(idx, kind, kind == XMM ? 128 : kind == YMM ? 
256 : 512) { } Xmm operator|(const EvexModifierRounding& emr) const { Xmm r(*this); r.setRounding(emr.rounding); return r; } Xmm copyAndSetIdx(int idx) const { Xmm ret(*this); ret.setIdx(idx); return ret; } Xmm copyAndSetKind(Operand::Kind kind) const { Xmm ret(*this); ret.setKind(kind); return ret; } }; struct Ymm : public Xmm { - explicit Ymm(int idx = 0, Kind kind = Operand::YMM, int bit = 256) : Xmm(idx, kind, bit) { } + explicit XBYAK_CONSTEXPR Ymm(int idx = 0, Kind kind = Operand::YMM, int bit = 256) : Xmm(idx, kind, bit) { } Ymm operator|(const EvexModifierRounding& emr) const { Ymm r(*this); r.setRounding(emr.rounding); return r; } }; struct Zmm : public Ymm { - explicit Zmm(int idx = 0) : Ymm(idx, Operand::ZMM, 512) { } + explicit XBYAK_CONSTEXPR Zmm(int idx = 0) : Ymm(idx, Operand::ZMM, 512) { } Zmm operator|(const EvexModifierRounding& emr) const { Zmm r(*this); r.setRounding(emr.rounding); return r; } }; +#ifdef XBYAK64 +struct Tmm : public Reg { + explicit XBYAK_CONSTEXPR Tmm(int idx = 0, Kind kind = Operand::TMM, int bit = 8192) : Reg(idx, kind, bit) { } +}; +#endif + struct Opmask : public Reg { - explicit Opmask(int idx = 0) : Reg(idx, Operand::OPMASK, 64) {} + explicit XBYAK_CONSTEXPR Opmask(int idx = 0) : Reg(idx, Operand::OPMASK, 64) {} }; struct BoundsReg : public Reg { - explicit BoundsReg(int idx = 0) : Reg(idx, Operand::BNDREG, 128) {} + explicit XBYAK_CONSTEXPR BoundsReg(int idx = 0) : Reg(idx, Operand::BNDREG, 128) {} }; templateT operator|(const T& x, const Opmask& k) { T r(x); r.setOpmaskIdx(k.getIdx()); return r; } @@ -653,43 +751,43 @@ templateT operator|(const T& x, const EvexModifierZero&) { T r(x); r.se templateT operator|(const T& x, const EvexModifierRounding& emr) { T r(x); r.setRounding(emr.rounding); return r; } struct Fpu : public Reg { - explicit Fpu(int idx = 0) : Reg(idx, Operand::FPU, 32) { } + explicit XBYAK_CONSTEXPR Fpu(int idx = 0) : Reg(idx, Operand::FPU, 32) { } }; struct Reg32e : public Reg { - explicit Reg32e(int 
idx, int bit) : Reg(idx, Operand::REG, bit) {} + explicit XBYAK_CONSTEXPR Reg32e(int idx, int bit) : Reg(idx, Operand::REG, bit) {} }; struct Reg32 : public Reg32e { - explicit Reg32(int idx = 0) : Reg32e(idx, 32) {} + explicit XBYAK_CONSTEXPR Reg32(int idx = 0) : Reg32e(idx, 32) {} }; #ifdef XBYAK64 struct Reg64 : public Reg32e { - explicit Reg64(int idx = 0) : Reg32e(idx, 64) {} + explicit XBYAK_CONSTEXPR Reg64(int idx = 0) : Reg32e(idx, 64) {} }; struct RegRip { - sint64 disp_; + int64_t disp_; const Label* label_; bool isAddr_; - explicit RegRip(sint64 disp = 0, const Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {} + explicit XBYAK_CONSTEXPR RegRip(int64_t disp = 0, const Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {} friend const RegRip operator+(const RegRip& r, int disp) { return RegRip(r.disp_ + disp, r.label_, r.isAddr_); } friend const RegRip operator-(const RegRip& r, int disp) { return RegRip(r.disp_ - disp, r.label_, r.isAddr_); } - friend const RegRip operator+(const RegRip& r, sint64 disp) { + friend const RegRip operator+(const RegRip& r, int64_t disp) { return RegRip(r.disp_ + disp, r.label_, r.isAddr_); } - friend const RegRip operator-(const RegRip& r, sint64 disp) { + friend const RegRip operator-(const RegRip& r, int64_t disp) { return RegRip(r.disp_ - disp, r.label_, r.isAddr_); } friend const RegRip operator+(const RegRip& r, const Label& label) { - if (r.label_ || r.isAddr_) throw Error(ERR_BAD_ADDRESSING); + if (r.label_ || r.isAddr_) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegRip()); return RegRip(r.disp_, &label); } friend const RegRip operator+(const RegRip& r, const void *addr) { - if (r.label_ || r.isAddr_) throw Error(ERR_BAD_ADDRESSING); - return RegRip(r.disp_ + (sint64)addr, 0, true); + if (r.label_ || r.isAddr_) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegRip()); + return RegRip(r.disp_ + (int64_t)addr, 0, true); } }; #endif @@ -724,7 +822,7 @@ public: enum { 
es, cs, ss, ds, fs, gs }; - explicit Segment(int idx) : idx_(idx) { assert(0 <= idx_ && idx_ < 6); } + explicit XBYAK_CONSTEXPR Segment(int idx) : idx_(idx) { assert(0 <= idx_ && idx_ < 6); } int getIdx() const { return idx_; } const char *toString() const { @@ -743,14 +841,14 @@ public: #else enum { i32e = 32 }; #endif - RegExp(size_t disp = 0) : scale_(0), disp_(disp) { } - RegExp(const Reg& r, int scale = 1) + XBYAK_CONSTEXPR RegExp(size_t disp = 0) : scale_(0), disp_(disp) { } + XBYAK_CONSTEXPR RegExp(const Reg& r, int scale = 1) : scale_(scale) , disp_(0) { - if (!r.isREG(i32e) && !r.is(Reg::XMM|Reg::YMM|Reg::ZMM)) throw Error(ERR_BAD_SIZE_OF_REGISTER); + if (!r.isREG(i32e) && !r.is(Reg::XMM|Reg::YMM|Reg::ZMM|Reg::TMM)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) if (scale == 0) return; - if (scale != 1 && scale != 2 && scale != 4 && scale != 8) throw Error(ERR_BAD_SCALE); + if (scale != 1 && scale != 2 && scale != 4 && scale != 8) XBYAK_THROW(ERR_BAD_SCALE) if (r.getBit() >= 128 || scale != 1) { // xmm/ymm is always index index_ = r; } else { @@ -776,20 +874,20 @@ public: const Reg& getIndex() const { return index_; } int getScale() const { return scale_; } size_t getDisp() const { return disp_; } - void verify() const + XBYAK_CONSTEXPR void verify() const { - if (base_.getBit() >= 128) throw Error(ERR_BAD_SIZE_OF_REGISTER); + if (base_.getBit() >= 128) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) if (index_.getBit() && index_.getBit() <= 64) { - if (index_.getIdx() == Operand::ESP) throw Error(ERR_ESP_CANT_BE_INDEX); - if (base_.getBit() && base_.getBit() != index_.getBit()) throw Error(ERR_BAD_SIZE_OF_REGISTER); + if (index_.getIdx() == Operand::ESP) XBYAK_THROW(ERR_ESP_CANT_BE_INDEX) + if (base_.getBit() && base_.getBit() != index_.getBit()) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) } } friend RegExp operator+(const RegExp& a, const RegExp& b); friend RegExp operator-(const RegExp& e, size_t disp); - uint8 getRex() const + uint8_t getRex() const { - uint8 rex = 
index_.getRexX() | base_.getRexB(); - return rex ? uint8(rex | 0x40) : 0; + uint8_t rex = index_.getRexX() | base_.getRexB(); + return rex ? uint8_t(rex | 0x40) : 0; } private: /* @@ -804,12 +902,12 @@ private: inline RegExp operator+(const RegExp& a, const RegExp& b) { - if (a.index_.getBit() && b.index_.getBit()) throw Error(ERR_BAD_ADDRESSING); + if (a.index_.getBit() && b.index_.getBit()) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp()) RegExp ret = a; if (!ret.index_.getBit()) { ret.index_ = b.index_; ret.scale_ = b.scale_; } if (b.base_.getBit()) { if (ret.base_.getBit()) { - if (ret.index_.getBit()) throw Error(ERR_BAD_ADDRESSING); + if (ret.index_.getBit()) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp()) // base + base => base + index * 1 ret.index_ = b.base_; // [reg + esp] => [esp + reg] @@ -853,9 +951,9 @@ class CodeArray { inner::LabelMode mode; AddrInfo(size_t _codeOffset, size_t _jmpAddr, int _jmpSize, inner::LabelMode _mode) : codeOffset(_codeOffset), jmpAddr(_jmpAddr), jmpSize(_jmpSize), mode(_mode) {} - uint64 getVal(const uint8 *top) const + uint64_t getVal(const uint8_t *top) const { - uint64 disp = (mode == inner::LaddTop) ? jmpAddr + size_t(top) : (mode == inner::LasIs) ? jmpAddr : jmpAddr - size_t(top); + uint64_t disp = (mode == inner::LaddTop) ? jmpAddr + size_t(top) : (mode == inner::LasIs) ? 
jmpAddr : jmpAddr - size_t(top); if (jmpSize == 4) disp = inner::VerifyInInt32(disp); return disp; } @@ -871,7 +969,7 @@ class CodeArray { Allocator *alloc_; protected: size_t maxSize_; - uint8 *top_; + uint8_t *top_; size_t size_; bool isCalledCalcJmpAddress_; @@ -882,8 +980,8 @@ protected: void growMemory() { const size_t newSize = (std::max)(DEFAULT_MAX_CODE_SIZE, maxSize_ * 2); - uint8 *newTop = alloc_->alloc(newSize); - if (newTop == 0) throw Error(ERR_CANT_ALLOC); + uint8_t *newTop = alloc_->alloc(newSize); + if (newTop == 0) XBYAK_THROW(ERR_CANT_ALLOC) for (size_t i = 0; i < size_; i++) newTop[i] = top_[i]; alloc_->free(top_); top_ = newTop; @@ -896,7 +994,7 @@ protected: { if (isCalledCalcJmpAddress_) return; for (AddrInfoList::const_iterator i = addrInfoList_.begin(), ie = addrInfoList_.end(); i != ie; ++i) { - uint64 disp = i->getVal(top_); + uint64_t disp = i->getVal(top_); rewrite(i->codeOffset, disp, i->jmpSize); } isCalledCalcJmpAddress_ = true; @@ -911,14 +1009,14 @@ public: : type_(userPtr == AutoGrow ? AUTO_GROW : (userPtr == 0 || userPtr == DontSetProtectRWE) ? ALLOC_BUF : USER_BUF) , alloc_(allocator ? allocator : (Allocator*)&defaultAllocator_) , maxSize_(maxSize) - , top_(type_ == USER_BUF ? reinterpret_cast(userPtr) : alloc_->alloc((std::max)(maxSize, 1))) + , top_(type_ == USER_BUF ? 
reinterpret_cast(userPtr) : alloc_->alloc((std::max)(maxSize, 1))) , size_(0) , isCalledCalcJmpAddress_(false) { - if (maxSize_ > 0 && top_ == 0) throw Error(ERR_CANT_ALLOC); + if (maxSize_ > 0 && top_ == 0) XBYAK_THROW(ERR_CANT_ALLOC) if ((type_ == ALLOC_BUF && userPtr != DontSetProtectRWE && useProtect()) && !setProtectMode(PROTECT_RWE, false)) { alloc_->free(top_); - throw Error(ERR_CANT_PROTECT); + XBYAK_THROW(ERR_CANT_PROTECT) } } virtual ~CodeArray() @@ -932,7 +1030,7 @@ public: { bool isOK = protect(top_, maxSize_, mode); if (isOK) return true; - if (throwException) throw Error(ERR_CANT_PROTECT); + if (throwException) XBYAK_THROW_RET(ERR_CANT_PROTECT, false) return false; } bool setProtectModeRE(bool throwException = true) { return setProtectMode(PROTECT_RE, throwException); } @@ -949,38 +1047,38 @@ public: if (type_ == AUTO_GROW) { growMemory(); } else { - throw Error(ERR_CODE_IS_TOO_BIG); + XBYAK_THROW(ERR_CODE_IS_TOO_BIG) } } - top_[size_++] = static_cast(code); + top_[size_++] = static_cast(code); } - void db(const uint8 *code, size_t codeSize) + void db(const uint8_t *code, size_t codeSize) { for (size_t i = 0; i < codeSize; i++) db(code[i]); } - void db(uint64 code, size_t codeSize) + void db(uint64_t code, size_t codeSize) { - if (codeSize > 8) throw Error(ERR_BAD_PARAMETER); - for (size_t i = 0; i < codeSize; i++) db(static_cast(code >> (i * 8))); + if (codeSize > 8) XBYAK_THROW(ERR_BAD_PARAMETER) + for (size_t i = 0; i < codeSize; i++) db(static_cast(code >> (i * 8))); } - void dw(uint32 code) { db(code, 2); } - void dd(uint32 code) { db(code, 4); } - void dq(uint64 code) { db(code, 8); } - const uint8 *getCode() const { return top_; } + void dw(uint32_t code) { db(code, 2); } + void dd(uint32_t code) { db(code, 4); } + void dq(uint64_t code) { db(code, 8); } + const uint8_t *getCode() const { return top_; } template const F getCode() const { return reinterpret_cast(top_); } - const uint8 *getCurr() const { return &top_[size_]; } + const uint8_t 
*getCurr() const { return &top_[size_]; } template const F getCurr() const { return reinterpret_cast(&top_[size_]); } size_t getSize() const { return size_; } void setSize(size_t size) { - if (size > maxSize_) throw Error(ERR_OFFSET_IS_TOO_BIG); + if (size > maxSize_) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG) size_ = size; } void dump() const { - const uint8 *p = getCode(); + const uint8_t *p = getCode(); size_t bufSize = getSize(); size_t remain = bufSize; for (int i = 0; i < 4; i++) { @@ -1005,13 +1103,13 @@ public: @param disp [in] offset from the next of jmp @param size [in] write size(1, 2, 4, 8) */ - void rewrite(size_t offset, uint64 disp, size_t size) + void rewrite(size_t offset, uint64_t disp, size_t size) { assert(offset < maxSize_); - if (size != 1 && size != 2 && size != 4 && size != 8) throw Error(ERR_BAD_PARAMETER); - uint8 *const data = top_ + offset; + if (size != 1 && size != 2 && size != 4 && size != 8) XBYAK_THROW(ERR_BAD_PARAMETER) + uint8_t *const data = top_ + offset; for (size_t i = 0; i < size; i++) { - data[i] = static_cast(disp >> (i * 8)); + data[i] = static_cast(disp >> (i * 8)); } } void save(size_t offset, size_t val, int size, inner::LabelMode mode) @@ -1068,9 +1166,9 @@ public: @param alignedSize [in] power of two @return aligned addr by alingedSize */ - static inline uint8 *getAlignedAddress(uint8 *addr, size_t alignedSize = 16) + static inline uint8_t *getAlignedAddress(uint8_t *addr, size_t alignedSize = 16) { - return reinterpret_cast((reinterpret_cast(addr) + alignedSize - 1) & ~(alignedSize - static_cast(1))); + return reinterpret_cast((reinterpret_cast(addr) + alignedSize - 1) & ~(alignedSize - static_cast(1))); } }; @@ -1082,15 +1180,15 @@ public: M_rip, M_ripAddr }; - Address(uint32 sizeBit, bool broadcast, const RegExp& e) + XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegExp& e) : Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), broadcast_(broadcast) { e_.verify(); } #ifdef XBYAK64 - explicit 
Address(size_t disp) + explicit XBYAK_CONSTEXPR Address(size_t disp) : Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), broadcast_(false){ } - Address(uint32 sizeBit, bool broadcast, const RegRip& addr) + XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegRip& addr) : Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), broadcast_(broadcast) { } #endif RegExp getRegExp(bool optimize = true) const @@ -1101,7 +1199,7 @@ public: bool is32bit() const { return e_.getBase().getBit() == 32 || e_.getIndex().getBit() == 32; } bool isOnlyDisp() const { return !e_.getBase().getBit() && !e_.getIndex().getBit(); } // for mov eax size_t getDisp() const { return e_.getDisp(); } - uint8 getRex() const + uint8_t getRex() const { if (mode_ != M_ModRM) return 0; return getRegExp().getRex(); @@ -1138,9 +1236,9 @@ class AddressFrame { void operator=(const AddressFrame&); AddressFrame(const AddressFrame&); public: - const uint32 bit_; + const uint32_t bit_; const bool broadcast_; - explicit AddressFrame(uint32 bit, bool broadcast = false) : bit_(bit), broadcast_(broadcast) { } + explicit XBYAK_CONSTEXPR AddressFrame(uint32_t bit, bool broadcast = false) : bit_(bit), broadcast_(broadcast) { } Address operator[](const RegExp& e) const { return Address(bit_, broadcast_, e); @@ -1150,7 +1248,7 @@ public: return Address(bit_, broadcast_, RegExp(reinterpret_cast(disp))); } #ifdef XBYAK64 - Address operator[](uint64 disp) const { return Address(disp); } + Address operator[](uint64_t disp) const { return Address(disp); } Address operator[](const RegRip& addr) const { return Address(bit_, broadcast_, addr); } #endif }; @@ -1179,7 +1277,7 @@ public: ~Label(); void clear() { mgr = 0; id = 0; } int getId() const { return id; } - const uint8 *getAddress() const; + const uint8_t *getAddress() const; // backward compatibility static inline std::string toStr(int num) @@ -1237,7 +1335,7 @@ class LabelManager { // add label 
typename DefList::value_type item(labelId, addrOffset); std::pair ret = defList.insert(item); - if (!ret.second) throw Error(ERR_LABEL_IS_REDEFINED); + if (!ret.second) XBYAK_THROW(ERR_LABEL_IS_REDEFINED) // search undefined label for (;;) { typename UndefList::iterator itr = undefList.find(labelId); @@ -1252,9 +1350,9 @@ class LabelManager { } else { disp = addrOffset - jmp->endOfJmp + jmp->disp; #ifdef XBYAK64 - if (jmp->jmpSize <= 4 && !inner::IsInInt32(disp)) throw Error(ERR_OFFSET_IS_TOO_BIG); + if (jmp->jmpSize <= 4 && !inner::IsInInt32(disp)) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG) #endif - if (jmp->jmpSize == 1 && !inner::IsInDisp8((uint32)disp)) throw Error(ERR_LABEL_IS_TOO_FAR); + if (jmp->jmpSize == 1 && !inner::IsInDisp8((uint32_t)disp)) XBYAK_THROW(ERR_LABEL_IS_TOO_FAR) } if (base_->isAutoGrow()) { base_->save(offset, disp, jmp->jmpSize, jmp->mode); @@ -1326,6 +1424,7 @@ public: clabelDefList_.clear(); clabelUndefList_.clear(); resetLabelPtrList(); + ClearError(); } void enterLocal() { @@ -1333,14 +1432,14 @@ public: } void leaveLocal() { - if (stateList_.size() <= 2) throw Error(ERR_UNDER_LOCAL_LABEL); - if (hasUndefinedLabel_inner(stateList_.back().undefList)) throw Error(ERR_LABEL_IS_NOT_FOUND); + if (stateList_.size() <= 2) XBYAK_THROW(ERR_UNDER_LOCAL_LABEL) + if (hasUndefinedLabel_inner(stateList_.back().undefList)) XBYAK_THROW(ERR_LABEL_IS_NOT_FOUND) stateList_.pop_back(); } void set(CodeArray *base) { base_ = base; } void defineSlabel(std::string label) { - if (label == "@b" || label == "@f") throw Error(ERR_BAD_LABEL_STR); + if (label == "@b" || label == "@f") XBYAK_THROW(ERR_BAD_LABEL_STR) if (label == "@@") { SlabelDefList& defList = stateList_.front().defList; SlabelDefList::iterator i = defList.find("@f"); @@ -1367,7 +1466,7 @@ public: void assign(Label& dst, const Label& src) { ClabelDefList::const_iterator i = clabelDefList_.find(src.id); - if (i == clabelDefList_.end()) throw Error(ERR_LABEL_ISNOT_SET_BY_L); + if (i == clabelDefList_.end()) 
XBYAK_THROW(ERR_LABEL_ISNOT_SET_BY_L) define_inner(clabelDefList_, clabelUndefList_, dst.id, i->second.offset); dst.mgr = this; labelPtrList_.insert(&dst); @@ -1379,7 +1478,7 @@ public: if (defList.find("@f") != defList.end()) { label = "@f"; } else if (defList.find("@b") == defList.end()) { - throw Error(ERR_LABEL_IS_NOT_FOUND); + XBYAK_THROW_RET(ERR_LABEL_IS_NOT_FOUND, false) } } else if (label == "@f") { if (defList.find("@f") != defList.end()) { @@ -1410,7 +1509,7 @@ public: return false; } bool hasUndefClabel() const { return hasUndefinedLabel_inner(clabelUndefList_); } - const uint8 *getCode() const { return base_->getCode(); } + const uint8_t *getCode() const { return base_->getCode(); } bool isReady() const { return !base_->isAutoGrow() || base_->isCalledCalcJmpAddress(); } }; @@ -1422,7 +1521,7 @@ inline Label::Label(const Label& rhs) } inline Label& Label::operator=(const Label& rhs) { - if (id) throw Error(ERR_LABEL_IS_ALREADY_SET_BY_L); + if (id) XBYAK_THROW_RET(ERR_LABEL_IS_ALREADY_SET_BY_L, *this) id = rhs.id; mgr = rhs.mgr; if (mgr) mgr->incRefCount(id, this); @@ -1432,7 +1531,7 @@ inline Label::~Label() { if (id && mgr) mgr->decRefCount(id, this); } -inline const uint8* Label::getAddress() const +inline const uint8_t* Label::getAddress() const { if (mgr == 0 || !mgr->isReady()) return 0; size_t offset; @@ -1451,7 +1550,7 @@ private: CodeGenerator operator=(const CodeGenerator&); // don't call #ifdef XBYAK64 enum { i32e = 32 | 64, BIT = 64 }; - static const size_t dummyAddr = (size_t(0x11223344) << 32) | 55667788; + static const uint64_t dummyAddr = uint64_t(0x1122334455667788ull); typedef Reg64 NativeReg; #else enum { i32e = 32, BIT = 32 }; @@ -1495,10 +1594,10 @@ private: } void rex(const Operand& op1, const Operand& op2 = Operand()) { - uint8 rex = 0; + uint8_t rex = 0; const Operand *p1 = &op1, *p2 = &op2; if (p1->isMEM()) std::swap(p1, p2); - if (p1->isMEM()) throw Error(ERR_BAD_COMBINATION); + if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) 
if (p2->isMEM()) { const Address& addr = p2->getAddress(); if (BIT == 64 && addr.is32bit()) db(0x67); @@ -1558,13 +1657,13 @@ private: bool r = reg.isExtIdx(); bool b = base.isExtIdx(); int idx = v ? v->getIdx() : 0; - if ((idx | reg.getIdx() | base.getIdx()) >= 16) throw Error(ERR_BAD_COMBINATION); - uint32 pp = (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0; - uint32 vvvv = (((~idx) & 15) << 3) | (is256 ? 4 : 0) | pp; + if ((idx | reg.getIdx() | base.getIdx()) >= 16) XBYAK_THROW(ERR_BAD_COMBINATION) + uint32_t pp = (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0; + uint32_t vvvv = (((~idx) & 15) << 3) | (is256 ? 4 : 0) | pp; if (!b && !x && !w && (type & T_0F)) { db(0xC5); db((r ? 0 : 0x80) | vvvv); } else { - uint32 mmmm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0; + uint32_t mmmm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0; db(0xC4); db((r ? 0 : 0x80) | (x ? 0 : 0x40) | (b ? 0 : 0x20) | mmmm); db((w << 7) | vvvv); } db(code); @@ -1572,29 +1671,29 @@ private: void verifySAE(const Reg& r, int type) const { if (((type & T_SAE_X) && r.isXMM()) || ((type & T_SAE_Y) && r.isYMM()) || ((type & T_SAE_Z) && r.isZMM())) return; - throw Error(ERR_SAE_IS_INVALID); + XBYAK_THROW(ERR_SAE_IS_INVALID) } void verifyER(const Reg& r, int type) const { if (((type & T_ER_X) && r.isXMM()) || ((type & T_ER_Y) && r.isYMM()) || ((type & T_ER_Z) && r.isZMM())) return; - throw Error(ERR_ER_IS_INVALID); + XBYAK_THROW(ERR_ER_IS_INVALID) } // (a, b, c) contains non zero two or three values then err int verifyDuplicate(int a, int b, int c, int err) { int v = a | b | c; - if ((a > 0 && a != v) + (b > 0 && b != v) + (c > 0 && c != v) > 0) return Error(err); + if ((a > 0 && a != v) + (b > 0 && b != v) + (c > 0 && c != v) > 0) XBYAK_THROW_RET(err, 0) return v; } - int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32 VL = 0, bool Hi16Vidx = 
false) + int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32_t VL = 0, bool Hi16Vidx = false) { - if (!(type & (T_EVEX | T_MUST_EVEX))) throw Error(ERR_EVEX_IS_INVALID); + if (!(type & (T_EVEX | T_MUST_EVEX))) XBYAK_THROW_RET(ERR_EVEX_IS_INVALID, 0) int w = (type & T_EW1) ? 1 : 0; - uint32 mm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0; - uint32 pp = (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0; + uint32_t mm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0; + uint32_t pp = (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0; int idx = v ? v->getIdx() : 0; - uint32 vvvv = ~idx; + uint32_t vvvv = ~idx; bool R = !reg.isExtIdx(); bool X = x ? false : !base.isExtIdx2(); @@ -1632,6 +1731,7 @@ private: bool Vp = !((v ? v->isExtIdx2() : 0) | Hi16Vidx); bool z = reg.hasZero() || base.hasZero() || (v ? v->hasZero() : false); if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET); + if (aaa == 0) z = 0; // clear T_z if mask is not set db(0x62); db((R ? 0x80 : 0) | (X ? 0x40 : 0) | (B ? 0x20 : 0) | (Rp ? 0x10 : 0) | (mm & 3)); db((w == 1 ? 
0x80 : 0) | ((vvvv & 15) << 3) | 4 | (pp & 3)); @@ -1641,16 +1741,16 @@ private: } void setModRM(int mod, int r1, int r2) { - db(static_cast((mod << 6) | ((r1 & 7) << 3) | (r2 & 7))); + db(static_cast((mod << 6) | ((r1 & 7) << 3) | (r2 & 7))); } void setSIB(const RegExp& e, int reg, int disp8N = 0) { - size_t disp64 = e.getDisp(); + uint64_t disp64 = e.getDisp(); #ifdef XBYAK64 - size_t high = disp64 >> 32; - if (high != 0 && high != 0xFFFFFFFF) throw Error(ERR_OFFSET_IS_TOO_BIG); + uint64_t high = disp64 >> 32; + if (high != 0 && high != 0xFFFFFFFF) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG) #endif - uint32 disp = static_cast(disp64); + uint32_t disp = static_cast(disp64); const Reg& base = e.getBase(); const Reg& index = e.getIndex(); const int baseIdx = base.getIdx(); @@ -1669,7 +1769,7 @@ private: } } else { // disp must be casted to signed - uint32 t = static_cast(static_cast(disp) / disp8N); + uint32_t t = static_cast(static_cast(disp) / disp8N); if ((disp % disp8N) == 0 && inner::IsInDisp8(t)) { disp = t; mod = mod01; @@ -1699,7 +1799,7 @@ private: } } LabelManager labelMgr_; - bool isInDisp16(uint32 x) const { return 0xFFFF8000 <= x || x <= 0x7FFF; } + bool isInDisp16(uint32_t x) const { return 0xFFFF8000 <= x || x <= 0x7FFF; } void opModR(const Reg& reg1, const Reg& reg2, int code0, int code1 = NONE, int code2 = NONE) { rex(reg2, reg1); @@ -1708,23 +1808,31 @@ private: } void opModM(const Address& addr, const Reg& reg, int code0, int code1 = NONE, int code2 = NONE, int immSize = 0) { - if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP); + if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP) rex(addr, reg); db(code0 | (reg.isBit(8) ? 
0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2); opAddr(addr, reg.getIdx(), immSize); } + void opLoadSeg(const Address& addr, const Reg& reg, int code0, int code1 = NONE) + { + if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP) + if (reg.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) + rex(addr, reg); + db(code0); if (code1 != NONE) db(code1); + opAddr(addr, reg.getIdx()); + } void opMIB(const Address& addr, const Reg& reg, int code0, int code1) { - if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP); - if (addr.getMode() != Address::M_ModRM) throw Error(ERR_INVALID_MIB_ADDRESS); + if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP) + if (addr.getMode() != Address::M_ModRM) XBYAK_THROW(ERR_INVALID_MIB_ADDRESS) if (BIT == 64 && addr.is32bit()) db(0x67); const RegExp& regExp = addr.getRegExp(false); - uint8 rex = regExp.getRex(); + uint8_t rex = regExp.getRex(); if (rex) db(rex); db(code0); db(code1); setSIB(regExp, reg.getIdx()); } - void makeJmp(uint32 disp, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref) + void makeJmp(uint32_t disp, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref) { const int shortJmpSize = 2; const int longHeaderSize = longPref ? 
2 : 1; @@ -1732,13 +1840,14 @@ private: if (type != T_NEAR && inner::IsInDisp8(disp - shortJmpSize)) { db(shortCode); db(disp - shortJmpSize); } else { - if (type == T_SHORT) throw Error(ERR_LABEL_IS_TOO_FAR); + if (type == T_SHORT) XBYAK_THROW(ERR_LABEL_IS_TOO_FAR) if (longPref) db(longPref); db(longCode); dd(disp - longJmpSize); } } + bool isNEAR(LabelType type) const { return type == T_NEAR || (type == T_AUTO && isDefaultJmpNEAR_); } template - void opJmp(T& label, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref) + void opJmp(T& label, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref) { if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory(); /* avoid splitting code of jmp */ size_t offset = 0; @@ -1746,7 +1855,7 @@ private: makeJmp(inner::VerifyInInt32(offset - size_), type, shortCode, longCode, longPref); } else { int jmpSize = 0; - if (type == T_NEAR) { + if (isNEAR(type)) { jmpSize = 4; if (longPref) db(longPref); db(longCode); dd(0); @@ -1758,17 +1867,17 @@ private: labelMgr_.addUndefinedLabel(label, jmp); } } - void opJmpAbs(const void *addr, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref = 0) + void opJmpAbs(const void *addr, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref = 0) { if (isAutoGrow()) { - if (type != T_NEAR) throw Error(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW); + if (!isNEAR(type)) XBYAK_THROW(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW) if (size_ + 16 >= maxSize_) growMemory(); if (longPref) db(longPref); db(longCode); dd(0); save(size_ - 4, size_t(addr) - size_, 4, inner::Labs); } else { - makeJmp(inner::VerifyInInt32(reinterpret_cast(addr) - getCurr()), type, shortCode, longCode, longPref); + makeJmp(inner::VerifyInInt32(reinterpret_cast(addr) - getCurr()), type, shortCode, longCode, longPref); } } @@ -1777,7 +1886,7 @@ private: // disp8N = 0(normal), disp8N = 1(force disp32), disp8N = {2, 4, 8} ; compressed displacement void opAddr(const Address &addr, int reg, 
int immSize = 0, int disp8N = 0, bool permitVisb = false) { - if (!permitVisb && addr.isVsib()) throw Error(ERR_BAD_VSIB_ADDRESSING); + if (!permitVisb && addr.isVsib()) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) if (addr.getMode() == Address::M_ModRM) { setSIB(addr.getRegExp(), reg, disp8N); } else if (addr.getMode() == Address::M_rip || addr.getMode() == Address::M_ripAddr) { @@ -1787,7 +1896,7 @@ private: } else { size_t disp = addr.getDisp(); if (addr.getMode() == Address::M_ripAddr) { - if (isAutoGrow()) throw Error(ERR_INVALID_RIP_IN_AUTO_GROW); + if (isAutoGrow()) XBYAK_THROW(ERR_INVALID_RIP_IN_AUTO_GROW) disp -= (size_t)getCurr() + 4 + immSize; } dd(inner::VerifyInInt32(disp)); @@ -1797,7 +1906,7 @@ private: /* preCode is for SSSE3/SSE4 */ void opGen(const Operand& reg, const Operand& op, int code, int pref, bool isValid(const Operand&, const Operand&), int imm8 = NONE, int preCode = NONE) { - if (isValid && !isValid(reg, op)) throw Error(ERR_BAD_COMBINATION); + if (isValid && !isValid(reg, op)) XBYAK_THROW(ERR_BAD_COMBINATION) if (pref != NONE) db(pref); if (op.isMEM()) { opModM(op.getAddress(), reg.getReg(), 0x0F, preCode, code, (imm8 != NONE) ? 
1 : 0); @@ -1824,7 +1933,7 @@ private: } else if (op1.isMEM() && op2.isXMM()) { opModM(op1.getAddress(), op2.getReg(), 0x0F, code | 1); } else { - throw Error(ERR_BAD_COMBINATION); + XBYAK_THROW(ERR_BAD_COMBINATION) } } void opExt(const Operand& op, const Mmx& mmx, int code, int imm, bool hasMMX2 = false) @@ -1845,7 +1954,7 @@ private: } else if (op.isMEM()) { opModM(op.getAddress(), Reg(ext, Operand::REG, opBit), code0, code1, code2, immSize); } else { - throw Error(ERR_BAD_COMBINATION); + XBYAK_THROW(ERR_BAD_COMBINATION) } } void opShift(const Operand& op, int imm, int ext) @@ -1856,7 +1965,7 @@ private: } void opShift(const Operand& op, const Reg8& _cl, int ext) { - if (_cl.getIdx() != Operand::CL) throw Error(ERR_BAD_COMBINATION); + if (_cl.getIdx() != Operand::CL) XBYAK_THROW(ERR_BAD_COMBINATION) opR_ModM(op, 0, ext, 0xD2); } void opModRM(const Operand& op1, const Operand& op2, bool condR, bool condM, int code0, int code1 = NONE, int code2 = NONE, int immSize = 0) @@ -1866,12 +1975,12 @@ private: } else if (condM) { opModM(op2.getAddress(), op1.getReg(), code0, code1, code2, immSize); } else { - throw Error(ERR_BAD_COMBINATION); + XBYAK_THROW(ERR_BAD_COMBINATION) } } - void opShxd(const Operand& op, const Reg& reg, uint8 imm, int code, const Reg8 *_cl = 0) + void opShxd(const Operand& op, const Reg& reg, uint8_t imm, int code, const Reg8 *_cl = 0) { - if (_cl && _cl->getIdx() != Operand::CL) throw Error(ERR_BAD_COMBINATION); + if (_cl && _cl->getIdx() != Operand::CL) XBYAK_THROW(ERR_BAD_COMBINATION) opModRM(reg, op, (op.isREG(16 | i32e) && op.getBit() == reg.getBit()), op.isMEM() && (reg.isREG(16 | i32e)), 0x0F, code | (_cl ? 1 : 0), NONE, _cl ? 0 : 1); if (!_cl) db(imm); } @@ -1885,12 +1994,12 @@ private: } } // (REG|MEM, IMM) - void opRM_I(const Operand& op, uint32 imm, int code, int ext) + void opRM_I(const Operand& op, uint32_t imm, int code, int ext) { verifyMemHasSize(op); - uint32 immBit = inner::IsInDisp8(imm) ? 8 : isInDisp16(imm) ? 
16 : 32; + uint32_t immBit = inner::IsInDisp8(imm) ? 8 : isInDisp16(imm) ? 16 : 32; if (op.isBit(8)) immBit = 8; - if (op.getBit() < immBit) throw Error(ERR_IMM_IS_TOO_BIG); + if (op.getBit() < immBit) XBYAK_THROW(ERR_IMM_IS_TOO_BIG) if (op.isBit(32|64) && immBit == 16) immBit = 32; /* don't use MEM16 if 32/64bit mode */ if (op.isREG() && op.getIdx() == 0 && (op.getBit() == immBit || (op.isBit(64) && immBit == 32))) { // rax, eax, ax, al rex(op); @@ -1932,21 +2041,21 @@ private: return; } } - throw Error(ERR_BAD_COMBINATION); + XBYAK_THROW(ERR_BAD_COMBINATION) } void verifyMemHasSize(const Operand& op) const { - if (op.isMEM() && op.getBit() == 0) throw Error(ERR_MEM_SIZE_IS_NOT_SPECIFIED); + if (op.isMEM() && op.getBit() == 0) XBYAK_THROW(ERR_MEM_SIZE_IS_NOT_SPECIFIED) } /* mov(r, imm) = db(imm, mov_imm(r, imm)) */ - int mov_imm(const Reg& reg, size_t imm) + int mov_imm(const Reg& reg, uint64_t imm) { int bit = reg.getBit(); const int idx = reg.getIdx(); int code = 0xB0 | ((bit == 8 ? 0 : 1) << 3); - if (bit == 64 && (imm & ~size_t(0xffffffffu)) == 0) { + if (bit == 64 && (imm & ~uint64_t(0xffffffffu)) == 0) { rex(Reg32(idx)); bit = 32; } else { @@ -1970,32 +2079,32 @@ private: if (relative) { db(inner::VerifyInInt32(offset + disp - size_ - jmpSize), jmpSize); } else if (isAutoGrow()) { - db(uint64(0), jmpSize); + db(uint64_t(0), jmpSize); save(size_ - jmpSize, offset, jmpSize, inner::LaddTop); } else { db(size_t(top_) + offset, jmpSize); } return; } - db(uint64(0), jmpSize); + db(uint64_t(0), jmpSize); JmpLabel jmp(size_, jmpSize, (relative ? inner::LasIs : isAutoGrow() ? 
inner::LaddTop : inner::Labs), disp); labelMgr_.addUndefinedLabel(label, jmp); } - void opMovxx(const Reg& reg, const Operand& op, uint8 code) + void opMovxx(const Reg& reg, const Operand& op, uint8_t code) { - if (op.isBit(32)) throw Error(ERR_BAD_COMBINATION); + if (op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) int w = op.isBit(16); #ifdef XBYAK64 - if (op.isHigh8bit()) throw Error(ERR_BAD_COMBINATION); + if (op.isHigh8bit()) XBYAK_THROW(ERR_BAD_COMBINATION) #endif bool cond = reg.isREG() && (reg.getBit() > op.getBit()); opModRM(reg, op, cond && op.isREG(), cond && op.isMEM(), 0x0F, code | w); } - void opFpuMem(const Address& addr, uint8 m16, uint8 m32, uint8 m64, uint8 ext, uint8 m64ext) + void opFpuMem(const Address& addr, uint8_t m16, uint8_t m32, uint8_t m64, uint8_t ext, uint8_t m64ext) { - if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP); - uint8 code = addr.isBit(16) ? m16 : addr.isBit(32) ? m32 : addr.isBit(64) ? m64 : 0; - if (!code) throw Error(ERR_BAD_MEM_SIZE); + if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP) + uint8_t code = addr.isBit(16) ? m16 : addr.isBit(32) ? m32 : addr.isBit(64) ? m64 : 0; + if (!code) XBYAK_THROW(ERR_BAD_MEM_SIZE) if (m64ext && addr.isBit(64)) ext = m64ext; rex(addr, st0); @@ -2004,14 +2113,14 @@ private: } // use code1 if reg1 == st0 // use code2 if reg1 != st0 && reg2 == st0 - void opFpuFpu(const Fpu& reg1, const Fpu& reg2, uint32 code1, uint32 code2) + void opFpuFpu(const Fpu& reg1, const Fpu& reg2, uint32_t code1, uint32_t code2) { - uint32 code = reg1.getIdx() == 0 ? code1 : reg2.getIdx() == 0 ? code2 : 0; - if (!code) throw Error(ERR_BAD_ST_COMBINATION); - db(uint8(code >> 8)); - db(uint8(code | (reg1.getIdx() | reg2.getIdx()))); + uint32_t code = reg1.getIdx() == 0 ? code1 : reg2.getIdx() == 0 ? 
code2 : 0; + if (!code) XBYAK_THROW(ERR_BAD_ST_COMBINATION) + db(uint8_t(code >> 8)); + db(uint8_t(code | (reg1.getIdx() | reg2.getIdx()))); } - void opFpu(const Fpu& reg, uint8 code1, uint8 code2) + void opFpu(const Fpu& reg, uint8_t code1, uint8_t code2) { db(code1); db(code2 | reg.getIdx()); } @@ -2027,10 +2136,10 @@ private: bool x = index.isExtIdx(); if ((type & (T_MUST_EVEX|T_MEM_EVEX)) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) { int aaa = addr.getOpmaskIdx(); - if (aaa && !(type & T_M_K)) throw Error(ERR_INVALID_OPMASK_WITH_MEMORY); + if (aaa && !(type & T_M_K)) XBYAK_THROW(ERR_INVALID_OPMASK_WITH_MEMORY) bool b = false; if (addr.isBroadcast()) { - if (!(type & (T_B32 | T_B64))) throw Error(ERR_INVALID_BROADCAST); + if (!(type & (T_B32 | T_B64))) XBYAK_THROW(ERR_INVALID_BROADCAST) b = true; } int VL = regExp.isVsib() ? index.getBit() : 0; @@ -2052,13 +2161,13 @@ private: } // (r, r, r/m) if isR_R_RM // (r, r/m, r) - void opGpr(const Reg32e& r, const Operand& op1, const Operand& op2, int type, uint8 code, bool isR_R_RM, int imm8 = NONE) + void opGpr(const Reg32e& r, const Operand& op1, const Operand& op2, int type, uint8_t code, bool isR_R_RM, int imm8 = NONE) { const Operand *p1 = &op1; const Operand *p2 = &op2; if (!isR_R_RM) std::swap(p1, p2); const unsigned int bit = r.getBit(); - if (p1->getBit() != bit || (p2->isREG() && p2->getBit() != bit)) throw Error(ERR_BAD_COMBINATION); + if (p1->getBit() != bit || (p2->isREG() && p2->getBit() != bit)) XBYAK_THROW(ERR_BAD_COMBINATION) type |= (bit == 64) ? 
T_W1 : T_W0; opVex(r, p1, *p2, type, code, imm8); } @@ -2071,23 +2180,23 @@ private: op = &op1; } // (x1, x2, op) - if (!((x1.isXMM() && x2->isXMM()) || ((type & T_YMM) && ((x1.isYMM() && x2->isYMM()) || (x1.isZMM() && x2->isZMM()))))) throw Error(ERR_BAD_COMBINATION); + if (!((x1.isXMM() && x2->isXMM()) || ((type & T_YMM) && ((x1.isYMM() && x2->isYMM()) || (x1.isZMM() && x2->isZMM()))))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, x2, *op, type, code0, imm8); } void opAVX_K_X_XM(const Opmask& k, const Xmm& x2, const Operand& op3, int type, int code0, int imm8 = NONE) { - if (!op3.isMEM() && (x2.getKind() != op3.getKind())) throw Error(ERR_BAD_COMBINATION); + if (!op3.isMEM() && (x2.getKind() != op3.getKind())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, &x2, op3, type, code0, imm8); } // (x, x/m), (y, x/m256), (z, y/m) void checkCvt1(const Operand& x, const Operand& op) const { - if (!op.isMEM() && !(x.is(Operand::XMM | Operand::YMM) && op.isXMM()) && !(x.isZMM() && op.isYMM())) throw Error(ERR_BAD_COMBINATION); + if (!op.isMEM() && !(x.is(Operand::XMM | Operand::YMM) && op.isXMM()) && !(x.isZMM() && op.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) } // (x, x/m), (x, y/m256), (y, z/m) void checkCvt2(const Xmm& x, const Operand& op) const { - if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) throw Error(ERR_BAD_COMBINATION); + if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) XBYAK_THROW(ERR_BAD_COMBINATION) } void opCvt2(const Xmm& x, const Operand& op, int type, int code) { @@ -2095,9 +2204,9 @@ private: Operand::Kind kind = x.isXMM() ? (op.isBit(256) ? 
Operand::YMM : Operand::XMM) : Operand::ZMM; opVex(x.copyAndSetKind(kind), &xm0, op, type, code); } - void opCvt3(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int type64, int type32, uint8 code) + void opCvt3(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int type64, int type32, uint8_t code) { - if (!(x1.isXMM() && x2.isXMM() && (op.isREG(i32e) || op.isMEM()))) throw Error(ERR_BAD_SIZE_OF_REGISTER); + if (!(x1.isXMM() && x2.isXMM() && (op.isREG(i32e) || op.isMEM()))) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) Xmm x(op.getIdx()); const Operand *p = op.isREG() ? &x : &op; opVex(x1, &x2, *p, type | (op.isBit(64) ? type64 : type32), code); @@ -2112,18 +2221,18 @@ private: opAVX_X_X_XM(x, cvtIdx0(x), op, type, code, imm8); } // QQQ:need to refactor - void opSp1(const Reg& reg, const Operand& op, uint8 pref, uint8 code0, uint8 code1) + void opSp1(const Reg& reg, const Operand& op, uint8_t pref, uint8_t code0, uint8_t code1) { - if (reg.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER); + if (reg.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) bool is16bit = reg.isREG(16) && (op.isREG(16) || op.isMEM()); - if (!is16bit && !(reg.isREG(i32e) && (op.isREG(reg.getBit()) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); + if (!is16bit && !(reg.isREG(i32e) && (op.isREG(reg.getBit()) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) if (is16bit) db(0x66); db(pref); opModRM(reg.changeBit(i32e == 32 ? 
32 : reg.getBit()), op, op.isREG(), true, code0, code1); } - void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, int type, uint8 code, int mode) + void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, int type, uint8_t code, int mode) { const RegExp& regExp = addr.getRegExp(); - if (!regExp.isVsib(128 | 256)) throw Error(ERR_BAD_VSIB_ADDRESSING); + if (!regExp.isVsib(128 | 256)) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) const int y_vx_y = 0; const int y_vy_y = 1; // const int x_vy_x = 2; @@ -2137,7 +2246,7 @@ private: } else { // x_vy_x isOK = !x1.isYMM() && isAddrYMM && !x2.isYMM(); } - if (!isOK) throw Error(ERR_BAD_VSIB_ADDRESSING); + if (!isOK) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) } opAVX_X_X_XM(isAddrYMM ? Ymm(x1.getIdx()) : x1, isAddrYMM ? Ymm(x2.getIdx()) : x2, addr, type, code); } @@ -2157,11 +2266,11 @@ private: case xx_xy_yz: if ((x1.isXMM() && x2.isYMM()) || (x1.isYMM() && x2.isZMM())) return; break; } - throw Error(ERR_BAD_VSIB_ADDRESSING); + XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) } - void opGather2(const Xmm& x, const Address& addr, int type, uint8 code, int mode) + void opGather2(const Xmm& x, const Address& addr, int type, uint8_t code, int mode) { - if (x.hasZero()) throw Error(ERR_INVALID_ZERO); + if (x.hasZero()) XBYAK_THROW(ERR_INVALID_ZERO) checkGather2(x, addr.getRegExp().getIndex(), mode); opVex(x, 0, addr, type, code); } @@ -2169,21 +2278,52 @@ private: xx_xy_yz ; mode = true xx_xy_xz ; mode = false */ - void opVmov(const Operand& op, const Xmm& x, int type, uint8 code, bool mode) + void opVmov(const Operand& op, const Xmm& x, int type, uint8_t code, bool mode) { if (mode) { - if (!op.isMEM() && !((op.isXMM() && x.isXMM()) || (op.isXMM() && x.isYMM()) || (op.isYMM() && x.isZMM()))) throw Error(ERR_BAD_COMBINATION); + if (!op.isMEM() && !((op.isXMM() && x.isXMM()) || (op.isXMM() && x.isYMM()) || (op.isYMM() && x.isZMM()))) XBYAK_THROW(ERR_BAD_COMBINATION) } else { - if (!op.isMEM() && !op.isXMM()) throw 
Error(ERR_BAD_COMBINATION); + if (!op.isMEM() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) } opVex(x, 0, op, type, code); } - void opGatherFetch(const Address& addr, const Xmm& x, int type, uint8 code, Operand::Kind kind) + void opGatherFetch(const Address& addr, const Xmm& x, int type, uint8_t code, Operand::Kind kind) { - if (addr.hasZero()) throw Error(ERR_INVALID_ZERO); - if (addr.getRegExp().getIndex().getKind() != kind) throw Error(ERR_BAD_VSIB_ADDRESSING); + if (addr.hasZero()) XBYAK_THROW(ERR_INVALID_ZERO) + if (addr.getRegExp().getIndex().getKind() != kind) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) opVex(x, 0, addr, type, code); } + void opInOut(const Reg& a, const Reg& d, uint8_t code) + { + if (a.getIdx() == Operand::AL && d.getIdx() == Operand::DX && d.getBit() == 16) { + switch (a.getBit()) { + case 8: db(code); return; + case 16: db(0x66); db(code + 1); return; + case 32: db(code + 1); return; + } + } + XBYAK_THROW(ERR_BAD_COMBINATION) + } + void opInOut(const Reg& a, uint8_t code, uint8_t v) + { + if (a.getIdx() == Operand::AL) { + switch (a.getBit()) { + case 8: db(code); db(v); return; + case 16: db(0x66); db(code + 1); db(v); return; + case 32: db(code + 1); db(v); return; + } + } + XBYAK_THROW(ERR_BAD_COMBINATION) + } +#ifdef XBYAK64 + void opAMX(const Tmm& t1, const Address& addr, int type, int code0) + { + // require both base and index + const RegExp exp = addr.getRegExp(false); + if (exp.getBase().getBit() == 0 || exp.getIndex().getBit() == 0) XBYAK_THROW(ERR_NOT_SUPPORTED) + opVex(t1, &tmm0, addr, type, code0); + } +#endif public: unsigned int getVersion() const { return VERSION; } using CodeArray::db; @@ -2193,7 +2333,7 @@ public: const Zmm zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7; const Xmm &xm0, &xm1, &xm2, &xm3, &xm4, &xm5, &xm6, &xm7; const Ymm &ym0, &ym1, &ym2, &ym3, &ym4, &ym5, &ym6, &ym7; - const Ymm &zm0, &zm1, &zm2, &zm3, &zm4, &zm5, &zm6, &zm7; + const Zmm &zm0, &zm1, &zm2, &zm3, &zm4, &zm5, &zm6, &zm7; const Reg32 eax, ecx, 
edx, ebx, esp, ebp, esi, edi; const Reg16 ax, cx, dx, bx, sp, bp, si, di; const Reg8 al, cl, dl, bl, ah, ch, dh, bh; @@ -2219,6 +2359,7 @@ public: const Zmm zmm8, zmm9, zmm10, zmm11, zmm12, zmm13, zmm14, zmm15; const Zmm zmm16, zmm17, zmm18, zmm19, zmm20, zmm21, zmm22, zmm23; const Zmm zmm24, zmm25, zmm26, zmm27, zmm28, zmm29, zmm30, zmm31; + const Tmm tmm0, tmm1, tmm2, tmm3, tmm4, tmm5, tmm6, tmm7; const Xmm &xm8, &xm9, &xm10, &xm11, &xm12, &xm13, &xm14, &xm15; // for my convenience const Xmm &xm16, &xm17, &xm18, &xm19, &xm20, &xm21, &xm22, &xm23; const Xmm &xm24, &xm25, &xm26, &xm27, &xm28, &xm29, &xm30, &xm31; @@ -2233,6 +2374,9 @@ public: #ifndef XBYAK_DISABLE_SEGMENT const Segment es, cs, ss, ds, fs, gs; #endif +private: + bool isDefaultJmpNEAR_; +public: void L(const std::string& label) { labelMgr_.defineSlabel(label); } void L(Label& label) { labelMgr_.defineClabel(label); } Label L() { Label label; L(label); return label; } @@ -2252,6 +2396,8 @@ public: void putL(std::string label) { putL_inner(label); } void putL(const Label& label) { putL_inner(label); } + // set default type of `jmp` of undefined label to T_NEAR + void setDefaultJmpNEAR(bool isNear) { isDefaultJmpNEAR_ = isNear; } void jmp(const Operand& op) { opR_ModM(op, BIT, 4, 0xFF, NONE, NONE, true); } void jmp(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); } void jmp(const char *label, LabelType type = T_AUTO) { jmp(std::string(label), type); } @@ -2274,7 +2420,7 @@ public: { opModRM(reg, op, op.isREG() && (op.getKind() == reg.getKind()), op.isMEM(), 0x84); } - void test(const Operand& op, uint32 imm) + void test(const Operand& op, uint32_t imm) { verifyMemHasSize(op); int immSize = (std::min)(op.getBit() / 8, 4U); @@ -2299,18 +2445,18 @@ public: } void push(const Operand& op) { opPushPop(op, 0xFF, 6, 0x50); } void pop(const Operand& op) { opPushPop(op, 0x8F, 0, 0x58); } - void push(const AddressFrame& af, uint32 imm) + void push(const AddressFrame& af, uint32_t 
imm) { - if (af.bit_ == 8 && inner::IsInDisp8(imm)) { + if (af.bit_ == 8) { db(0x6A); db(imm); - } else if (af.bit_ == 16 && isInDisp16(imm)) { + } else if (af.bit_ == 16) { db(0x66); db(0x68); dw(imm); } else { db(0x68); dd(imm); } } /* use "push(word, 4)" if you want "push word 4" */ - void push(uint32 imm) + void push(uint32_t imm) { if (inner::IsInDisp8(imm)) { push(byte, imm); @@ -2322,7 +2468,7 @@ public: { const Reg *reg = 0; const Address *addr = 0; - uint8 code = 0; + uint8_t code = 0; if (reg1.isREG() && reg1.getIdx() == 0 && reg2.isMEM()) { // mov eax|ax|al, [disp] reg = ®1.getReg(); addr= ®2.getAddress(); @@ -2340,21 +2486,21 @@ public: db(reg1.isREG(8) ? 0xA0 : reg1.isREG() ? 0xA1 : reg2.isREG(8) ? 0xA2 : 0xA3); db(addr->getDisp(), 8); } else { - throw Error(ERR_BAD_COMBINATION); + XBYAK_THROW(ERR_BAD_COMBINATION) } } else #else if (code && addr->isOnlyDisp()) { rex(*reg, *addr); db(code | (reg->isBit(8) ? 0 : 1)); - dd(static_cast(addr->getDisp())); + dd(static_cast(addr->getDisp())); } else #endif { opRM_RM(reg1, reg2, 0x88); } } - void mov(const Operand& op, size_t imm) + void mov(const Operand& op, uint64_t imm) { if (op.isREG()) { const int size = mov_imm(op.getReg(), imm); @@ -2363,27 +2509,24 @@ public: verifyMemHasSize(op); int immSize = op.getBit() / 8; if (immSize <= 4) { - sint64 s = sint64(imm) >> (immSize * 8); - if (s != 0 && s != -1) throw Error(ERR_IMM_IS_TOO_BIG); + int64_t s = int64_t(imm) >> (immSize * 8); + if (s != 0 && s != -1) XBYAK_THROW(ERR_IMM_IS_TOO_BIG) } else { - if (!inner::IsInInt32(imm)) throw Error(ERR_IMM_IS_TOO_BIG); + if (!inner::IsInInt32(imm)) XBYAK_THROW(ERR_IMM_IS_TOO_BIG) immSize = 4; } opModM(op.getAddress(), Reg(0, Operand::REG, op.getBit()), 0xC6, NONE, NONE, immSize); - db(static_cast(imm), immSize); + db(static_cast(imm), immSize); } else { - throw Error(ERR_BAD_COMBINATION); + XBYAK_THROW(ERR_BAD_COMBINATION) } } - void mov(const NativeReg& reg, const char *label) // can't use std::string - { - if (label 
== 0) { - mov(static_cast(reg), 0); // call imm - return; - } - mov_imm(reg, dummyAddr); - putL(label); - } + + // The template is used to avoid ambiguity when the 2nd argument is 0. + // When the 2nd argument is 0 the call goes to + // `void mov(const Operand& op, uint64_t imm)`. + template + void mov(const T1&, const T2 *) { T1::unexpected; } void mov(const NativeReg& reg, const Label& label) { mov_imm(reg, dummyAddr); @@ -2395,7 +2538,7 @@ public: if (p1->isMEM() || (p2->isREG(16 | i32e) && p2->getIdx() == 0)) { p1 = &op2; p2 = &op1; } - if (p1->isMEM()) throw Error(ERR_BAD_COMBINATION); + if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) if (p2->isREG() && (p1->isREG(16 | i32e) && p1->getIdx() == 0) #ifdef XBYAK64 && (p2->getIdx() != 0 || !p1->isREG(32)) @@ -2425,7 +2568,7 @@ public: { switch (seg.getIdx()) { case Segment::es: db(0x07); break; - case Segment::cs: throw Error(ERR_BAD_COMBINATION); + case Segment::cs: XBYAK_THROW(ERR_BAD_COMBINATION) case Segment::ss: db(0x17); break; case Segment::ds: db(0x1F); break; case Segment::fs: db(0x0F); db(0xA1); break; @@ -2495,6 +2638,7 @@ public: , zmm8(8), zmm9(9), zmm10(10), zmm11(11), zmm12(12), zmm13(13), zmm14(14), zmm15(15) , zmm16(16), zmm17(17), zmm18(18), zmm19(19), zmm20(20), zmm21(21), zmm22(22), zmm23(23) , zmm24(24), zmm25(25), zmm26(26), zmm27(27), zmm28(28), zmm29(29), zmm30(30), zmm31(31) + , tmm0(0), tmm1(1), tmm2(2), tmm3(3), tmm4(4), tmm5(5), tmm6(6), tmm7(7) // for my convenience , xm8(xmm8), xm9(xmm9), xm10(xmm10), xm11(xmm11), xm12(xmm12), xm13(xmm13), xm14(xmm14), xm15(xmm15) , xm16(xmm16), xm17(xmm17), xm18(xmm18), xm19(xmm19), xm20(xmm20), xm21(xmm21), xm22(xmm22), xm23(xmm23) @@ -2510,6 +2654,7 @@ public: #ifndef XBYAK_DISABLE_SEGMENT , es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs) #endif + , isDefaultJmpNEAR_(false) { labelMgr_.set(this); } @@ -2526,7 +2671,7 @@ public: */ void ready(ProtectMode mode = PROTECT_RWE) { - if 
(hasUndefinedLabel()) throw Error(ERR_LABEL_IS_NOT_FOUND); + if (hasUndefinedLabel()) XBYAK_THROW(ERR_LABEL_IS_NOT_FOUND) if (isAutoGrow()) { calcJmpAddress(); if (useProtect()) setProtectMode(mode); @@ -2563,7 +2708,7 @@ public: AMD and Intel seem to agree on the same sequences for up to 9 bytes: https://support.amd.com/TechDocs/55723_SOG_Fam_17h_Processors_3.00.pdf */ - static const uint8 nopTbl[9][9] = { + static const uint8_t nopTbl[9][9] = { {0x90}, {0x66, 0x90}, {0x0F, 0x1F, 0x00}, @@ -2577,7 +2722,7 @@ public: const size_t n = sizeof(nopTbl) / sizeof(nopTbl[0]); while (size > 0) { size_t len = (std::min)(n, size); - const uint8 *seq = nopTbl[len - 1]; + const uint8_t *seq = nopTbl[len - 1]; db(seq, len); size -= len; } @@ -2591,7 +2736,7 @@ public: void align(size_t x = 16, bool useMultiByteNop = true) { if (x == 1) return; - if (x < 1 || (x & (x - 1))) throw Error(ERR_BAD_ALIGN); + if (x < 1 || (x & (x - 1))) XBYAK_THROW(ERR_BAD_ALIGN) if (isAutoGrow() && x > inner::ALIGN_PAGE_SIZE) fprintf(stderr, "warning:autoGrow mode does not support %d align\n", (int)x); size_t remain = size_t(getCurr()) % x; if (remain) { @@ -2601,39 +2746,48 @@ public: #endif }; +template <> +inline void CodeGenerator::mov(const NativeReg& reg, const char *label) // can't use std::string +{ + assert(label); + mov_imm(reg, dummyAddr); + putL(label); +} + namespace util { -static const Mmx mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7); -static const Xmm xmm0(0), xmm1(1), xmm2(2), xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7); -static const Ymm ymm0(0), ymm1(1), ymm2(2), ymm3(3), ymm4(4), ymm5(5), ymm6(6), ymm7(7); -static const Zmm zmm0(0), zmm1(1), zmm2(2), zmm3(3), zmm4(4), zmm5(5), zmm6(6), zmm7(7); -static const Reg32 eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI); -static const Reg16 ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), 
sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI); -static const Reg8 al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH); -static const AddressFrame ptr(0), byte(8), word(16), dword(32), qword(64), xword(128), yword(256), zword(512); -static const AddressFrame ptr_b(0, true), xword_b(128, true), yword_b(256, true), zword_b(512, true); -static const Fpu st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7); -static const Opmask k0(0), k1(1), k2(2), k3(3), k4(4), k5(5), k6(6), k7(7); -static const BoundsReg bnd0(0), bnd1(1), bnd2(2), bnd3(3); -static const EvexModifierRounding T_sae(EvexModifierRounding::T_SAE), T_rn_sae(EvexModifierRounding::T_RN_SAE), T_rd_sae(EvexModifierRounding::T_RD_SAE), T_ru_sae(EvexModifierRounding::T_RU_SAE), T_rz_sae(EvexModifierRounding::T_RZ_SAE); -static const EvexModifierZero T_z; +static const XBYAK_CONSTEXPR Mmx mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7); +static const XBYAK_CONSTEXPR Xmm xmm0(0), xmm1(1), xmm2(2), xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7); +static const XBYAK_CONSTEXPR Ymm ymm0(0), ymm1(1), ymm2(2), ymm3(3), ymm4(4), ymm5(5), ymm6(6), ymm7(7); +static const XBYAK_CONSTEXPR Zmm zmm0(0), zmm1(1), zmm2(2), zmm3(3), zmm4(4), zmm5(5), zmm6(6), zmm7(7); +static const XBYAK_CONSTEXPR Reg32 eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI); +static const XBYAK_CONSTEXPR Reg16 ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI); +static const XBYAK_CONSTEXPR Reg8 al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH); +static const XBYAK_CONSTEXPR AddressFrame ptr(0), byte(8), word(16), dword(32), qword(64), xword(128), yword(256), 
zword(512); +static const XBYAK_CONSTEXPR AddressFrame ptr_b(0, true), xword_b(128, true), yword_b(256, true), zword_b(512, true); +static const XBYAK_CONSTEXPR Fpu st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7); +static const XBYAK_CONSTEXPR Opmask k0(0), k1(1), k2(2), k3(3), k4(4), k5(5), k6(6), k7(7); +static const XBYAK_CONSTEXPR BoundsReg bnd0(0), bnd1(1), bnd2(2), bnd3(3); +static const XBYAK_CONSTEXPR EvexModifierRounding T_sae(EvexModifierRounding::T_SAE), T_rn_sae(EvexModifierRounding::T_RN_SAE), T_rd_sae(EvexModifierRounding::T_RD_SAE), T_ru_sae(EvexModifierRounding::T_RU_SAE), T_rz_sae(EvexModifierRounding::T_RZ_SAE); +static const XBYAK_CONSTEXPR EvexModifierZero T_z; #ifdef XBYAK64 -static const Reg64 rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15); -static const Reg32 r8d(8), r9d(9), r10d(10), r11d(11), r12d(12), r13d(13), r14d(14), r15d(15); -static const Reg16 r8w(8), r9w(9), r10w(10), r11w(11), r12w(12), r13w(13), r14w(14), r15w(15); -static const Reg8 r8b(8), r9b(9), r10b(10), r11b(11), r12b(12), r13b(13), r14b(14), r15b(15), spl(Operand::SPL, true), bpl(Operand::BPL, true), sil(Operand::SIL, true), dil(Operand::DIL, true); -static const Xmm xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15); -static const Xmm xmm16(16), xmm17(17), xmm18(18), xmm19(19), xmm20(20), xmm21(21), xmm22(22), xmm23(23); -static const Xmm xmm24(24), xmm25(25), xmm26(26), xmm27(27), xmm28(28), xmm29(29), xmm30(30), xmm31(31); -static const Ymm ymm8(8), ymm9(9), ymm10(10), ymm11(11), ymm12(12), ymm13(13), ymm14(14), ymm15(15); -static const Ymm ymm16(16), ymm17(17), ymm18(18), ymm19(19), ymm20(20), ymm21(21), ymm22(22), ymm23(23); -static const Ymm ymm24(24), ymm25(25), ymm26(26), 
ymm27(27), ymm28(28), ymm29(29), ymm30(30), ymm31(31); -static const Zmm zmm8(8), zmm9(9), zmm10(10), zmm11(11), zmm12(12), zmm13(13), zmm14(14), zmm15(15); -static const Zmm zmm16(16), zmm17(17), zmm18(18), zmm19(19), zmm20(20), zmm21(21), zmm22(22), zmm23(23); -static const Zmm zmm24(24), zmm25(25), zmm26(26), zmm27(27), zmm28(28), zmm29(29), zmm30(30), zmm31(31); -static const RegRip rip; +static const XBYAK_CONSTEXPR Reg64 rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15); +static const XBYAK_CONSTEXPR Reg32 r8d(8), r9d(9), r10d(10), r11d(11), r12d(12), r13d(13), r14d(14), r15d(15); +static const XBYAK_CONSTEXPR Reg16 r8w(8), r9w(9), r10w(10), r11w(11), r12w(12), r13w(13), r14w(14), r15w(15); +static const XBYAK_CONSTEXPR Reg8 r8b(8), r9b(9), r10b(10), r11b(11), r12b(12), r13b(13), r14b(14), r15b(15), spl(Operand::SPL, true), bpl(Operand::BPL, true), sil(Operand::SIL, true), dil(Operand::DIL, true); +static const XBYAK_CONSTEXPR Xmm xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15); +static const XBYAK_CONSTEXPR Xmm xmm16(16), xmm17(17), xmm18(18), xmm19(19), xmm20(20), xmm21(21), xmm22(22), xmm23(23); +static const XBYAK_CONSTEXPR Xmm xmm24(24), xmm25(25), xmm26(26), xmm27(27), xmm28(28), xmm29(29), xmm30(30), xmm31(31); +static const XBYAK_CONSTEXPR Ymm ymm8(8), ymm9(9), ymm10(10), ymm11(11), ymm12(12), ymm13(13), ymm14(14), ymm15(15); +static const XBYAK_CONSTEXPR Ymm ymm16(16), ymm17(17), ymm18(18), ymm19(19), ymm20(20), ymm21(21), ymm22(22), ymm23(23); +static const XBYAK_CONSTEXPR Ymm ymm24(24), ymm25(25), ymm26(26), ymm27(27), ymm28(28), ymm29(29), ymm30(30), ymm31(31); +static const XBYAK_CONSTEXPR Zmm zmm8(8), zmm9(9), zmm10(10), zmm11(11), zmm12(12), zmm13(13), zmm14(14), 
zmm15(15); +static const XBYAK_CONSTEXPR Zmm zmm16(16), zmm17(17), zmm18(18), zmm19(19), zmm20(20), zmm21(21), zmm22(22), zmm23(23); +static const XBYAK_CONSTEXPR Zmm zmm24(24), zmm25(25), zmm26(26), zmm27(27), zmm28(28), zmm29(29), zmm30(30), zmm31(31); +static const XBYAK_CONSTEXPR Zmm tmm0(0), tmm1(1), tmm2(2), tmm3(3), tmm4(4), tmm5(5), tmm6(6), tmm7(7); +static const XBYAK_CONSTEXPR RegRip rip; #endif #ifndef XBYAK_DISABLE_SEGMENT -static const Segment es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs); +static const XBYAK_CONSTEXPR Segment es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs); #endif } // util diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index bb40f8cc..85e8bed5 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1,8 +1,8 @@ -const char *getVersionString() const { return "5.78"; } -void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); } +const char *getVersionString() const { return "5.97"; } +void adc(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x10, 2); } void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); } void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); } -void add(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x00, 0); } +void add(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x00, 0); } void add(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x00); } void addpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0x66, isXMM_XMMorMEM); } void addps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0x100, isXMM_XMMorMEM); } @@ -16,8 +16,8 @@ void aesdeclast(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDF, 0x66, void aesenc(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDC, 0x66, isXMM_XMMorMEM, NONE, 0x38); } void aesenclast(const Xmm& xmm, const Operand& 
op) { opGen(xmm, op, 0xDD, 0x66, isXMM_XMMorMEM, NONE, 0x38); } void aesimc(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDB, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void aeskeygenassist(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0xDF, 0x66, isXMM_XMMorMEM, imm, 0x3A); } -void and_(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x20, 4); } +void aeskeygenassist(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0xDF, 0x66, isXMM_XMMorMEM, imm, 0x3A); } +void and_(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x20, 4); } void and_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x20); } void andn(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, T_0F38, 0xf2, true); } void andnpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x66, isXMM_XMMorMEM); } @@ -25,8 +25,8 @@ void andnps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x100, isX void andpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x66, isXMM_XMMorMEM); } void andps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x100, isXMM_XMMorMEM); } void bextr(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_0F38, 0xf7, false); } -void blendpd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0D, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } -void blendps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0C, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } +void blendpd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0D, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); } +void blendps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0C, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); } void blendvpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x66, isXMM_XMMorMEM, NONE, 0x38); } void blendvps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x66, isXMM_XMMorMEM, NONE, 0x38); } void 
blsi(const Reg32e& r, const Operand& op) { opGpr(Reg32e(3, r.getBit()), op, r, T_0F38, 0xf3, false); } @@ -45,20 +45,22 @@ void bsf(const Reg&reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e void bsr(const Reg&reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBD); } void bswap(const Reg32e& reg) { opModR(Reg32(1), reg, 0x0F); } void bt(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xA3); } -void bt(const Operand& op, uint8 imm) { opR_ModM(op, 16|32|64, 4, 0x0f, 0xba, NONE, false, 1); db(imm); } +void bt(const Operand& op, uint8_t imm) { opR_ModM(op, 16|32|64, 4, 0x0f, 0xba, NONE, false, 1); db(imm); } void btc(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xBB); } -void btc(const Operand& op, uint8 imm) { opR_ModM(op, 16|32|64, 7, 0x0f, 0xba, NONE, false, 1); db(imm); } +void btc(const Operand& op, uint8_t imm) { opR_ModM(op, 16|32|64, 7, 0x0f, 0xba, NONE, false, 1); db(imm); } void btr(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xB3); } -void btr(const Operand& op, uint8 imm) { opR_ModM(op, 16|32|64, 6, 0x0f, 0xba, NONE, false, 1); db(imm); } +void btr(const Operand& op, uint8_t imm) { opR_ModM(op, 16|32|64, 6, 0x0f, 0xba, NONE, false, 1); db(imm); } void bts(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xAB); } -void bts(const Operand& op, uint8 imm) { opR_ModM(op, 16|32|64, 5, 0x0f, 0xba, NONE, false, 1); db(imm); } +void bts(const Operand& op, uint8_t imm) { opR_ModM(op, 16|32|64, 5, 0x0f, 0xba, NONE, false, 1); db(imm); } void bzhi(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_0F38, 0xf5, false); } void cbw() { db(0x66); db(0x98); } void cdq() { db(0x99); } void clc() { db(0xF8); } 
void cld() { db(0xFC); } void clflush(const Address& addr) { opModM(addr, Reg32(7), 0x0F, 0xAE); } +void clflushopt(const Address& addr) { db(0x66); opModM(addr, Reg32(7), 0x0F, 0xAE); } void cli() { db(0xFA); } +void clzero() { db(0x0F); db(0x01); db(0xFC); } void cmc() { db(0xF5); } void cmova(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 7); }//-V524 void cmovae(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 3); }//-V524 @@ -90,7 +92,7 @@ void cmovpe(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | void cmovpo(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 11); }//-V524 void cmovs(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 8); }//-V524 void cmovz(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 4); }//-V524 -void cmp(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x38, 7); } +void cmp(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x38, 7); } void cmp(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x38); } void cmpeqpd(const Xmm& x, const Operand& op) { cmppd(x, op, 0); } void cmpeqps(const Xmm& x, const Operand& op) { cmpps(x, op, 0); } @@ -120,12 +122,12 @@ void cmpordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 7); } void cmpordps(const Xmm& x, const Operand& op) { cmpps(x, op, 7); } void cmpordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 7); } void cmpordss(const Xmm& x, const Operand& op) { cmpss(x, op, 7); } -void cmppd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x66, isXMM_XMMorMEM, imm8); } -void cmpps(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x100, isXMM_XMMorMEM, imm8); } +void cmppd(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0xC2, 0x66, isXMM_XMMorMEM, imm8); } 
+void cmpps(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0xC2, 0x100, isXMM_XMMorMEM, imm8); } void cmpsb() { db(0xA6); } void cmpsd() { db(0xA7); } -void cmpsd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF2, isXMM_XMMorMEM, imm8); } -void cmpss(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF3, isXMM_XMMorMEM, imm8); } +void cmpsd(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0xC2, 0xF2, isXMM_XMMorMEM, imm8); } +void cmpss(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0xC2, 0xF3, isXMM_XMMorMEM, imm8); } void cmpsw() { db(0x66); db(0xA7); } void cmpunordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 3); } void cmpunordps(const Xmm& x, const Operand& op) { cmpps(x, op, 3); } @@ -167,10 +169,11 @@ void divpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0x66, isXMM void divps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0x100, isXMM_XMMorMEM); } void divsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0xF2, isXMM_XMMorMEM); } void divss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0xF3, isXMM_XMMorMEM); } -void dppd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } -void dpps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } +void dppd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); } +void dpps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); } void emms() { db(0x0F); db(0x77); } -void extractps(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x17, imm); } +void enter(uint16_t x, uint8_t y) { db(0xC8); dw(x); db(y); } +void extractps(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x17, imm); } void f2xm1() { 
db(0xD9); db(0xF0); } void fabs() { db(0xD9); db(0xE1); } void fadd(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 0, 0); } @@ -179,7 +182,10 @@ void fadd(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C0, 0xDCC void faddp() { db(0xDE); db(0xC1); } void faddp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC0); } void faddp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC0); } +void fbld(const Address& addr) { opModM(addr, Reg32(4), 0xDF, 0x100); } +void fbstp(const Address& addr) { opModM(addr, Reg32(6), 0xDF, 0x100); } void fchs() { db(0xD9); db(0xE0); } +void fclex() { db(0x9B); db(0xDB); db(0xE2); } void fcmovb(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAC0, 0x00C0); } void fcmovb(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAC0, 0x00C0); } void fcmovbe(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAD0, 0x00D0); } @@ -240,6 +246,7 @@ void fld(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 0, 0); } void fld(const Fpu& reg) { opFpu(reg, 0xD9, 0xC0); } void fld1() { db(0xD9); db(0xE8); } void fldcw(const Address& addr) { opModM(addr, Reg32(5), 0xD9, 0x100); } +void fldenv(const Address& addr) { opModM(addr, Reg32(4), 0xD9, 0x100); } void fldl2e() { db(0xD9); db(0xEA); } void fldl2t() { db(0xD9); db(0xE9); } void fldlg2() { db(0xD9); db(0xEC); } @@ -252,22 +259,33 @@ void fmul(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C8, 0xDCC void fmulp() { db(0xDE); db(0xC9); } void fmulp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC8); } void fmulp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC8); } +void fnclex() { db(0xDB); db(0xE2); } void fninit() { db(0xDB); db(0xE3); } void fnop() { db(0xD9); db(0xD0); } +void fnsave(const Address& addr) { opModM(addr, Reg32(6), 0xDD, 0x100); } +void fnstcw(const Address& addr) { opModM(addr, Reg32(7), 0xD9, 0x100); } +void fnstenv(const Address& addr) { opModM(addr, Reg32(6), 0xD9, 0x100); } +void fnstsw(const Address& 
addr) { opModM(addr, Reg32(7), 0xDD, 0x100); } +void fnstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) XBYAK_THROW(ERR_BAD_PARAMETER) db(0xDF); db(0xE0); } void fpatan() { db(0xD9); db(0xF3); } void fprem() { db(0xD9); db(0xF8); } void fprem1() { db(0xD9); db(0xF5); } void fptan() { db(0xD9); db(0xF2); } void frndint() { db(0xD9); db(0xFC); } +void frstor(const Address& addr) { opModM(addr, Reg32(4), 0xDD, 0x100); } +void fsave(const Address& addr) { db(0x9B); opModM(addr, Reg32(6), 0xDD, 0x100); } void fscale() { db(0xD9); db(0xFD); } void fsin() { db(0xD9); db(0xFE); } void fsincos() { db(0xD9); db(0xFB); } void fsqrt() { db(0xD9); db(0xFA); } void fst(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 2, 0); } void fst(const Fpu& reg) { opFpu(reg, 0xDD, 0xD0); } -void fstcw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xD9, NONE); } +void fstcw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xD9, 0x100); } +void fstenv(const Address& addr) { db(0x9B); opModM(addr, Reg32(6), 0xD9, 0x100); } void fstp(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 3, 0); } void fstp(const Fpu& reg) { opFpu(reg, 0xDD, 0xD8); } +void fstsw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xDD, 0x100); } +void fstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) XBYAK_THROW(ERR_BAD_PARAMETER) db(0x9B); db(0xDF); db(0xE0); } void fsub(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 4, 0); } void fsub(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8E0, 0xDCE8); } void fsub(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8E0, 0xDCE8); } @@ -294,11 +312,12 @@ void fwait() { db(0x9B); } void fxam() { db(0xD9); db(0xE5); } void fxch() { db(0xD9); db(0xC9); } void fxch(const Fpu& reg) { opFpu(reg, 0xD9, 0xC8); } +void fxrstor(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0xAE); } void fxtract() { db(0xD9); db(0xF4); } void fyl2x() { db(0xD9); db(0xF1); } void fyl2xp1() { db(0xD9); db(0xF9); } -void gf2p8affineinvqb(const 
Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0xCF, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } -void gf2p8affineqb(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0xCE, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } +void gf2p8affineinvqb(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0xCF, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); } +void gf2p8affineqb(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0xCE, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); } void gf2p8mulb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xCF, 0x66, isXMM_XMMorMEM, NONE, 0x38); } void haddpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0x66, isXMM_XMMorMEM); } void haddps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0xF2, isXMM_XMMorMEM); } @@ -306,8 +325,12 @@ void hsubpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0x66, isXM void hsubps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0xF2, isXMM_XMMorMEM); } void idiv(const Operand& op) { opR_ModM(op, 0, 7, 0xF6); } void imul(const Operand& op) { opR_ModM(op, 0, 5, 0xF6); } +void in_(const Reg& a, const Reg& d) { opInOut(a, d, 0xEC); } +void in_(const Reg& a, uint8_t v) { opInOut(a, 0xE4, v); } void inc(const Operand& op) { opIncDec(op, 0x40, 0); } -void insertps(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, imm, 0x3A); } +void insertps(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, imm, 0x3A); } +void int3() { db(0xCC); } +void int_(uint8_t x) { db(0xCD); db(x); } void ja(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); }//-V524 void ja(const char *label, LabelType type = T_AUTO) { ja(std::string(label), type); }//-V524 void ja(const void *addr) { opJmpAbs(addr, T_NEAR, 0x77, 0x87, 0x0F); }//-V524 @@ -431,12 +454,28 @@ void jz(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 
0x74, 0 void lahf() { db(0x9F); } void lddqu(const Xmm& xmm, const Address& addr) { db(0xF2); opModM(addr, xmm, 0x0F, 0xF0); } void ldmxcsr(const Address& addr) { opModM(addr, Reg32(2), 0x0F, 0xAE); } -void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModM(addr, reg, 0x8D); } +void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModM(addr, reg, 0x8D); } +void leave() { db(0xC9); } void lfence() { db(0x0F); db(0xAE); db(0xE8); } +void lfs(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB4); } +void lgs(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB5); } void lock() { db(0xF0); } +void lodsb() { db(0xAC); } +void lodsd() { db(0xAD); } +void lodsw() { db(0x66); db(0xAD); } +void loop(const Label& label) { opJmp(label, T_SHORT, 0xE2, 0, 0); } +void loop(const char *label) { loop(std::string(label)); } +void loop(std::string label) { opJmp(label, T_SHORT, 0xE2, 0, 0); } +void loope(const Label& label) { opJmp(label, T_SHORT, 0xE1, 0, 0); } +void loope(const char *label) { loope(std::string(label)); } +void loope(std::string label) { opJmp(label, T_SHORT, 0xE1, 0, 0); } +void loopne(const Label& label) { opJmp(label, T_SHORT, 0xE0, 0, 0); } +void loopne(const char *label) { loopne(std::string(label)); } +void loopne(std::string label) { opJmp(label, T_SHORT, 0xE0, 0, 0); } +void lss(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB2); } void lzcnt(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBD); } void maskmovdqu(const Xmm& reg1, const Xmm& reg2) { db(0x66); opModR(reg1, reg2, 0x0F, 0xF7); } -void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) throw Error(ERR_BAD_COMBINATION); opModR(reg1, reg2, 0x0F, 0xF7); } +void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) 
opModR(reg1, reg2, 0x0F, 0xF7); } void maxpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0x66, isXMM_XMMorMEM); } void maxps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0x100, isXMM_XMMorMEM); } void maxsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0xF2, isXMM_XMMorMEM); } @@ -447,6 +486,7 @@ void minps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0x100, isXM void minsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0xF2, isXMM_XMMorMEM); } void minss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0xF3, isXMM_XMMorMEM); } void monitor() { db(0x0F); db(0x01); db(0xC8); } +void monitorx() { db(0x0F); db(0x01); db(0xFA); } void movapd(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x29); } void movapd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, 0x66); } void movaps(const Address& addr, const Xmm& xmm) { opModM(addr, xmm, 0x0F, 0x29); } @@ -476,7 +516,7 @@ void movntdqa(const Xmm& xmm, const Address& addr) { db(0x66); opModM(addr, xmm, void movnti(const Address& addr, const Reg32e& reg) { opModM(addr, reg, 0x0F, 0xC3); } void movntpd(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0x2B); } void movntps(const Address& addr, const Xmm& xmm) { opModM(addr, Mmx(xmm.getIdx()), 0x0F, 0x2B); } -void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) throw Error(ERR_BAD_COMBINATION); opModM(addr, mmx, 0x0F, 0xE7); } +void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opModM(addr, mmx, 0x0F, 0xE7); } void movq(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModM(addr, mmx, 0x0F, mmx.isXMM() ? 0xD6 : 0x7F); } void movq(const Mmx& mmx, const Operand& op) { if (mmx.isXMM()) db(0xF3); opModRM(mmx, op, (mmx.getKind() == op.getKind()), op.isMEM(), 0x0F, mmx.isXMM() ? 
0x7E : 0x6F); } void movq2dq(const Xmm& xmm, const Mmx& mmx) { db(0xF3); opModR(xmm, mmx, 0x0F, 0xD6); } @@ -495,7 +535,7 @@ void movupd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0x66); } void movups(const Address& addr, const Xmm& xmm) { opModM(addr, xmm, 0x0F, 0x11); } void movups(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0x100); } void movzx(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0xB6); } -void mpsadbw(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x42, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } +void mpsadbw(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x42, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); } void mul(const Operand& op) { opR_ModM(op, 0, 4, 0xF6); } void mulpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0x66, isXMM_XMMorMEM); } void mulps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0x100, isXMM_XMMorMEM); } @@ -503,12 +543,18 @@ void mulsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0xF2, isXMM void mulss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0xF3, isXMM_XMMorMEM); } void mulx(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, T_F2 | T_0F38, 0xf6, true); } void mwait() { db(0x0F); db(0x01); db(0xC9); } +void mwaitx() { db(0x0F); db(0x01); db(0xFB); } void neg(const Operand& op) { opR_ModM(op, 0, 3, 0xF6); } void not_(const Operand& op) { opR_ModM(op, 0, 2, 0xF6); } -void or_(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x08, 1); } +void or_(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x08, 1); } void or_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x08); } void orpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x56, 0x66, isXMM_XMMorMEM); } void orps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x56, 0x100, isXMM_XMMorMEM); } +void out_(const Reg& d, const Reg& a) { opInOut(a, d, 0xEE); } +void out_(uint8_t v, const Reg& a) { opInOut(a, 
0xE6, v); } +void outsb() { db(0x6E); } +void outsd() { db(0x6F); } +void outsw() { db(0x66); db(0x6F); } void pabsb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1C, 0x66, NONE, 0x38); } void pabsd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1E, 0x66, NONE, 0x38); } void pabsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1D, 0x66, NONE, 0x38); } @@ -524,36 +570,36 @@ void paddsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xED); } void paddusb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDC); } void paddusw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDD); } void paddw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFD); } -void palignr(const Mmx& mmx, const Operand& op, int imm) { opMMX(mmx, op, 0x0f, 0x66, static_cast<uint8>(imm), 0x3a); } +void palignr(const Mmx& mmx, const Operand& op, int imm) { opMMX(mmx, op, 0x0f, 0x66, static_cast<uint8_t>(imm), 0x3a); } void pand(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDB); } void pandn(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDF); } void pause() { db(0xF3); db(0x90); } void pavgb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE0); } void pavgw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE3); } void pblendvb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x10, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pblendw(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0E, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } +void pblendw(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0E, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); } void pclmulhqhdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x11); } void pclmulhqlqdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x01); } void pclmullqhdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x10); } void pclmullqlqdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x00); } -void pclmulqdq(const Xmm& xmm, const Operand& op, int 
imm) { opGen(xmm, op, 0x44, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } +void pclmulqdq(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x44, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); } void pcmpeqb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x74); } void pcmpeqd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x76); } void pcmpeqq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x29, 0x66, isXMM_XMMorMEM, NONE, 0x38); } void pcmpeqw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x75); } -void pcmpestri(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x61, 0x66, isXMM_XMMorMEM, imm, 0x3A); } -void pcmpestrm(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x60, 0x66, isXMM_XMMorMEM, imm, 0x3A); } +void pcmpestri(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x61, 0x66, isXMM_XMMorMEM, imm, 0x3A); } +void pcmpestrm(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x60, 0x66, isXMM_XMMorMEM, imm, 0x3A); } void pcmpgtb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x64); } void pcmpgtd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x66); } void pcmpgtq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x37, 0x66, isXMM_XMMorMEM, NONE, 0x38); } void pcmpgtw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x65); } -void pcmpistri(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x63, 0x66, isXMM_XMMorMEM, imm, 0x3A); } -void pcmpistrm(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x62, 0x66, isXMM_XMMorMEM, imm, 0x3A); } +void pcmpistri(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x63, 0x66, isXMM_XMMorMEM, imm, 0x3A); } +void pcmpistrm(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x62, 0x66, isXMM_XMMorMEM, imm, 0x3A); } void pdep(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, T_F2 | T_0F38, 0xf5, true); } void pext(const Reg32e& r1, const 
Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, T_F3 | T_0F38, 0xf5, true); } -void pextrb(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x14, imm); } -void pextrd(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x16, imm); } -void pextrw(const Operand& op, const Mmx& xmm, uint8 imm) { opExt(op, xmm, 0x15, imm, true); } +void pextrb(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x14, imm); } +void pextrd(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x16, imm); } +void pextrw(const Operand& op, const Mmx& xmm, uint8_t imm) { opExt(op, xmm, 0x15, imm, true); } void phaddd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x02, 0x66, NONE, 0x38); } void phaddsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x03, 0x66, NONE, 0x38); } void phaddw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x01, 0x66, NONE, 0x38); } @@ -561,9 +607,9 @@ void phminposuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x41, 0x66, void phsubd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x06, 0x66, NONE, 0x38); } void phsubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x07, 0x66, NONE, 0x38); } void phsubw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x05, 0x66, NONE, 0x38); } -void pinsrb(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x20, 0x66, isXMM_REG32orMEM, imm, 0x3A); } -void pinsrd(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x22, 0x66, isXMM_REG32orMEM, imm, 0x3A); } -void pinsrw(const Mmx& mmx, const Operand& op, int imm) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opGen(mmx, op, 0xC4, mmx.isXMM() ? 
0x66 : NONE, 0, imm); } +void pinsrb(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x20, 0x66, isXMM_REG32orMEM, imm, 0x3A); } +void pinsrd(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x22, 0x66, isXMM_REG32orMEM, imm, 0x3A); } +void pinsrw(const Mmx& mmx, const Operand& op, int imm) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(mmx, op, 0xC4, mmx.isXMM() ? 0x66 : NONE, 0, imm); } void pmaddubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x04, 0x66, NONE, 0x38); } void pmaddwd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF5); } void pmaxsb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3C, 0x66, isXMM_XMMorMEM, NONE, 0x38); } @@ -609,10 +655,10 @@ void prefetchw(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0x0D); } void prefetchwt1(const Address& addr) { opModM(addr, Reg32(2), 0x0F, 0x0D); } void psadbw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF6); } void pshufb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x00, 0x66, NONE, 0x38); } -void pshufd(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0x66, imm8); } -void pshufhw(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0xF3, imm8); } -void pshuflw(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0xF2, imm8); } -void pshufw(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0x00, imm8); } +void pshufd(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x70, 0x66, imm8); } +void pshufhw(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x70, 0xF3, imm8); } +void pshuflw(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x70, 0xF2, imm8); } +void pshufw(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x70, 0x00, imm8); } void psignb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x08, 0x66, NONE, 0x38); } void psignd(const Mmx& mmx, const Operand& 
op) { opMMX(mmx, op, 0x0A, 0x66, NONE, 0x38); } void psignw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x09, 0x66, NONE, 0x38); } @@ -661,21 +707,25 @@ void rcr(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 3); } void rcr(const Operand& op, int imm) { opShift(op, imm, 3); } void rdmsr() { db(0x0F); db(0x32); } void rdpmc() { db(0x0F); db(0x33); } -void rdrand(const Reg& r) { if (r.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModR(Reg(6, Operand::REG, r.getBit()), r, 0x0F, 0xC7); } -void rdseed(const Reg& r) { if (r.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModR(Reg(7, Operand::REG, r.getBit()), r, 0x0F, 0xC7); } +void rdrand(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModR(Reg(6, Operand::REG, r.getBit()), r, 0x0F, 0xC7); } +void rdseed(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModR(Reg(7, Operand::REG, r.getBit()), r, 0x0F, 0xC7); } void rdtsc() { db(0x0F); db(0x31); } void rdtscp() { db(0x0F); db(0x01); db(0xF9); } void rep() { db(0xF3); } +void repe() { db(0xF3); } +void repne() { db(0xF2); } +void repnz() { db(0xF2); } +void repz() { db(0xF3); } void ret(int imm = 0) { if (imm) { db(0xC2); dw(imm); } else { db(0xC3); } } void rol(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 0); } void rol(const Operand& op, int imm) { opShift(op, imm, 0); } void ror(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 1); } void ror(const Operand& op, int imm) { opShift(op, imm, 1); } -void rorx(const Reg32e& r, const Operand& op, uint8 imm) { opGpr(r, op, Reg32e(0, r.getBit()), T_0F3A | T_F2, 0xF0, false, imm); } -void roundpd(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x09, 0x66, isXMM_XMMorMEM, imm, 0x3A); } -void roundps(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x08, 0x66, isXMM_XMMorMEM, imm, 0x3A); } -void roundsd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0B, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } 
-void roundss(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0A, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } +void rorx(const Reg32e& r, const Operand& op, uint8_t imm) { opGpr(r, op, Reg32e(0, r.getBit()), T_0F3A | T_F2, 0xF0, false, imm); } +void roundpd(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x09, 0x66, isXMM_XMMorMEM, imm, 0x3A); } +void roundps(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x08, 0x66, isXMM_XMMorMEM, imm, 0x3A); } +void roundsd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0B, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); } +void roundss(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0A, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); } void rsqrtps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x52, 0x100, isXMM_XMMorMEM); } void rsqrtss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x52, 0xF3, isXMM_XMMorMEM); } void sahf() { db(0x9E); } @@ -684,7 +734,7 @@ void sal(const Operand& op, int imm) { opShift(op, imm, 4); } void sar(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 7); } void sar(const Operand& op, int imm) { opShift(op, imm, 7); } void sarx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_F3 | T_0F38, 0xf7, false); } -void sbb(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x18, 3); } +void sbb(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x18, 3); } void sbb(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x18); } void scasb() { db(0xAE); } void scasd() { db(0xAF); } @@ -723,22 +773,22 @@ void sfence() { db(0x0F); db(0xAE); db(0xF8); } void sha1msg1(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xC9, NONE, isXMM_XMMorMEM, NONE, 0x38); } void sha1msg2(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xCA, NONE, isXMM_XMMorMEM, NONE, 0x38); } void sha1nexte(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xC8, NONE, isXMM_XMMorMEM, NONE, 0x38); } -void 
sha1rnds4(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0xCC, NONE, isXMM_XMMorMEM, imm, 0x3A); } +void sha1rnds4(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0xCC, NONE, isXMM_XMMorMEM, imm, 0x3A); } void sha256msg1(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xCC, NONE, isXMM_XMMorMEM, NONE, 0x38); } void sha256msg2(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xCD, NONE, isXMM_XMMorMEM, NONE, 0x38); } void sha256rnds2(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xCB, NONE, isXMM_XMMorMEM, NONE, 0x38); } void shl(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 4); } void shl(const Operand& op, int imm) { opShift(op, imm, 4); } void shld(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(op, reg, 0, 0xA4, &_cl); } -void shld(const Operand& op, const Reg& reg, uint8 imm) { opShxd(op, reg, imm, 0xA4); } +void shld(const Operand& op, const Reg& reg, uint8_t imm) { opShxd(op, reg, imm, 0xA4); } void shlx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_66 | T_0F38, 0xf7, false); } void shr(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 5); } void shr(const Operand& op, int imm) { opShift(op, imm, 5); } void shrd(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(op, reg, 0, 0xAC, &_cl); } -void shrd(const Operand& op, const Reg& reg, uint8 imm) { opShxd(op, reg, imm, 0xAC); } +void shrd(const Operand& op, const Reg& reg, uint8_t imm) { opShxd(op, reg, imm, 0xAC); } void shrx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_F2 | T_0F38, 0xf7, false); } -void shufpd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC6, 0x66, isXMM_XMMorMEM, imm8); } -void shufps(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC6, 0x100, isXMM_XMMorMEM, imm8); } +void shufpd(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0xC6, 0x66, isXMM_XMMorMEM, imm8); } +void shufps(const Xmm& xmm, 
const Operand& op, uint8_t imm8) { opGen(xmm, op, 0xC6, 0x100, isXMM_XMMorMEM, imm8); } void sqrtpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0x66, isXMM_XMMorMEM); } void sqrtps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0x100, isXMM_XMMorMEM); } void sqrtsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0xF2, isXMM_XMMorMEM); } @@ -751,12 +801,14 @@ void stmxcsr(const Address& addr) { opModM(addr, Reg32(3), 0x0F, 0xAE); } void stosb() { db(0xAA); } void stosd() { db(0xAB); } void stosw() { db(0x66); db(0xAB); } -void sub(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x28, 5); } +void sub(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x28, 5); } void sub(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x28); } void subpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x66, isXMM_XMMorMEM); } void subps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x100, isXMM_XMMorMEM); } void subsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0xF2, isXMM_XMMorMEM); } void subss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0xF3, isXMM_XMMorMEM); } +void sysenter() { db(0x0F); db(0x34); } +void sysexit() { db(0x0F); db(0x35); } void tzcnt(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBC); } void ucomisd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2E, 0x66, isXMM_XMMorMEM); } void ucomiss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2E, 0x100, isXMM_XMMorMEM); } @@ -776,19 +828,19 @@ void vaesdeclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operan void vaesenc(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_YMM | T_EVEX, 0xDC); } void vaesenclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_YMM | T_EVEX, 0xDD); } void vaesimc(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op,
T_66 | T_0F38 | T_W0, 0xDB); } -void vaeskeygenassist(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0xDF, imm); } +void vaeskeygenassist(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0xDF, imm); } void vandnpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x55); } void vandnps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x55); } void vandpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x54); } void vandps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x54); } -void vblendpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0D, imm); } -void vblendps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0C, imm); } +void vblendpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0D, imm); } +void vblendps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0C, imm); } void vblendvpd(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4B, x4.getIdx() << 4); } void vblendvps(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4A, x4.getIdx() << 4); } void vbroadcastf128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x1A); } void 
vbroadcasti128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x5A); } -void vbroadcastsd(const Ymm& y, const Operand& op) { if (!op.isMEM() && !(y.isYMM() && op.isXMM()) && !(y.isZMM() && op.isXMM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(y, op, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_N8, 0x19); } -void vbroadcastss(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x18); } +void vbroadcastsd(const Ymm& y, const Operand& op) { if (!op.isMEM() && !(y.isYMM() && op.isXMM()) && !(y.isZMM() && op.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(y, op, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_N8, 0x19); } +void vbroadcastss(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x18); } void vcmpeq_ospd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 16); } void vcmpeq_osps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 16); } void vcmpeq_ossd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 16); } @@ -901,10 +953,10 @@ void vcmpordpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, void vcmpordps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 7); } void vcmpordsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 7); } void vcmpordss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 7); } -void vcmppd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xC2, imm); } -void vcmpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_0F | T_YMM, 0xC2, imm); } -void vcmpsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 
imm) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F, 0xC2, imm); } -void vcmpss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F, 0xC2, imm); } +void vcmppd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xC2, imm); } +void vcmpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_0F | T_YMM, 0xC2, imm); } +void vcmpsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F, 0xC2, imm); } +void vcmpss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F, 0xC2, imm); } void vcmptrue_uspd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 31); } void vcmptrue_usps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 31); } void vcmptrue_ussd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 31); } @@ -930,7 +982,7 @@ void vcvtpd2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_66 | T_ void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); } void vcvtps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5B); } void vcvtps2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x5A); } -void vcvtps2ph(const Operand& op, const Xmm& x, uint8 imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x1D, imm); } +void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x1D, imm); } void vcvtsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, 
T_0F | T_F2 | T_W0 | T_EVEX | T_EW0 | T_N4 | T_ER_X, 0x2D); } void vcvtsd2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX | T_ER_X, 0x5A); } void vcvtsi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_0F | T_F2 | T_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x2A); } @@ -945,11 +997,11 @@ void vdivpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) void vdivps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5E); } void vdivsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x5E); } void vdivss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x5E); } -void vdppd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x41, imm); } -void vdpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x40, imm); } -void vextractf128(const Operand& op, const Ymm& y, uint8 imm) { if (!(op.isXMEM() && y.isYMM())) throw Error(ERR_BAD_COMBINATION); opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x19, imm); } -void vextracti128(const Operand& op, const Ymm& y, uint8 imm) { if (!(op.isXMEM() && y.isYMM())) throw Error(ERR_BAD_COMBINATION); opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x39, imm); } -void vextractps(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_N4, 0x17, imm); } +void vdppd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x41, imm); } +void 
vdpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x40, imm); } +void vextractf128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x19, imm); } +void vextracti128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x39, imm); } +void vextractps(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_N4, 0x17, imm); } void vfmadd132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x98); } void vfmadd132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x98); } void vfmadd132sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0x99); } @@ -1014,16 +1066,16 @@ void vgatherdpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1 void vgatherdps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x92, 1); } void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x93, 1); } void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x93, 2); } -void vgf2p8affineinvqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCF, imm); } -void vgf2p8affineqb(const Xmm& x1, const Xmm& x2, const 
Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCE, imm); } +void vgf2p8affineinvqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCF, imm); } +void vgf2p8affineqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCE, imm); } void vgf2p8mulb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_SAE_Z, 0xCF); } void vhaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0x7C); } void vhaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0x7C); } void vhsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0x7D); } void vhsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0x7D); } -void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); } -void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); } -void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_W0 | T_EW0 | T_EVEX, 0x21, imm); } +void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) 
XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); } +void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); } +void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_W0 | T_EW0 | T_EVEX, 0x21, imm); } void vlddqu(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_F2 | T_W0 | T_YMM, 0xF0); } void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, T_0F, 0xAE); } void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_66, 0xF7); } @@ -1043,25 +1095,25 @@ void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_ void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x28); } void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX | T_M_K, 0x29); } void vmovaps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x28); } -void vmovd(const Operand& op, const Xmm& x) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x7E); } -void vmovd(const Xmm& x, const Operand& op) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x6E); } +void vmovd(const Operand& op, const Xmm& x) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x7E); } +void vmovd(const Xmm& x, const Operand& op) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x6E); } void 
vmovddup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_DUP | T_F2 | T_0F | T_EW1 | T_YMM | T_EVEX | T_ER_X | T_ER_Y | T_ER_Z, 0x12); } void vmovdqa(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_YMM, 0x7F); } void vmovdqa(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_YMM, 0x6F); } void vmovdqu(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_F3 | T_0F | T_YMM, 0x7F); } void vmovdqu(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_YMM, 0x6F); } -void vmovhlps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x12); } +void vmovhlps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x12); } void vmovhpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x17); } -void vmovhpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, op1, op2, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x16); } +void vmovhpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x16); } void vmovhps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_EVEX | T_EW0 | T_N8, 0x17); } -void vmovhps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, op1, op2, T_0F | T_EVEX | T_EW0 | T_N8, 0x16); } -void vmovlhps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw 
Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x16); } +void vmovhps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_0F | T_EVEX | T_EW0 | T_N8, 0x16); } +void vmovlhps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x16); } void vmovlpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x13); } -void vmovlpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, op1, op2, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x12); } +void vmovlpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x12); } void vmovlps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_EVEX | T_EW0 | T_N8, 0x13); } -void vmovlps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, op1, op2, T_0F | T_EVEX | T_EW0 | T_N8, 0x12); } -void vmovmskpd(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_66 | T_W0 | T_YMM, 0x50); } -void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x.isXMM() ? 
Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_W0 | T_YMM, 0x50); } +void vmovlps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_0F | T_EVEX | T_EW0 | T_N8, 0x12); } +void vmovmskpd(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_66 | T_W0 | T_YMM, 0x50); } +void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_W0 | T_YMM, 0x50); } void vmovntdq(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW0, 0xE7); } void vmovntdqa(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0, 0x2A); } void vmovntpd(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW1, 0x2B); } @@ -1071,17 +1123,17 @@ void vmovq(const Xmm& x, const Address& addr) { int type, code; if (x.getIdx() < void vmovq(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_F3 | T_EVEX | T_EW1 | T_N8, 0x7E); } void vmovsd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX | T_M_K, 0x11); } void vmovsd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX, 0x10); } -void vmovsd(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX, 0x10); } +void vmovsd(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX, 0x10); } void vmovshdup(const Xmm& xm, const 
Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_EW0 | T_YMM | T_EVEX, 0x16); } void vmovsldup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_EW0 | T_YMM | T_EVEX, 0x12); } void vmovss(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX | T_M_K, 0x11); } void vmovss(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX, 0x10); } -void vmovss(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX, 0x10); } +void vmovss(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX, 0x10); } void vmovupd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_M_K, 0x11); } void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x10); } void vmovups(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX | T_M_K, 0x11); } void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x10); } -void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x42, imm); } +void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x42, imm); } void vmulpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x59); } void vmulps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | 
T_B32, 0x59); } void vmulsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x59); } @@ -1103,47 +1155,47 @@ void vpaddsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, void vpaddusb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xDC); } void vpaddusw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xDD); } void vpaddw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xFD); } -void vpalignr(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_YMM | T_EVEX, 0x0F, imm); } +void vpalignr(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_YMM | T_EVEX, 0x0F, imm); } void vpand(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xDB); } void vpandn(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xDF); } void vpavgb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xE0); } void vpavgw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xE3); } -void vpblendd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x02, imm); } +void vpblendd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x02, imm); } void vpblendvb(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4C, x4.getIdx() << 4); } -void vpblendw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { 
opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0E, imm); } -void vpbroadcastb(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N1 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x78); } -void vpbroadcastd(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x58); } -void vpbroadcastq(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_W0 | T_EW1 | T_YMM | T_EVEX, 0x59); } -void vpbroadcastw(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x79); } -void vpclmulqdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM | T_EVEX, 0x44, imm); } +void vpblendw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0E, imm); } +void vpbroadcastb(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N1 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x78); } +void vpbroadcastd(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x58); } +void vpbroadcastq(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_W0 | T_EW1 | T_YMM | T_EVEX, 0x59); } +void vpbroadcastw(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x79); } +void vpclmulqdq(const Xmm& 
x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM | T_EVEX, 0x44, imm); } void vpcmpeqb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x74); } void vpcmpeqd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x76); } void vpcmpeqq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x29); } void vpcmpeqw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x75); } -void vpcmpestri(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x61, imm); } -void vpcmpestrm(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x60, imm); } +void vpcmpestri(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x61, imm); } +void vpcmpestrm(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x60, imm); } void vpcmpgtb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x64); } void vpcmpgtd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x66); } void vpcmpgtq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x37); } void vpcmpgtw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x65); } -void vpcmpistri(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x63, imm); } -void vpcmpistrm(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x62, imm); } -void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | 
T_YMM, 0x06, imm); } -void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); } +void vpcmpistri(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x63, imm); } +void vpcmpistrm(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x62, imm); } +void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); } +void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); } void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x36); } void vpermilpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x0D); } -void vpermilpd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_EVEX | T_B64, 0x05, imm); } +void vpermilpd(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_EVEX | T_B64, 0x05, imm); } void vpermilps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x0C); } -void vpermilps(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_EVEX | T_B32, 0x04, imm); } -void vpermpd(const Ymm& y, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(y, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x01, imm); } +void 
vpermilps(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_EVEX | T_B32, 0x04, imm); } +void vpermpd(const Ymm& y, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(y, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x01, imm); } void vpermpd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x16); } void vpermps(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x16); } -void vpermq(const Ymm& y, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(y, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x00, imm); } +void vpermq(const Ymm& y, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(y, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x00, imm); } void vpermq(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x36); } -void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(8|16|i32e) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x14, imm); } -void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x16, imm); } -void vpextrq(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x16, imm); } -void vpextrw(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(16|i32e) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); if (op.isREG() && x.getIdx() < 16) { opAVX_X_X_XM(Xmm(op.getIdx()), xm0, x, T_0F | T_66, 0xC5, imm); } else { opVex(x, 0, op, T_0F3A | T_66 | 
T_EVEX | T_N2, 0x15, imm); } } +void vpextrb(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(8|16|i32e) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x14, imm); } +void vpextrd(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x16, imm); } +void vpextrq(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x16, imm); } +void vpextrw(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(16|i32e) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) if (op.isREG() && x.getIdx() < 16) { opAVX_X_X_XM(Xmm(op.getIdx()), xm0, x, T_0F | T_66, 0xC5, imm); } else { opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N2, 0x15, imm); } } void vpgatherdd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x90, 1); } void vpgatherdq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x90, 0); } void vpgatherqd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x91, 2); } @@ -1155,10 +1207,10 @@ void vphminposuw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 void vphsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x06); } void vphsubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x07); } void vphsubw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x05); } -void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() 
&& x2.isXMM() && (op.isREG(32) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); opVex(x1, &x2, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x20, imm); } -void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); opVex(x1, &x2, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x22, imm); } -void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(64) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); opVex(x1, &x2, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x22, imm); } -void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); opVex(x1, &x2, op, T_0F | T_66 | T_EVEX | T_N2, 0xC4, imm); } +void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x20, imm); } +void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x22, imm); } +void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(64) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x22, imm); } +void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F | T_66 | T_EVEX | T_N2, 0xC4, imm); } void vpmaddubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | 
T_EVEX, 0x04); } void vpmaddwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF5); } void vpmaskmovd(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x8E); } @@ -1177,7 +1229,7 @@ void vpminsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, void vpminub(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xDA); } void vpminud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x3B); } void vpminuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x3A); } -void vpmovmskb(const Reg32e& r, const Xmm& x) { if (!x.is(Operand::XMM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(x.isYMM() ? Ymm(r.getIdx()) : Xmm(r.getIdx()), 0, x, T_0F | T_66 | T_YMM, 0xD7); } +void vpmovmskb(const Reg32e& r, const Xmm& x) { if (!x.is(Operand::XMM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x.isYMM() ? 
Ymm(r.getIdx()) : Xmm(r.getIdx()), 0, x, T_0F | T_66 | T_YMM, 0xD7); } void vpmovsxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N4 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x21); } void vpmovsxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N2 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x22); } void vpmovsxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x20); } @@ -1200,34 +1252,34 @@ void vpmuludq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1 void vpor(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xEB); } void vpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF6); } void vpshufb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x00); } -void vpshufd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x70, imm); } -void vpshufhw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_YMM | T_EVEX, 0x70, imm); } -void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_F2 | T_0F | T_YMM | T_EVEX, 0x70, imm); } +void vpshufd(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x70, imm); } +void vpshufhw(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_YMM | T_EVEX, 0x70, imm); } +void vpshuflw(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_F2 | T_0F | T_YMM | T_EVEX, 0x70, imm); } void vpsignb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x08); } void vpsignd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x0A); } void 
vpsignw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x09); } -void vpslld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); } +void vpslld(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); } void vpslld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xF2); } -void vpslldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); } -void vpsllq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); } +void vpslldq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); } +void vpsllq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); } void vpsllq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xF3); } void vpsllvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x47); } void vpsllvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x47); } -void vpsllw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); } +void vpsllw(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | 
T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); } void vpsllw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xF1); } -void vpsrad(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); } +void vpsrad(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); } void vpsrad(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xE2); } void vpsravd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x46); } -void vpsraw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); } +void vpsraw(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); } void vpsraw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xE1); } -void vpsrld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); } +void vpsrld(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); } void vpsrld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xD2); } -void vpsrldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); } -void vpsrlq(const Xmm& x, const Operand& op, uint8 imm) { 
opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); } +void vpsrldq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); } +void vpsrlq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); } void vpsrlq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xD3); } void vpsrlvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x45); } void vpsrlvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x45); } -void vpsrlw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); } +void vpsrlw(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); } void vpsrlw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xD1); } void vpsubb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF8); } void vpsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xFA); } @@ -1249,14 +1301,14 @@ void vpunpcklwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM( void vpxor(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xEF); } void vrcpps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_YMM, 0x53); } void vrcpss(const Xmm& x1, const Xmm& x2, const Operand& op) { 
opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F, 0x53); } -void vroundpd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_YMM, 0x09, imm); } -void vroundps(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_YMM, 0x08, imm); } -void vroundsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x0B, imm); } -void vroundss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x0A, imm); } +void vroundpd(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_YMM, 0x09, imm); } +void vroundps(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_YMM, 0x08, imm); } +void vroundsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x0B, imm); } +void vroundss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x0A, imm); } void vrsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_YMM, 0x52); } void vrsqrtss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F, 0x52); } -void vshufpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0xC6, imm); } -void vshufps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xC6, imm); } +void vshufpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0xC6, imm); } +void vshufps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xC6, imm); } void vsqrtpd(const Xmm& xm, const Operand& op) { 
opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x51); } void vsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x51); } void vsqrtsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX | T_ER_X, 0x51); } @@ -1284,13 +1336,13 @@ void wrmsr() { db(0x0F); db(0x30); } void xadd(const Operand& op, const Reg& reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xC0 | (reg.isBit(8) ? 0 : 1)); } void xgetbv() { db(0x0F); db(0x01); db(0xD0); } void xlatb() { db(0xD7); } -void xor_(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x30, 6); } +void xor_(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x30, 6); } void xor_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x30); } void xorpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x57, 0x66, isXMM_XMMorMEM); } void xorps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x57, 0x100, isXMM_XMMorMEM); } #ifdef XBYAK_ENABLE_OMITTED_OPERAND -void vblendpd(const Xmm& x, const Operand& op, uint8 imm) { vblendpd(x, x, op, imm); } -void vblendps(const Xmm& x, const Operand& op, uint8 imm) { vblendps(x, x, op, imm); } +void vblendpd(const Xmm& x, const Operand& op, uint8_t imm) { vblendpd(x, x, op, imm); } +void vblendps(const Xmm& x, const Operand& op, uint8_t imm) { vblendps(x, x, op, imm); } void vblendvpd(const Xmm& x1, const Operand& op, const Xmm& x4) { vblendvpd(x1, x1, op, x4); } void vblendvps(const Xmm& x1, const Operand& op, const Xmm& x4) { vblendvps(x1, x1, op, x4); } void vcmpeq_ospd(const Xmm& x, const Operand& op) { vcmpeq_ospd(x, x, op); } @@ -1405,10 +1457,10 @@ void vcmpordpd(const Xmm& x, const Operand& op) { vcmpordpd(x, x, op); } void vcmpordps(const Xmm& x, const Operand& op) { vcmpordps(x, x, op); } void vcmpordsd(const Xmm& x, const Operand& op) { vcmpordsd(x, x, op); } void 
vcmpordss(const Xmm& x, const Operand& op) { vcmpordss(x, x, op); } -void vcmppd(const Xmm& x, const Operand& op, uint8 imm) { vcmppd(x, x, op, imm); } -void vcmpps(const Xmm& x, const Operand& op, uint8 imm) { vcmpps(x, x, op, imm); } -void vcmpsd(const Xmm& x, const Operand& op, uint8 imm) { vcmpsd(x, x, op, imm); } -void vcmpss(const Xmm& x, const Operand& op, uint8 imm) { vcmpss(x, x, op, imm); } +void vcmppd(const Xmm& x, const Operand& op, uint8_t imm) { vcmppd(x, x, op, imm); } +void vcmpps(const Xmm& x, const Operand& op, uint8_t imm) { vcmpps(x, x, op, imm); } +void vcmpsd(const Xmm& x, const Operand& op, uint8_t imm) { vcmpsd(x, x, op, imm); } +void vcmpss(const Xmm& x, const Operand& op, uint8_t imm) { vcmpss(x, x, op, imm); } void vcmptrue_uspd(const Xmm& x, const Operand& op) { vcmptrue_uspd(x, x, op); } void vcmptrue_usps(const Xmm& x, const Operand& op) { vcmptrue_usps(x, x, op); } void vcmptrue_ussd(const Xmm& x, const Operand& op) { vcmptrue_ussd(x, x, op); } @@ -1429,10 +1481,10 @@ void vcvtsd2ss(const Xmm& x, const Operand& op) { vcvtsd2ss(x, x, op); } void vcvtsi2sd(const Xmm& x, const Operand& op) { vcvtsi2sd(x, x, op); } void vcvtsi2ss(const Xmm& x, const Operand& op) { vcvtsi2ss(x, x, op); } void vcvtss2sd(const Xmm& x, const Operand& op) { vcvtss2sd(x, x, op); } -void vdppd(const Xmm& x, const Operand& op, uint8 imm) { vdppd(x, x, op, imm); } -void vdpps(const Xmm& x, const Operand& op, uint8 imm) { vdpps(x, x, op, imm); } -void vinsertps(const Xmm& x, const Operand& op, uint8 imm) { vinsertps(x, x, op, imm); } -void vmpsadbw(const Xmm& x, const Operand& op, uint8 imm) { vmpsadbw(x, x, op, imm); } +void vdppd(const Xmm& x, const Operand& op, uint8_t imm) { vdppd(x, x, op, imm); } +void vdpps(const Xmm& x, const Operand& op, uint8_t imm) { vdpps(x, x, op, imm); } +void vinsertps(const Xmm& x, const Operand& op, uint8_t imm) { vinsertps(x, x, op, imm); } +void vmpsadbw(const Xmm& x, const Operand& op, uint8_t imm) { vmpsadbw(x, x, op, imm); } 
void vpackssdw(const Xmm& x, const Operand& op) { vpackssdw(x, x, op); } void vpacksswb(const Xmm& x, const Operand& op) { vpacksswb(x, x, op); } void vpackusdw(const Xmm& x, const Operand& op) { vpackusdw(x, x, op); } @@ -1445,15 +1497,15 @@ void vpaddsw(const Xmm& x, const Operand& op) { vpaddsw(x, x, op); } void vpaddusb(const Xmm& x, const Operand& op) { vpaddusb(x, x, op); } void vpaddusw(const Xmm& x, const Operand& op) { vpaddusw(x, x, op); } void vpaddw(const Xmm& x, const Operand& op) { vpaddw(x, x, op); } -void vpalignr(const Xmm& x, const Operand& op, uint8 imm) { vpalignr(x, x, op, imm); } +void vpalignr(const Xmm& x, const Operand& op, uint8_t imm) { vpalignr(x, x, op, imm); } void vpand(const Xmm& x, const Operand& op) { vpand(x, x, op); } void vpandn(const Xmm& x, const Operand& op) { vpandn(x, x, op); } void vpavgb(const Xmm& x, const Operand& op) { vpavgb(x, x, op); } void vpavgw(const Xmm& x, const Operand& op) { vpavgw(x, x, op); } -void vpblendd(const Xmm& x, const Operand& op, uint8 imm) { vpblendd(x, x, op, imm); } +void vpblendd(const Xmm& x, const Operand& op, uint8_t imm) { vpblendd(x, x, op, imm); } void vpblendvb(const Xmm& x1, const Operand& op, const Xmm& x4) { vpblendvb(x1, x1, op, x4); } -void vpblendw(const Xmm& x, const Operand& op, uint8 imm) { vpblendw(x, x, op, imm); } -void vpclmulqdq(const Xmm& x, const Operand& op, uint8 imm) { vpclmulqdq(x, x, op, imm); } +void vpblendw(const Xmm& x, const Operand& op, uint8_t imm) { vpblendw(x, x, op, imm); } +void vpclmulqdq(const Xmm& x, const Operand& op, uint8_t imm) { vpclmulqdq(x, x, op, imm); } void vpcmpeqb(const Xmm& x, const Operand& op) { vpcmpeqb(x, x, op); } void vpcmpeqd(const Xmm& x, const Operand& op) { vpcmpeqd(x, x, op); } void vpcmpeqq(const Xmm& x, const Operand& op) { vpcmpeqq(x, x, op); } @@ -1468,10 +1520,10 @@ void vphaddw(const Xmm& x, const Operand& op) { vphaddw(x, x, op); } void vphsubd(const Xmm& x, const Operand& op) { vphsubd(x, x, op); } void vphsubsw(const 
Xmm& x, const Operand& op) { vphsubsw(x, x, op); } void vphsubw(const Xmm& x, const Operand& op) { vphsubw(x, x, op); } -void vpinsrb(const Xmm& x, const Operand& op, uint8 imm) { vpinsrb(x, x, op, imm); } -void vpinsrd(const Xmm& x, const Operand& op, uint8 imm) { vpinsrd(x, x, op, imm); } -void vpinsrq(const Xmm& x, const Operand& op, uint8 imm) { vpinsrq(x, x, op, imm); } -void vpinsrw(const Xmm& x, const Operand& op, uint8 imm) { vpinsrw(x, x, op, imm); } +void vpinsrb(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrb(x, x, op, imm); } +void vpinsrd(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrd(x, x, op, imm); } +void vpinsrq(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrq(x, x, op, imm); } +void vpinsrw(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrw(x, x, op, imm); } void vpmaddubsw(const Xmm& x, const Operand& op) { vpmaddubsw(x, x, op); } void vpmaddwd(const Xmm& x, const Operand& op) { vpmaddwd(x, x, op); } void vpmaxsb(const Xmm& x, const Operand& op) { vpmaxsb(x, x, op); } @@ -1499,23 +1551,23 @@ void vpsignb(const Xmm& x, const Operand& op) { vpsignb(x, x, op); } void vpsignd(const Xmm& x, const Operand& op) { vpsignd(x, x, op); } void vpsignw(const Xmm& x, const Operand& op) { vpsignw(x, x, op); } void vpslld(const Xmm& x, const Operand& op) { vpslld(x, x, op); } -void vpslld(const Xmm& x, uint8 imm) { vpslld(x, x, imm); } -void vpslldq(const Xmm& x, uint8 imm) { vpslldq(x, x, imm); } +void vpslld(const Xmm& x, uint8_t imm) { vpslld(x, x, imm); } +void vpslldq(const Xmm& x, uint8_t imm) { vpslldq(x, x, imm); } void vpsllq(const Xmm& x, const Operand& op) { vpsllq(x, x, op); } -void vpsllq(const Xmm& x, uint8 imm) { vpsllq(x, x, imm); } +void vpsllq(const Xmm& x, uint8_t imm) { vpsllq(x, x, imm); } void vpsllw(const Xmm& x, const Operand& op) { vpsllw(x, x, op); } -void vpsllw(const Xmm& x, uint8 imm) { vpsllw(x, x, imm); } +void vpsllw(const Xmm& x, uint8_t imm) { vpsllw(x, x, imm); } void vpsrad(const Xmm& x, const 
Operand& op) { vpsrad(x, x, op); } -void vpsrad(const Xmm& x, uint8 imm) { vpsrad(x, x, imm); } +void vpsrad(const Xmm& x, uint8_t imm) { vpsrad(x, x, imm); } void vpsraw(const Xmm& x, const Operand& op) { vpsraw(x, x, op); } -void vpsraw(const Xmm& x, uint8 imm) { vpsraw(x, x, imm); } +void vpsraw(const Xmm& x, uint8_t imm) { vpsraw(x, x, imm); } void vpsrld(const Xmm& x, const Operand& op) { vpsrld(x, x, op); } -void vpsrld(const Xmm& x, uint8 imm) { vpsrld(x, x, imm); } -void vpsrldq(const Xmm& x, uint8 imm) { vpsrldq(x, x, imm); } +void vpsrld(const Xmm& x, uint8_t imm) { vpsrld(x, x, imm); } +void vpsrldq(const Xmm& x, uint8_t imm) { vpsrldq(x, x, imm); } void vpsrlq(const Xmm& x, const Operand& op) { vpsrlq(x, x, op); } -void vpsrlq(const Xmm& x, uint8 imm) { vpsrlq(x, x, imm); } +void vpsrlq(const Xmm& x, uint8_t imm) { vpsrlq(x, x, imm); } void vpsrlw(const Xmm& x, const Operand& op) { vpsrlw(x, x, op); } -void vpsrlw(const Xmm& x, uint8 imm) { vpsrlw(x, x, imm); } +void vpsrlw(const Xmm& x, uint8_t imm) { vpsrlw(x, x, imm); } void vpsubb(const Xmm& x, const Operand& op) { vpsubb(x, x, op); } void vpsubd(const Xmm& x, const Operand& op) { vpsubd(x, x, op); } void vpsubq(const Xmm& x, const Operand& op) { vpsubq(x, x, op); } @@ -1534,11 +1586,11 @@ void vpunpcklqdq(const Xmm& x, const Operand& op) { vpunpcklqdq(x, x, op); } void vpunpcklwd(const Xmm& x, const Operand& op) { vpunpcklwd(x, x, op); } void vpxor(const Xmm& x, const Operand& op) { vpxor(x, x, op); } void vrcpss(const Xmm& x, const Operand& op) { vrcpss(x, x, op); } -void vroundsd(const Xmm& x, const Operand& op, uint8 imm) { vroundsd(x, x, op, imm); } -void vroundss(const Xmm& x, const Operand& op, uint8 imm) { vroundss(x, x, op, imm); } +void vroundsd(const Xmm& x, const Operand& op, uint8_t imm) { vroundsd(x, x, op, imm); } +void vroundss(const Xmm& x, const Operand& op, uint8_t imm) { vroundss(x, x, op, imm); } void vrsqrtss(const Xmm& x, const Operand& op) { vrsqrtss(x, x, op); } -void 
vshufpd(const Xmm& x, const Operand& op, uint8 imm) { vshufpd(x, x, op, imm); } -void vshufps(const Xmm& x, const Operand& op, uint8 imm) { vshufps(x, x, op, imm); } +void vshufpd(const Xmm& x, const Operand& op, uint8_t imm) { vshufpd(x, x, op, imm); } +void vshufps(const Xmm& x, const Operand& op, uint8_t imm) { vshufps(x, x, op, imm); } void vsqrtsd(const Xmm& x, const Operand& op) { vsqrtsd(x, x, op); } void vsqrtss(const Xmm& x, const Operand& op) { vsqrtss(x, x, op); } void vunpckhpd(const Xmm& x, const Operand& op) { vunpckhpd(x, x, op); } @@ -1554,21 +1606,39 @@ void jrcxz(const Label& label) { opJmp(label, T_SHORT, 0xe3, 0, 0); } void cdqe() { db(0x48); db(0x98); } void cqo() { db(0x48); db(0x99); } void cmpsq() { db(0x48); db(0xA7); } +void popfq() { db(0x9D); } +void pushfq() { db(0x9C); } +void lodsq() { db(0x48); db(0xAD); } void movsq() { db(0x48); db(0xA5); } void scasq() { db(0x48); db(0xAF); } void stosq() { db(0x48); db(0xAB); } +void syscall() { db(0x0F); db(0x05); } +void sysret() { db(0x0F); db(0x07); } void cmpxchg16b(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xC7); } +void fxrstor64(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xAE); } void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); } void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); } -void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) throw Error(ERR_BAD_COMBINATION); opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); } -void pextrq(const Operand& op, const Xmm& xmm, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opGen(Reg64(xmm.getIdx()), op, 0x16, 0x66, 0, imm, 0x3A); } -void pinsrq(const Xmm& xmm, const Operand& op, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opGen(Reg64(xmm.getIdx()), op, 0x22, 0x66, 0, imm, 0x3A); } +void movsxd(const Reg64& reg, const Operand& op) { if 
(!op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); } +void pextrq(const Operand& op, const Xmm& xmm, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(Reg64(xmm.getIdx()), op, 0x16, 0x66, 0, imm, 0x3A); } +void pinsrq(const Xmm& xmm, const Operand& op, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(Reg64(xmm.getIdx()), op, 0x22, 0x66, 0, imm, 0x3A); } void vcvtss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_ER_X | T_N8, 0x2D); } void vcvttss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_SAE_X | T_N8, 0x2C); } void vcvtsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_ER_X, 0x2D); } void vcvttsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_SAE_X, 0x2C); } void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); } void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); } +void ldtilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_0F38 | T_W0, 0x49); } +void sttilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); } +void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2 | T_0F38 | T_W0, 0x4b); } +void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66 | T_0F38 | T_W0, 0x4b); } +void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); } +void tilestored(const Address& addr, const Tmm& tm) { opVex(tm, &tmm0, addr, T_F3 | T_0F38 | T_W0, 0x4b); } +void tilezero(const Tmm& Tmm) { opVex(Tmm, &tmm0, tmm0, T_F2 | T_0F38 | T_W0, 
0x49); } +void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F2 | T_0F38 | T_W0, 0x5e); } +void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); } +void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66 | T_0F38 | T_W0, 0x5e); } +void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38 | T_W0, 0x5e); } +void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5c); } #else void jcxz(std::string label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); } void jcxz(const Label& label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); } @@ -1580,20 +1650,23 @@ void aam() { db(0xD4); db(0x0A); } void aas() { db(0x3F); } void daa() { db(0x27); } void das() { db(0x2F); } +void into() { db(0xCE); } void popad() { db(0x61); } void popfd() { db(0x9D); } void pusha() { db(0x60); } void pushad() { db(0x60); } void pushfd() { db(0x9C); } void popa() { db(0x61); } +void lds(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0xC5, 0x100); } +void les(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0xC4, 0x100); } #endif #ifndef XBYAK_NO_OP_NAMES void and(const Operand& op1, const Operand& op2) { and_(op1, op2); } -void and(const Operand& op, uint32 imm) { and_(op, imm); } +void and(const Operand& op, uint32_t imm) { and_(op, imm); } void or(const Operand& op1, const Operand& op2) { or_(op1, op2); } -void or(const Operand& op, uint32 imm) { or_(op, imm); } +void or(const Operand& op, uint32_t imm) { or_(op, imm); } void xor(const Operand& op1, const Operand& op2) { xor_(op1, op2); } -void xor(const Operand& op, uint32 imm) { xor_(op, imm); } +void xor(const Operand& op, uint32_t imm) { xor_(op, imm); } void not(const Operand& op) { not_(op); } #endif #ifndef XBYAK_DISABLE_AVX512 @@ -1610,17 +1683,17 @@ void kandnw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, 
&r void kandq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x41); } void kandw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x41); } void kmovb(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W0, 0x91); } -void kmovb(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); } +void kmovb(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); } void kmovb(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_66 | T_W0, 0x92); } void kmovb(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_66 | T_W0, 0x93); } void kmovd(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W1, 0x91); } -void kmovd(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); } +void kmovd(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); } void kmovd(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W0, 0x92); } void kmovd(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W0, 0x93); } void kmovq(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W1, 0x91); } -void kmovq(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); } +void kmovq(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); } void kmovw(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W0, 0x91); } -void kmovw(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); } +void kmovw(const Opmask& k, const Operand& op) { if (!op.isMEM() && 
!op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); } void kmovw(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_W0, 0x92); } void kmovw(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_W0, 0x93); } void knotb(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W0, 0x44); } @@ -1635,14 +1708,14 @@ void kortestd(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 void kortestq(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W1, 0x98); } void kortestw(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W0, 0x98); } void korw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x45); } -void kshiftlb(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x32, imm); } -void kshiftld(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x33, imm); } -void kshiftlq(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x33, imm); } -void kshiftlw(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x32, imm); } -void kshiftrb(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x30, imm); } -void kshiftrd(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x31, imm); } -void kshiftrq(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x31, imm); } -void kshiftrw(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x30, imm); } +void kshiftlb(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x32, imm); } +void kshiftld(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x33, imm); } +void kshiftlq(const Opmask& r1, const Opmask& r2, 
uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x33, imm); } +void kshiftlw(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x32, imm); } +void kshiftrb(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x30, imm); } +void kshiftrd(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x31, imm); } +void kshiftrq(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x31, imm); } +void kshiftrw(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x30, imm); } void ktestb(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W0, 0x99); } void ktestd(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W1, 0x99); } void ktestq(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W1, 0x99); } @@ -1662,8 +1735,8 @@ void v4fmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM void v4fmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0x9B); } void v4fnmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0xAA); } void v4fnmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0xAB); } -void valignd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x03, imm); } -void valignq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x03, imm); } +void valignd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x03, imm); } +void 
valignq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x03, imm); } void vblendmpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x65); } void vblendmps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x65); } void vbroadcastf32x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N8, 0x19); } @@ -1676,14 +1749,16 @@ void vbroadcasti32x4(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_ void vbroadcasti32x8(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32, 0x5B); } void vbroadcasti64x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x5A); } void vbroadcasti64x4(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x5B); } -void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); } -void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); } -void vcmpsd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_N8 | T_F2 | T_0F | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); } -void vcmpss(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_N4 | T_F3 | T_0F | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); } +void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0xC2, imm); } +void vcmpps(const Opmask& k, const Xmm& 
x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0xC2, imm); } +void vcmpsd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_N8 | T_F2 | T_0F | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); } +void vcmpss(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_N4 | T_F3 | T_0F | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); } void vcompressb(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N1 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x63); } void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8A); } void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8A); } void vcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x63); } +void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); } +void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); } void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7B); } void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x79); } void vcvtpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x79); } @@ -1708,27 +1783,28 @@ void vcvtuqq2pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | void vcvtuqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F2 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x7A); } void vcvtusi2sd(const Xmm& x1, const Xmm& 
x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); } void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); } -void vdbpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x42, imm); } +void vdbpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x42, imm); } +void vdpbf16ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x52); } void vexp2pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xC8); } void vexp2ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xC8); } void vexpandpd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x88); } void vexpandps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x88); } -void vextractf32x4(const Operand& op, const Ymm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::XMM)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x19, imm); } -void vextractf32x8(const Operand& op, const Zmm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x1B, imm); } -void vextractf64x2(const Operand& op, const Ymm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::XMM)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | 
T_MUST_EVEX, 0x19, imm); } -void vextractf64x4(const Operand& op, const Zmm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x1B, imm); } -void vextracti32x4(const Operand& op, const Ymm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::XMM)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x39, imm); } -void vextracti32x8(const Operand& op, const Zmm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3B, imm); } -void vextracti64x2(const Operand& op, const Ymm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::XMM)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x39, imm); } -void vextracti64x4(const Operand& op, const Zmm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3B, imm); } -void vfixupimmpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x54, imm); } -void vfixupimmps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x54, imm); } -void vfixupimmsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); } -void vfixupimmss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); } -void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); 
opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); } -void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); } -void vfpclasssd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); } -void vfpclassss(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); } +void vextractf32x4(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x19, imm); } +void vextractf32x8(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x1B, imm); } +void vextractf64x2(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x19, imm); } +void vextractf64x4(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x1B, imm); } +void vextracti32x4(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x39, imm); } +void vextracti32x8(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, 
T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3B, imm); } +void vextracti64x2(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x39, imm); } +void vextracti64x4(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3B, imm); } +void vfixupimmpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x54, imm); } +void vfixupimmps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x54, imm); } +void vfixupimmsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); } +void vfixupimmss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); } +void vfpclasspd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); } +void vfpclassps(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); } +void vfpclasssd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); } +void vfpclassss(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) 
XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); } void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 1); } void vgatherdps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 0); } void vgatherpf0dpd(const Address& addr) { opGatherFetch(addr, zm1, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); } @@ -1745,18 +1821,18 @@ void vgetexppd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T void vgetexpps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x42); } void vgetexpsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x43); } void vgetexpss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x43); } -void vgetmantpd(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x26, imm); } -void vgetmantps(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x26, imm); } -void vgetmantsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x27, imm); } -void vgetmantss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x27, imm); } -void vinsertf32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A 
| T_EW0 | T_YMM | T_MUST_EVEX, 0x18, imm); } -void vinsertf32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8 imm) {if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x1A, imm); } -void vinsertf64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x18, imm); } -void vinsertf64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8 imm) {if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x1A, imm); } -void vinserti32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x38, imm); } -void vinserti32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8 imm) {if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3A, imm); } -void vinserti64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x38, imm); } -void vinserti64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8 imm) {if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3A, imm); } +void vgetmantpd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x26, imm); } 
+void vgetmantps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x26, imm); } +void vgetmantsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x27, imm); } +void vgetmantss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x27, imm); } +void vinsertf32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x18, imm); } +void vinsertf32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x1A, imm); } +void vinsertf64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x18, imm); } +void vinsertf64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x1A, imm); } +void vinserti32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x38, imm); } +void vinserti32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, 
T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3A, imm); } +void vinserti64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x38, imm); } +void vinserti64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3A, imm); } void vmovdqa32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); } void vmovdqa32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); } void vmovdqa64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); } @@ -1769,6 +1845,8 @@ void vmovdqu64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3 void vmovdqu64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); } void vmovdqu8(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); } void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); } +void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); } +void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, 
x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); } void vp4dpwssd(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x52); } void vp4dpwssds(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x53); } void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); } @@ -1785,8 +1863,8 @@ void vpbroadcastd(const Xmm& x, const Reg32& r) { opVex(x, 0, r, T_66 | T_0F38 | void vpbroadcastmb2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x2A); } void vpbroadcastmw2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x3A); } void vpbroadcastw(const Xmm& x, const Reg16& r) { opVex(x, 0, r, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x7B); } -void vpcmpb(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3F, imm); } -void vpcmpd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x1F, imm); } +void vpcmpb(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3F, imm); } +void vpcmpd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x1F, imm); } void vpcmpeqb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX, 0x74); } void vpcmpeqd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_B32, 0x76); } void vpcmpeqq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | 
T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x29); } @@ -1795,12 +1873,12 @@ void vpcmpgtb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k void vpcmpgtd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x66); } void vpcmpgtq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x37); } void vpcmpgtw(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX, 0x65); } -void vpcmpq(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x1F, imm); } -void vpcmpub(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3E, imm); } -void vpcmpud(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x1E, imm); } -void vpcmpuq(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x1E, imm); } -void vpcmpuw(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3E, imm); } -void vpcmpw(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3F, imm); } +void vpcmpq(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x1F, imm); } +void vpcmpub(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3E, imm); } +void vpcmpud(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, 
T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x1E, imm); } +void vpcmpuq(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x1E, imm); } +void vpcmpuw(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3E, imm); } +void vpcmpw(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3F, imm); } void vpcompressd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8B); } void vpcompressq(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8B); } void vpconflictd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xC4); } @@ -1873,39 +1951,39 @@ void vpopcntq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_ void vpopcntw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x54); } void vpord(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xEB); } void vporq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xEB); } -void vprold(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 1), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x72, imm); } -void vprolq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 1), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); } +void vprold(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 1), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x72, imm); } +void 
vprolq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 1), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); } void vprolvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x15); } void vprolvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x15); } -void vprord(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 0), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x72, imm); } -void vprorq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 0), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); } +void vprord(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 0), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x72, imm); } +void vprorq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 0), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); } void vprorvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x14); } void vprorvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x14); } void vpscatterdd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA0, 0); } void vpscatterdq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA0, 1); } void vpscatterqd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA1, 2); } void vpscatterqq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | 
T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA1, 0); } -void vpshldd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71, imm); } -void vpshldq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x71, imm); } +void vpshldd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71, imm); } +void vpshldq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x71, imm); } void vpshldvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71); } void vpshldvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x71); } void vpshldvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x70); } -void vpshldw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x70, imm); } -void vpshrdd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x73, imm); } -void vpshrdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x73, imm); } +void vpshldw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x70, imm); } +void vpshrdd(const Xmm& x1, const Xmm& x2, const 
Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x73, imm); } +void vpshrdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x73, imm); } void vpshrdvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x73); } void vpshrdvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x73); } void vpshrdvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x72); } -void vpshrdw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x72, imm); } +void vpshrdw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x72, imm); } void vpshufbitqmb(const Opmask& k, const Xmm& x, const Operand& op) { opVex(k, &x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8F); } void vpsllvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x12); } -void vpsraq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); } +void vpsraq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); } void vpsraq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX, 0xE2); } void vpsravq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | 
T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x46); } void vpsravw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x11); } void vpsrlvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x10); } -void vpternlogd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x25, imm); } -void vpternlogq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x25, imm); } +void vpternlogd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x25, imm); } +void vpternlogq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x25, imm); } void vptestmb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x26); } void vptestmd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x27); } void vptestmq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x27); } @@ -1916,10 +1994,10 @@ void vptestnmq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM( void vptestnmw(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_F3 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x26); } void vpxord(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xEF); } void vpxorq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | 
T_B64, 0xEF); } -void vrangepd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x50, imm); } -void vrangeps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x50, imm); } -void vrangesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x51, imm); } -void vrangess(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x51, imm); } +void vrangepd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x50, imm); } +void vrangeps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x50, imm); } +void vrangesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x51, imm); } +void vrangess(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x51, imm); } void vrcp14pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x4C); } void vrcp14ps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x4C); } void vrcp14sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX, 0x4D); } @@ -1928,14 +2006,14 @@ void vrcp28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_ void vrcp28ps(const Zmm& z, const Operand& op) { 
opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCA); } void vrcp28sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0xCB); } void vrcp28ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0xCB); } -void vreducepd(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x56, imm); } -void vreduceps(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x56, imm); } -void vreducesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x57, imm); } -void vreducess(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x57, imm); } -void vrndscalepd(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x09, imm); } -void vrndscaleps(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x08, imm); } -void vrndscalesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_MUST_EVEX, 0x0B, imm); } -void vrndscaless(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_MUST_EVEX, 0x0A, imm); } +void vreducepd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x56, imm); } +void vreduceps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX 
| T_B32, 0x56, imm); } +void vreducesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x57, imm); } +void vreducess(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x57, imm); } +void vrndscalepd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x09, imm); } +void vrndscaleps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x08, imm); } +void vrndscalesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_MUST_EVEX, 0x0B, imm); } +void vrndscaless(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_MUST_EVEX, 0x0A, imm); } void vrsqrt14pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x4E); } void vrsqrt14ps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x4E); } void vrsqrt14sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x4F); } @@ -1960,10 +2038,10 @@ void vscatterpf1qpd(const Address& addr) { opGatherFetch(addr, zm6, T_N8 | T_66 void vscatterpf1qps(const Address& addr) { opGatherFetch(addr, zm6, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); } void vscatterqpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA3, 0); } void vscatterqps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA3, 2); } 
-void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); } -void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); } -void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); } -void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); } +void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); } +void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); } +void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); } +void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); } #ifdef XBYAK64 void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); } void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); } diff --git a/xbyak/xbyak_util.h b/xbyak/xbyak_util.h index c2474c5b..1516fc33 100644 --- a/xbyak/xbyak_util.h +++ b/xbyak/xbyak_util.h @@ -1,6 +1,18 @@ #ifndef XBYAK_XBYAK_UTIL_H_ #define XBYAK_XBYAK_UTIL_H_ +#ifdef XBYAK_ONLY_CLASS_CPU +#include +#include +#include +#include +#ifndef XBYAK_THROW + #define XBYAK_THROW(x) ; + #define XBYAK_THROW_RET(x, y) return y; +#endif +#else +#include + /** utility class 
and functions for Xbyak Xbyak::util::Clock ; rdtsc timer @@ -8,6 +20,7 @@ @note this header is UNDER CONSTRUCTION! */ #include "xbyak.h" +#endif // XBYAK_ONLY_CLASS_CPU #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) #define XBYAK_INTEL_CPU_SPECIFIC @@ -54,6 +67,20 @@ #endif #endif +#ifdef XBYAK_USE_VTUNE + // -I /opt/intel/vtune_amplifier/include/ -L /opt/intel/vtune_amplifier/lib64 -ljitprofiling -ldl + #include + #ifdef _MSC_VER + #pragma comment(lib, "libittnotify.lib") + #endif + #ifdef __linux__ + #include + #endif +#endif +#ifdef __linux__ + #define XBYAK_USE_PERF +#endif + namespace Xbyak { namespace util { typedef enum { @@ -65,7 +92,7 @@ typedef enum { CPU detection class */ class Cpu { - uint64 type_; + uint64_t type_; //system topology bool x2APIC_supported_; static const size_t maxTopologyLevels = 2; @@ -132,6 +159,11 @@ class Cpu { numCores_[level - 1] = extractBit(data[1], 0, 15); } } + /* + Fallback values in case a hypervisor has 0xB leaf zeroed-out. + */ + numCores_[SmtLevel - 1] = (std::max)(1u, numCores_[SmtLevel - 1]); + numCores_[CoreLevel - 1] = (std::max)(numCores_[SmtLevel - 1], numCores_[CoreLevel - 1]); } else { /* Failed to deremine num of cores without x2APIC support. 
@@ -199,24 +231,24 @@ public: int displayFamily; // family + extFamily int displayModel; // model + extModel - unsigned int getNumCores(IntelCpuTopologyLevel level) { - if (!x2APIC_supported_) throw Error(ERR_X2APIC_IS_NOT_SUPPORTED); + unsigned int getNumCores(IntelCpuTopologyLevel level) const { + if (!x2APIC_supported_) XBYAK_THROW_RET(ERR_X2APIC_IS_NOT_SUPPORTED, 0) switch (level) { case SmtLevel: return numCores_[level - 1]; case CoreLevel: return numCores_[level - 1] / numCores_[SmtLevel - 1]; - default: throw Error(ERR_X2APIC_IS_NOT_SUPPORTED); + default: XBYAK_THROW_RET(ERR_X2APIC_IS_NOT_SUPPORTED, 0) } } unsigned int getDataCacheLevels() const { return dataCacheLevels_; } unsigned int getCoresSharingDataCache(unsigned int i) const { - if (i >= dataCacheLevels_) throw Error(ERR_BAD_PARAMETER); + if (i >= dataCacheLevels_) XBYAK_THROW_RET(ERR_BAD_PARAMETER, 0) return coresSharignDataCache_[i]; } unsigned int getDataCacheSize(unsigned int i) const { - if (i >= dataCacheLevels_) throw Error(ERR_BAD_PARAMETER); + if (i >= dataCacheLevels_) XBYAK_THROW_RET(ERR_BAD_PARAMETER, 0) return dataCacheSize_[i]; } @@ -250,7 +282,7 @@ public: (void)data; #endif } - static inline uint64 getXfeature() + static inline uint64_t getXfeature() { #ifdef XBYAK_INTEL_CPU_SPECIFIC #ifdef _MSC_VER @@ -260,13 +292,13 @@ public: // xgetvb is not support on gcc 4.2 // __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0)); __asm__ volatile(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0)); - return ((uint64)edx << 32) | eax; + return ((uint64_t)edx << 32) | eax; #endif #else return 0; #endif } - typedef uint64 Type; + typedef uint64_t Type; static const Type NONE = 0; static const Type tMMX = 1 << 0; @@ -303,34 +335,39 @@ public: static const Type tADX = 1 << 28; // adcx, adox static const Type tRDSEED = 1 << 29; // rdseed static const Type tSMAP = 1 << 30; // stac - static const Type tHLE = uint64(1) << 31; // xacquire, xrelease, xtest - static const Type tRTM = uint64(1) 
<< 32; // xbegin, xend, xabort - static const Type tF16C = uint64(1) << 33; // vcvtph2ps, vcvtps2ph - static const Type tMOVBE = uint64(1) << 34; // mobve - static const Type tAVX512F = uint64(1) << 35; - static const Type tAVX512DQ = uint64(1) << 36; - static const Type tAVX512_IFMA = uint64(1) << 37; + static const Type tHLE = uint64_t(1) << 31; // xacquire, xrelease, xtest + static const Type tRTM = uint64_t(1) << 32; // xbegin, xend, xabort + static const Type tF16C = uint64_t(1) << 33; // vcvtph2ps, vcvtps2ph + static const Type tMOVBE = uint64_t(1) << 34; // mobve + static const Type tAVX512F = uint64_t(1) << 35; + static const Type tAVX512DQ = uint64_t(1) << 36; + static const Type tAVX512_IFMA = uint64_t(1) << 37; static const Type tAVX512IFMA = tAVX512_IFMA; - static const Type tAVX512PF = uint64(1) << 38; - static const Type tAVX512ER = uint64(1) << 39; - static const Type tAVX512CD = uint64(1) << 40; - static const Type tAVX512BW = uint64(1) << 41; - static const Type tAVX512VL = uint64(1) << 42; - static const Type tAVX512_VBMI = uint64(1) << 43; + static const Type tAVX512PF = uint64_t(1) << 38; + static const Type tAVX512ER = uint64_t(1) << 39; + static const Type tAVX512CD = uint64_t(1) << 40; + static const Type tAVX512BW = uint64_t(1) << 41; + static const Type tAVX512VL = uint64_t(1) << 42; + static const Type tAVX512_VBMI = uint64_t(1) << 43; static const Type tAVX512VBMI = tAVX512_VBMI; // changed by Intel's manual - static const Type tAVX512_4VNNIW = uint64(1) << 44; - static const Type tAVX512_4FMAPS = uint64(1) << 45; - static const Type tPREFETCHWT1 = uint64(1) << 46; - static const Type tPREFETCHW = uint64(1) << 47; - static const Type tSHA = uint64(1) << 48; - static const Type tMPX = uint64(1) << 49; - static const Type tAVX512_VBMI2 = uint64(1) << 50; - static const Type tGFNI = uint64(1) << 51; - static const Type tVAES = uint64(1) << 52; - static const Type tVPCLMULQDQ = uint64(1) << 53; - static const Type tAVX512_VNNI = uint64(1) << 
54; - static const Type tAVX512_BITALG = uint64(1) << 55; - static const Type tAVX512_VPOPCNTDQ = uint64(1) << 56; + static const Type tAVX512_4VNNIW = uint64_t(1) << 44; + static const Type tAVX512_4FMAPS = uint64_t(1) << 45; + static const Type tPREFETCHWT1 = uint64_t(1) << 46; + static const Type tPREFETCHW = uint64_t(1) << 47; + static const Type tSHA = uint64_t(1) << 48; + static const Type tMPX = uint64_t(1) << 49; + static const Type tAVX512_VBMI2 = uint64_t(1) << 50; + static const Type tGFNI = uint64_t(1) << 51; + static const Type tVAES = uint64_t(1) << 52; + static const Type tVPCLMULQDQ = uint64_t(1) << 53; + static const Type tAVX512_VNNI = uint64_t(1) << 54; + static const Type tAVX512_BITALG = uint64_t(1) << 55; + static const Type tAVX512_VPOPCNTDQ = uint64_t(1) << 56; + static const Type tAVX512_BF16 = uint64_t(1) << 57; + static const Type tAVX512_VP2INTERSECT = uint64_t(1) << 58; + static const Type tAMX_TILE = uint64_t(1) << 59; + static const Type tAMX_INT8 = uint64_t(1) << 60; + static const Type tAMX_BF16 = uint64_t(1) << 61; Cpu() : type_(NONE) @@ -385,7 +422,7 @@ public: if (type_ & tOSXSAVE) { // check XFEATURE_ENABLED_MASK[2:1] = '11b' - uint64 bv = getXfeature(); + uint64_t bv = getXfeature(); if ((bv & 6) == 6) { if (ECX & (1U << 28)) type_ |= tAVX; if (ECX & (1U << 12)) type_ |= tFMA; @@ -410,6 +447,12 @@ public: if (ECX & (1U << 14)) type_ |= tAVX512_VPOPCNTDQ; if (EDX & (1U << 2)) type_ |= tAVX512_4VNNIW; if (EDX & (1U << 3)) type_ |= tAVX512_4FMAPS; + if (EDX & (1U << 8)) type_ |= tAVX512_VP2INTERSECT; + } + // EAX=07H, ECX=1 + getCpuidEx(7, 1, data); + if (type_ & tAVX512F) { + if (EAX & (1U << 5)) type_ |= tAVX512_BF16; } } } @@ -428,6 +471,9 @@ public: if (EBX & (1U << 14)) type_ |= tMPX; if (EBX & (1U << 29)) type_ |= tSHA; if (ECX & (1U << 0)) type_ |= tPREFETCHWT1; + if (EDX & (1U << 24)) type_ |= tAMX_TILE; + if (EDX & (1U << 25)) type_ |= tAMX_INT8; + if (EDX & (1U << 22)) type_ |= tAMX_BF16; } setFamily(); setNumCores(); @@ 
-435,9 +481,11 @@ public: } void putFamily() const { +#ifndef XBYAK_ONLY_CLASS_CPU printf("family=%d, model=%X, stepping=%d, extFamily=%d, extModel=%X\n", family, model, stepping, extFamily, extModel); printf("display:family=%X, model=%X\n", displayFamily, displayModel); +#endif } bool has(Type type) const { @@ -445,9 +493,10 @@ public: } }; +#ifndef XBYAK_ONLY_CLASS_CPU class Clock { public: - static inline uint64 getRdtsc() + static inline uint64_t getRdtsc() { #ifdef XBYAK_INTEL_CPU_SPECIFIC #ifdef _MSC_VER @@ -455,7 +504,7 @@ public: #else unsigned int eax, edx; __asm__ volatile("rdtsc" : "=a"(eax), "=d"(edx)); - return ((uint64)edx << 32) | eax; + return ((uint64_t)edx << 32) | eax; #endif #else // TODO: Need another impl of Clock or rdtsc-equivalent for non-x86 cpu @@ -477,10 +526,10 @@ public: count_++; } int getCount() const { return count_; } - uint64 getClock() const { return clock_; } + uint64_t getClock() const { return clock_; } void clear() { count_ = 0; clock_ = 0; } private: - uint64 clock_; + uint64_t clock_; int count_; }; @@ -530,7 +579,7 @@ public: { if (n_ == maxTblNum) { fprintf(stderr, "ERR Pack::can't append\n"); - throw Error(ERR_BAD_PARAMETER); + XBYAK_THROW_RET(ERR_BAD_PARAMETER, *this) } tbl_[n_++] = &t; return *this; @@ -539,7 +588,7 @@ public: { if (n > maxTblNum) { fprintf(stderr, "ERR Pack::init bad n=%d\n", (int)n); - throw Error(ERR_BAD_PARAMETER); + XBYAK_THROW(ERR_BAD_PARAMETER) } n_ = n; for (size_t i = 0; i < n; i++) { @@ -550,7 +599,7 @@ public: { if (n >= n_) { fprintf(stderr, "ERR Pack bad n=%d(%d)\n", (int)n, (int)n_); - throw Error(ERR_BAD_PARAMETER); + XBYAK_THROW_RET(ERR_BAD_PARAMETER, rax) } return *tbl_[n]; } @@ -563,7 +612,7 @@ public: if (num == size_t(-1)) num = n_ - pos; if (pos + num > n_) { fprintf(stderr, "ERR Pack::sub bad pos=%d, num=%d\n", (int)pos, (int)num); - throw Error(ERR_BAD_PARAMETER); + XBYAK_THROW_RET(ERR_BAD_PARAMETER, Pack()) } Pack pack; pack.n_ = num; @@ -638,9 +687,9 @@ public: , t(t_) { using 
namespace Xbyak; - if (pNum < 0 || pNum > 4) throw Error(ERR_BAD_PNUM); + if (pNum < 0 || pNum > 4) XBYAK_THROW(ERR_BAD_PNUM) const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0); - if (tNum_ < 0 || allRegNum > maxRegNum) throw Error(ERR_BAD_TNUM); + if (tNum_ < 0 || allRegNum > maxRegNum) XBYAK_THROW(ERR_BAD_TNUM) const Reg64& _rsp = code->rsp; saveNum_ = (std::max)(0, allRegNum - noSaveNum); const int *tbl = getOrderTbl() + noSaveNum; @@ -682,12 +731,7 @@ public: ~StackFrame() { if (!makeEpilog_) return; - try { - close(); - } catch (std::exception& e) { - printf("ERR:StackFrame %s\n", e.what()); - exit(1); - } + close(); } private: const int *getOrderTbl() const @@ -722,5 +766,137 @@ private: }; #endif -} } // end of util +class Profiler { + int mode_; + const char *suffix_; + const void *startAddr_; +#ifdef XBYAK_USE_PERF + FILE *fp_; +#endif +public: + enum { + None = 0, + Perf = 1, + VTune = 2 + }; + Profiler() + : mode_(None) + , suffix_("") + , startAddr_(0) +#ifdef XBYAK_USE_PERF + , fp_(0) +#endif + { + } + // append suffix to funcName + void setNameSuffix(const char *suffix) + { + suffix_ = suffix; + } + void setStartAddr(const void *startAddr) + { + startAddr_ = startAddr; + } + void init(int mode) + { + mode_ = None; + switch (mode) { + default: + case None: + return; + case Perf: +#ifdef XBYAK_USE_PERF + close(); + { + const int pid = getpid(); + char name[128]; + snprintf(name, sizeof(name), "/tmp/perf-%d.map", pid); + fp_ = fopen(name, "a+"); + if (fp_ == 0) { + fprintf(stderr, "can't open %s\n", name); + return; + } + } + mode_ = Perf; +#endif + return; + case VTune: +#ifdef XBYAK_USE_VTUNE + dlopen("dummy", RTLD_LAZY); // force to load dlopen to enable jit profiling + if (iJIT_IsProfilingActive() != iJIT_SAMPLING_ON) { + fprintf(stderr, "VTune profiling is not active\n"); + return; + } + mode_ = VTune; +#endif + return; + } + } + ~Profiler() + { + close(); + } + void close() + { +#ifdef XBYAK_USE_PERF + if (fp_ == 0) return; 
+ fclose(fp_); + fp_ = 0; +#endif + } + void set(const char *funcName, const void *startAddr, size_t funcSize) const + { + if (mode_ == None) return; +#if !defined(XBYAK_USE_PERF) && !defined(XBYAK_USE_VTUNE) + (void)funcName; + (void)startAddr; + (void)funcSize; +#endif +#ifdef XBYAK_USE_PERF + if (mode_ == Perf) { + if (fp_ == 0) return; + fprintf(fp_, "%llx %zx %s%s", (long long)startAddr, funcSize, funcName, suffix_); + /* + perf does not recognize the function name which is less than 3, + so append '_' at the end of the name if necessary + */ + size_t n = strlen(funcName) + strlen(suffix_); + for (size_t i = n; i < 3; i++) { + fprintf(fp_, "_"); + } + fprintf(fp_, "\n"); + fflush(fp_); + } +#endif +#ifdef XBYAK_USE_VTUNE + if (mode_ != VTune) return; + char className[] = ""; + char fileName[] = ""; + iJIT_Method_Load jmethod = {}; + jmethod.method_id = iJIT_GetNewMethodID(); + jmethod.class_file_name = className; + jmethod.source_file_name = fileName; + jmethod.method_load_address = const_cast(startAddr); + jmethod.method_size = funcSize; + jmethod.line_number_size = 0; + char buf[128]; + snprintf(buf, sizeof(buf), "%s%s", funcName, suffix_); + jmethod.method_name = buf; + iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, (void*)&jmethod); +#endif + } + /* + for continuous set + funcSize = endAddr - + */ + void set(const char *funcName, const void *endAddr) + { + set(funcName, startAddr_, (size_t)endAddr - (size_t)startAddr_); + startAddr_ = endAddr; + } +}; +#endif // XBYAK_ONLY_CLASS_CPU + +} } // end of util + #endif