From 8042dc93e8115f7d489edd410633c82e9991846d Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sat, 19 Sep 2020 11:27:42 -0400 Subject: [PATCH] Squashed 'externals/xbyak/' changes from 73ac5866..0140eeff 0140eeff Merge branch 'dev' 1efe14b2 change the original behavior of SetError 83c89c7a rename and fix indent 8be7ca93 Merge branch 'sbogusev-master' into dev 070b4c09 make l_err() inline with block scope static TLS l_error 9a4e6579 v5.97 d0ced1bc XBYAK_ONLY_CLASS_CPU is for only util::Cpu bb967ae7 replace uint32 with uint32_t etc. c306b8e5 update to v5.95 605e4224 use noexcept if C++11 or later 7a17c2c8 remove warning 5dfa4462 use constexpr if c++14 or later 18c9caaa Merge branch 'densamoilov-fix-mov-interface' into dev 3966ba9d fix mov interface be492be1 change the behavior of push((byte|word), imm) to cast imm to int8_t/int16_t d9696b54 Merge pull request #102 from igorsafo/master ea73267f Cpu: make getNumCores constant ff0b10e9 Merge pull request #101 from densamoilov/use-thread_local-when-supported 0c4eafc3 use thread_local for XBYAK_TLS when supported c1aea35e CodeGenerator::reset() calls ClearError() b4df97b1 Merge branch 'cursey-no-winsock2-header' 6a47bb0e v5.94 9a1749e6 define WIN32_LEAN_AND_MEAN for including winsock2.h after xbyak.h 42dddb74 Remove #include 615b85fa update doc 9cd796a9 rename XBYAK_NOEXCEPTION to XBYAK_NO_EXCEPTION 7cdf227f use static to avoid multiple instance 38a28dec test_nm.bat supports noexcept 0fdffc6b XBYAK_NOEXCEPTION for -fno-exceptions eda6e2a3 v5.92 5c26c8bb mov(rax, imm64) on 32-bit env with XBYAK64 6208e3ae throw exception if not supported amx sibmem 2 c6737d14 mov amx insts from avx512 34ea5c16 throw exception if not supported amx sibmem 6f93fe35 fix test of sizeof(Operand) 5b89c3b2 remove T_TMM 5ce32858 gen_amx.cpp is merged into gen_avx512.cpp fe4f965f remove my alias for tmm registers 92f904d8 bit_ contains 8192 98b51da9 extend mnemonics with Intel(R) AMX ISA 8d1b4c9e add generation of Intel(R) AMX ISA mnemonics 8ded45d1 add support of Intel(R) AMX ISA b23c4b02 v5.912 ffe32a60 Merge branch 'rsdubtso-master' e7b7fd2f use MAP_JIT on macOS regardless of Xcode version 82b70e66 v5.911 ; XBYAK_USE_MMAP_ALLOCATOR is defined 2f6d9e34 fix test for mac a7d10a1e add link to GitHub Sponsor 96076265 accept k0 mask register (it means no mask) 7e3167e4 kmov{b,w,d,q} throws for unsupported reg f487d7b7 Merge pull request #91 from marcelotrevisani/patch-1 dc9e6a79 Possibility to specify a different PREFIX 5fc69fc8 remove warning of test e69e0b42 fix typo of type of Zmi 34f797e8 perf does not recognize too short function name 6cc0f4df Consider max defined as a macro on Windows 5722393d fix for zeroed-out 0xb leaf 6a4459a8 Merge branch 'tyfkda-feature/fix-segfault-in-calc' 47922ed9 Fix segmentation fault in calc sample 8f696e93 add test_avx512 to bat 00114d79 add .travis.yml a29fa27b refactor test 508b543c fix error of vfpclasspd 0d54f1b1 fix for windows 4da8fd4e add setDefaultJmpNEAR da7f7317 revert to the behavior before v5.84 if -fno-operator-names is defined 7dac9f61 update to v5.85 fe639332 enable MAP_JIT only if mojave or later 4443d791 specify MAP_JIT mmap flag on macOS 20ee4c2d update doc ca0e8395 [changed] XBYAK_NO_OP_NAMES is defined f32836da remove exit(1) a1e9adf2 v5.82 08b8b1ba Support AMD Zen New Instructions. 2501ba9a remove *.user and *.vcproj 5c2ea988 Merge branch 'jrmwng-feature/upgrade-to-vs2017/jrmwng' 35847f7a Merge branch 'feature/upgrade-to-vs2017/jrmwng' of https://github.com/jrmwng/xbyak into jrmwng-feature/upgrade-to-vs2017/jrmwng ef267775 address "warning LNK4075: ignoring '/EDITANDCONTINUE' due to '/SAFESEH' specification" 4a6c59bb address a conflict of sharing intermediate directory by different projects 9577cbf3 inherit "some output locations" from parent or project defaults 6c5f7186 upgrade projects from VS2018 to VS2017 4ca0434b v5.81 72b4e95d add lds/lss/les/lfs/lgs cc8f037c fix ; move ERR_INTERNAL to the end 9e9ec1c3 add repe, repne, repne, prez eea0edc3 add some fpu mnemonics 06235fa6 add loop/loope/loopne 7fc0c2bb add enter/leave 9fa2ef3c add in_, out_ df208648 add lods{b,w,d,q}, outs{b,w,d} 4672d2cb add int3, int_, into 431977cb add pushfq, popfq 81c4749f syscall, sysenter, sysexit, sysret 1f1b53c4 add clflushopt, fldenv, fnstw b765db33 Profiler uses append mode 44dc3546 add Profiler class 42949334 update version to v5.802 91cb919b Merge branch 'vpirogov-master' a6452f82 fixed avx512_bf16 detection f41da5aa tweak ; vcvtneps2bf16 calls opCvt2 b12460ba [sample] fix typo of quantize.cpp b22f5881 add set_opt.bat for test on Windows f402faad add vp2intersectd/vp2intersectq 4cfd5208 add avx512_bf16 4033564c fix vcmppd/vcmpps for ptr_b git-subtree-dir: externals/xbyak git-subtree-split: 0140eeff1fffcf5069dea3abb57095695320971c --- .travis.yml | 12 + Makefile | 2 +- gen/gen_avx512.cpp | 93 ++-- gen/gen_code.cpp | 380 ++++++++++------ readme.md | 101 ++++- readme.txt | 45 +- sample/Makefile | 24 +- sample/bf.cpp | 2 +- sample/bf.vcproj | 427 ------------------ sample/bf.vcxproj | 228 ++++++++++ sample/calc.cpp | 6 +- sample/calc.vcproj | 423 ------------------ sample/calc.vcxproj | 228 ++++++++++ sample/calc2.cpp | 4 +- sample/profiler.cpp | 90 ++++ sample/quantize.cpp | 35 +- sample/quantize.vcproj | 427 ------------------ sample/quantize.vcxproj | 228 ++++++++++ sample/test0.cpp | 6 +- sample/test0.vcproj | 427 ------------------ sample/test0.vcxproj | 228 ++++++++++ sample/test_util.cpp | 2 + sample/test_util.vcproj | 427 ------------------ sample/test_util.vcxproj | 228 ++++++++++ sample/toyvm.cpp | 48 +- sample/toyvm.vcproj | 427 ------------------ sample/toyvm.vcxproj | 228 ++++++++++ test/Makefile | 31 +- test/bad_address.cpp | 49 +-- test/jmp.cpp | 141 +++++- test/make_512.cpp | 205 ++++----- test/make_nm.cpp | 340 ++++++++++----- test/misc.cpp | 134 +++++- test/nm_frame.cpp | 37 +- test/noexception.cpp | 111 +++++ test/normalize_prefix.cpp | 2 +- test/set_opt.bat | 2 + test/sf_test.cpp | 4 +- test/test_address.sh | 1 - test/test_avx.sh | 5 +- test/test_avx512.sh | 3 +- test/test_avx_all.bat | 4 + test/test_nm.bat | 9 +- test/test_nm.sh | 12 +- xbyak.sln | 24 +- xbyak/xbyak.h | 896 ++++++++++++++++++++++---------------- xbyak/xbyak_mnemonic.h | 600 ++++++++++++++----------- xbyak/xbyak_util.h | 282 +++++++++--- 48 files changed, 3852 insertions(+), 3816 deletions(-) create mode 100644 .travis.yml delete mode 100644 sample/bf.vcproj create mode 100644 sample/bf.vcxproj delete mode 100644 sample/calc.vcproj create mode 100644 sample/calc.vcxproj create mode 100644 sample/profiler.cpp delete mode 100644 sample/quantize.vcproj create mode 100644 sample/quantize.vcxproj delete mode 100644 sample/test0.vcproj create mode 100644 sample/test0.vcxproj delete mode 100644 sample/test_util.vcproj create mode 100644 sample/test_util.vcxproj delete mode 100644 sample/toyvm.vcproj create mode 100644 sample/toyvm.vcxproj create mode 100644 test/noexception.cpp create mode 100644 test/set_opt.bat diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..4de8772d --- /dev/null +++ b/.travis.yml @@ -0,0 +1,12 @@ +sudo: true +dist: bionic +language: cpp +compiler: + - gcc + - clang +addons: + apt: + packages: + - nasm yasm g++-multilib tcsh +script: + - make test diff --git a/Makefile b/Makefile index a7850a22..f91f6261 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -PREFIX=/usr/local +PREFIX?=/usr/local INSTALL_DIR=$(PREFIX)/include/xbyak all: diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index 069ca540..250c8d4b 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -23,7 +23,7 @@ void putOpmask(bool only64bit) { const struct Tbl { const char *name; - uint8 code; + uint8_t code; } tbl[] = { { "kadd", 0x4A }, { "kand", 0x41 }, @@ -46,7 +46,7 @@ void putOpmask(bool only64bit) { const struct Tbl { const char *name; - uint8 code; + uint8_t code; } tbl[] = { { "knot", 0x44 }, { "kortest", 0x98 }, @@ -63,23 +63,23 @@ void putOpmask(bool only64bit) { const struct Tbl { const char *name; - uint8 code; + uint8_t code; } tbl[] = { { "kshiftl", 0x32 }, { "kshiftr", 0x30 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; - printf("void %sw(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x%02X, imm); }\n", p.name, p.code); - printf("void %sq(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x%02X, imm); }\n", p.name, p.code + 1); - printf("void %sb(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code); - printf("void %sd(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code + 1); + printf("void %sw(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x%02X, imm); }\n", p.name, p.code); + printf("void %sq(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x%02X, imm); }\n", p.name, p.code + 1); + printf("void %sb(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code); + printf("void %sd(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code + 1); } } - puts("void kmovw(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); }"); - puts("void kmovq(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); }"); - puts("void kmovb(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); }"); - puts("void kmovd(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); }"); + puts("void kmovw(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); }"); + puts("void kmovq(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); }"); + puts("void kmovb(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); }"); + puts("void kmovd(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); }"); puts("void kmovw(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W0, 0x91); }"); puts("void kmovq(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W1, 0x91); }"); @@ -98,13 +98,13 @@ void putOpmask(bool only64bit) void putVcmp() { const struct Tbl { - uint8 code; + uint8_t code; const char *name; int type; bool hasIMM; } tbl[] = { - { 0xC2, "vcmppd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_YMM | T_66, true }, - { 0xC2, "vcmpps", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_YMM, true }, + { 0xC2, "vcmppd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_YMM | T_66 | T_B64, true }, + { 0xC2, "vcmpps", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_YMM | T_B32, true }, { 0xC2, "vcmpsd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_F2 | T_N8, true }, { 0xC2, "vcmpss", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_F3 | T_N4, true }, @@ -142,7 +142,7 @@ void putVcmp() const Tbl *p = &tbl[i]; std::string type = type2String(p->type); printf("void %s(const Opmask& k, const Xmm& x, const Operand& op%s) { opAVX_K_X_XM(k, x, op, %s, 0x%02X%s); }\n" - , p->name, p->hasIMM ? ", uint8 imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); + , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); } } @@ -150,7 +150,7 @@ void putVcmp() void putX_XM() { const struct Tbl { - uint8 code; + uint8_t code; const char *name; int type; } tbl[] = { @@ -198,7 +198,7 @@ void putX_XM() void putM_X() { const struct Tbl { - uint8 code; + uint8_t code; const char *name; int type; } tbl[] = { @@ -219,7 +219,7 @@ void putM_X() void putXM_X() { const struct Tbl { - uint8 code; + uint8_t code; const char *name; int type; } tbl[] = { @@ -242,7 +242,7 @@ void putXM_X() void putX_X_XM_IMM() { const struct Tbl { - uint8 code; + uint8_t code; const char *name; int type; bool hasIMM; @@ -368,12 +368,15 @@ void putX_X_XM_IMM() { 0x52, "vpdpwssd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, { 0x53, "vpdpwssds", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, + + { 0x72, "vcvtne2ps2bf16", T_F2 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, + { 0x52, "vdpbf16ps", T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; std::string type = type2String(p->type); printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X%s); }\n" - , p->name, p->hasIMM ? ", uint8 imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); + , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); } } @@ -381,7 +384,7 @@ void putShift() { const struct Tbl { const char *name; - uint8 code; + uint8_t code; int idx; int type; } tbl[] = { @@ -394,7 +397,7 @@ void putShift() for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; std::string type = type2String(p.type); - printf("void %s(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, type.c_str(), p.code); + printf("void %s(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, type.c_str(), p.code); } } @@ -403,7 +406,7 @@ void putExtractInsert() { const struct Tbl { const char *name; - uint8 code; + uint8_t code; int type; bool isZMM; } tbl[] = { @@ -421,13 +424,13 @@ void putExtractInsert() const Tbl& p = tbl[i]; std::string type = type2String(p.type); const char *kind = p.isZMM ? "Operand::MEM | Operand::YMM" : "Operand::MEM | Operand::XMM"; - printf("void %s(const Operand& op, const %s& r, uint8 imm) { if (!op.is(%s)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, %s, 0x%2X, imm); }\n", p.name, p.isZMM ? "Zmm" : "Ymm", kind, type.c_str(), p.code); + printf("void %s(const Operand& op, const %s& r, uint8_t imm) { if (!op.is(%s)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, %s, 0x%2X, imm); }\n", p.name, p.isZMM ? "Zmm" : "Ymm", kind, type.c_str(), p.code); } } { const struct Tbl { const char *name; - uint8 code; + uint8_t code; int type; bool isZMM; } tbl[] = { @@ -446,8 +449,8 @@ void putExtractInsert() std::string type = type2String(p.type); const char *x = p.isZMM ? "Zmm" : "Ymm"; const char *cond = p.isZMM ? "op.is(Operand::MEM | Operand::YMM)" : "(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))"; - printf("void %s(const %s& r1, const %s& r2, const Operand& op, uint8 imm) {" - "if (!%s) throw Error(ERR_BAD_COMBINATION); " + printf("void %s(const %s& r1, const %s& r2, const Operand& op, uint8_t imm) {" + "if (!%s) XBYAK_THROW(ERR_BAD_COMBINATION) " "opVex(r1, &r2, op, %s, 0x%2X, imm); }\n", p.name, x, x, cond, type.c_str(), p.code); } } @@ -457,7 +460,7 @@ void putBroadcast(bool only64bit) { { const struct Tbl { - uint8 code; + uint8_t code; const char *name; int type; int reg; @@ -519,7 +522,7 @@ void putGather() const struct Tbl { const char *name; int type; - uint8 code; + uint8_t code; int mode; } tbl[] = { { "vpgatherdd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x90, xx_yy_zz }, @@ -542,7 +545,7 @@ void putScatter() const struct Tbl { const char *name; int type; - uint8 code; + uint8_t code; int mode; // reverse of gather } tbl[] = { { "vpscatterdd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA0, xx_yy_zz }, @@ -564,10 +567,10 @@ void putScatter() void putShuff() { - puts("void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); }"); - puts("void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }"); - puts("void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }"); - puts("void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }"); + puts("void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); }"); + puts("void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }"); + puts("void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }"); + puts("void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }"); } void putMov() @@ -624,7 +627,7 @@ void putMov() void putX_XM_IMM() { const struct Tbl { - uint8 code; + uint8_t code; const char *name; int type; bool hasIMM; @@ -661,7 +664,7 @@ void putX_XM_IMM() const Tbl *p = &tbl[i]; std::string type = type2String(p->type); printf("void %s(const Xmm& x, const Operand& op%s) { opAVX_X_XM_IMM(x, op, %s, 0x%02X%s); }\n" - , p->name, p->hasIMM ? ", uint8 imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); + , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); } } @@ -674,7 +677,7 @@ void putMisc() const char *name; int zm; int type; - uint8 code; + uint8_t code; bool isZmm; } tbl[] = { { "vgatherpf0dps", 1, T_EW0 | T_N4, 0xC6, true }, @@ -705,12 +708,16 @@ void putMisc() } } - puts("void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }"); - puts("void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }"); - puts("void vfpclasssd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }"); - puts("void vfpclassss(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }"); + puts("void vfpclasspd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }"); + puts("void vfpclassps(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }"); + puts("void vfpclasssd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }"); + puts("void vfpclassss(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }"); puts("void vpshufbitqmb(const Opmask& k, const Xmm& x, const Operand& op) { opVex(k, &x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8F); }"); + puts("void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }"); + + puts("void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }"); + puts("void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); }"); } void putV4FMA() @@ -728,7 +735,9 @@ int main(int argc, char *[]) bool only64bit = argc == 2; putOpmask(only64bit); putBroadcast(only64bit); - if (only64bit) return 0; + if (only64bit) { + return 0; + } putVcmp(); putX_XM(); putM_X(); diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp index 43984c0c..ba7dbf61 100644 --- a/gen/gen_code.cpp +++ b/gen/gen_code.cpp @@ -23,9 +23,9 @@ void put_jREGz(const char *reg, bool prefix) struct GenericTbl { const char *name; - uint8 code1; - uint8 code2; - uint8 code3; + uint8_t code1; + uint8_t code2; + uint8_t code3; }; void putGeneric(const GenericTbl *p, size_t n) @@ -44,7 +44,7 @@ void putX_X_XM(bool omitOnly) // (x, x, x/m[, imm]) or (y, y, y/m[, imm]) { const struct Tbl { - uint8 code; + uint8_t code; const char *name; int type; bool hasIMM; @@ -212,25 +212,37 @@ void putX_X_XM(bool omitOnly) std::string type = type2String(p->type); if (omitOnly) { if (p->enableOmit) { - printf("void v%s(const Xmm& x, const Operand& op%s) { v%s(x, x, op%s); }\n", p->name, p->hasIMM ? ", uint8 imm" : "", p->name, p->hasIMM ? ", imm" : ""); + printf("void v%s(const Xmm& x, const Operand& op%s) { v%s(x, x, op%s); }\n", p->name, p->hasIMM ? ", uint8_t imm" : "", p->name, p->hasIMM ? ", imm" : ""); } } else { if (p->mode & 1) { if (p->hasIMM) { - printf("void %s(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x%02X, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); }\n", p->name, p->code); + printf("void %s(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x%02X, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); }\n", p->name, p->code); } else { printf("void %s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%02X, 0x66, isXMM_XMMorMEM, NONE, 0x38); }\n", p->name, p->code); } } if (p->mode & 2) { printf("void v%s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X%s); }\n" - , p->name, p->hasIMM ? ", uint8 imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); + , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); } } } } } +void putMemOp(const char *name, uint8_t prefix, uint8_t ext, uint8_t code1, int code2, int bit = 32) +{ + printf("void %s(const Address& addr) { ", name); + if (prefix) printf("db(0x%02X); ", prefix); + printf("opModM(addr, Reg%d(%d), 0x%02X, 0x%02X); }\n", bit, ext, code1, code2); +} + +void putLoadSeg(const char *name, uint8_t code1, int code2 = NONE) +{ + printf("void %s(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x%02X, 0x%02X); }\n", name, code1, code2); +} + void put() { const int NO = CodeGenerator::NONE; @@ -250,7 +262,7 @@ void put() const int Q = 1 << 3; { const struct Tbl { - uint8 code; + uint8_t code; const char *name; } tbl[] = { // MMX @@ -300,7 +312,7 @@ void put() { const struct Tbl { - uint8 code; + uint8_t code; int mode; const char *name; } tbl[] = { @@ -334,7 +346,7 @@ void put() { const struct Tbl { - uint8 code; + uint8_t code; int ext; int mode; const char *name; @@ -364,8 +376,8 @@ void put() { const struct Tbl { - uint8 code; - uint8 pref; + uint8_t code; + uint8_t pref; const char *name; } tbl[] = { { 0x70, 0, "pshufw" }, @@ -375,13 +387,13 @@ void put() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - printf("void %s(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x%02X, 0x%02X, imm8); }\n", p->name, p->code, p->pref); + printf("void %s(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x%02X, 0x%02X, imm8); }\n", p->name, p->code, p->pref); } } { const struct MmxTbl6 { - uint8 code; // for (reg, reg/[mem]) - uint8 code2; // for ([mem], reg) + uint8_t code; // for (reg, reg/[mem]) + uint8_t code2; // for ([mem], reg) int pref; const char *name; } mmxTbl6[] = { @@ -420,7 +432,7 @@ void put() { 0xF2, "sd" }, }; const struct Tbl { - uint8 code; + uint8_t code; int mode; const char *name; bool hasImm; @@ -449,8 +461,8 @@ void put() for (size_t j = 0; j < NUM_OF_ARRAY(sufTbl); j++) { if (!(p->mode & (1 << j))) continue; if (p->hasImm) { - // don't change uint8 to int because NO is not in byte - printf("void %s%s(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0x%2X, 0x%02X, isXMM_XMMorMEM, imm8); }\n", p->name, sufTbl[j].name, p->code, sufTbl[j].code); + // don't change uint8_t to int because NO is not in byte + printf("void %s%s(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0x%2X, 0x%02X, isXMM_XMMorMEM, imm8); }\n", p->name, sufTbl[j].name, p->code, sufTbl[j].code); } else { printf("void %s%s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%2X, 0x%02X, isXMM_XMMorMEM); }\n", p->name, sufTbl[j].name, p->code, sufTbl[j].code); } @@ -460,8 +472,8 @@ void put() { // (XMM, XMM) const struct Tbl { - uint8 code; - uint8 pref; + uint8_t code; + uint8_t pref; const char *name; } tbl[] = { { 0xF7, 0x66, "maskmovdqu" }, @@ -478,7 +490,7 @@ void put() { // (XMM, XMM|MEM) const struct Tbl { - uint8 code; + uint8_t code; int pref; const char *name; } tbl[] = { @@ -510,7 +522,7 @@ void put() { // special type const struct Tbl { - uint8 code; + uint8_t code; int pref; const char *name; const char *cond; @@ -554,7 +566,7 @@ void put() } { const struct Tbl { - uint8 code; + uint8_t code; int pref; const char *name; } tbl[] = { @@ -571,7 +583,7 @@ void put() { // cmov const struct Tbl { - uint8 ext; + uint8_t ext; const char *name; } tbl[] = { { 0, "o" }, @@ -616,6 +628,22 @@ void put() printf("void set%s(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | %d); }%s\n", p->name, p->ext, msg); } } + { + const struct Tbl { + const char *name; + uint8_t code; + } tbl[] = { + { "loop", 0xE2 }, + { "loope", 0xE1 }, + { "loopne", 0xE0 }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + printf("void %s(std::string label) { opJmp(label, T_SHORT, 0x%02X, 0, 0); }\n", p->name, p->code); + printf("void %s(const Label& label) { opJmp(label, T_SHORT, 0x%02X, 0, 0); }\n", p->name, p->code); + printf("void %s(const char *label) { %s(std::string(label)); }\n", p->name, p->name); + } + } //////////////////////////////////////////////////////////////// { const GenericTbl tbl[] = { @@ -633,16 +661,28 @@ void put() { "cmpsb", 0xA6 }, { "cmpsw", 0x66, 0xA7 }, { "cmpsd", 0xA7 }, + { "int3", 0xCC }, { "scasb", 0xAE }, { "scasw", 0x66, 0xAF }, { "scasd", 0xAF }, { "movsb", 0xA4 }, + { "leave", 0xC9 }, + { "lodsb", 0xAC }, + { "lodsw", 0x66, 0xAD }, + { "lodsd", 0xAD }, { "movsw", 0x66, 0xA5 }, { "movsd", 0xA5 }, + { "outsb", 0x6E }, + { "outsw", 0x66, 0x6F }, + { "outsd", 0x6F }, { "stosb", 0xAA }, { "stosw", 0x66, 0xAB }, { "stosd", 0xAB }, { "rep", 0xF3 }, + { "repe", 0xF3 }, + { "repz", 0xF3 }, + { "repne", 0xF2 }, + { "repnz", 0xF2 }, { "lahf", 0x9F }, { "lock", 0xF0 }, @@ -651,6 +691,8 @@ void put() { "stc", 0xF9 }, { "std", 0xFD }, { "sti", 0xFB }, + { "sysenter", 0x0F, 0x34 }, + { "sysexit", 0x0F, 0x35 }, { "emms", 0x0F, 0x77 }, { "pause", 0xF3, 0x90 }, @@ -684,7 +726,8 @@ void put() { "fabs", 0xD9, 0xE1 }, { "faddp", 0xDE, 0xC1 }, { "fchs", 0xD9, 0xE0 }, - + { "fclex", 0x9B, 0xDB, 0xE2 }, + { "fnclex", 0xDB, 0xE2 }, { "fcom", 0xD8, 0xD1 }, { "fcomp", 0xD8, 0xD9 }, { "fcompp", 0xDE, 0xD9 }, @@ -724,13 +767,23 @@ void put() { "fxtract", 0xD9, 0xF4 }, { "fyl2x", 0xD9, 0xF1 }, { "fyl2xp1", 0xD9, 0xF9 }, + + // AMD Zen + { "monitorx", 0x0F, 0x01, 0xFA }, + { "mwaitx", 0x0F, 0x01, 0xFB }, + { "clzero", 0x0F, 0x01, 0xFC }, }; putGeneric(tbl, NUM_OF_ARRAY(tbl)); + puts("void enter(uint16_t x, uint8_t y) { db(0xC8); dw(x); db(y); }"); + puts("void int_(uint8_t x) { db(0xCD); db(x); }"); + putLoadSeg("lss", 0x0F, 0xB2); + putLoadSeg("lfs", 0x0F, 0xB4); + putLoadSeg("lgs", 0x0F, 0xB5); } { const struct Tbl { - uint8 code; // (reg, reg) - uint8 ext; // (reg, imm) + uint8_t code; // (reg, reg) + uint8_t ext; // (reg, imm) const char *name; } tbl[] = { { 0x10, 2, "adc" }, @@ -745,14 +798,14 @@ void put() for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; printf("void %s(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x%02X); }\n", p->name, p->code); - printf("void %s(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x%02X, %d); }\n", p->name, p->code, p->ext); + printf("void %s(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x%02X, %d); }\n", p->name, p->code, p->ext); } } { const struct Tbl { - uint8 code; - uint8 ext; + uint8_t code; + uint8_t ext; const char *name; } tbl[] = { { 0x48, 1, "dec" }, @@ -765,8 +818,8 @@ void put() } { const struct Tbl { - uint8 code; - uint8 ext; + uint8_t code; + uint8_t ext; const char *name; } tbl[] = { { 0xa3, 4, "bt" }, @@ -777,13 +830,13 @@ void put() for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; printf("void %s(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0x%02X); }\n", p->name, p->code); - printf("void %s(const Operand& op, uint8 imm) { opR_ModM(op, 16|32|64, %d, 0x0f, 0xba, NONE, false, 1); db(imm); }\n", p->name, p->ext); + printf("void %s(const Operand& op, uint8_t imm) { opR_ModM(op, 16|32|64, %d, 0x0f, 0xba, NONE, false, 1); db(imm); }\n", p->name, p->ext); } } { const struct Tbl { - uint8 code; - uint8 ext; + uint8_t code; + uint8_t ext; const char *name; } tbl[] = { { 0xF6, 6, "div" }, @@ -802,7 +855,7 @@ void put() { const struct Tbl { const char *name; - uint8 ext; + uint8_t ext; } tbl[] = { { "rcl", 2 }, { "rcr", 3 }, @@ -823,21 +876,21 @@ void put() { const struct Tbl { const char *name; - uint8 code; + uint8_t code; } tbl[] = { { "shld", 0xA4 }, { "shrd", 0xAC }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - printf("void %s(const Operand& op, const Reg& reg, uint8 imm) { opShxd(op, reg, imm, 0x%02X); }\n", p->name, p->code); + printf("void %s(const Operand& op, const Reg& reg, uint8_t imm) { opShxd(op, reg, imm, 0x%02X); }\n", p->name, p->code); printf("void %s(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(op, reg, 0, 0x%02X, &_cl); }\n", p->name, p->code); } } { const struct Tbl { const char *name; - uint8 code; + uint8_t code; } tbl[] = { { "bsf", 0xBC }, { "bsr", 0xBD }, @@ -850,7 +903,7 @@ void put() { const struct Tbl { const char *name; - uint8 code; + uint8_t code; } tbl[] = { { "popcnt", 0xB8 }, { "tzcnt", 0xBC }, @@ -864,7 +917,7 @@ void put() // SSSE3 { const struct Tbl { - uint8 code; + uint8_t code; const char *name; } tbl[] = { { 0x00, "pshufb" }, @@ -887,12 +940,12 @@ void put() const Tbl *p = &tbl[i]; printf("void %s(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x%02X, 0x66, NONE, 0x38); }\n", p->name, p->code); } - printf("void palignr(const Mmx& mmx, const Operand& op, int imm) { opMMX(mmx, op, 0x0f, 0x66, static_cast(imm), 0x3a); }\n"); + printf("void palignr(const Mmx& mmx, const Operand& op, int imm) { opMMX(mmx, op, 0x0f, 0x66, static_cast(imm), 0x3a); }\n"); } { const struct Tbl { const char *name; - uint8 code; + uint8_t code; } tbl[] = { { "pclmullqlqdq", 0 }, { "pclmulhqlqdq", 1 }, @@ -906,26 +959,41 @@ void put() } { const struct Tbl { - uint8 code1; + uint8_t code1; int code2; - uint8 ext; + uint8_t ext; const char *name; + uint8_t prefix; } tbl[] = { - { 0x0F, 0xAE, 2, "ldmxcsr" }, - { 0x0F, 0xAE, 3, "stmxcsr" }, - { 0x0F, 0xAE, 7, "clflush" }, // 0x80 is bug of nasm ? - { 0xD9, NONE, 5, "fldcw" }, -// { 0x9B, 0xD9, 7, "fstcw" }, // not correct order for fstcw [eax] on 64bit OS + { 0x0F, 0xAE, 2, "ldmxcsr", 0 }, + { 0x0F, 0xAE, 3, "stmxcsr", 0 }, + { 0x0F, 0xAE, 7, "clflush", 0 }, + { 0x0F, 0xAE, 7, "clflushopt", 0x66 }, + { 0xDF, NONE, 4, "fbld", 0 }, + { 0xDF, NONE, 6, "fbstp", 0 }, + { 0xD9, NONE, 5, "fldcw", 0 }, + { 0xD9, NONE, 4, "fldenv", 0 }, + { 0xDD, NONE, 4, "frstor", 0 }, + { 0xDD, NONE, 6, "fsave", 0x9B }, + { 0xDD, NONE, 6, "fnsave", 0 }, + { 0xD9, NONE, 7, "fstcw", 0x9B }, + { 0xD9, NONE, 7, "fnstcw", 0 }, + { 0xD9, NONE, 6, "fstenv", 0x9B }, + { 0xD9, NONE, 6, "fnstenv", 0 }, + { 0xDD, NONE, 7, "fstsw", 0x9B }, + { 0xDD, NONE, 7, "fnstsw", 0 }, + { 0x0F, 0xAE, 1, "fxrstor", 0 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; - printf("void %s(const Address& addr) { opModM(addr, Reg32(%d), 0x%02X, 0x%02X); }\n", p->name, p->ext, p->code1, p->code2); + putMemOp(p->name, p->prefix, p->ext, p->code1, p->code2); } - printf("void fstcw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xD9, NONE); }\n"); + puts("void fstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) XBYAK_THROW(ERR_BAD_PARAMETER) db(0x9B); db(0xDF); db(0xE0); }"); + puts("void fnstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) XBYAK_THROW(ERR_BAD_PARAMETER) db(0xDF); db(0xE0); }"); } { const struct Tbl { - uint8 code; + uint8_t code; const char *name; } tbl[] = { { 0x2B, "movntpd" }, @@ -939,7 +1007,7 @@ void put() } { const struct Tbl { - uint8 code; + uint8_t code; const char *name; } tbl[] = { { 0xBE, "movsx" }, @@ -950,6 +1018,12 @@ void put() printf("void %s(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0x%02X); }\n", p->name, p->code); } } + { // in/out + puts("void in_(const Reg& a, uint8_t v) { opInOut(a, 0xE4, v); }"); + puts("void in_(const Reg& a, const Reg& d) { opInOut(a, d, 0xEC); }"); + puts("void out_(uint8_t v, const Reg& a) { opInOut(a, 0xE6, v); }"); + puts("void out_(const Reg& d, const Reg& a) { opInOut(a, d, 0xEE); }"); + } // mpx { puts("void bndcl(const BoundsReg& bnd, const Operand& op) { db(0xF3); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1A, NONE, !op.isMEM()); }"); @@ -963,7 +1037,7 @@ void put() } // misc { - puts("void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModM(addr, reg, 0x8D); }"); + puts("void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModM(addr, reg, 0x8D); }"); puts("void bswap(const Reg32e& reg) { opModR(Reg32(1), reg, 0x0F); }"); puts("void ret(int imm = 0) { if (imm) { db(0xC2); dw(imm); } else { db(0xC3); } }"); @@ -975,24 +1049,24 @@ void put() puts("void adox(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0xF3, isREG32_REG32orMEM, NONE, 0x38); }"); puts("void cmpxchg8b(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0xC7); }"); - puts("void pextrw(const Operand& op, const Mmx& xmm, uint8 imm) { opExt(op, xmm, 0x15, imm, true); }"); - puts("void pextrb(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x14, imm); }"); - puts("void pextrd(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x16, imm); }"); - puts("void extractps(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x17, imm); }"); - puts("void pinsrw(const Mmx& mmx, const Operand& op, int imm) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opGen(mmx, op, 0xC4, mmx.isXMM() ? 0x66 : NONE, 0, imm); }"); - puts("void insertps(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, imm, 0x3A); }"); - puts("void pinsrb(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x20, 0x66, isXMM_REG32orMEM, imm, 0x3A); }"); - puts("void pinsrd(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x22, 0x66, isXMM_REG32orMEM, imm, 0x3A); }"); + puts("void pextrw(const Operand& op, const Mmx& xmm, uint8_t imm) { opExt(op, xmm, 0x15, imm, true); }"); + puts("void pextrb(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x14, imm); }"); + puts("void pextrd(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x16, imm); }"); + puts("void extractps(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x17, imm); }"); + puts("void pinsrw(const Mmx& mmx, const Operand& op, int imm) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(mmx, op, 0xC4, mmx.isXMM() ? 0x66 : NONE, 0, imm); }"); + puts("void insertps(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, imm, 0x3A); }"); + puts("void pinsrb(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x20, 0x66, isXMM_REG32orMEM, imm, 0x3A); }"); + puts("void pinsrd(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x22, 0x66, isXMM_REG32orMEM, imm, 0x3A); }"); puts("void pmovmskb(const Reg32e& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(reg, mmx, 0x0F, 0xD7); }"); - puts("void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) throw Error(ERR_BAD_COMBINATION); opModR(reg1, reg2, 0x0F, 0xF7); }"); + puts("void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opModR(reg1, reg2, 0x0F, 0xF7); }"); puts("void movmskps(const Reg32e& reg, const Xmm& xmm) { opModR(reg, xmm, 0x0F, 0x50); }"); puts("void movmskpd(const Reg32e& reg, const Xmm& xmm) { db(0x66); movmskps(reg, xmm); }"); puts("void movntps(const Address& addr, const Xmm& xmm) { opModM(addr, Mmx(xmm.getIdx()), 0x0F, 0x2B); }"); puts("void movntdqa(const Xmm& xmm, const Address& addr) { db(0x66); opModM(addr, xmm, 0x0F, 0x38, 0x2A); }"); puts("void lddqu(const Xmm& xmm, const Address& addr) { db(0xF2); opModM(addr, xmm, 0x0F, 0xF0); }"); puts("void movnti(const Address& addr, const Reg32e& reg) { opModM(addr, reg, 0x0F, 0xC3); }"); - puts("void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) throw Error(ERR_BAD_COMBINATION); opModM(addr, mmx, 0x0F, 0xE7); }"); + puts("void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opModM(addr, mmx, 0x0F, 0xE7); }"); puts("void movd(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModM(addr, mmx, 0x0F, 0x7E); }"); puts("void movd(const Reg32& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }"); @@ -1002,18 +1076,18 @@ void put() puts("void movdq2q(const Mmx& mmx, const Xmm& xmm) { db(0xF2); opModR(mmx, xmm, 0x0F, 0xD6); }"); puts("void movq(const Mmx& mmx, const Operand& op) { if (mmx.isXMM()) db(0xF3); opModRM(mmx, op, (mmx.getKind() == op.getKind()), op.isMEM(), 0x0F, mmx.isXMM() ? 0x7E : 0x6F); }"); puts("void movq(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModM(addr, mmx, 0x0F, mmx.isXMM() ? 0xD6 : 0x7F); }"); - puts("void rdrand(const Reg& r) { if (r.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModR(Reg(6, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }"); - puts("void rdseed(const Reg& r) { if (r.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModR(Reg(7, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }"); + puts("void rdrand(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModR(Reg(6, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }"); + puts("void rdseed(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModR(Reg(7, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }"); puts("void crc32(const Reg32e& reg, const Operand& op) { if (reg.isBit(32) && op.isBit(16)) db(0x66); db(0xF2); opModRM(reg, op, op.isREG(), op.isMEM(), 0x0F, 0x38, 0xF0 | (op.isBit(8) ? 0 : 1)); }"); } { const struct Tbl { - uint8 m16; - uint8 m32; - uint8 m64; - uint8 ext; + uint8_t m16; + uint8_t m32; + uint8_t m64; + uint8_t ext; const char *name; - uint8 m64ext; + uint8_t m64ext; } tbl[] = { { 0x00, 0xD8, 0xDC, 0, "fadd" }, { 0xDE, 0xDA, 0x00, 0, "fiadd" }, @@ -1046,8 +1120,8 @@ void put() } { const struct Tbl { - uint32 code1; - uint32 code2; + uint32_t code1; + uint32_t code2; const char *name; } tbl[] = { { 0xD8C0, 0xDCC0, "fadd" }, @@ -1091,8 +1165,8 @@ void put() } { const struct Tbl { - uint8 code1; - uint8 code2; + uint8_t code1; + uint8_t code2; const char *name; } tbl[] = { { 0xD8, 0xD0, "fcom" }, @@ -1113,7 +1187,7 @@ void put() // AVX { // pd, ps, sd, ss const struct Tbl { - uint8 code; + uint8_t code; const char *name; bool only_pd_ps; } tbl[] = { @@ -1142,7 +1216,7 @@ void put() // (x, x/m[, imm]) or (y, y/m[, imm]) { const struct Tbl { - uint8 code; + uint8_t code; const char *name; int type; bool hasIMM; @@ -1223,7 +1297,7 @@ void put() const Tbl *p = &tbl[i]; std::string type = type2String(p->type); if (p->mode & 1) { - const char *immS1 = p->hasIMM ? ", uint8 imm" : ""; + const char *immS1 = p->hasIMM ? ", uint8_t imm" : ""; const char *immS2 = p->hasIMM ? ", imm" : ", NONE"; const char *pref = p->type & T_66 ? "0x66" : p->type & T_F2 ? "0xF2" : p->type & T_F3 ? "0xF3" : "NONE"; const char *suf = p->type & T_0F38 ? "0x38" : p->type & T_0F3A ? "0x3A" : "NONE"; @@ -1231,14 +1305,14 @@ void put() } if (p->mode & 2) { printf("void v%s(const Xmm& xm, const Operand& op%s) { opAVX_X_XM_IMM(xm, op, %s, 0x%02X%s); }\n" - , p->name, p->hasIMM ? ", uint8 imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); + , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); } } } // (m, x), (m, y) { const struct Tbl { - uint8 code; + uint8_t code; const char *name; int type; } tbl[] = { @@ -1259,7 +1333,7 @@ void put() // (x, x/m), (y, y/m), (x, x, x/m), (y, y, y/m) { const struct Tbl { - uint8 code; + uint8_t code; const char *name; int type; int mode; // 1 : sse, 2 : avx, 3 : sse + avx @@ -1280,7 +1354,7 @@ void put() const Tbl *p = &tbl[i]; std::string type = type2String(p->type); if (p->mode & 1) { - uint8 pref = p->type & T_66 ? 0x66 : p->type & T_F2 ? 0xF2 : p->type & T_F3 ? 0xF3 : 0; + uint8_t pref = p->type & T_66 ? 0x66 : p->type & T_F2 ? 0xF2 : p->type & T_F3 ? 0xF3 : 0; printf("void %s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%02X, 0x%02X, isXMM_XMMorMEM%s); }\n", p->name, p->code, pref, p->type & T_0F38 ? ", NONE, 0x38" : ""); } if (p->mode & 2) { @@ -1308,7 +1382,7 @@ void put() // vpermd, vpermps { const struct Tbl { - uint8 code; + uint8_t code; const char *name; int type; } tbl[] = { @@ -1326,7 +1400,7 @@ void put() // vpermq, vpermpd { const struct Tbl { - uint8 code; + uint8_t code; const char *name; int type; } tbl[] = { @@ -1336,7 +1410,7 @@ void put() for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; std::string type = type2String(p.type); - printf("void %s(const Ymm& y, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(y, op, %s, 0x%02X, imm); }\n", p.name, type.c_str(), p.code); + printf("void %s(const Ymm& y, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(y, op, %s, 0x%02X, imm); }\n", p.name, type.c_str(), p.code); } } // vcmpeqps @@ -1363,7 +1437,7 @@ void put() const struct Tbl { bool isH; bool isPd; - uint8 code; + uint8_t code; } tbl[] = { { true, true, 0x16 }, { true, false, 0x16 }, @@ -1375,7 +1449,7 @@ void put() char c = p.isH ? 'h' : 'l'; const char *suf = p.isPd ? "pd" : "ps"; const char *type = p.isPd ? "T_0F | T_66 | T_EVEX | T_EW1 | T_N8" : "T_0F | T_EVEX | T_EW0 | T_N8"; - printf("void vmov%c%s(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, op1, op2, %s, 0x%02X); }\n" + printf("void vmov%c%s(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, %s, 0x%02X); }\n" , c, suf, type, p.code); printf("void vmov%c%s(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, %s, 0x%02X); }\n" , c, suf, type, p.code + 1); @@ -1384,7 +1458,7 @@ void put() // FMA { const struct Tbl { - uint8 code; + uint8_t code; const char *name; bool supportYMM; } tbl[] = { @@ -1408,7 +1482,7 @@ void put() for (int k = 0; k < 3; k++) { const struct Ord { const char *str; - uint8 code; + uint8_t code; } ord[] = { { "132", 0x90 }, { "213", 0xA0 }, @@ -1438,10 +1512,10 @@ void put() { printf("void vbroadcastf128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x1A); }\n"); printf("void vbroadcasti128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x5A); }\n"); - printf("void vbroadcastsd(const Ymm& y, const Operand& op) { if (!op.isMEM() && !(y.isYMM() && op.isXMM()) && !(y.isZMM() && op.isXMM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(y, op, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_N8, 0x19); }\n"); + printf("void vbroadcastsd(const Ymm& y, const Operand& op) { if (!op.isMEM() && !(y.isYMM() && op.isXMM()) && !(y.isZMM() && op.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(y, op, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_N8, 0x19); }\n"); const struct Tbl { const char *name; - uint8 code; + uint8_t code; int type; bool ew1; } tbl[] = { @@ -1454,40 +1528,40 @@ void put() for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; std::string type = type2String(p.type); - printf("void %s(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code); + printf("void %s(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code); } - puts("void vextractf128(const Operand& op, const Ymm& y, uint8 imm) { if (!(op.isXMEM() && y.isYMM())) throw Error(ERR_BAD_COMBINATION); opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x19, imm); }"); - puts("void vextracti128(const Operand& op, const Ymm& y, uint8 imm) { if (!(op.isXMEM() && y.isYMM())) throw Error(ERR_BAD_COMBINATION); opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x39, imm); }"); - puts("void vextractps(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_N4, 0x17, imm); }"); - puts("void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); }"); - puts("void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); }"); - puts("void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }"); - puts("void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }"); + puts("void vextractf128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x19, imm); }"); + puts("void vextracti128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x39, imm); }"); + puts("void vextractps(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_N4, 0x17, imm); }"); + puts("void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); }"); + puts("void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); }"); + puts("void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }"); + puts("void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }"); puts("void vlddqu(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_F2 | T_W0 | T_YMM, 0xF0); }"); puts("void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, T_0F, 0xAE); }"); puts("void vstmxcsr(const Address& addr) { opAVX_X_X_XM(xm3, xm0, addr, T_0F, 0xAE); }"); puts("void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_66, 0xF7); }"); - puts("void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(8|16|i32e) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x14, imm); }"); - puts("void vpextrw(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(16|i32e) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); if (op.isREG() && x.getIdx() < 16) { opAVX_X_X_XM(Xmm(op.getIdx()), xm0, x, T_0F | T_66, 0xC5, imm); } else { opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N2, 0x15, imm); } }"); - puts("void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x16, imm); }"); - puts("void vpextrq(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x16, imm); }"); + puts("void vpextrb(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(8|16|i32e) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x14, imm); }"); + puts("void vpextrw(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(16|i32e) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) if (op.isREG() && x.getIdx() < 16) { opAVX_X_X_XM(Xmm(op.getIdx()), xm0, x, T_0F | T_66, 0xC5, imm); } else { opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N2, 0x15, imm); } }"); + puts("void vpextrd(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x16, imm); }"); + puts("void vpextrq(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x16, imm); }"); - puts("void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); opVex(x1, &x2, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x20, imm); }"); - puts("void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); opVex(x1, &x2, op, T_0F | T_66 | T_EVEX | T_N2, 0xC4, imm); }"); - puts("void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); opVex(x1, &x2, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x22, imm); }"); - puts("void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(64) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); opVex(x1, &x2, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x22, imm); }"); + puts("void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x20, imm); }"); + puts("void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F | T_66 | T_EVEX | T_N2, 0xC4, imm); }"); + puts("void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x22, imm); }"); + puts("void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(64) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x22, imm); }"); - puts("void vpmovmskb(const Reg32e& r, const Xmm& x) { if (!x.is(Operand::XMM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(x.isYMM() ? Ymm(r.getIdx()) : Xmm(r.getIdx()), 0, x, T_0F | T_66 | T_YMM, 0xD7); }"); + puts("void vpmovmskb(const Reg32e& r, const Xmm& x) { if (!x.is(Operand::XMM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x.isYMM() ? Ymm(r.getIdx()) : Xmm(r.getIdx()), 0, x, T_0F | T_66 | T_YMM, 0xD7); }"); } // (x, x, imm), (x, imm) { const struct Tbl { const char *name; - uint8 code; + uint8_t code; int idx; int type; } tbl[] = { @@ -1505,14 +1579,14 @@ void put() for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; std::string type = type2String(p.type); - printf("void v%s(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, type.c_str(), p.code); + printf("void v%s(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, type.c_str(), p.code); } } // 4-op { const struct Tbl { const char *name; - uint8 code; + uint8_t code; } tbl[] = { { "vblendvpd", 0x4B }, { "vblendvps", 0x4A }, @@ -1525,18 +1599,18 @@ void put() } // mov { - printf("void vmovd(const Xmm& x, const Operand& op) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x6E); }\n"); - printf("void vmovd(const Operand& op, const Xmm& x) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x7E); }\n"); + printf("void vmovd(const Xmm& x, const Operand& op) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x6E); }\n"); + printf("void vmovd(const Operand& op, const Xmm& x) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x7E); }\n"); printf("void vmovq(const Xmm& x, const Address& addr) { int type, code; if (x.getIdx() < 16) { type = T_0F | T_F3; code = 0x7E; } else { type = T_0F | T_66 | T_EVEX | T_EW1 | T_N8; code = 0x6E; } opAVX_X_X_XM(x, xm0, addr, type, code); }\n"); printf("void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, x.getIdx() < 16 ? 0xD6 : 0x7E); }\n"); printf("void vmovq(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_F3 | T_EVEX | T_EW1 | T_N8, 0x7E); }\n"); - printf("void vmovhlps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x12); }\n"); - printf("void vmovlhps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x16); }\n"); + printf("void vmovhlps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x12); }\n"); + printf("void vmovlhps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x16); }\n"); - printf("void vmovmskpd(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_66 | T_W0 | T_YMM, 0x50); }\n"); - printf("void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_W0 | T_YMM, 0x50); }\n"); + printf("void vmovmskpd(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_66 | T_W0 | T_YMM, 0x50); }\n"); + printf("void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_W0 | T_YMM, 0x50); }\n"); puts("void vmovntdq(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW0, 0xE7); }"); puts("void vmovntpd(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW1, 0x2B); }"); @@ -1549,7 +1623,7 @@ void put() int type = T_0F | T_EVEX; type |= i == 0 ? (T_F2 | T_EW1 | T_N8) : (T_F3 | T_EW0 | T_N4); std::string s = type2String(type); - printf("void vmovs%c(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, %s, 0x10); }\n", c1, s.c_str()); + printf("void vmovs%c(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, %s, 0x10); }\n", c1, s.c_str()); printf("void vmovs%c(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", c1, s.c_str()); printf("void vmovs%c(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, %s | T_M_K, 0x11); }\n", c1, s.c_str()); } @@ -1574,7 +1648,7 @@ void put() puts("void vcvttpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_66 | T_0F | T_YMM | T_EVEX |T_EW1 | T_B64 | T_ER_Z, 0xE6); }"); puts("void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }"); - puts("void vcvtps2ph(const Operand& op, const Xmm& x, uint8 imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x1D, imm); }"); + puts("void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x1D, imm); }"); } // haswell gpr(reg, reg, r/m) @@ -1582,7 +1656,7 @@ void put() const struct Tbl { const char *name; int type; - uint8 code; + uint8_t code; } tbl[] = { { "andn", T_0F38, 0xF2 }, { "mulx", T_F2 | T_0F38, 0xF6 }, @@ -1599,7 +1673,7 @@ void put() const struct Tbl { const char *name; int type; - uint8 code; + uint8_t code; } tbl[] = { { "bextr", T_0F38, 0xF7 }, { "bzhi", T_0F38, 0xF5 }, @@ -1611,15 +1685,15 @@ void put() const Tbl& p = tbl[i]; printf("void %s(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, %s, 0x%x, false); }\n", p.name, type2String(p.type).c_str(), p.code); } - puts("void rorx(const Reg32e& r, const Operand& op, uint8 imm) { opGpr(r, op, Reg32e(0, r.getBit()), T_0F3A | T_F2, 0xF0, false, imm); }"); + puts("void rorx(const Reg32e& r, const Operand& op, uint8_t imm) { opGpr(r, op, Reg32e(0, r.getBit()), T_0F3A | T_F2, 0xF0, false, imm); }"); } // gpr(reg, r/m) { const struct Tbl { const char *name; int type; - uint8 code; - uint8 idx; + uint8_t code; + uint8_t idx; } tbl[] = { { "blsi", T_0F38, 0xF3, 3 }, { "blsmsk", T_0F38, 0xF3, 2 }, @@ -1637,7 +1711,7 @@ void put() const int x_vy_x = 2; const struct Tbl { const char *name; - uint8 code; + uint8_t code; int w; int mode; } tbl[] = { @@ -1669,6 +1743,7 @@ void put32() { "aas", 0x3F }, { "daa", 0x27 }, { "das", 0x2F }, + { "into", 0xCE }, { "popad", 0x61 }, { "popfd", 0x9D }, { "pusha", 0x60 }, @@ -1677,6 +1752,8 @@ void put32() { "popa", 0x61 }, }; putGeneric(tbl, NUM_OF_ARRAY(tbl)); + putLoadSeg("lds", 0xC5, NONE); + putLoadSeg("les", 0xC4, NONE); } void put64() @@ -1688,18 +1765,24 @@ void put64() { "cdqe", 0x48, 0x98 }, { "cqo", 0x48, 0x99 }, { "cmpsq", 0x48, 0xA7 }, + { "popfq", 0x9D }, + { "pushfq", 0x9C }, + { "lodsq", 0x48, 0xAD }, { "movsq", 0x48, 0xA5 }, { "scasq", 0x48, 0xAF }, { "stosq", 0x48, 0xAB }, + { "syscall", 0x0F, 0x05 }, + { "sysret", 0x0F, 0x07 }, }; putGeneric(tbl, NUM_OF_ARRAY(tbl)); - puts("void cmpxchg16b(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xC7); }"); + putMemOp("cmpxchg16b", 0, 1, 0x0F, 0xC7, 64); + putMemOp("fxrstor64", 0, 1, 0x0F, 0xAE, 64); puts("void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }"); puts("void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }"); - puts("void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) throw Error(ERR_BAD_COMBINATION); opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); }"); - puts("void pextrq(const Operand& op, const Xmm& xmm, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opGen(Reg64(xmm.getIdx()), op, 0x16, 0x66, 0, imm, 0x3A); }"); - puts("void pinsrq(const Xmm& xmm, const Operand& op, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opGen(Reg64(xmm.getIdx()), op, 0x22, 0x66, 0, imm, 0x3A); }"); + puts("void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); }"); + puts("void pextrq(const Operand& op, const Xmm& xmm, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(Reg64(xmm.getIdx()), op, 0x16, 0x66, 0, imm, 0x3A); }"); + puts("void pinsrq(const Xmm& xmm, const Operand& op, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(Reg64(xmm.getIdx()), op, 0x22, 0x66, 0, imm, 0x3A); }"); puts("void vcvtss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_ER_X | T_N8, 0x2D); }"); puts("void vcvttss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_SAE_X | T_N8, 0x2C); }"); @@ -1710,10 +1793,35 @@ void put64() puts("void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); }"); } +void putAMX_TILE() +{ + puts("void ldtilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_0F38 | T_W0, 0x49); }"); + puts("void sttilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); }"); + puts("void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2 | T_0F38 | T_W0, 0x4b); }"); + puts("void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66 | T_0F38 | T_W0, 0x4b); }"); + puts("void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }"); + puts("void tilestored(const Address& addr, const Tmm& tm) { opVex(tm, &tmm0, addr, T_F3 | T_0F38 | T_W0, 0x4b); }"); + puts("void tilezero(const Tmm& Tmm) { opVex(Tmm, &tmm0, tmm0, T_F2 | T_0F38 | T_W0, 0x49); }"); +} +void putAMX_INT8() +{ + puts("void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F2 | T_0F38 | T_W0, 0x5e); }"); + puts("void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }"); + puts("void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }"); + puts("void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38 | T_W0, 0x5e); }"); +} +void putAMX_BF16() +{ + puts("void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5c); }"); +} + void putFixed() { puts("#ifdef XBYAK64"); put64(); + putAMX_TILE(); + putAMX_INT8(); + putAMX_BF16(); puts("#else"); put32(); puts("#endif"); @@ -1724,7 +1832,7 @@ void putFixed() for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const char *name = tbl[i]; printf("void %s(const Operand& op1, const Operand& op2) { %s_(op1, op2); }\n", name, name); - printf("void %s(const Operand& op, uint32 imm) { %s_(op, imm); }\n", name, name); + printf("void %s(const Operand& op, uint32_t imm) { %s_(op, imm); }\n", name, name); } puts("void not(const Operand& op) { not_(op); }"); puts("#endif"); @@ -1732,10 +1840,10 @@ void putFixed() void putOmit() { - puts("void vpinsrb(const Xmm& x, const Operand& op, uint8 imm) { vpinsrb(x, x, op, imm); }"); - puts("void vpinsrd(const Xmm& x, const Operand& op, uint8 imm) { vpinsrd(x, x, op, imm); }"); - puts("void vpinsrq(const Xmm& x, const Operand& op, uint8 imm) { vpinsrq(x, x, op, imm); }"); - puts("void vpinsrw(const Xmm& x, const Operand& op, uint8 imm) { vpinsrw(x, x, op, imm); }"); + puts("void vpinsrb(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrb(x, x, op, imm); }"); + puts("void vpinsrd(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrd(x, x, op, imm); }"); + puts("void vpinsrq(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrq(x, x, op, imm); }"); + puts("void vpinsrw(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrw(x, x, op, imm); }"); puts("void vcvtsi2sd(const Xmm& x, const Operand& op) { vcvtsi2sd(x, x, op); }"); puts("void vcvtsi2ss(const Xmm& x, const Operand& op) { vcvtsi2ss(x, x, op); }"); @@ -1769,7 +1877,7 @@ void putOmit() }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const char *name = tbl[i]; - printf("void v%s(const Xmm& x, uint8 imm) { v%s(x, x, imm); }\n", name, name); + printf("void v%s(const Xmm& x, uint8_t imm) { v%s(x, x, imm); }\n", name, name); } } { diff --git a/readme.md b/readme.md index 421674d7..6caaa51a 100644 --- a/readme.md +++ b/readme.md @@ -1,22 +1,25 @@ +[![Build Status](https://travis-ci.org/herumi/xbyak.png)](https://travis-ci.org/herumi/xbyak) -# Xbyak 5.78 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++ +# Xbyak 5.97 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++ ## Abstract -This is a header file which enables dynamically to assemble x86(IA32), x64(AMD64, x86-64) mnemonic. +Xbyak is a C++ header library that enables dynamically to assemble x86(IA32), x64(AMD64, x86-64) mnemonic. ## Feature * header file only * Intel/MASM like syntax * fully support AVX-512 -**Note**: Xbyak uses and(), or(), xor(), not() functions, so `-fno-operator-names` option is necessary for gcc/clang. +**Note**: +Use `and_()`, `or_()`, ... instead of `and()`, `or()`. +If you want to use them, then specify `-fno-operator-names` option to gcc/clang. -Or define `XBYAK_NO_OP_NAMES` before including `xbyak.h` and use and_(), or_(), xor_(), not_() instead of them. - -and_(), or_(), xor_(), not_() are always available. - -`XBYAK_NO_OP_NAMES` will be defined in the feature version. +### News +- (break backward compatibility) `push(byte, imm)` (resp. `push(word, imm)`) forces to cast `imm` to 8(resp. 16) bit. +- (Windows) `#include ` has been removed from xbyak.h, so add it explicitly if you need it. +- support exception-less mode see. [Exception-less mode](#exception-less-mode) +- `XBYAK_USE_MMAP_ALLOCATOR` will be defined on Linux/macOS unless `XBYAK_DONT_USE_MMAP_ALLOCATOR` is defined. ### Supported OS @@ -47,7 +50,6 @@ These files are copied into `/usr/local/include/xbyak`. Inherit `Xbyak::CodeGenerator` class and make the class method. ``` -#define XBYAK_NO_OP_NAMES #include struct Code : Xbyak::CodeGenerator { @@ -58,6 +60,15 @@ struct Code : Xbyak::CodeGenerator { } }; ``` +Or you can pass the instance of CodeGenerator without inheriting. +``` +void genCode(Xbyak::CodeGenerator& code, int x) { + using namespace Xbyak::util; + code.mov(eax, x); + code.ret(); +} +``` + Make an instance of the class and get the function pointer by calling `getCode()` and call it. ``` @@ -146,6 +157,8 @@ vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], ``` ### Remark * `k1`, ..., `k7` are opmask registers. + - `k0` is dealt as no mask. + - e.g. `vmovaps(zmm0|k0, ptr[rax]);` and `vmovaps(zmm0|T_z, ptr[rax]);` are same to `vmovaps(zmm0, ptr[rax]);`. * use `| T_z`, `| T_sae`, `| T_rn_sae`, `| T_rd_sae`, `| T_ru_sae`, `| T_rz_sae` instead of `,{z}`, `,{sae}`, `,{rn-sae}`, `,{rd-sae}`, `,{ru-sae}`, `,{rz-sae}` respectively. * `k4 | k3` is different from `k3 | k4`. * use `ptr_b` for broadcast `{1toX}`. X is automatically determined. @@ -212,6 +225,32 @@ void func1() } ``` +### short and long jump +Xbyak deals with jump mnemonics of an undefined label as short jump if no type is specified. +So if the size between jmp and label is larger than 127 byte, then xbyak will cause an error. + +``` +jmp("short-jmp"); // short jmp +// small code +L("short-jmp"); + +jmp("long-jmp"); +// long code +L("long-jmp"); // throw exception +``` +Then specify T_NEAR for jmp. +``` +jmp("long-jmp", T_NEAR); // long jmp +// long code +L("long-jmp"); +``` +Or call `setDefaultJmpNEAR(true);` once, then the default type is set to T_NEAR. +``` +jmp("long-jmp"); // long jmp +// long code +L("long-jmp"); +``` + ### Label class `L()` and `jxx()` support Label class. @@ -369,15 +408,22 @@ c.setProtectModeRE(); Call `readyRE()` instead of `ready()` when using `AutoGrow` mode. See [protect-re.cpp](sample/protect-re.cpp). +## Exception-less mode +If `XBYAK_NO_EXCEPTION` is defined, then gcc/clang can compile xbyak with `-fno-exceptions`. +In stead of throwing an exception, `Xbyak::GetError()` returns non-zero value (e.g. `ERR_BAD_ADDRESSING`) if there is something wrong. +The status will not be changed automatically, then you should reset it by `Xbyak::ClearError()`. +`CodeGenerator::reset()` calls `ClearError()`. + ## Macro * **XBYAK32** is defined on 32bit. * **XBYAK64** is defined on 64bit. -* **XBYAK64_WIN** is defined on 64bit Windows(VC) -* **XBYAK64_GCC** is defined on 64bit gcc, cygwin -* define **XBYAK_NO_OP_NAMES** on gcc without `-fno-operator-names` -* define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(deprecated in the future) -* define **XBYAK_UNDEF_JNL** if Bessel function jnl is defined as macro +* **XBYAK64_WIN** is defined on 64bit Windows(VC). +* **XBYAK64_GCC** is defined on 64bit gcc, cygwin. +* define **XBYAK_USE_OP_NAMES** on gcc with `-fno-operator-names` if you want to use `and()`, .... +* define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(deprecated in the future). +* define **XBYAK_UNDEF_JNL** if Bessel function jnl is defined as macro. +* define **XBYAK_NO_EXCEPTION** for a compiler option `-fno-exceptions`. ## Sample @@ -392,6 +438,31 @@ modified new BSD License http://opensource.org/licenses/BSD-3-Clause ## History +* 2020/Sep/08 ver 5.97 replace uint32 with uint32_t etc. +* 2020/Aug/28 ver 5.95 some constructors of register classes support constexpr if C++14 or later +* 2020/Aug/04 ver 5.941 `CodeGenerator::reset()` calls `ClearError()`. +* 2020/Jul/28 ver 5.94 remove #include (only windows) +* 2020/Jul/21 ver 5.93 support exception-less mode +* 2020/Jun/30 ver 5.92 support Intel AMX instruction set (Thanks to nshustrov) +* 2020/Jun/22 ver 5.913 fix mov(r64, imm64) on 32-bit env with XBYAK64 +* 2020/Jun/19 ver 5.912 define MAP_JIT on macOS regardless of Xcode version (Thanks to rsdubtso) +* 2020/May/10 ver 5.911 XBYAK_USE_MMAP_ALLOCATOR is defined unless XBYAK_DONT_USE_MMAP_ALLOCATOR is defined. +* 2020/Apr/20 ver 5.91 accept mask register k0 (it means no mask) +* 2020/Apr/09 ver 5.90 kmov{b,d,w,q} throws exception for an unsupported register +* 2020/Feb/26 ver 5.891 fix typo of type +* 2020/Jan/03 ver 5.89 fix error of vfpclasspd +* 2019/Dec/20 ver 5.88 fix compile error on Windows +* 2019/Dec/19 ver 5.87 add setDefaultJmpNEAR(), which deals with `jmp` of an undefined label as T_NEAR if no type is specified. +* 2019/Dec/13 ver 5.86 [changed] revert to the behavior before v5.84 if -fno-operator-names is defined (and() is available) +* 2019/Dec/07 ver 5.85 append MAP_JIT flag to mmap for macOS mojave or later +* 2019/Nov/29 ver 5.84 [changed] XBYAK_NO_OP_NAMES is defined unless XBYAK_USE_OP_NAMES is defined +* 2019/Oct/12 ver 5.83 exit(1) was removed +* 2019/Sep/23 ver 5.82 support monitorx, mwaitx, clzero (thanks to @MagurosanTeam) +* 2019/Sep/14 ver 5.81 support some generic mnemonics. +* 2019/Aug/01 ver 5.802 fix detection of AVX512_BF16 (thanks to vpirogov) +* 2019/May/27 support vp2intersectd, vp2intersectq (not tested) +* 2019/May/26 ver 5.80 support vcvtne2ps2bf16, vcvtneps2bf16, vdpbf16ps +* 2019/Apr/27 ver 5.79 vcmppd/vcmpps supports ptr_b(thanks to jkopinsky) * 2019/Apr/15 ver 5.78 rewrite Reg::changeBit() (thanks to MerryMage) * 2019/Mar/06 ver 5.77 fix number of cores that share LLC cache by densamoilov * 2019/Jan/17 ver 5.76 add Cpu::getNumCores() by shelleygoel @@ -530,3 +601,5 @@ http://opensource.org/licenses/BSD-3-Clause ## Author MITSUNARI Shigeo(herumi@nifty.com) +## Sponsors welcome +[GitHub Sponsor](https://github.com/sponsors/herumi) diff --git a/readme.txt b/readme.txt index 20c77e95..dcbff9e7 100644 --- a/readme.txt +++ b/readme.txt @@ -1,5 +1,5 @@ - C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.78 + C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.97 ----------------------------------------------------------------------------- â—Žæ¦‚è¦ @@ -22,21 +22,21 @@ Intel Mac ãªã©ã§å‹•ä½œç¢ºèªã‚’ã—ã¦ã„ã¾ã™ã€‚ -※ Xbyakã¯ãƒ‡ãƒ•ã‚©ãƒ«ãƒˆã§and(), or(), xor(), not()関数を使ã„ã¾ã™ã€‚ -gccã§ã¯ãれらを演算å­ã¨ã—ã¦è§£é‡ˆã—ã¦ã—ã¾ã†ãŸã‚ã€-fno-operator-namesオプションを追加ã—ã¦ã‚³ãƒ³ãƒ‘イルã—ã¦ãã ã•ã„。 -ã‚ã‚‹ã„ã¯XBYAK_NO_OP_NAMESを定義ã—ã¦and_(), or_(), xor_(), not_()を使ã£ã¦ãã ã•ã„。 -and_(), or_(), xor_(), not_()ã¯XBYAK_NO_OP_NAMESã•ã‚Œã¦ã„ãªã„ã¨ãã§ã‚‚使ãˆã¾ã™ã€‚ +※ and, orãªã©ã®ä»£ã‚ã‚Šã«and_, or_を使用ã—ã¦ãã ã•ã„。 +and, orãªã©ã‚’使ã„ãŸã„å ´åˆã¯-fno-operator-namesã‚’gcc/clangã«æŒ‡å®šã—ã¦ãã ã•ã„。 ----------------------------------------------------------------------------- ◎準備 xbyak.h xbyak_bin2hex.h -xbyak_mnemonic.h ã“れらをåŒä¸€ã®ãƒ‘スã«å…¥ã‚Œã¦ã‚¤ãƒ³ã‚¯ãƒ«ãƒ¼ãƒ‰ãƒ‘スã«è¿½åŠ ã—ã¦ãã ã•ã„。 Linuxã§ã¯make installã§/usr/local/include/xbyakã«ã‚³ãƒ”ーã•ã‚Œã¾ã™ã€‚ ----------------------------------------------------------------------------- ◎下ä½äº’æ›æ€§ã®ç ´ã‚Œ +* push byte, immã¾ãŸã¯push word, immãŒä¸‹ä½8bit, 16bitã«ã‚­ãƒ£ã‚¹ãƒˆã—ãŸå€¤ã‚’使ã†ã‚ˆã†ã«å¤‰æ›´ã€‚ +* (Windows) ``ã‚’includeã—ãªããªã£ãŸã®ã§å¿…è¦ãªã‚‰æ˜Žç¤ºçš„ã«includeã—ã¦ãã ã•ã„。 +* XBYAK_USE_MMAP_ALLOCATORãŒãƒ‡ãƒ•ã‚©ãƒ«ãƒˆã§æœ‰åŠ¹ã«ãªã‚Šã¾ã—ãŸã€‚従æ¥ã®æ–¹å¼ã«ã™ã‚‹å ´åˆã¯XBYAK_DONT_USE_MMAP_ALLOCATORを定義ã—ã¦ãã ã•ã„。 * Xbyak::Errorã®åž‹ã‚’enumã‹ã‚‰classã«å¤‰æ›´ ** 従æ¥ã®enumã®å€¤ã‚’ã¨ã‚‹ã«ã¯intã«ã‚­ãƒ£ã‚¹ãƒˆã—ã¦ãã ã•ã„。 * (å¤ã„)Reg32eクラスを(æ–°ã—ã„)Reg32eã¨RegExpã«åˆ†ã‘る。 @@ -46,6 +46,13 @@ Linuxã§ã¯make installã§/usr/local/include/xbyakã«ã‚³ãƒ”ーã•ã‚Œã¾ã™ã€‚ ----------------------------------------------------------------------------- ◎新機能 +例外ãªã—モード追加 +XBYAK_NO_EXCEPTIONを定義ã—ã¦ã‚³ãƒ³ãƒ‘イルã™ã‚‹ã¨gcc/clangã§-fno-exceptionsオプションã§ã‚³ãƒ³ãƒ‘イルã§ãã¾ã™ã€‚ +エラーã¯ä¾‹å¤–ã®ä»£ã‚ã‚Šã«`Xbyak::GetError()`ã§é€šé”ã•ã‚Œã¾ã™ã€‚ +ã“ã®å€¤ãŒ0ã§ãªã‘ã‚Œã°ä½•ã‹å•é¡ŒãŒç™ºç”Ÿã—ã¦ã„ã¾ã™ã€‚ +ã“ã®å€¤ã¯è‡ªå‹•çš„ã«å¤‰æ›´ã•ã‚Œãªã„ã®ã§`Xbyak::ClearError()`ã§ãƒªã‚»ãƒƒãƒˆã—ã¦ãã ã•ã„。 +`CodeGenerator::reset()`ã¯`ClearError()`を呼ã³ã¾ã™ã€‚ + MmapAllocator追加 ã“ã‚Œã¯Unixç³»OSã§ã®ã¿ã®ä»•æ§˜ã§ã™ã€‚XBYAK_USE_MMAP_ALLOCATORを使ã†ã¨åˆ©ç”¨ã§ãã¾ã™ã€‚ デフォルトã®Allocatorã¯ãƒ¡ãƒ¢ãƒªç¢ºä¿æ™‚ã«posix_memalignを使ã„ã¾ã™ã€‚ @@ -54,7 +61,6 @@ map countã®æœ€å¤§å€¤ã¯/proc/sys/vm/max_map_countã«æ›¸ã‹ã‚Œã¦ã„ã¾ã™ã€‚ デフォルトã§ã¯3万個ã»ã©ã®Xbyak::CodeGeneratorインスタンスを生æˆã™ã‚‹ã¨ã‚¨ãƒ©ãƒ¼ã«ãªã‚Šã¾ã™ã€‚ test/mprotect_test.cppã§ç¢ºèªã§ãã¾ã™ã€‚ ã“れをé¿ã‘ã‚‹ãŸã‚ã«ã¯mmapを使ã†MmapAllocatorを使ã£ã¦ãã ã•ã„。 -å°†æ¥ã“ã®æŒ™å‹•ãŒãƒ‡ãƒ•ã‚©ãƒ«ãƒˆã«ãªã‚‹ã‹ã‚‚ã—ã‚Œã¾ã›ã‚“。 AutoGrowモード追加 @@ -373,6 +379,31 @@ sample/{echo,hello}.bf㯠http://www.kmonos.net/alang/etc/brainfuck.php ã‹ã‚‰ ----------------------------------------------------------------------------- ◎履歴 +2020/09/08 ver 5.97 uint32ãªã©ã‚’uint32_tã«ç½®æ› +2020/08/28 ver 5.95 レジスタクラスã®ã‚³ãƒ³ã‚¹ãƒˆãƒ©ã‚¯ã‚¿ãŒconstexprã«å¯¾å¿œ(C++14以é™) +2020/08/04 ver 5.941 `CodeGenerator::reset()`ãŒ`ClearError()`を呼ã¶ã‚ˆã†ã«å¤‰æ›´ +2020/07/28 ver 5.94 #include ã®å‰Šé™¤ (only windows) +2020/07/21 ver 5.93 例外ãªã—モード追加 +2020/06/30 ver 5.92 Intel AMX命令サãƒãƒ¼ãƒˆ (Thanks to nshustrov) +2020/06/19 ver 5.913 32ビット環境ã§XBYAK64を定義ã—ãŸã¨ãã®mov(r64, imm64)を修正 +2020/06/19 ver 5.912 macOSã®å¤ã„Xcodeã§ã‚‚MAP_JITを有効ã«ã™ã‚‹(Thanks to rsdubtso) +2020/05/10 ver 5.911 Linux/macOSã§XBYAK_USE_MMAP_ALLOCATORãŒãƒ‡ãƒ•ã‚©ãƒ«ãƒˆæœ‰åŠ¹ã«ãªã‚‹ +2020/04/20 ver 5.91 マスクレジスタk0ã‚’å—ã‘入れる(マスクをã—ãªã„) +2020/04/09 ver 5.90 kmov{b,w,d,q}ãŒã‚µãƒãƒ¼ãƒˆã•ã‚Œãªã„レジスタをå—ã‘ã‚‹ã¨ä¾‹å¤–を投ã’ã‚‹ +2020/02/26 ver 5.891 zm0ã®type修正 +2020/01/03 ver 5.89 vfpclasspdã®å‡¦ç†ã‚¨ãƒ©ãƒ¼ä¿®æ­£ +2019/12/20 ver 5.88 Windowsã§ã®ã‚³ãƒ³ãƒ‘イルエラー修正 +2019/12/19 ver 5.87 未定義ラベルã¸ã®jmp命令ã®ãƒ‡ãƒ•ã‚©ãƒ«ãƒˆæŒ™å‹•ã‚’T_NEARã«ã™ã‚‹setDefaultJmpNEAR()を追加 +2019/12/13 ver 5.86 [変更] -fno-operator-namesãŒæŒ‡å®šã•ã‚ŒãŸã¨ãã¯5.84以å‰ã®æŒ™å‹•ã«æˆ»ã™ +2019/12/07 ver 5.85 mmapã«MAP_JITフラグを追加(macOS mojave以上) +2019/11/29 ver 5.84 [変更] XBYAK_USE_OP_NAMESãŒå®šç¾©ã•ã‚Œã¦ã„ãªã„é™ã‚ŠXBYAK_NO_OP_NAMESãŒå®šç¾©ã•ã‚Œã‚‹ã‚ˆã†ã«å¤‰æ›´ +2019/10/12 ver 5.83 exit(1)ã®é™¤åŽ» +2019/09/23 ver 5.82 monitorx, mwaitx, clzero対応 (thanks to MagurosanTeam) +2019/09/14 ver 5.81 ã„ãã¤ã‹ã®ä¸€èˆ¬å‘½ä»¤ã‚’サãƒãƒ¼ãƒˆ +2019/08/01 ver 5.802 AVX512_BF16判定修正 (thanks to vpirogov) +2019/05/27 support vp2intersectd, vp2intersectq (not tested) +2019/05/26 ver 5.80 support vcvtne2ps2bf16, vcvtneps2bf16, vdpbf16ps +2019/04/27 ver 5.79 vcmppd/vcmppsã®ptr_b対応忘れ(thanks to jkopinsky) 2019/04/15 ver 5.78 Reg::changeBit()ã®ãƒªãƒ•ã‚¡ã‚¯ã‚¿ãƒªãƒ³ã‚°(thanks to MerryMage) 2019/03/06 ver 5.77 LLCキャッシュを共有数CPUæ•°ã®ä¿®æ•´(by densamoilov) 2019/01/17 ver 5.76 Cpu::getNumCores()追加(by shelleygoel) diff --git a/sample/Makefile b/sample/Makefile index da21f204..0c100a31 100644 --- a/sample/Makefile +++ b/sample/Makefile @@ -1,10 +1,12 @@ -TARGET = test quantize bf toyvm test_util memfunc static_buf jmp_table XBYAK_INC=../xbyak/xbyak.h BOOST_EXIST=$(shell echo "\#include " | (gcc -E - 2>/dev/null) | grep "boost/spirit/core.hpp" >/dev/null && echo "1") UNAME_M=$(shell uname -m) +ONLY_64BIT=0 ifeq ($(shell uname -s),Darwin) + ONLY_64BIT=1 + OS=mac ifeq ($(UNAME_M),x86_64) BIT=64 endif @@ -27,19 +29,27 @@ else endif ifeq ($(BIT),64) -TARGET += test64 bf64 memfunc64 test_util64 static_buf64 jmp_table64 +TARGET += test64 bf64 memfunc64 test_util64 jmp_table64 ifeq ($(BOOST_EXIST),1) TARGET += calc64 #calc2_64 endif endif +ifneq ($(OS),mac) +TARGET += static_buf64 +endif + + +ifneq ($(ONLY_64BIT),1) + TARGET += test quantize bf toyvm test_util memfunc static_buf jmp_table ifeq ($(BOOST_EXIST),1) -TARGET += calc #calc2 + TARGET += calc #calc2 +endif endif all: $(TARGET) -CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith -pedantic +CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith #-pedantic CFLAGS=-g -O2 -fomit-frame-pointer -Wall -I../ $(CFLAGS_WARN) @@ -85,9 +95,13 @@ jmp_table: $(CXX) $(CFLAGS) jmp_table.cpp -o $@ -m32 jmp_table64: $(CXX) $(CFLAGS) jmp_table.cpp -o $@ -m64 +profiler: profiler.cpp ../xbyak/xbyak_util.h + $(CXX) $(CFLAGS) profiler.cpp -o $@ +profiler-vtune: profiler.cpp ../xbyak/xbyak_util.h + $(CXX) $(CFLAGS) profiler.cpp -o $@ -DXBYAK_USE_VTUNE -I /opt/intel/vtune_amplifier/include/ -L /opt/intel/vtune_amplifier/lib64 -ljitprofiling -ldl clean: - rm -rf *.o $(TARGET) *.exe + rm -rf *.o $(TARGET) *.exe profiler profiler-vtune test : test0.cpp $(XBYAK_INC) test64: test0.cpp $(XBYAK_INC) diff --git a/sample/bf.cpp b/sample/bf.cpp index 20a0fd96..2abb8a54 100644 --- a/sample/bf.cpp +++ b/sample/bf.cpp @@ -148,7 +148,7 @@ public: } }; -void dump(const Xbyak::uint8 *code, size_t size) +void dump(const uint8_t *code, size_t size) { puts("#include \nstatic int stack[128 * 1024];"); #ifdef _MSC_VER diff --git a/sample/bf.vcproj b/sample/bf.vcproj deleted file mode 100644 index 968d2c85..00000000 --- a/sample/bf.vcproj +++ /dev/null @@ -1,427 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/sample/bf.vcxproj b/sample/bf.vcxproj new file mode 100644 index 00000000..88a74376 --- /dev/null +++ b/sample/bf.vcxproj @@ -0,0 +1,228 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {654BD79B-59D3-4B10-BBAA-158BAB272828} + 10.0.17763.0 + + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + + + + + + + + + + + + + + + + + + + + + <_ProjectFileVersion>15.0.27924.0 + + + false + $(ProjectName)\$(Configuration)\ + + + true + $(ProjectName)\$(Configuration)\ + + + false + $(ProjectName)\$(Platform)\$(Configuration)\ + + + true + $(ProjectName)\$(Platform)\$(Configuration)\ + + + + .\Release/bf.tlb + + + + MaxSpeed + OnlyExplicitInline + ../;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + MultiThreaded + true + .\Release/bf.pch + Level4 + true + + + NDEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + Console + false + + MachineX86 + + + true + + + + + .\Debug/bf.tlb + + + + Disabled + ../;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + .\Debug/bf.pch + Level4 + true + + + _DEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + true + .\Debug/bf.pdb + Console + false + + MachineX86 + + + true + + + + + X64 + .\Release/bf.tlb + + + + MaxSpeed + OnlyExplicitInline + ../;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + MultiThreaded + true + .\Release/bf.pch + Level4 + true + + + NDEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + Console + false + + MachineX64 + + + true + + + + + X64 + .\Debug/bf.tlb + + + + Disabled + ../;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + .\Debug/bf.pch + Level4 + true + ProgramDatabase + + + _DEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + true + .\Debug/bf.pdb + Console + false + + MachineX64 + + + true + + + + + + + + + \ No newline at end of file diff --git a/sample/calc.cpp b/sample/calc.cpp index 3f4a0f9c..29ad9c3a 100644 --- a/sample/calc.cpp +++ b/sample/calc.cpp @@ -155,9 +155,9 @@ struct Grammar : public boost::spirit::classic::grammar { void put(const std::vector& x) { - printf("%f", x[0]); - for (size_t i = 1, n = x.size(); i < n; i++) { - printf(", %f", x[i]); + for (size_t i = 0, n = x.size(); i < n; i++) { + if (i > 0) printf(", "); + printf("%f", x[i]); } } diff --git a/sample/calc.vcproj b/sample/calc.vcproj deleted file mode 100644 index 3c6148aa..00000000 --- a/sample/calc.vcproj +++ /dev/null @@ -1,423 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/sample/calc.vcxproj b/sample/calc.vcxproj new file mode 100644 index 00000000..2846bb3d --- /dev/null +++ b/sample/calc.vcxproj @@ -0,0 +1,228 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {5FDDFAA6-B947-491D-A17E-BBD863846579} + 10.0.17763.0 + + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + + + + + + + + + + + + + + + + + + + + + <_ProjectFileVersion>15.0.27924.0 + + + false + $(ProjectName)\$(Configuration)\ + + + true + $(ProjectName)\$(Configuration)\ + + + false + $(ProjectName)\$(Platform)\$(Configuration)\ + + + true + $(ProjectName)\$(Platform)\$(Configuration)\ + + + + .\Release/calc.tlb + + + + MaxSpeed + OnlyExplicitInline + ../;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + MultiThreaded + true + .\Release/calc.pch + Level4 + true + + + NDEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + Console + false + + MachineX86 + + + true + + + + + .\Debug/calc.tlb + + + + Disabled + ../;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + .\Debug/calc.pch + Level4 + true + + + _DEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + true + .\Debug/calc.pdb + Console + false + + MachineX86 + + + true + + + + + X64 + .\Release/calc.tlb + + + + MaxSpeed + OnlyExplicitInline + ../;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + MultiThreaded + true + .\Release/calc.pch + Level4 + true + + + NDEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + Console + false + + MachineX64 + + + true + + + + + X64 + .\Debug/calc.tlb + + + + Disabled + ../;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + .\Debug/calc.pch + Level4 + true + ProgramDatabase + + + _DEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + true + .\Debug/calc.pdb + Console + false + + MachineX64 + + + true + + + + + + + + + \ No newline at end of file diff --git a/sample/calc2.cpp b/sample/calc2.cpp index 74b0494a..a13d2cf5 100644 --- a/sample/calc2.cpp +++ b/sample/calc2.cpp @@ -102,7 +102,7 @@ private: MAX_CONST_NUM = 32 }; MIE_ALIGN(16) double constTbl_[MAX_CONST_NUM]; - Xbyak::uint64 negConst_; + Xbyak::uint64_t negConst_; size_t constTblPos_; #ifdef XBYAK32 const Xbyak::Reg32& varTbl_; @@ -118,7 +118,7 @@ public: 64bit: x [rcx](win), xmm0(gcc), return xmm0 */ Jit() - : negConst_(Xbyak::uint64(1) << 63) + : negConst_(Xbyak::uint64_t(1) << 63) , constTblPos_(0) #ifdef XBYAK32 , varTbl_(eax) diff --git a/sample/profiler.cpp b/sample/profiler.cpp new file mode 100644 index 00000000..dc15d9bf --- /dev/null +++ b/sample/profiler.cpp @@ -0,0 +1,90 @@ +/* + How to profile JIT-code with perf or VTune + sudo perf record ./profiler 1 + amplxe-cl -collect hotspots -result-dir r001hs -quiet ./profiler-vtune 2 +*/ +#include +#include +#include +#include + +const int N = 3000000; +struct Code : public Xbyak::CodeGenerator { + Code() + { + mov(eax, N); + Xbyak::Label lp = L(); + for (int i = 0; i < 10; i++) { + sub(eax, 1); + } + jg(lp); + mov(eax, 1); + ret(); + } +}; + +struct Code2 : public Xbyak::CodeGenerator { + Code2() + { + mov(eax, N); + Xbyak::Label lp = L(); + for (int i = 0; i < 10; i++) { + xorps(xm0, xm0); + } + sub(eax, 1); + jg(lp); + mov(eax, 1); + ret(); + } +}; + +double s1(int n) +{ + double r = 0; + for (int i = 0; i < n; i++) { + r += 1.0 / (i + 1); + } + return r; +} + +double s2(int n) +{ + double r = 0; + for (int i = 0; i < n; i++) { + r += 1.0 / (i * i + 1) + 2.0 / (i + 3); + } + return r; +} + +int main(int argc, char *argv[]) +{ + int mode = argc == 1 ? 0 : atoi(argv[1]); + Code c; + Code2 c2; + int (*f)() = (int (*)())c.getCode(); + int (*g)() = (int (*)())c2.getCode(); + + printf("f:%p, %d\n", (const void*)f, (int)c.getSize()); + printf("g:%p, %d\n", (const void*)g, (int)c2.getSize()); + Xbyak::util::Profiler prof; + printf("mode=%d\n", mode); + prof.init(mode); + prof.set("f", (const void*)f, c.getSize()); + prof.set("g", (const void*)g, c2.getSize()); + + double sum = 0; + for (int i = 0; i < 20000; i++) { + sum += s1(i); + sum += s2(i); + } + printf("sum=%f\n", sum); + for (int i = 0; i < 2000; i++) { + sum += f(); + } + printf("f=%f\n", sum); + for (int i = 0; i < 2000; i++) { + sum += g(); + } + printf("g=%f\n", sum); + puts("end"); +} diff --git a/sample/quantize.cpp b/sample/quantize.cpp index c3eeafb3..6bdf0d00 100644 --- a/sample/quantize.cpp +++ b/sample/quantize.cpp @@ -5,12 +5,12 @@ This program generates a quantization routine by using fast division algorithm in run-time. time(sec) - quality 1(low) 10 50 100(high) + quality 1(high) 10 50 100(low) VC2005 8.0 8.0 8.0 8.0 Xbyak 1.6 0.8 0.5 0.5 -; generated code at q = 100 +; generated code at q = 1 push esi push edi mov edi,dword ptr [esp+0Ch] @@ -48,9 +48,6 @@ #pragma warning(disable : 4996) // scanf #endif -typedef Xbyak::uint64 uint64; -typedef Xbyak::uint32 uint32; - const int N = 64; class Quantize : public Xbyak::CodeGenerator { @@ -66,7 +63,7 @@ public: output : eax = [esi+offset] / dividend destroy : edx */ - void udiv(uint32 dividend, int offset) + void udiv(uint32_t dividend, int offset) { mov(eax, ptr[esi + offset]); @@ -83,11 +80,11 @@ public: return; } - uint64 mLow, mHigh; + uint64_t mLow, mHigh; int len = ilog2(odd) + 1; { - uint64 roundUp = uint64(1) << (32 + len); - uint64 k = roundUp / (0xFFFFFFFFL - (0xFFFFFFFFL % odd)); + uint64_t roundUp = uint64_t(1) << (32 + len); + uint64_t k = roundUp / (0xFFFFFFFFL - (0xFFFFFFFFL % odd)); mLow = roundUp / odd; mHigh = (roundUp + k) / odd; } @@ -96,12 +93,12 @@ public: mLow >>= 1; mHigh >>= 1; len--; } - uint64 m; int a; + uint64_t m; int a; if ((mHigh >> 32) == 0) { m = mHigh; a = 0; } else { len = ilog2(odd); - uint64 roundDown = uint64(1) << (32 + len); + uint64_t roundDown = uint64_t(1) << (32 + len); mLow = roundDown / odd; int r = (int)(roundDown % odd); m = (r <= (odd >> 1)) ? mLow : mLow + 1; @@ -124,9 +121,9 @@ public: mov(eax, edx); } /* - quantize(uint32 dest[64], const uint32 src[64]); + quantize(uint32_t dest[64], const uint32_t src[64]); */ - Quantize(const uint32 qTbl[64]) + Quantize(const uint32_t qTbl[64]) { push(esi); push(edi); @@ -143,7 +140,7 @@ public: } }; -void quantize(uint32 dest[64], const uint32 src[64], const uint32 qTbl[64]) +void quantize(uint32_t dest[64], const uint32_t src[64], const uint32_t qTbl[64]) { for (int i = 0; i < N; i++) { dest[i] = src[i] / qTbl[i]; @@ -170,7 +167,7 @@ int main(int argc, char *argv[]) } } printf("q=%d\n", q); - uint32 qTbl[] = { + uint32_t qTbl[] = { 16, 11, 10, 16, 24, 40, 51, 61, 12, 12, 14, 19, 26, 58, 60, 55, 14, 13, 16, 24, 40, 57, 69, 56, @@ -187,16 +184,16 @@ int main(int argc, char *argv[]) } try { - uint32 src[N]; - uint32 dest[N]; - uint32 dest2[N]; + uint32_t src[N]; + uint32_t dest[N]; + uint32_t dest2[N]; for (int i = 0; i < N; i++) { src[i] = rand() % 2048; } Quantize jit(qTbl); //printf("jit size=%d, ptr=%p\n", jit.getSize(), jit.getCode()); - void (*quantize2)(uint32*, const uint32*, const uint32 *) = jit.getCode(); + void (*quantize2)(uint32_t*, const uint32_t*, const uint32_t *) = jit.getCode(); quantize(dest, src, qTbl); quantize2(dest2, src, qTbl); diff --git a/sample/quantize.vcproj b/sample/quantize.vcproj deleted file mode 100644 index a197e63c..00000000 --- a/sample/quantize.vcproj +++ /dev/null @@ -1,427 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/sample/quantize.vcxproj b/sample/quantize.vcxproj new file mode 100644 index 00000000..5d073e74 --- /dev/null +++ b/sample/quantize.vcxproj @@ -0,0 +1,228 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {D06753BF-E1F3-4578-9B18-08673327F77C} + 10.0.17763.0 + + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + + + + + + + + + + + + + + + + + + + + + <_ProjectFileVersion>15.0.27924.0 + + + true + $(ProjectName)\$(Configuration)\ + + + false + $(ProjectName)\$(Configuration)\ + + + true + $(ProjectName)\$(Platform)\$(Configuration)\ + + + false + $(ProjectName)\$(Platform)\$(Configuration)\ + + + + .\Debug/quantize.tlb + + + + Disabled + ../;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + .\Debug/quantize.pch + Level4 + true + + + _DEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + true + .\Debug/quantize.pdb + Console + false + + MachineX86 + + + true + + + + + .\Release/quantize.tlb + + + + MaxSpeed + OnlyExplicitInline + ../;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + MultiThreaded + true + .\Release/quantize.pch + Level4 + true + + + NDEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + Console + false + + MachineX86 + + + true + + + + + X64 + .\Debug/quantize.tlb + + + + Disabled + ../;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + .\Debug/quantize.pch + Level4 + true + ProgramDatabase + + + _DEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + true + .\Debug/quantize.pdb + Console + false + + MachineX64 + + + true + + + + + X64 + .\Release/quantize.tlb + + + + MaxSpeed + OnlyExplicitInline + ../;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + MultiThreaded + true + .\Release/quantize.pch + Level4 + true + + + NDEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + Console + false + + MachineX64 + + + true + + + + + + + + + \ No newline at end of file diff --git a/sample/test0.cpp b/sample/test0.cpp index 5a4d91ba..afbaf83f 100644 --- a/sample/test0.cpp +++ b/sample/test0.cpp @@ -163,15 +163,15 @@ int main() // use memory allocated by user using namespace Xbyak; const size_t codeSize = 4096; - uint8 buf[codeSize + 16]; - uint8 *p = CodeArray::getAlignedAddress(buf); + uint8_t buf[codeSize + 16]; + uint8_t *p = CodeArray::getAlignedAddress(buf); Sample s(p, codeSize); if (!CodeArray::protect(p, codeSize, CodeArray::PROTECT_RWE)) { fprintf(stderr, "can't protect\n"); return 1; } int (*func)(int) = s.getCode(); - const uint8 *funcp = reinterpret_cast(func); + const uint8_t *funcp = reinterpret_cast(func); if (funcp != p) { fprintf(stderr, "internal error %p %p\n", p, funcp); return 1; diff --git a/sample/test0.vcproj b/sample/test0.vcproj deleted file mode 100644 index 7ed55712..00000000 --- a/sample/test0.vcproj +++ /dev/null @@ -1,427 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/sample/test0.vcxproj b/sample/test0.vcxproj new file mode 100644 index 00000000..847db6cf --- /dev/null +++ b/sample/test0.vcxproj @@ -0,0 +1,228 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E} + 10.0.17763.0 + + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + + + + + + + + + + + + + + + + + + + + + <_ProjectFileVersion>15.0.27924.0 + + + true + $(ProjectName)\$(Configuration)\ + + + false + $(ProjectName)\$(Configuration)\ + + + true + $(ProjectName)\$(Platform)\$(Configuration)\ + + + false + $(ProjectName)\$(Platform)\$(Configuration)\ + + + + .\Debug/test0.tlb + + + + Disabled + ../;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + .\Debug/test0.pch + Level4 + true + + + _DEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + true + .\Debug/test0.pdb + Console + false + + MachineX86 + + + true + + + + + .\Release/test0.tlb + + + + MaxSpeed + OnlyExplicitInline + ../;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + MultiThreaded + true + .\Release/test0.pch + Level4 + true + + + NDEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + Console + false + + MachineX86 + + + true + + + + + X64 + .\Debug/test0.tlb + + + + Disabled + ../;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + .\Debug/test0.pch + Level4 + true + ProgramDatabase + + + _DEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + true + .\Debug/test0.pdb + Console + false + + MachineX64 + + + true + + + + + X64 + .\Release/test0.tlb + + + + MaxSpeed + OnlyExplicitInline + ../;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + MultiThreaded + true + .\Release/test0.pch + Level4 + true + + + NDEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + Console + false + + MachineX64 + + + true + + + + + + + + + \ No newline at end of file diff --git a/sample/test_util.cpp b/sample/test_util.cpp index d75a5e06..afb6e5a4 100644 --- a/sample/test_util.cpp +++ b/sample/test_util.cpp @@ -78,6 +78,8 @@ void putCPUinfo() { Cpu::tAVX512_VNNI, "avx512_vnni" }, { Cpu::tAVX512_BITALG, "avx512_bitalg" }, { Cpu::tAVX512_VPOPCNTDQ, "avx512_vpopcntdq" }, + { Cpu::tAVX512_BF16, "avx512_bf16" }, + { Cpu::tAVX512_VP2INTERSECT, "avx512_vp2intersect" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { if (cpu.has(tbl[i].type)) printf(" %s", tbl[i].str); diff --git a/sample/test_util.vcproj b/sample/test_util.vcproj deleted file mode 100644 index 88de7d9c..00000000 --- a/sample/test_util.vcproj +++ /dev/null @@ -1,427 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/sample/test_util.vcxproj b/sample/test_util.vcxproj new file mode 100644 index 00000000..0ed75e1d --- /dev/null +++ b/sample/test_util.vcxproj @@ -0,0 +1,228 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD} + 10.0.17763.0 + + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + + + + + + + + + + + + + + + + + + + + + <_ProjectFileVersion>15.0.27924.0 + + + true + $(ProjectName)\$(Configuration)\ + + + true + $(ProjectName)\$(Platform)\$(Configuration)\ + + + false + $(ProjectName)\$(Configuration)\ + + + false + $(ProjectName)\$(Platform)\$(Configuration)\ + + + + .\Debug/test_util.tlb + + + + Disabled + ../;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + .\Debug/test_util.pch + Level4 + true + + + _DEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + true + .\Debug/test_util.pdb + Console + false + + MachineX86 + + + true + + + + + X64 + .\Debug/test_util.tlb + + + + Disabled + ../;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + .\Debug/test_util.pch + Level4 + true + ProgramDatabase + + + _DEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + true + .\Debug/test_util.pdb + Console + false + + MachineX64 + + + true + + + + + .\Release/test_util.tlb + + + + MaxSpeed + OnlyExplicitInline + ../;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + MultiThreaded + true + .\Release/test_util.pch + Level4 + true + + + NDEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + Console + false + + MachineX86 + + + true + + + + + X64 + .\Release/test_util.tlb + + + + MaxSpeed + OnlyExplicitInline + ../;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + MultiThreaded + true + .\Release/test_util.pch + Level4 + true + + + NDEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + Console + false + + MachineX64 + + + true + + + + + + + + + \ No newline at end of file diff --git a/sample/toyvm.cpp b/sample/toyvm.cpp index cd869ea3..1e558ff0 100644 --- a/sample/toyvm.cpp +++ b/sample/toyvm.cpp @@ -39,7 +39,7 @@ using namespace Xbyak; class ToyVm : public Xbyak::CodeGenerator { - typedef std::vector Buffer; + typedef std::vector Buffer; public: enum Reg { A, B @@ -53,14 +53,14 @@ public: { ::memset(mem_, 0, sizeof(mem_)); } - void vldi(Reg r, uint16 imm) { encode(LDI, r, imm); } - void vld(Reg r, uint16 idx) { encode(LD, r, idx); } - void vst(Reg r, uint16 idx) { encode(ST, r, idx); } - void vadd(Reg r, uint16 idx) { encode(ADD, r, idx); } - void vaddi(Reg r, uint16 imm) { encode(ADDI, r, imm); } - void vsub(Reg r, uint16 idx) { encode(SUB, r, idx); } - void vsubi(Reg r, uint16 imm) { encode(SUBI, r, imm); } - void vjnz(Reg r, int offset) { encode(JNZ, r, static_cast(offset)); } + void vldi(Reg r, uint16_t imm) { encode(LDI, r, imm); } + void vld(Reg r, uint16_t idx) { encode(LD, r, idx); } + void vst(Reg r, uint16_t idx) { encode(ST, r, idx); } + void vadd(Reg r, uint16_t idx) { encode(ADD, r, idx); } + void vaddi(Reg r, uint16_t imm) { encode(ADDI, r, imm); } + void vsub(Reg r, uint16_t idx) { encode(SUB, r, idx); } + void vsubi(Reg r, uint16_t imm) { encode(SUBI, r, imm); } + void vjnz(Reg r, int offset) { encode(JNZ, r, static_cast(offset)); } void vput(Reg r) { encode(PUT, r); } void setMark() { @@ -73,12 +73,12 @@ public: void run() { bool debug = false;//true; - uint32 reg[2] = { 0, 0 }; + uint32_t reg[2] = { 0, 0 }; const size_t end = code_.size(); - uint32 pc = 0; + uint32_t pc = 0; for (;;) { - uint32 x = code_[pc]; - uint32 code, r, imm; + uint32_t x = code_[pc]; + uint32_t code, r, imm; decode(code, r, imm, x); if (debug) { printf("---\n"); @@ -149,11 +149,11 @@ public: xor_(edi, edi); mov(mem, (size_t)mem_); const size_t end = code_.size(); - uint32 pc = 0; - uint32 labelNum = 0; + uint32_t pc = 0; + uint32_t labelNum = 0; for (;;) { - uint32 x = code_[pc]; - uint32 code, r, imm; + uint32_t x = code_[pc]; + uint32_t code, r, imm; decode(code, r, imm, x); L(Label::toStr(labelNum++)); switch (code) { @@ -229,18 +229,18 @@ public: ret(); } private: - uint32 mem_[65536]; + uint32_t mem_[65536]; Buffer code_; int mark_; - void decode(uint32& code, uint32& r, uint32& imm, uint32 x) + void decode(uint32_t& code, uint32_t& r, uint32_t& imm, uint32_t x) { code = x >> 24; r = (x >> 16) & 0xff; imm = x & 0xffff; } - void encode(Code code, Reg r, uint16 imm = 0) + void encode(Code code, Reg r, uint16_t imm = 0) { - uint32 x = (code << 24) | (r << 16) | imm; + uint32_t x = (code << 24) | (r << 16) | imm; code_.push_back(x); } }; @@ -262,7 +262,7 @@ public: */ vldi(A, 1); // c vst(A, 0); // p(1) - vldi(B, static_cast(n)); + vldi(B, static_cast(n)); vst(B, 2); // n // lp setMark(); @@ -283,9 +283,9 @@ public: } }; -void fibC(uint32 n) +void fibC(uint32_t n) { - uint32 p, c, t; + uint32_t p, c, t; p = 1; c = 1; lp: diff --git a/sample/toyvm.vcproj b/sample/toyvm.vcproj deleted file mode 100644 index 08d4f05e..00000000 --- a/sample/toyvm.vcproj +++ /dev/null @@ -1,427 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/sample/toyvm.vcxproj b/sample/toyvm.vcxproj new file mode 100644 index 00000000..16635168 --- /dev/null +++ b/sample/toyvm.vcxproj @@ -0,0 +1,228 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {2E41C7AF-39FF-454C-B081-37445378DCB3} + 10.0.17763.0 + + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + Application + v141 + false + MultiByte + + + + + + + + + + + + + + + + + + + + + + + <_ProjectFileVersion>15.0.27924.0 + + + true + $(ProjectName)\$(Configuration)\ + + + false + $(ProjectName)\$(Configuration)\ + + + true + $(ProjectName)\$(Platform)\$(Configuration)\ + + + false + $(ProjectName)\$(Platform)\$(Configuration)\ + + + + .\Debug/toyvm.tlb + + + + Disabled + ../;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + .\Debug/toyvm.pch + Level4 + true + + + _DEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + true + .\Debug/toyvm.pdb + Console + false + + MachineX86 + + + true + + + + + .\Release/toyvm.tlb + + + + MaxSpeed + OnlyExplicitInline + ../;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + MultiThreaded + true + .\Release/toyvm.pch + Level4 + true + + + NDEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + Console + false + + MachineX86 + + + true + + + + + X64 + .\Debug/toyvm.tlb + + + + Disabled + ../;%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + .\Debug/toyvm.pch + Level4 + true + ProgramDatabase + + + _DEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + true + .\Debug/toyvm.pdb + Console + false + + MachineX64 + + + true + + + + + X64 + .\Release/toyvm.tlb + + + + MaxSpeed + OnlyExplicitInline + ../;%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + MultiThreaded + true + .\Release/toyvm.pch + Level4 + true + + + NDEBUG;%(PreprocessorDefinitions) + 0x0411 + + + true + Console + false + + MachineX64 + + + true + + + + + + + + + \ No newline at end of file diff --git a/test/Makefile b/test/Makefile index 37a678cb..ac69b3e4 100644 --- a/test/Makefile +++ b/test/Makefile @@ -1,9 +1,18 @@ -TARGET = make_nm normalize_prefix jmp address bad_address misc cvt_test cvt_test32 +TARGET = make_nm normalize_prefix bad_address misc cvt_test cvt_test32 noexception XBYAK_INC=../xbyak/xbyak.h +UNAME_S=$(shell uname -s) BIT=32 ifeq ($(shell uname -m),x86_64) BIT=64 endif +ONLY_64BIT=0 +ifeq ($(UNAME_S),Darwin) + # 32-bit binary is not supported + ONLY_64BIT=1 +endif +ifeq ($(ONLY_64BIT),0) + TARGET += jmp address +endif ifeq ($(BIT),64) TARGET += jmp64 address64 @@ -36,18 +45,24 @@ cvt_test: cvt_test.cpp ../xbyak/xbyak.h $(CXX) $(CFLAGS) $< -o $@ cvt_test32: cvt_test.cpp ../xbyak/xbyak.h $(CXX) $(CFLAGS) $< -o $@ -DXBYAK32 +noexception: noexception.cpp ../xbyak/xbyak.h + $(CXX) $(CFLAGS) $< -o $@ -fno-exceptions -test: normalize_prefix jmp bad_address $(TARGET) +test_nm: normalize_prefix $(TARGET) $(MAKE) -C ../gen +ifneq ($(ONLY_64BIT),1) ./test_nm.sh + ./test_nm.sh noexcept + ./noexception ./test_nm.sh Y ./test_nm.sh avx512 ./test_address.sh ./jmp + ./cvt_test32 +endif ./bad_address ./misc ./cvt_test - ./cvt_test32 ifeq ($(BIT),64) ./test_address.sh 64 ./test_nm.sh 64 @@ -56,8 +71,10 @@ ifeq ($(BIT),64) endif test_avx: normalize_prefix +ifneq ($(ONLY_64BIT),0) ./test_avx.sh ./test_avx.sh Y +endif ifeq ($(BIT),64) ./test_address.sh 64 ./test_avx.sh 64 @@ -65,10 +82,18 @@ ifeq ($(BIT),64) endif test_avx512: normalize_prefix +ifneq ($(ONLY_64BIT),0) ./test_avx512.sh +endif ifeq ($(BIT),64) ./test_avx512.sh 64 endif + +test: + $(MAKE) test_nm + $(MAKE) test_avx + $(MAKE) test_avx512 + clean: rm -rf *.o $(TARGET) lib_run nm.cpp nm_frame make_512 diff --git a/test/bad_address.cpp b/test/bad_address.cpp index a74dd993..6d74a751 100644 --- a/test/bad_address.cpp +++ b/test/bad_address.cpp @@ -1,47 +1,28 @@ #include - -#define TEST_EXCEPTION(state) \ -{ \ - num++; \ - bool exception = false; \ - try { \ - state; \ - } catch (...) { \ - exception = true; \ - } \ - if (!exception) { \ - printf("exception should arise for %s\n", #state); \ - err++; \ - } \ -} +#include struct Code : Xbyak::CodeGenerator { Code() { - int err = 0; - int num = 0; - TEST_EXCEPTION(mov(eax, ptr [esp + esp])); - TEST_EXCEPTION(mov(eax, ptr [ax])); // not support - TEST_EXCEPTION(mov(eax, ptr [esp * 4])); - TEST_EXCEPTION(mov(eax, ptr [eax * 16])); - TEST_EXCEPTION(mov(eax, ptr [eax + eax + eax])); - TEST_EXCEPTION(mov(eax, ptr [eax * 2 + ecx * 4])); - TEST_EXCEPTION(mov(eax, ptr [eax * 2 + ecx * 4])); - TEST_EXCEPTION(mov(eax, ptr [xmm0])); - TEST_EXCEPTION(fld(dword [xmm0])); - TEST_EXCEPTION(vgatherdpd(xmm0, ptr [eax * 2], ymm3)); - TEST_EXCEPTION(vgatherdpd(xmm0, ptr [xmm0 + xmm1], ymm3)); + CYBOZU_TEST_EXCEPTION(mov(eax, ptr [esp + esp]), std::exception); + CYBOZU_TEST_EXCEPTION(mov(eax, ptr [ax]), std::exception); // not support + CYBOZU_TEST_EXCEPTION(mov(eax, ptr [esp * 4]), std::exception); + CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax * 16]), std::exception); + CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax + eax + eax]), std::exception); + CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax * 2 + ecx * 4]), std::exception); + CYBOZU_TEST_EXCEPTION(mov(eax, ptr [eax * 2 + ecx * 4]), std::exception); + CYBOZU_TEST_EXCEPTION(mov(eax, ptr [xmm0]), std::exception); + CYBOZU_TEST_EXCEPTION(fld(dword [xmm0]), std::exception); + CYBOZU_TEST_EXCEPTION(vgatherdpd(xmm0, ptr [eax * 2], ymm3), std::exception); + CYBOZU_TEST_EXCEPTION(vgatherdpd(xmm0, ptr [xmm0 + xmm1], ymm3), std::exception); #ifdef XBYAK64 - TEST_EXCEPTION(mov(eax, ptr [rax + eax])); - TEST_EXCEPTION(mov(eax, ptr [xmm0 + ymm0])); + CYBOZU_TEST_EXCEPTION(mov(eax, ptr [rax + eax]), std::exception); + CYBOZU_TEST_EXCEPTION(mov(eax, ptr [xmm0 + ymm0]), std::exception); #endif - if (!err) { - printf("bad_address test %d ok\n", num); - } } }; -int main() +CYBOZU_TEST_AUTO(exception) { Code c; } diff --git a/test/jmp.cpp b/test/jmp.cpp index 9fe8ff69..e9192b2f 100644 --- a/test/jmp.cpp +++ b/test/jmp.cpp @@ -117,7 +117,7 @@ CYBOZU_TEST_AUTO(test1) int offset; bool isBack; bool isShort; - uint8 result[6]; + uint8_t result[6]; int size; } tbl[] = { { 0, true, true, { 0xeb, 0xfe }, 2 }, @@ -133,7 +133,7 @@ CYBOZU_TEST_AUTO(test1) const Tbl *p = &tbl[i]; for (int k = 0; k < 2; k++) { TestJmp jmp(p->offset, p->isBack, p->isShort, k == 0); - const uint8 *q = (const uint8*)jmp.getCode(); + const uint8_t *q = (const uint8_t*)jmp.getCode(); if (p->isBack) q += p->offset; /* skip nop */ for (int j = 0; j < p->size; j++) { CYBOZU_TEST_EQUAL(q[j], p->result[j]); @@ -205,6 +205,41 @@ CYBOZU_TEST_AUTO(testJmpCx) } } +CYBOZU_TEST_AUTO(loop) +{ + const uint8_t ok[] = { + // lp: + 0x31, 0xC0, // xor eax, eax + 0xE2, 0xFC, // loop lp + 0xE0, 0xFA, // loopne lp + 0xE1, 0xF8, // loope lp + }; + struct Code : CodeGenerator { + Code(bool useLabel) + { + if (useLabel) { + Xbyak::Label lp = L(); + xor_(eax, eax); + loop(lp); + loopne(lp); + loope(lp); + } else { + L("@@"); + xor_(eax, eax); + loop("@b"); + loopne("@b"); + loope("@b"); + } + } + }; + Code code1(false); + CYBOZU_TEST_EQUAL(code1.getSize(), sizeof(ok)); + CYBOZU_TEST_EQUAL_ARRAY(code1.getCode(), ok, sizeof(ok)); + Code code2(true); + CYBOZU_TEST_EQUAL(code2.getSize(), sizeof(ok)); + CYBOZU_TEST_EQUAL_ARRAY(code2.getCode(), ok, sizeof(ok)); +} + #ifdef _MSC_VER #pragma warning(disable : 4310) #endif @@ -337,11 +372,11 @@ CYBOZU_TEST_AUTO(test3) } #endif -Xbyak::uint8 bufL[4096 * 32]; -Xbyak::uint8 bufS[4096 * 2]; +uint8_t bufL[4096 * 32]; +uint8_t bufS[4096 * 2]; struct MyAllocator : Xbyak::Allocator { - Xbyak::uint8 *alloc(size_t size) + uint8_t *alloc(size_t size) { if (size < sizeof(bufS)) { printf("test use bufS(%d)\n", (int)size); @@ -354,7 +389,7 @@ struct MyAllocator : Xbyak::Allocator { fprintf(stderr, "no memory %d\n", (int)size); exit(1); } - void free(Xbyak::uint8 *) + void free(uint8_t *) { } } myAlloc; @@ -393,6 +428,7 @@ CYBOZU_TEST_AUTO(test4) } } +#ifndef __APPLE__ CYBOZU_TEST_AUTO(test5) { struct Test5 : Xbyak::CodeGenerator { @@ -440,8 +476,9 @@ CYBOZU_TEST_AUTO(test5) gm.assign((const char*)gc.getCode(), gc.getSize()); CYBOZU_TEST_EQUAL(fm, gm); } +#endif -size_t getValue(const uint8* p) +size_t getValue(const uint8_t* p) { size_t v = 0; for (size_t i = 0; i < sizeof(size_t); i++) { @@ -450,7 +487,7 @@ size_t getValue(const uint8* p) return v; } -void checkAddr(const uint8 *p, size_t offset, size_t expect) +void checkAddr(const uint8_t *p, size_t offset, size_t expect) { size_t v = getValue(p + offset); CYBOZU_TEST_EQUAL(v, size_t(p) + expect); @@ -498,7 +535,7 @@ CYBOZU_TEST_AUTO(MovLabel) const struct { int pos; - uint8 ok; + uint8_t ok; } tbl[] = { #ifdef XBYAK32 { 0x00, 0x90 }, @@ -532,11 +569,11 @@ CYBOZU_TEST_AUTO(MovLabel) const bool useNewLabel = k == 0; MovLabelCode code(grow, useNewLabel); if (grow) code.ready(); - const uint8* const p = code.getCode(); + const uint8_t* const p = code.getCode(); for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { int pos = tbl[i].pos; - uint8 x = p[pos]; - uint8 ok = tbl[i].ok; + uint8_t x = p[pos]; + uint8_t ok = tbl[i].ok; CYBOZU_TEST_EQUAL(x, ok); } #ifdef XBYAK32 @@ -1182,11 +1219,11 @@ CYBOZU_TEST_AUTO(rip_jmp) CYBOZU_TEST_EQUAL(ret, ret1234() + ret9999()); } -#ifdef XBYAK64_GCC +#if 0 CYBOZU_TEST_AUTO(rip_addr) { /* - assume |&x - &code| < 2GiB + we can't assume |&x - &code| < 2GiB anymore */ static int x = 5; struct Code : Xbyak::CodeGenerator { @@ -1201,6 +1238,8 @@ CYBOZU_TEST_AUTO(rip_addr) CYBOZU_TEST_EQUAL(x, 123); } #endif + +#ifndef __APPLE__ CYBOZU_TEST_AUTO(rip_addr_with_fixed_buf) { MIE_ALIGN(4096) static char buf[8192]; @@ -1225,6 +1264,7 @@ CYBOZU_TEST_AUTO(rip_addr_with_fixed_buf) code.setProtectModeRW(); } #endif +#endif struct ReleaseTestCode : Xbyak::CodeGenerator { ReleaseTestCode(Label& L1, Label& L2, Label& L3) @@ -1270,3 +1310,76 @@ CYBOZU_TEST_AUTO(release_label_after_code) printf("id=%d %d %d %d %d\n", L1.getId(), L2.getId(), L3.getId(), L4.getId(), L5.getId()); } } + +struct JmpTypeCode : Xbyak::CodeGenerator { + void nops() + { + for (int i = 0; i < 130; i++) { + nop(); + } + } + // return jmp code size + size_t gen(bool pre, bool large, Xbyak::CodeGenerator::LabelType type) + { + Label label; + if (pre) { + L(label); + if (large) nops(); + size_t pos = getSize(); + jmp(label, type); + return getSize() - pos; + } else { + size_t pos = getSize(); + jmp(label, type); + size_t size = getSize() - pos; + if (large) nops(); + L(label); + return size; + } + } +}; + +CYBOZU_TEST_AUTO(setDefaultJmpNEAR) +{ + const Xbyak::CodeGenerator::LabelType T_SHORT = Xbyak::CodeGenerator::T_SHORT; + const Xbyak::CodeGenerator::LabelType T_NEAR = Xbyak::CodeGenerator::T_NEAR; + const Xbyak::CodeGenerator::LabelType T_AUTO = Xbyak::CodeGenerator::T_AUTO; + const struct { + bool pre; + bool large; + Xbyak::CodeGenerator::LabelType type; + size_t expect1; // 0 means exception + size_t expect2; + } tbl[] = { + { false, false, T_SHORT, 2, 2 }, + { false, false, T_NEAR, 5, 5 }, + { false, true, T_SHORT, 0, 0 }, + { false, true, T_NEAR, 5, 5 }, + + { true, false, T_SHORT, 2, 2 }, + { true, false, T_NEAR, 5, 5 }, + { true, true, T_SHORT, 0, 0 }, + { true, true, T_NEAR, 5, 5 }, + + { false, false, T_AUTO, 2, 5 }, + { false, true, T_AUTO, 0, 5 }, + { true, false, T_AUTO, 2, 2 }, + { true, true, T_AUTO, 5, 5 }, + }; + JmpTypeCode code1, code2; + code2.setDefaultJmpNEAR(true); + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + if (tbl[i].expect1) { + size_t size = code1.gen(tbl[i].pre, tbl[i].large, tbl[i].type); + CYBOZU_TEST_EQUAL(size, tbl[i].expect1); + } else { + CYBOZU_TEST_EXCEPTION(code1.gen(tbl[i].pre, tbl[i].large, tbl[i].type), std::exception); + } + if (tbl[i].expect2) { + size_t size = code2.gen(tbl[i].pre, tbl[i].large, tbl[i].type); + CYBOZU_TEST_EQUAL(size, tbl[i].expect2); + } else { + CYBOZU_TEST_EXCEPTION(code2.gen(tbl[i].pre, tbl[i].large, tbl[i].type), std::exception); + } + } +} diff --git a/test/make_512.cpp b/test/make_512.cpp index 49d082c4..7b2d7f3b 100644 --- a/test/make_512.cpp +++ b/test/make_512.cpp @@ -9,111 +9,111 @@ using namespace Xbyak; const int bitEnd = 64; -const uint64 YMM_SAE = 1ULL << 0; -const uint64 _XMM = 1ULL << 1; -const uint64 _MEM = 1ULL << 2; -const uint64 _REG32 = 1ULL << 3; -const uint64 EAX = 1ULL << 4; -const uint64 IMM32 = 1ULL << 5; -const uint64 IMM8 = 1ULL << 6; -const uint64 _REG8 = 1ULL << 7; -const uint64 _REG16 = 1ULL << 8; -const uint64 XMM_K = 1ULL << 9; -const uint64 YMM_K = 1ULL << 10; -const uint64 ZMM_K = 1ULL << 11; -const uint64 AX = 1ULL << 12; -const uint64 AL = 1ULL << 13; -const uint64 IMM_1 = 1ULL << 14; -const uint64 MEM8 = 1ULL << 15; -const uint64 MEM16 = 1ULL << 16; -const uint64 MEM32 = 1ULL << 17; -const uint64 VM32Z = 1ULL << 19; -const uint64 K_K = 1ULL << 20; -const uint64 MEM_ONLY_DISP = 1ULL << 21; -const uint64 VM32X_K = 1ULL << 23; -const uint64 _YMM = 1ULL << 24; -const uint64 VM32X_32 = 1ULL << 39; -const uint64 VM32X_64 = 1ULL << 40; -const uint64 VM32Y_32 = 1ULL << 41; -const uint64 VM32Y_64 = 1ULL << 42; -const uint64 VM32Z_K = 1ULL << 32; +const uint64_t YMM_SAE = 1ULL << 0; +const uint64_t _XMM = 1ULL << 1; +const uint64_t _MEM = 1ULL << 2; +const uint64_t _REG32 = 1ULL << 3; +const uint64_t EAX = 1ULL << 4; +const uint64_t IMM32 = 1ULL << 5; +const uint64_t IMM8 = 1ULL << 6; +const uint64_t _REG8 = 1ULL << 7; +const uint64_t _REG16 = 1ULL << 8; +const uint64_t XMM_K = 1ULL << 9; +const uint64_t YMM_K = 1ULL << 10; +const uint64_t ZMM_K = 1ULL << 11; +const uint64_t AX = 1ULL << 12; +const uint64_t AL = 1ULL << 13; +const uint64_t IMM_1 = 1ULL << 14; +const uint64_t MEM8 = 1ULL << 15; +const uint64_t MEM16 = 1ULL << 16; +const uint64_t MEM32 = 1ULL << 17; +const uint64_t VM32Z = 1ULL << 19; +const uint64_t K_K = 1ULL << 20; +const uint64_t MEM_ONLY_DISP = 1ULL << 21; +const uint64_t VM32X_K = 1ULL << 23; +const uint64_t _YMM = 1ULL << 24; +const uint64_t VM32X_32 = 1ULL << 39; +const uint64_t VM32X_64 = 1ULL << 40; +const uint64_t VM32Y_32 = 1ULL << 41; +const uint64_t VM32Y_64 = 1ULL << 42; +const uint64_t VM32Z_K = 1ULL << 32; #ifdef XBYAK64 -const uint64 _MEMe = 1ULL << 25; -const uint64 REG32_2 = 1ULL << 26; // r8d, ... -const uint64 REG16_2 = 1ULL << 27; // r8w, ... -const uint64 REG8_2 = 1ULL << 28; // r8b, ... -const uint64 REG8_3 = 1ULL << 29; // spl, ... -const uint64 _REG64 = 1ULL << 30; // rax, ... -const uint64 _REG64_2 = 1ULL << 31; // r8, ... -const uint64 _XMM2 = 1ULL << 33; -const uint64 _YMM2 = 1ULL << 34; -const uint64 VM32X = VM32X_32 | VM32X_64; -const uint64 VM32Y = VM32Y_32 | VM32Y_64; +const uint64_t _MEMe = 1ULL << 25; +const uint64_t REG32_2 = 1ULL << 26; // r8d, ... +const uint64_t REG16_2 = 1ULL << 27; // r8w, ... +const uint64_t REG8_2 = 1ULL << 28; // r8b, ... +const uint64_t REG8_3 = 1ULL << 29; // spl, ... +const uint64_t _REG64 = 1ULL << 30; // rax, ... +const uint64_t _REG64_2 = 1ULL << 31; // r8, ... +const uint64_t _XMM2 = 1ULL << 33; +const uint64_t _YMM2 = 1ULL << 34; +const uint64_t VM32X = VM32X_32 | VM32X_64; +const uint64_t VM32Y = VM32Y_32 | VM32Y_64; #else -const uint64 _MEMe = 0; -const uint64 REG32_2 = 0; -const uint64 REG16_2 = 0; -const uint64 REG8_2 = 0; -const uint64 REG8_3 = 0; -const uint64 _REG64 = 0; -const uint64 _REG64_2 = 0; -const uint64 _XMM2 = 0; -const uint64 _YMM2 = 0; -const uint64 VM32X = VM32X_32; -const uint64 VM32Y = VM32Y_32; +const uint64_t _MEMe = 0; +const uint64_t REG32_2 = 0; +const uint64_t REG16_2 = 0; +const uint64_t REG8_2 = 0; +const uint64_t REG8_3 = 0; +const uint64_t _REG64 = 0; +const uint64_t _REG64_2 = 0; +const uint64_t _XMM2 = 0; +const uint64_t _YMM2 = 0; +const uint64_t VM32X = VM32X_32; +const uint64_t VM32Y = VM32Y_32; #endif -const uint64 REG64 = _REG64 | _REG64_2; -const uint64 REG32 = _REG32 | REG32_2 | EAX; -const uint64 REG16 = _REG16 | REG16_2 | AX; -const uint64 REG32e = REG32 | REG64; -const uint64 REG8 = _REG8 | REG8_2|AL; -const uint64 MEM = _MEM | _MEMe; -const uint64 MEM64 = 1ULL << 35; -const uint64 YMM_ER = 1ULL << 36; -const uint64 VM32Y_K = 1ULL << 37; -const uint64 IMM_2 = 1ULL << 38; -const uint64 IMM = IMM_1 | IMM_2; -const uint64 YMM = _YMM | _YMM2; -const uint64 K = 1ULL << 43; -const uint64 _ZMM = 1ULL << 44; -const uint64 _ZMM2 = 1ULL << 45; +const uint64_t REG64 = _REG64 | _REG64_2; +const uint64_t REG32 = _REG32 | REG32_2 | EAX; +const uint64_t REG16 = _REG16 | REG16_2 | AX; +const uint64_t REG32e = REG32 | REG64; +const uint64_t REG8 = _REG8 | REG8_2|AL; +const uint64_t MEM = _MEM | _MEMe; +const uint64_t MEM64 = 1ULL << 35; +const uint64_t YMM_ER = 1ULL << 36; +const uint64_t VM32Y_K = 1ULL << 37; +const uint64_t IMM_2 = 1ULL << 38; +const uint64_t IMM = IMM_1 | IMM_2; +const uint64_t YMM = _YMM | _YMM2; +const uint64_t K = 1ULL << 43; +const uint64_t _ZMM = 1ULL << 44; +const uint64_t _ZMM2 = 1ULL << 45; #ifdef XBYAK64 -const uint64 ZMM = _ZMM | _ZMM2; -const uint64 _YMM3 = 1ULL << 46; +const uint64_t ZMM = _ZMM | _ZMM2; +const uint64_t _YMM3 = 1ULL << 46; #else -const uint64 ZMM = _ZMM; -const uint64 _YMM3 = 0; +const uint64_t ZMM = _ZMM; +const uint64_t _YMM3 = 0; #endif -const uint64 K2 = 1ULL << 47; -const uint64 ZMM_SAE = 1ULL << 48; -const uint64 ZMM_ER = 1ULL << 49; +const uint64_t K2 = 1ULL << 47; +const uint64_t ZMM_SAE = 1ULL << 48; +const uint64_t ZMM_ER = 1ULL << 49; #ifdef XBYAK64 -const uint64 _XMM3 = 1ULL << 50; +const uint64_t _XMM3 = 1ULL << 50; #else -const uint64 _XMM3 = 0; +const uint64_t _XMM3 = 0; #endif -const uint64 XMM = _XMM | _XMM2 | _XMM3; -const uint64 XMM_SAE = 1ULL << 51; +const uint64_t XMM = _XMM | _XMM2 | _XMM3; +const uint64_t XMM_SAE = 1ULL << 51; #ifdef XBYAK64 -const uint64 XMM_KZ = 1ULL << 52; -const uint64 YMM_KZ = 1ULL << 53; -const uint64 ZMM_KZ = 1ULL << 54; +const uint64_t XMM_KZ = 1ULL << 52; +const uint64_t YMM_KZ = 1ULL << 53; +const uint64_t ZMM_KZ = 1ULL << 54; #else -const uint64 XMM_KZ = 0; -const uint64 YMM_KZ = 0; -const uint64 ZMM_KZ = 0; +const uint64_t XMM_KZ = 0; +const uint64_t YMM_KZ = 0; +const uint64_t ZMM_KZ = 0; #endif -const uint64 MEM_K = 1ULL << 55; -const uint64 M_1to2 = 1ULL << 56; -const uint64 M_1to4 = 1ULL << 57; -const uint64 M_1to8 = 1ULL << 58; -const uint64 M_1to16 = 1ULL << 59; -const uint64 XMM_ER = 1ULL << 60; -const uint64 M_xword = 1ULL << 61; -const uint64 M_yword = 1ULL << 62; -const uint64 MY_1to4 = 1ULL << 18; +const uint64_t MEM_K = 1ULL << 55; +const uint64_t M_1to2 = 1ULL << 56; +const uint64_t M_1to4 = 1ULL << 57; +const uint64_t M_1to8 = 1ULL << 58; +const uint64_t M_1to16 = 1ULL << 59; +const uint64_t XMM_ER = 1ULL << 60; +const uint64_t M_xword = 1ULL << 61; +const uint64_t M_yword = 1ULL << 62; +const uint64_t MY_1to4 = 1ULL << 18; -const uint64 NOPARA = 1ULL << (bitEnd - 1); +const uint64_t NOPARA = 1ULL << (bitEnd - 1); class Test { Test(const Test&); @@ -121,7 +121,7 @@ class Test { const bool isXbyak_; int funcNum_; // check all op1, op2, op3 - void put(const std::string& nm, uint64 op1 = NOPARA, uint64 op2 = NOPARA, uint64 op3 = NOPARA, uint64 op4 = NOPARA) const + void put(const std::string& nm, uint64_t op1 = NOPARA, uint64_t op2 = NOPARA, uint64_t op3 = NOPARA, uint64_t op4 = NOPARA) const { for (int i = 0; i < bitEnd; i++) { if ((op1 & (1ULL << i)) == 0) continue; @@ -144,7 +144,7 @@ class Test { } } } - void put(const char *nm, uint64 op, const char *xbyak, const char *nasm) const + void put(const char *nm, uint64_t op, const char *xbyak, const char *nasm) const { for (int i = 0; i < bitEnd; i++) { if ((op & (1ULL << i)) == 0) continue; @@ -156,7 +156,7 @@ class Test { printf("\n"); } } - void put(const char *nm, const char *xbyak, const char *nasm = 0, uint64 op = NOPARA) const + void put(const char *nm, const char *xbyak, const char *nasm = 0, uint64_t op = NOPARA) const { if (nasm == 0) nasm = xbyak; for (int i = 0; i < bitEnd; i++) { @@ -169,7 +169,7 @@ class Test { printf("\n"); } } - const char *get(uint64 type) const + const char *get(uint64_t type) const { int idx = (rand() / 31) & 7; switch (type) { @@ -537,7 +537,7 @@ public: printf("vaddpd(%s%s%s, %s, %s%s); dump();\n", r1, pk, pz, r2, r3, saeTblXbyak[sae]); } else { if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "{k%d}", kIdx); - if (z) pz = "{z}"; + if (z && kIdx) pz = "{z}"; printf("vaddpd %s%s%s, %s, %s%s\n", r1, pk, pz, r2, r3, saeTblNASM[sae]); } } @@ -574,9 +574,9 @@ public: for (size_t k = 0; k < N; k++) { #ifdef XBYAK64 for (int kIdx = 0; kIdx < 8; kIdx++) { + put_vaddpd(xTbl[i], xTbl[j], xTbl[k], kIdx); + put_vaddpd(yTbl[i], yTbl[j], yTbl[k], kIdx); for (int z = 0; z < 2; z++) { - put_vaddpd(xTbl[i], xTbl[j], xTbl[k], kIdx, z == 1); - put_vaddpd(yTbl[i], yTbl[j], yTbl[k], kIdx, z == 1); for (int sae = 0; sae < 5; sae++) { put_vaddpd(zTbl[i], zTbl[j], zTbl[k], kIdx, z == 1, sae); } @@ -615,6 +615,13 @@ public: put(p->name, K, _YMM, _YMM | MEM, IMM8); put(p->name, K, _ZMM, _ZMM | MEM, IMM8); } + put("vcmppd", K, XMM, M_1to2, IMM8); + put("vcmppd", K, YMM, M_1to4, IMM8); + put("vcmppd", K, ZMM, M_1to8, IMM8); + + put("vcmpps", K, XMM, M_1to4, IMM8); + put("vcmpps", K, YMM, M_1to8, IMM8); + put("vcmpps", K, ZMM, M_1to16, IMM8); } put("vcmppd", K2, ZMM, ZMM_SAE, IMM); #ifdef XBYAK64 diff --git a/test/make_nm.cpp b/test/make_nm.cpp index f109f1ec..47eb0237 100644 --- a/test/make_nm.cpp +++ b/test/make_nm.cpp @@ -1,5 +1,4 @@ #include -#define XBYAK_NO_OP_NAMES #include "xbyak/xbyak.h" #include "xbyak/xbyak_bin2hex.h" #include @@ -11,111 +10,111 @@ using namespace Xbyak; const int bitEnd = 64; -const uint64 MMX = 1ULL << 0; -const uint64 _XMM = 1ULL << 1; -const uint64 _MEM = 1ULL << 2; -const uint64 _REG32 = 1ULL << 3; -const uint64 EAX = 1ULL << 4; -const uint64 IMM32 = 1ULL << 5; -const uint64 IMM8 = 1ULL << 6; -const uint64 _REG8 = 1ULL << 7; -const uint64 _REG16 = 1ULL << 8; -const uint64 NEG8 = 1ULL << 9; -const uint64 IMM16 = 1ULL << 10; -const uint64 NEG16 = 1ULL << 11; -const uint64 AX = 1ULL << 12; -const uint64 AL = 1ULL << 13; -const uint64 IMM_1 = 1ULL << 14; -const uint64 MEM8 = 1ULL << 15; -const uint64 MEM16 = 1ULL << 16; -const uint64 MEM32 = 1ULL << 17; -const uint64 ONE = 1ULL << 19; -const uint64 CL = 1ULL << 20; -const uint64 MEM_ONLY_DISP = 1ULL << 21; -const uint64 NEG32 = 1ULL << 23; -const uint64 _YMM = 1ULL << 24; -const uint64 VM32X_32 = 1ULL << 39; -const uint64 VM32X_64 = 1ULL << 40; -const uint64 VM32Y_32 = 1ULL << 41; -const uint64 VM32Y_64 = 1ULL << 42; +const uint64_t MMX = 1ULL << 0; +const uint64_t _XMM = 1ULL << 1; +const uint64_t _MEM = 1ULL << 2; +const uint64_t _REG32 = 1ULL << 3; +const uint64_t EAX = 1ULL << 4; +const uint64_t IMM32 = 1ULL << 5; +const uint64_t IMM8 = 1ULL << 6; +const uint64_t _REG8 = 1ULL << 7; +const uint64_t _REG16 = 1ULL << 8; +const uint64_t NEG8 = 1ULL << 9; +const uint64_t IMM16 = 1ULL << 10; +const uint64_t NEG16 = 1ULL << 11; +const uint64_t AX = 1ULL << 12; +const uint64_t AL = 1ULL << 13; +const uint64_t IMM_1 = 1ULL << 14; +const uint64_t MEM8 = 1ULL << 15; +const uint64_t MEM16 = 1ULL << 16; +const uint64_t MEM32 = 1ULL << 17; +const uint64_t ONE = 1ULL << 19; +const uint64_t CL = 1ULL << 20; +const uint64_t MEM_ONLY_DISP = 1ULL << 21; +const uint64_t NEG32 = 1ULL << 23; +const uint64_t _YMM = 1ULL << 24; +const uint64_t VM32X_32 = 1ULL << 39; +const uint64_t VM32X_64 = 1ULL << 40; +const uint64_t VM32Y_32 = 1ULL << 41; +const uint64_t VM32Y_64 = 1ULL << 42; #ifdef XBYAK64 -const uint64 _MEMe = 1ULL << 25; -const uint64 REG32_2 = 1ULL << 26; // r8d, ... -const uint64 REG16_2 = 1ULL << 27; // r8w, ... -const uint64 REG8_2 = 1ULL << 28; // r8b, ... -const uint64 REG8_3 = 1ULL << 29; // spl, ... -const uint64 _REG64 = 1ULL << 30; // rax, ... -const uint64 _REG64_2 = 1ULL << 31; // r8, ... -const uint64 RAX = 1ULL << 32; -const uint64 _XMM2 = 1ULL << 33; -const uint64 _YMM2 = 1ULL << 34; -const uint64 VM32X = VM32X_32 | VM32X_64; -const uint64 VM32Y = VM32Y_32 | VM32Y_64; +const uint64_t _MEMe = 1ULL << 25; +const uint64_t REG32_2 = 1ULL << 26; // r8d, ... +const uint64_t REG16_2 = 1ULL << 27; // r8w, ... +const uint64_t REG8_2 = 1ULL << 28; // r8b, ... +const uint64_t REG8_3 = 1ULL << 29; // spl, ... +const uint64_t _REG64 = 1ULL << 30; // rax, ... +const uint64_t _REG64_2 = 1ULL << 31; // r8, ... +const uint64_t RAX = 1ULL << 32; +const uint64_t _XMM2 = 1ULL << 33; +const uint64_t _YMM2 = 1ULL << 34; +const uint64_t VM32X = VM32X_32 | VM32X_64; +const uint64_t VM32Y = VM32Y_32 | VM32Y_64; #else -const uint64 _MEMe = 0; -const uint64 REG32_2 = 0; -const uint64 REG16_2 = 0; -const uint64 REG8_2 = 0; -const uint64 REG8_3 = 0; -const uint64 _REG64 = 0; -const uint64 _REG64_2 = 0; -const uint64 RAX = 0; -const uint64 _XMM2 = 0; -const uint64 _YMM2 = 0; -const uint64 VM32X = VM32X_32; -const uint64 VM32Y = VM32Y_32; +const uint64_t _MEMe = 0; +const uint64_t REG32_2 = 0; +const uint64_t REG16_2 = 0; +const uint64_t REG8_2 = 0; +const uint64_t REG8_3 = 0; +const uint64_t _REG64 = 0; +const uint64_t _REG64_2 = 0; +const uint64_t RAX = 0; +const uint64_t _XMM2 = 0; +const uint64_t _YMM2 = 0; +const uint64_t VM32X = VM32X_32; +const uint64_t VM32Y = VM32Y_32; #endif -const uint64 REG64 = _REG64 | _REG64_2 | RAX; -const uint64 REG32 = _REG32 | REG32_2 | EAX; -const uint64 REG16 = _REG16 | REG16_2 | AX; -const uint64 REG32e = REG32 | REG64; -const uint64 REG8 = _REG8 | REG8_2|AL; -const uint64 MEM = _MEM | _MEMe; -const uint64 MEM64 = 1ULL << 35; -const uint64 ST0 = 1ULL << 36; -const uint64 STi = 1ULL << 37; -const uint64 IMM_2 = 1ULL << 38; -const uint64 IMM = IMM_1 | IMM_2; -const uint64 XMM = _XMM | _XMM2; -const uint64 YMM = _YMM | _YMM2; -const uint64 K = 1ULL << 43; -const uint64 _ZMM = 1ULL << 44; -const uint64 _ZMM2 = 1ULL << 45; +const uint64_t REG64 = _REG64 | _REG64_2 | RAX; +const uint64_t REG32 = _REG32 | REG32_2 | EAX; +const uint64_t REG16 = _REG16 | REG16_2 | AX; +const uint64_t REG32e = REG32 | REG64; +const uint64_t REG8 = _REG8 | REG8_2|AL; +const uint64_t MEM = _MEM | _MEMe; +const uint64_t MEM64 = 1ULL << 35; +const uint64_t ST0 = 1ULL << 36; +const uint64_t STi = 1ULL << 37; +const uint64_t IMM_2 = 1ULL << 38; +const uint64_t IMM = IMM_1 | IMM_2; +const uint64_t XMM = _XMM | _XMM2; +const uint64_t YMM = _YMM | _YMM2; +const uint64_t K = 1ULL << 43; +const uint64_t _ZMM = 1ULL << 44; +const uint64_t _ZMM2 = 1ULL << 45; #ifdef XBYAK64 -const uint64 ZMM = _ZMM | _ZMM2; -const uint64 _YMM3 = 1ULL << 46; +const uint64_t ZMM = _ZMM | _ZMM2; +const uint64_t _YMM3 = 1ULL << 46; #else -const uint64 ZMM = _ZMM; -const uint64 _YMM3 = 0; +const uint64_t ZMM = _ZMM; +const uint64_t _YMM3 = 0; #endif -const uint64 K2 = 1ULL << 47; -const uint64 ZMM_SAE = 1ULL << 48; -const uint64 ZMM_ER = 1ULL << 49; +const uint64_t K2 = 1ULL << 47; +const uint64_t ZMM_SAE = 1ULL << 48; +const uint64_t ZMM_ER = 1ULL << 49; #ifdef XBYAK64 -const uint64 _XMM3 = 1ULL << 50; +const uint64_t _XMM3 = 1ULL << 50; #endif -const uint64 XMM_SAE = 1ULL << 51; +const uint64_t XMM_SAE = 1ULL << 51; #ifdef XBYAK64 -const uint64 XMM_KZ = 1ULL << 52; -const uint64 YMM_KZ = 1ULL << 53; -const uint64 ZMM_KZ = 1ULL << 54; +const uint64_t XMM_KZ = 1ULL << 52; +const uint64_t YMM_KZ = 1ULL << 53; +const uint64_t ZMM_KZ = 1ULL << 54; #else -const uint64 XMM_KZ = 0; -const uint64 YMM_KZ = 0; -const uint64 ZMM_KZ = 0; +const uint64_t XMM_KZ = 0; +const uint64_t YMM_KZ = 0; +const uint64_t ZMM_KZ = 0; #endif -const uint64 MEM_K = 1ULL << 55; -const uint64 M_1to2 = 1ULL << 56; -const uint64 M_1to4 = 1ULL << 57; -const uint64 M_1to8 = 1ULL << 58; -const uint64 M_1to16 = 1ULL << 59; -const uint64 XMM_ER = 1ULL << 60; -const uint64 M_xword = 1ULL << 61; -const uint64 M_yword = 1ULL << 62; -const uint64 MY_1to4 = 1ULL << 18; -const uint64 BNDREG = 1ULL << 22; +const uint64_t MEM_K = 1ULL << 55; +const uint64_t M_1to2 = 1ULL << 56; +const uint64_t M_1to4 = 1ULL << 57; +const uint64_t M_1to8 = 1ULL << 58; +const uint64_t M_1to16 = 1ULL << 59; +const uint64_t XMM_ER = 1ULL << 60; +const uint64_t M_xword = 1ULL << 61; +const uint64_t M_yword = 1ULL << 62; +const uint64_t MY_1to4 = 1ULL << 18; +const uint64_t BNDREG = 1ULL << 22; -const uint64 NOPARA = 1ULL << (bitEnd - 1); +const uint64_t NOPARA = 1ULL << (bitEnd - 1); class Test { Test(const Test&); @@ -132,7 +131,7 @@ class Test { } // check all op1, op2, op3 - void put(const std::string& nm, uint64 op1 = NOPARA, uint64 op2 = NOPARA, uint64 op3 = NOPARA, uint64 op4 = NOPARA) const + void put(const std::string& nm, uint64_t op1 = NOPARA, uint64_t op2 = NOPARA, uint64_t op3 = NOPARA, uint64_t op4 = NOPARA) const { for (int i = 0; i < bitEnd; i++) { if ((op1 & (1ULL << i)) == 0) continue; @@ -155,7 +154,7 @@ class Test { } } } - void put(const char *nm, uint64 op, const char *xbyak, const char *nasm) const + void put(const char *nm, uint64_t op, const char *xbyak, const char *nasm) const { for (int i = 0; i < bitEnd; i++) { if ((op & (1ULL << i)) == 0) continue; @@ -167,7 +166,7 @@ class Test { printf("\n"); } } - void put(const char *nm, const char *xbyak, const char *nasm = 0, uint64 op = NOPARA) const + void put(const char *nm, const char *xbyak, const char *nasm = 0, uint64_t op = NOPARA) const { if (nasm == 0) nasm = xbyak; for (int i = 0; i < bitEnd; i++) { @@ -180,7 +179,7 @@ class Test { printf("\n"); } } - const char *get(uint64 type) const + const char *get(uint64_t type) const { int idx = (rand() / 31) & 7; if (type == ST0) { @@ -460,8 +459,14 @@ class Test { "cqo", "cmpsq", "movsq", + "popfq", + "pushfq", + "lodsq", + "movsq", "scasq", "stosq", + "syscall", + "sysret", #else "aaa", "aad", @@ -469,6 +474,7 @@ class Test { "aas", "daa", "das", + "into", "popad", "popfd", "pusha", @@ -493,9 +499,17 @@ class Test { "cmpsb", "cmpsw", "cmpsd", + "int3", + "leave", + "lodsb", + "lodsw", + "lodsd", "movsb", "movsw", "movsd", + "outsb", + "outsw", + "outsd", "scasb", "scasw", "scasd", @@ -508,6 +522,8 @@ class Test { "stc", "std", "sti", + "sysenter", + "sysexit", "emms", "pause", @@ -540,6 +556,8 @@ class Test { "fabs", "faddp", "fchs", + "fclex", + "fnclex", "fcom", "fcomp", "fcompp", @@ -579,15 +597,52 @@ class Test { "fxtract", "fyl2x", "fyl2xp1", + + "monitorx", + "mwaitx", + "clzero", }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { put(tbl[i]); } + { + const char memTbl[][16] = { + "clflush", + "clflushopt", + "fbld", + "fbstp", + "fldcw", + "fldenv", + "frstor", + "fsave", + "fnsave", + "fstcw", + "fnstcw", + "fstenv", + "fnstenv", + "fstsw", + "fnstsw", + "fxrstor", + }; + for (size_t i = 0; i < NUM_OF_ARRAY(memTbl); i++) { + put(memTbl[i], MEM); + } + put("fstsw", AX); + put("fnstsw", AX); + } put("bswap", REG32e); put("lea", REG32e|REG16, MEM); - put("fldcw", MEM); - put("fstcw", MEM); + put("enter", IMM, IMM); + put(isXbyak_ ? "int_" : "int", IMM8); + put(isXbyak_ ? "in_" : "in", AL|AX|EAX, IMM8); + puts(isXbyak_ ? "in_(al, dx); dump();" : "in al, dx"); + puts(isXbyak_ ? "in_(ax, dx); dump();" : "in ax, dx"); + puts(isXbyak_ ? "in_(eax, dx); dump();" : "in eax, dx"); + put(isXbyak_ ? "out_" : "out", IMM8, AL|AX|EAX); + puts(isXbyak_ ? "out_(dx, al); dump();" : "out dx, al"); + puts(isXbyak_ ? "out_(dx, ax); dump();" : "out dx, ax"); + puts(isXbyak_ ? "out_(dx, eax); dump();" : "out dx, eax"); } void putJmp() const { @@ -803,7 +858,7 @@ class Test { SD = 1 << 3 }; const struct { - uint8 code; + uint8_t code; const char *name; } sufTbl[] = { { 0, "ps" }, @@ -812,7 +867,7 @@ class Test { { 0xF2, "sd" }, }; static const struct XmmTbl1 { - uint8 code; + uint8_t code; int mode; const char *name; bool hasImm; @@ -841,7 +896,7 @@ class Test { for (size_t j = 0; j < NUM_OF_ARRAY(sufTbl); j++) { if (!(p->mode & (1 << j))) continue; char buf[16]; - sprintf(buf, "%s%s", p->name, sufTbl[j].name); + snprintf(buf, sizeof(buf), "%s%s", p->name, sufTbl[j].name); if (p->hasImm) { put(buf, XMM, XMM|MEM, IMM); } else { @@ -891,8 +946,8 @@ class Test { { static const struct Tbl { const char *name; - uint64 op1; - uint64 op2; + uint64_t op1; + uint64_t op2; } tbl[] = { { "cvtpi2ps", XMM, MMX|MEM }, { "cvtps2pi", MMX, XMM|MEM }, @@ -928,7 +983,9 @@ class Test { } void putCmov() const { - const char tbl[][4] = { + const struct { + const char *s; + } tbl[] = { "o", "no", "b", @@ -961,12 +1018,12 @@ class Test { "g", }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { - char buf[16]; - sprintf(buf, "cmov%s", tbl[i]); + char buf[32]; + snprintf(buf, sizeof(buf), "cmov%s", tbl[i].s); put(buf, REG16, REG16|MEM); put(buf, REG32, REG32|MEM); put(buf, REG64, REG64|MEM); - sprintf(buf, "set%s", tbl[i]); + snprintf(buf, sizeof(buf), "set%s", tbl[i].s); put(buf, REG8|REG8_3|MEM); } } @@ -1088,6 +1145,33 @@ class Test { put("pop", REG32|MEM32); #endif } + void putPushPop8_16() const + { + const struct { + int b; + uint32_t v; + } tbl[] = { + { 8, 0x7f }, + { 8, 0x80 }, + { 8, 0xff }, + { 8, 0x100 }, + { 8, 0x12345 }, + { 16, 0x7fff }, + { 16, 0x8000 }, + { 16, 0xffff }, + { 16, 0x10000 }, + { 16, 0x12345 }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const char *b = tbl[i].b == 8 ? "byte" : "word"; + uint32_t v = tbl[i].v; + if (isXbyak_) { + printf("push(%s, 0x%x);dump();\n", b, v); + } else { + printf("push %s 0x%x\n", b, v); + } + } + } void putTest() const { const char *p = "test"; @@ -1121,6 +1205,30 @@ class Test { put("mov", REG64, tbl[i].a, tbl[i].b); } } + void putLoadSeg() const + { + const struct Tbl { + const char *name; + bool support64Bit; + } tbl[] = { +#ifdef XBYAK32 + { "lds", false }, + { "les", false }, +#endif + { "lss", true }, + { "lfs", true }, + { "lgs", true }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + put(p->name, REG16|REG32, MEM); +#ifdef XBYAK64 + if (p->support64Bit) { + put(p->name, REG64, MEM); + } +#endif + } + } // only nasm void putMovImm64() const { @@ -1176,6 +1284,7 @@ class Test { put("cmpxchg8b", MEM); #ifdef XBYAK64 put("cmpxchg16b", MEM); + put("fxrstor64", MEM); #endif { const char tbl[][8] = { @@ -1384,9 +1493,9 @@ class Test { void putMPX() const { #ifdef XBYAK64 - const uint64 reg = REG64; + const uint64_t reg = REG64; #else - const uint64 reg = REG32; + const uint64_t reg = REG32; #endif put("bndcl", BNDREG, reg|MEM); put("bndcu", BNDREG, reg|MEM); @@ -2414,6 +2523,7 @@ public: separateFunc(); putSSE4_2(); putSeg(); // same behavior as yasm for mov rax, cx + putPushPop8_16(); #else putSIMPLE(); putReg1(); @@ -2423,6 +2533,7 @@ public: putPushPop(); putTest(); separateFunc(); + putLoadSeg(); putEtc(); putShift(); putShxd(); @@ -2447,7 +2558,6 @@ public: putFpuMem32_64(); separateFunc(); putFpuMem16_32_64(); - put("clflush", MEM); // current nasm is ok putFpu(); putFpuFpu(); putCmp(); @@ -2546,7 +2656,7 @@ public: printf("vaddpd(%s%s%s, %s, %s%s); dump();\n", r1, pk, pz, r2, r3, saeTblXbyak[sae]); } else { if (kIdx) CYBOZU_SNPRINTF(pk, sizeof(pk), "{k%d}", kIdx); - if (z) pz = "{z}"; + if (z && kIdx) pz = "{z}"; printf("vaddpd %s%s%s, %s, %s%s\n", r1, pk, pz, r2, r3, saeTblNASM[sae]); } } @@ -2583,9 +2693,9 @@ public: for (size_t k = 0; k < N; k++) { #ifdef XBYAK64 for (int kIdx = 0; kIdx < 8; kIdx++) { + put_vaddpd(xTbl[i], xTbl[j], xTbl[k], kIdx); + put_vaddpd(yTbl[i], yTbl[j], yTbl[k], kIdx); for (int z = 0; z < 2; z++) { - put_vaddpd(xTbl[i], xTbl[j], xTbl[k], kIdx, z == 1); - put_vaddpd(yTbl[i], yTbl[j], yTbl[k], kIdx, z == 1); for (int sae = 0; sae < 5; sae++) { put_vaddpd(zTbl[i], zTbl[j], zTbl[k], kIdx, z == 1, sae); } diff --git a/test/misc.cpp b/test/misc.cpp index 3967fefc..2a55ec2e 100644 --- a/test/misc.cpp +++ b/test/misc.cpp @@ -97,13 +97,43 @@ CYBOZU_TEST_AUTO(align) CYBOZU_TEST_EQUAL(size_t(getCurr()) % alignSize, 0u); } align(alignSize); - const uint8 *p = getCurr(); + const uint8_t *p = getCurr(); // do nothing if aligned align(alignSize); CYBOZU_TEST_EQUAL(p, getCurr()); } } c; } +CYBOZU_TEST_AUTO(kmask) +{ + struct Code : Xbyak::CodeGenerator { + Code() + { + CYBOZU_TEST_EXCEPTION(kmovb(k1, ax), std::exception); + CYBOZU_TEST_EXCEPTION(kmovw(k1, ax), std::exception); + CYBOZU_TEST_EXCEPTION(kmovd(k1, ax), std::exception); + CYBOZU_TEST_EXCEPTION(kmovq(k1, eax), std::exception); +#ifdef XBYAK64 + CYBOZU_TEST_EXCEPTION(kmovb(k1, rax), std::exception); + CYBOZU_TEST_EXCEPTION(kmovw(k1, rax), std::exception); + CYBOZU_TEST_EXCEPTION(kmovd(k1, rax), std::exception); + CYBOZU_TEST_NO_EXCEPTION(kmovq(k1, rax)); +#endif + CYBOZU_TEST_NO_EXCEPTION(vmovaps(xm0|k0, ptr[eax])); + checkT_z(); + } + void checkT_z() + { + const uint8_t *p1 = getCurr(); + vmovaps(zm0, ptr[eax]); + const uint8_t *p2 = getCurr(); + vmovaps(zm0|T_z, ptr[eax]); + const uint8_t *end = getCurr(); + CYBOZU_TEST_EQUAL(p2 - p1, end - p2); + CYBOZU_TEST_EQUAL_ARRAY(p1, p2, end - p2); + } + } c; +} #ifdef XBYAK64 CYBOZU_TEST_AUTO(vfmaddps) @@ -683,4 +713,106 @@ CYBOZU_TEST_AUTO(gf2) CYBOZU_TEST_EQUAL(c.getSize(), n); CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); } + +CYBOZU_TEST_AUTO(bf16) +{ + struct Code : Xbyak::CodeGenerator { + Code() + { + vcvtne2ps2bf16(xmm0 | k1, xmm1, ptr [rax + 64]); + vcvtne2ps2bf16(ymm0 | k1 | T_z, ymm0, ptr [rax + 64]); + vcvtne2ps2bf16(zmm0 | k1, zmm1, ptr [rax + 64]); + + vcvtneps2bf16(xmm0, xword [rax + 64]); + vcvtneps2bf16(xmm0 | k1, yword [rax + 64]); + vcvtneps2bf16(ymm0 | k1, zword [rax + 64]); + vcvtneps2bf16(ymm0 | k1, ptr [rax + 64]); + + vdpbf16ps(xmm0 | k1, xmm1, ptr [rax + 64]); + vdpbf16ps(ymm0 | k1, ymm1, ptr [rax + 64]); + vdpbf16ps(zmm0 | k1, zmm1, ptr [rax + 64]); + } + } c; + const uint8_t tbl[] = { + 0x62, 0xf2, 0x77, 0x09, 0x72, 0x40, 0x04, + 0x62, 0xf2, 0x7f, 0xa9, 0x72, 0x40, 0x02, + 0x62, 0xf2, 0x77, 0x49, 0x72, 0x40, 0x01, + + 0x62, 0xf2, 0x7e, 0x08, 0x72, 0x40, 0x04, + 0x62, 0xf2, 0x7e, 0x29, 0x72, 0x40, 0x02, + 0x62, 0xf2, 0x7e, 0x49, 0x72, 0x40, 0x01, + 0x62, 0xf2, 0x7e, 0x49, 0x72, 0x40, 0x01, + + 0x62, 0xf2, 0x76, 0x09, 0x52, 0x40, 0x04, + 0x62, 0xf2, 0x76, 0x29, 0x52, 0x40, 0x02, + 0x62, 0xf2, 0x76, 0x49, 0x52, 0x40, 0x01, + }; + const size_t n = sizeof(tbl) / sizeof(tbl[0]); + CYBOZU_TEST_EQUAL(c.getSize(), n); + CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); +} + +CYBOZU_TEST_AUTO(AMX) +{ + struct Code : Xbyak::CodeGenerator { + Code() + { + ldtilecfg(ptr[rax + rcx * 4 + 64]); + sttilecfg(ptr[rsp + rax * 8 + 128]); + tileloadd(tmm3, ptr[rdi + rdx * 2 + 8]); + tileloaddt1(tmm4, ptr[r8 + r9 + 32]); + tilerelease(); + tilestored(ptr[r10 + r11 * 2 + 32], tmm2); + tilezero(tmm7); + tdpbssd(tmm1, tmm2, tmm3); + tdpbsud(tmm2, tmm3, tmm4); + tdpbusd(tmm3, tmm4, tmm5); + tdpbuud(tmm4, tmm5, tmm6); + tdpbf16ps(tmm5, tmm6, tmm7); + } + } c; + // generated code by patch + const uint8_t tbl[] = { + 0xc4, 0xe2, 0x78, 0x49, 0x44, 0x88, 0x40, 0xc4, 0xe2, 0x79, 0x49, 0x84, 0xc4, 0x80, 0x00, 0x00, + 0x00, 0xc4, 0xe2, 0x7b, 0x4b, 0x5c, 0x57, 0x08, 0xc4, 0x82, 0x79, 0x4b, 0x64, 0x08, 0x20, 0xc4, + 0xe2, 0x78, 0x49, 0xc0, 0xc4, 0x82, 0x7a, 0x4b, 0x54, 0x5a, 0x20, 0xc4, 0xe2, 0x7b, 0x49, 0xf8, + 0xc4, 0xe2, 0x63, 0x5e, 0xca, 0xc4, 0xe2, 0x5a, 0x5e, 0xd3, 0xc4, 0xe2, 0x51, 0x5e, 0xdc, 0xc4, + 0xe2, 0x48, 0x5e, 0xe5, 0xc4, 0xe2, 0x42, 0x5c, 0xee, + }; + const size_t n = sizeof(tbl) / sizeof(tbl[0]); + CYBOZU_TEST_EQUAL(c.getSize(), n); + CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); +} + +CYBOZU_TEST_AUTO(tileloadd) +{ + struct Code : Xbyak::CodeGenerator { + Code() + { + tileloadd(tmm1, ptr[r8+r8]); + tileloadd(tmm1, ptr[rax+rcx*4]); + tileloadd(tmm1, ptr[r8+r9*1+0x40]); + } + void notSupported() + { + tileloadd(tmm1, ptr[r8]); + } + void notSupported2() + { + tileloadd(tmm1, ptr[r8*2]); + } + } c; + const uint8_t tbl[] = { + 0xC4, 0x82, 0x7B, 0x4B, 0x0C, 0x00, + 0xC4, 0xE2, 0x7B, 0x4B, 0x0C, 0x88, + 0xC4, 0x82, 0x7B, 0x4B, 0x4C, 0x08, 0x40, + }; + const size_t n = sizeof(tbl) / sizeof(tbl[0]); + CYBOZU_TEST_EQUAL(c.getSize(), n); + CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); + + // current version does not support this sibmem format + CYBOZU_TEST_EXCEPTION(c.notSupported(), std::exception); + CYBOZU_TEST_EXCEPTION(c.notSupported2(), std::exception); +} #endif diff --git a/test/nm_frame.cpp b/test/nm_frame.cpp index 9deceba2..ffdcc978 100644 --- a/test/nm_frame.cpp +++ b/test/nm_frame.cpp @@ -1,7 +1,8 @@ #include -#define XBYAK_NO_OP_NAMES #define XBYAK_ENABLE_OMITTED_OPERAND #include "xbyak/xbyak.h" +#define CYBOZU_TEST_DISABLE_AUTO_RUN +#include "cybozu/test.hpp" using namespace Xbyak; @@ -15,39 +16,27 @@ public: #include "nm.cpp" }; -#define _STR(x) #x -#define TEST(syntax) err = true; try { syntax; err = false; } catch (Xbyak::Error) { } catch (...) { } if (!err) printf("should be err:%s;\n", _STR(syntax)) class ErrorSample : public CodeGenerator { void operator=(const ErrorSample&); public: void gen() { - bool err; - TEST(mov(ptr[eax],1)); - TEST(test(ptr[eax],1)); - TEST(adc(ptr[eax],1)); - TEST(setz(eax)); +#ifndef XBYAK_NO_EXCEPTION + CYBOZU_TEST_EXCEPTION(mov(ptr[eax],1), std::exception); + CYBOZU_TEST_EXCEPTION(test(ptr[eax],1), std::exception); + CYBOZU_TEST_EXCEPTION(adc(ptr[eax],1), std::exception); + CYBOZU_TEST_EXCEPTION(setz(eax), std::exception); +#endif } }; + int main() - try { - size_t size = sizeof(Xbyak::Operand); - if (size != 4) { - printf("sizeof Operand %d\n", (int)size); - } - try { - Sample s; - s.gen(); - } catch (std::exception& e) { - printf("ERR:%s\n", e.what()); - } catch (...) { - printf("unknown error\n"); - } + // the size of Operand exceeds 32 bit. + CYBOZU_TEST_EQUAL(sizeof(Xbyak::Operand), 8u); + Sample s; + s.gen(); ErrorSample es; es.gen(); -} catch (std::exception& e) { - printf("err %s\n", e.what()); - return 1; } diff --git a/test/noexception.cpp b/test/noexception.cpp new file mode 100644 index 00000000..04a6dbc2 --- /dev/null +++ b/test/noexception.cpp @@ -0,0 +1,111 @@ +#define XBYAK_NO_EXCEPTION +#include + +using namespace Xbyak; + +int g_err = 0; +int g_test = 0; + +void assertEq(int x, int y) +{ + if (x != y) { + printf("ERR x=%d y=%d\n", x, y); + g_err++; + } + g_test++; +} + +void assertBool(bool b) +{ + if (!b) { + printf("ERR assertBool\n"); + g_err++; + } + g_test++; +} + +void test1() +{ + const int v = 123; + struct Code : CodeGenerator { + Code() + { + mov(eax, v); + ret(); + } + } c; + int (*f)() = c.getCode(); + assertEq(f(), v); + assertEq(Xbyak::GetError(), ERR_NONE); +} + +void test2() +{ + struct Code : CodeGenerator { + Code() + { + Label lp; + L(lp); + L(lp); + } + } c; + assertEq(Xbyak::GetError(), ERR_LABEL_IS_REDEFINED); + Xbyak::ClearError(); +} + +void test3() +{ + static struct EmptyAllocator : Xbyak::Allocator { + uint8_t *alloc() { return 0; } + } emptyAllocator; + struct Code : CodeGenerator { + Code() : CodeGenerator(8, 0, &emptyAllocator) + { + mov(eax, 3); + assertBool(Xbyak::GetError() == 0); + mov(eax, 3); + mov(eax, 3); + assertBool(Xbyak::GetError() != 0); + Xbyak::ClearError(); + assertBool(Xbyak::GetError() == 0); + } + } c; +} + +void test4() +{ + struct Code : CodeGenerator { + Code() + { + mov(ptr[eax], 1); + assertBool(Xbyak::GetError() != 0); + Xbyak::ClearError(); + + test(ptr[eax], 1); + assertBool(Xbyak::GetError() != 0); + Xbyak::ClearError(); + + adc(ptr[eax], 1); + assertBool(Xbyak::GetError() != 0); + Xbyak::ClearError(); + + setz(eax); + assertBool(Xbyak::GetError() != 0); + Xbyak::ClearError(); + } + }; +} + +int main() +{ + test1(); + test2(); + test3(); + test4(); + if (g_err) { + printf("err %d/%d\n", g_err, g_test); + } else { + printf("all ok %d\n", g_test); + } + return g_err != 0; +} diff --git a/test/normalize_prefix.cpp b/test/normalize_prefix.cpp index 53eae8dc..889d9253 100644 --- a/test/normalize_prefix.cpp +++ b/test/normalize_prefix.cpp @@ -6,7 +6,7 @@ #include #include -typedef unsigned char uint8; +typedef unsigned char uint8_t; std::string normalize(const std::string& line) { diff --git a/test/set_opt.bat b/test/set_opt.bat new file mode 100644 index 00000000..73a83f61 --- /dev/null +++ b/test/set_opt.bat @@ -0,0 +1,2 @@ +@echo off +set OPT=/EHsc -I../xbyak -I./ /W4 -D_CRT_SECURE_NO_WARNINGS /nologo \ No newline at end of file diff --git a/test/sf_test.cpp b/test/sf_test.cpp index 286ecd1a..038b3f0d 100644 --- a/test/sf_test.cpp +++ b/test/sf_test.cpp @@ -218,7 +218,7 @@ void check(int x, int y) } } -void verify(const Xbyak::uint8 *f, int pNum) +void verify(const Xbyak::uint8_t *f, int pNum) { switch (pNum) { case 0: @@ -264,7 +264,7 @@ void testAll() } for (int tNum = 0; tNum < maxNum; tNum++) { // printf("pNum=%d, tNum=%d, stackSize=%d\n", pNum, tNum | opt, stackSize); - const Xbyak::uint8 *f = code.getCurr(); + const Xbyak::uint8_t *f = code.getCurr(); code.gen(pNum, tNum | opt, stackSize); verify(f, pNum); /* diff --git a/test/test_address.sh b/test/test_address.sh index 8466cc24..d63a4ef5 100755 --- a/test/test_address.sh +++ b/test/test_address.sh @@ -20,7 +20,6 @@ echo "compile nm_frame.cpp" g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame ./nm_frame > x.lst diff ok.lst x.lst && echo "ok" -wc x.lst } diff --git a/test/test_avx.sh b/test/test_avx.sh index 0b42eebf..236f7aa5 100755 --- a/test/test_avx.sh +++ b/test/test_avx.sh @@ -33,12 +33,11 @@ g++ $CFLAGS make_nm.cpp -o make_nm ./make_nm > a.asm echo "asm" $EXE -f$OPT3 a.asm -l a.lst -awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER > ok.lst +awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER | grep -v "1+1" > ok.lst echo "xbyak" ./make_nm jit > nm.cpp echo "compile nm_frame.cpp" g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame ./nm_frame | $FILTER > x.lst -diff ok.lst x.lst && echo "ok" -exit 0 +diff -B ok.lst x.lst && echo "ok" diff --git a/test/test_avx512.sh b/test/test_avx512.sh index 0a03109f..cce5de0c 100755 --- a/test/test_avx512.sh +++ b/test/test_avx512.sh @@ -29,5 +29,4 @@ echo "xbyak" echo "compile nm_frame.cpp" g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame -DXBYAK_AVX512 ./nm_frame | $FILTER > x.lst -diff ok.lst x.lst && echo "ok" -exit 0 +diff -B ok.lst x.lst && echo "ok" diff --git a/test/test_avx_all.bat b/test/test_avx_all.bat index ec74d281..3bf79f2c 100644 --- a/test/test_avx_all.bat +++ b/test/test_avx_all.bat @@ -7,3 +7,7 @@ echo ** yasm-avx(32bit) *** call test_avx Y echo ** yasm-avx(64bit) *** call test_avx Y64 +echo ** nasm-avx512(32bit) *** +call test_avx512 +echo ** nasm-avx512(64bit) *** +call test_avx512 64 diff --git a/test/test_nm.bat b/test/test_nm.bat index 0d63b650..428eb741 100644 --- a/test/test_nm.bat +++ b/test/test_nm.bat @@ -17,6 +17,10 @@ if /i "%1"=="Y" ( set OPT2=-DUSE_YASM -DXBYAK64 set OPT3=win64 set FILTER=normalize_prefix +) else if /i "%1"=="noexcept" ( + set EXE=nasm.exe + set OPT2=-DXBYAK32 -DXBYAK_NO_EXCEPTION + set OPT3=win32 ) else ( set EXE=nasm.exe set OPT2=-DXBYAK32 @@ -27,7 +31,7 @@ bmake -f Makefile.win all echo cl -I../ make_nm.cpp %OPT% %OPT2% /EHs cl -I../ make_nm.cpp %OPT% %OPT2% /EHs make_nm > a.asm -rm a.lst +rm -rf a.lst echo %EXE% -f %OPT3% -l a.lst a.asm %EXE% -f %OPT3% -l a.lst a.asm rem connect "?????-" and "??" @@ -39,5 +43,4 @@ if /i "%Y%"=="1" ( make_nm jit > nm.cpp cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2% nm_frame |%FILTER% > x.lst -diff -w x.lst ok.lst -wc x.lst +diff -wb x.lst ok.lst && echo "ok" diff --git a/test/test_nm.sh b/test/test_nm.sh index 6001ace9..3e328012 100755 --- a/test/test_nm.sh +++ b/test/test_nm.sh @@ -25,6 +25,11 @@ else if ($1 == "avx512") then set OPT2="-DXBYAK64 -DUSE_AVX512" set OPT3=win64 set FILTER=./normalize_prefix +else if ($1 == "noexcept") then + echo "nasm(32bit) without exception" + set EXE=nasm + set OPT2="-DXBYAK32 -DXBYAK_NO_EXCEPTION" + set OPT3=win32 else echo "nasm(32bit)" set EXE=nasm @@ -33,18 +38,17 @@ else endif set CFLAGS="-Wall -fno-operator-names -I../ $OPT2" -echo "compile make_nm.cpp" +echo "compile make_nm.cpp with $CFLAGS" g++ $CFLAGS make_nm.cpp -o make_nm ./make_nm > a.asm echo "asm" $EXE -f$OPT3 a.asm -l a.lst -awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER > ok.lst +awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER | grep -v "1+1" > ok.lst echo "xbyak" ./make_nm jit > nm.cpp echo "compile nm_frame.cpp" g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame ./nm_frame | $FILTER > x.lst -diff ok.lst x.lst && echo "ok" -exit 0 +diff -B ok.lst x.lst && echo "ok" diff --git a/xbyak.sln b/xbyak.sln index 0f23415b..b20afd6f 100644 --- a/xbyak.sln +++ b/xbyak.sln @@ -1,19 +1,20 @@ -þ½Ž¿ -Microsoft Visual Studio Solution File, Format Version 10.00 -# Visual Studio 2008 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bf", "sample\bf.vcproj", "{654BD79B-59D3-4B10-BBAA-158BAB272828}" +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.28010.2016 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bf", "sample\bf.vcxproj", "{654BD79B-59D3-4B10-BBAA-158BAB272828}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "calc", "sample\calc.vcproj", "{5FDDFAA6-B947-491D-A17E-BBD863846579}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "calc", "sample\calc.vcxproj", "{5FDDFAA6-B947-491D-A17E-BBD863846579}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "quantize", "sample\quantize.vcproj", "{D06753BF-E1F3-4578-9B18-08673327F77C}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "quantize", "sample\quantize.vcxproj", "{D06753BF-E1F3-4578-9B18-08673327F77C}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test0", "sample\test0.vcproj", "{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test0", "sample\test0.vcxproj", "{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "toyvm", "sample\toyvm.vcproj", "{2E41C7AF-39FF-454C-B081-37445378DCB3}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "toyvm", "sample\toyvm.vcxproj", "{2E41C7AF-39FF-454C-B081-37445378DCB3}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_util", "sample\test_util.vcproj", "{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_util", "sample\test_util.vcxproj", "{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "calc2", "sample\calc2.vcproj", "{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "calc2", "sample\calc2.vcxproj", "{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -83,4 +84,7 @@ Global GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {DAE0012B-DDCC-4614-9110-D52E351B2A80} + EndGlobalSection EndGlobal diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index f768927e..4310455b 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -9,10 +9,8 @@ @note modified new BSD license http://opensource.org/licenses/BSD-3-Clause */ -#ifndef XBYAK_NO_OP_NAMES - #if not +0 // trick to detect whether 'not' is operator or not - #error "use -fno-operator-names option if you want to use and(), or(), xor(), not() as function names, Or define XBYAK_NO_OP_NAMES and use and_(), or_(), xor_(), not_()." - #endif +#if (not +0) && !defined(XBYAK_NO_OP_NAMES) // trick to detect whether 'not' is operator or not + #define XBYAK_NO_OP_NAMES #endif #include // for debug print @@ -26,7 +24,9 @@ // #define XBYAK_DISABLE_AVX512 -//#define XBYAK_USE_MMAP_ALLOCATOR +#if !defined(XBYAK_USE_MMAP_ALLOCATOR) && !defined(XBYAK_DONT_USE_MMAP_ALLOCATOR) + #define XBYAK_USE_MMAP_ALLOCATOR +#endif #if !defined(__GNUC__) || defined(__MINGW32__) #undef XBYAK_USE_MMAP_ALLOCATOR #endif @@ -72,13 +72,24 @@ #define XBYAK_STD_UNORDERED_MULTIMAP std::multimap #endif #ifdef _WIN32 - #include + #ifndef WIN32_LEAN_AND_MEAN + #define WIN32_LEAN_AND_MEAN + #endif #include #include + #define XBYAK_TLS __declspec(thread) #elif defined(__GNUC__) #include #include #include + #define XBYAK_TLS __thread +#endif +#if defined(__APPLE__) && !defined(XBYAK_DONT_USE_MAP_JIT) + #define XBYAK_USE_MAP_JIT + #include + #ifndef MAP_JIT + #define MAP_JIT 0x800 + #endif #endif #if !defined(_MSC_VER) || (_MSC_VER >= 1600) #include @@ -98,7 +109,18 @@ #endif #if (__cplusplus >= 201103) || (_MSC_VER >= 1800) + #undef XBYAK_TLS + #define XBYAK_TLS thread_local #define XBYAK_VARIADIC_TEMPLATE + #define XBYAK_NOEXCEPT noexcept +#else + #define XBYAK_NOEXCEPT throw() +#endif + +#if (__cplusplus >= 201402L) || (_MSC_VER >= 1910) // Visual Studio 2017 version 15.0 + #define XBYAK_CONSTEXPR constexpr // require c++14 or later +#else + #define XBYAK_CONSTEXPR #endif #ifdef _MSC_VER @@ -113,21 +135,17 @@ namespace Xbyak { enum { DEFAULT_MAX_CODE_SIZE = 4096, - VERSION = 0x5780 /* 0xABCD = A.BC(D) */ + VERSION = 0x5970 /* 0xABCD = A.BC(D) */ }; #ifndef MIE_INTEGER_TYPE_DEFINED #define MIE_INTEGER_TYPE_DEFINED -#ifdef _MSC_VER - typedef unsigned __int64 uint64; - typedef __int64 sint64; -#else - typedef uint64_t uint64; - typedef int64_t sint64; -#endif -typedef unsigned int uint32; -typedef unsigned short uint16; -typedef unsigned char uint8; +// for backward compatibility +typedef uint64_t uint64; +typedef int64_t sint64; +typedef uint32_t uint32; +typedef uint16_t uint16; +typedef uint8_t uint8; #endif #ifndef MIE_ALIGN @@ -186,81 +204,120 @@ enum { ERR_INVALID_ZERO, ERR_INVALID_RIP_IN_AUTO_GROW, ERR_INVALID_MIB_ADDRESS, - ERR_INTERNAL, - ERR_X2APIC_IS_NOT_SUPPORTED + ERR_X2APIC_IS_NOT_SUPPORTED, + ERR_NOT_SUPPORTED, + ERR_INTERNAL // Put it at last. }; +inline const char *ConvertErrorToString(int err) +{ + static const char *errTbl[] = { + "none", + "bad addressing", + "code is too big", + "bad scale", + "esp can't be index", + "bad combination", + "bad size of register", + "imm is too big", + "bad align", + "label is redefined", + "label is too far", + "label is not found", + "code is not copyable", + "bad parameter", + "can't protect", + "can't use 64bit disp(use (void*))", + "offset is too big", + "MEM size is not specified", + "bad mem size", + "bad st combination", + "over local label", + "under local label", + "can't alloc", + "T_SHORT is not supported in AutoGrow", + "bad protect mode", + "bad pNum", + "bad tNum", + "bad vsib addressing", + "can't convert", + "label is not set by L()", + "label is already set by L()", + "bad label string", + "err munmap", + "opmask is already set", + "rounding is already set", + "k0 is invalid", + "evex is invalid", + "sae(suppress all exceptions) is invalid", + "er(embedded rounding) is invalid", + "invalid broadcast", + "invalid opmask with memory", + "invalid zero", + "invalid rip in AutoGrow", + "invalid mib address", + "x2APIC is not supported", + "not supported", + "internal error" + }; + assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl)); + return err <= ERR_INTERNAL ? errTbl[err] : "unknown err"; +} + +#ifdef XBYAK_NO_EXCEPTION +namespace local { + +inline int& GetErrorRef() { + static XBYAK_TLS int err = 0; + return err; +} + +inline void SetError(int err) { + if (local::GetErrorRef()) return; // keep the first err code + local::GetErrorRef() = err; +} + +} // local + +inline void ClearError() { + local::GetErrorRef() = 0; +} +inline int GetError() { return local::GetErrorRef(); } + +#define XBYAK_THROW(err) { local::SetError(err); return; } +#define XBYAK_THROW_RET(err, r) { local::SetError(err); return r; } + +#else class Error : public std::exception { int err_; public: explicit Error(int err) : err_(err) { if (err_ < 0 || err_ > ERR_INTERNAL) { - fprintf(stderr, "bad err=%d in Xbyak::Error\n", err_); - exit(1); + err_ = ERR_INTERNAL; } } operator int() const { return err_; } - const char *what() const throw() + const char *what() const XBYAK_NOEXCEPT { - static const char *errTbl[] = { - "none", - "bad addressing", - "code is too big", - "bad scale", - "esp can't be index", - "bad combination", - "bad size of register", - "imm is too big", - "bad align", - "label is redefined", - "label is too far", - "label is not found", - "code is not copyable", - "bad parameter", - "can't protect", - "can't use 64bit disp(use (void*))", - "offset is too big", - "MEM size is not specified", - "bad mem size", - "bad st combination", - "over local label", - "under local label", - "can't alloc", - "T_SHORT is not supported in AutoGrow", - "bad protect mode", - "bad pNum", - "bad tNum", - "bad vsib addressing", - "can't convert", - "label is not set by L()", - "label is already set by L()", - "bad label string", - "err munmap", - "opmask is already set", - "rounding is already set", - "k0 is invalid", - "evex is invalid", - "sae(suppress all exceptions) is invalid", - "er(embedded rounding) is invalid", - "invalid broadcast", - "invalid opmask with memory", - "invalid zero", - "invalid rip in AutoGrow", - "invalid mib address", - "internal error", - "x2APIC is not supported" - }; - assert((size_t)err_ < sizeof(errTbl) / sizeof(*errTbl)); - return errTbl[err_]; + return ConvertErrorToString(err_); } }; +// dummy functions +inline void ClearError() { } +inline int GetError() { return 0; } + inline const char *ConvertErrorToString(const Error& err) { return err.what(); } +#define XBYAK_THROW(err) { throw Error(err); } +#define XBYAK_THROW_RET(err, r) { throw Error(err); } + +#endif + inline void *AlignedMalloc(size_t size, size_t alignment) { #ifdef __MINGW32__ @@ -286,7 +343,7 @@ inline void AlignedFree(void *p) } template -inline const To CastTo(From p) throw() +inline const To CastTo(From p) XBYAK_NOEXCEPT { return (const To)(size_t)(p); } @@ -294,15 +351,15 @@ namespace inner { static const size_t ALIGN_PAGE_SIZE = 4096; -inline bool IsInDisp8(uint32 x) { return 0xFFFFFF80 <= x || x <= 0x7F; } -inline bool IsInInt32(uint64 x) { return ~uint64(0x7fffffffu) <= x || x <= 0x7FFFFFFFU; } +inline bool IsInDisp8(uint32_t x) { return 0xFFFFFF80 <= x || x <= 0x7F; } +inline bool IsInInt32(uint64_t x) { return ~uint64_t(0x7fffffffu) <= x || x <= 0x7FFFFFFFU; } -inline uint32 VerifyInInt32(uint64 x) +inline uint32_t VerifyInInt32(uint64_t x) { #ifdef XBYAK64 - if (!IsInInt32(x)) throw Error(ERR_OFFSET_IS_TOO_BIG); + if (!IsInInt32(x)) XBYAK_THROW_RET(ERR_OFFSET_IS_TOO_BIG, 0) #endif - return static_cast(x); + return static_cast(x); } enum LabelMode { @@ -317,23 +374,50 @@ enum LabelMode { custom allocator */ struct Allocator { - virtual uint8 *alloc(size_t size) { return reinterpret_cast(AlignedMalloc(size, inner::ALIGN_PAGE_SIZE)); } - virtual void free(uint8 *p) { AlignedFree(p); } + virtual uint8_t *alloc(size_t size) { return reinterpret_cast(AlignedMalloc(size, inner::ALIGN_PAGE_SIZE)); } + virtual void free(uint8_t *p) { AlignedFree(p); } virtual ~Allocator() {} /* override to return false if you call protect() manually */ virtual bool useProtect() const { return true; } }; #ifdef XBYAK_USE_MMAP_ALLOCATOR +#ifdef XBYAK_USE_MAP_JIT +namespace util { + +inline int getMacOsVersionPure() +{ + char buf[64]; + size_t size = sizeof(buf); + int err = sysctlbyname("kern.osrelease", buf, &size, NULL, 0); + if (err != 0) return 0; + char *endp; + int major = strtol(buf, &endp, 10); + if (*endp != '.') return 0; + return major; +} + +inline int getMacOsVersion() +{ + static const int version = getMacOsVersionPure(); + return version; +} + +} // util +#endif class MmapAllocator : Allocator { typedef XBYAK_STD_UNORDERED_MAP SizeList; SizeList sizeList_; public: - uint8 *alloc(size_t size) + uint8_t *alloc(size_t size) { const size_t alignedSizeM1 = inner::ALIGN_PAGE_SIZE - 1; size = (size + alignedSizeM1) & ~alignedSizeM1; -#ifdef MAP_ANONYMOUS +#if defined(XBYAK_USE_MAP_JIT) + int mode = MAP_PRIVATE | MAP_ANONYMOUS; + const int mojaveVersion = 18; + if (util::getMacOsVersion() >= mojaveVersion) mode |= MAP_JIT; +#elif defined(MAP_ANONYMOUS) const int mode = MAP_PRIVATE | MAP_ANONYMOUS; #elif defined(MAP_ANON) const int mode = MAP_PRIVATE | MAP_ANON; @@ -341,17 +425,17 @@ public: #error "not supported" #endif void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, mode, -1, 0); - if (p == MAP_FAILED) throw Error(ERR_CANT_ALLOC); + if (p == MAP_FAILED) XBYAK_THROW_RET(ERR_CANT_ALLOC, 0) assert(p); sizeList_[(uintptr_t)p] = size; - return (uint8*)p; + return (uint8_t*)p; } - void free(uint8 *p) + void free(uint8_t *p) { if (p == 0) return; SizeList::iterator i = sizeList_.find((uintptr_t)p); - if (i == sizeList_.end()) throw Error(ERR_BAD_PARAMETER); - if (munmap((void*)i->first, i->second) < 0) throw Error(ERR_MUNMAP); + if (i == sizeList_.end()) XBYAK_THROW(ERR_BAD_PARAMETER) + if (munmap((void*)i->first, i->second) < 0) XBYAK_THROW(ERR_MUNMAP) sizeList_.erase(i); } }; @@ -361,10 +445,10 @@ class Address; class Reg; class Operand { - static const uint8 EXT8BIT = 0x20; + static const uint8_t EXT8BIT = 0x20; unsigned int idx_:6; // 0..31 + EXT8BIT = 1 if spl/bpl/sil/dil - unsigned int kind_:9; - unsigned int bit_:10; + unsigned int kind_:10; + unsigned int bit_:14; protected: unsigned int zero_:1; unsigned int mask_:3; @@ -381,7 +465,8 @@ public: YMM = 1 << 5, ZMM = 1 << 6, OPMASK = 1 << 7, - BNDREG = 1 << 8 + BNDREG = 1 << 8, + TMM = 1 << 9 }; enum Code { #ifdef XBYAK64 @@ -395,55 +480,55 @@ public: AX = 0, CX, DX, BX, SP, BP, SI, DI, AL = 0, CL, DL, BL, AH, CH, DH, BH }; - Operand() : idx_(0), kind_(0), bit_(0), zero_(0), mask_(0), rounding_(0) { } - Operand(int idx, Kind kind, int bit, bool ext8bit = 0) - : idx_(static_cast(idx | (ext8bit ? EXT8BIT : 0))) + XBYAK_CONSTEXPR Operand() : idx_(0), kind_(0), bit_(0), zero_(0), mask_(0), rounding_(0) { } + XBYAK_CONSTEXPR Operand(int idx, Kind kind, int bit, bool ext8bit = 0) + : idx_(static_cast(idx | (ext8bit ? EXT8BIT : 0))) , kind_(kind) , bit_(bit) , zero_(0), mask_(0), rounding_(0) { assert((bit_ & (bit_ - 1)) == 0); // bit must be power of two } - Kind getKind() const { return static_cast(kind_); } - int getIdx() const { return idx_ & (EXT8BIT - 1); } - bool isNone() const { return kind_ == 0; } - bool isMMX() const { return is(MMX); } - bool isXMM() const { return is(XMM); } - bool isYMM() const { return is(YMM); } - bool isZMM() const { return is(ZMM); } - bool isXMEM() const { return is(XMM | MEM); } - bool isYMEM() const { return is(YMM | MEM); } - bool isZMEM() const { return is(ZMM | MEM); } - bool isOPMASK() const { return is(OPMASK); } - bool isBNDREG() const { return is(BNDREG); } - bool isREG(int bit = 0) const { return is(REG, bit); } - bool isMEM(int bit = 0) const { return is(MEM, bit); } - bool isFPU() const { return is(FPU); } - bool isExt8bit() const { return (idx_ & EXT8BIT) != 0; } - bool isExtIdx() const { return (getIdx() & 8) != 0; } - bool isExtIdx2() const { return (getIdx() & 16) != 0; } - bool hasEvex() const { return isZMM() || isExtIdx2() || getOpmaskIdx() || getRounding(); } - bool hasRex() const { return isExt8bit() || isREG(64) || isExtIdx(); } - bool hasZero() const { return zero_; } - int getOpmaskIdx() const { return mask_; } - int getRounding() const { return rounding_; } + XBYAK_CONSTEXPR Kind getKind() const { return static_cast(kind_); } + XBYAK_CONSTEXPR int getIdx() const { return idx_ & (EXT8BIT - 1); } + XBYAK_CONSTEXPR bool isNone() const { return kind_ == 0; } + XBYAK_CONSTEXPR bool isMMX() const { return is(MMX); } + XBYAK_CONSTEXPR bool isXMM() const { return is(XMM); } + XBYAK_CONSTEXPR bool isYMM() const { return is(YMM); } + XBYAK_CONSTEXPR bool isZMM() const { return is(ZMM); } + XBYAK_CONSTEXPR bool isTMM() const { return is(TMM); } + XBYAK_CONSTEXPR bool isXMEM() const { return is(XMM | MEM); } + XBYAK_CONSTEXPR bool isYMEM() const { return is(YMM | MEM); } + XBYAK_CONSTEXPR bool isZMEM() const { return is(ZMM | MEM); } + XBYAK_CONSTEXPR bool isOPMASK() const { return is(OPMASK); } + XBYAK_CONSTEXPR bool isBNDREG() const { return is(BNDREG); } + XBYAK_CONSTEXPR bool isREG(int bit = 0) const { return is(REG, bit); } + XBYAK_CONSTEXPR bool isMEM(int bit = 0) const { return is(MEM, bit); } + XBYAK_CONSTEXPR bool isFPU() const { return is(FPU); } + XBYAK_CONSTEXPR bool isExt8bit() const { return (idx_ & EXT8BIT) != 0; } + XBYAK_CONSTEXPR bool isExtIdx() const { return (getIdx() & 8) != 0; } + XBYAK_CONSTEXPR bool isExtIdx2() const { return (getIdx() & 16) != 0; } + XBYAK_CONSTEXPR bool hasEvex() const { return isZMM() || isExtIdx2() || getOpmaskIdx() || getRounding(); } + XBYAK_CONSTEXPR bool hasRex() const { return isExt8bit() || isREG(64) || isExtIdx(); } + XBYAK_CONSTEXPR bool hasZero() const { return zero_; } + XBYAK_CONSTEXPR int getOpmaskIdx() const { return mask_; } + XBYAK_CONSTEXPR int getRounding() const { return rounding_; } void setKind(Kind kind) { - if ((kind & (XMM|YMM|ZMM)) == 0) return; + if ((kind & (XMM|YMM|ZMM|TMM)) == 0) return; kind_ = kind; - bit_ = kind == XMM ? 128 : kind == YMM ? 256 : 512; + bit_ = kind == XMM ? 128 : kind == YMM ? 256 : kind == ZMM ? 512 : 8192; } // err if MMX/FPU/OPMASK/BNDREG void setBit(int bit); - void setOpmaskIdx(int idx, bool ignore_idx0 = false) + void setOpmaskIdx(int idx, bool /*ignore_idx0*/ = true) { - if (!ignore_idx0 && idx == 0) throw Error(ERR_K0_IS_INVALID); - if (mask_) throw Error(ERR_OPMASK_IS_ALREADY_SET); + if (mask_) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) mask_ = idx; } void setRounding(int idx) { - if (rounding_) throw Error(ERR_ROUNDING_IS_ALREADY_SET); + if (rounding_) XBYAK_THROW(ERR_ROUNDING_IS_ALREADY_SET) rounding_ = idx; } void setZero() { zero_ = true; } @@ -456,12 +541,12 @@ public: return AH <= idx && idx <= BH; } // any bit is accetable if bit == 0 - bool is(int kind, uint32 bit = 0) const + XBYAK_CONSTEXPR bool is(int kind, uint32_t bit = 0) const { return (kind == 0 || (kind_ & kind)) && (bit == 0 || (bit_ & bit)); // cf. you can set (8|16) } - bool isBit(uint32 bit) const { return (bit_ & bit) != 0; } - uint32 getBit() const { return bit_; } + XBYAK_CONSTEXPR bool isBit(uint32_t bit) const { return (bit_ & bit) != 0; } + XBYAK_CONSTEXPR uint32_t getBit() const { return bit_; } const char *toString() const { const int idx = getIdx(); @@ -480,6 +565,11 @@ public: } else if (isOPMASK()) { static const char *tbl[8] = { "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7" }; return tbl[idx]; + } else if (isTMM()) { + static const char *tbl[8] = { + "tmm0", "tmm1", "tmm2", "tmm3", "tmm4", "tmm5", "tmm6", "tmm7" + }; + return tbl[idx]; } else if (isZMM()) { static const char *tbl[32] = { "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7", "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15", @@ -508,7 +598,7 @@ public: static const char *tbl[4] = { "bnd0", "bnd1", "bnd2", "bnd3" }; return tbl[idx]; } - throw Error(ERR_INTERNAL); + XBYAK_THROW_RET(ERR_INTERNAL, 0); } bool isEqualIfNotInherited(const Operand& rhs) const { return idx_ == rhs.idx_ && kind_ == rhs.kind_ && bit_ == rhs.bit_ && zero_ == rhs.zero_ && mask_ == rhs.mask_ && rounding_ == rhs.rounding_; } bool operator==(const Operand& rhs) const; @@ -519,13 +609,13 @@ public: inline void Operand::setBit(int bit) { - if (bit != 8 && bit != 16 && bit != 32 && bit != 64 && bit != 128 && bit != 256 && bit != 512) goto ERR; + if (bit != 8 && bit != 16 && bit != 32 && bit != 64 && bit != 128 && bit != 256 && bit != 512 && bit != 8192) goto ERR; if (isBit(bit)) return; - if (is(MEM)) { + if (is(MEM | OPMASK)) { bit_ = bit; return; } - if (is(REG | XMM | YMM | ZMM)) { + if (is(REG | XMM | YMM | ZMM | TMM)) { int idx = getIdx(); // err if converting ah, bh, ch, dh if (isREG(8) && (4 <= idx && idx < 8) && !isExt8bit()) goto ERR; @@ -547,16 +637,18 @@ inline void Operand::setBit(int bit) case 128: kind = XMM; break; case 256: kind = YMM; break; case 512: kind = ZMM; break; + case 8192: kind = TMM; break; } idx_ = idx; kind_ = kind; bit_ = bit; + if (bit >= 128) return; // keep mask_ and rounding_ mask_ = 0; rounding_ = 0; return; } ERR: - throw Error(ERR_CANT_CONVERT); + XBYAK_THROW(ERR_CANT_CONVERT) } class Label; @@ -569,17 +661,17 @@ struct Reg64; #endif class Reg : public Operand { public: - Reg() { } - Reg(int idx, Kind kind, int bit = 0, bool ext8bit = false) : Operand(idx, kind, bit, ext8bit) { } + XBYAK_CONSTEXPR Reg() { } + XBYAK_CONSTEXPR Reg(int idx, Kind kind, int bit = 0, bool ext8bit = false) : Operand(idx, kind, bit, ext8bit) { } // convert to Reg8/Reg16/Reg32/Reg64/XMM/YMM/ZMM Reg changeBit(int bit) const { Reg r(*this); r.setBit(bit); return r; } - uint8 getRexW() const { return isREG(64) ? 8 : 0; } - uint8 getRexR() const { return isExtIdx() ? 4 : 0; } - uint8 getRexX() const { return isExtIdx() ? 2 : 0; } - uint8 getRexB() const { return isExtIdx() ? 1 : 0; } - uint8 getRex(const Reg& base = Reg()) const + uint8_t getRexW() const { return isREG(64) ? 8 : 0; } + uint8_t getRexR() const { return isExtIdx() ? 4 : 0; } + uint8_t getRexX() const { return isExtIdx() ? 2 : 0; } + uint8_t getRexB() const { return isExtIdx() ? 1 : 0; } + uint8_t getRex(const Reg& base = Reg()) const { - uint8 rex = getRexW() | getRexR() | base.getRexW() | base.getRexB(); + uint8_t rex = getRexW() | getRexR() | base.getRexW() | base.getRexB(); if (rex || isExt8bit() || base.isExt8bit()) rex |= 0x40; return rex; } @@ -598,15 +690,15 @@ inline const Reg& Operand::getReg() const } struct Reg8 : public Reg { - explicit Reg8(int idx = 0, bool ext8bit = false) : Reg(idx, Operand::REG, 8, ext8bit) { } + explicit XBYAK_CONSTEXPR Reg8(int idx = 0, bool ext8bit = false) : Reg(idx, Operand::REG, 8, ext8bit) { } }; struct Reg16 : public Reg { - explicit Reg16(int idx = 0) : Reg(idx, Operand::REG, 16) { } + explicit XBYAK_CONSTEXPR Reg16(int idx = 0) : Reg(idx, Operand::REG, 16) { } }; struct Mmx : public Reg { - explicit Mmx(int idx = 0, Kind kind = Operand::MMX, int bit = 64) : Reg(idx, kind, bit) { } + explicit XBYAK_CONSTEXPR Mmx(int idx = 0, Kind kind = Operand::MMX, int bit = 64) : Reg(idx, kind, bit) { } }; struct EvexModifierRounding { @@ -617,35 +709,41 @@ struct EvexModifierRounding { T_RZ_SAE = 4, T_SAE = 5 }; - explicit EvexModifierRounding(int rounding) : rounding(rounding) {} + explicit XBYAK_CONSTEXPR EvexModifierRounding(int rounding) : rounding(rounding) {} int rounding; }; -struct EvexModifierZero{EvexModifierZero() {}}; +struct EvexModifierZero{ XBYAK_CONSTEXPR EvexModifierZero() {}}; struct Xmm : public Mmx { - explicit Xmm(int idx = 0, Kind kind = Operand::XMM, int bit = 128) : Mmx(idx, kind, bit) { } - Xmm(Kind kind, int idx) : Mmx(idx, kind, kind == XMM ? 128 : kind == YMM ? 256 : 512) { } + explicit XBYAK_CONSTEXPR Xmm(int idx = 0, Kind kind = Operand::XMM, int bit = 128) : Mmx(idx, kind, bit) { } + XBYAK_CONSTEXPR Xmm(Kind kind, int idx) : Mmx(idx, kind, kind == XMM ? 128 : kind == YMM ? 256 : 512) { } Xmm operator|(const EvexModifierRounding& emr) const { Xmm r(*this); r.setRounding(emr.rounding); return r; } Xmm copyAndSetIdx(int idx) const { Xmm ret(*this); ret.setIdx(idx); return ret; } Xmm copyAndSetKind(Operand::Kind kind) const { Xmm ret(*this); ret.setKind(kind); return ret; } }; struct Ymm : public Xmm { - explicit Ymm(int idx = 0, Kind kind = Operand::YMM, int bit = 256) : Xmm(idx, kind, bit) { } + explicit XBYAK_CONSTEXPR Ymm(int idx = 0, Kind kind = Operand::YMM, int bit = 256) : Xmm(idx, kind, bit) { } Ymm operator|(const EvexModifierRounding& emr) const { Ymm r(*this); r.setRounding(emr.rounding); return r; } }; struct Zmm : public Ymm { - explicit Zmm(int idx = 0) : Ymm(idx, Operand::ZMM, 512) { } + explicit XBYAK_CONSTEXPR Zmm(int idx = 0) : Ymm(idx, Operand::ZMM, 512) { } Zmm operator|(const EvexModifierRounding& emr) const { Zmm r(*this); r.setRounding(emr.rounding); return r; } }; +#ifdef XBYAK64 +struct Tmm : public Reg { + explicit XBYAK_CONSTEXPR Tmm(int idx = 0, Kind kind = Operand::TMM, int bit = 8192) : Reg(idx, kind, bit) { } +}; +#endif + struct Opmask : public Reg { - explicit Opmask(int idx = 0) : Reg(idx, Operand::OPMASK, 64) {} + explicit XBYAK_CONSTEXPR Opmask(int idx = 0) : Reg(idx, Operand::OPMASK, 64) {} }; struct BoundsReg : public Reg { - explicit BoundsReg(int idx = 0) : Reg(idx, Operand::BNDREG, 128) {} + explicit XBYAK_CONSTEXPR BoundsReg(int idx = 0) : Reg(idx, Operand::BNDREG, 128) {} }; templateT operator|(const T& x, const Opmask& k) { T r(x); r.setOpmaskIdx(k.getIdx()); return r; } @@ -653,43 +751,43 @@ templateT operator|(const T& x, const EvexModifierZero&) { T r(x); r.se templateT operator|(const T& x, const EvexModifierRounding& emr) { T r(x); r.setRounding(emr.rounding); return r; } struct Fpu : public Reg { - explicit Fpu(int idx = 0) : Reg(idx, Operand::FPU, 32) { } + explicit XBYAK_CONSTEXPR Fpu(int idx = 0) : Reg(idx, Operand::FPU, 32) { } }; struct Reg32e : public Reg { - explicit Reg32e(int idx, int bit) : Reg(idx, Operand::REG, bit) {} + explicit XBYAK_CONSTEXPR Reg32e(int idx, int bit) : Reg(idx, Operand::REG, bit) {} }; struct Reg32 : public Reg32e { - explicit Reg32(int idx = 0) : Reg32e(idx, 32) {} + explicit XBYAK_CONSTEXPR Reg32(int idx = 0) : Reg32e(idx, 32) {} }; #ifdef XBYAK64 struct Reg64 : public Reg32e { - explicit Reg64(int idx = 0) : Reg32e(idx, 64) {} + explicit XBYAK_CONSTEXPR Reg64(int idx = 0) : Reg32e(idx, 64) {} }; struct RegRip { - sint64 disp_; + int64_t disp_; const Label* label_; bool isAddr_; - explicit RegRip(sint64 disp = 0, const Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {} + explicit XBYAK_CONSTEXPR RegRip(int64_t disp = 0, const Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {} friend const RegRip operator+(const RegRip& r, int disp) { return RegRip(r.disp_ + disp, r.label_, r.isAddr_); } friend const RegRip operator-(const RegRip& r, int disp) { return RegRip(r.disp_ - disp, r.label_, r.isAddr_); } - friend const RegRip operator+(const RegRip& r, sint64 disp) { + friend const RegRip operator+(const RegRip& r, int64_t disp) { return RegRip(r.disp_ + disp, r.label_, r.isAddr_); } - friend const RegRip operator-(const RegRip& r, sint64 disp) { + friend const RegRip operator-(const RegRip& r, int64_t disp) { return RegRip(r.disp_ - disp, r.label_, r.isAddr_); } friend const RegRip operator+(const RegRip& r, const Label& label) { - if (r.label_ || r.isAddr_) throw Error(ERR_BAD_ADDRESSING); + if (r.label_ || r.isAddr_) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegRip()); return RegRip(r.disp_, &label); } friend const RegRip operator+(const RegRip& r, const void *addr) { - if (r.label_ || r.isAddr_) throw Error(ERR_BAD_ADDRESSING); - return RegRip(r.disp_ + (sint64)addr, 0, true); + if (r.label_ || r.isAddr_) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegRip()); + return RegRip(r.disp_ + (int64_t)addr, 0, true); } }; #endif @@ -724,7 +822,7 @@ public: enum { es, cs, ss, ds, fs, gs }; - explicit Segment(int idx) : idx_(idx) { assert(0 <= idx_ && idx_ < 6); } + explicit XBYAK_CONSTEXPR Segment(int idx) : idx_(idx) { assert(0 <= idx_ && idx_ < 6); } int getIdx() const { return idx_; } const char *toString() const { @@ -743,14 +841,14 @@ public: #else enum { i32e = 32 }; #endif - RegExp(size_t disp = 0) : scale_(0), disp_(disp) { } - RegExp(const Reg& r, int scale = 1) + XBYAK_CONSTEXPR RegExp(size_t disp = 0) : scale_(0), disp_(disp) { } + XBYAK_CONSTEXPR RegExp(const Reg& r, int scale = 1) : scale_(scale) , disp_(0) { - if (!r.isREG(i32e) && !r.is(Reg::XMM|Reg::YMM|Reg::ZMM)) throw Error(ERR_BAD_SIZE_OF_REGISTER); + if (!r.isREG(i32e) && !r.is(Reg::XMM|Reg::YMM|Reg::ZMM|Reg::TMM)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) if (scale == 0) return; - if (scale != 1 && scale != 2 && scale != 4 && scale != 8) throw Error(ERR_BAD_SCALE); + if (scale != 1 && scale != 2 && scale != 4 && scale != 8) XBYAK_THROW(ERR_BAD_SCALE) if (r.getBit() >= 128 || scale != 1) { // xmm/ymm is always index index_ = r; } else { @@ -776,20 +874,20 @@ public: const Reg& getIndex() const { return index_; } int getScale() const { return scale_; } size_t getDisp() const { return disp_; } - void verify() const + XBYAK_CONSTEXPR void verify() const { - if (base_.getBit() >= 128) throw Error(ERR_BAD_SIZE_OF_REGISTER); + if (base_.getBit() >= 128) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) if (index_.getBit() && index_.getBit() <= 64) { - if (index_.getIdx() == Operand::ESP) throw Error(ERR_ESP_CANT_BE_INDEX); - if (base_.getBit() && base_.getBit() != index_.getBit()) throw Error(ERR_BAD_SIZE_OF_REGISTER); + if (index_.getIdx() == Operand::ESP) XBYAK_THROW(ERR_ESP_CANT_BE_INDEX) + if (base_.getBit() && base_.getBit() != index_.getBit()) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) } } friend RegExp operator+(const RegExp& a, const RegExp& b); friend RegExp operator-(const RegExp& e, size_t disp); - uint8 getRex() const + uint8_t getRex() const { - uint8 rex = index_.getRexX() | base_.getRexB(); - return rex ? uint8(rex | 0x40) : 0; + uint8_t rex = index_.getRexX() | base_.getRexB(); + return rex ? uint8_t(rex | 0x40) : 0; } private: /* @@ -804,12 +902,12 @@ private: inline RegExp operator+(const RegExp& a, const RegExp& b) { - if (a.index_.getBit() && b.index_.getBit()) throw Error(ERR_BAD_ADDRESSING); + if (a.index_.getBit() && b.index_.getBit()) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp()) RegExp ret = a; if (!ret.index_.getBit()) { ret.index_ = b.index_; ret.scale_ = b.scale_; } if (b.base_.getBit()) { if (ret.base_.getBit()) { - if (ret.index_.getBit()) throw Error(ERR_BAD_ADDRESSING); + if (ret.index_.getBit()) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp()) // base + base => base + index * 1 ret.index_ = b.base_; // [reg + esp] => [esp + reg] @@ -853,9 +951,9 @@ class CodeArray { inner::LabelMode mode; AddrInfo(size_t _codeOffset, size_t _jmpAddr, int _jmpSize, inner::LabelMode _mode) : codeOffset(_codeOffset), jmpAddr(_jmpAddr), jmpSize(_jmpSize), mode(_mode) {} - uint64 getVal(const uint8 *top) const + uint64_t getVal(const uint8_t *top) const { - uint64 disp = (mode == inner::LaddTop) ? jmpAddr + size_t(top) : (mode == inner::LasIs) ? jmpAddr : jmpAddr - size_t(top); + uint64_t disp = (mode == inner::LaddTop) ? jmpAddr + size_t(top) : (mode == inner::LasIs) ? jmpAddr : jmpAddr - size_t(top); if (jmpSize == 4) disp = inner::VerifyInInt32(disp); return disp; } @@ -871,7 +969,7 @@ class CodeArray { Allocator *alloc_; protected: size_t maxSize_; - uint8 *top_; + uint8_t *top_; size_t size_; bool isCalledCalcJmpAddress_; @@ -882,8 +980,8 @@ protected: void growMemory() { const size_t newSize = (std::max)(DEFAULT_MAX_CODE_SIZE, maxSize_ * 2); - uint8 *newTop = alloc_->alloc(newSize); - if (newTop == 0) throw Error(ERR_CANT_ALLOC); + uint8_t *newTop = alloc_->alloc(newSize); + if (newTop == 0) XBYAK_THROW(ERR_CANT_ALLOC) for (size_t i = 0; i < size_; i++) newTop[i] = top_[i]; alloc_->free(top_); top_ = newTop; @@ -896,7 +994,7 @@ protected: { if (isCalledCalcJmpAddress_) return; for (AddrInfoList::const_iterator i = addrInfoList_.begin(), ie = addrInfoList_.end(); i != ie; ++i) { - uint64 disp = i->getVal(top_); + uint64_t disp = i->getVal(top_); rewrite(i->codeOffset, disp, i->jmpSize); } isCalledCalcJmpAddress_ = true; @@ -911,14 +1009,14 @@ public: : type_(userPtr == AutoGrow ? AUTO_GROW : (userPtr == 0 || userPtr == DontSetProtectRWE) ? ALLOC_BUF : USER_BUF) , alloc_(allocator ? allocator : (Allocator*)&defaultAllocator_) , maxSize_(maxSize) - , top_(type_ == USER_BUF ? reinterpret_cast(userPtr) : alloc_->alloc((std::max)(maxSize, 1))) + , top_(type_ == USER_BUF ? reinterpret_cast(userPtr) : alloc_->alloc((std::max)(maxSize, 1))) , size_(0) , isCalledCalcJmpAddress_(false) { - if (maxSize_ > 0 && top_ == 0) throw Error(ERR_CANT_ALLOC); + if (maxSize_ > 0 && top_ == 0) XBYAK_THROW(ERR_CANT_ALLOC) if ((type_ == ALLOC_BUF && userPtr != DontSetProtectRWE && useProtect()) && !setProtectMode(PROTECT_RWE, false)) { alloc_->free(top_); - throw Error(ERR_CANT_PROTECT); + XBYAK_THROW(ERR_CANT_PROTECT) } } virtual ~CodeArray() @@ -932,7 +1030,7 @@ public: { bool isOK = protect(top_, maxSize_, mode); if (isOK) return true; - if (throwException) throw Error(ERR_CANT_PROTECT); + if (throwException) XBYAK_THROW_RET(ERR_CANT_PROTECT, false) return false; } bool setProtectModeRE(bool throwException = true) { return setProtectMode(PROTECT_RE, throwException); } @@ -949,38 +1047,38 @@ public: if (type_ == AUTO_GROW) { growMemory(); } else { - throw Error(ERR_CODE_IS_TOO_BIG); + XBYAK_THROW(ERR_CODE_IS_TOO_BIG) } } - top_[size_++] = static_cast(code); + top_[size_++] = static_cast(code); } - void db(const uint8 *code, size_t codeSize) + void db(const uint8_t *code, size_t codeSize) { for (size_t i = 0; i < codeSize; i++) db(code[i]); } - void db(uint64 code, size_t codeSize) + void db(uint64_t code, size_t codeSize) { - if (codeSize > 8) throw Error(ERR_BAD_PARAMETER); - for (size_t i = 0; i < codeSize; i++) db(static_cast(code >> (i * 8))); + if (codeSize > 8) XBYAK_THROW(ERR_BAD_PARAMETER) + for (size_t i = 0; i < codeSize; i++) db(static_cast(code >> (i * 8))); } - void dw(uint32 code) { db(code, 2); } - void dd(uint32 code) { db(code, 4); } - void dq(uint64 code) { db(code, 8); } - const uint8 *getCode() const { return top_; } + void dw(uint32_t code) { db(code, 2); } + void dd(uint32_t code) { db(code, 4); } + void dq(uint64_t code) { db(code, 8); } + const uint8_t *getCode() const { return top_; } template const F getCode() const { return reinterpret_cast(top_); } - const uint8 *getCurr() const { return &top_[size_]; } + const uint8_t *getCurr() const { return &top_[size_]; } template const F getCurr() const { return reinterpret_cast(&top_[size_]); } size_t getSize() const { return size_; } void setSize(size_t size) { - if (size > maxSize_) throw Error(ERR_OFFSET_IS_TOO_BIG); + if (size > maxSize_) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG) size_ = size; } void dump() const { - const uint8 *p = getCode(); + const uint8_t *p = getCode(); size_t bufSize = getSize(); size_t remain = bufSize; for (int i = 0; i < 4; i++) { @@ -1005,13 +1103,13 @@ public: @param disp [in] offset from the next of jmp @param size [in] write size(1, 2, 4, 8) */ - void rewrite(size_t offset, uint64 disp, size_t size) + void rewrite(size_t offset, uint64_t disp, size_t size) { assert(offset < maxSize_); - if (size != 1 && size != 2 && size != 4 && size != 8) throw Error(ERR_BAD_PARAMETER); - uint8 *const data = top_ + offset; + if (size != 1 && size != 2 && size != 4 && size != 8) XBYAK_THROW(ERR_BAD_PARAMETER) + uint8_t *const data = top_ + offset; for (size_t i = 0; i < size; i++) { - data[i] = static_cast(disp >> (i * 8)); + data[i] = static_cast(disp >> (i * 8)); } } void save(size_t offset, size_t val, int size, inner::LabelMode mode) @@ -1068,9 +1166,9 @@ public: @param alignedSize [in] power of two @return aligned addr by alingedSize */ - static inline uint8 *getAlignedAddress(uint8 *addr, size_t alignedSize = 16) + static inline uint8_t *getAlignedAddress(uint8_t *addr, size_t alignedSize = 16) { - return reinterpret_cast((reinterpret_cast(addr) + alignedSize - 1) & ~(alignedSize - static_cast(1))); + return reinterpret_cast((reinterpret_cast(addr) + alignedSize - 1) & ~(alignedSize - static_cast(1))); } }; @@ -1082,15 +1180,15 @@ public: M_rip, M_ripAddr }; - Address(uint32 sizeBit, bool broadcast, const RegExp& e) + XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegExp& e) : Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), broadcast_(broadcast) { e_.verify(); } #ifdef XBYAK64 - explicit Address(size_t disp) + explicit XBYAK_CONSTEXPR Address(size_t disp) : Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), broadcast_(false){ } - Address(uint32 sizeBit, bool broadcast, const RegRip& addr) + XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegRip& addr) : Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), broadcast_(broadcast) { } #endif RegExp getRegExp(bool optimize = true) const @@ -1101,7 +1199,7 @@ public: bool is32bit() const { return e_.getBase().getBit() == 32 || e_.getIndex().getBit() == 32; } bool isOnlyDisp() const { return !e_.getBase().getBit() && !e_.getIndex().getBit(); } // for mov eax size_t getDisp() const { return e_.getDisp(); } - uint8 getRex() const + uint8_t getRex() const { if (mode_ != M_ModRM) return 0; return getRegExp().getRex(); @@ -1138,9 +1236,9 @@ class AddressFrame { void operator=(const AddressFrame&); AddressFrame(const AddressFrame&); public: - const uint32 bit_; + const uint32_t bit_; const bool broadcast_; - explicit AddressFrame(uint32 bit, bool broadcast = false) : bit_(bit), broadcast_(broadcast) { } + explicit XBYAK_CONSTEXPR AddressFrame(uint32_t bit, bool broadcast = false) : bit_(bit), broadcast_(broadcast) { } Address operator[](const RegExp& e) const { return Address(bit_, broadcast_, e); @@ -1150,7 +1248,7 @@ public: return Address(bit_, broadcast_, RegExp(reinterpret_cast(disp))); } #ifdef XBYAK64 - Address operator[](uint64 disp) const { return Address(disp); } + Address operator[](uint64_t disp) const { return Address(disp); } Address operator[](const RegRip& addr) const { return Address(bit_, broadcast_, addr); } #endif }; @@ -1179,7 +1277,7 @@ public: ~Label(); void clear() { mgr = 0; id = 0; } int getId() const { return id; } - const uint8 *getAddress() const; + const uint8_t *getAddress() const; // backward compatibility static inline std::string toStr(int num) @@ -1237,7 +1335,7 @@ class LabelManager { // add label typename DefList::value_type item(labelId, addrOffset); std::pair ret = defList.insert(item); - if (!ret.second) throw Error(ERR_LABEL_IS_REDEFINED); + if (!ret.second) XBYAK_THROW(ERR_LABEL_IS_REDEFINED) // search undefined label for (;;) { typename UndefList::iterator itr = undefList.find(labelId); @@ -1252,9 +1350,9 @@ class LabelManager { } else { disp = addrOffset - jmp->endOfJmp + jmp->disp; #ifdef XBYAK64 - if (jmp->jmpSize <= 4 && !inner::IsInInt32(disp)) throw Error(ERR_OFFSET_IS_TOO_BIG); + if (jmp->jmpSize <= 4 && !inner::IsInInt32(disp)) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG) #endif - if (jmp->jmpSize == 1 && !inner::IsInDisp8((uint32)disp)) throw Error(ERR_LABEL_IS_TOO_FAR); + if (jmp->jmpSize == 1 && !inner::IsInDisp8((uint32_t)disp)) XBYAK_THROW(ERR_LABEL_IS_TOO_FAR) } if (base_->isAutoGrow()) { base_->save(offset, disp, jmp->jmpSize, jmp->mode); @@ -1326,6 +1424,7 @@ public: clabelDefList_.clear(); clabelUndefList_.clear(); resetLabelPtrList(); + ClearError(); } void enterLocal() { @@ -1333,14 +1432,14 @@ public: } void leaveLocal() { - if (stateList_.size() <= 2) throw Error(ERR_UNDER_LOCAL_LABEL); - if (hasUndefinedLabel_inner(stateList_.back().undefList)) throw Error(ERR_LABEL_IS_NOT_FOUND); + if (stateList_.size() <= 2) XBYAK_THROW(ERR_UNDER_LOCAL_LABEL) + if (hasUndefinedLabel_inner(stateList_.back().undefList)) XBYAK_THROW(ERR_LABEL_IS_NOT_FOUND) stateList_.pop_back(); } void set(CodeArray *base) { base_ = base; } void defineSlabel(std::string label) { - if (label == "@b" || label == "@f") throw Error(ERR_BAD_LABEL_STR); + if (label == "@b" || label == "@f") XBYAK_THROW(ERR_BAD_LABEL_STR) if (label == "@@") { SlabelDefList& defList = stateList_.front().defList; SlabelDefList::iterator i = defList.find("@f"); @@ -1367,7 +1466,7 @@ public: void assign(Label& dst, const Label& src) { ClabelDefList::const_iterator i = clabelDefList_.find(src.id); - if (i == clabelDefList_.end()) throw Error(ERR_LABEL_ISNOT_SET_BY_L); + if (i == clabelDefList_.end()) XBYAK_THROW(ERR_LABEL_ISNOT_SET_BY_L) define_inner(clabelDefList_, clabelUndefList_, dst.id, i->second.offset); dst.mgr = this; labelPtrList_.insert(&dst); @@ -1379,7 +1478,7 @@ public: if (defList.find("@f") != defList.end()) { label = "@f"; } else if (defList.find("@b") == defList.end()) { - throw Error(ERR_LABEL_IS_NOT_FOUND); + XBYAK_THROW_RET(ERR_LABEL_IS_NOT_FOUND, false) } } else if (label == "@f") { if (defList.find("@f") != defList.end()) { @@ -1410,7 +1509,7 @@ public: return false; } bool hasUndefClabel() const { return hasUndefinedLabel_inner(clabelUndefList_); } - const uint8 *getCode() const { return base_->getCode(); } + const uint8_t *getCode() const { return base_->getCode(); } bool isReady() const { return !base_->isAutoGrow() || base_->isCalledCalcJmpAddress(); } }; @@ -1422,7 +1521,7 @@ inline Label::Label(const Label& rhs) } inline Label& Label::operator=(const Label& rhs) { - if (id) throw Error(ERR_LABEL_IS_ALREADY_SET_BY_L); + if (id) XBYAK_THROW_RET(ERR_LABEL_IS_ALREADY_SET_BY_L, *this) id = rhs.id; mgr = rhs.mgr; if (mgr) mgr->incRefCount(id, this); @@ -1432,7 +1531,7 @@ inline Label::~Label() { if (id && mgr) mgr->decRefCount(id, this); } -inline const uint8* Label::getAddress() const +inline const uint8_t* Label::getAddress() const { if (mgr == 0 || !mgr->isReady()) return 0; size_t offset; @@ -1451,7 +1550,7 @@ private: CodeGenerator operator=(const CodeGenerator&); // don't call #ifdef XBYAK64 enum { i32e = 32 | 64, BIT = 64 }; - static const size_t dummyAddr = (size_t(0x11223344) << 32) | 55667788; + static const uint64_t dummyAddr = uint64_t(0x1122334455667788ull); typedef Reg64 NativeReg; #else enum { i32e = 32, BIT = 32 }; @@ -1495,10 +1594,10 @@ private: } void rex(const Operand& op1, const Operand& op2 = Operand()) { - uint8 rex = 0; + uint8_t rex = 0; const Operand *p1 = &op1, *p2 = &op2; if (p1->isMEM()) std::swap(p1, p2); - if (p1->isMEM()) throw Error(ERR_BAD_COMBINATION); + if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) if (p2->isMEM()) { const Address& addr = p2->getAddress(); if (BIT == 64 && addr.is32bit()) db(0x67); @@ -1558,13 +1657,13 @@ private: bool r = reg.isExtIdx(); bool b = base.isExtIdx(); int idx = v ? v->getIdx() : 0; - if ((idx | reg.getIdx() | base.getIdx()) >= 16) throw Error(ERR_BAD_COMBINATION); - uint32 pp = (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0; - uint32 vvvv = (((~idx) & 15) << 3) | (is256 ? 4 : 0) | pp; + if ((idx | reg.getIdx() | base.getIdx()) >= 16) XBYAK_THROW(ERR_BAD_COMBINATION) + uint32_t pp = (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0; + uint32_t vvvv = (((~idx) & 15) << 3) | (is256 ? 4 : 0) | pp; if (!b && !x && !w && (type & T_0F)) { db(0xC5); db((r ? 0 : 0x80) | vvvv); } else { - uint32 mmmm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0; + uint32_t mmmm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0; db(0xC4); db((r ? 0 : 0x80) | (x ? 0 : 0x40) | (b ? 0 : 0x20) | mmmm); db((w << 7) | vvvv); } db(code); @@ -1572,29 +1671,29 @@ private: void verifySAE(const Reg& r, int type) const { if (((type & T_SAE_X) && r.isXMM()) || ((type & T_SAE_Y) && r.isYMM()) || ((type & T_SAE_Z) && r.isZMM())) return; - throw Error(ERR_SAE_IS_INVALID); + XBYAK_THROW(ERR_SAE_IS_INVALID) } void verifyER(const Reg& r, int type) const { if (((type & T_ER_X) && r.isXMM()) || ((type & T_ER_Y) && r.isYMM()) || ((type & T_ER_Z) && r.isZMM())) return; - throw Error(ERR_ER_IS_INVALID); + XBYAK_THROW(ERR_ER_IS_INVALID) } // (a, b, c) contains non zero two or three values then err int verifyDuplicate(int a, int b, int c, int err) { int v = a | b | c; - if ((a > 0 && a != v) + (b > 0 && b != v) + (c > 0 && c != v) > 0) return Error(err); + if ((a > 0 && a != v) + (b > 0 && b != v) + (c > 0 && c != v) > 0) XBYAK_THROW_RET(err, 0) return v; } - int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32 VL = 0, bool Hi16Vidx = false) + int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32_t VL = 0, bool Hi16Vidx = false) { - if (!(type & (T_EVEX | T_MUST_EVEX))) throw Error(ERR_EVEX_IS_INVALID); + if (!(type & (T_EVEX | T_MUST_EVEX))) XBYAK_THROW_RET(ERR_EVEX_IS_INVALID, 0) int w = (type & T_EW1) ? 1 : 0; - uint32 mm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0; - uint32 pp = (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0; + uint32_t mm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0; + uint32_t pp = (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0; int idx = v ? v->getIdx() : 0; - uint32 vvvv = ~idx; + uint32_t vvvv = ~idx; bool R = !reg.isExtIdx(); bool X = x ? false : !base.isExtIdx2(); @@ -1632,6 +1731,7 @@ private: bool Vp = !((v ? v->isExtIdx2() : 0) | Hi16Vidx); bool z = reg.hasZero() || base.hasZero() || (v ? v->hasZero() : false); if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET); + if (aaa == 0) z = 0; // clear T_z if mask is not set db(0x62); db((R ? 0x80 : 0) | (X ? 0x40 : 0) | (B ? 0x20 : 0) | (Rp ? 0x10 : 0) | (mm & 3)); db((w == 1 ? 0x80 : 0) | ((vvvv & 15) << 3) | 4 | (pp & 3)); @@ -1641,16 +1741,16 @@ private: } void setModRM(int mod, int r1, int r2) { - db(static_cast((mod << 6) | ((r1 & 7) << 3) | (r2 & 7))); + db(static_cast((mod << 6) | ((r1 & 7) << 3) | (r2 & 7))); } void setSIB(const RegExp& e, int reg, int disp8N = 0) { - size_t disp64 = e.getDisp(); + uint64_t disp64 = e.getDisp(); #ifdef XBYAK64 - size_t high = disp64 >> 32; - if (high != 0 && high != 0xFFFFFFFF) throw Error(ERR_OFFSET_IS_TOO_BIG); + uint64_t high = disp64 >> 32; + if (high != 0 && high != 0xFFFFFFFF) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG) #endif - uint32 disp = static_cast(disp64); + uint32_t disp = static_cast(disp64); const Reg& base = e.getBase(); const Reg& index = e.getIndex(); const int baseIdx = base.getIdx(); @@ -1669,7 +1769,7 @@ private: } } else { // disp must be casted to signed - uint32 t = static_cast(static_cast(disp) / disp8N); + uint32_t t = static_cast(static_cast(disp) / disp8N); if ((disp % disp8N) == 0 && inner::IsInDisp8(t)) { disp = t; mod = mod01; @@ -1699,7 +1799,7 @@ private: } } LabelManager labelMgr_; - bool isInDisp16(uint32 x) const { return 0xFFFF8000 <= x || x <= 0x7FFF; } + bool isInDisp16(uint32_t x) const { return 0xFFFF8000 <= x || x <= 0x7FFF; } void opModR(const Reg& reg1, const Reg& reg2, int code0, int code1 = NONE, int code2 = NONE) { rex(reg2, reg1); @@ -1708,23 +1808,31 @@ private: } void opModM(const Address& addr, const Reg& reg, int code0, int code1 = NONE, int code2 = NONE, int immSize = 0) { - if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP); + if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP) rex(addr, reg); db(code0 | (reg.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2); opAddr(addr, reg.getIdx(), immSize); } + void opLoadSeg(const Address& addr, const Reg& reg, int code0, int code1 = NONE) + { + if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP) + if (reg.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) + rex(addr, reg); + db(code0); if (code1 != NONE) db(code1); + opAddr(addr, reg.getIdx()); + } void opMIB(const Address& addr, const Reg& reg, int code0, int code1) { - if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP); - if (addr.getMode() != Address::M_ModRM) throw Error(ERR_INVALID_MIB_ADDRESS); + if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP) + if (addr.getMode() != Address::M_ModRM) XBYAK_THROW(ERR_INVALID_MIB_ADDRESS) if (BIT == 64 && addr.is32bit()) db(0x67); const RegExp& regExp = addr.getRegExp(false); - uint8 rex = regExp.getRex(); + uint8_t rex = regExp.getRex(); if (rex) db(rex); db(code0); db(code1); setSIB(regExp, reg.getIdx()); } - void makeJmp(uint32 disp, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref) + void makeJmp(uint32_t disp, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref) { const int shortJmpSize = 2; const int longHeaderSize = longPref ? 2 : 1; @@ -1732,13 +1840,14 @@ private: if (type != T_NEAR && inner::IsInDisp8(disp - shortJmpSize)) { db(shortCode); db(disp - shortJmpSize); } else { - if (type == T_SHORT) throw Error(ERR_LABEL_IS_TOO_FAR); + if (type == T_SHORT) XBYAK_THROW(ERR_LABEL_IS_TOO_FAR) if (longPref) db(longPref); db(longCode); dd(disp - longJmpSize); } } + bool isNEAR(LabelType type) const { return type == T_NEAR || (type == T_AUTO && isDefaultJmpNEAR_); } template - void opJmp(T& label, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref) + void opJmp(T& label, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref) { if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory(); /* avoid splitting code of jmp */ size_t offset = 0; @@ -1746,7 +1855,7 @@ private: makeJmp(inner::VerifyInInt32(offset - size_), type, shortCode, longCode, longPref); } else { int jmpSize = 0; - if (type == T_NEAR) { + if (isNEAR(type)) { jmpSize = 4; if (longPref) db(longPref); db(longCode); dd(0); @@ -1758,17 +1867,17 @@ private: labelMgr_.addUndefinedLabel(label, jmp); } } - void opJmpAbs(const void *addr, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref = 0) + void opJmpAbs(const void *addr, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref = 0) { if (isAutoGrow()) { - if (type != T_NEAR) throw Error(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW); + if (!isNEAR(type)) XBYAK_THROW(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW) if (size_ + 16 >= maxSize_) growMemory(); if (longPref) db(longPref); db(longCode); dd(0); save(size_ - 4, size_t(addr) - size_, 4, inner::Labs); } else { - makeJmp(inner::VerifyInInt32(reinterpret_cast(addr) - getCurr()), type, shortCode, longCode, longPref); + makeJmp(inner::VerifyInInt32(reinterpret_cast(addr) - getCurr()), type, shortCode, longCode, longPref); } } @@ -1777,7 +1886,7 @@ private: // disp8N = 0(normal), disp8N = 1(force disp32), disp8N = {2, 4, 8} ; compressed displacement void opAddr(const Address &addr, int reg, int immSize = 0, int disp8N = 0, bool permitVisb = false) { - if (!permitVisb && addr.isVsib()) throw Error(ERR_BAD_VSIB_ADDRESSING); + if (!permitVisb && addr.isVsib()) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) if (addr.getMode() == Address::M_ModRM) { setSIB(addr.getRegExp(), reg, disp8N); } else if (addr.getMode() == Address::M_rip || addr.getMode() == Address::M_ripAddr) { @@ -1787,7 +1896,7 @@ private: } else { size_t disp = addr.getDisp(); if (addr.getMode() == Address::M_ripAddr) { - if (isAutoGrow()) throw Error(ERR_INVALID_RIP_IN_AUTO_GROW); + if (isAutoGrow()) XBYAK_THROW(ERR_INVALID_RIP_IN_AUTO_GROW) disp -= (size_t)getCurr() + 4 + immSize; } dd(inner::VerifyInInt32(disp)); @@ -1797,7 +1906,7 @@ private: /* preCode is for SSSE3/SSE4 */ void opGen(const Operand& reg, const Operand& op, int code, int pref, bool isValid(const Operand&, const Operand&), int imm8 = NONE, int preCode = NONE) { - if (isValid && !isValid(reg, op)) throw Error(ERR_BAD_COMBINATION); + if (isValid && !isValid(reg, op)) XBYAK_THROW(ERR_BAD_COMBINATION) if (pref != NONE) db(pref); if (op.isMEM()) { opModM(op.getAddress(), reg.getReg(), 0x0F, preCode, code, (imm8 != NONE) ? 1 : 0); @@ -1824,7 +1933,7 @@ private: } else if (op1.isMEM() && op2.isXMM()) { opModM(op1.getAddress(), op2.getReg(), 0x0F, code | 1); } else { - throw Error(ERR_BAD_COMBINATION); + XBYAK_THROW(ERR_BAD_COMBINATION) } } void opExt(const Operand& op, const Mmx& mmx, int code, int imm, bool hasMMX2 = false) @@ -1845,7 +1954,7 @@ private: } else if (op.isMEM()) { opModM(op.getAddress(), Reg(ext, Operand::REG, opBit), code0, code1, code2, immSize); } else { - throw Error(ERR_BAD_COMBINATION); + XBYAK_THROW(ERR_BAD_COMBINATION) } } void opShift(const Operand& op, int imm, int ext) @@ -1856,7 +1965,7 @@ private: } void opShift(const Operand& op, const Reg8& _cl, int ext) { - if (_cl.getIdx() != Operand::CL) throw Error(ERR_BAD_COMBINATION); + if (_cl.getIdx() != Operand::CL) XBYAK_THROW(ERR_BAD_COMBINATION) opR_ModM(op, 0, ext, 0xD2); } void opModRM(const Operand& op1, const Operand& op2, bool condR, bool condM, int code0, int code1 = NONE, int code2 = NONE, int immSize = 0) @@ -1866,12 +1975,12 @@ private: } else if (condM) { opModM(op2.getAddress(), op1.getReg(), code0, code1, code2, immSize); } else { - throw Error(ERR_BAD_COMBINATION); + XBYAK_THROW(ERR_BAD_COMBINATION) } } - void opShxd(const Operand& op, const Reg& reg, uint8 imm, int code, const Reg8 *_cl = 0) + void opShxd(const Operand& op, const Reg& reg, uint8_t imm, int code, const Reg8 *_cl = 0) { - if (_cl && _cl->getIdx() != Operand::CL) throw Error(ERR_BAD_COMBINATION); + if (_cl && _cl->getIdx() != Operand::CL) XBYAK_THROW(ERR_BAD_COMBINATION) opModRM(reg, op, (op.isREG(16 | i32e) && op.getBit() == reg.getBit()), op.isMEM() && (reg.isREG(16 | i32e)), 0x0F, code | (_cl ? 1 : 0), NONE, _cl ? 0 : 1); if (!_cl) db(imm); } @@ -1885,12 +1994,12 @@ private: } } // (REG|MEM, IMM) - void opRM_I(const Operand& op, uint32 imm, int code, int ext) + void opRM_I(const Operand& op, uint32_t imm, int code, int ext) { verifyMemHasSize(op); - uint32 immBit = inner::IsInDisp8(imm) ? 8 : isInDisp16(imm) ? 16 : 32; + uint32_t immBit = inner::IsInDisp8(imm) ? 8 : isInDisp16(imm) ? 16 : 32; if (op.isBit(8)) immBit = 8; - if (op.getBit() < immBit) throw Error(ERR_IMM_IS_TOO_BIG); + if (op.getBit() < immBit) XBYAK_THROW(ERR_IMM_IS_TOO_BIG) if (op.isBit(32|64) && immBit == 16) immBit = 32; /* don't use MEM16 if 32/64bit mode */ if (op.isREG() && op.getIdx() == 0 && (op.getBit() == immBit || (op.isBit(64) && immBit == 32))) { // rax, eax, ax, al rex(op); @@ -1932,21 +2041,21 @@ private: return; } } - throw Error(ERR_BAD_COMBINATION); + XBYAK_THROW(ERR_BAD_COMBINATION) } void verifyMemHasSize(const Operand& op) const { - if (op.isMEM() && op.getBit() == 0) throw Error(ERR_MEM_SIZE_IS_NOT_SPECIFIED); + if (op.isMEM() && op.getBit() == 0) XBYAK_THROW(ERR_MEM_SIZE_IS_NOT_SPECIFIED) } /* mov(r, imm) = db(imm, mov_imm(r, imm)) */ - int mov_imm(const Reg& reg, size_t imm) + int mov_imm(const Reg& reg, uint64_t imm) { int bit = reg.getBit(); const int idx = reg.getIdx(); int code = 0xB0 | ((bit == 8 ? 0 : 1) << 3); - if (bit == 64 && (imm & ~size_t(0xffffffffu)) == 0) { + if (bit == 64 && (imm & ~uint64_t(0xffffffffu)) == 0) { rex(Reg32(idx)); bit = 32; } else { @@ -1970,32 +2079,32 @@ private: if (relative) { db(inner::VerifyInInt32(offset + disp - size_ - jmpSize), jmpSize); } else if (isAutoGrow()) { - db(uint64(0), jmpSize); + db(uint64_t(0), jmpSize); save(size_ - jmpSize, offset, jmpSize, inner::LaddTop); } else { db(size_t(top_) + offset, jmpSize); } return; } - db(uint64(0), jmpSize); + db(uint64_t(0), jmpSize); JmpLabel jmp(size_, jmpSize, (relative ? inner::LasIs : isAutoGrow() ? inner::LaddTop : inner::Labs), disp); labelMgr_.addUndefinedLabel(label, jmp); } - void opMovxx(const Reg& reg, const Operand& op, uint8 code) + void opMovxx(const Reg& reg, const Operand& op, uint8_t code) { - if (op.isBit(32)) throw Error(ERR_BAD_COMBINATION); + if (op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) int w = op.isBit(16); #ifdef XBYAK64 - if (op.isHigh8bit()) throw Error(ERR_BAD_COMBINATION); + if (op.isHigh8bit()) XBYAK_THROW(ERR_BAD_COMBINATION) #endif bool cond = reg.isREG() && (reg.getBit() > op.getBit()); opModRM(reg, op, cond && op.isREG(), cond && op.isMEM(), 0x0F, code | w); } - void opFpuMem(const Address& addr, uint8 m16, uint8 m32, uint8 m64, uint8 ext, uint8 m64ext) + void opFpuMem(const Address& addr, uint8_t m16, uint8_t m32, uint8_t m64, uint8_t ext, uint8_t m64ext) { - if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP); - uint8 code = addr.isBit(16) ? m16 : addr.isBit(32) ? m32 : addr.isBit(64) ? m64 : 0; - if (!code) throw Error(ERR_BAD_MEM_SIZE); + if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP) + uint8_t code = addr.isBit(16) ? m16 : addr.isBit(32) ? m32 : addr.isBit(64) ? m64 : 0; + if (!code) XBYAK_THROW(ERR_BAD_MEM_SIZE) if (m64ext && addr.isBit(64)) ext = m64ext; rex(addr, st0); @@ -2004,14 +2113,14 @@ private: } // use code1 if reg1 == st0 // use code2 if reg1 != st0 && reg2 == st0 - void opFpuFpu(const Fpu& reg1, const Fpu& reg2, uint32 code1, uint32 code2) + void opFpuFpu(const Fpu& reg1, const Fpu& reg2, uint32_t code1, uint32_t code2) { - uint32 code = reg1.getIdx() == 0 ? code1 : reg2.getIdx() == 0 ? code2 : 0; - if (!code) throw Error(ERR_BAD_ST_COMBINATION); - db(uint8(code >> 8)); - db(uint8(code | (reg1.getIdx() | reg2.getIdx()))); + uint32_t code = reg1.getIdx() == 0 ? code1 : reg2.getIdx() == 0 ? code2 : 0; + if (!code) XBYAK_THROW(ERR_BAD_ST_COMBINATION) + db(uint8_t(code >> 8)); + db(uint8_t(code | (reg1.getIdx() | reg2.getIdx()))); } - void opFpu(const Fpu& reg, uint8 code1, uint8 code2) + void opFpu(const Fpu& reg, uint8_t code1, uint8_t code2) { db(code1); db(code2 | reg.getIdx()); } @@ -2027,10 +2136,10 @@ private: bool x = index.isExtIdx(); if ((type & (T_MUST_EVEX|T_MEM_EVEX)) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) { int aaa = addr.getOpmaskIdx(); - if (aaa && !(type & T_M_K)) throw Error(ERR_INVALID_OPMASK_WITH_MEMORY); + if (aaa && !(type & T_M_K)) XBYAK_THROW(ERR_INVALID_OPMASK_WITH_MEMORY) bool b = false; if (addr.isBroadcast()) { - if (!(type & (T_B32 | T_B64))) throw Error(ERR_INVALID_BROADCAST); + if (!(type & (T_B32 | T_B64))) XBYAK_THROW(ERR_INVALID_BROADCAST) b = true; } int VL = regExp.isVsib() ? index.getBit() : 0; @@ -2052,13 +2161,13 @@ private: } // (r, r, r/m) if isR_R_RM // (r, r/m, r) - void opGpr(const Reg32e& r, const Operand& op1, const Operand& op2, int type, uint8 code, bool isR_R_RM, int imm8 = NONE) + void opGpr(const Reg32e& r, const Operand& op1, const Operand& op2, int type, uint8_t code, bool isR_R_RM, int imm8 = NONE) { const Operand *p1 = &op1; const Operand *p2 = &op2; if (!isR_R_RM) std::swap(p1, p2); const unsigned int bit = r.getBit(); - if (p1->getBit() != bit || (p2->isREG() && p2->getBit() != bit)) throw Error(ERR_BAD_COMBINATION); + if (p1->getBit() != bit || (p2->isREG() && p2->getBit() != bit)) XBYAK_THROW(ERR_BAD_COMBINATION) type |= (bit == 64) ? T_W1 : T_W0; opVex(r, p1, *p2, type, code, imm8); } @@ -2071,23 +2180,23 @@ private: op = &op1; } // (x1, x2, op) - if (!((x1.isXMM() && x2->isXMM()) || ((type & T_YMM) && ((x1.isYMM() && x2->isYMM()) || (x1.isZMM() && x2->isZMM()))))) throw Error(ERR_BAD_COMBINATION); + if (!((x1.isXMM() && x2->isXMM()) || ((type & T_YMM) && ((x1.isYMM() && x2->isYMM()) || (x1.isZMM() && x2->isZMM()))))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, x2, *op, type, code0, imm8); } void opAVX_K_X_XM(const Opmask& k, const Xmm& x2, const Operand& op3, int type, int code0, int imm8 = NONE) { - if (!op3.isMEM() && (x2.getKind() != op3.getKind())) throw Error(ERR_BAD_COMBINATION); + if (!op3.isMEM() && (x2.getKind() != op3.getKind())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, &x2, op3, type, code0, imm8); } // (x, x/m), (y, x/m256), (z, y/m) void checkCvt1(const Operand& x, const Operand& op) const { - if (!op.isMEM() && !(x.is(Operand::XMM | Operand::YMM) && op.isXMM()) && !(x.isZMM() && op.isYMM())) throw Error(ERR_BAD_COMBINATION); + if (!op.isMEM() && !(x.is(Operand::XMM | Operand::YMM) && op.isXMM()) && !(x.isZMM() && op.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) } // (x, x/m), (x, y/m256), (y, z/m) void checkCvt2(const Xmm& x, const Operand& op) const { - if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) throw Error(ERR_BAD_COMBINATION); + if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) XBYAK_THROW(ERR_BAD_COMBINATION) } void opCvt2(const Xmm& x, const Operand& op, int type, int code) { @@ -2095,9 +2204,9 @@ private: Operand::Kind kind = x.isXMM() ? (op.isBit(256) ? Operand::YMM : Operand::XMM) : Operand::ZMM; opVex(x.copyAndSetKind(kind), &xm0, op, type, code); } - void opCvt3(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int type64, int type32, uint8 code) + void opCvt3(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int type64, int type32, uint8_t code) { - if (!(x1.isXMM() && x2.isXMM() && (op.isREG(i32e) || op.isMEM()))) throw Error(ERR_BAD_SIZE_OF_REGISTER); + if (!(x1.isXMM() && x2.isXMM() && (op.isREG(i32e) || op.isMEM()))) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) Xmm x(op.getIdx()); const Operand *p = op.isREG() ? &x : &op; opVex(x1, &x2, *p, type | (op.isBit(64) ? type64 : type32), code); @@ -2112,18 +2221,18 @@ private: opAVX_X_X_XM(x, cvtIdx0(x), op, type, code, imm8); } // QQQ:need to refactor - void opSp1(const Reg& reg, const Operand& op, uint8 pref, uint8 code0, uint8 code1) + void opSp1(const Reg& reg, const Operand& op, uint8_t pref, uint8_t code0, uint8_t code1) { - if (reg.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER); + if (reg.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) bool is16bit = reg.isREG(16) && (op.isREG(16) || op.isMEM()); - if (!is16bit && !(reg.isREG(i32e) && (op.isREG(reg.getBit()) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); + if (!is16bit && !(reg.isREG(i32e) && (op.isREG(reg.getBit()) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) if (is16bit) db(0x66); db(pref); opModRM(reg.changeBit(i32e == 32 ? 32 : reg.getBit()), op, op.isREG(), true, code0, code1); } - void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, int type, uint8 code, int mode) + void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, int type, uint8_t code, int mode) { const RegExp& regExp = addr.getRegExp(); - if (!regExp.isVsib(128 | 256)) throw Error(ERR_BAD_VSIB_ADDRESSING); + if (!regExp.isVsib(128 | 256)) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) const int y_vx_y = 0; const int y_vy_y = 1; // const int x_vy_x = 2; @@ -2137,7 +2246,7 @@ private: } else { // x_vy_x isOK = !x1.isYMM() && isAddrYMM && !x2.isYMM(); } - if (!isOK) throw Error(ERR_BAD_VSIB_ADDRESSING); + if (!isOK) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) } opAVX_X_X_XM(isAddrYMM ? Ymm(x1.getIdx()) : x1, isAddrYMM ? Ymm(x2.getIdx()) : x2, addr, type, code); } @@ -2157,11 +2266,11 @@ private: case xx_xy_yz: if ((x1.isXMM() && x2.isYMM()) || (x1.isYMM() && x2.isZMM())) return; break; } - throw Error(ERR_BAD_VSIB_ADDRESSING); + XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) } - void opGather2(const Xmm& x, const Address& addr, int type, uint8 code, int mode) + void opGather2(const Xmm& x, const Address& addr, int type, uint8_t code, int mode) { - if (x.hasZero()) throw Error(ERR_INVALID_ZERO); + if (x.hasZero()) XBYAK_THROW(ERR_INVALID_ZERO) checkGather2(x, addr.getRegExp().getIndex(), mode); opVex(x, 0, addr, type, code); } @@ -2169,21 +2278,52 @@ private: xx_xy_yz ; mode = true xx_xy_xz ; mode = false */ - void opVmov(const Operand& op, const Xmm& x, int type, uint8 code, bool mode) + void opVmov(const Operand& op, const Xmm& x, int type, uint8_t code, bool mode) { if (mode) { - if (!op.isMEM() && !((op.isXMM() && x.isXMM()) || (op.isXMM() && x.isYMM()) || (op.isYMM() && x.isZMM()))) throw Error(ERR_BAD_COMBINATION); + if (!op.isMEM() && !((op.isXMM() && x.isXMM()) || (op.isXMM() && x.isYMM()) || (op.isYMM() && x.isZMM()))) XBYAK_THROW(ERR_BAD_COMBINATION) } else { - if (!op.isMEM() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); + if (!op.isMEM() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) } opVex(x, 0, op, type, code); } - void opGatherFetch(const Address& addr, const Xmm& x, int type, uint8 code, Operand::Kind kind) + void opGatherFetch(const Address& addr, const Xmm& x, int type, uint8_t code, Operand::Kind kind) { - if (addr.hasZero()) throw Error(ERR_INVALID_ZERO); - if (addr.getRegExp().getIndex().getKind() != kind) throw Error(ERR_BAD_VSIB_ADDRESSING); + if (addr.hasZero()) XBYAK_THROW(ERR_INVALID_ZERO) + if (addr.getRegExp().getIndex().getKind() != kind) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) opVex(x, 0, addr, type, code); } + void opInOut(const Reg& a, const Reg& d, uint8_t code) + { + if (a.getIdx() == Operand::AL && d.getIdx() == Operand::DX && d.getBit() == 16) { + switch (a.getBit()) { + case 8: db(code); return; + case 16: db(0x66); db(code + 1); return; + case 32: db(code + 1); return; + } + } + XBYAK_THROW(ERR_BAD_COMBINATION) + } + void opInOut(const Reg& a, uint8_t code, uint8_t v) + { + if (a.getIdx() == Operand::AL) { + switch (a.getBit()) { + case 8: db(code); db(v); return; + case 16: db(0x66); db(code + 1); db(v); return; + case 32: db(code + 1); db(v); return; + } + } + XBYAK_THROW(ERR_BAD_COMBINATION) + } +#ifdef XBYAK64 + void opAMX(const Tmm& t1, const Address& addr, int type, int code0) + { + // require both base and index + const RegExp exp = addr.getRegExp(false); + if (exp.getBase().getBit() == 0 || exp.getIndex().getBit() == 0) XBYAK_THROW(ERR_NOT_SUPPORTED) + opVex(t1, &tmm0, addr, type, code0); + } +#endif public: unsigned int getVersion() const { return VERSION; } using CodeArray::db; @@ -2193,7 +2333,7 @@ public: const Zmm zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7; const Xmm &xm0, &xm1, &xm2, &xm3, &xm4, &xm5, &xm6, &xm7; const Ymm &ym0, &ym1, &ym2, &ym3, &ym4, &ym5, &ym6, &ym7; - const Ymm &zm0, &zm1, &zm2, &zm3, &zm4, &zm5, &zm6, &zm7; + const Zmm &zm0, &zm1, &zm2, &zm3, &zm4, &zm5, &zm6, &zm7; const Reg32 eax, ecx, edx, ebx, esp, ebp, esi, edi; const Reg16 ax, cx, dx, bx, sp, bp, si, di; const Reg8 al, cl, dl, bl, ah, ch, dh, bh; @@ -2219,6 +2359,7 @@ public: const Zmm zmm8, zmm9, zmm10, zmm11, zmm12, zmm13, zmm14, zmm15; const Zmm zmm16, zmm17, zmm18, zmm19, zmm20, zmm21, zmm22, zmm23; const Zmm zmm24, zmm25, zmm26, zmm27, zmm28, zmm29, zmm30, zmm31; + const Tmm tmm0, tmm1, tmm2, tmm3, tmm4, tmm5, tmm6, tmm7; const Xmm &xm8, &xm9, &xm10, &xm11, &xm12, &xm13, &xm14, &xm15; // for my convenience const Xmm &xm16, &xm17, &xm18, &xm19, &xm20, &xm21, &xm22, &xm23; const Xmm &xm24, &xm25, &xm26, &xm27, &xm28, &xm29, &xm30, &xm31; @@ -2233,6 +2374,9 @@ public: #ifndef XBYAK_DISABLE_SEGMENT const Segment es, cs, ss, ds, fs, gs; #endif +private: + bool isDefaultJmpNEAR_; +public: void L(const std::string& label) { labelMgr_.defineSlabel(label); } void L(Label& label) { labelMgr_.defineClabel(label); } Label L() { Label label; L(label); return label; } @@ -2252,6 +2396,8 @@ public: void putL(std::string label) { putL_inner(label); } void putL(const Label& label) { putL_inner(label); } + // set default type of `jmp` of undefined label to T_NEAR + void setDefaultJmpNEAR(bool isNear) { isDefaultJmpNEAR_ = isNear; } void jmp(const Operand& op) { opR_ModM(op, BIT, 4, 0xFF, NONE, NONE, true); } void jmp(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); } void jmp(const char *label, LabelType type = T_AUTO) { jmp(std::string(label), type); } @@ -2274,7 +2420,7 @@ public: { opModRM(reg, op, op.isREG() && (op.getKind() == reg.getKind()), op.isMEM(), 0x84); } - void test(const Operand& op, uint32 imm) + void test(const Operand& op, uint32_t imm) { verifyMemHasSize(op); int immSize = (std::min)(op.getBit() / 8, 4U); @@ -2299,18 +2445,18 @@ public: } void push(const Operand& op) { opPushPop(op, 0xFF, 6, 0x50); } void pop(const Operand& op) { opPushPop(op, 0x8F, 0, 0x58); } - void push(const AddressFrame& af, uint32 imm) + void push(const AddressFrame& af, uint32_t imm) { - if (af.bit_ == 8 && inner::IsInDisp8(imm)) { + if (af.bit_ == 8) { db(0x6A); db(imm); - } else if (af.bit_ == 16 && isInDisp16(imm)) { + } else if (af.bit_ == 16) { db(0x66); db(0x68); dw(imm); } else { db(0x68); dd(imm); } } /* use "push(word, 4)" if you want "push word 4" */ - void push(uint32 imm) + void push(uint32_t imm) { if (inner::IsInDisp8(imm)) { push(byte, imm); @@ -2322,7 +2468,7 @@ public: { const Reg *reg = 0; const Address *addr = 0; - uint8 code = 0; + uint8_t code = 0; if (reg1.isREG() && reg1.getIdx() == 0 && reg2.isMEM()) { // mov eax|ax|al, [disp] reg = ®1.getReg(); addr= ®2.getAddress(); @@ -2340,21 +2486,21 @@ public: db(reg1.isREG(8) ? 0xA0 : reg1.isREG() ? 0xA1 : reg2.isREG(8) ? 0xA2 : 0xA3); db(addr->getDisp(), 8); } else { - throw Error(ERR_BAD_COMBINATION); + XBYAK_THROW(ERR_BAD_COMBINATION) } } else #else if (code && addr->isOnlyDisp()) { rex(*reg, *addr); db(code | (reg->isBit(8) ? 0 : 1)); - dd(static_cast(addr->getDisp())); + dd(static_cast(addr->getDisp())); } else #endif { opRM_RM(reg1, reg2, 0x88); } } - void mov(const Operand& op, size_t imm) + void mov(const Operand& op, uint64_t imm) { if (op.isREG()) { const int size = mov_imm(op.getReg(), imm); @@ -2363,27 +2509,24 @@ public: verifyMemHasSize(op); int immSize = op.getBit() / 8; if (immSize <= 4) { - sint64 s = sint64(imm) >> (immSize * 8); - if (s != 0 && s != -1) throw Error(ERR_IMM_IS_TOO_BIG); + int64_t s = int64_t(imm) >> (immSize * 8); + if (s != 0 && s != -1) XBYAK_THROW(ERR_IMM_IS_TOO_BIG) } else { - if (!inner::IsInInt32(imm)) throw Error(ERR_IMM_IS_TOO_BIG); + if (!inner::IsInInt32(imm)) XBYAK_THROW(ERR_IMM_IS_TOO_BIG) immSize = 4; } opModM(op.getAddress(), Reg(0, Operand::REG, op.getBit()), 0xC6, NONE, NONE, immSize); - db(static_cast(imm), immSize); + db(static_cast(imm), immSize); } else { - throw Error(ERR_BAD_COMBINATION); + XBYAK_THROW(ERR_BAD_COMBINATION) } } - void mov(const NativeReg& reg, const char *label) // can't use std::string - { - if (label == 0) { - mov(static_cast(reg), 0); // call imm - return; - } - mov_imm(reg, dummyAddr); - putL(label); - } + + // The template is used to avoid ambiguity when the 2nd argument is 0. + // When the 2nd argument is 0 the call goes to + // `void mov(const Operand& op, uint64_t imm)`. + template + void mov(const T1&, const T2 *) { T1::unexpected; } void mov(const NativeReg& reg, const Label& label) { mov_imm(reg, dummyAddr); @@ -2395,7 +2538,7 @@ public: if (p1->isMEM() || (p2->isREG(16 | i32e) && p2->getIdx() == 0)) { p1 = &op2; p2 = &op1; } - if (p1->isMEM()) throw Error(ERR_BAD_COMBINATION); + if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) if (p2->isREG() && (p1->isREG(16 | i32e) && p1->getIdx() == 0) #ifdef XBYAK64 && (p2->getIdx() != 0 || !p1->isREG(32)) @@ -2425,7 +2568,7 @@ public: { switch (seg.getIdx()) { case Segment::es: db(0x07); break; - case Segment::cs: throw Error(ERR_BAD_COMBINATION); + case Segment::cs: XBYAK_THROW(ERR_BAD_COMBINATION) case Segment::ss: db(0x17); break; case Segment::ds: db(0x1F); break; case Segment::fs: db(0x0F); db(0xA1); break; @@ -2495,6 +2638,7 @@ public: , zmm8(8), zmm9(9), zmm10(10), zmm11(11), zmm12(12), zmm13(13), zmm14(14), zmm15(15) , zmm16(16), zmm17(17), zmm18(18), zmm19(19), zmm20(20), zmm21(21), zmm22(22), zmm23(23) , zmm24(24), zmm25(25), zmm26(26), zmm27(27), zmm28(28), zmm29(29), zmm30(30), zmm31(31) + , tmm0(0), tmm1(1), tmm2(2), tmm3(3), tmm4(4), tmm5(5), tmm6(6), tmm7(7) // for my convenience , xm8(xmm8), xm9(xmm9), xm10(xmm10), xm11(xmm11), xm12(xmm12), xm13(xmm13), xm14(xmm14), xm15(xmm15) , xm16(xmm16), xm17(xmm17), xm18(xmm18), xm19(xmm19), xm20(xmm20), xm21(xmm21), xm22(xmm22), xm23(xmm23) @@ -2510,6 +2654,7 @@ public: #ifndef XBYAK_DISABLE_SEGMENT , es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs) #endif + , isDefaultJmpNEAR_(false) { labelMgr_.set(this); } @@ -2526,7 +2671,7 @@ public: */ void ready(ProtectMode mode = PROTECT_RWE) { - if (hasUndefinedLabel()) throw Error(ERR_LABEL_IS_NOT_FOUND); + if (hasUndefinedLabel()) XBYAK_THROW(ERR_LABEL_IS_NOT_FOUND) if (isAutoGrow()) { calcJmpAddress(); if (useProtect()) setProtectMode(mode); @@ -2563,7 +2708,7 @@ public: AMD and Intel seem to agree on the same sequences for up to 9 bytes: https://support.amd.com/TechDocs/55723_SOG_Fam_17h_Processors_3.00.pdf */ - static const uint8 nopTbl[9][9] = { + static const uint8_t nopTbl[9][9] = { {0x90}, {0x66, 0x90}, {0x0F, 0x1F, 0x00}, @@ -2577,7 +2722,7 @@ public: const size_t n = sizeof(nopTbl) / sizeof(nopTbl[0]); while (size > 0) { size_t len = (std::min)(n, size); - const uint8 *seq = nopTbl[len - 1]; + const uint8_t *seq = nopTbl[len - 1]; db(seq, len); size -= len; } @@ -2591,7 +2736,7 @@ public: void align(size_t x = 16, bool useMultiByteNop = true) { if (x == 1) return; - if (x < 1 || (x & (x - 1))) throw Error(ERR_BAD_ALIGN); + if (x < 1 || (x & (x - 1))) XBYAK_THROW(ERR_BAD_ALIGN) if (isAutoGrow() && x > inner::ALIGN_PAGE_SIZE) fprintf(stderr, "warning:autoGrow mode does not support %d align\n", (int)x); size_t remain = size_t(getCurr()) % x; if (remain) { @@ -2601,39 +2746,48 @@ public: #endif }; +template <> +inline void CodeGenerator::mov(const NativeReg& reg, const char *label) // can't use std::string +{ + assert(label); + mov_imm(reg, dummyAddr); + putL(label); +} + namespace util { -static const Mmx mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7); -static const Xmm xmm0(0), xmm1(1), xmm2(2), xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7); -static const Ymm ymm0(0), ymm1(1), ymm2(2), ymm3(3), ymm4(4), ymm5(5), ymm6(6), ymm7(7); -static const Zmm zmm0(0), zmm1(1), zmm2(2), zmm3(3), zmm4(4), zmm5(5), zmm6(6), zmm7(7); -static const Reg32 eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI); -static const Reg16 ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI); -static const Reg8 al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH); -static const AddressFrame ptr(0), byte(8), word(16), dword(32), qword(64), xword(128), yword(256), zword(512); -static const AddressFrame ptr_b(0, true), xword_b(128, true), yword_b(256, true), zword_b(512, true); -static const Fpu st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7); -static const Opmask k0(0), k1(1), k2(2), k3(3), k4(4), k5(5), k6(6), k7(7); -static const BoundsReg bnd0(0), bnd1(1), bnd2(2), bnd3(3); -static const EvexModifierRounding T_sae(EvexModifierRounding::T_SAE), T_rn_sae(EvexModifierRounding::T_RN_SAE), T_rd_sae(EvexModifierRounding::T_RD_SAE), T_ru_sae(EvexModifierRounding::T_RU_SAE), T_rz_sae(EvexModifierRounding::T_RZ_SAE); -static const EvexModifierZero T_z; +static const XBYAK_CONSTEXPR Mmx mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7); +static const XBYAK_CONSTEXPR Xmm xmm0(0), xmm1(1), xmm2(2), xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7); +static const XBYAK_CONSTEXPR Ymm ymm0(0), ymm1(1), ymm2(2), ymm3(3), ymm4(4), ymm5(5), ymm6(6), ymm7(7); +static const XBYAK_CONSTEXPR Zmm zmm0(0), zmm1(1), zmm2(2), zmm3(3), zmm4(4), zmm5(5), zmm6(6), zmm7(7); +static const XBYAK_CONSTEXPR Reg32 eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI); +static const XBYAK_CONSTEXPR Reg16 ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI); +static const XBYAK_CONSTEXPR Reg8 al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH); +static const XBYAK_CONSTEXPR AddressFrame ptr(0), byte(8), word(16), dword(32), qword(64), xword(128), yword(256), zword(512); +static const XBYAK_CONSTEXPR AddressFrame ptr_b(0, true), xword_b(128, true), yword_b(256, true), zword_b(512, true); +static const XBYAK_CONSTEXPR Fpu st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7); +static const XBYAK_CONSTEXPR Opmask k0(0), k1(1), k2(2), k3(3), k4(4), k5(5), k6(6), k7(7); +static const XBYAK_CONSTEXPR BoundsReg bnd0(0), bnd1(1), bnd2(2), bnd3(3); +static const XBYAK_CONSTEXPR EvexModifierRounding T_sae(EvexModifierRounding::T_SAE), T_rn_sae(EvexModifierRounding::T_RN_SAE), T_rd_sae(EvexModifierRounding::T_RD_SAE), T_ru_sae(EvexModifierRounding::T_RU_SAE), T_rz_sae(EvexModifierRounding::T_RZ_SAE); +static const XBYAK_CONSTEXPR EvexModifierZero T_z; #ifdef XBYAK64 -static const Reg64 rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15); -static const Reg32 r8d(8), r9d(9), r10d(10), r11d(11), r12d(12), r13d(13), r14d(14), r15d(15); -static const Reg16 r8w(8), r9w(9), r10w(10), r11w(11), r12w(12), r13w(13), r14w(14), r15w(15); -static const Reg8 r8b(8), r9b(9), r10b(10), r11b(11), r12b(12), r13b(13), r14b(14), r15b(15), spl(Operand::SPL, true), bpl(Operand::BPL, true), sil(Operand::SIL, true), dil(Operand::DIL, true); -static const Xmm xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15); -static const Xmm xmm16(16), xmm17(17), xmm18(18), xmm19(19), xmm20(20), xmm21(21), xmm22(22), xmm23(23); -static const Xmm xmm24(24), xmm25(25), xmm26(26), xmm27(27), xmm28(28), xmm29(29), xmm30(30), xmm31(31); -static const Ymm ymm8(8), ymm9(9), ymm10(10), ymm11(11), ymm12(12), ymm13(13), ymm14(14), ymm15(15); -static const Ymm ymm16(16), ymm17(17), ymm18(18), ymm19(19), ymm20(20), ymm21(21), ymm22(22), ymm23(23); -static const Ymm ymm24(24), ymm25(25), ymm26(26), ymm27(27), ymm28(28), ymm29(29), ymm30(30), ymm31(31); -static const Zmm zmm8(8), zmm9(9), zmm10(10), zmm11(11), zmm12(12), zmm13(13), zmm14(14), zmm15(15); -static const Zmm zmm16(16), zmm17(17), zmm18(18), zmm19(19), zmm20(20), zmm21(21), zmm22(22), zmm23(23); -static const Zmm zmm24(24), zmm25(25), zmm26(26), zmm27(27), zmm28(28), zmm29(29), zmm30(30), zmm31(31); -static const RegRip rip; +static const XBYAK_CONSTEXPR Reg64 rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15); +static const XBYAK_CONSTEXPR Reg32 r8d(8), r9d(9), r10d(10), r11d(11), r12d(12), r13d(13), r14d(14), r15d(15); +static const XBYAK_CONSTEXPR Reg16 r8w(8), r9w(9), r10w(10), r11w(11), r12w(12), r13w(13), r14w(14), r15w(15); +static const XBYAK_CONSTEXPR Reg8 r8b(8), r9b(9), r10b(10), r11b(11), r12b(12), r13b(13), r14b(14), r15b(15), spl(Operand::SPL, true), bpl(Operand::BPL, true), sil(Operand::SIL, true), dil(Operand::DIL, true); +static const XBYAK_CONSTEXPR Xmm xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15); +static const XBYAK_CONSTEXPR Xmm xmm16(16), xmm17(17), xmm18(18), xmm19(19), xmm20(20), xmm21(21), xmm22(22), xmm23(23); +static const XBYAK_CONSTEXPR Xmm xmm24(24), xmm25(25), xmm26(26), xmm27(27), xmm28(28), xmm29(29), xmm30(30), xmm31(31); +static const XBYAK_CONSTEXPR Ymm ymm8(8), ymm9(9), ymm10(10), ymm11(11), ymm12(12), ymm13(13), ymm14(14), ymm15(15); +static const XBYAK_CONSTEXPR Ymm ymm16(16), ymm17(17), ymm18(18), ymm19(19), ymm20(20), ymm21(21), ymm22(22), ymm23(23); +static const XBYAK_CONSTEXPR Ymm ymm24(24), ymm25(25), ymm26(26), ymm27(27), ymm28(28), ymm29(29), ymm30(30), ymm31(31); +static const XBYAK_CONSTEXPR Zmm zmm8(8), zmm9(9), zmm10(10), zmm11(11), zmm12(12), zmm13(13), zmm14(14), zmm15(15); +static const XBYAK_CONSTEXPR Zmm zmm16(16), zmm17(17), zmm18(18), zmm19(19), zmm20(20), zmm21(21), zmm22(22), zmm23(23); +static const XBYAK_CONSTEXPR Zmm zmm24(24), zmm25(25), zmm26(26), zmm27(27), zmm28(28), zmm29(29), zmm30(30), zmm31(31); +static const XBYAK_CONSTEXPR Zmm tmm0(0), tmm1(1), tmm2(2), tmm3(3), tmm4(4), tmm5(5), tmm6(6), tmm7(7); +static const XBYAK_CONSTEXPR RegRip rip; #endif #ifndef XBYAK_DISABLE_SEGMENT -static const Segment es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs); +static const XBYAK_CONSTEXPR Segment es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs); #endif } // util diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index bb40f8cc..85e8bed5 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1,8 +1,8 @@ -const char *getVersionString() const { return "5.78"; } -void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); } +const char *getVersionString() const { return "5.97"; } +void adc(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x10, 2); } void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); } void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); } -void add(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x00, 0); } +void add(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x00, 0); } void add(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x00); } void addpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0x66, isXMM_XMMorMEM); } void addps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0x100, isXMM_XMMorMEM); } @@ -16,8 +16,8 @@ void aesdeclast(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDF, 0x66, void aesenc(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDC, 0x66, isXMM_XMMorMEM, NONE, 0x38); } void aesenclast(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDD, 0x66, isXMM_XMMorMEM, NONE, 0x38); } void aesimc(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDB, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void aeskeygenassist(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0xDF, 0x66, isXMM_XMMorMEM, imm, 0x3A); } -void and_(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x20, 4); } +void aeskeygenassist(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0xDF, 0x66, isXMM_XMMorMEM, imm, 0x3A); } +void and_(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x20, 4); } void and_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x20); } void andn(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, T_0F38, 0xf2, true); } void andnpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x66, isXMM_XMMorMEM); } @@ -25,8 +25,8 @@ void andnps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x100, isX void andpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x66, isXMM_XMMorMEM); } void andps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x100, isXMM_XMMorMEM); } void bextr(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_0F38, 0xf7, false); } -void blendpd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0D, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } -void blendps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0C, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } +void blendpd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0D, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } +void blendps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0C, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } void blendvpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x66, isXMM_XMMorMEM, NONE, 0x38); } void blendvps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x66, isXMM_XMMorMEM, NONE, 0x38); } void blsi(const Reg32e& r, const Operand& op) { opGpr(Reg32e(3, r.getBit()), op, r, T_0F38, 0xf3, false); } @@ -45,20 +45,22 @@ void bsf(const Reg®, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e void bsr(const Reg®, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBD); } void bswap(const Reg32e& reg) { opModR(Reg32(1), reg, 0x0F); } void bt(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xA3); } -void bt(const Operand& op, uint8 imm) { opR_ModM(op, 16|32|64, 4, 0x0f, 0xba, NONE, false, 1); db(imm); } +void bt(const Operand& op, uint8_t imm) { opR_ModM(op, 16|32|64, 4, 0x0f, 0xba, NONE, false, 1); db(imm); } void btc(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xBB); } -void btc(const Operand& op, uint8 imm) { opR_ModM(op, 16|32|64, 7, 0x0f, 0xba, NONE, false, 1); db(imm); } +void btc(const Operand& op, uint8_t imm) { opR_ModM(op, 16|32|64, 7, 0x0f, 0xba, NONE, false, 1); db(imm); } void btr(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xB3); } -void btr(const Operand& op, uint8 imm) { opR_ModM(op, 16|32|64, 6, 0x0f, 0xba, NONE, false, 1); db(imm); } +void btr(const Operand& op, uint8_t imm) { opR_ModM(op, 16|32|64, 6, 0x0f, 0xba, NONE, false, 1); db(imm); } void bts(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xAB); } -void bts(const Operand& op, uint8 imm) { opR_ModM(op, 16|32|64, 5, 0x0f, 0xba, NONE, false, 1); db(imm); } +void bts(const Operand& op, uint8_t imm) { opR_ModM(op, 16|32|64, 5, 0x0f, 0xba, NONE, false, 1); db(imm); } void bzhi(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_0F38, 0xf5, false); } void cbw() { db(0x66); db(0x98); } void cdq() { db(0x99); } void clc() { db(0xF8); } void cld() { db(0xFC); } void clflush(const Address& addr) { opModM(addr, Reg32(7), 0x0F, 0xAE); } +void clflushopt(const Address& addr) { db(0x66); opModM(addr, Reg32(7), 0x0F, 0xAE); } void cli() { db(0xFA); } +void clzero() { db(0x0F); db(0x01); db(0xFC); } void cmc() { db(0xF5); } void cmova(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 7); }//-V524 void cmovae(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 3); }//-V524 @@ -90,7 +92,7 @@ void cmovpe(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | void cmovpo(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 11); }//-V524 void cmovs(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 8); }//-V524 void cmovz(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 4); }//-V524 -void cmp(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x38, 7); } +void cmp(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x38, 7); } void cmp(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x38); } void cmpeqpd(const Xmm& x, const Operand& op) { cmppd(x, op, 0); } void cmpeqps(const Xmm& x, const Operand& op) { cmpps(x, op, 0); } @@ -120,12 +122,12 @@ void cmpordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 7); } void cmpordps(const Xmm& x, const Operand& op) { cmpps(x, op, 7); } void cmpordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 7); } void cmpordss(const Xmm& x, const Operand& op) { cmpss(x, op, 7); } -void cmppd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x66, isXMM_XMMorMEM, imm8); } -void cmpps(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x100, isXMM_XMMorMEM, imm8); } +void cmppd(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0xC2, 0x66, isXMM_XMMorMEM, imm8); } +void cmpps(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0xC2, 0x100, isXMM_XMMorMEM, imm8); } void cmpsb() { db(0xA6); } void cmpsd() { db(0xA7); } -void cmpsd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF2, isXMM_XMMorMEM, imm8); } -void cmpss(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF3, isXMM_XMMorMEM, imm8); } +void cmpsd(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0xC2, 0xF2, isXMM_XMMorMEM, imm8); } +void cmpss(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0xC2, 0xF3, isXMM_XMMorMEM, imm8); } void cmpsw() { db(0x66); db(0xA7); } void cmpunordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 3); } void cmpunordps(const Xmm& x, const Operand& op) { cmpps(x, op, 3); } @@ -167,10 +169,11 @@ void divpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0x66, isXMM void divps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0x100, isXMM_XMMorMEM); } void divsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0xF2, isXMM_XMMorMEM); } void divss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0xF3, isXMM_XMMorMEM); } -void dppd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } -void dpps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } +void dppd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } +void dpps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } void emms() { db(0x0F); db(0x77); } -void extractps(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x17, imm); } +void enter(uint16_t x, uint8_t y) { db(0xC8); dw(x); db(y); } +void extractps(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x17, imm); } void f2xm1() { db(0xD9); db(0xF0); } void fabs() { db(0xD9); db(0xE1); } void fadd(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 0, 0); } @@ -179,7 +182,10 @@ void fadd(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C0, 0xDCC void faddp() { db(0xDE); db(0xC1); } void faddp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC0); } void faddp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC0); } +void fbld(const Address& addr) { opModM(addr, Reg32(4), 0xDF, 0x100); } +void fbstp(const Address& addr) { opModM(addr, Reg32(6), 0xDF, 0x100); } void fchs() { db(0xD9); db(0xE0); } +void fclex() { db(0x9B); db(0xDB); db(0xE2); } void fcmovb(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAC0, 0x00C0); } void fcmovb(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAC0, 0x00C0); } void fcmovbe(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAD0, 0x00D0); } @@ -240,6 +246,7 @@ void fld(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 0, 0); } void fld(const Fpu& reg) { opFpu(reg, 0xD9, 0xC0); } void fld1() { db(0xD9); db(0xE8); } void fldcw(const Address& addr) { opModM(addr, Reg32(5), 0xD9, 0x100); } +void fldenv(const Address& addr) { opModM(addr, Reg32(4), 0xD9, 0x100); } void fldl2e() { db(0xD9); db(0xEA); } void fldl2t() { db(0xD9); db(0xE9); } void fldlg2() { db(0xD9); db(0xEC); } @@ -252,22 +259,33 @@ void fmul(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C8, 0xDCC void fmulp() { db(0xDE); db(0xC9); } void fmulp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC8); } void fmulp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC8); } +void fnclex() { db(0xDB); db(0xE2); } void fninit() { db(0xDB); db(0xE3); } void fnop() { db(0xD9); db(0xD0); } +void fnsave(const Address& addr) { opModM(addr, Reg32(6), 0xDD, 0x100); } +void fnstcw(const Address& addr) { opModM(addr, Reg32(7), 0xD9, 0x100); } +void fnstenv(const Address& addr) { opModM(addr, Reg32(6), 0xD9, 0x100); } +void fnstsw(const Address& addr) { opModM(addr, Reg32(7), 0xDD, 0x100); } +void fnstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) XBYAK_THROW(ERR_BAD_PARAMETER) db(0xDF); db(0xE0); } void fpatan() { db(0xD9); db(0xF3); } void fprem() { db(0xD9); db(0xF8); } void fprem1() { db(0xD9); db(0xF5); } void fptan() { db(0xD9); db(0xF2); } void frndint() { db(0xD9); db(0xFC); } +void frstor(const Address& addr) { opModM(addr, Reg32(4), 0xDD, 0x100); } +void fsave(const Address& addr) { db(0x9B); opModM(addr, Reg32(6), 0xDD, 0x100); } void fscale() { db(0xD9); db(0xFD); } void fsin() { db(0xD9); db(0xFE); } void fsincos() { db(0xD9); db(0xFB); } void fsqrt() { db(0xD9); db(0xFA); } void fst(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 2, 0); } void fst(const Fpu& reg) { opFpu(reg, 0xDD, 0xD0); } -void fstcw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xD9, NONE); } +void fstcw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xD9, 0x100); } +void fstenv(const Address& addr) { db(0x9B); opModM(addr, Reg32(6), 0xD9, 0x100); } void fstp(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 3, 0); } void fstp(const Fpu& reg) { opFpu(reg, 0xDD, 0xD8); } +void fstsw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xDD, 0x100); } +void fstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) XBYAK_THROW(ERR_BAD_PARAMETER) db(0x9B); db(0xDF); db(0xE0); } void fsub(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 4, 0); } void fsub(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8E0, 0xDCE8); } void fsub(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8E0, 0xDCE8); } @@ -294,11 +312,12 @@ void fwait() { db(0x9B); } void fxam() { db(0xD9); db(0xE5); } void fxch() { db(0xD9); db(0xC9); } void fxch(const Fpu& reg) { opFpu(reg, 0xD9, 0xC8); } +void fxrstor(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0xAE); } void fxtract() { db(0xD9); db(0xF4); } void fyl2x() { db(0xD9); db(0xF1); } void fyl2xp1() { db(0xD9); db(0xF9); } -void gf2p8affineinvqb(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0xCF, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } -void gf2p8affineqb(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0xCE, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } +void gf2p8affineinvqb(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0xCF, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } +void gf2p8affineqb(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0xCE, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } void gf2p8mulb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xCF, 0x66, isXMM_XMMorMEM, NONE, 0x38); } void haddpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0x66, isXMM_XMMorMEM); } void haddps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0xF2, isXMM_XMMorMEM); } @@ -306,8 +325,12 @@ void hsubpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0x66, isXM void hsubps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0xF2, isXMM_XMMorMEM); } void idiv(const Operand& op) { opR_ModM(op, 0, 7, 0xF6); } void imul(const Operand& op) { opR_ModM(op, 0, 5, 0xF6); } +void in_(const Reg& a, const Reg& d) { opInOut(a, d, 0xEC); } +void in_(const Reg& a, uint8_t v) { opInOut(a, 0xE4, v); } void inc(const Operand& op) { opIncDec(op, 0x40, 0); } -void insertps(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, imm, 0x3A); } +void insertps(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, imm, 0x3A); } +void int3() { db(0xCC); } +void int_(uint8_t x) { db(0xCD); db(x); } void ja(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); }//-V524 void ja(const char *label, LabelType type = T_AUTO) { ja(std::string(label), type); }//-V524 void ja(const void *addr) { opJmpAbs(addr, T_NEAR, 0x77, 0x87, 0x0F); }//-V524 @@ -431,12 +454,28 @@ void jz(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0 void lahf() { db(0x9F); } void lddqu(const Xmm& xmm, const Address& addr) { db(0xF2); opModM(addr, xmm, 0x0F, 0xF0); } void ldmxcsr(const Address& addr) { opModM(addr, Reg32(2), 0x0F, 0xAE); } -void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModM(addr, reg, 0x8D); } +void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModM(addr, reg, 0x8D); } +void leave() { db(0xC9); } void lfence() { db(0x0F); db(0xAE); db(0xE8); } +void lfs(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB4); } +void lgs(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB5); } void lock() { db(0xF0); } +void lodsb() { db(0xAC); } +void lodsd() { db(0xAD); } +void lodsw() { db(0x66); db(0xAD); } +void loop(const Label& label) { opJmp(label, T_SHORT, 0xE2, 0, 0); } +void loop(const char *label) { loop(std::string(label)); } +void loop(std::string label) { opJmp(label, T_SHORT, 0xE2, 0, 0); } +void loope(const Label& label) { opJmp(label, T_SHORT, 0xE1, 0, 0); } +void loope(const char *label) { loope(std::string(label)); } +void loope(std::string label) { opJmp(label, T_SHORT, 0xE1, 0, 0); } +void loopne(const Label& label) { opJmp(label, T_SHORT, 0xE0, 0, 0); } +void loopne(const char *label) { loopne(std::string(label)); } +void loopne(std::string label) { opJmp(label, T_SHORT, 0xE0, 0, 0); } +void lss(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x0F, 0xB2); } void lzcnt(const Reg®, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBD); } void maskmovdqu(const Xmm& reg1, const Xmm& reg2) { db(0x66); opModR(reg1, reg2, 0x0F, 0xF7); } -void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) throw Error(ERR_BAD_COMBINATION); opModR(reg1, reg2, 0x0F, 0xF7); } +void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opModR(reg1, reg2, 0x0F, 0xF7); } void maxpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0x66, isXMM_XMMorMEM); } void maxps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0x100, isXMM_XMMorMEM); } void maxsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0xF2, isXMM_XMMorMEM); } @@ -447,6 +486,7 @@ void minps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0x100, isXM void minsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0xF2, isXMM_XMMorMEM); } void minss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0xF3, isXMM_XMMorMEM); } void monitor() { db(0x0F); db(0x01); db(0xC8); } +void monitorx() { db(0x0F); db(0x01); db(0xFA); } void movapd(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x29); } void movapd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, 0x66); } void movaps(const Address& addr, const Xmm& xmm) { opModM(addr, xmm, 0x0F, 0x29); } @@ -476,7 +516,7 @@ void movntdqa(const Xmm& xmm, const Address& addr) { db(0x66); opModM(addr, xmm, void movnti(const Address& addr, const Reg32e& reg) { opModM(addr, reg, 0x0F, 0xC3); } void movntpd(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0x2B); } void movntps(const Address& addr, const Xmm& xmm) { opModM(addr, Mmx(xmm.getIdx()), 0x0F, 0x2B); } -void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) throw Error(ERR_BAD_COMBINATION); opModM(addr, mmx, 0x0F, 0xE7); } +void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opModM(addr, mmx, 0x0F, 0xE7); } void movq(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModM(addr, mmx, 0x0F, mmx.isXMM() ? 0xD6 : 0x7F); } void movq(const Mmx& mmx, const Operand& op) { if (mmx.isXMM()) db(0xF3); opModRM(mmx, op, (mmx.getKind() == op.getKind()), op.isMEM(), 0x0F, mmx.isXMM() ? 0x7E : 0x6F); } void movq2dq(const Xmm& xmm, const Mmx& mmx) { db(0xF3); opModR(xmm, mmx, 0x0F, 0xD6); } @@ -495,7 +535,7 @@ void movupd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0x66); } void movups(const Address& addr, const Xmm& xmm) { opModM(addr, xmm, 0x0F, 0x11); } void movups(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0x100); } void movzx(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0xB6); } -void mpsadbw(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x42, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } +void mpsadbw(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x42, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } void mul(const Operand& op) { opR_ModM(op, 0, 4, 0xF6); } void mulpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0x66, isXMM_XMMorMEM); } void mulps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0x100, isXMM_XMMorMEM); } @@ -503,12 +543,18 @@ void mulsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0xF2, isXMM void mulss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0xF3, isXMM_XMMorMEM); } void mulx(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, T_F2 | T_0F38, 0xf6, true); } void mwait() { db(0x0F); db(0x01); db(0xC9); } +void mwaitx() { db(0x0F); db(0x01); db(0xFB); } void neg(const Operand& op) { opR_ModM(op, 0, 3, 0xF6); } void not_(const Operand& op) { opR_ModM(op, 0, 2, 0xF6); } -void or_(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x08, 1); } +void or_(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x08, 1); } void or_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x08); } void orpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x56, 0x66, isXMM_XMMorMEM); } void orps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x56, 0x100, isXMM_XMMorMEM); } +void out_(const Reg& d, const Reg& a) { opInOut(a, d, 0xEE); } +void out_(uint8_t v, const Reg& a) { opInOut(a, 0xE6, v); } +void outsb() { db(0x6E); } +void outsd() { db(0x6F); } +void outsw() { db(0x66); db(0x6F); } void pabsb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1C, 0x66, NONE, 0x38); } void pabsd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1E, 0x66, NONE, 0x38); } void pabsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1D, 0x66, NONE, 0x38); } @@ -524,36 +570,36 @@ void paddsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xED); } void paddusb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDC); } void paddusw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDD); } void paddw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFD); } -void palignr(const Mmx& mmx, const Operand& op, int imm) { opMMX(mmx, op, 0x0f, 0x66, static_cast(imm), 0x3a); } +void palignr(const Mmx& mmx, const Operand& op, int imm) { opMMX(mmx, op, 0x0f, 0x66, static_cast(imm), 0x3a); } void pand(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDB); } void pandn(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDF); } void pause() { db(0xF3); db(0x90); } void pavgb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE0); } void pavgw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE3); } void pblendvb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x10, 0x66, isXMM_XMMorMEM, NONE, 0x38); } -void pblendw(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0E, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } +void pblendw(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0E, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } void pclmulhqhdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x11); } void pclmulhqlqdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x01); } void pclmullqhdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x10); } void pclmullqlqdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x00); } -void pclmulqdq(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x44, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } +void pclmulqdq(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x44, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } void pcmpeqb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x74); } void pcmpeqd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x76); } void pcmpeqq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x29, 0x66, isXMM_XMMorMEM, NONE, 0x38); } void pcmpeqw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x75); } -void pcmpestri(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x61, 0x66, isXMM_XMMorMEM, imm, 0x3A); } -void pcmpestrm(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x60, 0x66, isXMM_XMMorMEM, imm, 0x3A); } +void pcmpestri(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x61, 0x66, isXMM_XMMorMEM, imm, 0x3A); } +void pcmpestrm(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x60, 0x66, isXMM_XMMorMEM, imm, 0x3A); } void pcmpgtb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x64); } void pcmpgtd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x66); } void pcmpgtq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x37, 0x66, isXMM_XMMorMEM, NONE, 0x38); } void pcmpgtw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x65); } -void pcmpistri(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x63, 0x66, isXMM_XMMorMEM, imm, 0x3A); } -void pcmpistrm(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x62, 0x66, isXMM_XMMorMEM, imm, 0x3A); } +void pcmpistri(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x63, 0x66, isXMM_XMMorMEM, imm, 0x3A); } +void pcmpistrm(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x62, 0x66, isXMM_XMMorMEM, imm, 0x3A); } void pdep(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, T_F2 | T_0F38, 0xf5, true); } void pext(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, T_F3 | T_0F38, 0xf5, true); } -void pextrb(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x14, imm); } -void pextrd(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x16, imm); } -void pextrw(const Operand& op, const Mmx& xmm, uint8 imm) { opExt(op, xmm, 0x15, imm, true); } +void pextrb(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x14, imm); } +void pextrd(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x16, imm); } +void pextrw(const Operand& op, const Mmx& xmm, uint8_t imm) { opExt(op, xmm, 0x15, imm, true); } void phaddd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x02, 0x66, NONE, 0x38); } void phaddsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x03, 0x66, NONE, 0x38); } void phaddw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x01, 0x66, NONE, 0x38); } @@ -561,9 +607,9 @@ void phminposuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x41, 0x66, void phsubd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x06, 0x66, NONE, 0x38); } void phsubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x07, 0x66, NONE, 0x38); } void phsubw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x05, 0x66, NONE, 0x38); } -void pinsrb(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x20, 0x66, isXMM_REG32orMEM, imm, 0x3A); } -void pinsrd(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x22, 0x66, isXMM_REG32orMEM, imm, 0x3A); } -void pinsrw(const Mmx& mmx, const Operand& op, int imm) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opGen(mmx, op, 0xC4, mmx.isXMM() ? 0x66 : NONE, 0, imm); } +void pinsrb(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x20, 0x66, isXMM_REG32orMEM, imm, 0x3A); } +void pinsrd(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x22, 0x66, isXMM_REG32orMEM, imm, 0x3A); } +void pinsrw(const Mmx& mmx, const Operand& op, int imm) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(mmx, op, 0xC4, mmx.isXMM() ? 0x66 : NONE, 0, imm); } void pmaddubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x04, 0x66, NONE, 0x38); } void pmaddwd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF5); } void pmaxsb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3C, 0x66, isXMM_XMMorMEM, NONE, 0x38); } @@ -609,10 +655,10 @@ void prefetchw(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0x0D); } void prefetchwt1(const Address& addr) { opModM(addr, Reg32(2), 0x0F, 0x0D); } void psadbw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF6); } void pshufb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x00, 0x66, NONE, 0x38); } -void pshufd(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0x66, imm8); } -void pshufhw(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0xF3, imm8); } -void pshuflw(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0xF2, imm8); } -void pshufw(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0x00, imm8); } +void pshufd(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x70, 0x66, imm8); } +void pshufhw(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x70, 0xF3, imm8); } +void pshuflw(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x70, 0xF2, imm8); } +void pshufw(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x70, 0x00, imm8); } void psignb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x08, 0x66, NONE, 0x38); } void psignd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x0A, 0x66, NONE, 0x38); } void psignw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x09, 0x66, NONE, 0x38); } @@ -661,21 +707,25 @@ void rcr(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 3); } void rcr(const Operand& op, int imm) { opShift(op, imm, 3); } void rdmsr() { db(0x0F); db(0x32); } void rdpmc() { db(0x0F); db(0x33); } -void rdrand(const Reg& r) { if (r.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModR(Reg(6, Operand::REG, r.getBit()), r, 0x0F, 0xC7); } -void rdseed(const Reg& r) { if (r.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModR(Reg(7, Operand::REG, r.getBit()), r, 0x0F, 0xC7); } +void rdrand(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModR(Reg(6, Operand::REG, r.getBit()), r, 0x0F, 0xC7); } +void rdseed(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModR(Reg(7, Operand::REG, r.getBit()), r, 0x0F, 0xC7); } void rdtsc() { db(0x0F); db(0x31); } void rdtscp() { db(0x0F); db(0x01); db(0xF9); } void rep() { db(0xF3); } +void repe() { db(0xF3); } +void repne() { db(0xF2); } +void repnz() { db(0xF2); } +void repz() { db(0xF3); } void ret(int imm = 0) { if (imm) { db(0xC2); dw(imm); } else { db(0xC3); } } void rol(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 0); } void rol(const Operand& op, int imm) { opShift(op, imm, 0); } void ror(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 1); } void ror(const Operand& op, int imm) { opShift(op, imm, 1); } -void rorx(const Reg32e& r, const Operand& op, uint8 imm) { opGpr(r, op, Reg32e(0, r.getBit()), T_0F3A | T_F2, 0xF0, false, imm); } -void roundpd(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x09, 0x66, isXMM_XMMorMEM, imm, 0x3A); } -void roundps(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x08, 0x66, isXMM_XMMorMEM, imm, 0x3A); } -void roundsd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0B, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } -void roundss(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0A, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } +void rorx(const Reg32e& r, const Operand& op, uint8_t imm) { opGpr(r, op, Reg32e(0, r.getBit()), T_0F3A | T_F2, 0xF0, false, imm); } +void roundpd(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x09, 0x66, isXMM_XMMorMEM, imm, 0x3A); } +void roundps(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x08, 0x66, isXMM_XMMorMEM, imm, 0x3A); } +void roundsd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0B, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } +void roundss(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0A, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } void rsqrtps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x52, 0x100, isXMM_XMMorMEM); } void rsqrtss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x52, 0xF3, isXMM_XMMorMEM); } void sahf() { db(0x9E); } @@ -684,7 +734,7 @@ void sal(const Operand& op, int imm) { opShift(op, imm, 4); } void sar(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 7); } void sar(const Operand& op, int imm) { opShift(op, imm, 7); } void sarx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_F3 | T_0F38, 0xf7, false); } -void sbb(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x18, 3); } +void sbb(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x18, 3); } void sbb(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x18); } void scasb() { db(0xAE); } void scasd() { db(0xAF); } @@ -723,22 +773,22 @@ void sfence() { db(0x0F); db(0xAE); db(0xF8); } void sha1msg1(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xC9, NONE, isXMM_XMMorMEM, NONE, 0x38); } void sha1msg2(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xCA, NONE, isXMM_XMMorMEM, NONE, 0x38); } void sha1nexte(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xC8, NONE, isXMM_XMMorMEM, NONE, 0x38); } -void sha1rnds4(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0xCC, NONE, isXMM_XMMorMEM, imm, 0x3A); } +void sha1rnds4(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0xCC, NONE, isXMM_XMMorMEM, imm, 0x3A); } void sha256msg1(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xCC, NONE, isXMM_XMMorMEM, NONE, 0x38); } void sha256msg2(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xCD, NONE, isXMM_XMMorMEM, NONE, 0x38); } void sha256rnds2(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xCB, NONE, isXMM_XMMorMEM, NONE, 0x38); } void shl(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 4); } void shl(const Operand& op, int imm) { opShift(op, imm, 4); } void shld(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(op, reg, 0, 0xA4, &_cl); } -void shld(const Operand& op, const Reg& reg, uint8 imm) { opShxd(op, reg, imm, 0xA4); } +void shld(const Operand& op, const Reg& reg, uint8_t imm) { opShxd(op, reg, imm, 0xA4); } void shlx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_66 | T_0F38, 0xf7, false); } void shr(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 5); } void shr(const Operand& op, int imm) { opShift(op, imm, 5); } void shrd(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(op, reg, 0, 0xAC, &_cl); } -void shrd(const Operand& op, const Reg& reg, uint8 imm) { opShxd(op, reg, imm, 0xAC); } +void shrd(const Operand& op, const Reg& reg, uint8_t imm) { opShxd(op, reg, imm, 0xAC); } void shrx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_F2 | T_0F38, 0xf7, false); } -void shufpd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC6, 0x66, isXMM_XMMorMEM, imm8); } -void shufps(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC6, 0x100, isXMM_XMMorMEM, imm8); } +void shufpd(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0xC6, 0x66, isXMM_XMMorMEM, imm8); } +void shufps(const Xmm& xmm, const Operand& op, uint8_t imm8) { opGen(xmm, op, 0xC6, 0x100, isXMM_XMMorMEM, imm8); } void sqrtpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0x66, isXMM_XMMorMEM); } void sqrtps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0x100, isXMM_XMMorMEM); } void sqrtsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0xF2, isXMM_XMMorMEM); } @@ -751,12 +801,14 @@ void stmxcsr(const Address& addr) { opModM(addr, Reg32(3), 0x0F, 0xAE); } void stosb() { db(0xAA); } void stosd() { db(0xAB); } void stosw() { db(0x66); db(0xAB); } -void sub(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x28, 5); } +void sub(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x28, 5); } void sub(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x28); } void subpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x66, isXMM_XMMorMEM); } void subps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x100, isXMM_XMMorMEM); } void subsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0xF2, isXMM_XMMorMEM); } void subss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0xF3, isXMM_XMMorMEM); } +void sysenter() { db(0x0F); db(0x34); } +void sysexit() { db(0x0F); db(0x35); } void tzcnt(const Reg®, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBC); } void ucomisd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2E, 0x66, isXMM_XMMorMEM); } void ucomiss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2E, 0x100, isXMM_XMMorMEM); } @@ -776,19 +828,19 @@ void vaesdeclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operan void vaesenc(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_YMM | T_EVEX, 0xDC); } void vaesenclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_YMM | T_EVEX, 0xDD); } void vaesimc(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_W0, 0xDB); } -void vaeskeygenassist(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0xDF, imm); } +void vaeskeygenassist(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0xDF, imm); } void vandnpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x55); } void vandnps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x55); } void vandpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x54); } void vandps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x54); } -void vblendpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0D, imm); } -void vblendps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0C, imm); } +void vblendpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0D, imm); } +void vblendps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0C, imm); } void vblendvpd(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4B, x4.getIdx() << 4); } void vblendvps(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4A, x4.getIdx() << 4); } void vbroadcastf128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x1A); } void vbroadcasti128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x5A); } -void vbroadcastsd(const Ymm& y, const Operand& op) { if (!op.isMEM() && !(y.isYMM() && op.isXMM()) && !(y.isZMM() && op.isXMM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(y, op, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_N8, 0x19); } -void vbroadcastss(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x18); } +void vbroadcastsd(const Ymm& y, const Operand& op) { if (!op.isMEM() && !(y.isYMM() && op.isXMM()) && !(y.isZMM() && op.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(y, op, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_N8, 0x19); } +void vbroadcastss(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x18); } void vcmpeq_ospd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 16); } void vcmpeq_osps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 16); } void vcmpeq_ossd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 16); } @@ -901,10 +953,10 @@ void vcmpordpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, void vcmpordps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 7); } void vcmpordsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 7); } void vcmpordss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 7); } -void vcmppd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xC2, imm); } -void vcmpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_0F | T_YMM, 0xC2, imm); } -void vcmpsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F, 0xC2, imm); } -void vcmpss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F, 0xC2, imm); } +void vcmppd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xC2, imm); } +void vcmpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_0F | T_YMM, 0xC2, imm); } +void vcmpsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F, 0xC2, imm); } +void vcmpss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F, 0xC2, imm); } void vcmptrue_uspd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 31); } void vcmptrue_usps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 31); } void vcmptrue_ussd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 31); } @@ -930,7 +982,7 @@ void vcvtpd2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_66 | T_ void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); } void vcvtps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5B); } void vcvtps2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x5A); } -void vcvtps2ph(const Operand& op, const Xmm& x, uint8 imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x1D, imm); } +void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x1D, imm); } void vcvtsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W0 | T_EVEX | T_EW0 | T_N4 | T_ER_X, 0x2D); } void vcvtsd2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX | T_ER_X, 0x5A); } void vcvtsi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_0F | T_F2 | T_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x2A); } @@ -945,11 +997,11 @@ void vdivpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) void vdivps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5E); } void vdivsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x5E); } void vdivss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x5E); } -void vdppd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x41, imm); } -void vdpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x40, imm); } -void vextractf128(const Operand& op, const Ymm& y, uint8 imm) { if (!(op.isXMEM() && y.isYMM())) throw Error(ERR_BAD_COMBINATION); opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x19, imm); } -void vextracti128(const Operand& op, const Ymm& y, uint8 imm) { if (!(op.isXMEM() && y.isYMM())) throw Error(ERR_BAD_COMBINATION); opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x39, imm); } -void vextractps(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_N4, 0x17, imm); } +void vdppd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x41, imm); } +void vdpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x40, imm); } +void vextractf128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x19, imm); } +void vextracti128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x39, imm); } +void vextractps(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_N4, 0x17, imm); } void vfmadd132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x98); } void vfmadd132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x98); } void vfmadd132sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0x99); } @@ -1014,16 +1066,16 @@ void vgatherdpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1 void vgatherdps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x92, 1); } void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x93, 1); } void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x93, 2); } -void vgf2p8affineinvqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCF, imm); } -void vgf2p8affineqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCE, imm); } +void vgf2p8affineinvqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCF, imm); } +void vgf2p8affineqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCE, imm); } void vgf2p8mulb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_SAE_Z, 0xCF); } void vhaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0x7C); } void vhaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0x7C); } void vhsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0x7D); } void vhsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0x7D); } -void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); } -void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); } -void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_W0 | T_EW0 | T_EVEX, 0x21, imm); } +void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); } +void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); } +void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_W0 | T_EW0 | T_EVEX, 0x21, imm); } void vlddqu(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_F2 | T_W0 | T_YMM, 0xF0); } void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, T_0F, 0xAE); } void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_66, 0xF7); } @@ -1043,25 +1095,25 @@ void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_ void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x28); } void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX | T_M_K, 0x29); } void vmovaps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x28); } -void vmovd(const Operand& op, const Xmm& x) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x7E); } -void vmovd(const Xmm& x, const Operand& op) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x6E); } +void vmovd(const Operand& op, const Xmm& x) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x7E); } +void vmovd(const Xmm& x, const Operand& op) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x6E); } void vmovddup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_DUP | T_F2 | T_0F | T_EW1 | T_YMM | T_EVEX | T_ER_X | T_ER_Y | T_ER_Z, 0x12); } void vmovdqa(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_YMM, 0x7F); } void vmovdqa(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_YMM, 0x6F); } void vmovdqu(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_F3 | T_0F | T_YMM, 0x7F); } void vmovdqu(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_YMM, 0x6F); } -void vmovhlps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x12); } +void vmovhlps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x12); } void vmovhpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x17); } -void vmovhpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, op1, op2, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x16); } +void vmovhpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x16); } void vmovhps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_EVEX | T_EW0 | T_N8, 0x17); } -void vmovhps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, op1, op2, T_0F | T_EVEX | T_EW0 | T_N8, 0x16); } -void vmovlhps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x16); } +void vmovhps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_0F | T_EVEX | T_EW0 | T_N8, 0x16); } +void vmovlhps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x16); } void vmovlpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x13); } -void vmovlpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, op1, op2, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x12); } +void vmovlpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x12); } void vmovlps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_EVEX | T_EW0 | T_N8, 0x13); } -void vmovlps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, op1, op2, T_0F | T_EVEX | T_EW0 | T_N8, 0x12); } -void vmovmskpd(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_66 | T_W0 | T_YMM, 0x50); } -void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_W0 | T_YMM, 0x50); } +void vmovlps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_0F | T_EVEX | T_EW0 | T_N8, 0x12); } +void vmovmskpd(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_66 | T_W0 | T_YMM, 0x50); } +void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_W0 | T_YMM, 0x50); } void vmovntdq(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW0, 0xE7); } void vmovntdqa(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0, 0x2A); } void vmovntpd(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW1, 0x2B); } @@ -1071,17 +1123,17 @@ void vmovq(const Xmm& x, const Address& addr) { int type, code; if (x.getIdx() < void vmovq(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_F3 | T_EVEX | T_EW1 | T_N8, 0x7E); } void vmovsd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX | T_M_K, 0x11); } void vmovsd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX, 0x10); } -void vmovsd(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX, 0x10); } +void vmovsd(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX, 0x10); } void vmovshdup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_EW0 | T_YMM | T_EVEX, 0x16); } void vmovsldup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_EW0 | T_YMM | T_EVEX, 0x12); } void vmovss(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX | T_M_K, 0x11); } void vmovss(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX, 0x10); } -void vmovss(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX, 0x10); } +void vmovss(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX, 0x10); } void vmovupd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_M_K, 0x11); } void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x10); } void vmovups(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX | T_M_K, 0x11); } void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x10); } -void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x42, imm); } +void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x42, imm); } void vmulpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x59); } void vmulps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x59); } void vmulsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x59); } @@ -1103,47 +1155,47 @@ void vpaddsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, void vpaddusb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xDC); } void vpaddusw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xDD); } void vpaddw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xFD); } -void vpalignr(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_YMM | T_EVEX, 0x0F, imm); } +void vpalignr(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_YMM | T_EVEX, 0x0F, imm); } void vpand(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xDB); } void vpandn(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xDF); } void vpavgb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xE0); } void vpavgw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xE3); } -void vpblendd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x02, imm); } +void vpblendd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x02, imm); } void vpblendvb(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4C, x4.getIdx() << 4); } -void vpblendw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0E, imm); } -void vpbroadcastb(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N1 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x78); } -void vpbroadcastd(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x58); } -void vpbroadcastq(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_W0 | T_EW1 | T_YMM | T_EVEX, 0x59); } -void vpbroadcastw(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x79); } -void vpclmulqdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM | T_EVEX, 0x44, imm); } +void vpblendw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0E, imm); } +void vpbroadcastb(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N1 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x78); } +void vpbroadcastd(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x58); } +void vpbroadcastq(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_W0 | T_EW1 | T_YMM | T_EVEX, 0x59); } +void vpbroadcastw(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x79); } +void vpclmulqdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM | T_EVEX, 0x44, imm); } void vpcmpeqb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x74); } void vpcmpeqd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x76); } void vpcmpeqq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x29); } void vpcmpeqw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x75); } -void vpcmpestri(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x61, imm); } -void vpcmpestrm(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x60, imm); } +void vpcmpestri(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x61, imm); } +void vpcmpestrm(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x60, imm); } void vpcmpgtb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x64); } void vpcmpgtd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x66); } void vpcmpgtq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x37); } void vpcmpgtw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x65); } -void vpcmpistri(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x63, imm); } -void vpcmpistrm(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x62, imm); } -void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); } -void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); } +void vpcmpistri(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x63, imm); } +void vpcmpistrm(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x62, imm); } +void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); } +void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); } void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x36); } void vpermilpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x0D); } -void vpermilpd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_EVEX | T_B64, 0x05, imm); } +void vpermilpd(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_EVEX | T_B64, 0x05, imm); } void vpermilps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x0C); } -void vpermilps(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_EVEX | T_B32, 0x04, imm); } -void vpermpd(const Ymm& y, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(y, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x01, imm); } +void vpermilps(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_EVEX | T_B32, 0x04, imm); } +void vpermpd(const Ymm& y, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(y, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x01, imm); } void vpermpd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x16); } void vpermps(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x16); } -void vpermq(const Ymm& y, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(y, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x00, imm); } +void vpermq(const Ymm& y, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(y, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x00, imm); } void vpermq(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x36); } -void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(8|16|i32e) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x14, imm); } -void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x16, imm); } -void vpextrq(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x16, imm); } -void vpextrw(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(16|i32e) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); if (op.isREG() && x.getIdx() < 16) { opAVX_X_X_XM(Xmm(op.getIdx()), xm0, x, T_0F | T_66, 0xC5, imm); } else { opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N2, 0x15, imm); } } +void vpextrb(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(8|16|i32e) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x14, imm); } +void vpextrd(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x16, imm); } +void vpextrq(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x16, imm); } +void vpextrw(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(16|i32e) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) if (op.isREG() && x.getIdx() < 16) { opAVX_X_X_XM(Xmm(op.getIdx()), xm0, x, T_0F | T_66, 0xC5, imm); } else { opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N2, 0x15, imm); } } void vpgatherdd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x90, 1); } void vpgatherdq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x90, 0); } void vpgatherqd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x91, 2); } @@ -1155,10 +1207,10 @@ void vphminposuw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 void vphsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x06); } void vphsubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x07); } void vphsubw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x05); } -void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); opVex(x1, &x2, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x20, imm); } -void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); opVex(x1, &x2, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x22, imm); } -void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(64) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); opVex(x1, &x2, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x22, imm); } -void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); opVex(x1, &x2, op, T_0F | T_66 | T_EVEX | T_N2, 0xC4, imm); } +void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x20, imm); } +void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x22, imm); } +void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(64) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x22, imm); } +void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F | T_66 | T_EVEX | T_N2, 0xC4, imm); } void vpmaddubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x04); } void vpmaddwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF5); } void vpmaskmovd(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x8E); } @@ -1177,7 +1229,7 @@ void vpminsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, void vpminub(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xDA); } void vpminud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x3B); } void vpminuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x3A); } -void vpmovmskb(const Reg32e& r, const Xmm& x) { if (!x.is(Operand::XMM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(x.isYMM() ? Ymm(r.getIdx()) : Xmm(r.getIdx()), 0, x, T_0F | T_66 | T_YMM, 0xD7); } +void vpmovmskb(const Reg32e& r, const Xmm& x) { if (!x.is(Operand::XMM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x.isYMM() ? Ymm(r.getIdx()) : Xmm(r.getIdx()), 0, x, T_0F | T_66 | T_YMM, 0xD7); } void vpmovsxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N4 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x21); } void vpmovsxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N2 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x22); } void vpmovsxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x20); } @@ -1200,34 +1252,34 @@ void vpmuludq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1 void vpor(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xEB); } void vpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF6); } void vpshufb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x00); } -void vpshufd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x70, imm); } -void vpshufhw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_YMM | T_EVEX, 0x70, imm); } -void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_F2 | T_0F | T_YMM | T_EVEX, 0x70, imm); } +void vpshufd(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x70, imm); } +void vpshufhw(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_YMM | T_EVEX, 0x70, imm); } +void vpshuflw(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_F2 | T_0F | T_YMM | T_EVEX, 0x70, imm); } void vpsignb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x08); } void vpsignd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x0A); } void vpsignw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x09); } -void vpslld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); } +void vpslld(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); } void vpslld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xF2); } -void vpslldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); } -void vpsllq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); } +void vpslldq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); } +void vpsllq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); } void vpsllq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xF3); } void vpsllvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x47); } void vpsllvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x47); } -void vpsllw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); } +void vpsllw(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); } void vpsllw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xF1); } -void vpsrad(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); } +void vpsrad(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); } void vpsrad(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xE2); } void vpsravd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x46); } -void vpsraw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); } +void vpsraw(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); } void vpsraw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xE1); } -void vpsrld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); } +void vpsrld(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); } void vpsrld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xD2); } -void vpsrldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); } -void vpsrlq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); } +void vpsrldq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); } +void vpsrlq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); } void vpsrlq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xD3); } void vpsrlvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x45); } void vpsrlvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x45); } -void vpsrlw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); } +void vpsrlw(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); } void vpsrlw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xD1); } void vpsubb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF8); } void vpsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xFA); } @@ -1249,14 +1301,14 @@ void vpunpcklwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM( void vpxor(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xEF); } void vrcpps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_YMM, 0x53); } void vrcpss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F, 0x53); } -void vroundpd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_YMM, 0x09, imm); } -void vroundps(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_YMM, 0x08, imm); } -void vroundsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x0B, imm); } -void vroundss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x0A, imm); } +void vroundpd(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_YMM, 0x09, imm); } +void vroundps(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_YMM, 0x08, imm); } +void vroundsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x0B, imm); } +void vroundss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x0A, imm); } void vrsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_YMM, 0x52); } void vrsqrtss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F, 0x52); } -void vshufpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0xC6, imm); } -void vshufps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xC6, imm); } +void vshufpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0xC6, imm); } +void vshufps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xC6, imm); } void vsqrtpd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x51); } void vsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x51); } void vsqrtsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX | T_ER_X, 0x51); } @@ -1284,13 +1336,13 @@ void wrmsr() { db(0x0F); db(0x30); } void xadd(const Operand& op, const Reg& reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xC0 | (reg.isBit(8) ? 0 : 1)); } void xgetbv() { db(0x0F); db(0x01); db(0xD0); } void xlatb() { db(0xD7); } -void xor_(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x30, 6); } +void xor_(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x30, 6); } void xor_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x30); } void xorpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x57, 0x66, isXMM_XMMorMEM); } void xorps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x57, 0x100, isXMM_XMMorMEM); } #ifdef XBYAK_ENABLE_OMITTED_OPERAND -void vblendpd(const Xmm& x, const Operand& op, uint8 imm) { vblendpd(x, x, op, imm); } -void vblendps(const Xmm& x, const Operand& op, uint8 imm) { vblendps(x, x, op, imm); } +void vblendpd(const Xmm& x, const Operand& op, uint8_t imm) { vblendpd(x, x, op, imm); } +void vblendps(const Xmm& x, const Operand& op, uint8_t imm) { vblendps(x, x, op, imm); } void vblendvpd(const Xmm& x1, const Operand& op, const Xmm& x4) { vblendvpd(x1, x1, op, x4); } void vblendvps(const Xmm& x1, const Operand& op, const Xmm& x4) { vblendvps(x1, x1, op, x4); } void vcmpeq_ospd(const Xmm& x, const Operand& op) { vcmpeq_ospd(x, x, op); } @@ -1405,10 +1457,10 @@ void vcmpordpd(const Xmm& x, const Operand& op) { vcmpordpd(x, x, op); } void vcmpordps(const Xmm& x, const Operand& op) { vcmpordps(x, x, op); } void vcmpordsd(const Xmm& x, const Operand& op) { vcmpordsd(x, x, op); } void vcmpordss(const Xmm& x, const Operand& op) { vcmpordss(x, x, op); } -void vcmppd(const Xmm& x, const Operand& op, uint8 imm) { vcmppd(x, x, op, imm); } -void vcmpps(const Xmm& x, const Operand& op, uint8 imm) { vcmpps(x, x, op, imm); } -void vcmpsd(const Xmm& x, const Operand& op, uint8 imm) { vcmpsd(x, x, op, imm); } -void vcmpss(const Xmm& x, const Operand& op, uint8 imm) { vcmpss(x, x, op, imm); } +void vcmppd(const Xmm& x, const Operand& op, uint8_t imm) { vcmppd(x, x, op, imm); } +void vcmpps(const Xmm& x, const Operand& op, uint8_t imm) { vcmpps(x, x, op, imm); } +void vcmpsd(const Xmm& x, const Operand& op, uint8_t imm) { vcmpsd(x, x, op, imm); } +void vcmpss(const Xmm& x, const Operand& op, uint8_t imm) { vcmpss(x, x, op, imm); } void vcmptrue_uspd(const Xmm& x, const Operand& op) { vcmptrue_uspd(x, x, op); } void vcmptrue_usps(const Xmm& x, const Operand& op) { vcmptrue_usps(x, x, op); } void vcmptrue_ussd(const Xmm& x, const Operand& op) { vcmptrue_ussd(x, x, op); } @@ -1429,10 +1481,10 @@ void vcvtsd2ss(const Xmm& x, const Operand& op) { vcvtsd2ss(x, x, op); } void vcvtsi2sd(const Xmm& x, const Operand& op) { vcvtsi2sd(x, x, op); } void vcvtsi2ss(const Xmm& x, const Operand& op) { vcvtsi2ss(x, x, op); } void vcvtss2sd(const Xmm& x, const Operand& op) { vcvtss2sd(x, x, op); } -void vdppd(const Xmm& x, const Operand& op, uint8 imm) { vdppd(x, x, op, imm); } -void vdpps(const Xmm& x, const Operand& op, uint8 imm) { vdpps(x, x, op, imm); } -void vinsertps(const Xmm& x, const Operand& op, uint8 imm) { vinsertps(x, x, op, imm); } -void vmpsadbw(const Xmm& x, const Operand& op, uint8 imm) { vmpsadbw(x, x, op, imm); } +void vdppd(const Xmm& x, const Operand& op, uint8_t imm) { vdppd(x, x, op, imm); } +void vdpps(const Xmm& x, const Operand& op, uint8_t imm) { vdpps(x, x, op, imm); } +void vinsertps(const Xmm& x, const Operand& op, uint8_t imm) { vinsertps(x, x, op, imm); } +void vmpsadbw(const Xmm& x, const Operand& op, uint8_t imm) { vmpsadbw(x, x, op, imm); } void vpackssdw(const Xmm& x, const Operand& op) { vpackssdw(x, x, op); } void vpacksswb(const Xmm& x, const Operand& op) { vpacksswb(x, x, op); } void vpackusdw(const Xmm& x, const Operand& op) { vpackusdw(x, x, op); } @@ -1445,15 +1497,15 @@ void vpaddsw(const Xmm& x, const Operand& op) { vpaddsw(x, x, op); } void vpaddusb(const Xmm& x, const Operand& op) { vpaddusb(x, x, op); } void vpaddusw(const Xmm& x, const Operand& op) { vpaddusw(x, x, op); } void vpaddw(const Xmm& x, const Operand& op) { vpaddw(x, x, op); } -void vpalignr(const Xmm& x, const Operand& op, uint8 imm) { vpalignr(x, x, op, imm); } +void vpalignr(const Xmm& x, const Operand& op, uint8_t imm) { vpalignr(x, x, op, imm); } void vpand(const Xmm& x, const Operand& op) { vpand(x, x, op); } void vpandn(const Xmm& x, const Operand& op) { vpandn(x, x, op); } void vpavgb(const Xmm& x, const Operand& op) { vpavgb(x, x, op); } void vpavgw(const Xmm& x, const Operand& op) { vpavgw(x, x, op); } -void vpblendd(const Xmm& x, const Operand& op, uint8 imm) { vpblendd(x, x, op, imm); } +void vpblendd(const Xmm& x, const Operand& op, uint8_t imm) { vpblendd(x, x, op, imm); } void vpblendvb(const Xmm& x1, const Operand& op, const Xmm& x4) { vpblendvb(x1, x1, op, x4); } -void vpblendw(const Xmm& x, const Operand& op, uint8 imm) { vpblendw(x, x, op, imm); } -void vpclmulqdq(const Xmm& x, const Operand& op, uint8 imm) { vpclmulqdq(x, x, op, imm); } +void vpblendw(const Xmm& x, const Operand& op, uint8_t imm) { vpblendw(x, x, op, imm); } +void vpclmulqdq(const Xmm& x, const Operand& op, uint8_t imm) { vpclmulqdq(x, x, op, imm); } void vpcmpeqb(const Xmm& x, const Operand& op) { vpcmpeqb(x, x, op); } void vpcmpeqd(const Xmm& x, const Operand& op) { vpcmpeqd(x, x, op); } void vpcmpeqq(const Xmm& x, const Operand& op) { vpcmpeqq(x, x, op); } @@ -1468,10 +1520,10 @@ void vphaddw(const Xmm& x, const Operand& op) { vphaddw(x, x, op); } void vphsubd(const Xmm& x, const Operand& op) { vphsubd(x, x, op); } void vphsubsw(const Xmm& x, const Operand& op) { vphsubsw(x, x, op); } void vphsubw(const Xmm& x, const Operand& op) { vphsubw(x, x, op); } -void vpinsrb(const Xmm& x, const Operand& op, uint8 imm) { vpinsrb(x, x, op, imm); } -void vpinsrd(const Xmm& x, const Operand& op, uint8 imm) { vpinsrd(x, x, op, imm); } -void vpinsrq(const Xmm& x, const Operand& op, uint8 imm) { vpinsrq(x, x, op, imm); } -void vpinsrw(const Xmm& x, const Operand& op, uint8 imm) { vpinsrw(x, x, op, imm); } +void vpinsrb(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrb(x, x, op, imm); } +void vpinsrd(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrd(x, x, op, imm); } +void vpinsrq(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrq(x, x, op, imm); } +void vpinsrw(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrw(x, x, op, imm); } void vpmaddubsw(const Xmm& x, const Operand& op) { vpmaddubsw(x, x, op); } void vpmaddwd(const Xmm& x, const Operand& op) { vpmaddwd(x, x, op); } void vpmaxsb(const Xmm& x, const Operand& op) { vpmaxsb(x, x, op); } @@ -1499,23 +1551,23 @@ void vpsignb(const Xmm& x, const Operand& op) { vpsignb(x, x, op); } void vpsignd(const Xmm& x, const Operand& op) { vpsignd(x, x, op); } void vpsignw(const Xmm& x, const Operand& op) { vpsignw(x, x, op); } void vpslld(const Xmm& x, const Operand& op) { vpslld(x, x, op); } -void vpslld(const Xmm& x, uint8 imm) { vpslld(x, x, imm); } -void vpslldq(const Xmm& x, uint8 imm) { vpslldq(x, x, imm); } +void vpslld(const Xmm& x, uint8_t imm) { vpslld(x, x, imm); } +void vpslldq(const Xmm& x, uint8_t imm) { vpslldq(x, x, imm); } void vpsllq(const Xmm& x, const Operand& op) { vpsllq(x, x, op); } -void vpsllq(const Xmm& x, uint8 imm) { vpsllq(x, x, imm); } +void vpsllq(const Xmm& x, uint8_t imm) { vpsllq(x, x, imm); } void vpsllw(const Xmm& x, const Operand& op) { vpsllw(x, x, op); } -void vpsllw(const Xmm& x, uint8 imm) { vpsllw(x, x, imm); } +void vpsllw(const Xmm& x, uint8_t imm) { vpsllw(x, x, imm); } void vpsrad(const Xmm& x, const Operand& op) { vpsrad(x, x, op); } -void vpsrad(const Xmm& x, uint8 imm) { vpsrad(x, x, imm); } +void vpsrad(const Xmm& x, uint8_t imm) { vpsrad(x, x, imm); } void vpsraw(const Xmm& x, const Operand& op) { vpsraw(x, x, op); } -void vpsraw(const Xmm& x, uint8 imm) { vpsraw(x, x, imm); } +void vpsraw(const Xmm& x, uint8_t imm) { vpsraw(x, x, imm); } void vpsrld(const Xmm& x, const Operand& op) { vpsrld(x, x, op); } -void vpsrld(const Xmm& x, uint8 imm) { vpsrld(x, x, imm); } -void vpsrldq(const Xmm& x, uint8 imm) { vpsrldq(x, x, imm); } +void vpsrld(const Xmm& x, uint8_t imm) { vpsrld(x, x, imm); } +void vpsrldq(const Xmm& x, uint8_t imm) { vpsrldq(x, x, imm); } void vpsrlq(const Xmm& x, const Operand& op) { vpsrlq(x, x, op); } -void vpsrlq(const Xmm& x, uint8 imm) { vpsrlq(x, x, imm); } +void vpsrlq(const Xmm& x, uint8_t imm) { vpsrlq(x, x, imm); } void vpsrlw(const Xmm& x, const Operand& op) { vpsrlw(x, x, op); } -void vpsrlw(const Xmm& x, uint8 imm) { vpsrlw(x, x, imm); } +void vpsrlw(const Xmm& x, uint8_t imm) { vpsrlw(x, x, imm); } void vpsubb(const Xmm& x, const Operand& op) { vpsubb(x, x, op); } void vpsubd(const Xmm& x, const Operand& op) { vpsubd(x, x, op); } void vpsubq(const Xmm& x, const Operand& op) { vpsubq(x, x, op); } @@ -1534,11 +1586,11 @@ void vpunpcklqdq(const Xmm& x, const Operand& op) { vpunpcklqdq(x, x, op); } void vpunpcklwd(const Xmm& x, const Operand& op) { vpunpcklwd(x, x, op); } void vpxor(const Xmm& x, const Operand& op) { vpxor(x, x, op); } void vrcpss(const Xmm& x, const Operand& op) { vrcpss(x, x, op); } -void vroundsd(const Xmm& x, const Operand& op, uint8 imm) { vroundsd(x, x, op, imm); } -void vroundss(const Xmm& x, const Operand& op, uint8 imm) { vroundss(x, x, op, imm); } +void vroundsd(const Xmm& x, const Operand& op, uint8_t imm) { vroundsd(x, x, op, imm); } +void vroundss(const Xmm& x, const Operand& op, uint8_t imm) { vroundss(x, x, op, imm); } void vrsqrtss(const Xmm& x, const Operand& op) { vrsqrtss(x, x, op); } -void vshufpd(const Xmm& x, const Operand& op, uint8 imm) { vshufpd(x, x, op, imm); } -void vshufps(const Xmm& x, const Operand& op, uint8 imm) { vshufps(x, x, op, imm); } +void vshufpd(const Xmm& x, const Operand& op, uint8_t imm) { vshufpd(x, x, op, imm); } +void vshufps(const Xmm& x, const Operand& op, uint8_t imm) { vshufps(x, x, op, imm); } void vsqrtsd(const Xmm& x, const Operand& op) { vsqrtsd(x, x, op); } void vsqrtss(const Xmm& x, const Operand& op) { vsqrtss(x, x, op); } void vunpckhpd(const Xmm& x, const Operand& op) { vunpckhpd(x, x, op); } @@ -1554,21 +1606,39 @@ void jrcxz(const Label& label) { opJmp(label, T_SHORT, 0xe3, 0, 0); } void cdqe() { db(0x48); db(0x98); } void cqo() { db(0x48); db(0x99); } void cmpsq() { db(0x48); db(0xA7); } +void popfq() { db(0x9D); } +void pushfq() { db(0x9C); } +void lodsq() { db(0x48); db(0xAD); } void movsq() { db(0x48); db(0xA5); } void scasq() { db(0x48); db(0xAF); } void stosq() { db(0x48); db(0xAB); } +void syscall() { db(0x0F); db(0x05); } +void sysret() { db(0x0F); db(0x07); } void cmpxchg16b(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xC7); } +void fxrstor64(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xAE); } void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); } void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); } -void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) throw Error(ERR_BAD_COMBINATION); opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); } -void pextrq(const Operand& op, const Xmm& xmm, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opGen(Reg64(xmm.getIdx()), op, 0x16, 0x66, 0, imm, 0x3A); } -void pinsrq(const Xmm& xmm, const Operand& op, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opGen(Reg64(xmm.getIdx()), op, 0x22, 0x66, 0, imm, 0x3A); } +void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); } +void pextrq(const Operand& op, const Xmm& xmm, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(Reg64(xmm.getIdx()), op, 0x16, 0x66, 0, imm, 0x3A); } +void pinsrq(const Xmm& xmm, const Operand& op, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(Reg64(xmm.getIdx()), op, 0x22, 0x66, 0, imm, 0x3A); } void vcvtss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_ER_X | T_N8, 0x2D); } void vcvttss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_SAE_X | T_N8, 0x2C); } void vcvtsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_ER_X, 0x2D); } void vcvttsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_SAE_X, 0x2C); } void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); } void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); } +void ldtilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_0F38 | T_W0, 0x49); } +void sttilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); } +void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2 | T_0F38 | T_W0, 0x4b); } +void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66 | T_0F38 | T_W0, 0x4b); } +void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); } +void tilestored(const Address& addr, const Tmm& tm) { opVex(tm, &tmm0, addr, T_F3 | T_0F38 | T_W0, 0x4b); } +void tilezero(const Tmm& Tmm) { opVex(Tmm, &tmm0, tmm0, T_F2 | T_0F38 | T_W0, 0x49); } +void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F2 | T_0F38 | T_W0, 0x5e); } +void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); } +void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66 | T_0F38 | T_W0, 0x5e); } +void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38 | T_W0, 0x5e); } +void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5c); } #else void jcxz(std::string label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); } void jcxz(const Label& label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); } @@ -1580,20 +1650,23 @@ void aam() { db(0xD4); db(0x0A); } void aas() { db(0x3F); } void daa() { db(0x27); } void das() { db(0x2F); } +void into() { db(0xCE); } void popad() { db(0x61); } void popfd() { db(0x9D); } void pusha() { db(0x60); } void pushad() { db(0x60); } void pushfd() { db(0x9C); } void popa() { db(0x61); } +void lds(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0xC5, 0x100); } +void les(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0xC4, 0x100); } #endif #ifndef XBYAK_NO_OP_NAMES void and(const Operand& op1, const Operand& op2) { and_(op1, op2); } -void and(const Operand& op, uint32 imm) { and_(op, imm); } +void and(const Operand& op, uint32_t imm) { and_(op, imm); } void or(const Operand& op1, const Operand& op2) { or_(op1, op2); } -void or(const Operand& op, uint32 imm) { or_(op, imm); } +void or(const Operand& op, uint32_t imm) { or_(op, imm); } void xor(const Operand& op1, const Operand& op2) { xor_(op1, op2); } -void xor(const Operand& op, uint32 imm) { xor_(op, imm); } +void xor(const Operand& op, uint32_t imm) { xor_(op, imm); } void not(const Operand& op) { not_(op); } #endif #ifndef XBYAK_DISABLE_AVX512 @@ -1610,17 +1683,17 @@ void kandnw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r void kandq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x41); } void kandw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x41); } void kmovb(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W0, 0x91); } -void kmovb(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); } +void kmovb(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); } void kmovb(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_66 | T_W0, 0x92); } void kmovb(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_66 | T_W0, 0x93); } void kmovd(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W1, 0x91); } -void kmovd(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); } +void kmovd(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); } void kmovd(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W0, 0x92); } void kmovd(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W0, 0x93); } void kmovq(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W1, 0x91); } -void kmovq(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); } +void kmovq(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); } void kmovw(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W0, 0x91); } -void kmovw(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); } +void kmovw(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); } void kmovw(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_W0, 0x92); } void kmovw(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_W0, 0x93); } void knotb(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W0, 0x44); } @@ -1635,14 +1708,14 @@ void kortestd(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 void kortestq(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W1, 0x98); } void kortestw(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W0, 0x98); } void korw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x45); } -void kshiftlb(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x32, imm); } -void kshiftld(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x33, imm); } -void kshiftlq(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x33, imm); } -void kshiftlw(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x32, imm); } -void kshiftrb(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x30, imm); } -void kshiftrd(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x31, imm); } -void kshiftrq(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x31, imm); } -void kshiftrw(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x30, imm); } +void kshiftlb(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x32, imm); } +void kshiftld(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x33, imm); } +void kshiftlq(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x33, imm); } +void kshiftlw(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x32, imm); } +void kshiftrb(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x30, imm); } +void kshiftrd(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x31, imm); } +void kshiftrq(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x31, imm); } +void kshiftrw(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x30, imm); } void ktestb(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W0, 0x99); } void ktestd(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W1, 0x99); } void ktestq(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W1, 0x99); } @@ -1662,8 +1735,8 @@ void v4fmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM void v4fmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0x9B); } void v4fnmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0xAA); } void v4fnmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0xAB); } -void valignd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x03, imm); } -void valignq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x03, imm); } +void valignd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x03, imm); } +void valignq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x03, imm); } void vblendmpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x65); } void vblendmps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x65); } void vbroadcastf32x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N8, 0x19); } @@ -1676,14 +1749,16 @@ void vbroadcasti32x4(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_ void vbroadcasti32x8(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32, 0x5B); } void vbroadcasti64x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x5A); } void vbroadcasti64x4(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x5B); } -void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); } -void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); } -void vcmpsd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_N8 | T_F2 | T_0F | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); } -void vcmpss(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_N4 | T_F3 | T_0F | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); } +void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0xC2, imm); } +void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0xC2, imm); } +void vcmpsd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_N8 | T_F2 | T_0F | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); } +void vcmpss(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_N4 | T_F3 | T_0F | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); } void vcompressb(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N1 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x63); } void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8A); } void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8A); } void vcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x63); } +void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); } +void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); } void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7B); } void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x79); } void vcvtpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x79); } @@ -1708,27 +1783,28 @@ void vcvtuqq2pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | void vcvtuqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F2 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x7A); } void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); } void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); } -void vdbpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x42, imm); } +void vdbpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x42, imm); } +void vdpbf16ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x52); } void vexp2pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xC8); } void vexp2ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xC8); } void vexpandpd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x88); } void vexpandps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x88); } -void vextractf32x4(const Operand& op, const Ymm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::XMM)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x19, imm); } -void vextractf32x8(const Operand& op, const Zmm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x1B, imm); } -void vextractf64x2(const Operand& op, const Ymm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::XMM)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x19, imm); } -void vextractf64x4(const Operand& op, const Zmm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x1B, imm); } -void vextracti32x4(const Operand& op, const Ymm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::XMM)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x39, imm); } -void vextracti32x8(const Operand& op, const Zmm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3B, imm); } -void vextracti64x2(const Operand& op, const Ymm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::XMM)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x39, imm); } -void vextracti64x4(const Operand& op, const Zmm& r, uint8 imm) { if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3B, imm); } -void vfixupimmpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x54, imm); } -void vfixupimmps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x54, imm); } -void vfixupimmsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); } -void vfixupimmss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); } -void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); } -void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); } -void vfpclasssd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); } -void vfpclassss(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); } +void vextractf32x4(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x19, imm); } +void vextractf32x8(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x1B, imm); } +void vextractf64x2(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x19, imm); } +void vextractf64x4(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x1B, imm); } +void vextracti32x4(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x39, imm); } +void vextracti32x8(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3B, imm); } +void vextracti64x2(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x39, imm); } +void vextracti64x4(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3B, imm); } +void vfixupimmpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x54, imm); } +void vfixupimmps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x54, imm); } +void vfixupimmsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); } +void vfixupimmss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); } +void vfpclasspd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); } +void vfpclassps(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); } +void vfpclasssd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); } +void vfpclassss(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); } void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 1); } void vgatherdps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 0); } void vgatherpf0dpd(const Address& addr) { opGatherFetch(addr, zm1, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); } @@ -1745,18 +1821,18 @@ void vgetexppd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T void vgetexpps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x42); } void vgetexpsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x43); } void vgetexpss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x43); } -void vgetmantpd(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x26, imm); } -void vgetmantps(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x26, imm); } -void vgetmantsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x27, imm); } -void vgetmantss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x27, imm); } -void vinsertf32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x18, imm); } -void vinsertf32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8 imm) {if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x1A, imm); } -void vinsertf64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x18, imm); } -void vinsertf64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8 imm) {if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x1A, imm); } -void vinserti32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x38, imm); } -void vinserti32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8 imm) {if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3A, imm); } -void vinserti64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x38, imm); } -void vinserti64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8 imm) {if (!op.is(Operand::MEM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3A, imm); } +void vgetmantpd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x26, imm); } +void vgetmantps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x26, imm); } +void vgetmantsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x27, imm); } +void vgetmantss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x27, imm); } +void vinsertf32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x18, imm); } +void vinsertf32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x1A, imm); } +void vinsertf64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x18, imm); } +void vinsertf64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x1A, imm); } +void vinserti32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x38, imm); } +void vinserti32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3A, imm); } +void vinserti64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x38, imm); } +void vinserti64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3A, imm); } void vmovdqa32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); } void vmovdqa32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); } void vmovdqa64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); } @@ -1769,6 +1845,8 @@ void vmovdqu64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3 void vmovdqu64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); } void vmovdqu8(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); } void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); } +void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); } +void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); } void vp4dpwssd(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x52); } void vp4dpwssds(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x53); } void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); } @@ -1785,8 +1863,8 @@ void vpbroadcastd(const Xmm& x, const Reg32& r) { opVex(x, 0, r, T_66 | T_0F38 | void vpbroadcastmb2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x2A); } void vpbroadcastmw2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x3A); } void vpbroadcastw(const Xmm& x, const Reg16& r) { opVex(x, 0, r, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x7B); } -void vpcmpb(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3F, imm); } -void vpcmpd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x1F, imm); } +void vpcmpb(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3F, imm); } +void vpcmpd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x1F, imm); } void vpcmpeqb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX, 0x74); } void vpcmpeqd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_B32, 0x76); } void vpcmpeqq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x29); } @@ -1795,12 +1873,12 @@ void vpcmpgtb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k void vpcmpgtd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x66); } void vpcmpgtq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x37); } void vpcmpgtw(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX, 0x65); } -void vpcmpq(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x1F, imm); } -void vpcmpub(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3E, imm); } -void vpcmpud(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x1E, imm); } -void vpcmpuq(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x1E, imm); } -void vpcmpuw(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3E, imm); } -void vpcmpw(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3F, imm); } +void vpcmpq(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x1F, imm); } +void vpcmpub(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3E, imm); } +void vpcmpud(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x1E, imm); } +void vpcmpuq(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x1E, imm); } +void vpcmpuw(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3E, imm); } +void vpcmpw(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3F, imm); } void vpcompressd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8B); } void vpcompressq(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8B); } void vpconflictd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xC4); } @@ -1873,39 +1951,39 @@ void vpopcntq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_ void vpopcntw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x54); } void vpord(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xEB); } void vporq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xEB); } -void vprold(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 1), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x72, imm); } -void vprolq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 1), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); } +void vprold(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 1), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x72, imm); } +void vprolq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 1), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); } void vprolvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x15); } void vprolvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x15); } -void vprord(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 0), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x72, imm); } -void vprorq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 0), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); } +void vprord(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 0), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x72, imm); } +void vprorq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 0), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); } void vprorvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x14); } void vprorvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x14); } void vpscatterdd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA0, 0); } void vpscatterdq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA0, 1); } void vpscatterqd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA1, 2); } void vpscatterqq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA1, 0); } -void vpshldd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71, imm); } -void vpshldq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x71, imm); } +void vpshldd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71, imm); } +void vpshldq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x71, imm); } void vpshldvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71); } void vpshldvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x71); } void vpshldvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x70); } -void vpshldw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x70, imm); } -void vpshrdd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x73, imm); } -void vpshrdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x73, imm); } +void vpshldw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x70, imm); } +void vpshrdd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x73, imm); } +void vpshrdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x73, imm); } void vpshrdvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x73); } void vpshrdvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x73); } void vpshrdvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x72); } -void vpshrdw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x72, imm); } +void vpshrdw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x72, imm); } void vpshufbitqmb(const Opmask& k, const Xmm& x, const Operand& op) { opVex(k, &x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8F); } void vpsllvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x12); } -void vpsraq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); } +void vpsraq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); } void vpsraq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX, 0xE2); } void vpsravq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x46); } void vpsravw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x11); } void vpsrlvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x10); } -void vpternlogd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x25, imm); } -void vpternlogq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x25, imm); } +void vpternlogd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x25, imm); } +void vpternlogq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x25, imm); } void vptestmb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x26); } void vptestmd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x27); } void vptestmq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x27); } @@ -1916,10 +1994,10 @@ void vptestnmq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM( void vptestnmw(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_F3 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x26); } void vpxord(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xEF); } void vpxorq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xEF); } -void vrangepd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x50, imm); } -void vrangeps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x50, imm); } -void vrangesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x51, imm); } -void vrangess(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x51, imm); } +void vrangepd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x50, imm); } +void vrangeps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x50, imm); } +void vrangesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x51, imm); } +void vrangess(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x51, imm); } void vrcp14pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x4C); } void vrcp14ps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x4C); } void vrcp14sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX, 0x4D); } @@ -1928,14 +2006,14 @@ void vrcp28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_ void vrcp28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCA); } void vrcp28sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0xCB); } void vrcp28ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0xCB); } -void vreducepd(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x56, imm); } -void vreduceps(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x56, imm); } -void vreducesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x57, imm); } -void vreducess(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x57, imm); } -void vrndscalepd(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x09, imm); } -void vrndscaleps(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x08, imm); } -void vrndscalesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_MUST_EVEX, 0x0B, imm); } -void vrndscaless(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_MUST_EVEX, 0x0A, imm); } +void vreducepd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x56, imm); } +void vreduceps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x56, imm); } +void vreducesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x57, imm); } +void vreducess(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x57, imm); } +void vrndscalepd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x09, imm); } +void vrndscaleps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x08, imm); } +void vrndscalesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_MUST_EVEX, 0x0B, imm); } +void vrndscaless(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_MUST_EVEX, 0x0A, imm); } void vrsqrt14pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x4E); } void vrsqrt14ps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x4E); } void vrsqrt14sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x4F); } @@ -1960,10 +2038,10 @@ void vscatterpf1qpd(const Address& addr) { opGatherFetch(addr, zm6, T_N8 | T_66 void vscatterpf1qps(const Address& addr) { opGatherFetch(addr, zm6, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); } void vscatterqpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA3, 0); } void vscatterqps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA3, 2); } -void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); } -void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); } -void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); } -void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); } +void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); } +void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); } +void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); } +void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); } #ifdef XBYAK64 void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); } void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); } diff --git a/xbyak/xbyak_util.h b/xbyak/xbyak_util.h index c2474c5b..1516fc33 100644 --- a/xbyak/xbyak_util.h +++ b/xbyak/xbyak_util.h @@ -1,6 +1,18 @@ #ifndef XBYAK_XBYAK_UTIL_H_ #define XBYAK_XBYAK_UTIL_H_ +#ifdef XBYAK_ONLY_CLASS_CPU +#include +#include +#include +#include +#ifndef XBYAK_THROW + #define XBYAK_THROW(x) ; + #define XBYAK_THROW_RET(x, y) return y; +#endif +#else +#include + /** utility class and functions for Xbyak Xbyak::util::Clock ; rdtsc timer @@ -8,6 +20,7 @@ @note this header is UNDER CONSTRUCTION! */ #include "xbyak.h" +#endif // XBYAK_ONLY_CLASS_CPU #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) #define XBYAK_INTEL_CPU_SPECIFIC @@ -54,6 +67,20 @@ #endif #endif +#ifdef XBYAK_USE_VTUNE + // -I /opt/intel/vtune_amplifier/include/ -L /opt/intel/vtune_amplifier/lib64 -ljitprofiling -ldl + #include + #ifdef _MSC_VER + #pragma comment(lib, "libittnotify.lib") + #endif + #ifdef __linux__ + #include + #endif +#endif +#ifdef __linux__ + #define XBYAK_USE_PERF +#endif + namespace Xbyak { namespace util { typedef enum { @@ -65,7 +92,7 @@ typedef enum { CPU detection class */ class Cpu { - uint64 type_; + uint64_t type_; //system topology bool x2APIC_supported_; static const size_t maxTopologyLevels = 2; @@ -132,6 +159,11 @@ class Cpu { numCores_[level - 1] = extractBit(data[1], 0, 15); } } + /* + Fallback values in case a hypervisor has 0xB leaf zeroed-out. + */ + numCores_[SmtLevel - 1] = (std::max)(1u, numCores_[SmtLevel - 1]); + numCores_[CoreLevel - 1] = (std::max)(numCores_[SmtLevel - 1], numCores_[CoreLevel - 1]); } else { /* Failed to deremine num of cores without x2APIC support. @@ -199,24 +231,24 @@ public: int displayFamily; // family + extFamily int displayModel; // model + extModel - unsigned int getNumCores(IntelCpuTopologyLevel level) { - if (!x2APIC_supported_) throw Error(ERR_X2APIC_IS_NOT_SUPPORTED); + unsigned int getNumCores(IntelCpuTopologyLevel level) const { + if (!x2APIC_supported_) XBYAK_THROW_RET(ERR_X2APIC_IS_NOT_SUPPORTED, 0) switch (level) { case SmtLevel: return numCores_[level - 1]; case CoreLevel: return numCores_[level - 1] / numCores_[SmtLevel - 1]; - default: throw Error(ERR_X2APIC_IS_NOT_SUPPORTED); + default: XBYAK_THROW_RET(ERR_X2APIC_IS_NOT_SUPPORTED, 0) } } unsigned int getDataCacheLevels() const { return dataCacheLevels_; } unsigned int getCoresSharingDataCache(unsigned int i) const { - if (i >= dataCacheLevels_) throw Error(ERR_BAD_PARAMETER); + if (i >= dataCacheLevels_) XBYAK_THROW_RET(ERR_BAD_PARAMETER, 0) return coresSharignDataCache_[i]; } unsigned int getDataCacheSize(unsigned int i) const { - if (i >= dataCacheLevels_) throw Error(ERR_BAD_PARAMETER); + if (i >= dataCacheLevels_) XBYAK_THROW_RET(ERR_BAD_PARAMETER, 0) return dataCacheSize_[i]; } @@ -250,7 +282,7 @@ public: (void)data; #endif } - static inline uint64 getXfeature() + static inline uint64_t getXfeature() { #ifdef XBYAK_INTEL_CPU_SPECIFIC #ifdef _MSC_VER @@ -260,13 +292,13 @@ public: // xgetvb is not support on gcc 4.2 // __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0)); __asm__ volatile(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0)); - return ((uint64)edx << 32) | eax; + return ((uint64_t)edx << 32) | eax; #endif #else return 0; #endif } - typedef uint64 Type; + typedef uint64_t Type; static const Type NONE = 0; static const Type tMMX = 1 << 0; @@ -303,34 +335,39 @@ public: static const Type tADX = 1 << 28; // adcx, adox static const Type tRDSEED = 1 << 29; // rdseed static const Type tSMAP = 1 << 30; // stac - static const Type tHLE = uint64(1) << 31; // xacquire, xrelease, xtest - static const Type tRTM = uint64(1) << 32; // xbegin, xend, xabort - static const Type tF16C = uint64(1) << 33; // vcvtph2ps, vcvtps2ph - static const Type tMOVBE = uint64(1) << 34; // mobve - static const Type tAVX512F = uint64(1) << 35; - static const Type tAVX512DQ = uint64(1) << 36; - static const Type tAVX512_IFMA = uint64(1) << 37; + static const Type tHLE = uint64_t(1) << 31; // xacquire, xrelease, xtest + static const Type tRTM = uint64_t(1) << 32; // xbegin, xend, xabort + static const Type tF16C = uint64_t(1) << 33; // vcvtph2ps, vcvtps2ph + static const Type tMOVBE = uint64_t(1) << 34; // mobve + static const Type tAVX512F = uint64_t(1) << 35; + static const Type tAVX512DQ = uint64_t(1) << 36; + static const Type tAVX512_IFMA = uint64_t(1) << 37; static const Type tAVX512IFMA = tAVX512_IFMA; - static const Type tAVX512PF = uint64(1) << 38; - static const Type tAVX512ER = uint64(1) << 39; - static const Type tAVX512CD = uint64(1) << 40; - static const Type tAVX512BW = uint64(1) << 41; - static const Type tAVX512VL = uint64(1) << 42; - static const Type tAVX512_VBMI = uint64(1) << 43; + static const Type tAVX512PF = uint64_t(1) << 38; + static const Type tAVX512ER = uint64_t(1) << 39; + static const Type tAVX512CD = uint64_t(1) << 40; + static const Type tAVX512BW = uint64_t(1) << 41; + static const Type tAVX512VL = uint64_t(1) << 42; + static const Type tAVX512_VBMI = uint64_t(1) << 43; static const Type tAVX512VBMI = tAVX512_VBMI; // changed by Intel's manual - static const Type tAVX512_4VNNIW = uint64(1) << 44; - static const Type tAVX512_4FMAPS = uint64(1) << 45; - static const Type tPREFETCHWT1 = uint64(1) << 46; - static const Type tPREFETCHW = uint64(1) << 47; - static const Type tSHA = uint64(1) << 48; - static const Type tMPX = uint64(1) << 49; - static const Type tAVX512_VBMI2 = uint64(1) << 50; - static const Type tGFNI = uint64(1) << 51; - static const Type tVAES = uint64(1) << 52; - static const Type tVPCLMULQDQ = uint64(1) << 53; - static const Type tAVX512_VNNI = uint64(1) << 54; - static const Type tAVX512_BITALG = uint64(1) << 55; - static const Type tAVX512_VPOPCNTDQ = uint64(1) << 56; + static const Type tAVX512_4VNNIW = uint64_t(1) << 44; + static const Type tAVX512_4FMAPS = uint64_t(1) << 45; + static const Type tPREFETCHWT1 = uint64_t(1) << 46; + static const Type tPREFETCHW = uint64_t(1) << 47; + static const Type tSHA = uint64_t(1) << 48; + static const Type tMPX = uint64_t(1) << 49; + static const Type tAVX512_VBMI2 = uint64_t(1) << 50; + static const Type tGFNI = uint64_t(1) << 51; + static const Type tVAES = uint64_t(1) << 52; + static const Type tVPCLMULQDQ = uint64_t(1) << 53; + static const Type tAVX512_VNNI = uint64_t(1) << 54; + static const Type tAVX512_BITALG = uint64_t(1) << 55; + static const Type tAVX512_VPOPCNTDQ = uint64_t(1) << 56; + static const Type tAVX512_BF16 = uint64_t(1) << 57; + static const Type tAVX512_VP2INTERSECT = uint64_t(1) << 58; + static const Type tAMX_TILE = uint64_t(1) << 59; + static const Type tAMX_INT8 = uint64_t(1) << 60; + static const Type tAMX_BF16 = uint64_t(1) << 61; Cpu() : type_(NONE) @@ -385,7 +422,7 @@ public: if (type_ & tOSXSAVE) { // check XFEATURE_ENABLED_MASK[2:1] = '11b' - uint64 bv = getXfeature(); + uint64_t bv = getXfeature(); if ((bv & 6) == 6) { if (ECX & (1U << 28)) type_ |= tAVX; if (ECX & (1U << 12)) type_ |= tFMA; @@ -410,6 +447,12 @@ public: if (ECX & (1U << 14)) type_ |= tAVX512_VPOPCNTDQ; if (EDX & (1U << 2)) type_ |= tAVX512_4VNNIW; if (EDX & (1U << 3)) type_ |= tAVX512_4FMAPS; + if (EDX & (1U << 8)) type_ |= tAVX512_VP2INTERSECT; + } + // EAX=07H, ECX=1 + getCpuidEx(7, 1, data); + if (type_ & tAVX512F) { + if (EAX & (1U << 5)) type_ |= tAVX512_BF16; } } } @@ -428,6 +471,9 @@ public: if (EBX & (1U << 14)) type_ |= tMPX; if (EBX & (1U << 29)) type_ |= tSHA; if (ECX & (1U << 0)) type_ |= tPREFETCHWT1; + if (EDX & (1U << 24)) type_ |= tAMX_TILE; + if (EDX & (1U << 25)) type_ |= tAMX_INT8; + if (EDX & (1U << 22)) type_ |= tAMX_BF16; } setFamily(); setNumCores(); @@ -435,9 +481,11 @@ public: } void putFamily() const { +#ifndef XBYAK_ONLY_CLASS_CPU printf("family=%d, model=%X, stepping=%d, extFamily=%d, extModel=%X\n", family, model, stepping, extFamily, extModel); printf("display:family=%X, model=%X\n", displayFamily, displayModel); +#endif } bool has(Type type) const { @@ -445,9 +493,10 @@ public: } }; +#ifndef XBYAK_ONLY_CLASS_CPU class Clock { public: - static inline uint64 getRdtsc() + static inline uint64_t getRdtsc() { #ifdef XBYAK_INTEL_CPU_SPECIFIC #ifdef _MSC_VER @@ -455,7 +504,7 @@ public: #else unsigned int eax, edx; __asm__ volatile("rdtsc" : "=a"(eax), "=d"(edx)); - return ((uint64)edx << 32) | eax; + return ((uint64_t)edx << 32) | eax; #endif #else // TODO: Need another impl of Clock or rdtsc-equivalent for non-x86 cpu @@ -477,10 +526,10 @@ public: count_++; } int getCount() const { return count_; } - uint64 getClock() const { return clock_; } + uint64_t getClock() const { return clock_; } void clear() { count_ = 0; clock_ = 0; } private: - uint64 clock_; + uint64_t clock_; int count_; }; @@ -530,7 +579,7 @@ public: { if (n_ == maxTblNum) { fprintf(stderr, "ERR Pack::can't append\n"); - throw Error(ERR_BAD_PARAMETER); + XBYAK_THROW_RET(ERR_BAD_PARAMETER, *this) } tbl_[n_++] = &t; return *this; @@ -539,7 +588,7 @@ public: { if (n > maxTblNum) { fprintf(stderr, "ERR Pack::init bad n=%d\n", (int)n); - throw Error(ERR_BAD_PARAMETER); + XBYAK_THROW(ERR_BAD_PARAMETER) } n_ = n; for (size_t i = 0; i < n; i++) { @@ -550,7 +599,7 @@ public: { if (n >= n_) { fprintf(stderr, "ERR Pack bad n=%d(%d)\n", (int)n, (int)n_); - throw Error(ERR_BAD_PARAMETER); + XBYAK_THROW_RET(ERR_BAD_PARAMETER, rax) } return *tbl_[n]; } @@ -563,7 +612,7 @@ public: if (num == size_t(-1)) num = n_ - pos; if (pos + num > n_) { fprintf(stderr, "ERR Pack::sub bad pos=%d, num=%d\n", (int)pos, (int)num); - throw Error(ERR_BAD_PARAMETER); + XBYAK_THROW_RET(ERR_BAD_PARAMETER, Pack()) } Pack pack; pack.n_ = num; @@ -638,9 +687,9 @@ public: , t(t_) { using namespace Xbyak; - if (pNum < 0 || pNum > 4) throw Error(ERR_BAD_PNUM); + if (pNum < 0 || pNum > 4) XBYAK_THROW(ERR_BAD_PNUM) const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0); - if (tNum_ < 0 || allRegNum > maxRegNum) throw Error(ERR_BAD_TNUM); + if (tNum_ < 0 || allRegNum > maxRegNum) XBYAK_THROW(ERR_BAD_TNUM) const Reg64& _rsp = code->rsp; saveNum_ = (std::max)(0, allRegNum - noSaveNum); const int *tbl = getOrderTbl() + noSaveNum; @@ -682,12 +731,7 @@ public: ~StackFrame() { if (!makeEpilog_) return; - try { - close(); - } catch (std::exception& e) { - printf("ERR:StackFrame %s\n", e.what()); - exit(1); - } + close(); } private: const int *getOrderTbl() const @@ -722,5 +766,137 @@ private: }; #endif -} } // end of util +class Profiler { + int mode_; + const char *suffix_; + const void *startAddr_; +#ifdef XBYAK_USE_PERF + FILE *fp_; +#endif +public: + enum { + None = 0, + Perf = 1, + VTune = 2 + }; + Profiler() + : mode_(None) + , suffix_("") + , startAddr_(0) +#ifdef XBYAK_USE_PERF + , fp_(0) +#endif + { + } + // append suffix to funcName + void setNameSuffix(const char *suffix) + { + suffix_ = suffix; + } + void setStartAddr(const void *startAddr) + { + startAddr_ = startAddr; + } + void init(int mode) + { + mode_ = None; + switch (mode) { + default: + case None: + return; + case Perf: +#ifdef XBYAK_USE_PERF + close(); + { + const int pid = getpid(); + char name[128]; + snprintf(name, sizeof(name), "/tmp/perf-%d.map", pid); + fp_ = fopen(name, "a+"); + if (fp_ == 0) { + fprintf(stderr, "can't open %s\n", name); + return; + } + } + mode_ = Perf; +#endif + return; + case VTune: +#ifdef XBYAK_USE_VTUNE + dlopen("dummy", RTLD_LAZY); // force to load dlopen to enable jit profiling + if (iJIT_IsProfilingActive() != iJIT_SAMPLING_ON) { + fprintf(stderr, "VTune profiling is not active\n"); + return; + } + mode_ = VTune; +#endif + return; + } + } + ~Profiler() + { + close(); + } + void close() + { +#ifdef XBYAK_USE_PERF + if (fp_ == 0) return; + fclose(fp_); + fp_ = 0; +#endif + } + void set(const char *funcName, const void *startAddr, size_t funcSize) const + { + if (mode_ == None) return; +#if !defined(XBYAK_USE_PERF) && !defined(XBYAK_USE_VTUNE) + (void)funcName; + (void)startAddr; + (void)funcSize; +#endif +#ifdef XBYAK_USE_PERF + if (mode_ == Perf) { + if (fp_ == 0) return; + fprintf(fp_, "%llx %zx %s%s", (long long)startAddr, funcSize, funcName, suffix_); + /* + perf does not recognize the function name which is less than 3, + so append '_' at the end of the name if necessary + */ + size_t n = strlen(funcName) + strlen(suffix_); + for (size_t i = n; i < 3; i++) { + fprintf(fp_, "_"); + } + fprintf(fp_, "\n"); + fflush(fp_); + } +#endif +#ifdef XBYAK_USE_VTUNE + if (mode_ != VTune) return; + char className[] = ""; + char fileName[] = ""; + iJIT_Method_Load jmethod = {}; + jmethod.method_id = iJIT_GetNewMethodID(); + jmethod.class_file_name = className; + jmethod.source_file_name = fileName; + jmethod.method_load_address = const_cast(startAddr); + jmethod.method_size = funcSize; + jmethod.line_number_size = 0; + char buf[128]; + snprintf(buf, sizeof(buf), "%s%s", funcName, suffix_); + jmethod.method_name = buf; + iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, (void*)&jmethod); +#endif + } + /* + for continuous set + funcSize = endAddr - + */ + void set(const char *funcName, const void *endAddr) + { + set(funcName, startAddr_, (size_t)endAddr - (size_t)startAddr_); + startAddr_ = endAddr; + } +}; +#endif // XBYAK_ONLY_CLASS_CPU + +} } // end of util + #endif