From 4ed09fda062f47a3b7a7b288f0c8875716c6974d Mon Sep 17 00:00:00 2001 From: MerryMage Date: Wed, 22 Apr 2020 20:25:57 +0100 Subject: [PATCH] Squashed 'externals/xbyak/' content from commit d512551e git-subtree-dir: externals/xbyak git-subtree-split: d512551e914737300ba35f3c049d1b40effbe76d --- CMakeLists.txt | 6 + COPYRIGHT | 47 + Makefile | 24 + gen/Makefile | 26 + gen/avx_type.hpp | 160 ++ gen/b2hex.cpp | 17 + gen/gen_avx512.cpp | 697 ++++++++ gen/gen_code.cpp | 1782 ++++++++++++++++++++ gen/sortline.cpp | 23 + gen/update.bat | 17 + readme.md | 453 +++++ readme.txt | 473 ++++++ sample/Makefile | 109 ++ sample/bf.cpp | 213 +++ sample/bf.vcproj | 427 +++++ sample/calc.cpp | 229 +++ sample/calc.vcproj | 423 +++++ sample/calc2.cpp | 302 ++++ sample/echo.bf | 5 + sample/fizzbuzz.bf | 19 + sample/hello.bf | 3 + sample/jmp_table.cpp | 128 ++ sample/memfunc.cpp | 111 ++ sample/quantize.cpp | 229 +++ sample/quantize.vcproj | 427 +++++ sample/stackframe.cpp | 29 + sample/static_buf.cpp | 41 + sample/test0.cpp | 186 +++ sample/test0.vcproj | 427 +++++ sample/test_util.cpp | 109 ++ sample/test_util.vcproj | 427 +++++ sample/toyvm.cpp | 380 +++++ sample/toyvm.vcproj | 427 +++++ test/6.bat | 8 + test/Makefile | 74 + test/Makefile.win | 14 + test/a.bat | 9 + test/address.cpp | 155 ++ test/bad_address.cpp | 45 + test/cvt_test.cpp | 125 ++ test/cybozu/COPYRIGHT | 27 + test/cybozu/inttype.hpp | 121 ++ test/cybozu/test.hpp | 345 ++++ test/jmp.cpp | 1151 +++++++++++++ test/jmp.sln | 20 + test/jmp.vcproj | 195 +++ test/lib.h | 63 + test/lib_min.cpp | 51 + test/lib_run.cpp | 9 + test/lib_test.cpp | 13 + test/make_512.cpp | 2190 ++++++++++++++++++++++++ test/make_nm.cpp | 3309 +++++++++++++++++++++++++++++++++++++ test/misc.cpp | 105 ++ test/mprotect_test.cpp | 37 + test/nm_frame.cpp | 51 + test/normalize_prefix.cpp | 45 + test/readme.txt | 6 + test/rip-label-imm.cpp | 88 + test/sf_test.cpp | 338 ++++ test/state.pptx | Bin 0 -> 68370 bytes test/test_address.bat | 37 + 
test/test_address.sh | 42 + test/test_all.bat | 8 + test/test_avx.bat | 42 + test/test_avx.sh | 44 + test/test_avx512.bat | 31 + test/test_avx512.sh | 33 + test/test_avx_all.bat | 9 + test/test_jmp.bat | 4 + test/test_misc.bat | 4 + test/test_mmx.cpp | 78 + test/test_nm.bat | 43 + test/test_nm.sh | 44 + test/test_nm_all.bat | 11 + xbyak.sln | 86 + xbyak/xbyak.h | 2490 ++++++++++++++++++++++++++++ xbyak/xbyak_bin2hex.h | 258 +++ xbyak/xbyak_mnemonic.h | 1928 +++++++++++++++++++++ xbyak/xbyak_util.h | 572 +++++++ 79 files changed, 22734 insertions(+) create mode 100644 CMakeLists.txt create mode 100644 COPYRIGHT create mode 100644 Makefile create mode 100644 gen/Makefile create mode 100644 gen/avx_type.hpp create mode 100644 gen/b2hex.cpp create mode 100644 gen/gen_avx512.cpp create mode 100644 gen/gen_code.cpp create mode 100644 gen/sortline.cpp create mode 100644 gen/update.bat create mode 100644 readme.md create mode 100644 readme.txt create mode 100644 sample/Makefile create mode 100644 sample/bf.cpp create mode 100644 sample/bf.vcproj create mode 100644 sample/calc.cpp create mode 100644 sample/calc.vcproj create mode 100644 sample/calc2.cpp create mode 100644 sample/echo.bf create mode 100644 sample/fizzbuzz.bf create mode 100644 sample/hello.bf create mode 100644 sample/jmp_table.cpp create mode 100644 sample/memfunc.cpp create mode 100644 sample/quantize.cpp create mode 100644 sample/quantize.vcproj create mode 100644 sample/stackframe.cpp create mode 100644 sample/static_buf.cpp create mode 100644 sample/test0.cpp create mode 100644 sample/test0.vcproj create mode 100644 sample/test_util.cpp create mode 100644 sample/test_util.vcproj create mode 100644 sample/toyvm.cpp create mode 100644 sample/toyvm.vcproj create mode 100644 test/6.bat create mode 100644 test/Makefile create mode 100644 test/Makefile.win create mode 100644 test/a.bat create mode 100644 test/address.cpp create mode 100644 test/bad_address.cpp create mode 100644 test/cvt_test.cpp create mode 
100644 test/cybozu/COPYRIGHT create mode 100644 test/cybozu/inttype.hpp create mode 100644 test/cybozu/test.hpp create mode 100644 test/jmp.cpp create mode 100644 test/jmp.sln create mode 100644 test/jmp.vcproj create mode 100644 test/lib.h create mode 100644 test/lib_min.cpp create mode 100644 test/lib_run.cpp create mode 100644 test/lib_test.cpp create mode 100644 test/make_512.cpp create mode 100644 test/make_nm.cpp create mode 100644 test/misc.cpp create mode 100644 test/mprotect_test.cpp create mode 100644 test/nm_frame.cpp create mode 100644 test/normalize_prefix.cpp create mode 100644 test/readme.txt create mode 100644 test/rip-label-imm.cpp create mode 100644 test/sf_test.cpp create mode 100644 test/state.pptx create mode 100644 test/test_address.bat create mode 100755 test/test_address.sh create mode 100644 test/test_all.bat create mode 100644 test/test_avx.bat create mode 100755 test/test_avx.sh create mode 100644 test/test_avx512.bat create mode 100755 test/test_avx512.sh create mode 100644 test/test_avx_all.bat create mode 100644 test/test_jmp.bat create mode 100644 test/test_misc.bat create mode 100644 test/test_mmx.cpp create mode 100644 test/test_nm.bat create mode 100755 test/test_nm.sh create mode 100644 test/test_nm_all.bat create mode 100644 xbyak.sln create mode 100644 xbyak/xbyak.h create mode 100644 xbyak/xbyak_bin2hex.h create mode 100644 xbyak/xbyak_mnemonic.h create mode 100644 xbyak/xbyak_util.h diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..be131b14 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,6 @@ +cmake_minimum_required(VERSION 2.6) +project(xbyak) + +file(GLOB headers xbyak/*.h) +install(FILES ${headers} DESTINATION include/xbyak) + diff --git a/COPYRIGHT b/COPYRIGHT new file mode 100644 index 00000000..78d3140b --- /dev/null +++ b/COPYRIGHT @@ -0,0 +1,47 @@ + +Copyright (c) 2007 MITSUNARI Shigeo +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. +Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. +Neither the name of the copyright owner nor the names of its contributors may +be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +THE POSSIBILITY OF SUCH DAMAGE. 
+----------------------------------------------------------------------------- +ソースコード形式かバイナリ形式か、変更するかしないかを問わず、以下の条件を満た +す場合に限り、再頒布および使用が許可されます。 + +ソースコードを再頒布する場合、上記の著作権表示、本条件一覧、および下記免責条項 +を含めること。 +バイナリ形式で再頒布する場合、頒布物に付属のドキュメント等の資料に、上記の著作 +権表示、本条件一覧、および下記免責条項を含めること。 +書面による特別の許可なしに、本ソフトウェアから派生した製品の宣伝または販売促進 +に、著作権者の名前またはコントリビューターの名前を使用してはならない。 +本ソフトウェアは、著作権者およびコントリビューターによって「現状のまま」提供さ +れており、明示黙示を問わず、商業的な使用可能性、および特定の目的に対する適合性 +に関する暗黙の保証も含め、またそれに限定されない、いかなる保証もありません。 +著作権者もコントリビューターも、事由のいかんを問わず、 損害発生の原因いかんを +問わず、かつ責任の根拠が契約であるか厳格責任であるか(過失その他の)不法行為で +あるかを問わず、仮にそのような損害が発生する可能性を知らされていたとしても、 +本ソフトウェアの使用によって発生した(代替品または代用サービスの調達、使用の +喪失、データの喪失、利益の喪失、業務の中断も含め、またそれに限定されない)直接 +損害、間接損害、偶発的な損害、特別損害、懲罰的損害、または結果損害について、 +一切責任を負わないものとします。 diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..a7850a22 --- /dev/null +++ b/Makefile @@ -0,0 +1,24 @@ +PREFIX=/usr/local +INSTALL_DIR=$(PREFIX)/include/xbyak + +all: + $(MAKE) -C sample + +clean: + $(MAKE) -C sample clean + +install: + mkdir -p $(INSTALL_DIR) + cp -pR xbyak/*.h $(INSTALL_DIR) + +uninstall: + rm -i $(INSTALL_DIR)/*.h + rmdir $(INSTALL_DIR) + +update: + $(MAKE) -C gen + +test: + $(MAKE) -C test test + +.PHONY: test update diff --git a/gen/Makefile b/gen/Makefile new file mode 100644 index 00000000..53d1a948 --- /dev/null +++ b/gen/Makefile @@ -0,0 +1,26 @@ +TARGET=../xbyak/xbyak_mnemonic.h +BIN=sortline gen_code gen_avx512 +CFLAGS=-I../ -O2 -DXBYAK_NO_OP_NAMES -Wall -Wextra -Wno-missing-field-initializers +all: $(TARGET) +sortline: sortline.cpp + $(CXX) $(CFLAGS) $< -o $@ +gen_code: gen_code.cpp ../xbyak/xbyak.h avx_type.hpp + $(CXX) $(CFLAGS) $< -o $@ +gen_avx512: gen_avx512.cpp ../xbyak/xbyak.h avx_type.hpp + $(CXX) $(CFLAGS) $< -o $@ + +$(TARGET): $(BIN) + ./gen_code | ./sortline > $@ + echo "#ifdef XBYAK_ENABLE_OMITTED_OPERAND" >> $@ + ./gen_code omit | ./sortline >> $@ + echo "#endif" >>$@ + ./gen_code fixed >> $@ + echo "#ifndef XBYAK_DISABLE_AVX512" >> $@ + ./gen_avx512 | ./sortline >> $@ + echo "#ifdef 
XBYAK64" >> $@ + ./gen_avx512 64 | ./sortline >> $@ + echo "#endif" >> $@ + echo "#endif" >> $@ + +clean: + $(RM) $(BIN) $(TARGET) diff --git a/gen/avx_type.hpp b/gen/avx_type.hpp new file mode 100644 index 00000000..244a2c04 --- /dev/null +++ b/gen/avx_type.hpp @@ -0,0 +1,160 @@ +#include +// copy CodeGenerator::AVXtype + enum AVXtype { + // low 3 bit + T_N1 = 1, + T_N2 = 2, + T_N4 = 3, + T_N8 = 4, + T_N16 = 5, + T_N32 = 6, + T_NX_MASK = 7, + // + T_N_VL = 1 << 3, // N * (1, 2, 4) for VL + T_DUP = 1 << 4, // N = (8, 32, 64) + T_66 = 1 << 5, + T_F3 = 1 << 6, + T_F2 = 1 << 7, + T_0F = 1 << 8, + T_0F38 = 1 << 9, + T_0F3A = 1 << 10, + T_L0 = 1 << 11, + T_L1 = 1 << 12, + T_W0 = 1 << 13, + T_W1 = 1 << 14, + T_EW0 = 1 << 15, + T_EW1 = 1 << 16, + T_YMM = 1 << 17, // support YMM, ZMM + T_EVEX = 1 << 18, + T_ER_X = 1 << 19, // xmm{er} + T_ER_Y = 1 << 20, // ymm{er} + T_ER_Z = 1 << 21, // zmm{er} + T_SAE_X = 1 << 22, // xmm{sae} + T_SAE_Y = 1 << 23, // ymm{sae} + T_SAE_Z = 1 << 24, // zmm{sae} + T_MUST_EVEX = 1 << 25, // contains T_EVEX + T_B32 = 1 << 26, // m32bcst + T_B64 = 1 << 27, // m64bcst + T_M_K = 1 << 28, // mem{k} + T_XXX + }; + +const int NONE = 256; // same as Xbyak::CodeGenerator::NONE + +std::string type2String(int type) +{ + std::string str; + int low = type & T_NX_MASK; + if (0 < low) { + const char *tbl[8] = { + "T_N1", "T_N2", "T_N4", "T_N8", "T_N16", "T_N32" + }; + assert(low < int(sizeof(tbl) / sizeof(tbl[0]))); + str = tbl[low - 1]; + } + if (type & T_N_VL) { + if (!str.empty()) str += " | "; + str += "T_N_VL"; + } + if (type & T_DUP) { + if (!str.empty()) str += " | "; + str += "T_DUP"; + } + if (type & T_66) { + if (!str.empty()) str += " | "; + str += "T_66"; + } + if (type & T_F3) { + if (!str.empty()) str += " | "; + str += "T_F3"; + } + if (type & T_F2) { + if (!str.empty()) str += " | "; + str += "T_F2"; + } + if (type & T_0F) { + if (!str.empty()) str += " | "; + str += "T_0F"; + } + if (type & T_0F38) { + if (!str.empty()) str += " | "; + str += 
"T_0F38"; + } + if (type & T_0F3A) { + if (!str.empty()) str += " | "; + str += "T_0F3A"; + } + if (type & T_L0) { + if (!str.empty()) str += " | "; + str += "VEZ_L0"; + } + if (type & T_L1) { + if (!str.empty()) str += " | "; + str += "VEZ_L1"; + } + if (type & T_W0) { + if (!str.empty()) str += " | "; + str += "T_W0"; + } + if (type & T_W1) { + if (!str.empty()) str += " | "; + str += "T_W1"; + } + if (type & T_EW0) { + if (!str.empty()) str += " | "; + str += "T_EW0"; + } + if (type & T_EW1) { + if (!str.empty()) str += " | "; + str += "T_EW1"; + } + if (type & T_YMM) { + if (!str.empty()) str += " | "; + str += "T_YMM"; + } + if (type & T_EVEX) { + if (!str.empty()) str += " | "; + str += "T_EVEX"; + } + if (type & T_ER_X) { + if (!str.empty()) str += " | "; + str += "T_ER_X"; + } + if (type & T_ER_Y) { + if (!str.empty()) str += " | "; + str += "T_ER_Y"; + } + if (type & T_ER_Z) { + if (!str.empty()) str += " | "; + str += "T_ER_Z"; + } + if (type & T_SAE_X) { + if (!str.empty()) str += " | "; + str += "T_SAE_X"; + } + if (type & T_SAE_Y) { + if (!str.empty()) str += " | "; + str += "T_SAE_Y"; + } + if (type & T_SAE_Z) { + if (!str.empty()) str += " | "; + str += "T_SAE_Z"; + } + if (type & T_MUST_EVEX) { + if (!str.empty()) str += " | "; + str += "T_MUST_EVEX"; + } + if (type & T_B32) { + if (!str.empty()) str += " | "; + str += "T_B32"; + } + if (type & T_B64) { + if (!str.empty()) str += " | "; + str += "T_B64"; + } + if (type & T_M_K) { + if (!str.empty()) str += " | "; + str += "T_M_K"; + } + return str; +} diff --git a/gen/b2hex.cpp b/gen/b2hex.cpp new file mode 100644 index 00000000..150ade8b --- /dev/null +++ b/gen/b2hex.cpp @@ -0,0 +1,17 @@ +#include + +int main() +{ + puts("enum {"); + for (int i = 0; i < 256; i++) { + printf(" B"); + for (int j = 0; j < 8; j++) { + putchar(i & (1 << (7 - j)) ? 
'1' : '0'); + } + printf("= %d", i); + if (i < 255) putchar(','); + putchar('\n'); + } + puts("};"); + return 0; +} \ No newline at end of file diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp new file mode 100644 index 00000000..1c8cf9bc --- /dev/null +++ b/gen/gen_avx512.cpp @@ -0,0 +1,697 @@ +#define XBYAK_DONT_READ_LIST +#include +#include +#include "../xbyak/xbyak.h" +#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0])) + +using namespace Xbyak; +#ifdef _MSC_VER + #pragma warning(disable : 4996) // scanf + #define snprintf _snprintf_s +#endif + +#include "avx_type.hpp" + +void putOpmask(bool only64bit) +{ + if (only64bit) { + puts("void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); }"); + puts("void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); }"); + return; + } + + { + const struct Tbl { + const char *name; + uint8 code; + } tbl[] = { + { "kadd", 0x4A }, + { "kand", 0x41 }, + { "kandn", 0x42 }, + { "kor", 0x45 }, + { "kxnor", 0x46 }, + { "kxor", 0x47 }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl& p = tbl[i]; + printf("void %sw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x%02X); }\n", p.name, p.code); + printf("void %sq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x%02X); }\n", p.name, p.code); + printf("void %sb(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W0, 0x%02X); }\n", p.name, p.code); + printf("void %sd(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W1, 0x%02X); }\n", p.name, p.code); + } + printf("void kunpckbw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W0, 0x4B); }\n"); + printf("void kunpckwd(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F 
| T_W0, 0x4B); }\n"); + printf("void kunpckdq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x4B); }\n"); + } + { + const struct Tbl { + const char *name; + uint8 code; + } tbl[] = { + { "knot", 0x44 }, + { "kortest", 0x98 }, + { "ktest", 0x99 }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl& p = tbl[i]; + printf("void %sw(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W0, 0x%02X); }\n", p.name, p.code); + printf("void %sq(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W1, 0x%02X); }\n", p.name, p.code); + printf("void %sb(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W0, 0x%02X); }\n", p.name, p.code); + printf("void %sd(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W1, 0x%02X); }\n", p.name, p.code); + } + } + { + const struct Tbl { + const char *name; + uint8 code; + } tbl[] = { + { "kshiftl", 0x32 }, + { "kshiftr", 0x30 }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl& p = tbl[i]; + printf("void %sw(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x%02X, imm); }\n", p.name, p.code); + printf("void %sq(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x%02X, imm); }\n", p.name, p.code + 1); + printf("void %sb(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code); + printf("void %sd(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code + 1); + } + } + puts("void kmovw(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); }"); + puts("void kmovq(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); }"); + puts("void kmovb(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); }"); + 
puts("void kmovd(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); }"); + + puts("void kmovw(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W0, 0x91); }"); + puts("void kmovq(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W1, 0x91); }"); + puts("void kmovb(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W0, 0x91); }"); + puts("void kmovd(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W1, 0x91); }"); + + puts("void kmovw(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_W0, 0x92); }"); + puts("void kmovw(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_W0, 0x93); }"); + puts("void kmovb(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_66 | T_W0, 0x92); }"); + puts("void kmovb(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_66 | T_W0, 0x93); }"); + puts("void kmovd(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W0, 0x92); }"); + puts("void kmovd(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W0, 0x93); }"); +} + +// vcmppd(k, x, op) +void putVcmp() +{ + const struct Tbl { + uint8 code; + const char *name; + int type; + bool hasIMM; + } tbl[] = { + { 0xC2, "vcmppd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_YMM | T_66, true }, + { 0xC2, "vcmpps", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_YMM, true }, + { 0xC2, "vcmpsd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_F2 | T_N8, true }, + { 0xC2, "vcmpss", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_F3 | T_N4, true }, + + { 0x74, "vpcmpeqb", T_66 | T_0F | T_MUST_EVEX | T_YMM, false }, + { 0x75, "vpcmpeqw", T_66 | T_0F | T_MUST_EVEX | T_YMM, false }, + { 0x76, "vpcmpeqd", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_B32, false }, + { 0x29, "vpcmpeqq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, + + { 0x64, "vpcmpgtb", T_66 | T_0F | T_MUST_EVEX | 
T_YMM, false }, + { 0x65, "vpcmpgtw", T_66 | T_0F | T_MUST_EVEX | T_YMM, false }, + { 0x66, "vpcmpgtd", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false }, + { 0x37, "vpcmpgtq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, + + { 0x3F, "vpcmpb", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0, true }, + { 0x3E, "vpcmpub", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0, true }, + + { 0x3F, "vpcmpw", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1, true }, + { 0x3E, "vpcmpuw", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1, true }, + { 0x1F, "vpcmpd", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, true }, + { 0x1E, "vpcmpud", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, true }, + { 0x1F, "vpcmpq", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, true }, + { 0x1E, "vpcmpuq", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, true }, + + { 0x26, "vptestmb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false }, + { 0x26, "vptestmw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, + { 0x27, "vptestmd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false }, + { 0x27, "vptestmq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, + + { 0x26, "vptestnmb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false }, + { 0x26, "vptestnmw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, + { 0x27, "vptestnmd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false }, + { 0x27, "vptestnmq", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + std::string type = type2String(p->type); + printf("void %s(const Opmask& k, const Xmm& x, const Operand& op%s) { opAVX_K_X_XM(k, x, op, %s, 0x%02X%s); }\n" + , p->name, p->hasIMM ? ", uint8 imm" : "", type.c_str(), p->code, p->hasIMM ? 
", imm" : ""); + } +} + +// XM_X +void putX_XM() +{ + const struct Tbl { + uint8 code; + const char *name; + int type; + } tbl[] = { + { 0x6F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z }, + { 0x6F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z }, + { 0x6F, "vmovdqu8", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z }, + { 0x6F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z }, + { 0x6F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z }, + { 0x6F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z }, + { 0x7B, "vcvtpd2qq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z }, + // putCvt + { 0x79, "vcvtpd2uqq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z }, + { 0x79, "vcvtps2udq", T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_ER_Z }, + { 0xE6, "vcvtqq2pd", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z }, + { 0x7A, "vcvttpd2qq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z }, + { 0x78, "vcvttpd2uqq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z }, + { 0x78, "vcvttps2udq", T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z }, + { 0x7A, "vcvtudq2ps", T_F2 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z }, + { 0x7A, "vcvtuqq2pd", T_F3 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z }, + + { 0x88, "vexpandpd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 }, + { 0x88, "vexpandps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 }, + + { 0x89, "vpexpandd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 }, + { 0x89, "vpexpandq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 }, + { 0x42, "vgetexppd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z }, + { 0x42, "vgetexpps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z }, + }; + for (size_t i = 0; i < 
NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + std::string type = type2String(p->type); + printf("void %s(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code); + } + puts("void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); }"); + + puts("void vexp2pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xC8); }"); + puts("void vexp2ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xC8); }"); + puts("void vrcp28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xCA); }"); + puts("void vrcp28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCA); }"); + + puts("void vrsqrt28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xCC); }"); + puts("void vrsqrt28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCC); }"); +} + +void putM_X() +{ + const struct Tbl { + uint8 code; + const char *name; + int type; + } tbl[] = { + { 0x7F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z }, + { 0x7F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z }, + { 0x7F, "vmovdqu8", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z }, + { 0x7F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z }, + { 0x7F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z }, + { 0x7F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z }, + }; + for (size_t i = 0; i 
< NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + std::string type = type2String(p->type); + printf("void %s(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, %s, 0x%02X); }\n", p->name, type.c_str(), p->code); + } +} + +void putXM_X() +{ + const struct Tbl { + uint8 code; + const char *name; + int type; + } tbl[] = { + { 0x8A, "vcompresspd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 }, + { 0x8A, "vcompressps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 }, + + { 0x8B, "vpcompressd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 }, + { 0x8B, "vpcompressq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + std::string type = type2String(p->type); + printf("void %s(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code); + } +} + +void putX_X_XM_IMM() +{ + const struct Tbl { + uint8 code; + const char *name; + int type; + bool hasIMM; + } tbl[] = { + { 0x03, "valignd", T_MUST_EVEX | T_66 | T_0F3A | T_EW0 | T_YMM, true }, + { 0x03, "valignq", T_MUST_EVEX | T_66 | T_0F3A | T_EW1 | T_YMM, true }, + { 0xDB, "vpandd", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW0 | T_B32, false }, + { 0xDB, "vpandq", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW1 | T_B64, false }, + { 0xDF, "vpandnd", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW0 | T_B32, false }, + { 0xDF, "vpandnq", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW1 | T_B64, false }, + { 0x3D, "vpmaxsq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, + { 0x3F, "vpmaxuq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, + { 0x39, "vpminsq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, + { 0x3B, "vpminuq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, + { 0xE2, "vpsraq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_N16, false }, + { 0x46, "vpsravq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false 
}, + { 0x11, "vpsravw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, + { 0x12, "vpsllvw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, + { 0x10, "vpsrlvw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, + { 0xEB, "vpord", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false }, + { 0xEB, "vporq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, + + { 0xEF, "vpxord", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false }, + { 0xEF, "vpxorq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, + + { 0x40, "vpmullq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, + + { 0x8D, "vpermb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false }, + { 0x8D, "vpermw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, + + { 0x65, "vblendmpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, + { 0x65, "vblendmps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, + { 0x66, "vpblendmb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, false }, + { 0x66, "vpblendmw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false }, + { 0x64, "vpblendmd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, + { 0x64, "vpblendmq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, + + { 0x7D, "vpermt2b", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, false }, + { 0x7D, "vpermt2w", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false }, + { 0x7E, "vpermt2d", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, + { 0x7E, "vpermt2q", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, + { 0x7F, "vpermt2ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, + { 0x7F, "vpermt2pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, + + { 0x75, "vpermi2b", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, false }, + { 0x75, "vpermi2w", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false }, + { 0x76, "vpermi2d", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 
false }, + { 0x76, "vpermi2q", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, + { 0x77, "vpermi2ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, + { 0x77, "vpermi2pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, + + { 0x25, "vpternlogd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, true }, + { 0x25, "vpternlogq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, true }, + + { 0x43, "vgetexpsd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, false }, + { 0x43, "vgetexpss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, false }, + { 0x27, "vgetmantsd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true }, + { 0x27, "vgetmantss", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true }, + + { 0x54, "vfixupimmpd", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, true }, + { 0x54, "vfixupimmps", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, true }, + { 0x55, "vfixupimmsd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_N8, true }, + { 0x55, "vfixupimmss", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_N4, true }, + + { 0x4D, "vrcp14sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8, false }, + { 0x4D, "vrcp14ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4, false }, + + { 0x4F, "vrsqrt14sd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, false }, + { 0x4F, "vrsqrt14ss", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, false }, + + { 0x0B, "vrndscalesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, true }, + { 0x0A, "vrndscaless", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, true }, + + { 0x2C, "vscalefpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, false }, + { 0x2C, "vscalefps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z, false }, + { 0x2D, "vscalefsd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_ER_X | T_N8, false }, + { 0x2D, "vscalefss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N4, false }, + + { 
0x42, "vdbpsadbw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0, true },
+		{ 0x83, "vpmultishiftqb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
+
+		{ 0x15, "vprolvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
+		{ 0x15, "vprolvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
+
+		{ 0x14, "vprorvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
+		{ 0x14, "vprorvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
+
+		{ 0xCB, "vrcp28sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8 | T_SAE_X, false },
+		{ 0xCB, "vrcp28ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4 | T_SAE_X, false },
+
+		{ 0xCD, "vrsqrt28sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8 | T_SAE_X, false },
+		{ 0xCD, "vrsqrt28ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4 | T_SAE_X, false },
+
+		{ 0x50, "vrangepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
+		{ 0x50, "vrangeps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true },
+		{ 0x51, "vrangesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true },
+		{ 0x51, "vrangess", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true },
+
+		{ 0x57, "vreducesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true },
+		{ 0x57, "vreducess", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true },
+
+		{ 0xB4, "vpmadd52luq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
+		{ 0xB5, "vpmadd52huq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
+	};
+	for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { // one generated (x1, x2, op[, imm]) wrapper per table row
+		const Tbl *p = &tbl[i];
+		std::string type = type2String(p->type);
+		printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X%s); }\n"
+			, p->name, p->hasIMM ? ", uint8 imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); // the imm parameter and argument are printed only for rows with hasIMM
+	}
+}
+
+void putShift() // Prints to stdout one generated wrapper "void name(x, op, imm)" per row of tbl (vpsraq, vprold/q, vprord/q).
+{
+	const struct Tbl {
+		const char *name; // mnemonic; becomes the generated function name
+		uint8 code; // opcode byte, printed as 0x%02X
+		int idx; // printed as the index of the Xmm(x.getKind(), idx) first operand; presumably the opcode's /digit extension -- confirm
+		int type; // T_* flag set, rendered to text by type2String()
+	} tbl[] = {
+		{ "vpsraq", 0x72, 4, T_0F | T_66 | T_YMM | T_MUST_EVEX |T_EW1 | T_B64 },
+		{ "vprold", 0x72, 1, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 },
+		{ "vprolq", 0x72, 1, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 },
+		{ "vprord", 0x72, 0, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 },
+		{ "vprorq", 0x72, 0, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 },
+	};
+	for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
+		const Tbl& p = tbl[i];
+		std::string type = type2String(p.type);
+		printf("void %s(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, type.c_str(), p.code); // generated body passes x as the second operand and op as the third
+	}
+}
+
+void putExtractInsert() // Prints the vextract{f,i}* wrappers (first table) and the vinsert{f,i}* wrappers (second table).
+{
+	{
+		const struct Tbl {
+			const char *name; // mnemonic; becomes the generated function name
+			uint8 code; // opcode byte
+			int type; // T_* flag set, rendered to text by type2String()
+			bool isZMM; // true: generated register parameter is Zmm and op must be MEM|YMM; false: Ymm and MEM|XMM
+		} tbl[] = {
+			{ "vextractf32x4", 0x19, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false },
+			{ "vextractf64x2", 0x19, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false },
+			{ "vextractf32x8", 0x1B, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true },
+			{ "vextractf64x4", 0x1B, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true },
+
+			{ "vextracti32x4", 0x39, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false },
+			{ "vextracti64x2", 0x39, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false },
+			{ "vextracti32x8", 0x3B, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true },
+			{ "vextracti64x4", 0x3B, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true },
+		};
+		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
+			const Tbl& p = tbl[i];
+			std::string type = type2String(p.type);
+			const char *kind = p.isZMM ?
"Operand::MEM | Operand::YMM" : "Operand::MEM | Operand::XMM"; + printf("void %s(const Operand& op, const %s& r, uint8 imm) { if (!op.is(%s)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, %s, 0x%2X, imm); }\n", p.name, p.isZMM ? "Zmm" : "Ymm", kind, type.c_str(), p.code); + } + } + { + const struct Tbl { + const char *name; + uint8 code; + int type; + bool isZMM; + } tbl[] = { + { "vinsertf32x4", 0x18, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false }, + { "vinsertf64x2", 0x18, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false }, + { "vinsertf32x8", 0x1A, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true }, + { "vinsertf64x4", 0x1A, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true }, + + { "vinserti32x4", 0x38, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false }, + { "vinserti64x2", 0x38, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false }, + { "vinserti32x8", 0x3A, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true }, + { "vinserti64x4", 0x3A, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl& p = tbl[i]; + std::string type = type2String(p.type); + const char *x = p.isZMM ? "Zmm" : "Ymm"; + const char *cond = p.isZMM ? 
"op.is(Operand::MEM | Operand::YMM)" : "(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))"; + printf("void %s(const %s& r1, const %s& r2, const Operand& op, uint8 imm) {" + "if (!%s) throw Error(ERR_BAD_COMBINATION); " + "opVex(r1, &r2, op, %s, 0x%2X, imm); }\n", p.name, x, x, cond, type.c_str(), p.code); + } + } +} + +void putBroadcast(bool only64bit) +{ + { + const struct Tbl { + uint8 code; + const char *name; + int type; + int reg; + } tbl[] = { + { 0x7A, "vpbroadcastb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 8 }, + { 0x7B, "vpbroadcastw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 16 }, + { 0x7C, "vpbroadcastd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 32 }, + { 0x7C, "vpbroadcastq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 64}, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl& p = tbl[i]; + std::string type = type2String(p.type); + if ((only64bit && p.reg == 64) || (!only64bit && p.reg != 64)) { + printf("void %s(const Xmm& x, const Reg%d& r) { opVex(x, 0, r, %s, 0x%02X); }\n", p.name, p.reg, type.c_str(), p.code); + } + } + } + if (only64bit) return; + puts("void vbroadcastf32x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N8, 0x19); }"); + puts("void vbroadcastf32x4(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N16, 0x1A); }"); + puts("void vbroadcastf64x2(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x1A); }"); + puts("void vbroadcastf64x4(const Zmm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x1B); }"); + puts("void vbroadcastf32x8(const Zmm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32, 0x1B); }"); + + puts("void vbroadcasti32x2(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 
| T_YMM | T_MUST_EVEX | T_EW0 | T_N8, 0x59); }"); + puts("void vbroadcasti32x4(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N16, 0x5A); }"); + puts("void vbroadcasti64x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x5A); }"); + puts("void vbroadcasti32x8(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32, 0x5B); }"); + puts("void vbroadcasti64x4(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x5B); }"); +} + +void putCvt() +{ + puts("void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x79); }"); + puts("void vcvtps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 0x7B); }"); + puts("void vcvtps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 0x79); }"); + puts("void vcvtqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5B); }"); + puts("void vcvttpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, 0x78); }"); + puts("void vcvttps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x7A); }"); + puts("void vcvttps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x78); }"); + puts("void vcvtudq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_F3 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0x7A); }"); + + 
puts("void vcvtsd2usi(const Reg32e& r, const Operand& op) { int type = (T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_ER_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x79); }"); // the four *2usi wrappers choose T_EW1/T_EW0 in the generated code from r.isREG(64)
+	puts("void vcvtss2usi(const Reg32e& r, const Operand& op) { int type = (T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_ER_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x79); }");
+	puts("void vcvttsd2usi(const Reg32e& r, const Operand& op) { int type = (T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_SAE_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x78); }");
+	puts("void vcvttss2usi(const Reg32e& r, const Operand& op) { int type = (T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_SAE_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x78); }");
+	puts("void vcvtuqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F2 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x7A); }");
+	puts("void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }");
+	puts("void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }");
+}
+
+enum { // same as xbyak.h; these values are printed as the last argument of the generated opGather2 calls
+	xx_yy_zz = 0, // NOTE(review): names presumably encode the allowed register-width pairings for the xmm/ymm/zmm forms -- confirm in xbyak.h
+	xx_yx_zy = 1,
+	xx_xy_yz = 2,
+};
+void putGather() // Prints one generated wrapper "void name(const Xmm& x, const Address& addr)" per row, forwarding to opGather2.
+{
+	const struct Tbl {
+		const char *name; // mnemonic; becomes the generated function name
+		int type; // T_* flag set, rendered to text by type2String()
+		uint8 code; // opcode byte, printed as 0x%02X
+		int mode; // one of the xx_* enumerators above, printed as a decimal
+	} tbl[] = {
+		{ "vpgatherdd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x90, xx_yy_zz },
+		{ "vpgatherdq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x90, xx_yx_zy },
+		{ "vpgatherqd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x91, xx_xy_yz },
+		{ "vpgatherqq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x91, xx_yy_zz },
+		{ "vgatherdps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x92, xx_yy_zz },
+		{ "vgatherdpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x92, xx_yx_zy },
+		{ "vgatherqps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x93, xx_xy_yz },
+		{ "vgatherqpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x93, xx_yy_zz },
+	};
+	for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
+		const Tbl& p = tbl[i];
+		std::string type = type2String(p.type);
+		printf("void %s(const Xmm& x, const Address& addr) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode);
+	}
+}
+void putScatter() // Same generated opGather2 call as putGather, but the wrapper takes (addr, x) and every row adds T_M_K.
+{
+	const struct Tbl {
+		const char *name; // mnemonic; becomes the generated function name
+		int type; // T_* flag set, rendered to text by type2String()
+		uint8 code; // opcode byte, printed as 0x%02X
+		int mode; // reverse of gather
+	} tbl[] = {
+		{ "vpscatterdd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA0, xx_yy_zz },
+		{ "vpscatterdq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA0, xx_yx_zy },
+		{ "vpscatterqd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA1, xx_xy_yz },
+		{ "vpscatterqq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA1, xx_yy_zz },
+
+		{ "vscatterdps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA2, xx_yy_zz },
+		{ "vscatterdpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA2, xx_yx_zy },
+		{ "vscatterqps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA3, xx_xy_yz },
+		{ "vscatterqpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA3, xx_yy_zz },
+	};
+	for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
+		const Tbl& p = tbl[i];
+		std::string type = type2String(p.type);
+		printf("void %s(const Address& addr, const Xmm& x) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode);
+	}
+}
+
+void putShuff() // Prints the four vshuf{f,i}{32x4,64x2} wrappers as fixed strings; all take Ymm operands plus an imm byte.
+{
+	puts("void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); }");
+	puts("void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }");
+	puts("void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }");
+	puts("void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }");
+}
+
+void putMov() // Prints the vpmovm2*/vpmov*2m wrappers as fixed strings, then the table-driven vpmov* wrappers that forward to opVmov.
+{
+	puts("void vpmovm2b(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x28); }");
+	puts("void vpmovm2w(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x28); }");
+	puts("void vpmovm2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x38); }");
+	puts("void vpmovm2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x38); }");
+
+	puts("void vpmovb2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x29); }");
+	puts("void vpmovw2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x29); }");
+	puts("void vpmovd2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x39); }");
+	puts("void vpmovq2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x39); }");
+
+	{
+		const struct Tbl {
+			uint8_t code; // opcode byte; NOTE(review): spelled uint8_t here, uint8 in the other tables
+			const char *name; // mnemonic; becomes the generated function name
+			int type; // T_* flag set, rendered to text by type2String()
+			int mode; // initialised with true/false in the rows and printed as "true"/"false" (last opVmov argument)
+		} tbl[] = {
+			{ 0x32, "vpmovqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false },
+			{ 0x22, "vpmovsqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false },
+			{ 0x12, "vpmovusqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false },
+
+			{ 0x34, "vpmovqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
+			{ 0x24, "vpmovsqw", T_F3 | T_0F38 |
T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false }, + { 0x14, "vpmovusqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false }, + + { 0x35, "vpmovqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true }, + { 0x25, "vpmovsqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true }, + { 0x15, "vpmovusqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true }, + + { 0x31, "vpmovdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false }, + { 0x21, "vpmovsdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false }, + { 0x11, "vpmovusdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false }, + + { 0x33, "vpmovdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true }, + { 0x23, "vpmovsdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true }, + { 0x13, "vpmovusdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true }, + + { 0x30, "vpmovwb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true }, + { 0x20, "vpmovswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true }, + { 0x10, "vpmovuswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl& p = tbl[i]; + std::string type = type2String(p.type); + printf("void %s(const Operand& op, const Xmm& x) { opVmov(op, x, %s, 0x%02X, %s); }\n", p.name, type.c_str(), p.code, p.mode ? 
"true" : "false"); + } + } +} + +void putX_XM_IMM() +{ + const struct Tbl { + uint8 code; + const char *name; + int type; + bool hasIMM; + } tbl[] = { + { 0x26, "vgetmantpd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true }, + { 0x26, "vgetmantps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true }, + { 0x4C, "vrcp14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, + { 0x4C, "vrcp14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, + + { 0x4E, "vrsqrt14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, + { 0x4E, "vrsqrt14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, + + { 0x09, "vrndscalepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, true }, + { 0x08, "vrndscaleps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, true }, + + { 0xC4, "vpconflictd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, + { 0xC4, "vpconflictq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, + + { 0x44, "vplzcntd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, + { 0x44, "vplzcntq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, + + { 0x56, "vreducepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true }, + { 0x56, "vreduceps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + std::string type = type2String(p->type); + printf("void %s(const Xmm& x, const Operand& op%s) { opAVX_X_XM_IMM(x, op, %s, 0x%02X%s); }\n" + , p->name, p->hasIMM ? ", uint8 imm" : "", type.c_str(), p->code, p->hasIMM ? 
", imm" : ""); + } +} + +void putMisc() +{ + puts("void vpbroadcastmb2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x2A); }"); + puts("void vpbroadcastmw2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x3A); }"); + + puts("void vgatherpf0dps(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }"); + puts("void vgatherpf0qps(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }"); + puts("void vgatherpf0dpd(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }"); + puts("void vgatherpf0qpd(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }"); + + puts("void vgatherpf1dps(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }"); + puts("void vgatherpf1qps(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }"); + puts("void vgatherpf1dpd(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }"); + puts("void vgatherpf1qpd(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }"); + + puts("void vscatterpf0dps(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }"); + puts("void vscatterpf0qps(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }"); + puts("void vscatterpf0dpd(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); 
}"); + puts("void vscatterpf0qpd(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }"); + + puts("void vscatterpf1dps(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }"); + puts("void vscatterpf1qps(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }"); + puts("void vscatterpf1dpd(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }"); + puts("void vscatterpf1qpd(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }"); + + puts("void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); Reg x = k; x.setBit(op.getBit()); opVex(x, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }"); + puts("void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); Reg x = k; x.setBit(op.getBit()); opVex(x, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }"); + puts("void vfpclasssd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }"); + puts("void vfpclassss(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }"); +} + +void putV4FMA() +{ + puts("void v4fmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x9A); }"); + puts("void v4fnmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 
| T_YMM | T_MUST_EVEX | T_N16, 0xAA); }");
+	puts("void v4fmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0x9B); }");
+	puts("void v4fnmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0xAB); }");
+	puts("void vp4dpwssd(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x52); }");
+	puts("void vp4dpwssds(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x53); }");
+}
+
+int main(int argc, char *[]) // Runs every emitter; the generated declarations go to stdout.
+{
+	bool only64bit = argc == 2; // exactly one command-line argument (its value is ignored) selects the 64-bit-only subset
+	putOpmask(only64bit);
+	putBroadcast(only64bit);
+	if (only64bit) return 0; // the 64-bit-only run stops after the opmask and broadcast emitters
+	putVcmp();
+	putX_XM();
+	putM_X();
+	putXM_X();
+	putX_X_XM_IMM();
+	putShift();
+	putExtractInsert();
+	putCvt();
+	putGather();
+	putShuff();
+	putMov();
+	putX_XM_IMM();
+	putMisc();
+	putScatter();
+	putV4FMA();
+}
diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp
new file mode 100644
index 00000000..4e5fd89d
--- /dev/null
+++ b/gen/gen_code.cpp
@@ -0,0 +1,1782 @@
+#define XBYAK_DONT_READ_LIST
+#include <stdio.h> // printf / puts / snprintf used by every emitter below
+#include <string.h> // NOTE(review): header name was stripped from this copy; <string.h> is what upstream is believed to use -- confirm
+#include "xbyak/xbyak.h"
+#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
+
+using namespace Xbyak;
+#ifdef _MSC_VER
+	#pragma warning(disable : 4996) // scanf
+	#define snprintf _snprintf_s
+#endif
+
+#include "avx_type.hpp"
+/*
+	print the std::string and the Label overload of j<reg>z; reg = cx/ecx/rcx
+	if prefix is true the generated body emits db(0x67) before calling opJmp
+*/
+void put_jREGz(const char *reg, bool prefix)
+{
+	printf("void j%sz(std::string label) { %sopJmp(label, T_SHORT, 0xe3, 0, 0); }\n", reg, prefix ? "db(0x67); " : "");
+	printf("void j%sz(const Label& label) { %sopJmp(label, T_SHORT, 0xe3, 0, 0); }\n", reg, prefix ? "db(0x67); " : "");
+}
+
+struct GenericTbl { // one operand-less mnemonic emitted as up to three raw bytes
+	const char *name; // generated function name
+	uint8 code1; // always emitted
+	uint8 code2; // emitted only when non-zero
+	uint8 code3; // emitted only when non-zero
+};
+
+void putGeneric(const GenericTbl *p, size_t n) // Prints "void name() { db(..); ... }" for each of the n entries starting at p.
+{
+	for (size_t i = 0; i < n; i++) {
+		printf("void %s() { db(0x%02X); ", p->name, p->code1);
+		if (p->code2) printf("db(0x%02X); ", p->code2);
+		if (p->code3) printf("db(0x%02X); ", p->code3);
+		printf("}\n");
+		p++;
+	}
+}
+
+void putX_X_XM(bool omitOnly) // omitOnly: print only the v<name>(x, op[, imm]) shorthands; otherwise print the SSE and/or AVX forms chosen by each row's mode
+{
+	// (x, x, x/m[, imm]) or (y, y, y/m[, imm])
+	{
+		const struct Tbl {
+			uint8 code; // opcode byte, printed as 0x%02X
+			const char *name; // SSE mnemonic; the AVX form is printed as v<name>
+			int type; // T_* flag set for the AVX form, rendered to text by type2String()
+			bool hasIMM; // generated functions take a trailing immediate
+			bool enableOmit; // row gets a shorthand when omitOnly is set
+			int mode; // 1 : sse, 2 : avx, 3 : sse + avx
+		} tbl[] = {
+			{ 0x0D, "blendpd", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
+			{ 0x0C, "blendps", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
+			{ 0x41, "dppd", T_0F3A | T_66 | T_W0, true, true, 3 },
+			{ 0x40, "dpps", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
+			{ 0x42, "mpsadbw", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
+			{ 0x0E, "pblendw", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 },
+			{ 0x02, "pblendd", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 2 },
+			{ 0x0B, "roundsd", T_0F3A | T_66 | T_W0, true, true, 3 },
+			{ 0x0A, "roundss", T_0F3A | T_66 | T_W0, true, true, 3 },
+			{ 0x44, "pclmulqdq", T_0F3A | T_66 | T_W0, true, true, 3 },
+			{ 0x0C, "permilps", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false, 2 },
+			{ 0x0D, "permilpd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_B64, false, false, 2 },
+
+			{ 0x47, "psllvd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false, 2 },
+			{ 0x47, "psllvq", T_0F38 | T_66 | T_W1 | T_YMM | T_EVEX | T_EW1 | T_B64, false, false, 2 },
+			{ 0x46, "psravd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false, 2 },
+			{ 0x45, "psrlvd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false, 2 },
+			{ 0x45, "psrlvq", T_0F38 | T_66 | T_W1 | T_YMM | T_EVEX | T_EW1 | T_B64, false, false, 2 },
+
+			{ 0xC2, "cmppd", T_0F | T_66 | T_YMM, true,
true, 2 }, + { 0xC2, "cmpps", T_0F | T_YMM, true, true, 2 }, + { 0xC2, "cmpsd", T_0F | T_F2, true, true, 2 }, + { 0xC2, "cmpss", T_0F | T_F3, true, true, 2 }, + { 0x5A, "cvtsd2ss", T_0F | T_F2 | T_EVEX | T_EW1 | T_N8 | T_ER_X, false, true, 2 }, + { 0x5A, "cvtss2sd", T_0F | T_F3 | T_EVEX | T_EW0 | T_N4 | T_SAE_X, false, true, 2 }, + { 0x21, "insertps", T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0, true, true, 2 }, + { 0x63, "packsswb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + { 0x6B, "packssdw", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 }, + { 0x67, "packuswb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + { 0x2B, "packusdw", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 }, + + { 0xFC, "paddb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + { 0xFD, "paddw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + { 0xFE, "paddd", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 }, + { 0xD4, "paddq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 }, + + { 0xEC, "paddsb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + { 0xED, "paddsw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + + { 0xDC, "paddusb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + { 0xDD, "paddusw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + + { 0x0F, "palignr", T_0F3A | T_66 | T_YMM | T_EVEX, true, true, 2 }, + + { 0xDB, "pand", T_0F | T_66 | T_YMM, false, true, 2 }, + { 0xDF, "pandn", T_0F | T_66 | T_YMM, false, true, 2 }, + + { 0xE0, "pavgb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + { 0xE3, "pavgw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + + { 0x74, "pcmpeqb", T_0F | T_66 | T_YMM, false, true, 2 }, + { 0x75, "pcmpeqw", T_0F | T_66 | T_YMM, false, true, 2 }, + { 0x76, "pcmpeqd", T_0F | T_66 | T_YMM, false, true, 2 }, + { 0x29, "pcmpeqq", T_0F38 | T_66 | T_YMM, false, true, 3 }, + + { 0x64, "pcmpgtb", T_0F | T_66 | T_YMM, false, true, 2 }, + { 0x65, "pcmpgtw", T_0F | T_66 | T_YMM, false, true, 2 }, + { 
0x66, "pcmpgtd", T_0F | T_66 | T_YMM, false, true, 2 }, + { 0x37, "pcmpgtq", T_0F38 | T_66 | T_YMM, false, true, 3 }, + + { 0x01, "phaddw", T_0F38 | T_66 | T_YMM, false, true, 2 }, + { 0x02, "phaddd", T_0F38 | T_66 | T_YMM, false, true, 2 }, + { 0x03, "phaddsw", T_0F38 | T_66 | T_YMM, false, true, 2 }, + + { 0x05, "phsubw", T_0F38 | T_66 | T_YMM, false, true, 2 }, + { 0x06, "phsubd", T_0F38 | T_66 | T_YMM, false, true, 2 }, + { 0x07, "phsubsw", T_0F38 | T_66 | T_YMM, false, true, 2 }, + { 0xF5, "pmaddwd", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + { 0x04, "pmaddubsw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 2 }, + + { 0x3C, "pmaxsb", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 3 }, + { 0xEE, "pmaxsw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + { 0x3D, "pmaxsd", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 }, + + { 0xDE, "pmaxub", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + { 0x3E, "pmaxuw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 3 }, + { 0x3F, "pmaxud", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 }, + + { 0x38, "pminsb", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 3 }, + { 0xEA, "pminsw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + { 0x39, "pminsd", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 }, + + { 0xDA, "pminub", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + { 0x3A, "pminuw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 3 }, + { 0x3B, "pminud", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 }, + + { 0xE4, "pmulhuw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + { 0x0B, "pmulhrsw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 2 }, + { 0xE5, "pmulhw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + { 0xD5, "pmullw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + { 0x40, "pmulld", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 }, + + { 0xF4, "pmuludq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 }, + { 0x28, 
"pmuldq", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 3 }, + + { 0xEB, "por", T_0F | T_66 | T_YMM, false, true, 2 }, + { 0xF6, "psadbw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + + { 0x00, "pshufb", T_0F38 | T_66 | T_YMM | T_EVEX, false, false, 2 }, + + { 0x08, "psignb", T_0F38 | T_66 | T_YMM, false, true, 2 }, + { 0x09, "psignw", T_0F38 | T_66 | T_YMM, false, true, 2 }, + { 0x0A, "psignd", T_0F38 | T_66 | T_YMM, false, true, 2 }, + + { 0xF1, "psllw", T_0F | T_66 | T_YMM | T_EVEX | T_N16, false, true, 2 }, + { 0xF2, "pslld", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_N16, false, true, 2 }, + { 0xF3, "psllq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_N16, false, true, 2 }, + + { 0xE1, "psraw", T_0F | T_66 | T_YMM | T_EVEX | T_N16, false, true, 2 }, + { 0xE2, "psrad", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_N16, false, true, 2 }, + { 0xD1, "psrlw", T_0F | T_66 | T_YMM | T_EVEX | T_N16, false, true, 2 }, + { 0xD2, "psrld", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_N16, false, true, 2 }, + { 0xD3, "psrlq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_N16, false, true, 2 }, + + { 0xF8, "psubb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + { 0xF9, "psubw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + { 0xFA, "psubd", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 }, + { 0xFB, "psubq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 }, + + { 0xE8, "psubsb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + { 0xE9, "psubsw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + + { 0xD8, "psubusb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + { 0xD9, "psubusw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + + { 0x68, "punpckhbw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + { 0x69, "punpckhwd", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, + { 0x6A, "punpckhdq", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 }, + { 0x6D, "punpckhqdq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 
},
+
+			{ 0x60, "punpcklbw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
+			{ 0x61, "punpcklwd", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
+			{ 0x62, "punpckldq", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 },
+			{ 0x6C, "punpcklqdq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 },
+
+			{ 0xEF, "pxor", T_0F | T_66 | T_YMM, false, true, 2 },
+
+			{ 0x53, "rcpss", T_0F | T_F3, false, true, 2 },
+			{ 0x52, "rsqrtss", T_0F | T_F3, false, true, 2 },
+
+			{ 0xC6, "shufpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, true, true, 2 },
+			{ 0xC6, "shufps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, true, true, 2 },
+
+			{ 0x51, "sqrtsd", T_0F | T_F2 | T_EVEX | T_EW1 | T_ER_X | T_N8, false, true, 2 },
+			{ 0x51, "sqrtss", T_0F | T_F3 | T_EVEX | T_EW0 | T_ER_X | T_N4, false, true, 2 },
+
+			{ 0x15, "unpckhpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 },
+			{ 0x15, "unpckhps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 },
+
+			{ 0x14, "unpcklpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 },
+			{ 0x14, "unpcklps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 },
+		};
+		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { // one pass per mnemonic; what gets printed depends on omitOnly and the row's mode bits
+			const Tbl *p = &tbl[i];
+			std::string type = type2String(p->type);
+			if (omitOnly) { // shorthand pass: v<name>(x, op[, imm]) forwards to v<name>(x, x, op[, imm])
+				if (p->enableOmit) {
+					printf("void v%s(const Xmm& x, const Operand& op%s) { v%s(x, x, op%s); }\n", p->name, p->hasIMM ? ", uint8 imm" : "", p->name, p->hasIMM ? ", imm" : "");
+				}
+			} else {
+				if (p->mode & 1) { // SSE form: always printed with the 0x66 prefix, 0x3A map with imm and 0x38 map without
+					if (p->hasIMM) {
+						printf("void %s(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x%02X, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }\n", p->name, p->code); // fix: the cast had lost its <uint8> target type, making every generated function ill-formed
+					} else {
+						printf("void %s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%02X, 0x66, isXMM_XMMorMEM, NONE, 0x38); }\n", p->name, p->code);
+					}
+				}
+				if (p->mode & 2) { // AVX form: three-operand v<name> forwarding to opAVX_X_X_XM with the row's type flags
+					printf("void v%s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X%s); }\n"
+						, p->name, p->hasIMM ? ", uint8 imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
+				}
+			}
+		}
+	}
+}
+
+void put()
+{
+	const int NO = CodeGenerator::NONE;
+	{
+		char buf[16];
+		unsigned int v = VERSION; // printed as <v >> 12>.<(v >> 4) & 0xFF in hex>[<v & 0xF in hex, only when non-zero>]
+		if (v & 0xF) {
+			snprintf(buf, sizeof(buf), "%d.%02X%x", v >> 12, (v >> 4) & 0xFF, v & 0xF);
+		} else {
+			snprintf(buf, sizeof(buf), "%d.%02X", v >> 12, (v >> 4) & 0xFF);
+		}
+		printf("const char *getVersionString() const { return \"%s\"; }\n", buf);
+	}
+	const int B = 1 << 0; // element-size bits used by the mode fields of the tables further down
+	const int W = 1 << 1;
+	const int D = 1 << 2;
+	const int Q = 1 << 3;
+	{
+		const struct Tbl {
+			uint8 code; // opcode byte passed to the generated opMMX call
+			const char *name; // generated function name
+		} tbl[] = {
+			// MMX
+			{ 0x6B, "packssdw" },
+			{ 0x63, "packsswb" },
+			{ 0x67, "packuswb" },
+
+			{ 0xDB, "pand" },
+			{ 0xDF, "pandn" },
+
+			{ 0xF5, "pmaddwd" },
+			{ 0xE4, "pmulhuw" },
+			{ 0xE5, "pmulhw" },
+			{ 0xD5, "pmullw" },
+
+			{ 0xEB, "por" },
+
+			{ 0x68, "punpckhbw" },
+			{ 0x69, "punpckhwd" },
+			{ 0x6A, "punpckhdq" },
+
+			{ 0x60, "punpcklbw" },
+			{ 0x61, "punpcklwd" },
+			{ 0x62, "punpckldq" },
+
+			{ 0xEF, "pxor" },
+
+			// MMX2
+			{ 0xE0, "pavgb" },
+			{ 0xE3, "pavgw" },
+			{ 0xEE, "pmaxsw" },
+			{ 0xDE, "pmaxub" },
+			{ 0xEA, "pminsw" },
+			{ 0xDA, "pminub" },
+			{ 0xF6, "psadbw" },
+			//
+			{ 0xD4, "paddq" },
+			{ 0xF4, "pmuludq" },
+			{ 0xFB, "psubq" },
+		};
+		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
+			const Tbl *p = &tbl[i];
+			printf("void %s(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x%02X); }\n"
+				, p->name,
p->code); + } + } + + { + const struct Tbl { + uint8 code; + int mode; + const char *name; + } tbl[] = { + { 0xFC, B|W|D, "padd" }, + { 0xEC, B|W , "padds" }, + { 0xDC, B|W , "paddus" }, + { 0x74, B|W|D, "pcmpeq" }, + { 0x64, B|W|D, "pcmpgt" }, + { 0xF0, W|D|Q, "psll" }, + { 0xE0, W|D , "psra" }, + { 0xD0, W|D|Q, "psrl" }, + { 0xF8, B|W|D, "psub" }, + { 0xE8, B|W , "psubs" }, + { 0xD8, B|W , "psubus" }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + static const char modTbl[][4] = { + "b", "w", "d", "q" + }; + for (int j = 0; j < 4; j++) { + // B(0), W(1), D(2), Q(3) + if (!(p->mode & (1 << j))) continue; + printf("void %s%s(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x%02X); }\n" + , p->name, modTbl[j] + , p->code | j + ); + } + } + } + + { + const struct Tbl { + uint8 code; + int ext; + int mode; + const char *name; + } tbl[] = { + { 0x70, 6, W|D|Q, "psll" }, + { 0x70, 4, W|D , "psra" }, + { 0x70, 2, W|D|Q, "psrl" }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + static const char modTbl[][4] = { + "b", "w", "d", "q" + }; + for (int j = 0; j < 4; j++) { + // B(0), W(1), D(2), Q(3) + if (!(p->mode & (1 << j))) continue; + printf("void %s%s(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x%02X, %d); }\n" + , p->name, modTbl[j] + , p->code | j + , p->ext + ); + } + } + printf("void pslldq(const Xmm& xmm, int imm8) { opMMX_IMM(xmm, imm8, 0x%02X, %d); }\n", 0x73, 7); + printf("void psrldq(const Xmm& xmm, int imm8) { opMMX_IMM(xmm, imm8, 0x%02X, %d); }\n", 0x73, 3); + } + + { + const struct Tbl { + uint8 code; + uint8 pref; + const char *name; + } tbl[] = { + { 0x70, 0, "pshufw" }, + { 0x70, 0xF2, "pshuflw" }, + { 0x70, 0xF3, "pshufhw" }, + { 0x70, 0x66, "pshufd" }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + printf("void %s(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x%02X, 0x%02X, imm8); }\n", p->name, p->code, p->pref); + 
} + } + { + const struct MmxTbl6 { + uint8 code; // for (reg, reg/[mem]) + uint8 code2; // for ([mem], reg) + int pref; + const char *name; + } mmxTbl6[] = { + { 0x6F, 0x7F, 0x66, "movdqa" }, + { 0x6F, 0x7F, 0xF3, "movdqu" }, + // SSE2 + { 0x28, 0x29, NO, "movaps" }, + { 0x10, 0x11, 0xF3, "movss" }, + { 0x10, 0x11, NO, "movups" }, + { 0x28, 0x29, 0x66, "movapd" }, + { 0x10, 0x11, 0xF2, "movsd" }, + { 0x10, 0x11, 0x66, "movupd" }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(mmxTbl6); i++) { + const MmxTbl6 *p = &mmxTbl6[i]; + printf("void %s(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x%02X, 0x%02X); }\n", p->name, p->code, p->pref); + printf("void %s(const Address& addr, const Xmm& xmm) { ", p->name); + if (p->pref != NO) printf("db(0x%02X); ", p->pref); + printf("opModM(addr, xmm, 0x0F, 0x%02X); }\n", p->code2); + } + } + { + enum { + PS = 1 << 0, + SS = 1 << 1, + PD = 1 << 2, + SD = 1 << 3 + }; + const struct { + int code; + const char *name; + } sufTbl[] = { + { NO, "ps" }, + { 0xF3, "ss" }, + { 0x66, "pd" }, + { 0xF2, "sd" }, + }; + const struct Tbl { + uint8 code; + int mode; + const char *name; + bool hasImm; + } tbl[] = { + { 0x58, PS|SS|PD|SD, "add" }, + { 0x55, PS|PD , "andn" }, + { 0x54, PS|PD , "and" }, + { 0xC2, PS|SS|PD|SD, "cmp", true }, + { 0x5E, PS|SS|PD|SD, "div" }, + { 0x5F, PS|SS|PD|SD, "max" }, + { 0x5D, PS|SS|PD|SD, "min" }, + { 0x59, PS|SS|PD|SD, "mul" }, + { 0x56, PS|PD , "or" }, + { 0x53, PS|SS , "rcp" }, + { 0x52, PS|SS , "rsqrt" }, + { 0xC6, PS|PD , "shuf", true }, + { 0x51, PS|SS|PD|SD, "sqrt" }, + { 0x5C, PS|SS|PD|SD, "sub" }, + { 0x15, PS|PD , "unpckh" }, + { 0x14, PS|PD , "unpckl" }, + { 0x57, PS|PD , "xor" }, + // + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + for (size_t j = 0; j < NUM_OF_ARRAY(sufTbl); j++) { + if (!(p->mode & (1 << j))) continue; + if (p->hasImm) { + // don't change uint8 to int because NO is not in byte + printf("void %s%s(const Xmm& xmm, const Operand& op, uint8 imm8) 
{ opGen(xmm, op, 0x%2X, 0x%02X, isXMM_XMMorMEM, imm8); }\n", p->name, sufTbl[j].name, p->code, sufTbl[j].code); + } else { + printf("void %s%s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%2X, 0x%02X, isXMM_XMMorMEM); }\n", p->name, sufTbl[j].name, p->code, sufTbl[j].code); + } + } + } + } + { + // (XMM, XMM) + const struct Tbl { + uint8 code; + uint8 pref; + const char *name; + } tbl[] = { + { 0xF7, 0x66, "maskmovdqu" }, + { 0x12, 0 , "movhlps" }, + { 0x16, 0 , "movlhps" }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + printf("void %s(const Xmm& reg1, const Xmm& reg2) { ", p->name); + if (p->pref) printf("db(0x%02X); ", p->pref); + printf(" opModR(reg1, reg2, 0x0F, 0x%02X); }\n", p->code); + } + } + { + // (XMM, XMM|MEM) + const struct Tbl { + uint8 code; + int pref; + const char *name; + } tbl[] = { + { 0x6D, 0x66, "punpckhqdq" }, + { 0x6C, 0x66, "punpcklqdq" }, + + { 0x2F, NO , "comiss" }, + { 0x2E, NO , "ucomiss" }, + { 0x2F, 0x66, "comisd" }, + { 0x2E, 0x66, "ucomisd" }, + + { 0x5A, 0x66, "cvtpd2ps" }, + { 0x5A, NO , "cvtps2pd" }, + { 0x5A, 0xF2, "cvtsd2ss" }, + { 0x5A, 0xF3, "cvtss2sd" }, + { 0xE6, 0xF2, "cvtpd2dq" }, + { 0xE6, 0x66, "cvttpd2dq" }, + { 0xE6, 0xF3, "cvtdq2pd" }, + { 0x5B, 0x66, "cvtps2dq" }, + { 0x5B, 0xF3, "cvttps2dq" }, + { 0x5B, NO , "cvtdq2ps" }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + printf("void %s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%02X, 0x%02X, isXMM_XMMorMEM); }\n", p->name, p->code, p->pref); + } + } + + { + // special type + const struct Tbl { + uint8 code; + int pref; + const char *name; + const char *cond; + } tbl[] = { + { 0x2A, NO , "cvtpi2ps", "isXMM_MMXorMEM" }, + { 0x2D, NO , "cvtps2pi", "isMMX_XMMorMEM" }, + { 0x2A, 0xF3, "cvtsi2ss", "isXMM_REG32orMEM" }, + { 0x2D, 0xF3, "cvtss2si", "isREG32_XMMorMEM" }, + { 0x2C, NO , "cvttps2pi", "isMMX_XMMorMEM" }, + { 0x2C, 0xF3, "cvttss2si", "isREG32_XMMorMEM" }, + { 0x2A, 
0x66, "cvtpi2pd", "isXMM_MMXorMEM" }, + { 0x2D, 0x66, "cvtpd2pi", "isMMX_XMMorMEM" }, + { 0x2A, 0xF2, "cvtsi2sd", "isXMM_REG32orMEM" }, + { 0x2D, 0xF2, "cvtsd2si", "isREG32_XMMorMEM" }, + { 0x2C, 0x66, "cvttpd2pi", "isMMX_XMMorMEM" }, + { 0x2C, 0xF2, "cvttsd2si", "isREG32_XMMorMEM" }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + printf("void %s(const Operand& reg, const Operand& op) { opGen(reg, op, 0x%02X, 0x%02X, %s); }\n", p->name, p->code, p->pref, p->cond); + } + } + { + // prefetch + const struct Tbl { + int ext; + const char *name; + int code; + } tbl[] = { + { 1, "t0", 0x18}, + { 2, "t1", 0x18}, + { 3, "t2", 0x18}, + { 0, "nta", 0x18}, + { 2, "wt1", 0x0D}, + { 1, "w", 0x0D}, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + printf("void prefetch%s(const Address& addr) { opModM(addr, Reg32(%d), 0x0F, 0x%02X); }\n", p->name, p->ext, p->code); + } + } + { + const struct Tbl { + uint8 code; + int pref; + const char *name; + } tbl[] = { + { 0x16, NO, "movhps" }, + { 0x12, NO, "movlps" }, + { 0x16, 0x66, "movhpd" }, + { 0x12, 0x66, "movlpd" }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + printf("void %s(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x%02X, 0x%02X); }\n", p->name, p->code, p->pref); + } + } + { + // cmov + const struct Tbl { + uint8 ext; + const char *name; + } tbl[] = { + { 0, "o" }, + { 1, "no" }, + { 2, "b" }, + { 2, "c" }, + { 2, "nae" }, + { 3, "nb" }, + { 3, "ae" }, + { 3, "nc" }, + { 4, "e" }, + { 4, "z" }, + { 5, "ne" }, + { 5, "nz" }, + { 6, "be" }, + { 6, "na" }, + { 7, "nbe" }, + { 7, "a" }, + { 8, "s" }, + { 9, "ns" }, + { 10, "p" }, + { 10, "pe" }, + { 11, "np" }, + { 11, "po" }, + { 12, "l" }, + { 12, "nge" }, + { 13, "nl" }, + { 13, "ge" }, + { 14, "le" }, + { 14, "ng" }, + { 15, "nle" }, + { 15, "g" }, + }; + const char *msg = "//-V524"; // disable warning of PVS-Studio + for (size_t i = 0; i < 
NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + printf("void cmov%s(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | %d); }%s\n", p->name, p->ext, msg); + printf("void j%s(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x%02X, 0x%02X, 0x%02X); }%s\n", p->name, p->ext | 0x70, p->ext | 0x80, 0x0F, msg); + printf("void j%s(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x%02X, 0x%02X, 0x%02X); }%s\n", p->name, p->ext | 0x70, p->ext | 0x80, 0x0F, msg); + printf("void j%s(const char *label, LabelType type = T_AUTO) { j%s(std::string(label), type); }%s\n", p->name, p->name, msg); + printf("void j%s(const void *addr) { opJmpAbs(addr, T_NEAR, 0x%02X, 0x%02X, 0x%02X); }%s\n", p->name, p->ext | 0x70, p->ext | 0x80, 0x0F, msg); + printf("void set%s(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | %d); }%s\n", p->name, p->ext, msg); + } + } + //////////////////////////////////////////////////////////////// + { + const GenericTbl tbl[] = { + { "cbw", 0x66, 0x98 }, + { "cdq", 0x99 }, + { "clc", 0xF8 }, + { "cld", 0xFC }, + { "cli", 0xFA }, + { "cmc", 0xF5 }, + + { "cpuid", 0x0F, 0xA2 }, + { "cwd", 0x66, 0x99 }, + { "cwde", 0x98 }, + { "movsb", 0xA4 }, + { "movsw", 0x66, 0xA5 }, + { "movsd", 0xA5 }, + { "rep", 0xF3 }, + + { "lahf", 0x9F }, + { "lock", 0xF0 }, + + { "sahf", 0x9E }, + { "stc", 0xF9 }, + { "std", 0xFD }, + { "sti", 0xFB }, + + { "emms", 0x0F, 0x77 }, + { "pause", 0xF3, 0x90 }, + { "sfence", 0x0F, 0xAE, 0xF8 }, + { "lfence", 0x0F, 0xAE, 0xE8 }, + { "mfence", 0x0F, 0xAE, 0xF0 }, + { "monitor", 0x0F, 0x01, 0xC8 }, + { "mwait", 0x0F, 0x01, 0xC9 }, + + { "rdmsr", 0x0F, 0x32 }, + { "rdpmc", 0x0F, 0x33 }, + { "rdtsc", 0x0F, 0x31 }, + { "rdtscp", 0x0F, 0x01, 0xF9 }, + { "ud2", 0x0F, 0x0B }, + { "wait", 0x9B }, + { "fwait", 0x9B }, + { "wbinvd", 0x0F, 0x09 }, + { "wrmsr", 0x0F, 0x30 }, + { "xlatb", 0xD7 }, + + { "popf", 0x9D }, + { "pushf", 0x9C }, + { "stac", 0x0F, 0x01, 0xCB 
}, + + { "vzeroall", 0xC5, 0xFC, 0x77 }, + { "vzeroupper", 0xC5, 0xF8, 0x77 }, + { "xgetbv", 0x0F, 0x01, 0xD0 }, + + // FPU + { "f2xm1", 0xD9, 0xF0 }, + { "fabs", 0xD9, 0xE1 }, + { "faddp", 0xDE, 0xC1 }, + { "fchs", 0xD9, 0xE0 }, + + { "fcom", 0xD8, 0xD1 }, + { "fcomp", 0xD8, 0xD9 }, + { "fcompp", 0xDE, 0xD9 }, + { "fcos", 0xD9, 0xFF }, + { "fdecstp", 0xD9, 0xF6 }, + { "fdivp", 0xDE, 0xF9 }, + { "fdivrp", 0xDE, 0xF1 }, + { "fincstp", 0xD9, 0xF7 }, + { "finit", 0x9B, 0xDB, 0xE3 }, + { "fninit", 0xDB, 0xE3 }, + { "fld1", 0xD9, 0xE8 }, + { "fldl2t", 0xD9, 0xE9 }, + { "fldl2e", 0xD9, 0xEA }, + { "fldpi", 0xD9, 0xEB }, + { "fldlg2", 0xD9, 0xEC }, + { "fldln2", 0xD9, 0xED }, + { "fldz", 0xD9, 0xEE }, + { "fmulp", 0xDE, 0xC9 }, + { "fnop", 0xD9, 0xD0 }, + { "fpatan", 0xD9, 0xF3 }, + { "fprem", 0xD9, 0xF8 }, + { "fprem1", 0xD9, 0xF5 }, + { "fptan", 0xD9, 0xF2 }, + { "frndint", 0xD9, 0xFC }, + { "fscale", 0xD9, 0xFD }, + { "fsin", 0xD9, 0xFE }, + { "fsincos", 0xD9, 0xFB }, + { "fsqrt", 0xD9, 0xFA }, + { "fsubp", 0xDE, 0xE9 }, + { "fsubrp", 0xDE, 0xE1 }, + { "ftst", 0xD9, 0xE4 }, + { "fucom", 0xDD, 0xE1 }, + { "fucomp", 0xDD, 0xE9 }, + { "fucompp", 0xDA, 0xE9 }, + { "fxam", 0xD9, 0xE5 }, + { "fxch", 0xD9, 0xC9 }, + { "fxtract", 0xD9, 0xF4 }, + { "fyl2x", 0xD9, 0xF1 }, + { "fyl2xp1", 0xD9, 0xF9 }, + }; + putGeneric(tbl, NUM_OF_ARRAY(tbl)); + } + { + const struct Tbl { + uint8 code; // (reg, reg) + uint8 ext; // (reg, imm) + const char *name; + } tbl[] = { + { 0x10, 2, "adc" }, + { 0x00, 0, "add" }, + { 0x20, 4, "and_" }, + { 0x38, 7, "cmp" }, + { 0x08, 1, "or_" }, + { 0x18, 3, "sbb" }, + { 0x28, 5, "sub" }, + { 0x30, 6, "xor_" }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + printf("void %s(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x%02X); }\n", p->name, p->code); + printf("void %s(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x%02X, %d); }\n", p->name, p->code, p->ext); + } + } + + { + const struct Tbl { + uint8 
code; + uint8 ext; + const char *name; + } tbl[] = { + { 0x48, 1, "dec" }, + { 0x40, 0, "inc" }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + printf("void %s(const Operand& op) { opIncDec(op, 0x%02X, %d); }\n", p->name, p->code, p->ext); + } + } + { + const struct Tbl { + uint8 code; + uint8 ext; + const char *name; + } tbl[] = { + { 0xa3, 4, "bt" }, + { 0xab, 5, "bts" }, + { 0xb3, 6, "btr" }, + { 0xbb, 7, "btc" }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + printf("void %s(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0x%02X); }\n", p->name, p->code); + printf("void %s(const Operand& op, uint8 imm) { opR_ModM(op, 16|32|64, %d, 0x0f, 0xba, NONE, false, 1); db(imm); }\n", p->name, p->ext); + } + } + { + const struct Tbl { + uint8 code; + uint8 ext; + const char *name; + } tbl[] = { + { 0xF6, 6, "div" }, + { 0xF6, 7, "idiv" }, + { 0xF6, 5, "imul" }, + { 0xF6, 4, "mul" }, + { 0xF6, 3, "neg" }, + { 0xF6, 2, "not_" }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + const std::string name = p->name; + printf("void %s(const Operand& op) { opR_ModM(op, 0, %d, 0x%02X); }\n", p->name, p->ext, p->code); + } + } + { + const struct Tbl { + const char *name; + uint8 ext; + } tbl[] = { + { "rcl", 2 }, + { "rcr", 3 }, + { "rol", 0 }, + { "ror", 1 }, + { "sar", 7 }, + { "shl", 4 }, + { "shr", 5 }, + + { "sal", 4 }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + printf("void %s(const Operand& op, int imm) { opShift(op, imm, %d); }\n", p->name, p->ext); + printf("void %s(const Operand& op, const Reg8& _cl) { opShift(op, _cl, %d); }\n", p->name, p->ext); + } + } + { + const struct Tbl { + const char *name; + uint8 code; + } tbl[] = { + { "shld", 0xA4 }, + { "shrd", 0xAC }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + 
printf("void %s(const Operand& op, const Reg& reg, uint8 imm) { opShxd(op, reg, imm, 0x%02X); }\n", p->name, p->code); + printf("void %s(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(op, reg, 0, 0x%02X, &_cl); }\n", p->name, p->code); + } + } + { + const struct Tbl { + const char *name; + uint8 code; + } tbl[] = { + { "bsf", 0xBC }, + { "bsr", 0xBD }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + printf("void %s(const Reg®, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x%02X); }\n", p->name, p->code); + } + } + { + const struct Tbl { + const char *name; + uint8 code; + } tbl[] = { + { "popcnt", 0xB8 }, + { "tzcnt", 0xBC }, + { "lzcnt", 0xBD }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + printf("void %s(const Reg®, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0x%02X); }\n", p->name, p->code); + } + } + // SSSE3 + { + const struct Tbl { + uint8 code; + const char *name; + } tbl[] = { + { 0x00, "pshufb" }, + { 0x01, "phaddw" }, + { 0x02, "phaddd" }, + { 0x03, "phaddsw" }, + { 0x04, "pmaddubsw" }, + { 0x05, "phsubw" }, + { 0x06, "phsubd" }, + { 0x07, "phsubsw" }, + { 0x08, "psignb" }, + { 0x09, "psignw" }, + { 0x0a, "psignd" }, + { 0x0b, "pmulhrsw" }, + { 0x1c, "pabsb" }, + { 0x1d, "pabsw" }, + { 0x1e, "pabsd" }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + printf("void %s(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x%02X, 0x66, NONE, 0x38); }\n", p->name, p->code); + } + printf("void palignr(const Mmx& mmx, const Operand& op, int imm) { opMMX(mmx, op, 0x0f, 0x66, static_cast(imm), 0x3a); }\n"); + } + { + const struct Tbl { + const char *name; + uint8 code; + } tbl[] = { + { "pclmullqlqdq", 0 }, + { "pclmulhqlqdq", 1 }, + { "pclmullqhdq", 0x10 }, + { "pclmulhqhdq", 0x11 }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + printf("void %s(const Xmm& xmm, const 
Operand& op) { pclmulqdq(xmm, op, 0x%02X); }\n", p->name, p->code); + } + } + { + const struct Tbl { + uint8 code1; + int code2; + uint8 ext; + const char *name; + } tbl[] = { + { 0x0F, 0xAE, 2, "ldmxcsr" }, + { 0x0F, 0xAE, 3, "stmxcsr" }, + { 0x0F, 0xAE, 7, "clflush" }, // 0x80 is bug of nasm ? + { 0xD9, NONE, 5, "fldcw" }, +// { 0x9B, 0xD9, 7, "fstcw" }, // not correct order for fstcw [eax] on 64bit OS + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + printf("void %s(const Address& addr) { opModM(addr, Reg32(%d), 0x%02X, 0x%02X); }\n", p->name, p->ext, p->code1, p->code2); + } + printf("void fstcw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xD9, NONE); }\n"); + } + { + const struct Tbl { + uint8 code; + const char *name; + } tbl[] = { + { 0x2B, "movntpd" }, + { 0xE7, "movntdq" }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + // cast xmm register to 16bit register to put 0x66 + printf("void %s(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0x%02X); }\n", p->name, p->code); + } + } + { + const struct Tbl { + uint8 code; + const char *name; + } tbl[] = { + { 0xBE, "movsx" }, + { 0xB6, "movzx" }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + printf("void %s(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0x%02X); }\n", p->name, p->code); + } + } + // mpx + { + puts("void bndcl(const BoundsReg& bnd, const Operand& op) { db(0xF3); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1A, NONE, !op.isMEM()); }"); + puts("void bndcu(const BoundsReg& bnd, const Operand& op) { db(0xF2); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1A, NONE, !op.isMEM()); }"); + puts("void bndcn(const BoundsReg& bnd, const Operand& op) { db(0xF2); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1B, NONE, !op.isMEM()); }"); + puts("void bndldx(const BoundsReg& bnd, const Address& addr) { opModM(addr, bnd, 0x0F, 0x1A); }"); + puts("void bndmk(const 
BoundsReg& bnd, const Address& addr) { db(0xF3); opModM(addr, bnd, 0x0F, 0x1B); }"); + puts("void bndmov(const BoundsReg& bnd, const Operand& op) { db(0x66); opModRM(bnd, op, op.isBNDREG(), op.isMEM(), 0x0F, 0x1A); }"); + puts("void bndmov(const Address& addr, const BoundsReg& bnd) { db(0x66); opModM(addr, bnd, 0x0F, 0x1B); }"); + puts("void bndstx(const Address& addr, const BoundsReg& bnd) { opModM(addr, bnd, 0x0F, 0x1B); }"); + } + // misc + { + puts("void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModM(addr, reg, 0x8D); }"); + puts("void bswap(const Reg32e& reg) { opModR(Reg32(1), reg, 0x0F); }"); + puts("void ret(int imm = 0) { if (imm) { db(0xC2); dw(imm); } else { db(0xC3); } }"); + + puts("void xadd(const Operand& op, const Reg& reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xC0 | (reg.isBit(8) ? 0 : 1)); }"); + puts("void cmpxchg(const Operand& op, const Reg& reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xB0 | (reg.isBit(8) ? 
0 : 1)); }"); + puts("void movbe(const Reg& reg, const Address& addr) { opModM(addr, reg, 0x0F, 0x38, 0xF0); }"); + puts("void movbe(const Address& addr, const Reg& reg) { opModM(addr, reg, 0x0F, 0x38, 0xF1); }"); + puts("void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }"); + puts("void adox(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0xF3, isREG32_REG32orMEM, NONE, 0x38); }"); + puts("void cmpxchg8b(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0xC7); }"); + + puts("void pextrw(const Operand& op, const Mmx& xmm, uint8 imm) { opExt(op, xmm, 0x15, imm, true); }"); + puts("void pextrb(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x14, imm); }"); + puts("void pextrd(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x16, imm); }"); + puts("void extractps(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x17, imm); }"); + puts("void pinsrw(const Mmx& mmx, const Operand& op, int imm) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opGen(mmx, op, 0xC4, mmx.isXMM() ? 
0x66 : NONE, 0, imm); }"); + puts("void insertps(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, imm, 0x3A); }"); + puts("void pinsrb(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x20, 0x66, isXMM_REG32orMEM, imm, 0x3A); }"); + puts("void pinsrd(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x22, 0x66, isXMM_REG32orMEM, imm, 0x3A); }"); + + puts("void pmovmskb(const Reg32e& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(reg, mmx, 0x0F, 0xD7); }"); + puts("void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) throw Error(ERR_BAD_COMBINATION); opModR(reg1, reg2, 0x0F, 0xF7); }"); + puts("void movmskps(const Reg32e& reg, const Xmm& xmm) { opModR(reg, xmm, 0x0F, 0x50); }"); + puts("void movmskpd(const Reg32e& reg, const Xmm& xmm) { db(0x66); movmskps(reg, xmm); }"); + puts("void movntps(const Address& addr, const Xmm& xmm) { opModM(addr, Mmx(xmm.getIdx()), 0x0F, 0x2B); }"); + puts("void movntdqa(const Xmm& xmm, const Address& addr) { db(0x66); opModM(addr, xmm, 0x0F, 0x38, 0x2A); }"); + puts("void lddqu(const Xmm& xmm, const Address& addr) { db(0xF2); opModM(addr, xmm, 0x0F, 0xF0); }"); + puts("void movnti(const Address& addr, const Reg32e& reg) { opModM(addr, reg, 0x0F, 0xC3); }"); + puts("void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) throw Error(ERR_BAD_COMBINATION); opModM(addr, mmx, 0x0F, 0xE7); }"); + + puts("void movd(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModM(addr, mmx, 0x0F, 0x7E); }"); + puts("void movd(const Reg32& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }"); + puts("void movd(const Mmx& mmx, const Address& addr) { if (mmx.isXMM()) db(0x66); opModM(addr, mmx, 0x0F, 0x6E); }"); + puts("void movd(const Mmx& mmx, const Reg32& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }"); + puts("void movq2dq(const Xmm& xmm, const Mmx& mmx) { 
db(0xF3); opModR(xmm, mmx, 0x0F, 0xD6); }"); + puts("void movdq2q(const Mmx& mmx, const Xmm& xmm) { db(0xF2); opModR(mmx, xmm, 0x0F, 0xD6); }"); + puts("void movq(const Mmx& mmx, const Operand& op) { if (mmx.isXMM()) db(0xF3); opModRM(mmx, op, (mmx.getKind() == op.getKind()), op.isMEM(), 0x0F, mmx.isXMM() ? 0x7E : 0x6F); }"); + puts("void movq(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModM(addr, mmx, 0x0F, mmx.isXMM() ? 0xD6 : 0x7F); }"); + puts("void rdrand(const Reg& r) { if (r.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModR(Reg(6, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }"); + puts("void rdseed(const Reg& r) { if (r.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModR(Reg(7, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }"); + puts("void crc32(const Reg32e& reg, const Operand& op) { if (reg.isBit(32) && op.isBit(16)) db(0x66); db(0xF2); opModRM(reg, op, op.isREG(), op.isMEM(), 0x0F, 0x38, 0xF0 | (op.isBit(8) ? 0 : 1)); }"); + } + { + const struct Tbl { + uint8 m16; + uint8 m32; + uint8 m64; + uint8 ext; + const char *name; + uint8 m64ext; + } tbl[] = { + { 0x00, 0xD8, 0xDC, 0, "fadd" }, + { 0xDE, 0xDA, 0x00, 0, "fiadd" }, + { 0x00, 0xD8, 0xDC, 2, "fcom" }, + { 0x00, 0xD8, 0xDC, 3, "fcomp" }, + { 0x00, 0xD8, 0xDC, 6, "fdiv" }, + { 0xDE, 0xDA, 0x00, 6, "fidiv" }, + { 0x00, 0xD8, 0xDC, 7, "fdivr" }, + { 0xDE, 0xDA, 0x00, 7, "fidivr" }, + { 0xDE, 0xDA, 0x00, 2, "ficom" }, + { 0xDE, 0xDA, 0x00, 3, "ficomp" }, + { 0xDF, 0xDB, 0xDF, 0, "fild", 5 }, + { 0xDF, 0xDB, 0x00, 2, "fist" }, + { 0xDF, 0xDB, 0xDF, 3, "fistp", 7 }, + { 0xDF, 0xDB, 0xDD, 1, "fisttp" }, + { 0x00, 0xD9, 0xDD, 0, "fld" }, + { 0x00, 0xD8, 0xDC, 1, "fmul" }, + { 0xDE, 0xDA, 0x00, 1, "fimul" }, + { 0x00, 0xD9, 0xDD, 2, "fst" }, + { 0x00, 0xD9, 0xDD, 3, "fstp" }, + { 0x00, 0xD8, 0xDC, 4, "fsub" }, + { 0xDE, 0xDA, 0x00, 4, "fisub" }, + { 0x00, 0xD8, 0xDC, 5, "fsubr" }, + { 0xDE, 0xDA, 0x00, 5, "fisubr" }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const 
Tbl *p = &tbl[i]; + printf("void %s(const Address& addr) { opFpuMem(addr, 0x%02X, 0x%02X, 0x%02X, %d, %d); }\n", p->name, p->m16, p->m32, p->m64, p->ext, p->m64ext); + } + } + { + const struct Tbl { + uint32 code1; + uint32 code2; + const char *name; + } tbl[] = { + { 0xD8C0, 0xDCC0, "fadd" }, + { 0x0000, 0xDEC0, "faddp" }, + + { 0xDAC0, 0x00C0, "fcmovb" }, + { 0xDAC8, 0x00C8, "fcmove" }, + { 0xDAD0, 0x00D0, "fcmovbe" }, + { 0xDAD8, 0x00D8, "fcmovu" }, + { 0xDBC0, 0x00C0, "fcmovnb" }, + { 0xDBC8, 0x00C8, "fcmovne" }, + { 0xDBD0, 0x00D0, "fcmovnbe" }, + { 0xDBD8, 0x00D8, "fcmovnu" }, + + { 0xDBF0, 0x00F0, "fcomi" }, + { 0xDFF0, 0x00F0, "fcomip" }, + { 0xDBE8, 0x00E8, "fucomi" }, + { 0xDFE8, 0x00E8, "fucomip" }, + + { 0xD8F0, 0xDCF8, "fdiv" }, + { 0x0000, 0xDEF8, "fdivp" }, + { 0xD8F8, 0xDCF0, "fdivr" }, + { 0x0000, 0xDEF0, "fdivrp" }, + { 0xD8C8, 0xDCC8, "fmul" }, + { 0x0000, 0xDEC8, "fmulp" }, + { 0xD8E0, 0xDCE8, "fsub" }, + { 0x0000, 0xDEE8, "fsubp" }, + { 0xD8E8, 0xDCE0, "fsubr" }, + { 0x0000, 0xDEE0, "fsubrp" }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + printf("void %s(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x%04X, 0x%04X); }\n", p->name, p->code1, p->code2); + // omit st0 version(like nasm) + if (p->code1) { + printf("void %s(const Fpu& reg1) { opFpuFpu(st0, reg1, 0x%04X, 0x%04X); }\n", p->name, p->code1, p->code2); + } else { + printf("void %s(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x%04X, 0x%04X); }\n", p->name, p->code1, p->code2); + } + } + } + { + const struct Tbl { + uint8 code1; + uint8 code2; + const char *name; + } tbl[] = { + { 0xD8, 0xD0, "fcom" }, + { 0xD8, 0xD8, "fcomp" }, + { 0xDD, 0xC0, "ffree" }, + { 0xD9, 0xC0, "fld" }, + { 0xDD, 0xD0, "fst" }, + { 0xDD, 0xD8, "fstp" }, + { 0xDD, 0xE0, "fucom" }, + { 0xDD, 0xE8, "fucomp" }, + { 0xD9, 0xC8, "fxch" }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + printf("void %s(const Fpu& reg) { opFpu(reg, 0x%02X, 
0x%02X); }\n", p->name, p->code1, p->code2); + } + } + // AVX + { // pd, ps, sd, ss + const struct Tbl { + uint8 code; + const char *name; + bool only_pd_ps; + } tbl[] = { + { 0x58, "add", false }, + { 0x5C, "sub", false }, + { 0x59, "mul", false }, + { 0x5E, "div", false }, + { 0x5F, "max", false }, + { 0x5D, "min", false }, + { 0x54, "and", true }, + { 0x55, "andn", true }, + { 0x56, "or", true }, + { 0x57, "xor", true }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + printf("void v%spd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x%02X); }\n", p->name, p->code); + printf("void v%sps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x%02X); }\n", p->name, p->code); + if (p->only_pd_ps) continue; + printf("void v%ssd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x%02X); }\n", p->name, p->code); + printf("void v%sss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x%02X); }\n", p->name, p->code); + } + } + putX_X_XM(false); + + // (x, x/m[, imm]) or (y, y/m[, imm]) + { + const struct Tbl { + uint8 code; + const char *name; + int type; + bool hasIMM; + int mode; // 1 : SSE, 2 : AVX, 3 : SSE + AVX + } tbl[] = { + { 0x15, "blendvpd", T_0F38 | T_66, false, 1 }, + { 0x14, "blendvps", T_0F38 | T_66, false, 1 }, + { 0x10, "pblendvb", T_0F38 | T_66, false, 1 }, + { 0xDF, "aeskeygenassist", T_0F3A | T_66, true, 3 }, + { 0xDB, "aesimc", T_0F38 | T_66 | T_W0, false, 3 }, + { 0x09, "roundpd", T_0F3A | T_66 | T_YMM, true, 3 }, + { 0x08, "roundps", T_0F3A | T_66 | T_YMM, true, 3 }, + { 0x05, "permilpd", T_0F3A | T_66 | T_YMM | T_EVEX | T_EW1 | 
T_B64, true, 2 }, + { 0x04, "permilps", T_0F3A | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, true, 2 }, + { 0x61, "pcmpestri", T_0F3A | T_66, true, 3 }, + { 0x60, "pcmpestrm", T_0F3A | T_66, true, 3 }, + { 0x63, "pcmpistri", T_0F3A | T_66, true, 3 }, + { 0x62, "pcmpistrm", T_0F3A | T_66, true, 3 }, + { 0x0E, "testps", T_0F38 | T_66 | T_YMM, false, 2 }, + { 0x0F, "testpd", T_0F38 | T_66 | T_YMM, false, 2 }, + { 0x2F, "comisd", T_0F | T_66 | T_EVEX | T_EW1 | T_SAE_X | T_N8, false, 2 }, + { 0x2F, "comiss", T_0F | T_EVEX | T_EW0 | T_SAE_X | T_N4, false, 2 }, + { 0x5B, "cvtdq2ps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32 | T_ER_Z, false, 2 }, + { 0x5B, "cvtps2dq", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_ER_Z, false, 2 }, + { 0x5B, "cvttps2dq", T_0F | T_F3 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_SAE_Z, false, 2 }, + { 0x28, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1, false, 2 }, + { 0x28, "movaps", T_0F | T_YMM | T_EVEX | T_EW0, false, 2 }, + { 0x12, "movddup", T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_DUP, false, 3 }, + { 0x6F, "movdqa", T_0F | T_66 | T_YMM, false, 2 }, + { 0x6F, "movdqu", T_0F | T_F3 | T_YMM, false, 2 }, + { 0x16, "movshdup", T_0F | T_F3 | T_YMM | T_EVEX | T_EW0, false, 3 }, + { 0x12, "movsldup", T_0F | T_F3 | T_YMM | T_EVEX | T_EW0, false, 3 }, + { 0x10, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1, false, 2 }, + { 0x10, "movups", T_0F | T_YMM | T_EVEX | T_EW0, false, 2 }, + + { 0x1C, "pabsb", T_0F38 | T_66 | T_YMM | T_EVEX, false, 2 }, + { 0x1D, "pabsw", T_0F38 | T_66 | T_YMM | T_EVEX, false, 2 }, + { 0x1E, "pabsd", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, 2 }, + { 0x41, "phminposuw", T_0F38 | T_66, false, 3 }, + + { 0x20, "pmovsxbw", T_0F38 | T_66 | T_YMM | T_EVEX | T_N8 | T_N_VL, false, 3 }, + { 0x21, "pmovsxbd", T_0F38 | T_66 | T_YMM | T_EVEX | T_N4 | T_N_VL, false, 3 }, + { 0x22, "pmovsxbq", T_0F38 | T_66 | T_YMM | T_EVEX | T_N2 | T_N_VL, false, 3 }, + { 0x23, "pmovsxwd", T_0F38 | T_66 | T_YMM | T_EVEX 
| T_N8 | T_N_VL, false, 3 }, + { 0x24, "pmovsxwq", T_0F38 | T_66 | T_YMM | T_EVEX | T_N4 | T_N_VL, false, 3 }, + { 0x25, "pmovsxdq", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_N8 | T_N_VL, false, 3 }, + + { 0x30, "pmovzxbw", T_0F38 | T_66 | T_YMM | T_EVEX | T_N8 | T_N_VL, false, 3 }, + { 0x31, "pmovzxbd", T_0F38 | T_66 | T_YMM | T_EVEX | T_N4 | T_N_VL, false, 3 }, + { 0x32, "pmovzxbq", T_0F38 | T_66 | T_YMM | T_EVEX | T_N2 | T_N_VL, false, 3 }, + { 0x33, "pmovzxwd", T_0F38 | T_66 | T_YMM | T_EVEX | T_N8 | T_N_VL, false, 3 }, + { 0x34, "pmovzxwq", T_0F38 | T_66 | T_YMM | T_EVEX | T_N4 | T_N_VL, false, 3 }, + { 0x35, "pmovzxdq", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_N8 | T_N_VL, false, 3 }, + + { 0x70, "pshufd", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, true, 2 }, + { 0x70, "pshufhw", T_0F | T_F3 | T_YMM | T_EVEX, true, 2 }, + { 0x70, "pshuflw", T_0F | T_F2 | T_YMM | T_EVEX, true, 2 }, + + { 0x17, "ptest", T_0F38 | T_66 | T_YMM, false, 3 }, + { 0x53, "rcpps", T_0F | T_YMM, false, 2 }, + { 0x52, "rsqrtps", T_0F | T_YMM, false, 2 }, + + { 0x51, "sqrtpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_ER_Z | T_B64, false, 2 }, + { 0x51, "sqrtps", T_0F | T_YMM | T_EVEX | T_EW0 | T_ER_Z | T_B32, false, 2 }, + + { 0x2E, "ucomisd", T_0F | T_66 | T_EVEX | T_EW1 | T_SAE_X | T_N8, false, 2 }, + { 0x2E, "ucomiss", T_0F | T_EVEX | T_EW0 | T_SAE_X | T_N4, false, 2 }, + + { 0xCC, "sha1rnds4", T_0F3A, true, 1 }, + { 0xC8, "sha1nexte", T_0F38, false, 1 }, + { 0xC9, "sha1msg1", T_0F38, false, 1 }, + { 0xCA, "sha1msg2", T_0F38, false, 1 }, + { 0xCB, "sha256rnds2", T_0F38, false, 1 }, + { 0xCC, "sha256msg1", T_0F38, false, 1 }, + { 0xCD, "sha256msg2", T_0F38, false, 1 }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + std::string type = type2String(p->type); + if (p->mode & 1) { + const char *immS1 = p->hasIMM ? ", uint8 imm" : ""; + const char *immS2 = p->hasIMM ? ", imm" : ", NONE"; + const char *pref = p->type & T_66 ? "0x66" : p->type & T_F2 ? 
"0xF2" : p->type & T_F3 ? "0xF3" : "NONE"; + const char *suf = p->type & T_0F38 ? "0x38" : p->type & T_0F3A ? "0x3A" : "NONE"; + printf("void %s(const Xmm& xmm, const Operand& op%s) { opGen(xmm, op, 0x%02X, %s, isXMM_XMMorMEM%s, %s); }\n", p->name, immS1, p->code, pref, immS2, suf); + } + if (p->mode & 2) { + printf("void v%s(const Xmm& xm, const Operand& op%s) { opAVX_X_XM_IMM(xm, op, %s, 0x%02X%s); }\n" + , p->name, p->hasIMM ? ", uint8 imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); + } + } + } + // (m, x), (m, y) + { + const struct Tbl { + uint8 code; + const char *name; + int type; + } tbl[] = { + { 0x29, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 }, + { 0x29, "movaps", T_0F | T_YMM | T_EVEX | T_EW0 }, + { 0x7F, "movdqa", T_0F | T_66 | T_YMM }, + { 0x7F, "movdqu", T_0F | T_F3 | T_YMM }, + { 0x11, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 }, + { 0x11, "movups", T_0F | T_YMM | T_EVEX | T_EW0 }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + std::string type = type2String(p->type); + printf("void v%s(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, %s, 0x%02X); }\n" + , p->name, type.c_str(), p->code); + } + } + // (x, x/m), (y, y/m), (x, x, x/m), (y, y, y/m) + { + const struct Tbl { + uint8 code; + const char *name; + int type; + int mode; // 1 : sse, 2 : avx, 3 : sse + avx + } tbl[] = { + { 0xD0, "addsubpd", T_0F | T_66 | T_YMM, 3 }, + { 0xD0, "addsubps", T_0F | T_F2 | T_YMM, 3 }, + { 0x7C, "haddpd", T_0F | T_66 | T_YMM, 3 }, + { 0x7C, "haddps", T_0F | T_F2 | T_YMM, 3 }, + { 0x7D, "hsubpd", T_0F | T_66 | T_YMM, 3 }, + { 0x7D, "hsubps", T_0F | T_F2 | T_YMM, 3 }, + + { 0xDC, "aesenc", T_0F38 | T_66 | T_W0, 3 }, + { 0xDD, "aesenclast", T_0F38 | T_66 | T_W0, 3 }, + { 0xDE, "aesdec", T_0F38 | T_66 | T_W0, 3 }, + { 0xDF, "aesdeclast", T_0F38 | T_66 | T_W0, 3 }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + std::string type = type2String(p->type); + if 
(p->mode & 1) { + uint8 pref = p->type & T_66 ? 0x66 : p->type & T_F2 ? 0xF2 : p->type & T_F3 ? 0xF3 : 0; + printf("void %s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%02X, 0x%02X, isXMM_XMMorMEM%s); }\n", p->name, p->code, pref, p->type & T_0F38 ? ", NONE, 0x38" : ""); + } + if (p->mode & 2) { + printf("void v%s(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, %s, 0x%02X); }\n" + , p->name, type.c_str(), p->code); + } + } + } + // vmaskmov + { + const char suf[][8] = { "ps", "pd" }; + for (int i = 0; i < 2; i++) { + printf("void vmaskmov%s(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x%02X); }\n", suf[i], 0x2C + i); + printf("void vmaskmov%s(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x%02X); }\n", suf[i], 0x2E + i); + } + } + // vpmaskmov + { + const char suf[][8] = { "d", "q" }; + for (int i = 0; i < 2; i++) { + printf("void vpmaskmov%s(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_66 | T_W%d | T_YMM, 0x%02X); }\n", suf[i], i, 0x8C); + printf("void vpmaskmov%s(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W%d | T_YMM, 0x%02X); }\n", suf[i], i, 0x8E); + } + } + // vpermd, vpermps + { + const struct Tbl { + uint8 code; + const char *name; + int type; + } tbl[] = { + { 0x36, "vpermd", T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32 }, + { 0x36, "vpermq", T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_B64 }, + { 0x16, "vpermps", T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32 }, + { 0x16, "vpermpd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_YMM | T_B64 }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl& p = tbl[i]; + std::string type = type2String(p.type); + printf("void %s(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, 
%s, 0x%02X); }\n", p.name, type.c_str(), p.code); + } + } + // vpermq, vpermpd + { + const struct Tbl { + uint8 code; + const char *name; + int type; + } tbl[] = { + { 0x00, "vpermq", T_0F3A | T_66 | T_W1 | T_YMM | T_EVEX | T_EW1 | T_B64 }, + { 0x01, "vpermpd", T_0F3A | T_66 | T_W1 | T_YMM | T_EVEX | T_EW1 | T_B64 }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl& p = tbl[i]; + std::string type = type2String(p.type); + printf("void %s(const Ymm& y, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(y, op, %s, 0x%02X, imm); }\n", p.name, type.c_str(), p.code); + } + } + // vcmpeqps + { + const char pred[32][16] = { + "eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord", + "eq_uq", "nge", "ngt", "false", "neq_oq", "ge", "gt", + "true", "eq_os", "lt_oq", "le_oq", "unord_s", "neq_us", "nlt_uq", "nle_uq", "ord_s", + "eq_us", "nge_uq", "ngt_uq", "false_os", "neq_os", "ge_oq", "gt_oq", "true_us" + }; + const char suf[][4] = { "pd", "ps", "sd", "ss" }; + for (int i = 0; i < 4; i++) { + const char *s = suf[i]; + for (int j = 0; j < 32; j++) { + if (j < 8) { + printf("void cmp%s%s(const Xmm& x, const Operand& op) { cmp%s(x, op, %d); }\n", pred[j], s, s, j); + } + printf("void vcmp%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmp%s(x1, x2, op, %d); }\n", pred[j], s, s, j); + } + } + } + // vmov(h|l)(pd|ps) + { + const struct Tbl { + bool isH; + bool isPd; + uint8 code; + } tbl[] = { + { true, true, 0x16 }, + { true, false, 0x16 }, + { false, true, 0x12 }, + { false, false, 0x12 }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl& p = tbl[i]; + char c = p.isH ? 'h' : 'l'; + const char *suf = p.isPd ? "pd" : "ps"; + const char *type = p.isPd ? 
"T_0F | T_66 | T_EVEX | T_EW1 | T_N8" : "T_0F | T_EVEX | T_EW0 | T_N8"; + printf("void vmov%c%s(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, op1, op2, %s, 0x%02X); }\n" + , c, suf, type, p.code); + printf("void vmov%c%s(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, %s, 0x%02X); }\n" + , c, suf, type, p.code + 1); + } + } + // FMA + { + const struct Tbl { + uint8 code; + const char *name; + bool supportYMM; + } tbl[] = { + { 0x08, "vfmadd", true }, + { 0x09, "vfmadd", false }, + { 0x06, "vfmaddsub", true }, + { 0x07, "vfmsubadd", true }, + { 0x0A, "vfmsub", true }, + { 0x0B, "vfmsub", false }, + { 0x0C, "vfnmadd", true }, + { 0x0D, "vfnmadd", false }, + { 0x0E, "vfnmsub", true }, + { 0x0F, "vfnmsub", false }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + for (int j = 0; j < 2; j++) { + const char sufTbl[][2][8] = { + { "pd", "ps" }, + { "sd", "ss" }, + }; + for (int k = 0; k < 3; k++) { + const struct Ord { + const char *str; + uint8 code; + } ord[] = { + { "132", 0x90 }, + { "213", 0xA0 }, + { "231", 0xB0 }, + }; + int t = T_0F38 | T_66 | T_EVEX; + t |= (j == 0) ? (T_W1 | T_EW1) : (T_W0 | T_EW0); + if (tbl[i].supportYMM) t |= T_YMM; + const std::string suf = sufTbl[tbl[i].supportYMM ? 
0 : 1][j]; + if (suf == "pd") { + t |= T_B64; + } else if (suf == "ps") { + t |= T_B32; + } else if (suf == "sd") { + t |= T_ER_X | T_N8; + } else { // ss + t |= T_ER_X | T_N4; + } + std::string type = type2String(t); + printf("void %s%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n" + , tbl[i].name, ord[k].str, suf.c_str(), type.c_str(), tbl[i].code + ord[k].code); + } + } + } + } + // FMA others + { + printf("void vbroadcastf128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x1A); }\n"); + printf("void vbroadcasti128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x5A); }\n"); + printf("void vbroadcastsd(const Ymm& y, const Operand& op) { if (!op.isMEM() && !(y.isYMM() && op.isXMM()) && !(y.isZMM() && op.isXMM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(y, op, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_N8, 0x19); }\n"); + const struct Tbl { + const char *name; + uint8 code; + int type; + bool ew1; + } tbl[] = { + { "vbroadcastss", 0x18, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_N4 }, + { "vpbroadcastb", 0x78, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_N1 }, + { "vpbroadcastw", 0x79, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_N2 }, + { "vpbroadcastd", 0x58, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_N4 }, + { "vpbroadcastq", 0x59, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_N8 }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl& p = tbl[i]; + std::string type = type2String(p.type); + printf("void %s(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code); + } + + puts("void vextractf128(const Operand& op, const Ymm& y, uint8 imm) { if (!(op.isXMEM() && y.isYMM())) throw Error(ERR_BAD_COMBINATION); opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x19, imm); }"); + puts("void 
vextracti128(const Operand& op, const Ymm& y, uint8 imm) { if (!(op.isXMEM() && y.isYMM())) throw Error(ERR_BAD_COMBINATION); opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x39, imm); }"); + puts("void vextractps(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_N4, 0x17, imm); }"); + puts("void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); }"); + puts("void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); }"); + puts("void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }"); + puts("void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }"); + + puts("void vlddqu(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_F2 | T_W0 | T_YMM, 0xF0); }"); + puts("void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, T_0F, 0xAE); }"); + puts("void vstmxcsr(const Address& addr) { opAVX_X_X_XM(xm3, xm0, addr, T_0F, 0xAE); }"); + puts("void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_66, 0xF7); }"); + + puts("void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(8|16|i32e) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x14, imm); 
}"); + puts("void vpextrw(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(16|i32e) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); if (op.isREG() && x.getIdx() < 16) { opAVX_X_X_XM(Xmm(op.getIdx()), xm0, x, T_0F | T_66, 0xC5, imm); } else { opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N2, 0x15, imm); } }"); + puts("void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x16, imm); }"); + puts("void vpextrq(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x16, imm); }"); + + puts("void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); opVex(x1, &x2, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x20, imm); }"); + puts("void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); opVex(x1, &x2, op, T_0F | T_66 | T_EVEX | T_N2, 0xC4, imm); }"); + puts("void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); opVex(x1, &x2, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x22, imm); }"); + puts("void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(64) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); opVex(x1, &x2, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x22, imm); }"); + + puts("void vpmovmskb(const Reg32e& r, const Xmm& x) { if (!x.is(Operand::XMM | Operand::YMM)) throw Error(ERR_BAD_COMBINATION); opVex(x.isYMM() ? 
Ymm(r.getIdx()) : Xmm(r.getIdx()), 0, x, T_0F | T_66 | T_YMM, 0xD7); }"); + + } + // (x, x, imm), (x, imm) + { + const struct Tbl { + const char *name; + uint8 code; + int idx; + int type; + } tbl[] = { + { "pslldq", 0x73, 7, T_0F | T_66 | T_YMM | T_EVEX }, + { "psrldq", 0x73, 3, T_0F | T_66 | T_YMM | T_EVEX }, + { "psllw", 0x71, 6, T_0F | T_66 | T_YMM | T_EVEX }, + { "pslld", 0x72, 6, T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 }, + { "psllq", 0x73, 6, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 }, + { "psraw", 0x71, 4, T_0F | T_66 | T_YMM | T_EVEX }, + { "psrad", 0x72, 4, T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 }, + { "psrlw", 0x71, 2, T_0F | T_66 | T_YMM | T_EVEX }, + { "psrld", 0x72, 2, T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 }, + { "psrlq", 0x73, 2, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl& p = tbl[i]; + std::string type = type2String(p.type); + printf("void v%s(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, type.c_str(), p.code); + } + } + // 4-op + { + const struct Tbl { + const char *name; + uint8 code; + } tbl[] = { + { "vblendvpd", 0x4B }, + { "vblendvps", 0x4A }, + { "vpblendvb", 0x4C }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl& p = tbl[i]; + printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x%02X, x4.getIdx() << 4); }\n", p.name, p.code); + } + } + // mov + { + printf("void vmovd(const Xmm& x, const Operand& op) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x6E); }\n"); + printf("void vmovd(const Operand& op, const Xmm& x) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x7E); }\n"); + + printf("void vmovq(const Xmm& 
x, const Address& addr) { int type, code; if (x.getIdx() < 16) { type = T_0F | T_F3; code = 0x7E; } else { type = T_0F | T_66 | T_EVEX | T_EW1 | T_N8; code = 0x6E; } opAVX_X_X_XM(x, xm0, addr, type, code); }\n"); + printf("void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, x.getIdx() < 16 ? 0xD6 : 0x7E); }\n"); + printf("void vmovq(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_F3 | T_EVEX | T_EW1 | T_N8, 0x7E); }\n"); + + printf("void vmovhlps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x12); }\n"); + printf("void vmovlhps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x16); }\n"); + + printf("void vmovmskpd(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_66 | T_W0 | T_YMM, 0x50); }\n"); + printf("void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_W0 | T_YMM, 0x50); }\n"); + + puts("void vmovntdq(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW0, 0xE7); }"); + puts("void vmovntpd(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW1, 0x2B); }"); + puts("void vmovntps(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_YMM | T_EVEX | T_EW0, 0x2B); }"); + puts("void vmovntdqa(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0, 0x2A); }"); + + // vmovsd, vmovss + for (int i = 0; i < 2; i++) { + char c1 = i == 0 ? 
'd' : 's'; + int type = T_0F | T_EVEX; + type |= i == 0 ? (T_F2 | T_EW1 | T_N8) : (T_F3 | T_EW0 | T_N4); + std::string s = type2String(type); + printf("void vmovs%c(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, %s, 0x10); }\n", c1, s.c_str()); + printf("void vmovs%c(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", c1, s.c_str()); + printf("void vmovs%c(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, %s | T_M_K, 0x11); }\n", c1, s.c_str()); + } + } + // cvt + { + puts("void vcvtss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W0 | T_EVEX | T_EW0 | T_ER_X | T_N8, 0x2D); }"); + puts("void vcvttss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W0 | T_EVEX | T_EW0 | T_SAE_X | T_N8, 0x2C); }"); + puts("void vcvtsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W0 | T_EVEX | T_EW0 | T_N4 | T_ER_X, 0x2D); }"); + puts("void vcvttsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W0 | T_EVEX | T_EW0 | T_N4 | T_SAE_X, 0x2C); }"); + + puts("void vcvtsi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_0F | T_F3 | T_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x2A); }"); + puts("void vcvtsi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_0F | T_F2 | T_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x2A); }"); + + + puts("void vcvtps2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x5A); }"); + puts("void vcvtdq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_F3 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0xE6); }"); + + puts("void 
vcvtpd2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5A); }"); + puts("void vcvtpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0xE6); }"); + + puts("void vcvttpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_66 | T_0F | T_YMM | T_EVEX |T_EW1 | T_B64 | T_ER_Z, 0xE6); }"); + + puts("void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }"); + puts("void vcvtps2ph(const Operand& op, const Xmm& x, uint8 imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x1D, imm); }"); + + } + // haswell gpr(reg, reg, r/m) + { + const struct Tbl { + const char *name; + int type; + uint8 code; + } tbl[] = { + { "andn", T_0F38, 0xF2 }, + { "mulx", T_F2 | T_0F38, 0xF6 }, + { "pdep", T_F2 | T_0F38, 0xF5 }, + { "pext", T_F3 | T_0F38, 0xF5 }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl& p = tbl[i]; + printf("void %s(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, %s, 0x%x, true); }\n", p.name, type2String(p.type).c_str(), p.code); + } + } + // gpr(reg, r/m, reg) + { + const struct Tbl { + const char *name; + int type; + uint8 code; + } tbl[] = { + { "bextr", T_0F38, 0xF7 }, + { "bzhi", T_0F38, 0xF5 }, + { "sarx", T_0F38 | T_F3, 0xF7 }, + { "shlx", T_0F38 | T_66, 0xF7 }, + { "shrx", T_0F38 | T_F2, 0xF7 }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl& p = tbl[i]; + printf("void %s(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, %s, 0x%x, false); }\n", p.name, type2String(p.type).c_str(), p.code); + } + puts("void rorx(const Reg32e& r, const Operand& op, uint8 imm) { opGpr(r, op, Reg32e(0, r.getBit()), T_0F3A | T_F2, 0xF0, false, imm); }"); + } + // gpr(reg, r/m) + { + const struct Tbl { + const char *name; + 
int type; + uint8 code; + uint8 idx; + } tbl[] = { + { "blsi", T_0F38, 0xF3, 3 }, + { "blsmsk", T_0F38, 0xF3, 2 }, + { "blsr", T_0F38, 0xF3, 1 }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl& p = tbl[i]; + printf("void %s(const Reg32e& r, const Operand& op) { opGpr(Reg32e(%d, r.getBit()), op, r, %s, 0x%x, false); }\n", p.name, p.idx, type2String(p.type).c_str(), p.code); + } + } + // gather + { + const int y_vx_y = 0; + const int y_vy_y = 1; + const int x_vy_x = 2; + const struct Tbl { + const char *name; + uint8 code; + int w; + int mode; + } tbl[] = { + { "vgatherdpd", 0x92, 1, y_vx_y }, + { "vgatherqpd", 0x93, 1, y_vy_y }, + { "vgatherdps", 0x92, 0, y_vy_y }, + { "vgatherqps", 0x93, 0, x_vy_x }, + { "vpgatherdd", 0x90, 0, y_vy_y }, + { "vpgatherqd", 0x91, 0, x_vy_x }, + { "vpgatherdq", 0x90, 1, y_vx_y }, + { "vpgatherqq", 0x91, 1, y_vy_y }, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl& p = tbl[i]; + printf("void %s(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W%d, 0x%x, %d); }\n", p.name, p.w, p.code, p.mode); + } + } +} + +void put32() +{ + put_jREGz("cx", true); + put_jREGz("ecx", false); + + const GenericTbl tbl[] = { + { "aaa", 0x37 }, + { "aad", 0xD5, 0x0A }, + { "aam", 0xD4, 0x0A }, + { "aas", 0x3F }, + { "daa", 0x27 }, + { "das", 0x2F }, + { "popad", 0x61 }, + { "popfd", 0x9D }, + { "pusha", 0x60 }, + { "pushad", 0x60 }, + { "pushfd", 0x9C }, + { "popa", 0x61 }, + }; + putGeneric(tbl, NUM_OF_ARRAY(tbl)); +} + +void put64() +{ + put_jREGz("ecx", true); + put_jREGz("rcx", false); + + const GenericTbl tbl[] = { + { "cdqe", 0x48, 0x98 }, + { "cqo", 0x48, 0x99 }, + { "movsq", 0x48, 0xA5 }, + }; + putGeneric(tbl, NUM_OF_ARRAY(tbl)); + + puts("void cmpxchg16b(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xC7); }"); + puts("void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }"); + puts("void movq(const Mmx& mmx, 
const Reg64& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }"); + puts("void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) throw Error(ERR_BAD_COMBINATION); opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); }"); + puts("void pextrq(const Operand& op, const Xmm& xmm, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opGen(Reg64(xmm.getIdx()), op, 0x16, 0x66, 0, imm, 0x3A); }"); + puts("void pinsrq(const Xmm& xmm, const Operand& op, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opGen(Reg64(xmm.getIdx()), op, 0x22, 0x66, 0, imm, 0x3A); }"); + + puts("void vcvtss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_ER_X | T_N8, 0x2D); }"); + puts("void vcvttss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_SAE_X | T_N8, 0x2C); }"); + puts("void vcvtsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_ER_X, 0x2D); }"); + puts("void vcvttsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_SAE_X, 0x2C); }"); + + puts("void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); }"); + puts("void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); }"); +} + +void putFixed() +{ + puts("#ifdef XBYAK64"); + put64(); + puts("#else"); + put32(); + puts("#endif"); + puts("#ifndef XBYAK_NO_OP_NAMES"); + const char *tbl[] = { + "and", "or", "xor", + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const char *name = tbl[i]; + printf("void %s(const Operand& op1, const Operand& op2) { %s_(op1, op2); }\n", name, name); + printf("void %s(const Operand& op, uint32 
imm) { %s_(op, imm); }\n", name, name); + } + puts("void not(const Operand& op) { not_(op); }"); + puts("#endif"); +} + +void putOmit() +{ + puts("void vpinsrb(const Xmm& x, const Operand& op, uint8 imm) { vpinsrb(x, x, op, imm); }"); + puts("void vpinsrd(const Xmm& x, const Operand& op, uint8 imm) { vpinsrd(x, x, op, imm); }"); + puts("void vpinsrq(const Xmm& x, const Operand& op, uint8 imm) { vpinsrq(x, x, op, imm); }"); + puts("void vpinsrw(const Xmm& x, const Operand& op, uint8 imm) { vpinsrw(x, x, op, imm); }"); + + puts("void vcvtsi2sd(const Xmm& x, const Operand& op) { vcvtsi2sd(x, x, op); }"); + puts("void vcvtsi2ss(const Xmm& x, const Operand& op) { vcvtsi2ss(x, x, op); }"); + { + const char pred[32][16] = { + "eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord", + "eq_uq", "nge", "ngt", "false", "neq_oq", "ge", "gt", + "true", "eq_os", "lt_oq", "le_oq", "unord_s", "neq_us", "nlt_uq", "nle_uq", "ord_s", + "eq_us", "nge_uq", "ngt_uq", "false_os", "neq_os", "ge_oq", "gt_oq", "true_us" + }; + const char suf[][4] = { "pd", "ps", "sd", "ss" }; + for (int i = 0; i < 4; i++) { + const char *s = suf[i]; + for (int j = 0; j < 32; j++) { + printf("void vcmp%s%s(const Xmm& x, const Operand& op) { vcmp%s%s(x, x, op); }\n", pred[j], s, pred[j], s); + } + } + } + { + const char *tbl[] = { + "pslldq", + "psrldq", + "psllw", + "pslld", + "psllq", + "psraw", + "psrad", + "psrlw", + "psrld", + "psrlq", + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const char *name = tbl[i]; + printf("void v%s(const Xmm& x, uint8 imm) { v%s(x, x, imm); }\n", name, name); + } + } + { + const char *tbl[] = { + "vblendvpd", + "vblendvps", + "vpblendvb", + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const char *name = tbl[i]; + printf("void %s(const Xmm& x1, const Operand& op, const Xmm& x4) { %s(x1, x1, op, x4); }\n", name, name); + } + } + putX_X_XM(true); +} + +int main(int argc, char *argv[]) +{ + std::string mode = argc == 2 ? 
argv[1] : ""; + if (mode == "") { + put(); + } else if (mode == "fixed") { + putFixed(); + } else { + putOmit(); + } +} diff --git a/gen/sortline.cpp b/gen/sortline.cpp new file mode 100644 index 00000000..a70ed9fe --- /dev/null +++ b/gen/sortline.cpp @@ -0,0 +1,23 @@ +#include <iostream> +#include <fstream> +#include <string> +#include <set> + +typedef std::set<std::string> StrSet; + +int main() +{ + StrSet ss; + std::string line; + while (std::getline(std::cin, line)) { + if (!line.empty() && line[line.size() - 1] == '\n') { + line.resize(line.size() - 1); + } + if (!line.empty()) { + ss.insert(line); + } + } + for (StrSet::const_iterator i = ss.begin(), ie = ss.end(); i != ie; ++i) { + std::cout << *i << std::endl; + } +} diff --git a/gen/update.bat b/gen/update.bat new file mode 100644 index 00000000..161ed87e --- /dev/null +++ b/gen/update.bat @@ -0,0 +1,17 @@ +@echo off +set OPT=/EHsc -I../ /W4 -D_CRT_SECURE_NO_WARNINGS +set TARGET=..\\xbyak\\xbyak_mnemonic.h +set SORT=sortline +cl gen_code.cpp %OPT% +gen_code | %SORT% > %TARGET% +echo #ifdef XBYAK_ENABLE_OMITTED_OPERAND>> %TARGET% +gen_code omit | %SORT% >> %TARGET% +echo #endif>>%TARGET% +gen_code fixed >> %TARGET% +cl gen_avx512.cpp %OPT% +echo #ifndef XBYAK_DISABLE_AVX512>> %TARGET% +gen_avx512 | %SORT% >> %TARGET% +echo #ifdef XBYAK64>> %TARGET% +gen_avx512 64 | %SORT% >> %TARGET% +echo #endif>> %TARGET% +echo #endif>> %TARGET% diff --git a/readme.md b/readme.md new file mode 100644 index 00000000..4f5adf0d --- /dev/null +++ b/readme.md @@ -0,0 +1,453 @@ + +Xbyak 5.52 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++ +============= + +Abstract +------------- + +This is a header file that enables dynamic (run-time) assembly of x86(IA32), x64(AMD64, x86-64) mnemonics. + +Feature +------------- +Header file only: +you can use Xbyak's functions as soon as xbyak.h is included.
+ +### Supported Instruction Sets + +MMX/MMX2/SSE/SSE2/SSE3/SSSE3/SSE4/FPU(*partial*)/AVX/AVX2/FMA/VEX-encoded GPR/AVX-512 + +### Supported OS + +* Windows XP, Vista, Windows 7(32bit, 64bit) +* Linux(32bit, 64bit) +* Intel Mac OSX + +### Supported Compilers + +* Visual Studio C++ VC2012 or later +* gcc 4.7 or later +* clang 3.3 +* cygwin gcc 4.5.3 +* icc 7.2 + +>Note: Xbyak uses and(), or(), xor(), not() functions, so "-fno-operator-names" option is required on gcc. +Or define XBYAK_NO_OP_NAMES and use and_(), or_(), xor_(), not_() instead of them. +and_(), or_(), xor_(), not_() are always available. + +Install +------------- + +The following files are necessary. Please add their path to your compiler's include directories. + +* xbyak.h +* xbyak_mnemonic.h + +Linux: + + make install + +These files are copied into /usr/local/include/xbyak + +New Feature +------------- + +Add support for AVX-512 instruction set. + +Syntax +------------- + +Inherit from Xbyak::CodeGenerator, write your code in a class method, and get the function +pointer by calling getCode() and casting the return value. + + NASM Xbyak + mov eax, ebx --> mov(eax, ebx); + inc ecx --> inc(ecx); + ret --> ret(); + +### Addressing + + (ptr|dword|word|byte) [base + index * (1|2|4|8) + displacement] + [rip + 32bit disp] ; x64 only + + NASM Xbyak + mov eax, [ebx+ecx] --> mov (eax, ptr[ebx+ecx]); + test byte [esp], 4 --> test (byte [esp], 4); + + +How to use Selector(Segment Register) + +>Note: Segment class is not derived from Operand. + +``` +mov eax, [fs:eax] --> putSeg(fs); mov(eax, ptr [eax]); +mov ax, cs --> mov(ax, cs); +``` + +>You can use ptr for almost all memory accesses unless you need to specify the size of the memory operand. + +>dword, word and byte are member variables, so don't use dword as a type name for unsigned int, for example.
+ +### AVX + + vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3 + vaddps(xmm2, xmm3, ptr [rax]); // use ptr to access memory + vgatherdpd(xmm1, ptr [ebp+123+xmm2*4], xmm3); + +*Remark* +The omitted-destination syntax shown below is disabled. +``` + vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3 +``` +Define `XBYAK_ENABLE_OMITTED_OPERAND` if you need it for backward compatibility. +However, newer versions will not support it. + +### AVX-512 + +``` +vaddpd zmm2, zmm5, zmm30 --> vaddpd(zmm2, zmm5, zmm30); +vaddpd xmm30, xmm20, [rax] --> vaddpd(xmm30, xmm20, ptr [rax]); +vaddps xmm30, xmm20, [rax] --> vaddps(xmm30, xmm20, ptr [rax]); +vaddpd zmm2{k5}, zmm4, zmm2 --> vaddpd(zmm2 | k5, zmm4, zmm2); +vaddpd zmm2{k5}{z}, zmm4, zmm2 --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2); +vaddpd zmm2{k5}{z}, zmm4, zmm2,{rd-sae} --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2 | T_rd_sae); + vaddpd(zmm2 | k5 | T_z | T_rd_sae, zmm4, zmm2); // the position of `|` is arbitrary. +vcmppd k4{k3}, zmm1, zmm2, {sae}, 5 --> vcmppd(k4 | k3, zmm1, zmm2 | T_sae, 5); + +vaddpd xmm1, xmm2, [rax+256] --> vaddpd(xmm1, xmm2, ptr [rax+256]); +vaddpd xmm1, xmm2, [rax+256]{1to2} --> vaddpd(xmm1, xmm2, ptr_b [rax+256]); +vaddpd ymm1, ymm2, [rax+256]{1to4} --> vaddpd(ymm1, ymm2, ptr_b [rax+256]); +vaddpd zmm1, zmm2, [rax+256]{1to8} --> vaddpd(zmm1, zmm2, ptr_b [rax+256]); +vaddps zmm1, zmm2, [rax+rcx*8+8]{1to16} --> vaddps(zmm1, zmm2, ptr_b [rax+rcx*8+8]); +vmovsd [rax]{k1}, xmm4 --> vmovsd(ptr [rax] | k1, xmm4); + +vcvtpd2dq xmm16, oword [eax+33] --> vcvtpd2dq(xmm16, xword [eax+33]); // use xword for m128 instead of oword + vcvtpd2dq(xmm16, ptr [eax+33]); // default xword +vcvtpd2dq xmm21, [eax+32]{1to2} --> vcvtpd2dq(xmm21, ptr_b [eax+32]); +vcvtpd2dq xmm0, yword [eax+33] --> vcvtpd2dq(xmm0, yword [eax+33]); // use yword for m256 +vcvtpd2dq xmm19, [eax+32]{1to4} --> vcvtpd2dq(xmm19, yword_b [eax+32]); // use yword_b to broadcast + +vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5); // specify
m512 +vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit +vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, xword_b [rax+64], 5); // broadcast 32-bit to 128-bit +``` +Remark +* k1, ..., k7 are new opmask registers. +* use `| T_z`, `| T_sae`, `| T_rn_sae`, `| T_rd_sae`, `| T_ru_sae`, `| T_rz_sae` instead of `,{z}`, `,{sae}`, `,{rn-sae}`, `,{rd-sae}`, `,{ru-sae}`, `,{rz-sae}` respectively. +* `k4 | k3` is different from `k3 | k4`. +* use `ptr_b` for broadcast `{1toX}`. X is automatically determined. +* specify xword/yword/zword(_b) for m128/m256/m512 if necessary. + +### Label + + L("L1"); + jmp ("L1"); + + jmp ("L2"); + ... + a few mnemonics(8-bit displacement jmp) + ... + L("L2"); + + jmp ("L3", T_NEAR); + ... + a lot of mnemonics(32-bit displacement jmp) + ... + L("L3"); + +>Call hasUndefinedLabel() to verify your code has no undefined label. +> You can also use a label as the immediate value of mov, e.g. mov(eax, "L2"); + +#### 1. support @@, @f, @b like MASM + + L("@@"); // (A) + jmp("@b"); // jmp to (A) + jmp("@f"); // jmp to (B) + L("@@"); // (B) + jmp("@b"); // jmp to (B) + mov(eax, "@b"); + jmp(eax); // jmp to (B) + +#### 2. localization of label by calling inLocalLabel(), outLocalLabel(). + +Labels beginning with a period between inLocalLabel() and outLocalLabel() +are treated as local labels. +inLocalLabel() and outLocalLabel() can be nested. + + void func1() + { + inLocalLabel(); + L(".lp"); // (A) ; local label + ... + jmp(".lp"); // jmp to (A) + L("aaa"); // global label + outLocalLabel(); + } + + void func2() + { + inLocalLabel(); + L(".lp"); // (B) ; local label + func1(); + jmp(".lp"); // jmp to (B) + outLocalLabel(); + } + +### Label class + +L() and jxx() functions support a new Label class. + + Label label1, label2; + L(label1); + ... + jmp(label1); + ... + jmp(label2); + ... + L(label2); + +Moreover, assignL(dstLabel, srcLabel) method binds dstLabel with srcLabel. + + Label label1, label2; + L(label1); + ... 
+ jmp(label2); + ... + assignL(label2, label1); // label2 <= label1 + +The above jmp opcode jumps to label1. + +* Restriction: +* srcLabel must be used in L(). +* dstLabel must not be used in L(). + +Label::getAddress() returns the address specified by the label instance and 0 if not specified. +``` +// not AutoGrow mode +Label label; +assert(label.getAddress() == 0); +L(label); +assert(label.getAddress() == getCurr()); +``` + +### Rip +``` +Label label; +mov(eax, ptr [rip + label]); // eax = 4 +... + +L(label); +dd(4); +``` +``` +int x; +... + mov(eax, ptr[rip + &x]); // throw exception if the difference between &x and current position is larger than 2GiB +``` +### Code size +The default max code size is 4096 bytes. Please set it in constructor of CodeGenerator() if you want to use large size. + + class Quantize : public Xbyak::CodeGenerator { + public: + Quantize() + : CodeGenerator(8192) + { + } + ... + }; + +### use user allocated memory + +You can make jit code on prepared memory. + + class Sample : public Xbyak::CodeGenerator { + public: + Sample(void *userPtr, size_t size) + : Xbyak::CodeGenerator(size, userPtr) + { + ... + } + }; + + const size_t codeSize = 1024; + uint8 buf[codeSize + 16]; + + // get 16-byte aligned address + uint8 *p = Xbyak::CodeArray::getAlignedAddress(buf); + + // append executable attribute to the memory + Xbyak::CodeArray::protect(p, codeSize, true); + + // construct your jit code on the memory + Sample s(p, codeSize); + +>See *sample/test0.cpp* + +AutoGrow +------------- + +Under `AutoGrow` mode, Xbyak extends memory automatically if necessary. +Call ready() before calling getCode() to calc address of jmp. +``` + struct Code : Xbyak::CodeGenerator { + Code() + : Xbyak::CodeGenerator(Xbyak::DEFAULT_MAX_CODE_SIZE, Xbyak::AutoGrow) + { + ... + } + }; + Code c; + c.ready(); // Don't forget to call this function +``` +>Don't use the address returned by getCurr() before calling ready(). +>It may be invalid address. 
+>RESTRICTION : rip addressing is not supported in AutoGrow + +Macro +------------- + +* **XBYAK32** is defined on 32bit. +* **XBYAK64** is defined on 64bit. +* **XBYAK64_WIN** is defined on 64bit Windows(VC) +* **XBYAK64_GCC** is defined on 64bit gcc, cygwin +* define **XBYAK_NO_OP_NAMES** on gcc without `-fno-operator-names` +* define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(deprecated; will be removed in the future) +* define **XBYAK_UNDEF_JNL** if Bessel function jnl is defined as macro + +Sample +------------- + +* test0.cpp ; tiny sample of Xbyak(x86, x64) +* quantize.cpp ; JIT optimized quantization by fast division(x86 only) +* calc.cpp ; assemble and evaluate a given polynomial(x86, x64) +* bf.cpp ; JIT brainfuck(x86, x64) + +License +------------- + +modified new BSD License +http://opensource.org/licenses/BSD-3-Clause + +The files under test/cybozu/ are copied from cybozulib(https://github.com/herumi/cybozulib/), +which is licensed under BSD-3-Clause, and are used only for tests. +The header files under xbyak/ are independent of cybozulib. 
+ +History +------------- +* 2017/Aug/18 ver 5.52 fix align (thanks to MerryMage) +* 2017/Aug/17 ver 5.51 add multi-byte nop and align() uses it(thanks to inolen) +* 2017/Aug/08 ver 5.50 add mpx(thanks to magurosan) +* 2017/Aug/08 ver 5.45 add sha(thanks to magurosan) +* 2017/Aug/08 ver 5.44 add prefetchw(thanks to rsdubtso) +* 2017/Jul/12 ver 5.432 reduce warnings of PVS studio +* 2017/Jul/09 ver 5.431 fix hasRex() (no effect) (thanks to drillsar) +* 2017/May/14 ver 5.43 fix CodeGenerator::resetSize() (thanks to gibbed) +* 2017/May/13 ver 5.42 add movs{b,w,d,q} +* 2017/Jan/26 ver 5.41 add prefetchwt1 and support for scale == 0(thanks to rsdubtso) +* 2016/Dec/14 ver 5.40 add Label::getAddress() method to get the pointer specified by the label +* 2016/Dec/09 ver 5.34 fix handling of negative offsets when encoding disp8N(thanks to rsdubtso) +* 2016/Dec/08 ver 5.33 fix encoding of vpbroadcast{b,w,d,q}, vpinsr{b,w}, vpextr{b,w} for disp8N +* 2016/Dec/01 ver 5.32 rename __xgetbv() to _xgetbv() to support clang for Visual Studio(thanks to freiro) +* 2016/Nov/27 ver 5.31 rename AVX512_4VNNI to AVX512_4VNNIW +* 2016/Nov/27 ver 5.30 add AVX512_4VNNI, AVX512_4FMAPS instructions(thanks to rsdubtso) +* 2016/Nov/26 ver 5.20 add detection of AVX512_4VNNI and AVX512_4FMAPS(thanks to rsdubtso) +* 2016/Nov/20 ver 5.11 re-add lost vptest for ymm(thanks to gregory38) +* 2016/Nov/20 ver 5.10 add addressing [rip+&var] +* 2016/Sep/29 ver 5.03 fix detection ERR_INVALID_OPMASK_WITH_MEMORY(thanks to PVS-Studio) +* 2016/Aug/15 ver 5.02 xbyak does not include xbyak_bin2hex.h +* 2016/Aug/15 ver 5.011 fix detection of version of gcc 5.4 +* 2016/Aug/03 ver 5.01 disable omitted operand +* 2016/Jun/24 ver 5.00 support avx-512 instruction set +* 2016/Jun/13 avx-512 add mask instructions +* 2016/May/05 ver 4.91 add detection of AVX-512 to Xbyak::util::Cpu +* 2016/Mar/14 ver 4.901 comment to ready() function(thanks to skmp) +* 2016/Feb/04 ver 4.90 add jcc(const void *addr); +* 2016/Jan/30 ver 4.89 
vpblendvb supports ymm reg(thanks to John Funnell) +* 2016/Jan/24 ver 4.88 lea, cmov supports 16-bit register(thanks to whyisthisfieldhere) +* 2015/Oct/05 ver 4.87 support segment selectors +* 2015/Aug/18 ver 4.86 fix [rip + label] addressing with immediate value(thanks to whyisthisfieldhere) +* 2015/Aug/10 ver 4.85 Address::operator==() is not correct(thanks to inolen) +* 2015/Jun/22 ver 4.84 call() support variadic template if available(thanks to randomstuff) +* 2015/Jun/16 ver 4.83 support movbe(thanks to benvanik) +* 2015/May/24 ver 4.82 support detection of F16C +* 2015/Apr/25 ver 4.81 fix the condition to throw exception for setSize(thanks to whyisthisfieldhere) +* 2015/Apr/22 ver 4.80 rip supports label(thanks to whyisthisfieldhere) +* 2015/Jan/28 ver 4.71 support adcx, adox, cmpxchg, rdseed, stac +* 2014/Oct/14 ver 4.70 support MmapAllocator +* 2014/Jun/13 ver 4.62 disable warning of VC2014 +* 2014/May/30 ver 4.61 support bt, bts, btr, btc +* 2014/May/28 ver 4.60 support vcvtph2ps, vcvtps2ph +* 2014/Apr/11 ver 4.52 add detection of rdrand +* 2014/Mar/25 ver 4.51 remove state information of unreferenced labels +* 2014/Mar/16 ver 4.50 support new Label +* 2014/Mar/05 ver 4.40 fix wrong detection of BMI/enhanced rep on VirtualBox +* 2013/Dec/03 ver 4.30 support Reg::cvt8(), cvt16(), cvt32(), cvt64() +* 2013/Oct/16 ver 4.21 label support std::string +* 2013/Jul/30 ver 4.20 [break backward compatibility] split Reg32e class into RegExp(base+index*scale+disp) and Reg32e(means Reg32 or Reg64) +* 2013/Jul/04 ver 4.10 [break backward compatibility] change the type of Xbyak::Error from enum to a class +* 2013/Jun/21 ver 4.02 add putL(LABEL) function to put the address of the label +* 2013/Jun/21 ver 4.01 vpsllw, vpslld, vpsllq, vpsraw, vpsrad, vpsrlw, vpsrld, vpsrlq support (ymm, ymm, xmm). + support vpbroadcastb, vpbroadcastw, vpbroadcastd, vpbroadcastq(thanks to Gabest). 
+* 2013/May/30 ver 4.00 support AVX2, VEX-encoded GPR-instructions +* 2013/Mar/27 ver 3.80 support mov(reg, "label"); +* 2013/Mar/13 ver 3.76 add cqo(), jcxz(), jecxz(), jrcxz() +* 2013/Jan/15 ver 3.75 add setSize() to modify generated code +* 2013/Jan/12 ver 3.74 add CodeGenerator::reset() ; add Allocator::useProtect() +* 2013/Jan/06 ver 3.73 use unordered_map if possible +* 2012/Dec/04 ver 3.72 eax, ebx, ... are member variables of CodeGenerator(revert), Xbyak::util::eax, ... are static const. +* 2012/Nov/17 ver 3.71 and_(), or_(), xor_(), not_() are available if XBYAK_NO_OP_NAMES is not defined. +* 2012/Nov/17 change eax, ebx, ptr and so on in CodeGenerator as static member and alias of them are defined in Xbyak::util. +* 2012/Nov/09 ver 3.70 XBYAK_NO_OP_NAMES macro is added to use and_() instead of and() (thanks to Mattias) +* 2012/Nov/01 ver 3.62 add fwait/fnwait/finit/fninit +* 2012/Nov/01 ver 3.61 add fldcw/fstcw +* 2012/May/03 ver 3.60 change interface of Allocator +* 2012/Mar/23 ver 3.51 fix userPtr mode +* 2012/Mar/19 ver 3.50 support AutoGrow mode +* 2011/Nov/09 ver 3.05 fix bit property of rip addressing / support movsxd +* 2011/Aug/15 ver 3.04 fix dealing with imm8 such as add(dword [ebp-8], 0xda); (thanks to lolcat) +* 2011/Jun/16 ver 3.03 fix __GNUC_PREREQ macro for Mac gcc(thanks to t_teruya) +* 2011/Apr/28 ver 3.02 do not use xgetbv on Mac gcc +* 2011/Mar/24 ver 3.01 fix typo of OSXSAVE +* 2011/Mar/23 ver 3.00 add vcmpeqps and so on +* 2011/Feb/16 ver 2.994 beta add vmovq for 32-bit mode(I forgot it) +* 2011/Feb/16 ver 2.993 beta remove cvtReg to avoid thread unsafe +* 2011/Feb/10 ver 2.992 beta support one argument syntax for fadd like nasm +* 2011/Feb/07 ver 2.991 beta fix pextrw reg, xmm, imm(Thanks to Gabest) +* 2011/Feb/04 ver 2.99 beta support AVX +* 2010/Dec/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp +* 2010/Oct/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist +* 2010/Jun/07 ver 2.29 
fix call( + jmp("@b"); // jmp to + jmp("@f"); // jmp to + L("@@"); // + jmp("@b"); // jmp to + mov(eax, "@b"); + jmp(eax); // jmp to + +2. ラベルの局所化 + +ピリオドで始まるラベルをinLocalLabel(), outLocalLabel()で挟むことで局所化できます。 +inLocalLabel(), outLocalLabel()は入れ子にすることができます。 + +void func1() +{ + inLocalLabel(); + L(".lp"); // (A) ; ローカルラベル + ... + jmp(".lp"); // jmp to (A) + L("aaa"); // グローバルラベル + outLocalLabel(); +} + +void func2() +{ + inLocalLabel(); + L(".lp"); // (B) ; ローカルラベル + func1(); + jmp(".lp"); // jmp to (B) + outLocalLabel(); +} + +上記サンプルではinLocalLabel(), outLocalLabel()が無いと、 +".lp"ラベルの二重定義エラーになります。 + +3. 新しいLabelクラスによるジャンプ命令 + +ジャンプ先を文字列による指定だけでなくラベルクラスを使えるようになりました。 + + Label label1, label2; + L(label1); + ... + jmp(label1); + ... + jmp(label2); + ... + L(label2); + +更にラベルの割り当てを行うassignL(dstLabel, srcLabel)という命令も追加されました。 + + Label label1, label2; + L(label1); + ... + jmp(label2); + ... + assignL(label2, label1); + +上記jmp命令はlabel1にジャンプします。 + +制限 +* srcLabelはL()により飛び先が確定していないといけません。 +* dstLabelはL()により飛び先が確定していてはいけません。 + +ラベルは`getAddress()`によりそのアドレスを取得できます。 +未定義のときは0が返ります。 +``` +// not AutoGrow mode +Label label; +assert(label.getAddress() == 0); +L(label); +assert(label.getAddress() == getCurr()); +``` + +・Xbyak::CodeGenerator()コンストラクタインタフェース + +@param maxSize [in] コード生成最大サイズ(デフォルト4096byte) +@param userPtr [in] ユーザ指定メモリ + +CodeGenerator(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0); + +デフォルトコードサイズは4096(=DEFAULT_MAX_CODE_SIZE)バイトです。 +それより大きなコードを生成する場合はCodeGenerator()のコンストラクタに指定してください。 + +class Quantize : public Xbyak::CodeGenerator { +public: + Quantize() + : CodeGenerator(8192) + { + } + ... 
+}; + +またユーザ指定メモリをコード生成最大サイズと共に指定すると、CodeGeneratorは +指定されたメモリ上にバイト列を生成します。 + +補助関数として指定されたアドレスの実行属性を変更するCodeArray::protect()と +与えられたポインタからアライメントされたポインタを取得するCodeArray::getAlignedAddress() +も用意しました。詳細はsample/test0.cppのuse memory allocated by userを参考に +してください。 + +/** + change exec permission of memory + @param addr [in] buffer address + @param size [in] buffer size + @param canExec [in] true(enable to exec), false(disable to exec) + @return true(success), false(failure) +*/ +bool CodeArray::protect(const void *addr, size_t size, bool canExec); + +/** + get aligned memory pointer +*/ +uint8 *CodeArray::getAlignedAddress(uint8 *addr, size_t alignedSize = ALIGN_SIZE); + +その他詳細は各種サンプルを参照してください。 +----------------------------------------------------------------------------- +◎マクロ + +32bit環境上でコンパイルするとXBYAK32が、64bit環境上でコンパイルするとXBYAK64が +定義されます。さらに64bit環境上ではWindows(VC)ならXBYAK64_WIN、cygwin, gcc上では +XBYAK64_GCCが定義されます。 + +----------------------------------------------------------------------------- +◎使用例 + +test0.cpp ; 簡単な例(x86, x64) +quantize.cpp ; 割り算のJITアセンブルによる量子化の高速化(x86) +calc.cpp ; 与えられた多項式をアセンブルして実行(x86, x64) + boost(http://www.boost.org/)が必要 +bf.cpp ; JIT Brainfuck(x86, x64) + +----------------------------------------------------------------------------- +◎ライセンス + +修正された新しいBSDライセンスに従います。 +http://opensource.org/licenses/BSD-3-Clause + +sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から +いただきました。 + +test/cybozu/以下のファイルはcybozulib(https://github.com/herumi/cybozulib/) +の一部を使っています。cybozulibはBSD-3-Clauseライセンスです。 +cybozulibは単体テストでのみ利用されていて、xbyak/ディレクトリ以下のヘッダ +ファイルはcybozulibとは独立に利用できます。 + +----------------------------------------------------------------------------- +◎履歴 + +2017/08/18 ver 5.52 align修正(thanks to MerryMage) +2017/08/17 ver 5.51 multi-byte nop追加 align()はそれを使用する(thanks to inolen) +2017/08/08 ver 5.50 mpx追加(thanks to magurosan) +2017/08/08 ver 5.45 sha追加(thanks to magurosan) +2017/08/08 ver 5.44 prefetchw追加(thanks to rsdubtso) +2017/07/12 ver 
5.432 PVS-studioの警告を減らす +2017/07/09 ver 5.431 hasRex()修正 (影響なし) (thanks to drillsar) +2017/05/14 ver 5.43 CodeGenerator::resetSize()修正(thanks to gibbed) +2017/05/13 ver 5.42 movs{b,w,d,q}追加 +2017/01/26 ver 5.41 prefetchwt1追加とscale == 0対応(thanks to rsdubtso) +2016/12/14 ver 5.40 Labelが示すアドレスを取得するLabel::getAddress()追加 +2016/12/07 ver 5.34 disp8N時の負のオフセット処理の修正(thanks to rsdubtso) +2016/12/06 ver 5.33 disp8N時のvpbroadcast{b,w,d,q}, vpinsr{b,w}, vpextr{b,w}のバグ修正 +2016/12/01 ver 5.32 clang for Visual Studioサポートのために__xgetbv()を_xgetbv()に変更(thanks to freiro) +2016/11/27 ver 5.31 AVX512_4VNNIをAVX512_4VNNIWに変更 +2016/11/27 ver 5.30 AVX512_4VNNI, AVX512_4FMAPS命令の追加(thanks to rsdubtso) +2016/11/26 ver 5.20 AVX512_4VNNIとAVX512_4FMAPSの判定追加(thanks to rsdubtso) +2016/11/20 ver 5.11 何故か消えていたvptest for ymm追加(thanks to gregory38) +2016/11/20 ver 5.10 [rip+&var]の形のアドレッシング追加 +2016/09/29 ver 5.03 ERR_INVALID_OPMASK_WITH_MEMORYの判定ミス修正(thanks to PVS-Studio) +2016/08/15 ver 5.02 xbyak_bin2hex.hをincludeしない +2016/08/15 ver 5.011 gcc 5.4のバージョン取得ミスの修正 +2016/08/03 ver 5.01 AVXの省略表記非サポート +2016/07/24 ver 5.00 avx-512フルサポート +2016/06/13 avx-512 opmask命令サポート +2016/05/05 ver 4.91 AVX-512命令の検出サポート +2016/03/14 ver 4.901 ready()関数にコメント加筆(thanks to skmp) +2016/02/04 ver 4.90 条件分岐命令にjcc(const void *addr);のタイプを追加 +2016/01/30 ver 4.89 vpblendvbがymmレジスタをサポートしていなかった(thanks to John Funnell) +2016/01/24 ver 4.88 lea, cmovの16bitレジスタ対応(thanks to whyisthisfieldhere) +2015/08/16 ver 4.87 セグメントセレクタに対応 +2015/08/16 ver 4.86 [rip + label]アドレッシングで即値を使うと壊れる(thanks to whyisthisfieldhere) +2015/08/10 ver 4.85 Address::operator==()が間違っている(thanks to inolen) +2015/07/22 ver 4.84 call()がvariadic template対応 +2015/05/24 ver 4.83 movbeサポート(thanks to benvanik) +2015/05/24 ver 4.82 F16Cが使えるかどうかの判定追加 +2015/04/25 ver 4.81 setSizeが例外を投げる条件を修正(thanks to whyisthisfieldhere) +2015/04/22 ver 4.80 rip相対でLabelのサポート(thanks to whyisthisfieldhere) +2015/01/28 ver 4.71 adcx, adox, cmpxchg, rdseed, stacのサポート +2014/10/14 ver 4.70 MmapAllocatorのサポート 
+2014/06/13 ver 4.62 VC2014で警告抑制 +2014/05/30 ver 4.61 bt, bts, btr, btcのサポート +2014/05/28 ver 4.60 vcvtph2ps, vcvtps2phのサポート +2014/04/11 ver 4.52 rdrandの判定追加 +2014/03/25 ver 4.51 参照されなくなったラベルの状態を削除する +2014/03/16 ver 4.50 新しいラベルクラスのサポート +2014/03/05 ver 4.40 VirtualBox上でBMI/enhanced repのサポート判定を間違うことがあるのを修正 +2013/12/03 ver 4.30 Reg::cvt8(), cvt16(), cvt32()のサポート +2013/10/16 ver 4.21 ラベルでstd::stringを受け付ける。 +2013/07/30 ver 4.20 [break backward compatibility] 従来のReg32eクラスをアドレッシング用のRegExpとReg32, Reg64を表すReg32eに分離 +2013/07/04 ver 4.10 [break backward compatibility] Xbyak::Errorの型をenumからclassに変更 +2013/06/21 ver 4.02 LABELの指すアドレスを書き込むputL(LABEL)関数の追加。 +2013/06/21 ver 4.01 vpsllw, vpslld, vpsllq, vpsraw, vpsrad, vpsrlw, vpsrld, vpsrlq support (ymm, ymm, xmm) + support vpbroadcastb, vpbroadcastw, vpbroadcastd, vpbroadcastq(thanks to Gabest) +2013/05/30 ver 4.00 AVX2, VEX-encoded GPR-instructionをサポート +2013/03/27 ver 3.80 mov(reg, "label");をサポート +2013/03/13 ver 3.76 cqo, jcxz, jecxz, jrcxz追加 +2013/01/15 ver 3.75 生成されたコードを修正するためにsetSize()を追加 +2013/01/12 ver 3.74 CodeGenerator::reset()とAllocator::useProtect()を追加 +2013/01/06 ver 3.73 可能ならunordered_mapを使う +2012/12/04 ver 3.72 eaxなどをCodeGeneratorのメンバ変数に戻す. 
Xbyak::util::eaxはstatic const変数 +2012/11/17 ver 3.71 and_(), or_(), xor_(), not_()をXBYAK_NO_OP_NAMESが定義されていないときでも使えるようにした +2012/11/17 CodeGeneratorのeax, ecx, ptrなどのメンバ変数をstaticにし、const参照をXbyak::utilにも定義 +2012/11/09 ver 3.70 and()をand_()にするためのマクロXBYAK_NO_OP_NAMESを追加(thanks to Mattias) +2012/11/01 ver 3.62 add fwait/fnwait/finit/fninit +2012/11/01 ver 3.61 add fldcw/fstcw +2012/05/03 ver 3.60 Allocatorクラスのインタフェースを変更 +2012/03/23 ver 3.51 userPtrモードがバグったのを修正 +2012/03/19 ver 3.50 AutoGrowモードサポート +2011/11/09 ver 3.05 rip相対の64bitサイズ以外の扱いのバグ修正 / movsxdサポート +2011/08/15 ver 3.04 add(dword [ebp-8], 0xda);などにおけるimm8の扱いのバグ修正(thanks to lolcat) +2011/06/16 ver 3.03 Macのgcc上での__GNUC_PREREQがミスってたのを修正(thanks to t_teruya) +2011/04/28 ver 3.02 Macのgcc上ではxgetbvをdisable +2011/03/24 ver 3.01 fix typo of OSXSAVE +2011/03/23 ver 3.00 vcmpeqpsなどを追加 +2011/02/16 ver 2.994 beta add vmovq for 32-bit mode(I forgot it) +2011/02/16 ver 2.993 beta remove cvtReg to avoid thread unsafe +2011/02/10 ver 2.992 beta support one argument syntax for fadd like nasm +2011/02/07 ver 2.991 beta fix pextrw reg, xmm, imm(Thanks to Gabest) +2011/02/04 ver 2.99 beta support AVX +2010/12/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp +2010/10/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist +2010/07/07 ver 2.29 fix call(