Squashed 'externals/xbyak/' changes from 88f2f771f..a1ac3750f
a1ac3750f Merge branch 'dev' 5f4ba971f v6.68 cac2c175f update doc 1b08a8248 add test of prefetchiti bef70d9b1 add prefetchit{0,1} f66785876 add detection of prefetchiti 124617ac9 Merge branch 'dev' 1bce8be17 Merge commit 'fbb18f6' into dev bebfe64a3 [test] X32 is optional and remove CFLAGS d8c332cef Merge branch 'Tachi107-small-changes' into dev fbb18f69d ci: use containers 8a974696c test: simplify makefile 1efe9fe7c Merge branch 'dev' bb70083e6 v6.67 436e452d7 update doc 47ff6ef42 I get an error after all on GitHub action 445c0dcec add test of CMPccXADD c9347907d add CMPccXADD dc792cc56 add detection of cmpccxadd 62be84cc8 fix detection of boost b5ac7b0f0 Merge branch 'dev' 05dd400e0 recover a removed line 94eff6246 v6.66 ec3fadeba update doc 00bfaaa7f add test of rao-int 0f2f1aaa6 support rio-int 6c047f480 detect rao-int f07c5c255 fix args of EmptyAllocator::alloc in test 04d3eb5f5 stop if a test script causes an error 1c1d2366f disable boost sample cfb1127c3 tweak 5fcbeb7c4 [sample] change the way of detection of boost bafc1ee60 CXX uses g++ as default value d8cabc6cb remove warning of blace bc73a0816 remove -fno-operator-names option 6989aea94 use CXX instead of g++ a7c5a1bd7 use English 1bfbd8c4d Fix incorrect format strings 0ecef5c28 Merge branch 'dev' 7556c20ba update doc a15709271 v6.65 3b83aab3e add detect_x32 to TARGET 8c64bbbc3 use gcc instead of dpkg for portability 5e9a9b96f test_avx512.sh runs on x32 8ae01b0c2 disable some tests on x32 83b3da217 x32 does not check large disp 693ab8c9d sizeof(void*) = 4 on x32, so disable the test 348e3e548 Merge branch 'dev' 11b9c4dc0 v6.64 459636196 add T_M_K flag to vpmov* 1d3722928 add include path 5e27eddae move CrearError from LabelManager::reset() to CodeGenerator::reset() f8ea5c28d Merge branch 'dev' 20b2b1eae v6.63 3706869f8 desc. 
of setDefaultEncoding d6f2d7577 add test of setDefaultEncoding 3b0a19c41 vpmadd52{h,l}uq for avx-ifma 95752ebd7 add tAVX_IFMA cd36e31ea [sample] show AMX_FP16/AVX_VNNI_INT8/AVX_NE_CONVERT e5858af27 add setDefaultEncoding 2f7fb0220 modify gen.cpp for AVX-NE-CONVERT/AVX-VNNI-INT8/AMX-FP16 1c5cb7efa add AVX-NE-CONVERT instructions 564fe9acd add AVX-VNNI-INT8 instructions cd14d07b1 add AMX-FP16 instruction 7811f593c Merge pull request #161 from scribam/patch-1 2218f6c08 Update changelog.md 7bccdbbb4 Merge pull request #160 from herumi/dev 5fcf87596 compile nasm-2.15 instead of apt install e31961ea8 v6.62 ff4f9e65c update doc 9c8fb81db disable wrong detection of gcc -Warray-bounds 6b7519659 add serialize e16582696 Merge branch 'JonLiu1993-vcpkg-installition' into dev edbb410fb Add vcpkg installation instructions 055d31242 Merge branch 'dev' 21ab98441 Merge branch 'akodanka-enable_CIV' into dev ed4d598e3 Changes to compile xbyak project for openvino b652430c4 mingw uses __cpuidex 48457bfa0 Merge branch 'dev' 29cb524d1 v6.61.2 1a9a0b0e1 avoid including algorithm header in xbyak_util.h 6fadefd04 Merge branch 'dev' fc1c18a9d update doc f7cae7f11 v6.61.1 6f5ec5cf3 Merge branch 'doyaGu-master' into dev 4554d6bb9 Fix error related to XBYAK_NOEXCEPT git-subtree-dir: externals/xbyak git-subtree-split: a1ac3750f9a639b5a6c6d6c7da4259b8d6790989
This commit is contained in:
parent
5b6e3d8b54
commit
f6fdb5f55a
30 changed files with 589 additions and 155 deletions
18
.github/workflows/main.yml
vendored
18
.github/workflows/main.yml
vendored
|
@ -1,13 +1,21 @@
|
||||||
name: test
|
name: test
|
||||||
on: [push]
|
on: [push]
|
||||||
|
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
shell: sh
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
test:
|
||||||
name: test
|
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
container:
|
||||||
|
image: debian:testing
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v3
|
||||||
- run: sudo apt update
|
- run: apt -y update
|
||||||
- run: sudo apt install nasm yasm g++-multilib tcsh
|
- run: apt -y install g++-multilib libboost-dev make nasm yasm
|
||||||
- run: make test
|
- run: make test
|
||||||
- run: make -C sample CXXFLAGS="-DXBYAK_NO_EXCEPTION"
|
- run: make -C sample CXXFLAGS="-DXBYAK_NO_EXCEPTION"
|
||||||
|
|
8
Android.bp
Normal file
8
Android.bp
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
//#################################################
|
||||||
|
cc_library_headers {
|
||||||
|
name: "xbyak_headers",
|
||||||
|
vendor: true,
|
||||||
|
export_include_dirs: [
|
||||||
|
"xbyak"
|
||||||
|
],
|
||||||
|
}
|
|
@ -1,6 +1,6 @@
|
||||||
cmake_minimum_required(VERSION 2.6...3.0.2)
|
cmake_minimum_required(VERSION 2.6...3.0.2)
|
||||||
|
|
||||||
project(xbyak LANGUAGES CXX VERSION 6.61)
|
project(xbyak LANGUAGES CXX VERSION 6.68)
|
||||||
|
|
||||||
file(GLOB headers xbyak/*.h)
|
file(GLOB headers xbyak/*.h)
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,14 @@
|
||||||
# History
|
# History
|
||||||
|
|
||||||
|
* 2022/Dec/07 ver 6.68 support prefetchit{0,1}
|
||||||
|
* 2022/Nov/30 ver 6.67 support CMPccXADD
|
||||||
|
* 2022/Nov/25 ver 6.66 support RAO-INT
|
||||||
|
* 2022/Nov/22 ver 6.65 consider x32
|
||||||
|
* 2022/Nov/04 ver 6.64 some vpmov* support addressing with mask
|
||||||
|
* 2022/Oct/06 ver 6.63 vpmadd52{h,l}uq support AVX-IFMA
|
||||||
|
* 2022/Oct/05 ver 6.63 support amx_fp16/avx_vnni_int8/avx_ne_convert and add setDefaultEncoding()
|
||||||
|
* 2022/Aug/15 ver 6.62 add serialize instruction
|
||||||
|
* 2022/Aug/02 ver 6.61.1 noexcept is supported by Visual Studio 2015 or later
|
||||||
* 2022/Jul/29 ver 6.61 fix exception of movzx eax, ah in 64-bit mode
|
* 2022/Jul/29 ver 6.61 fix exception of movzx eax, ah in 64-bit mode
|
||||||
* 2022/Jun/16 ver 6.60.2 fix detection of GFNI, VAES, and VPCLMULQDQ
|
* 2022/Jun/16 ver 6.60.2 fix detection of GFNI, VAES, and VPCLMULQDQ
|
||||||
* 2022/Jun/15 ver 6.60.1 fix link error of Xbyak::util::Cpu on Visual Studio with /O0 option
|
* 2022/Jun/15 ver 6.60.1 fix link error of Xbyak::util::Cpu on Visual Studio with /O0 option
|
||||||
|
|
|
@ -12,3 +12,15 @@ make install
|
||||||
```
|
```
|
||||||
|
|
||||||
These files are copied into `/usr/local/include/xbyak`.
|
These files are copied into `/usr/local/include/xbyak`.
|
||||||
|
|
||||||
|
# Building xbyak - Using vcpkg
|
||||||
|
|
||||||
|
You can download and install xbyak using the [vcpkg](https://github.com/Microsoft/vcpkg) dependency manager:
|
||||||
|
|
||||||
|
git clone https://github.com/Microsoft/vcpkg.git
|
||||||
|
cd vcpkg
|
||||||
|
./bootstrap-vcpkg.sh
|
||||||
|
./vcpkg integrate install
|
||||||
|
./vcpkg install xbyak
|
||||||
|
|
||||||
|
The xbyak port in vcpkg is kept up to date by Microsoft team members and community contributors. If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository.
|
||||||
|
|
|
@ -110,7 +110,15 @@ vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64],
|
||||||
vpdpbusd(xm0, xm1, xm2); // default encoding is EVEX
|
vpdpbusd(xm0, xm1, xm2); // default encoding is EVEX
|
||||||
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
|
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
|
||||||
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX encoding
|
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX encoding
|
||||||
|
setDefaultEncoding(VexEncoding); // default encoding is VEX
|
||||||
|
vpdpbusd(xm0, xm1, xm2); // VEX encoding
|
||||||
```
|
```
|
||||||
|
|
||||||
|
- setDefaultEncoding(PreferredEncoding encoding);
|
||||||
|
- Set the default encoding to select EVEX or VEX.
|
||||||
|
- The default value is EvexEncoding.
|
||||||
|
- This function affects only an instruction that has a PreferredEncoding argument such as vpdpbusd.
|
||||||
|
|
||||||
### Remark
|
### Remark
|
||||||
* `k1`, ..., `k7` are opmask registers.
|
* `k1`, ..., `k7` are opmask registers.
|
||||||
- `k0` is dealt as no mask.
|
- `k0` is dealt as no mask.
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
TARGET=../xbyak/xbyak_mnemonic.h
|
TARGET=../xbyak/xbyak_mnemonic.h
|
||||||
BIN=sortline gen_code gen_avx512
|
BIN=sortline gen_code gen_avx512
|
||||||
CFLAGS=-I../ -O2 -DXBYAK_NO_OP_NAMES -Wall -Wextra -Wno-missing-field-initializers $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS)
|
CFLAGS=-I../ -O2 -Wall -Wextra -Wno-missing-field-initializers $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS)
|
||||||
all: $(TARGET) ../CMakeLists.txt ../meson.build ../readme.md ../readme.txt
|
all: $(TARGET) ../CMakeLists.txt ../meson.build ../readme.md ../readme.txt
|
||||||
sortline: sortline.cpp
|
sortline: sortline.cpp
|
||||||
$(CXX) $(CFLAGS) $< -o $@
|
$(CXX) $(CFLAGS) $< -o $@
|
||||||
|
|
|
@ -387,9 +387,6 @@ void putX_X_XM_IMM()
|
||||||
{ 0x57, "vreducess", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true },
|
{ 0x57, "vreducess", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true },
|
||||||
{ 0x57, "vreducesh", T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, true },
|
{ 0x57, "vreducesh", T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, true },
|
||||||
|
|
||||||
{ 0xB4, "vpmadd52luq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
|
||||||
{ 0xB5, "vpmadd52huq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
|
||||||
|
|
||||||
{ 0x70, "vpshldw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, true },
|
{ 0x70, "vpshldw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, true },
|
||||||
{ 0x71, "vpshldd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, true },
|
{ 0x71, "vpshldd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, true },
|
||||||
{ 0x71, "vpshldq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, true },
|
{ 0x71, "vpshldq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, true },
|
||||||
|
@ -695,29 +692,29 @@ void putMov()
|
||||||
int type;
|
int type;
|
||||||
int mode;
|
int mode;
|
||||||
} tbl[] = {
|
} tbl[] = {
|
||||||
{ 0x32, "vpmovqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false },
|
{ 0x32, "vpmovqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL | T_M_K, false },
|
||||||
{ 0x22, "vpmovsqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false },
|
{ 0x22, "vpmovsqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL | T_M_K, false },
|
||||||
{ 0x12, "vpmovusqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false },
|
{ 0x12, "vpmovusqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL | T_M_K, false },
|
||||||
|
|
||||||
{ 0x34, "vpmovqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
|
{ 0x34, "vpmovqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false },
|
||||||
{ 0x24, "vpmovsqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
|
{ 0x24, "vpmovsqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false },
|
||||||
{ 0x14, "vpmovusqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
|
{ 0x14, "vpmovusqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false },
|
||||||
|
|
||||||
{ 0x35, "vpmovqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
{ 0x35, "vpmovqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||||
{ 0x25, "vpmovsqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
{ 0x25, "vpmovsqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||||
{ 0x15, "vpmovusqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
{ 0x15, "vpmovusqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||||
|
|
||||||
{ 0x31, "vpmovdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
|
{ 0x31, "vpmovdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false },
|
||||||
{ 0x21, "vpmovsdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
|
{ 0x21, "vpmovsdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false },
|
||||||
{ 0x11, "vpmovusdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
|
{ 0x11, "vpmovusdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false },
|
||||||
|
|
||||||
{ 0x33, "vpmovdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
{ 0x33, "vpmovdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||||
{ 0x23, "vpmovsdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
{ 0x23, "vpmovsdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||||
{ 0x13, "vpmovusdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
{ 0x13, "vpmovusdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||||
|
|
||||||
{ 0x30, "vpmovwb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
{ 0x30, "vpmovwb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||||
{ 0x20, "vpmovswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
{ 0x20, "vpmovswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||||
{ 0x10, "vpmovuswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
{ 0x10, "vpmovuswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const Tbl& p = tbl[i];
|
const Tbl& p = tbl[i];
|
||||||
|
@ -827,7 +824,6 @@ void putMisc()
|
||||||
puts("void vfpclasssh(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_0F3A | T_MUST_EVEX | T_EW0 | T_N2, 0x67, imm); }");
|
puts("void vfpclasssh(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_0F3A | T_MUST_EVEX | T_EW0 | T_N2, 0x67, imm); }");
|
||||||
|
|
||||||
puts("void vpshufbitqmb(const Opmask& k, const Xmm& x, const Operand& op) { opVex(k, &x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8F); }");
|
puts("void vpshufbitqmb(const Opmask& k, const Xmm& x, const Operand& op) { opVex(k, &x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8F); }");
|
||||||
puts("void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }");
|
|
||||||
|
|
||||||
puts("void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }");
|
puts("void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }");
|
||||||
puts("void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); }");
|
puts("void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); }");
|
||||||
|
|
|
@ -560,6 +560,8 @@ void put()
|
||||||
{ 0, "nta", 0x18},
|
{ 0, "nta", 0x18},
|
||||||
{ 2, "wt1", 0x0D},
|
{ 2, "wt1", 0x0D},
|
||||||
{ 1, "w", 0x0D},
|
{ 1, "w", 0x0D},
|
||||||
|
{ 7, "it0", 0x18},
|
||||||
|
{ 6, "it1", 0x18},
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const Tbl *p = &tbl[i];
|
const Tbl *p = &tbl[i];
|
||||||
|
@ -693,6 +695,7 @@ void put()
|
||||||
{ "lock", 0xF0 },
|
{ "lock", 0xF0 },
|
||||||
|
|
||||||
{ "sahf", 0x9E },
|
{ "sahf", 0x9E },
|
||||||
|
{ "serialize", 0x0F, 0x01, 0xE8 },
|
||||||
{ "stc", 0xF9 },
|
{ "stc", 0xF9 },
|
||||||
{ "std", 0xFD },
|
{ "std", 0xFD },
|
||||||
{ "sti", 0xFB },
|
{ "sti", 0xFB },
|
||||||
|
@ -806,6 +809,23 @@ void put()
|
||||||
printf("void %s(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x%02X, %d); }\n", p->name, p->code, p->ext);
|
printf("void %s(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x%02X, %d); }\n", p->name, p->code, p->ext);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
const struct Tbl {
|
||||||
|
const char *name;
|
||||||
|
uint8_t prefix;
|
||||||
|
} tbl[] = {
|
||||||
|
{ "aadd", 0 },
|
||||||
|
{ "aand", 0x66 },
|
||||||
|
{ "aor", 0xF2 },
|
||||||
|
{ "axor", 0xF3 },
|
||||||
|
};
|
||||||
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
|
const Tbl *p = &tbl[i];
|
||||||
|
printf("void %s(const Address& addr, const Reg32e &reg) { ", p->name);
|
||||||
|
if (p->prefix) printf("db(0x%02X); ", p->prefix);
|
||||||
|
printf("opModM(addr, reg, 0x0F, 0x38, 0x0FC); }\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
const struct Tbl {
|
const struct Tbl {
|
||||||
|
@ -1666,6 +1686,25 @@ void put()
|
||||||
puts("void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y | T_M_K, 0x1D, imm); }");
|
puts("void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y | T_M_K, 0x1D, imm); }");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
const struct Tbl {
|
||||||
|
const char *name;
|
||||||
|
int type;
|
||||||
|
uint8_t code;
|
||||||
|
} tbl[] = {
|
||||||
|
{ "vbcstnebf162ps", T_F3 | T_0F38 | T_W0 | T_B16 | T_YMM, 0xB1 },
|
||||||
|
{ "vbcstnesh2ps", T_66 | T_0F38 | T_W0 | T_B16 | T_YMM, 0xB1 },
|
||||||
|
{ "vcvtneebf162ps", T_F3 | T_0F38 | T_W0 | T_YMM, 0xB0 },
|
||||||
|
{ "vcvtneeph2ps", T_66 | T_0F38 | T_W0 | T_YMM, 0xB0 },
|
||||||
|
{ "vcvtneobf162ps", T_F2 | T_0F38 | T_W0 | T_YMM, 0xB0 },
|
||||||
|
{ "vcvtneoph2ps", T_0F38 | T_W0 | T_YMM, 0xB0 }
|
||||||
|
};
|
||||||
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
|
const Tbl& p = tbl[i];
|
||||||
|
printf("void %s(const Xmm& x, const Address& addr) { opVex(x, 0, addr, %s, 0x%02X); }\n", p.name, type2String(p.type).c_str(), p.code);
|
||||||
|
}
|
||||||
|
puts("void vcvtneps2bf16(const Xmm& x, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32 | orEvexIf(encoding), 0x72); }");
|
||||||
|
}
|
||||||
// haswell gpr(reg, reg, r/m)
|
// haswell gpr(reg, reg, r/m)
|
||||||
{
|
{
|
||||||
const struct Tbl {
|
const struct Tbl {
|
||||||
|
@ -1755,11 +1794,33 @@ void put()
|
||||||
{ 0x51, "vpdpbusds", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
|
{ 0x51, "vpdpbusds", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
|
||||||
{ 0x52, "vpdpwssd", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
|
{ 0x52, "vpdpwssd", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
|
||||||
{ 0x53, "vpdpwssds", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
|
{ 0x53, "vpdpwssds", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
|
||||||
|
{ 0xB4, "vpmadd52luq", T_66 | T_0F38 | T_YMM | T_EW1 | T_B64 },
|
||||||
|
{ 0xB5, "vpmadd52huq", T_66 | T_0F38 | T_YMM | T_EW1 | T_B64 },
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const Tbl *p = &tbl[i];
|
const Tbl *p = &tbl[i];
|
||||||
std::string type = type2String(p->type);
|
std::string type = type2String(p->type);
|
||||||
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, %s, 0x%02X, encoding); }\n", p->name, type.c_str(), p->code);
|
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, %s, 0x%02X, encoding); }\n", p->name, type.c_str(), p->code);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// avx-vnni-int8
|
||||||
|
{
|
||||||
|
const struct Tbl {
|
||||||
|
uint8_t code;
|
||||||
|
const char *name;
|
||||||
|
int type;
|
||||||
|
} tbl[] = {
|
||||||
|
{ 0x50, "vpdpbssd", T_F2 | T_0F38 | T_W0 | T_YMM },
|
||||||
|
{ 0x51, "vpdpbssds", T_F2 | T_0F38 | T_W0 | T_YMM },
|
||||||
|
{ 0x50, "vpdpbsud", T_F3 | T_0F38 | T_W0 | T_YMM },
|
||||||
|
{ 0x51, "vpdpbsuds", T_F3 | T_0F38 | T_W0 | T_YMM },
|
||||||
|
{ 0x50, "vpdpbuud", T_0F38 | T_W0 | T_YMM },
|
||||||
|
{ 0x51, "vpdpbuuds", T_0F38 | T_W0 | T_YMM },
|
||||||
|
};
|
||||||
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
|
const Tbl *p = &tbl[i];
|
||||||
|
std::string type = type2String(p->type);
|
||||||
|
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1824,6 +1885,34 @@ void put64()
|
||||||
|
|
||||||
puts("void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); }");
|
puts("void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); }");
|
||||||
puts("void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); }");
|
puts("void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); }");
|
||||||
|
// CMPccXADD
|
||||||
|
{
|
||||||
|
const struct Tbl {
|
||||||
|
const char *name;
|
||||||
|
uint8_t code;
|
||||||
|
} tbl[] = {
|
||||||
|
{ "be", 0xE6 },
|
||||||
|
{ "b", 0xE2 },
|
||||||
|
{ "le", 0xEE },
|
||||||
|
{ "l", 0xEC },
|
||||||
|
{ "nbe", 0xE7 },
|
||||||
|
{ "nb", 0xE3 },
|
||||||
|
{ "nle", 0xEF },
|
||||||
|
{ "nl", 0xED },
|
||||||
|
{ "no", 0xE1 },
|
||||||
|
{ "np", 0xEB },
|
||||||
|
{ "ns", 0xE9 },
|
||||||
|
{ "nz", 0xE5 },
|
||||||
|
{ "o", 0xE0 },
|
||||||
|
{ "p", 0xEA },
|
||||||
|
{ "s", 0xE8 },
|
||||||
|
{ "z", 0xE4 },
|
||||||
|
};
|
||||||
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
|
const Tbl *p = &tbl[i];
|
||||||
|
printf("void cmp%sxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0x%02X, false); }\n", p->name, p->code);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void putAMX_TILE()
|
void putAMX_TILE()
|
||||||
|
@ -1842,6 +1931,7 @@ void putAMX_INT8()
|
||||||
puts("void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }");
|
puts("void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }");
|
||||||
puts("void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }");
|
puts("void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }");
|
||||||
puts("void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38 | T_W0, 0x5e); }");
|
puts("void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38 | T_W0, 0x5e); }");
|
||||||
|
puts("void tdpfp16ps(const Tmm &x1, const Tmm &x2, const Tmm &x3) { opVex(x1, &x3, x2, T_F2 | T_0F38 | T_W0, 0x5c); }");
|
||||||
}
|
}
|
||||||
void putAMX_BF16()
|
void putAMX_BF16()
|
||||||
{
|
{
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
project(
|
project(
|
||||||
'xbyak',
|
'xbyak',
|
||||||
'cpp',
|
'cpp',
|
||||||
version: '6.61',
|
version: '6.68',
|
||||||
license: 'BSD-3-Clause',
|
license: 'BSD-3-Clause',
|
||||||
default_options: 'b_ndebug=if-release'
|
default_options: 'b_ndebug=if-release'
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
# Xbyak 6.61 [![Badge Build]][Build Status]
|
# Xbyak 6.68 [![Badge Build]][Build Status]
|
||||||
|
|
||||||
*A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)*
|
*A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)*
|
||||||
|
|
||||||
|
@ -28,6 +28,7 @@ If you want to use them, then specify `-fno-operator-names` option to gcc/clang.
|
||||||
|
|
||||||
### News
|
### News
|
||||||
|
|
||||||
|
- add amx_fp16/avx_vnni_int8/avx_ne_convert/avx-ifma
|
||||||
- add movdiri, movdir64b, clwb, cldemote
|
- add movdiri, movdir64b, clwb, cldemote
|
||||||
- WAITPKG instructions (tpause, umonitor, umwait) are supported.
|
- WAITPKG instructions (tpause, umonitor, umwait) are supported.
|
||||||
- MmapAllocator supports memfd with user-defined strings. see sample/memfd.cpp
|
- MmapAllocator supports memfd with user-defined strings. see sample/memfd.cpp
|
||||||
|
|
15
readme.txt
15
readme.txt
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.61
|
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.68
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
◎概要
|
◎概要
|
||||||
|
@ -166,13 +166,15 @@ vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, xword_b [rax+64],
|
||||||
vpdpbusd(xm0, xm1, xm2); // default encoding is EVEX
|
vpdpbusd(xm0, xm1, xm2); // default encoding is EVEX
|
||||||
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
|
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
|
||||||
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX encoding
|
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX encoding
|
||||||
|
setDefaultEncoding(VexEncoding); // default encoding is VEX
|
||||||
|
vpdpbusd(xm0, xm1, xm2); // VEX encoding
|
||||||
注意
|
注意
|
||||||
* k1, ..., k7 は新しいopmaskレジスタです。
|
* k1, ..., k7 は新しいopmaskレジスタです。
|
||||||
* z, sae, rn-sae, rd-sae, ru-sae, rz-saeの代わりにT_z, T_sae, T_rn_sae, T_rd_sae, T_ru_sae, T_rz_saeを使ってください。
|
* z, sae, rn-sae, rd-sae, ru-sae, rz-saeの代わりにT_z, T_sae, T_rn_sae, T_rd_sae, T_ru_sae, T_rz_saeを使ってください。
|
||||||
* `k4 | k3`と`k3 | k4`は意味が異なります。
|
* `k4 | k3`と`k3 | k4`は意味が異なります。
|
||||||
* {1toX}の代わりにptr_bを使ってください。Xは自動的に決まります。
|
* {1toX}の代わりにptr_bを使ってください。Xは自動的に決まります。
|
||||||
* 一部の命令はメモリサイズを指定するためにxword/yword/zword(_b)を使ってください。
|
* 一部の命令はメモリサイズを指定するためにxword/yword/zword(_b)を使ってください。
|
||||||
|
* setDefaultEncoding()でencoding省略時のEVEX/VEXを設定できます。
|
||||||
|
|
||||||
・ラベル
|
・ラベル
|
||||||
|
|
||||||
|
@ -400,6 +402,15 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
◎履歴
|
◎履歴
|
||||||
|
|
||||||
|
2022/12/07 ver 6.68 prefetchit{0,1}サポート
|
||||||
|
2022/11/30 ver 6.67 CMPccXADDサポート
|
||||||
|
2022/11/25 ver 6.66 RAO-INTサポート
|
||||||
|
2022/11/22 ver 6.65 x32動作確認
|
||||||
|
2022/11/04 ver 6.64 vpmov*命令をmaskつきアドレッシング対応修正
|
||||||
|
2022/10/06 ver 6.63 AVX-IFMA用のvpmadd52{h,l}uq対応
|
||||||
|
2022/10/05 ver 6.63 amx_fp16/avx_vnni_int8/avx_ne_convert対応とsetDefaultEncoding()追加
|
||||||
|
2022/09/15 ver 6.62 serialize追加
|
||||||
|
2022/08/02 ver 6.61.1 noexceptはVisual Studio 2015以降対応
|
||||||
2022/07/29 ver 6.61 movzx eax, ahがエラーになるのを修正
|
2022/07/29 ver 6.61 movzx eax, ahがエラーになるのを修正
|
||||||
2022/06/16 ver 6.60.2 GFNI, VAES, VPCLMULQDQの判定修正
|
2022/06/16 ver 6.60.2 GFNI, VAES, VPCLMULQDQの判定修正
|
||||||
2022/06/15 ver 6.60.1 Visual Studio /O0でXbyak::util::Cpuがリンクエラーになるのに対応
|
2022/06/15 ver 6.60.1 Visual Studio /O0でXbyak::util::Cpuがリンクエラーになるのに対応
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
XBYAK_INC=../xbyak/xbyak.h
|
XBYAK_INC=../xbyak/xbyak.h
|
||||||
|
CXX?=g++
|
||||||
|
|
||||||
BOOST_EXIST=$(shell echo "\#include <boost/spirit/core.hpp>" | (gcc -E - 2>/dev/null) | grep "boost/spirit/core.hpp" >/dev/null && echo "1")
|
BOOST_EXIST=$(shell echo "#include <boost/spirit/core.hpp>" | $(CXX) -x c++ -c - 2>/dev/null && echo 1)
|
||||||
UNAME_M=$(shell uname -m)
|
UNAME_M=$(shell uname -m)
|
||||||
|
|
||||||
ONLY_64BIT=0
|
ONLY_64BIT=0
|
||||||
|
@ -104,7 +105,7 @@ profiler-vtune: profiler.cpp ../xbyak/xbyak_util.h
|
||||||
$(CXX) $(CFLAGS) profiler.cpp -o $@ -DXBYAK_USE_VTUNE -I /opt/intel/vtune_amplifier/include/ -L /opt/intel/vtune_amplifier/lib64 -ljitprofiling -ldl
|
$(CXX) $(CFLAGS) profiler.cpp -o $@ -DXBYAK_USE_VTUNE -I /opt/intel/vtune_amplifier/include/ -L /opt/intel/vtune_amplifier/lib64 -ljitprofiling -ldl
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf *.o $(TARGET) *.exe profiler profiler-vtune
|
rm -rf $(TARGET) profiler profiler-vtune
|
||||||
|
|
||||||
test : test0.cpp $(XBYAK_INC)
|
test : test0.cpp $(XBYAK_INC)
|
||||||
test64: test0.cpp $(XBYAK_INC)
|
test64: test0.cpp $(XBYAK_INC)
|
||||||
|
|
|
@ -199,7 +199,7 @@ int main(int argc, char *argv[])
|
||||||
quantize2(dest2, src, qTbl);
|
quantize2(dest2, src, qTbl);
|
||||||
for (int i = 0; i < N; i++) {
|
for (int i = 0; i < N; i++) {
|
||||||
if (dest[i] != dest2[i]) {
|
if (dest[i] != dest2[i]) {
|
||||||
printf("err[%d] %d %d\n", i, dest[i], dest2[i]);
|
printf("err[%d] %u %u\n", i, dest[i], dest2[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -89,6 +89,13 @@ void putCPUinfo(bool onlyCpuidFeature)
|
||||||
{ Cpu::tMOVDIRI, "movdiri" },
|
{ Cpu::tMOVDIRI, "movdiri" },
|
||||||
{ Cpu::tMOVDIR64B, "movdir64b" },
|
{ Cpu::tMOVDIR64B, "movdir64b" },
|
||||||
{ Cpu::tCLZERO, "clzero" },
|
{ Cpu::tCLZERO, "clzero" },
|
||||||
|
{ Cpu::tAMX_FP16, "amx_fp16" },
|
||||||
|
{ Cpu::tAVX_VNNI_INT8, "avx_vnni_int8" },
|
||||||
|
{ Cpu::tAVX_NE_CONVERT, "avx_ne_convert" },
|
||||||
|
{ Cpu::tAVX_IFMA, "avx_ifma" },
|
||||||
|
{ Cpu::tRAO_INT, "rao-int" },
|
||||||
|
{ Cpu::tCMPCCXADD, "cmpccxadd" },
|
||||||
|
{ Cpu::tPREFETCHITI, "prefetchiti" },
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
if (cpu.has(tbl[i].type)) printf(" %s", tbl[i].str);
|
if (cpu.has(tbl[i].type)) printf(" %s", tbl[i].str);
|
||||||
|
|
|
@ -5,8 +5,8 @@
|
||||||
|
|
||||||
mem_ 4byte x 65536
|
mem_ 4byte x 65536
|
||||||
|
|
||||||
すべての命令は4byte固定
|
all instructions are fixed at 4 bytes.
|
||||||
即値は全て16bit
|
all immediate values are 16-bit.
|
||||||
|
|
||||||
R = A or B
|
R = A or B
|
||||||
vldiR, imm ; R = imm
|
vldiR, imm ; R = imm
|
||||||
|
@ -109,7 +109,7 @@ public:
|
||||||
reg[r] -= imm;
|
reg[r] -= imm;
|
||||||
break;
|
break;
|
||||||
case PUT:
|
case PUT:
|
||||||
printf("%c %8d(0x%08x)\n", 'A' + r, reg[r], reg[r]);
|
printf("%c %8u(0x%08x)\n", 'A' + r, reg[r], reg[r]);
|
||||||
break;
|
break;
|
||||||
case JNZ:
|
case JNZ:
|
||||||
if (reg[r] != 0) pc += static_cast<signed short>(imm);
|
if (reg[r] != 0) pc += static_cast<signed short>(imm);
|
||||||
|
@ -294,7 +294,7 @@ lp:
|
||||||
p = t;
|
p = t;
|
||||||
n--;
|
n--;
|
||||||
if (n != 0) goto lp;
|
if (n != 0) goto lp;
|
||||||
printf("c=%d(0x%08x)\n", c, c);
|
printf("c=%u(0x%08x)\n", c, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
|
|
|
@ -1,6 +1,9 @@
|
||||||
TARGET = make_nm normalize_prefix bad_address misc cvt_test cvt_test32 noexception misc32
|
TARGET = make_nm normalize_prefix bad_address misc cvt_test cvt_test32 noexception misc32 detect_x32
|
||||||
XBYAK_INC=../xbyak/xbyak.h
|
XBYAK_INC=../xbyak/xbyak.h
|
||||||
UNAME_S=$(shell uname -s)
|
UNAME_S=$(shell uname -s)
|
||||||
|
ifeq ($(shell ./detect_x32),x32)
|
||||||
|
X32?=1
|
||||||
|
endif
|
||||||
BIT=32
|
BIT=32
|
||||||
ifeq ($(shell uname -m),x86_64)
|
ifeq ($(shell uname -m),x86_64)
|
||||||
BIT=64
|
BIT=64
|
||||||
|
@ -20,9 +23,9 @@ endif
|
||||||
|
|
||||||
all: $(TARGET)
|
all: $(TARGET)
|
||||||
|
|
||||||
CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith
|
CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wwrite-strings -Wfloat-equal -Wpointer-arith
|
||||||
|
|
||||||
CFLAGS=-O2 -fomit-frame-pointer -Wall -fno-operator-names -I../ -I./ $(CFLAGS_WARN) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) #-std=c++0x
|
CFLAGS=-O2 -Wall -I../ -I./ $(CFLAGS_WARN) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) #-std=c++0x
|
||||||
make_nm:
|
make_nm:
|
||||||
$(CXX) $(CFLAGS) make_nm.cpp -o $@
|
$(CXX) $(CFLAGS) make_nm.cpp -o $@
|
||||||
normalize_prefix: normalize_prefix.cpp ../xbyak/xbyak.h
|
normalize_prefix: normalize_prefix.cpp ../xbyak/xbyak.h
|
||||||
|
@ -53,12 +56,11 @@ noexception: noexception.cpp ../xbyak/xbyak.h
|
||||||
test_nm: normalize_prefix $(TARGET)
|
test_nm: normalize_prefix $(TARGET)
|
||||||
$(MAKE) -C ../gen
|
$(MAKE) -C ../gen
|
||||||
ifneq ($(ONLY_64BIT),1)
|
ifneq ($(ONLY_64BIT),1)
|
||||||
./test_nm.sh
|
CXX=$(CXX) ./test_nm.sh
|
||||||
./test_nm.sh noexcept
|
CXX=$(CXX) ./test_nm.sh noexcept
|
||||||
./noexception
|
CXX=$(CXX) ./test_nm.sh Y
|
||||||
./test_nm.sh Y
|
CXX=$(CXX) ./test_nm.sh avx512
|
||||||
./test_nm.sh avx512
|
CXX=$(CXX) ./test_address.sh
|
||||||
./test_address.sh
|
|
||||||
./jmp
|
./jmp
|
||||||
./cvt_test32
|
./cvt_test32
|
||||||
endif
|
endif
|
||||||
|
@ -67,32 +69,38 @@ endif
|
||||||
./misc32
|
./misc32
|
||||||
./cvt_test
|
./cvt_test
|
||||||
ifeq ($(BIT),64)
|
ifeq ($(BIT),64)
|
||||||
./test_address.sh 64
|
CXX=$(CXX) ./test_address.sh 64
|
||||||
./test_nm.sh 64
|
ifneq ($(X32),1)
|
||||||
./test_nm.sh Y64
|
CXX=$(CXX) ./test_nm.sh 64
|
||||||
|
CXX=$(CXX) ./test_nm.sh Y64
|
||||||
|
endif
|
||||||
./jmp64
|
./jmp64
|
||||||
endif
|
endif
|
||||||
|
|
||||||
test_avx: normalize_prefix
|
test_avx: normalize_prefix
|
||||||
ifneq ($(ONLY_64BIT),0)
|
ifneq ($(ONLY_64BIT),0)
|
||||||
./test_avx.sh
|
CXX=$(CXX) ./test_avx.sh
|
||||||
./test_avx.sh Y
|
CXX=$(CXX) ./test_avx.sh Y
|
||||||
endif
|
endif
|
||||||
ifeq ($(BIT),64)
|
ifeq ($(BIT),64)
|
||||||
./test_address.sh 64
|
CXX=$(CXX) ./test_avx.sh 64
|
||||||
./test_avx.sh 64
|
ifneq ($(X32),1)
|
||||||
./test_avx.sh Y64
|
CXX=$(CXX) ./test_avx.sh Y64
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
test_avx512: normalize_prefix
|
test_avx512: normalize_prefix
|
||||||
ifneq ($(ONLY_64BIT),0)
|
ifneq ($(ONLY_64BIT),0)
|
||||||
./test_avx512.sh
|
CXX=$(CXX) ./test_avx512.sh
|
||||||
endif
|
endif
|
||||||
ifeq ($(BIT),64)
|
ifeq ($(BIT),64)
|
||||||
./test_avx512.sh 64
|
CXX=$(CXX) ./test_avx512.sh 64
|
||||||
endif
|
endif
|
||||||
|
|
||||||
test:
|
detect_x32: detect_x32.c
|
||||||
|
$(CC) $< -o $@
|
||||||
|
|
||||||
|
test: detect_x32
|
||||||
$(MAKE) test_nm
|
$(MAKE) test_nm
|
||||||
$(MAKE) test_avx
|
$(MAKE) test_avx
|
||||||
$(MAKE) test_avx512
|
$(MAKE) test_avx512
|
||||||
|
@ -104,4 +112,3 @@ lib_run: lib_test.cpp lib_run.cpp lib.h
|
||||||
$(CXX) $(CFLAGS) lib_run.cpp lib_test.cpp -o lib_run
|
$(CXX) $(CFLAGS) lib_run.cpp lib_test.cpp -o lib_run
|
||||||
make_nm: make_nm.cpp $(XBYAK_INC)
|
make_nm: make_nm.cpp $(XBYAK_INC)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
OPT=/EHsc -I../xbyak /W4 -D_CRT_SECURE_NO_WARNINGS
|
OPT=/EHsc -I../xbyak /W4 -D_CRT_SECURE_NO_WARNINGS -I ../
|
||||||
../xbyak/xbyak_mnemonic.h: ../gen/gen_code.exe ../gen/gen_avx512.exe
|
../xbyak/xbyak_mnemonic.h: ../gen/gen_code.exe ../gen/gen_avx512.exe
|
||||||
../gen/gen_code.exe > $@
|
../gen/gen_code.exe > $@
|
||||||
../gen/gen_avx512.exe >> $@
|
../gen/gen_avx512.exe >> $@
|
||||||
|
|
8
test/detect_x32.c
Normal file
8
test/detect_x32.c
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
#if defined(__x86_64__) && defined(__ILP32__)
|
||||||
|
puts("x32");
|
||||||
|
#endif
|
||||||
|
}
|
|
@ -1807,44 +1807,44 @@ public:
|
||||||
put("vpmovd2m", K, _XMM | _YMM | _ZMM);
|
put("vpmovd2m", K, _XMM | _YMM | _ZMM);
|
||||||
put("vpmovq2m", K, _XMM | _YMM | _ZMM);
|
put("vpmovq2m", K, _XMM | _YMM | _ZMM);
|
||||||
|
|
||||||
put("vpmovqb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
put("vpmovqb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||||
put("vpmovsqb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
put("vpmovsqb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||||
put("vpmovusqb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
put("vpmovusqb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||||
|
|
||||||
put("vpmovqw", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
put("vpmovqw", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||||
put("vpmovsqw", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
put("vpmovsqw", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||||
put("vpmovusqw", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
put("vpmovusqw", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||||
|
|
||||||
put("vpmovqd", XMM_KZ | _MEM, _XMM | _YMM);
|
put("vpmovqd", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||||
put("vpmovqd", YMM_KZ | _MEM, _ZMM);
|
put("vpmovqd", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||||
|
|
||||||
put("vpmovsqd", XMM_KZ | _MEM, _XMM | _YMM);
|
put("vpmovsqd", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||||
put("vpmovsqd", YMM_KZ | _MEM, _ZMM);
|
put("vpmovsqd", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||||
|
|
||||||
put("vpmovusqd", XMM_KZ | _MEM, _XMM | _YMM);
|
put("vpmovusqd", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||||
put("vpmovusqd", YMM_KZ | _MEM, _ZMM);
|
put("vpmovusqd", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||||
|
|
||||||
put("vpmovdb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
put("vpmovdb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||||
put("vpmovsdb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
put("vpmovsdb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||||
put("vpmovusdb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
put("vpmovusdb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||||
|
|
||||||
put("vpmovdw", XMM_KZ | _MEM, _XMM | _YMM);
|
put("vpmovdw", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||||
put("vpmovdw", YMM_KZ | _MEM, _ZMM);
|
put("vpmovdw", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||||
|
|
||||||
put("vpmovsdw", XMM_KZ | _MEM, _XMM | _YMM);
|
put("vpmovsdw", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||||
put("vpmovsdw", YMM_KZ | _MEM, _ZMM);
|
put("vpmovsdw", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||||
|
|
||||||
put("vpmovusdw", XMM_KZ | _MEM, _XMM | _YMM);
|
put("vpmovusdw", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||||
put("vpmovusdw", YMM_KZ | _MEM, _ZMM);
|
put("vpmovusdw", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||||
|
|
||||||
put("vpmovwb", XMM_KZ | _MEM, _XMM | _YMM);
|
put("vpmovwb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||||
put("vpmovwb", YMM_KZ | _MEM, _ZMM);
|
put("vpmovwb", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||||
|
|
||||||
put("vpmovswb", XMM_KZ | _MEM, _XMM | _YMM);
|
put("vpmovswb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||||
put("vpmovswb", YMM_KZ | _MEM, _ZMM);
|
put("vpmovswb", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||||
|
|
||||||
put("vpmovuswb", XMM_KZ | _MEM, _XMM | _YMM);
|
put("vpmovuswb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||||
put("vpmovuswb", YMM_KZ | _MEM, _ZMM);
|
put("vpmovuswb", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||||
}
|
}
|
||||||
void putRot()
|
void putRot()
|
||||||
{
|
{
|
||||||
|
|
|
@ -533,6 +533,7 @@ class Test {
|
||||||
"nop",
|
"nop",
|
||||||
|
|
||||||
"sahf",
|
"sahf",
|
||||||
|
"serialize",
|
||||||
"stc",
|
"stc",
|
||||||
"std",
|
"std",
|
||||||
"sti",
|
"sti",
|
||||||
|
@ -1017,9 +1018,7 @@ class Test {
|
||||||
}
|
}
|
||||||
void putCmov() const
|
void putCmov() const
|
||||||
{
|
{
|
||||||
const struct {
|
const char tbl[][4] = {
|
||||||
const char *s;
|
|
||||||
} tbl[] = {
|
|
||||||
"o",
|
"o",
|
||||||
"no",
|
"no",
|
||||||
"b",
|
"b",
|
||||||
|
@ -1053,11 +1052,11 @@ class Test {
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
char buf[32];
|
char buf[32];
|
||||||
snprintf(buf, sizeof(buf), "cmov%s", tbl[i].s);
|
snprintf(buf, sizeof(buf), "cmov%s", tbl[i]);
|
||||||
put(buf, REG16, REG16|MEM);
|
put(buf, REG16, REG16|MEM);
|
||||||
put(buf, REG32, REG32|MEM);
|
put(buf, REG32, REG32|MEM);
|
||||||
put(buf, REG64, REG64|MEM);
|
put(buf, REG64, REG64|MEM);
|
||||||
snprintf(buf, sizeof(buf), "set%s", tbl[i].s);
|
snprintf(buf, sizeof(buf), "set%s", tbl[i]);
|
||||||
put(buf, REG8|REG8_3|MEM);
|
put(buf, REG8|REG8_3|MEM);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1294,7 +1293,7 @@ class Test {
|
||||||
put(p, REG64, "0x1234567890abcdefLL", "0x1234567890abcdef");
|
put(p, REG64, "0x1234567890abcdefLL", "0x1234567890abcdef");
|
||||||
put("movbe", REG16|REG32e, MEM);
|
put("movbe", REG16|REG32e, MEM);
|
||||||
put("movbe", MEM, REG16|REG32e);
|
put("movbe", MEM, REG16|REG32e);
|
||||||
#ifdef XBYAK64
|
#if defined(XBYAK64) && !defined(__ILP32__)
|
||||||
put(p, RAX|EAX|AX|AL, "ptr [0x1234567890abcdefLL]", "[qword 0x1234567890abcdef]");
|
put(p, RAX|EAX|AX|AL, "ptr [0x1234567890abcdefLL]", "[qword 0x1234567890abcdef]");
|
||||||
put(p, "ptr [0x1234567890abcdefLL]", "[qword 0x1234567890abcdef]", RAX|EAX|AX|AL);
|
put(p, "ptr [0x1234567890abcdefLL]", "[qword 0x1234567890abcdef]", RAX|EAX|AX|AL);
|
||||||
put(p, "qword [rax], 0");
|
put(p, "qword [rax], 0");
|
||||||
|
@ -2608,7 +2607,7 @@ public:
|
||||||
putMPX();
|
putMPX();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef XBYAK64
|
#if defined(XBYAK64) && !defined(__ILP32__)
|
||||||
|
|
||||||
#ifdef USE_YASM
|
#ifdef USE_YASM
|
||||||
putRip();
|
putRip();
|
||||||
|
|
183
test/misc.cpp
183
test/misc.cpp
|
@ -5,6 +5,7 @@
|
||||||
#include <xbyak/xbyak_util.h>
|
#include <xbyak/xbyak_util.h>
|
||||||
#include <cybozu/inttype.hpp>
|
#include <cybozu/inttype.hpp>
|
||||||
#include <cybozu/test.hpp>
|
#include <cybozu/test.hpp>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
using namespace Xbyak;
|
using namespace Xbyak;
|
||||||
|
|
||||||
|
@ -97,13 +98,17 @@ CYBOZU_TEST_AUTO(mov_const)
|
||||||
}
|
}
|
||||||
#ifdef XBYAK64
|
#ifdef XBYAK64
|
||||||
CYBOZU_TEST_NO_EXCEPTION(mov(rax, ptr[(void*)0x7fffffff]));
|
CYBOZU_TEST_NO_EXCEPTION(mov(rax, ptr[(void*)0x7fffffff]));
|
||||||
|
if (sizeof(void*) != 4) { // sizeof(void*) == 4 on x32
|
||||||
CYBOZU_TEST_EXCEPTION(mov(rax, ptr[(void*)0x17fffffff]), Xbyak::Error);
|
CYBOZU_TEST_EXCEPTION(mov(rax, ptr[(void*)0x17fffffff]), Xbyak::Error);
|
||||||
|
}
|
||||||
#ifdef XBYAK_OLD_DISP_CHECK
|
#ifdef XBYAK_OLD_DISP_CHECK
|
||||||
CYBOZU_TEST_NO_EXCEPTION(mov(rax, ptr[(void*)0x80000000]));
|
CYBOZU_TEST_NO_EXCEPTION(mov(rax, ptr[(void*)0x80000000]));
|
||||||
CYBOZU_TEST_NO_EXCEPTION(mov(rax, ptr[(void*)0xffffffff]));
|
CYBOZU_TEST_NO_EXCEPTION(mov(rax, ptr[(void*)0xffffffff]));
|
||||||
#else
|
#else
|
||||||
|
if (sizeof(void*) != 4) { // sizeof(void*) == 4 on x32
|
||||||
CYBOZU_TEST_EXCEPTION(mov(rax, ptr[(void*)0x80000000ull]), Xbyak::Error);
|
CYBOZU_TEST_EXCEPTION(mov(rax, ptr[(void*)0x80000000ull]), Xbyak::Error);
|
||||||
CYBOZU_TEST_EXCEPTION(mov(rax, ptr[(void*)0xffffffffull]), Xbyak::Error);
|
CYBOZU_TEST_EXCEPTION(mov(rax, ptr[(void*)0xffffffffull]), Xbyak::Error);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -875,6 +880,10 @@ CYBOZU_TEST_AUTO(vnni)
|
||||||
vpdpbusd(xm0, xm1, xm2);
|
vpdpbusd(xm0, xm1, xm2);
|
||||||
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // EVEX
|
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // EVEX
|
||||||
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX
|
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX
|
||||||
|
setDefaultEncoding(VexEncoding);
|
||||||
|
vpdpbusd(xm0, xm1, xm2); // VEX
|
||||||
|
setDefaultEncoding(EvexEncoding);
|
||||||
|
vpdpbusd(xm0, xm1, xm2); // EVEX
|
||||||
}
|
}
|
||||||
void badVex()
|
void badVex()
|
||||||
{
|
{
|
||||||
|
@ -885,6 +894,8 @@ CYBOZU_TEST_AUTO(vnni)
|
||||||
0x62, 0xF2, 0x75, 0x08, 0x50, 0xC2,
|
0x62, 0xF2, 0x75, 0x08, 0x50, 0xC2,
|
||||||
0x62, 0xF2, 0x75, 0x08, 0x50, 0xC2,
|
0x62, 0xF2, 0x75, 0x08, 0x50, 0xC2,
|
||||||
0xC4, 0xE2, 0x71, 0x50, 0xC2,
|
0xC4, 0xE2, 0x71, 0x50, 0xC2,
|
||||||
|
0xC4, 0xE2, 0x71, 0x50, 0xC2,
|
||||||
|
0x62, 0xF2, 0x75, 0x08, 0x50, 0xC2,
|
||||||
};
|
};
|
||||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||||
|
@ -1975,3 +1986,175 @@ CYBOZU_TEST_AUTO(cpu)
|
||||||
Cpu cpu;
|
Cpu cpu;
|
||||||
CYBOZU_TEST_EQUAL(cpu.has(Cpu::tINTEL) && cpu.has(Cpu::tAMD), cpu.has(Cpu::tINTEL | Cpu::tAMD));
|
CYBOZU_TEST_EQUAL(cpu.has(Cpu::tINTEL) && cpu.has(Cpu::tAMD), cpu.has(Cpu::tINTEL | Cpu::tAMD));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CYBOZU_TEST_AUTO(minmax)
|
||||||
|
{
|
||||||
|
using namespace Xbyak::util;
|
||||||
|
CYBOZU_TEST_EQUAL((std::min)(3, 4), local::min_(3, 4));
|
||||||
|
CYBOZU_TEST_EQUAL((std::max)(3, 4), local::max_(3, 4));
|
||||||
|
}
|
||||||
|
|
||||||
|
CYBOZU_TEST_AUTO(rao_int)
|
||||||
|
{
|
||||||
|
struct Code : Xbyak::CodeGenerator {
|
||||||
|
Code()
|
||||||
|
{
|
||||||
|
#ifdef XBYAK64
|
||||||
|
aadd(ptr[rax], ecx);
|
||||||
|
aadd(ptr[eax], ecx);
|
||||||
|
aadd(ptr[rax], r10);
|
||||||
|
aand(ptr[rax], ecx);
|
||||||
|
aand(ptr[eax], ecx);
|
||||||
|
aand(ptr[rax], r10);
|
||||||
|
aor(ptr[rax], ecx);
|
||||||
|
aor(ptr[eax], ecx);
|
||||||
|
aor(ptr[rax], r10);
|
||||||
|
axor(ptr[rax], ecx);
|
||||||
|
axor(ptr[eax], ecx);
|
||||||
|
axor(ptr[rax], r10);
|
||||||
|
#else
|
||||||
|
aadd(ptr[eax], ecx);
|
||||||
|
aand(ptr[eax], ecx);
|
||||||
|
aor(ptr[eax], ecx);
|
||||||
|
axor(ptr[eax], ecx);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
} c;
|
||||||
|
const uint8_t tbl[] = {
|
||||||
|
#ifdef XBYAK64
|
||||||
|
// aadd
|
||||||
|
0x0f, 0x38, 0xfc, 0x08,
|
||||||
|
0x67, 0x0f, 0x38, 0xfc, 0x08,
|
||||||
|
0x4c, 0x0f, 0x38, 0xfc, 0x10,
|
||||||
|
|
||||||
|
// aand
|
||||||
|
0x66, 0x0f, 0x38, 0xfc, 0x08,
|
||||||
|
0x66, 0x67, 0x0f, 0x38, 0xfc, 0x08,
|
||||||
|
0x66, 0x4c, 0x0f, 0x38, 0xfc, 0x10,
|
||||||
|
|
||||||
|
// aor
|
||||||
|
0xf2, 0x0f, 0x38, 0xfc, 0x08,
|
||||||
|
0xf2, 0x67, 0x0f, 0x38, 0xfc, 0x08,
|
||||||
|
0xf2, 0x4c, 0x0f, 0x38, 0xfc, 0x10,
|
||||||
|
|
||||||
|
// axor
|
||||||
|
0xf3, 0x0f, 0x38, 0xfc, 0x08,
|
||||||
|
0xf3, 0x67, 0x0f, 0x38, 0xfc, 0x08,
|
||||||
|
0xf3, 0x4c, 0x0f, 0x38, 0xfc, 0x10,
|
||||||
|
#else
|
||||||
|
// aadd
|
||||||
|
0x0f, 0x38, 0xfc, 0x08,
|
||||||
|
// aand
|
||||||
|
0x66, 0x0f, 0x38, 0xfc, 0x08,
|
||||||
|
// aor
|
||||||
|
0xf2, 0x0f, 0x38, 0xfc, 0x08,
|
||||||
|
// axor
|
||||||
|
0xf3, 0x0f, 0x38, 0xfc, 0x08,
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||||
|
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||||
|
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef XBYAK64
|
||||||
|
CYBOZU_TEST_AUTO(CMPccXADD)
|
||||||
|
{
|
||||||
|
struct Code : Xbyak::CodeGenerator {
|
||||||
|
Code()
|
||||||
|
{
|
||||||
|
// 32bit reg
|
||||||
|
cmpbexadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmpbxadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmplexadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmplxadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmpnbexadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmpnbxadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmpnlexadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmpnlxadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmpnoxadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmpnpxadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmpnsxadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmpnzxadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmpoxadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmppxadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmpsxadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
cmpzxadd(ptr[rax+r10*4], ecx, edx);
|
||||||
|
// 64bit reg
|
||||||
|
cmpbexadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmpbxadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmplexadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmplxadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmpnbexadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmpnbxadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmpnlexadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmpnlxadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmpnoxadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmpnpxadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmpnsxadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmpnzxadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmpoxadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmppxadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmpsxadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
cmpzxadd(ptr[rax+r10*4], rcx, rdx);
|
||||||
|
}
|
||||||
|
} c;
|
||||||
|
const uint8_t tbl[] = {
|
||||||
|
// 32bit reg
|
||||||
|
0xc4, 0xa2, 0x69, 0xe6, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xe2, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xee, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xec, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xe7, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xe3, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xef, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xed, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xe1, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xeb, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xe9, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xe5, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xe0, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xea, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xe8, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0x69, 0xe4, 0x0c, 0x90,
|
||||||
|
// 64bit reg
|
||||||
|
0xc4, 0xa2, 0xe9, 0xe6, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xe2, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xee, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xec, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xe7, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xe3, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xef, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xed, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xe1, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xeb, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xe9, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xe5, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xe0, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xea, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xe8, 0x0c, 0x90,
|
||||||
|
0xc4, 0xa2, 0xe9, 0xe4, 0x0c, 0x90,
|
||||||
|
};
|
||||||
|
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||||
|
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||||
|
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||||
|
}
|
||||||
|
|
||||||
|
CYBOZU_TEST_AUTO(prefetchiti)
|
||||||
|
{
|
||||||
|
struct Code : Xbyak::CodeGenerator {
|
||||||
|
Code()
|
||||||
|
{
|
||||||
|
prefetchit0(ptr[rax]);
|
||||||
|
prefetchit1(ptr[rax]);
|
||||||
|
}
|
||||||
|
} c;
|
||||||
|
const uint8_t tbl[] = {
|
||||||
|
0x0f, 0x18, 0x38,
|
||||||
|
0x0f, 0x18, 0x30
|
||||||
|
};
|
||||||
|
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||||
|
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||||
|
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
|
@ -56,7 +56,7 @@ void test2()
|
||||||
void test3()
|
void test3()
|
||||||
{
|
{
|
||||||
static struct EmptyAllocator : Xbyak::Allocator {
|
static struct EmptyAllocator : Xbyak::Allocator {
|
||||||
uint8_t *alloc() { return 0; }
|
uint8_t *alloc(size_t) { return 0; }
|
||||||
} emptyAllocator;
|
} emptyAllocator;
|
||||||
struct Code : CodeGenerator {
|
struct Code : CodeGenerator {
|
||||||
Code() : CodeGenerator(8, 0, &emptyAllocator)
|
Code() : CodeGenerator(8, 0, &emptyAllocator)
|
||||||
|
|
|
@ -1,13 +1,17 @@
|
||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
FILTER="grep -v warning"
|
FILTER="grep -v warning"
|
||||||
|
|
||||||
sub()
|
sub()
|
||||||
{
|
{
|
||||||
|
|
||||||
CFLAGS="-Wall -fno-operator-names -I../ $OPT2"
|
CFLAGS="-Wall -I../ $OPT2"
|
||||||
|
CXX=${CXX:=g++}
|
||||||
|
|
||||||
echo "compile address.cpp"
|
echo "compile address.cpp"
|
||||||
g++ $CFLAGS address.cpp -o address
|
$CXX $CFLAGS address.cpp -o address
|
||||||
|
|
||||||
./address $1 > a.asm
|
./address $1 > a.asm
|
||||||
echo "asm"
|
echo "asm"
|
||||||
|
@ -17,7 +21,7 @@ awk '{printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER > ok.lst
|
||||||
echo "xbyak"
|
echo "xbyak"
|
||||||
./address $1 jit > nm.cpp
|
./address $1 jit > nm.cpp
|
||||||
echo "compile nm_frame.cpp"
|
echo "compile nm_frame.cpp"
|
||||||
g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
|
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
|
||||||
./nm_frame > x.lst
|
./nm_frame > x.lst
|
||||||
diff ok.lst x.lst && echo "ok"
|
diff ok.lst x.lst && echo "ok"
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,9 @@
|
||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
FILTER="grep -v warning"
|
FILTER="grep -v warning"
|
||||||
|
CXX=${CXX:=g++}
|
||||||
|
|
||||||
case $1 in
|
case $1 in
|
||||||
Y)
|
Y)
|
||||||
|
@ -31,9 +34,9 @@ Y64)
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
CFLAGS="-Wall -fno-operator-names -I../ $OPT2 -DUSE_AVX"
|
CFLAGS="-Wall -I../ $OPT2 -DUSE_AVX"
|
||||||
echo "compile make_nm.cpp"
|
echo "compile make_nm.cpp"
|
||||||
g++ $CFLAGS make_nm.cpp -o make_nm
|
$CXX $CFLAGS make_nm.cpp -o make_nm
|
||||||
|
|
||||||
./make_nm > a.asm
|
./make_nm > a.asm
|
||||||
echo "asm"
|
echo "asm"
|
||||||
|
@ -43,6 +46,6 @@ awk '$3 != "1+1" {printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER
|
||||||
echo "xbyak"
|
echo "xbyak"
|
||||||
./make_nm jit > nm.cpp
|
./make_nm jit > nm.cpp
|
||||||
echo "compile nm_frame.cpp"
|
echo "compile nm_frame.cpp"
|
||||||
g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
|
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
|
||||||
./nm_frame | $FILTER > x.lst
|
./nm_frame | $FILTER > x.lst
|
||||||
diff -B ok.lst x.lst && echo "ok"
|
diff -B ok.lst x.lst && echo "ok"
|
||||||
|
|
|
@ -1,6 +1,9 @@
|
||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
FILTER="grep -v warning"
|
FILTER="grep -v warning"
|
||||||
|
CXX=${CXX:=g++}
|
||||||
|
|
||||||
case $1 in
|
case $1 in
|
||||||
64)
|
64)
|
||||||
|
@ -18,9 +21,9 @@ case $1 in
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
CFLAGS="-Wall -fno-operator-names -I../ $OPT2 -DUSE_AVX512"
|
CFLAGS="-Wall -I../ $OPT2 -DUSE_AVX512"
|
||||||
echo "compile make_512.cpp"
|
echo "compile make_512.cpp"
|
||||||
g++ $CFLAGS make_512.cpp -o make_512
|
$CXX $CFLAGS make_512.cpp -o make_512
|
||||||
|
|
||||||
./make_512 > a.asm
|
./make_512 > a.asm
|
||||||
echo "asm"
|
echo "asm"
|
||||||
|
@ -30,6 +33,6 @@ awk '{printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER > ok.lst
|
||||||
echo "xbyak"
|
echo "xbyak"
|
||||||
./make_512 jit > nm.cpp
|
./make_512 jit > nm.cpp
|
||||||
echo "compile nm_frame.cpp"
|
echo "compile nm_frame.cpp"
|
||||||
g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame -DXBYAK_AVX512
|
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame -DXBYAK_AVX512
|
||||||
./nm_frame | $FILTER > x.lst
|
./nm_frame | $FILTER > x.lst
|
||||||
diff -B ok.lst x.lst && echo "ok"
|
diff -B ok.lst x.lst && echo "ok"
|
||||||
|
|
|
@ -1,6 +1,9 @@
|
||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
FILTER=cat
|
FILTER=cat
|
||||||
|
CXX=${CXX:=g++}
|
||||||
|
|
||||||
case $1 in
|
case $1 in
|
||||||
Y)
|
Y)
|
||||||
|
@ -44,9 +47,9 @@ noexcept)
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
CFLAGS="-Wall -fno-operator-names -I../ $OPT2"
|
CFLAGS="-Wall -I../ $OPT2"
|
||||||
echo "compile make_nm.cpp with $CFLAGS"
|
echo "compile make_nm.cpp with $CFLAGS"
|
||||||
g++ $CFLAGS make_nm.cpp -o make_nm
|
$CXX $CFLAGS make_nm.cpp -o make_nm
|
||||||
|
|
||||||
./make_nm > a.asm
|
./make_nm > a.asm
|
||||||
echo "asm"
|
echo "asm"
|
||||||
|
@ -56,6 +59,6 @@ awk '$3 != "1+1" {printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER
|
||||||
echo "xbyak"
|
echo "xbyak"
|
||||||
./make_nm jit > nm.cpp
|
./make_nm jit > nm.cpp
|
||||||
echo "compile nm_frame.cpp"
|
echo "compile nm_frame.cpp"
|
||||||
g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
|
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
|
||||||
./nm_frame | $FILTER > x.lst
|
./nm_frame | $FILTER > x.lst
|
||||||
diff -B ok.lst x.lst && echo "ok"
|
diff -B ok.lst x.lst && echo "ok"
|
||||||
|
|
|
@ -118,7 +118,7 @@
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (__cplusplus >= 201103) || (defined(_MSC_VER) && _MSC_VER >= 1800)
|
#if (__cplusplus >= 201103) || (defined(_MSC_VER) && _MSC_VER >= 1900)
|
||||||
#undef XBYAK_TLS
|
#undef XBYAK_TLS
|
||||||
#define XBYAK_TLS thread_local
|
#define XBYAK_TLS thread_local
|
||||||
#define XBYAK_VARIADIC_TEMPLATE
|
#define XBYAK_VARIADIC_TEMPLATE
|
||||||
|
@ -144,11 +144,18 @@
|
||||||
#pragma warning(disable : 4127) /* constant expresison */
|
#pragma warning(disable : 4127) /* constant expresison */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// disable -Warray-bounds because it may be a bug of gcc. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104603
|
||||||
|
#if defined(__GNUC__) && !defined(__clang__)
|
||||||
|
#define XBYAK_DISABLE_WARNING_ARRAY_BOUNDS
|
||||||
|
#pragma GCC diagnostic push
|
||||||
|
#pragma GCC diagnostic ignored "-Warray-bounds"
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace Xbyak {
|
namespace Xbyak {
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
DEFAULT_MAX_CODE_SIZE = 4096,
|
DEFAULT_MAX_CODE_SIZE = 4096,
|
||||||
VERSION = 0x6610 /* 0xABCD = A.BC(.D) */
|
VERSION = 0x6680 /* 0xABCD = A.BC(.D) */
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifndef MIE_INTEGER_TYPE_DEFINED
|
#ifndef MIE_INTEGER_TYPE_DEFINED
|
||||||
|
@ -371,7 +378,7 @@ inline bool IsInInt32(uint64_t x) { return ~uint64_t(0x7fffffffu) <= x || x <= 0
|
||||||
|
|
||||||
inline uint32_t VerifyInInt32(uint64_t x)
|
inline uint32_t VerifyInInt32(uint64_t x)
|
||||||
{
|
{
|
||||||
#ifdef XBYAK64
|
#if defined(XBYAK64) && !defined(__ILP32__)
|
||||||
if (!IsInInt32(x)) XBYAK_THROW_RET(ERR_OFFSET_IS_TOO_BIG, 0)
|
if (!IsInInt32(x)) XBYAK_THROW_RET(ERR_OFFSET_IS_TOO_BIG, 0)
|
||||||
#endif
|
#endif
|
||||||
return static_cast<uint32_t>(x);
|
return static_cast<uint32_t>(x);
|
||||||
|
@ -1478,7 +1485,6 @@ public:
|
||||||
clabelDefList_.clear();
|
clabelDefList_.clear();
|
||||||
clabelUndefList_.clear();
|
clabelUndefList_.clear();
|
||||||
resetLabelPtrList();
|
resetLabelPtrList();
|
||||||
ClearError();
|
|
||||||
}
|
}
|
||||||
void enterLocal()
|
void enterLocal()
|
||||||
{
|
{
|
||||||
|
@ -1820,7 +1826,7 @@ private:
|
||||||
void setSIB(const RegExp& e, int reg, int disp8N = 0)
|
void setSIB(const RegExp& e, int reg, int disp8N = 0)
|
||||||
{
|
{
|
||||||
uint64_t disp64 = e.getDisp();
|
uint64_t disp64 = e.getDisp();
|
||||||
#ifdef XBYAK64
|
#if defined(XBYAK64) && !defined(__ILP32__)
|
||||||
#ifdef XBYAK_OLD_DISP_CHECK
|
#ifdef XBYAK_OLD_DISP_CHECK
|
||||||
// treat 0xffffffff as 0xffffffffffffffff
|
// treat 0xffffffff as 0xffffffffffffffff
|
||||||
uint64_t high = disp64 >> 32;
|
uint64_t high = disp64 >> 32;
|
||||||
|
@ -2412,18 +2418,21 @@ private:
|
||||||
if (addr.getRegExp().getIndex().getKind() != kind) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
|
if (addr.getRegExp().getIndex().getKind() != kind) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
|
||||||
opVex(x, 0, addr, type, code);
|
opVex(x, 0, addr, type, code);
|
||||||
}
|
}
|
||||||
void opVnni(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int code0, PreferredEncoding encoding)
|
void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int code0, PreferredEncoding encoding)
|
||||||
{
|
{
|
||||||
|
opAVX_X_X_XM(x1, x2, op, type | orEvexIf(encoding), code0);
|
||||||
|
}
|
||||||
|
int orEvexIf(PreferredEncoding encoding) {
|
||||||
if (encoding == DefaultEncoding) {
|
if (encoding == DefaultEncoding) {
|
||||||
encoding = EvexEncoding;
|
encoding = defaultEncoding_;
|
||||||
}
|
}
|
||||||
if (encoding == EvexEncoding) {
|
if (encoding == EvexEncoding) {
|
||||||
#ifdef XBYAK_DISABLE_AVX512
|
#ifdef XBYAK_DISABLE_AVX512
|
||||||
XBYAK_THROW(ERR_EVEX_IS_INVALID)
|
XBYAK_THROW(ERR_EVEX_IS_INVALID)
|
||||||
#endif
|
#endif
|
||||||
type |= T_MUST_EVEX;
|
return T_MUST_EVEX;
|
||||||
}
|
}
|
||||||
opAVX_X_X_XM(x1, x2, op, type, code0);
|
return 0;
|
||||||
}
|
}
|
||||||
void opInOut(const Reg& a, const Reg& d, uint8_t code)
|
void opInOut(const Reg& a, const Reg& d, uint8_t code)
|
||||||
{
|
{
|
||||||
|
@ -2508,6 +2517,7 @@ public:
|
||||||
#endif
|
#endif
|
||||||
private:
|
private:
|
||||||
bool isDefaultJmpNEAR_;
|
bool isDefaultJmpNEAR_;
|
||||||
|
PreferredEncoding defaultEncoding_;
|
||||||
public:
|
public:
|
||||||
void L(const std::string& label) { labelMgr_.defineSlabel(label); }
|
void L(const std::string& label) { labelMgr_.defineSlabel(label); }
|
||||||
void L(Label& label) { labelMgr_.defineClabel(label); }
|
void L(Label& label) { labelMgr_.defineClabel(label); }
|
||||||
|
@ -2787,11 +2797,13 @@ public:
|
||||||
, es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs)
|
, es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs)
|
||||||
#endif
|
#endif
|
||||||
, isDefaultJmpNEAR_(false)
|
, isDefaultJmpNEAR_(false)
|
||||||
|
, defaultEncoding_(EvexEncoding)
|
||||||
{
|
{
|
||||||
labelMgr_.set(this);
|
labelMgr_.set(this);
|
||||||
}
|
}
|
||||||
void reset()
|
void reset()
|
||||||
{
|
{
|
||||||
|
ClearError();
|
||||||
resetSize();
|
resetSize();
|
||||||
labelMgr_.reset();
|
labelMgr_.reset();
|
||||||
labelMgr_.set(this);
|
labelMgr_.set(this);
|
||||||
|
@ -2823,6 +2835,9 @@ public:
|
||||||
#undef jnl
|
#undef jnl
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// set default encoding to select Vex or Evex
|
||||||
|
void setDefaultEncoding(PreferredEncoding encoding) { defaultEncoding_ = encoding; }
|
||||||
|
|
||||||
/*
|
/*
|
||||||
use single byte nop if useMultiByteNop = false
|
use single byte nop if useMultiByteNop = false
|
||||||
*/
|
*/
|
||||||
|
@ -2927,6 +2942,10 @@ static const XBYAK_CONSTEXPR Segment es(Segment::es), cs(Segment::cs), ss(Segmen
|
||||||
#pragma warning(pop)
|
#pragma warning(pop)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__GNUC__) && !defined(__clang__)
|
||||||
|
#pragma GCC diagnostic pop
|
||||||
|
#endif
|
||||||
|
|
||||||
} // end of namespace
|
} // end of namespace
|
||||||
|
|
||||||
#endif // XBYAK_XBYAK_H_
|
#endif // XBYAK_XBYAK_H_
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
const char *getVersionString() const { return "6.61"; }
|
const char *getVersionString() const { return "6.68"; }
|
||||||
|
void aadd(const Address& addr, const Reg32e &reg) { opModM(addr, reg, 0x0F, 0x38, 0x0FC); }
|
||||||
|
void aand(const Address& addr, const Reg32e &reg) { db(0x66); opModM(addr, reg, 0x0F, 0x38, 0x0FC); }
|
||||||
void adc(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x10, 2); }
|
void adc(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x10, 2); }
|
||||||
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
|
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
|
||||||
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
|
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
|
||||||
|
@ -24,6 +26,8 @@ void andnpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x66, isXM
|
||||||
void andnps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x100, isXMM_XMMorMEM); }
|
void andnps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x100, isXMM_XMMorMEM); }
|
||||||
void andpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x66, isXMM_XMMorMEM); }
|
void andpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x66, isXMM_XMMorMEM); }
|
||||||
void andps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x100, isXMM_XMMorMEM); }
|
void andps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x100, isXMM_XMMorMEM); }
|
||||||
|
void aor(const Address& addr, const Reg32e &reg) { db(0xF2); opModM(addr, reg, 0x0F, 0x38, 0x0FC); }
|
||||||
|
void axor(const Address& addr, const Reg32e &reg) { db(0xF3); opModM(addr, reg, 0x0F, 0x38, 0x0FC); }
|
||||||
void bextr(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_0F38, 0xf7, false); }
|
void bextr(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_0F38, 0xf7, false); }
|
||||||
void blendpd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0D, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
|
void blendpd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0D, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
|
||||||
void blendps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0C, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
|
void blendps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0C, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
|
||||||
|
@ -654,6 +658,8 @@ void pmuludq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF4); }
|
||||||
void popcnt(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xB8); }
|
void popcnt(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xB8); }
|
||||||
void popf() { db(0x9D); }
|
void popf() { db(0x9D); }
|
||||||
void por(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEB); }
|
void por(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEB); }
|
||||||
|
void prefetchit0(const Address& addr) { opModM(addr, Reg32(7), 0x0F, 0x18); }
|
||||||
|
void prefetchit1(const Address& addr) { opModM(addr, Reg32(6), 0x0F, 0x18); }
|
||||||
void prefetchnta(const Address& addr) { opModM(addr, Reg32(0), 0x0F, 0x18); }
|
void prefetchnta(const Address& addr) { opModM(addr, Reg32(0), 0x0F, 0x18); }
|
||||||
void prefetcht0(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0x18); }
|
void prefetcht0(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0x18); }
|
||||||
void prefetcht1(const Address& addr) { opModM(addr, Reg32(2), 0x0F, 0x18); }
|
void prefetcht1(const Address& addr) { opModM(addr, Reg32(2), 0x0F, 0x18); }
|
||||||
|
@ -747,6 +753,7 @@ void sbb(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x18); }
|
||||||
void scasb() { db(0xAE); }
|
void scasb() { db(0xAE); }
|
||||||
void scasd() { db(0xAF); }
|
void scasd() { db(0xAF); }
|
||||||
void scasw() { db(0x66); db(0xAF); }
|
void scasw() { db(0x66); db(0xAF); }
|
||||||
|
void serialize() { db(0x0F); db(0x01); db(0xE8); }
|
||||||
void seta(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 7); }//-V524
|
void seta(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 7); }//-V524
|
||||||
void setae(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 3); }//-V524
|
void setae(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 3); }//-V524
|
||||||
void setb(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 2); }//-V524
|
void setb(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 2); }//-V524
|
||||||
|
@ -844,6 +851,8 @@ void vandnpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
|
||||||
void vandnps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x55); }
|
void vandnps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x55); }
|
||||||
void vandpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x54); }
|
void vandpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x54); }
|
||||||
void vandps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x54); }
|
void vandps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x54); }
|
||||||
|
void vbcstnebf162ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F3 | T_0F38 | T_W0 | T_YMM | T_B16, 0xB1); }
|
||||||
|
void vbcstnesh2ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_66 | T_0F38 | T_W0 | T_YMM | T_B16, 0xB1); }
|
||||||
void vblendpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0D, imm); }
|
void vblendpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0D, imm); }
|
||||||
void vblendps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0C, imm); }
|
void vblendps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0C, imm); }
|
||||||
void vblendvpd(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4B, x4.getIdx() << 4); }
|
void vblendvpd(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4B, x4.getIdx() << 4); }
|
||||||
|
@ -988,6 +997,11 @@ void vcomisd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8 | T
|
||||||
void vcomiss(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N4 | T_0F | T_EW0 | T_EVEX | T_SAE_X, 0x2F); }
|
void vcomiss(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N4 | T_0F | T_EW0 | T_EVEX | T_SAE_X, 0x2F); }
|
||||||
void vcvtdq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_F3 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0xE6); }
|
void vcvtdq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_F3 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0xE6); }
|
||||||
void vcvtdq2ps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5B); }
|
void vcvtdq2ps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5B); }
|
||||||
|
void vcvtneebf162ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F3 | T_0F38 | T_W0 | T_YMM, 0xB0); }
|
||||||
|
void vcvtneeph2ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_66 | T_0F38 | T_W0 | T_YMM, 0xB0); }
|
||||||
|
void vcvtneobf162ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F2 | T_0F38 | T_W0 | T_YMM, 0xB0); }
|
||||||
|
void vcvtneoph2ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_0F38 | T_W0 | T_YMM, 0xB0); }
|
||||||
|
void vcvtneps2bf16(const Xmm& x, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32 | orEvexIf(encoding), 0x72); }
|
||||||
void vcvtpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0xE6); }
|
void vcvtpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0xE6); }
|
||||||
void vcvtpd2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5A); }
|
void vcvtpd2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5A); }
|
||||||
void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }
|
void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }
|
||||||
|
@ -1191,10 +1205,16 @@ void vpcmpgtq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1
|
||||||
void vpcmpgtw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x65); }
|
void vpcmpgtw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x65); }
|
||||||
void vpcmpistri(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x63, imm); }
|
void vpcmpistri(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x63, imm); }
|
||||||
void vpcmpistrm(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x62, imm); }
|
void vpcmpistrm(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x62, imm); }
|
||||||
void vpdpbusd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x50, encoding); }
|
void vpdpbssd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_W0 | T_YMM, 0x50); }
|
||||||
void vpdpbusds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x51, encoding); }
|
void vpdpbssds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_W0 | T_YMM, 0x51); }
|
||||||
void vpdpwssd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x52, encoding); }
|
void vpdpbsud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_W0 | T_YMM, 0x50); }
|
||||||
void vpdpwssds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x53, encoding); }
|
void vpdpbsuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_W0 | T_YMM, 0x51); }
|
||||||
|
void vpdpbusd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x50, encoding); }
|
||||||
|
void vpdpbusds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x51, encoding); }
|
||||||
|
void vpdpbuud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_W0 | T_YMM, 0x50); }
|
||||||
|
void vpdpbuuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_W0 | T_YMM, 0x51); }
|
||||||
|
void vpdpwssd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x52, encoding); }
|
||||||
|
void vpdpwssds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x53, encoding); }
|
||||||
void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }
|
void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }
|
||||||
void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }
|
void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }
|
||||||
void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x36); }
|
void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x36); }
|
||||||
|
@ -1226,6 +1246,8 @@ void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if
|
||||||
void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x22, imm); }
|
void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x22, imm); }
|
||||||
void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(64) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x22, imm); }
|
void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(64) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x22, imm); }
|
||||||
void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F | T_66 | T_EVEX | T_N2, 0xC4, imm); }
|
void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F | T_66 | T_EVEX | T_N2, 0xC4, imm); }
|
||||||
|
void vpmadd52huq(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_B64, 0xB5, encoding); }
|
||||||
|
void vpmadd52luq(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_B64, 0xB4, encoding); }
|
||||||
void vpmaddubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x04); }
|
void vpmaddubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x04); }
|
||||||
void vpmaddwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF5); }
|
void vpmaddwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF5); }
|
||||||
void vpmaskmovd(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x8E); }
|
void vpmaskmovd(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x8E); }
|
||||||
|
@ -1642,6 +1664,22 @@ void vcvtsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx())
|
||||||
void vcvttsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_SAE_X, 0x2C); }
|
void vcvttsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_SAE_X, 0x2C); }
|
||||||
void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); }
|
void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); }
|
||||||
void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); }
|
void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); }
|
||||||
|
void cmpbexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE6, false); }
|
||||||
|
void cmpbxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE2, false); }
|
||||||
|
void cmplexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xEE, false); }
|
||||||
|
void cmplxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xEC, false); }
|
||||||
|
void cmpnbexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE7, false); }
|
||||||
|
void cmpnbxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE3, false); }
|
||||||
|
void cmpnlexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xEF, false); }
|
||||||
|
void cmpnlxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xED, false); }
|
||||||
|
void cmpnoxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE1, false); }
|
||||||
|
void cmpnpxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xEB, false); }
|
||||||
|
void cmpnsxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE9, false); }
|
||||||
|
void cmpnzxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE5, false); }
|
||||||
|
void cmpoxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE0, false); }
|
||||||
|
void cmppxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xEA, false); }
|
||||||
|
void cmpsxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE8, false); }
|
||||||
|
void cmpzxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE4, false); }
|
||||||
void ldtilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_0F38 | T_W0, 0x49); }
|
void ldtilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_0F38 | T_W0, 0x49); }
|
||||||
void sttilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); }
|
void sttilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); }
|
||||||
void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2 | T_0F38 | T_W0, 0x4b); }
|
void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2 | T_0F38 | T_W0, 0x4b); }
|
||||||
|
@ -1653,6 +1691,7 @@ void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T
|
||||||
void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }
|
void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }
|
||||||
void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }
|
void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }
|
||||||
void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38 | T_W0, 0x5e); }
|
void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38 | T_W0, 0x5e); }
|
||||||
|
void tdpfp16ps(const Tmm &x1, const Tmm &x2, const Tmm &x3) { opVex(x1, &x3, x2, T_F2 | T_0F38 | T_W0, 0x5c); }
|
||||||
void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5c); }
|
void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5c); }
|
||||||
#else
|
#else
|
||||||
void jcxz(std::string label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); }
|
void jcxz(std::string label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); }
|
||||||
|
@ -1907,7 +1946,6 @@ void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 |
|
||||||
void vcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x63); }
|
void vcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x63); }
|
||||||
void vcvtdq2ph(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16 | T_N_VL | T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x5B); }
|
void vcvtdq2ph(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16 | T_N_VL | T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x5B); }
|
||||||
void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
|
void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
|
||||||
void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
|
|
||||||
void vcvtpd2ph(const Xmm& x, const Operand& op) { opCvt5(x, op, T_N16 | T_N_VL | T_66 | T_MAP5 | T_EW1 | T_ER_Z | T_MUST_EVEX | T_B64, 0x5A); }
|
void vcvtpd2ph(const Xmm& x, const Operand& op) { opCvt5(x, op, T_N16 | T_N_VL | T_66 | T_MAP5 | T_EW1 | T_ER_Z | T_MUST_EVEX | T_B64, 0x5A); }
|
||||||
void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7B); }
|
void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7B); }
|
||||||
void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x79); }
|
void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x79); }
|
||||||
|
@ -2141,38 +2179,36 @@ void vpgatherqd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T
|
||||||
void vpgatherqq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x91, 0); }
|
void vpgatherqq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x91, 0); }
|
||||||
void vplzcntd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x44); }
|
void vplzcntd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x44); }
|
||||||
void vplzcntq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x44); }
|
void vplzcntq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x44); }
|
||||||
void vpmadd52huq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xB5); }
|
|
||||||
void vpmadd52luq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xB4); }
|
|
||||||
void vpmaxsq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x3D); }
|
void vpmaxsq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x3D); }
|
||||||
void vpmaxuq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x3F); }
|
void vpmaxuq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x3F); }
|
||||||
void vpminsq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x39); }
|
void vpminsq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x39); }
|
||||||
void vpminuq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x3B); }
|
void vpminuq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x3B); }
|
||||||
void vpmovb2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x29); }
|
void vpmovb2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x29); }
|
||||||
void vpmovd2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x39); }
|
void vpmovd2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x39); }
|
||||||
void vpmovdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x31, false); }
|
void vpmovdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x31, false); }
|
||||||
void vpmovdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x33, true); }
|
void vpmovdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x33, true); }
|
||||||
void vpmovm2b(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x28); }
|
void vpmovm2b(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x28); }
|
||||||
void vpmovm2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x38); }
|
void vpmovm2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x38); }
|
||||||
void vpmovm2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x38); }
|
void vpmovm2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x38); }
|
||||||
void vpmovm2w(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x28); }
|
void vpmovm2w(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x28); }
|
||||||
void vpmovq2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x39); }
|
void vpmovq2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x39); }
|
||||||
void vpmovqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x32, false); }
|
void vpmovqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x32, false); }
|
||||||
void vpmovqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x35, true); }
|
void vpmovqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x35, true); }
|
||||||
void vpmovqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x34, false); }
|
void vpmovqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x34, false); }
|
||||||
void vpmovsdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x21, false); }
|
void vpmovsdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x21, false); }
|
||||||
void vpmovsdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x23, true); }
|
void vpmovsdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x23, true); }
|
||||||
void vpmovsqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x22, false); }
|
void vpmovsqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x22, false); }
|
||||||
void vpmovsqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x25, true); }
|
void vpmovsqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x25, true); }
|
||||||
void vpmovsqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x24, false); }
|
void vpmovsqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x24, false); }
|
||||||
void vpmovswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x20, true); }
|
void vpmovswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x20, true); }
|
||||||
void vpmovusdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x11, false); }
|
void vpmovusdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x11, false); }
|
||||||
void vpmovusdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x13, true); }
|
void vpmovusdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x13, true); }
|
||||||
void vpmovusqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x12, false); }
|
void vpmovusqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x12, false); }
|
||||||
void vpmovusqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x15, true); }
|
void vpmovusqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x15, true); }
|
||||||
void vpmovusqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x14, false); }
|
void vpmovusqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x14, false); }
|
||||||
void vpmovuswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x10, true); }
|
void vpmovuswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x10, true); }
|
||||||
void vpmovw2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x29); }
|
void vpmovw2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x29); }
|
||||||
void vpmovwb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x30, true); }
|
void vpmovwb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x30, true); }
|
||||||
void vpmullq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x40); }
|
void vpmullq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x40); }
|
||||||
void vpmultishiftqb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x83); }
|
void vpmultishiftqb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x83); }
|
||||||
void vpopcntb(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x54); }
|
void vpopcntb(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x54); }
|
||||||
|
|
|
@ -4,7 +4,6 @@
|
||||||
#ifdef XBYAK_ONLY_CLASS_CPU
|
#ifdef XBYAK_ONLY_CLASS_CPU
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <algorithm>
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#ifndef XBYAK_THROW
|
#ifndef XBYAK_THROW
|
||||||
#define XBYAK_THROW(x) ;
|
#define XBYAK_THROW(x) ;
|
||||||
|
@ -96,6 +95,11 @@ struct TypeT {
|
||||||
template<uint64_t L1, uint64_t H1, uint64_t L2, uint64_t H2>
|
template<uint64_t L1, uint64_t H1, uint64_t L2, uint64_t H2>
|
||||||
TypeT<L1 | L2, H1 | H2> operator|(TypeT<L1, H1>, TypeT<L2, H2>) { return TypeT<L1 | L2, H1 | H2>(); }
|
TypeT<L1 | L2, H1 | H2> operator|(TypeT<L1, H1>, TypeT<L2, H2>) { return TypeT<L1 | L2, H1 | H2>(); }
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
inline T max_(T x, T y) { return x >= y ? x : y; }
|
||||||
|
template<typename T>
|
||||||
|
inline T min_(T x, T y) { return x < y ? x : y; }
|
||||||
|
|
||||||
} // local
|
} // local
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -193,8 +197,8 @@ private:
|
||||||
/*
|
/*
|
||||||
Fallback values in case a hypervisor has 0xB leaf zeroed-out.
|
Fallback values in case a hypervisor has 0xB leaf zeroed-out.
|
||||||
*/
|
*/
|
||||||
numCores_[SmtLevel - 1] = (std::max)(1u, numCores_[SmtLevel - 1]);
|
numCores_[SmtLevel - 1] = local::max_(1u, numCores_[SmtLevel - 1]);
|
||||||
numCores_[CoreLevel - 1] = (std::max)(numCores_[SmtLevel - 1], numCores_[CoreLevel - 1]);
|
numCores_[CoreLevel - 1] = local::max_(numCores_[SmtLevel - 1], numCores_[CoreLevel - 1]);
|
||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
Failed to deremine num of cores without x2APIC support.
|
Failed to deremine num of cores without x2APIC support.
|
||||||
|
@ -237,7 +241,7 @@ private:
|
||||||
if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
|
if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
|
||||||
uint32_t actual_logical_cores = extractBit(data[0], 14, 25) + 1;
|
uint32_t actual_logical_cores = extractBit(data[0], 14, 25) + 1;
|
||||||
if (logical_cores != 0) { // true only if leaf 0xB is supported and valid
|
if (logical_cores != 0) { // true only if leaf 0xB is supported and valid
|
||||||
actual_logical_cores = (std::min)(actual_logical_cores, logical_cores);
|
actual_logical_cores = local::min_(actual_logical_cores, logical_cores);
|
||||||
}
|
}
|
||||||
assert(actual_logical_cores != 0);
|
assert(actual_logical_cores != 0);
|
||||||
dataCacheSize_[dataCacheLevels_] =
|
dataCacheSize_[dataCacheLevels_] =
|
||||||
|
@ -247,7 +251,7 @@ private:
|
||||||
* (data[2] + 1);
|
* (data[2] + 1);
|
||||||
if (cacheType == DATA_CACHE && smt_width == 0) smt_width = actual_logical_cores;
|
if (cacheType == DATA_CACHE && smt_width == 0) smt_width = actual_logical_cores;
|
||||||
assert(smt_width != 0);
|
assert(smt_width != 0);
|
||||||
coresSharignDataCache_[dataCacheLevels_] = (std::max)(actual_logical_cores / smt_width, 1u);
|
coresSharignDataCache_[dataCacheLevels_] = local::max_(actual_logical_cores / smt_width, 1u);
|
||||||
dataCacheLevels_++;
|
dataCacheLevels_++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -302,7 +306,7 @@ public:
|
||||||
static inline void getCpuidEx(uint32_t eaxIn, uint32_t ecxIn, uint32_t data[4])
|
static inline void getCpuidEx(uint32_t eaxIn, uint32_t ecxIn, uint32_t data[4])
|
||||||
{
|
{
|
||||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||||
#ifdef _MSC_VER
|
#ifdef _WIN32
|
||||||
__cpuidex(reinterpret_cast<int*>(data), eaxIn, ecxIn);
|
__cpuidex(reinterpret_cast<int*>(data), eaxIn, ecxIn);
|
||||||
#else
|
#else
|
||||||
__cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]);
|
__cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]);
|
||||||
|
@ -406,6 +410,13 @@ public:
|
||||||
XBYAK_DEFINE_TYPE(65, tMOVDIRI);
|
XBYAK_DEFINE_TYPE(65, tMOVDIRI);
|
||||||
XBYAK_DEFINE_TYPE(66, tMOVDIR64B);
|
XBYAK_DEFINE_TYPE(66, tMOVDIR64B);
|
||||||
XBYAK_DEFINE_TYPE(67, tCLZERO); // AMD Zen
|
XBYAK_DEFINE_TYPE(67, tCLZERO); // AMD Zen
|
||||||
|
XBYAK_DEFINE_TYPE(68, tAMX_FP16);
|
||||||
|
XBYAK_DEFINE_TYPE(69, tAVX_VNNI_INT8);
|
||||||
|
XBYAK_DEFINE_TYPE(70, tAVX_NE_CONVERT);
|
||||||
|
XBYAK_DEFINE_TYPE(71, tAVX_IFMA);
|
||||||
|
XBYAK_DEFINE_TYPE(72, tRAO_INT);
|
||||||
|
XBYAK_DEFINE_TYPE(73, tCMPCCXADD);
|
||||||
|
XBYAK_DEFINE_TYPE(74, tPREFETCHITI);
|
||||||
|
|
||||||
#undef XBYAK_SPLIT_ID
|
#undef XBYAK_SPLIT_ID
|
||||||
#undef XBYAK_DEFINE_TYPE
|
#undef XBYAK_DEFINE_TYPE
|
||||||
|
@ -545,10 +556,17 @@ public:
|
||||||
if (EDX & (1U << 22)) type_ |= tAMX_BF16;
|
if (EDX & (1U << 22)) type_ |= tAMX_BF16;
|
||||||
if (maxNumSubLeaves >= 1) {
|
if (maxNumSubLeaves >= 1) {
|
||||||
getCpuidEx(7, 1, data);
|
getCpuidEx(7, 1, data);
|
||||||
|
if (EAX & (1U << 3)) type_ |= tRAO_INT;
|
||||||
if (EAX & (1U << 4)) type_ |= tAVX_VNNI;
|
if (EAX & (1U << 4)) type_ |= tAVX_VNNI;
|
||||||
if (type_ & tAVX512F) {
|
if (type_ & tAVX512F) {
|
||||||
if (EAX & (1U << 5)) type_ |= tAVX512_BF16;
|
if (EAX & (1U << 5)) type_ |= tAVX512_BF16;
|
||||||
}
|
}
|
||||||
|
if (EAX & (1U << 7)) type_ |= tCMPCCXADD;
|
||||||
|
if (EAX & (1U << 21)) type_ |= tAMX_FP16;
|
||||||
|
if (EAX & (1U << 23)) type_ |= tAVX_IFMA;
|
||||||
|
if (EDX & (1U << 4)) type_ |= tAVX_VNNI_INT8;
|
||||||
|
if (EDX & (1U << 5)) type_ |= tAVX_NE_CONVERT;
|
||||||
|
if (EDX & (1U << 14)) type_ |= tPREFETCHITI;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
setFamily();
|
setFamily();
|
||||||
|
@ -771,7 +789,7 @@ public:
|
||||||
const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0);
|
const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0);
|
||||||
if (tNum_ < 0 || allRegNum > maxRegNum) XBYAK_THROW(ERR_BAD_TNUM)
|
if (tNum_ < 0 || allRegNum > maxRegNum) XBYAK_THROW(ERR_BAD_TNUM)
|
||||||
const Reg64& _rsp = code->rsp;
|
const Reg64& _rsp = code->rsp;
|
||||||
saveNum_ = (std::max)(0, allRegNum - noSaveNum);
|
saveNum_ = local::max_(0, allRegNum - noSaveNum);
|
||||||
const int *tbl = getOrderTbl() + noSaveNum;
|
const int *tbl = getOrderTbl() + noSaveNum;
|
||||||
for (int i = 0; i < saveNum_; i++) {
|
for (int i = 0; i < saveNum_; i++) {
|
||||||
code->push(Reg64(tbl[i]));
|
code->push(Reg64(tbl[i]));
|
||||||
|
|
Loading…
Reference in a new issue