externals: Update xbyak to v6.68
Merge commit 'f6fdb5f55a88f73ef7bef45f50cf5878ceec9781'
This commit is contained in:
commit
916d7cf9bd
30 changed files with 589 additions and 155 deletions
18
externals/xbyak/.github/workflows/main.yml
vendored
18
externals/xbyak/.github/workflows/main.yml
vendored
|
@ -1,13 +1,21 @@
|
|||
name: test
|
||||
on: [push]
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: sh
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: test
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: debian:testing
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- run: sudo apt update
|
||||
- run: sudo apt install nasm yasm g++-multilib tcsh
|
||||
- uses: actions/checkout@v3
|
||||
- run: apt -y update
|
||||
- run: apt -y install g++-multilib libboost-dev make nasm yasm
|
||||
- run: make test
|
||||
- run: make -C sample CXXFLAGS="-DXBYAK_NO_EXCEPTION"
|
||||
|
|
8
externals/xbyak/Android.bp
vendored
Normal file
8
externals/xbyak/Android.bp
vendored
Normal file
|
@ -0,0 +1,8 @@
|
|||
//#################################################
|
||||
cc_library_headers {
|
||||
name: "xbyak_headers",
|
||||
vendor: true,
|
||||
export_include_dirs: [
|
||||
"xbyak"
|
||||
],
|
||||
}
|
2
externals/xbyak/CMakeLists.txt
vendored
2
externals/xbyak/CMakeLists.txt
vendored
|
@ -1,6 +1,6 @@
|
|||
cmake_minimum_required(VERSION 2.6...3.0.2)
|
||||
|
||||
project(xbyak LANGUAGES CXX VERSION 6.61)
|
||||
project(xbyak LANGUAGES CXX VERSION 6.68)
|
||||
|
||||
file(GLOB headers xbyak/*.h)
|
||||
|
||||
|
|
9
externals/xbyak/doc/changelog.md
vendored
9
externals/xbyak/doc/changelog.md
vendored
|
@ -1,5 +1,14 @@
|
|||
# History
|
||||
|
||||
* 2022/Dec/07 ver 6.68 support prefetchit{0,1}
|
||||
* 2022/Nov/30 ver 6.67 support CMPccXADD
|
||||
* 2022/Nov/25 ver 6.66 support RAO-INT
|
||||
* 2022/Nov/22 ver 6.65 consider x32
|
||||
* 2022/Nov/04 ver 6.64 some vmov* support addressing with mask
|
||||
* 2022/Oct/06 ver 6.63 vpmadd52{h,l}uq support AVX-IFMA
|
||||
* 2022/Oct/05 ver 6.63 support amx_fp16/avx_vnni_int8/avx_ne_convert and add setDefaultEncoding()
|
||||
* 2022/Aug/15 ver 6.62 add serialize instruction
|
||||
* 2022/Aug/02 ver 6.61.1 noexcept is supported by Visual Studio 2015 or later
|
||||
* 2022/Jul/29 ver 6.61 fix exception of movzx eax, ah in 64-bit mode
|
||||
* 2022/Jun/16 ver 6.60.2 fix detection of GFNI, VAES, and VPCLMULQDQ
|
||||
* 2022/Jun/15 ver 6.60.1 fix link error of Xbyak::util::Cpu on Visual Studio with /O0 option
|
||||
|
|
12
externals/xbyak/doc/install.md
vendored
12
externals/xbyak/doc/install.md
vendored
|
@ -12,3 +12,15 @@ make install
|
|||
```
|
||||
|
||||
These files are copied into `/usr/local/include/xbyak`.
|
||||
|
||||
# Building xbyak - Using vcpkg
|
||||
|
||||
You can download and install xbyak using the [vcpkg](https://github.com/Microsoft/vcpkg) dependency manager:
|
||||
|
||||
git clone https://github.com/Microsoft/vcpkg.git
|
||||
cd vcpkg
|
||||
./bootstrap-vcpkg.sh
|
||||
./vcpkg integrate install
|
||||
./vcpkg install xbyak
|
||||
|
||||
The xbyak port in vcpkg is kept up to date by Microsoft team members and community contributors. If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository.
|
||||
|
|
8
externals/xbyak/doc/usage.md
vendored
8
externals/xbyak/doc/usage.md
vendored
|
@ -110,7 +110,15 @@ vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64],
|
|||
vpdpbusd(xm0, xm1, xm2); // default encoding is EVEX
|
||||
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
|
||||
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX encoding
|
||||
setDefaultEncoding(VexEncoding); // default encoding is VEX
|
||||
vpdpbusd(xm0, xm1, xm2); // VEX encoding
|
||||
```
|
||||
|
||||
- setDefaultEncoding(PreferredEncoding encoding);
|
||||
- Set the default encoding to select EVEX or VEX.
|
||||
- The default value is EvexEncoding.
|
||||
- This function affects only an instruction that has a PreferredEncoding argument such as vpdpbusd.
|
||||
|
||||
### Remark
|
||||
* `k1`, ..., `k7` are opmask registers.
|
||||
- `k0` is dealt as no mask.
|
||||
|
|
2
externals/xbyak/gen/Makefile
vendored
2
externals/xbyak/gen/Makefile
vendored
|
@ -1,6 +1,6 @@
|
|||
TARGET=../xbyak/xbyak_mnemonic.h
|
||||
BIN=sortline gen_code gen_avx512
|
||||
CFLAGS=-I../ -O2 -DXBYAK_NO_OP_NAMES -Wall -Wextra -Wno-missing-field-initializers $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS)
|
||||
CFLAGS=-I../ -O2 -Wall -Wextra -Wno-missing-field-initializers $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS)
|
||||
all: $(TARGET) ../CMakeLists.txt ../meson.build ../readme.md ../readme.txt
|
||||
sortline: sortline.cpp
|
||||
$(CXX) $(CFLAGS) $< -o $@
|
||||
|
|
40
externals/xbyak/gen/gen_avx512.cpp
vendored
40
externals/xbyak/gen/gen_avx512.cpp
vendored
|
@ -387,9 +387,6 @@ void putX_X_XM_IMM()
|
|||
{ 0x57, "vreducess", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true },
|
||||
{ 0x57, "vreducesh", T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, true },
|
||||
|
||||
{ 0xB4, "vpmadd52luq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
{ 0xB5, "vpmadd52huq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x70, "vpshldw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, true },
|
||||
{ 0x71, "vpshldd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, true },
|
||||
{ 0x71, "vpshldq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, true },
|
||||
|
@ -695,29 +692,29 @@ void putMov()
|
|||
int type;
|
||||
int mode;
|
||||
} tbl[] = {
|
||||
{ 0x32, "vpmovqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false },
|
||||
{ 0x22, "vpmovsqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false },
|
||||
{ 0x12, "vpmovusqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false },
|
||||
{ 0x32, "vpmovqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL | T_M_K, false },
|
||||
{ 0x22, "vpmovsqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL | T_M_K, false },
|
||||
{ 0x12, "vpmovusqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL | T_M_K, false },
|
||||
|
||||
{ 0x34, "vpmovqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
|
||||
{ 0x24, "vpmovsqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
|
||||
{ 0x14, "vpmovusqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
|
||||
{ 0x34, "vpmovqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false },
|
||||
{ 0x24, "vpmovsqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false },
|
||||
{ 0x14, "vpmovusqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false },
|
||||
|
||||
{ 0x35, "vpmovqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
||||
{ 0x25, "vpmovsqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
||||
{ 0x15, "vpmovusqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
||||
{ 0x35, "vpmovqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||
{ 0x25, "vpmovsqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||
{ 0x15, "vpmovusqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||
|
||||
{ 0x31, "vpmovdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
|
||||
{ 0x21, "vpmovsdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
|
||||
{ 0x11, "vpmovusdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
|
||||
{ 0x31, "vpmovdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false },
|
||||
{ 0x21, "vpmovsdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false },
|
||||
{ 0x11, "vpmovusdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL | T_M_K, false },
|
||||
|
||||
{ 0x33, "vpmovdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
||||
{ 0x23, "vpmovsdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
||||
{ 0x13, "vpmovusdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
||||
{ 0x33, "vpmovdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||
{ 0x23, "vpmovsdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||
{ 0x13, "vpmovusdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||
|
||||
{ 0x30, "vpmovwb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
||||
{ 0x20, "vpmovswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
||||
{ 0x10, "vpmovuswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
|
||||
{ 0x30, "vpmovwb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||
{ 0x20, "vpmovswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||
{ 0x10, "vpmovuswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL | T_M_K, true },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
|
@ -827,7 +824,6 @@ void putMisc()
|
|||
puts("void vfpclasssh(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_0F3A | T_MUST_EVEX | T_EW0 | T_N2, 0x67, imm); }");
|
||||
|
||||
puts("void vpshufbitqmb(const Opmask& k, const Xmm& x, const Operand& op) { opVex(k, &x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8F); }");
|
||||
puts("void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }");
|
||||
|
||||
puts("void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }");
|
||||
puts("void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); }");
|
||||
|
|
92
externals/xbyak/gen/gen_code.cpp
vendored
92
externals/xbyak/gen/gen_code.cpp
vendored
|
@ -560,6 +560,8 @@ void put()
|
|||
{ 0, "nta", 0x18},
|
||||
{ 2, "wt1", 0x0D},
|
||||
{ 1, "w", 0x0D},
|
||||
{ 7, "it0", 0x18},
|
||||
{ 6, "it1", 0x18},
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
|
@ -693,6 +695,7 @@ void put()
|
|||
{ "lock", 0xF0 },
|
||||
|
||||
{ "sahf", 0x9E },
|
||||
{ "serialize", 0x0F, 0x01, 0xE8 },
|
||||
{ "stc", 0xF9 },
|
||||
{ "std", 0xFD },
|
||||
{ "sti", 0xFB },
|
||||
|
@ -806,6 +809,23 @@ void put()
|
|||
printf("void %s(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x%02X, %d); }\n", p->name, p->code, p->ext);
|
||||
}
|
||||
}
|
||||
{
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
uint8_t prefix;
|
||||
} tbl[] = {
|
||||
{ "aadd", 0 },
|
||||
{ "aand", 0x66 },
|
||||
{ "aor", 0xF2 },
|
||||
{ "axor", 0xF3 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
printf("void %s(const Address& addr, const Reg32e ®) { ", p->name);
|
||||
if (p->prefix) printf("db(0x%02X); ", p->prefix);
|
||||
printf("opModM(addr, reg, 0x0F, 0x38, 0x0FC); }\n");
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
const struct Tbl {
|
||||
|
@ -1666,6 +1686,25 @@ void put()
|
|||
puts("void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y | T_M_K, 0x1D, imm); }");
|
||||
|
||||
}
|
||||
{
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
int type;
|
||||
uint8_t code;
|
||||
} tbl[] = {
|
||||
{ "vbcstnebf162ps", T_F3 | T_0F38 | T_W0 | T_B16 | T_YMM, 0xB1 },
|
||||
{ "vbcstnesh2ps", T_66 | T_0F38 | T_W0 | T_B16 | T_YMM, 0xB1 },
|
||||
{ "vcvtneebf162ps", T_F3 | T_0F38 | T_W0 | T_YMM, 0xB0 },
|
||||
{ "vcvtneeph2ps", T_66 | T_0F38 | T_W0 | T_YMM, 0xB0 },
|
||||
{ "vcvtneobf162ps", T_F2 | T_0F38 | T_W0 | T_YMM, 0xB0 },
|
||||
{ "vcvtneoph2ps", T_0F38 | T_W0 | T_YMM, 0xB0 }
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
printf("void %s(const Xmm& x, const Address& addr) { opVex(x, 0, addr, %s, 0x%02X); }\n", p.name, type2String(p.type).c_str(), p.code);
|
||||
}
|
||||
puts("void vcvtneps2bf16(const Xmm& x, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32 | orEvexIf(encoding), 0x72); }");
|
||||
}
|
||||
// haswell gpr(reg, reg, r/m)
|
||||
{
|
||||
const struct Tbl {
|
||||
|
@ -1755,11 +1794,33 @@ void put()
|
|||
{ 0x51, "vpdpbusds", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
|
||||
{ 0x52, "vpdpwssd", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
|
||||
{ 0x53, "vpdpwssds", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32},
|
||||
{ 0xB4, "vpmadd52luq", T_66 | T_0F38 | T_YMM | T_EW1 | T_B64 },
|
||||
{ 0xB5, "vpmadd52huq", T_66 | T_0F38 | T_YMM | T_EW1 | T_B64 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
std::string type = type2String(p->type);
|
||||
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, %s, 0x%02X, encoding); }\n", p->name, type.c_str(), p->code);
|
||||
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, %s, 0x%02X, encoding); }\n", p->name, type.c_str(), p->code);
|
||||
}
|
||||
}
|
||||
// avx-vnni-int8
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
} tbl[] = {
|
||||
{ 0x50, "vpdpbssd", T_F2 | T_0F38 | T_W0 | T_YMM },
|
||||
{ 0x51, "vpdpbssds", T_F2 | T_0F38 | T_W0 | T_YMM },
|
||||
{ 0x50, "vpdpbsud", T_F3 | T_0F38 | T_W0 | T_YMM },
|
||||
{ 0x51, "vpdpbsuds", T_F3 | T_0F38 | T_W0 | T_YMM },
|
||||
{ 0x50, "vpdpbuud", T_0F38 | T_W0 | T_YMM },
|
||||
{ 0x51, "vpdpbuuds", T_0F38 | T_W0 | T_YMM },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
std::string type = type2String(p->type);
|
||||
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1824,6 +1885,34 @@ void put64()
|
|||
|
||||
puts("void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); }");
|
||||
puts("void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); }");
|
||||
// CMPccXADD
|
||||
{
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
uint8_t code;
|
||||
} tbl[] = {
|
||||
{ "be", 0xE6 },
|
||||
{ "b", 0xE2 },
|
||||
{ "le", 0xEE },
|
||||
{ "l", 0xEC },
|
||||
{ "nbe", 0xE7 },
|
||||
{ "nb", 0xE3 },
|
||||
{ "nle", 0xEF },
|
||||
{ "nl", 0xED },
|
||||
{ "no", 0xE1 },
|
||||
{ "np", 0xEB },
|
||||
{ "ns", 0xE9 },
|
||||
{ "nz", 0xE5 },
|
||||
{ "o", 0xE0 },
|
||||
{ "p", 0xEA },
|
||||
{ "s", 0xE8 },
|
||||
{ "z", 0xE4 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
printf("void cmp%sxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0x%02X, false); }\n", p->name, p->code);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void putAMX_TILE()
|
||||
|
@ -1842,6 +1931,7 @@ void putAMX_INT8()
|
|||
puts("void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }");
|
||||
puts("void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }");
|
||||
puts("void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38 | T_W0, 0x5e); }");
|
||||
puts("void tdpfp16ps(const Tmm &x1, const Tmm &x2, const Tmm &x3) { opVex(x1, &x3, x2, T_F2 | T_0F38 | T_W0, 0x5c); }");
|
||||
}
|
||||
void putAMX_BF16()
|
||||
{
|
||||
|
|
2
externals/xbyak/meson.build
vendored
2
externals/xbyak/meson.build
vendored
|
@ -5,7 +5,7 @@
|
|||
project(
|
||||
'xbyak',
|
||||
'cpp',
|
||||
version: '6.61',
|
||||
version: '6.68',
|
||||
license: 'BSD-3-Clause',
|
||||
default_options: 'b_ndebug=if-release'
|
||||
)
|
||||
|
|
3
externals/xbyak/readme.md
vendored
3
externals/xbyak/readme.md
vendored
|
@ -1,5 +1,5 @@
|
|||
|
||||
# Xbyak 6.61 [![Badge Build]][Build Status]
|
||||
# Xbyak 6.68 [![Badge Build]][Build Status]
|
||||
|
||||
*A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)*
|
||||
|
||||
|
@ -28,6 +28,7 @@ If you want to use them, then specify `-fno-operator-names` option to gcc/clang.
|
|||
|
||||
### News
|
||||
|
||||
- add amx_fp16/avx_vnni_int8/avx_ne_convert/avx-ifma
|
||||
- add movdiri, movdir64b, clwb, cldemote
|
||||
- WAITPKG instructions (tpause, umonitor, umwait) are supported.
|
||||
- MmapAllocator supports memfd with user-defined strings. see sample/memfd.cpp
|
||||
|
|
15
externals/xbyak/readme.txt
vendored
15
externals/xbyak/readme.txt
vendored
|
@ -1,5 +1,5 @@
|
|||
|
||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.61
|
||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.68
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎概要
|
||||
|
@ -166,13 +166,15 @@ vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, xword_b [rax+64],
|
|||
vpdpbusd(xm0, xm1, xm2); // default encoding is EVEX
|
||||
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
|
||||
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX encoding
|
||||
|
||||
setDefaultEncoding(VexEncoding); // default encoding is VEX
|
||||
vpdpbusd(xm0, xm1, xm2); // VEX encoding
|
||||
注意
|
||||
* k1, ..., k7 は新しいopmaskレジスタです。
|
||||
* z, sae, rn-sae, rd-sae, ru-sae, rz-saeの代わりにT_z, T_sae, T_rn_sae, T_rd_sae, T_ru_sae, T_rz_saeを使ってください。
|
||||
* `k4 | k3`と`k3 | k4`は意味が異なります。
|
||||
* {1toX}の代わりにptr_bを使ってください。Xは自動的に決まります。
|
||||
* 一部の命令はメモリサイズを指定するためにxword/yword/zword(_b)を使ってください。
|
||||
* setDefaultEncoding()でencoding省略時のEVEX/VEXを設定できます。
|
||||
|
||||
・ラベル
|
||||
|
||||
|
@ -400,6 +402,15 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
|
|||
-----------------------------------------------------------------------------
|
||||
◎履歴
|
||||
|
||||
2022/12/07 ver 6.68 prefetchit{0,1}サポート
|
||||
2022/11/30 ver 6.67 CMPccXADDサポート
|
||||
2022/11/25 ver 6.66 RAO-INTサポート
|
||||
2022/11/22 ver 6.65 x32動作確認
|
||||
2022/11/04 ver 6.64 vmov*命令をmaskつきアドレッシング対応修正
|
||||
2022/10/06 ver 6.63 AVX-IFMA用のvpmadd52{h,l}uq対応
|
||||
2022/10/05 amx_fp16/avx_vnni_int8/avx_ne_convertt対応とsetDefaultEncoding()追加
|
||||
2022/09/15 ver 6.62 serialize追加
|
||||
2022/08/02 ver 6.61.1 noexceptはVisual Studio 2015以降対応
|
||||
2022/07/29 ver 6.61 movzx eax, ahがエラーになるのを修正
|
||||
2022/06/16 ver 6.60.2 GFNI, VAES, VPCLMULQDQの判定修正
|
||||
2022/06/15 ver 6.60.1 Visual Studio /O0でXbyak::util::Cpuがリンクエラーになるのに対応
|
||||
|
|
5
externals/xbyak/sample/Makefile
vendored
5
externals/xbyak/sample/Makefile
vendored
|
@ -1,6 +1,7 @@
|
|||
XBYAK_INC=../xbyak/xbyak.h
|
||||
CXX?=g++
|
||||
|
||||
BOOST_EXIST=$(shell echo "\#include <boost/spirit/core.hpp>" | (gcc -E - 2>/dev/null) | grep "boost/spirit/core.hpp" >/dev/null && echo "1")
|
||||
BOOST_EXIST=$(shell echo "#include <boost/spirit/core.hpp>" | $(CXX) -x c++ -c - 2>/dev/null && echo 1)
|
||||
UNAME_M=$(shell uname -m)
|
||||
|
||||
ONLY_64BIT=0
|
||||
|
@ -104,7 +105,7 @@ profiler-vtune: profiler.cpp ../xbyak/xbyak_util.h
|
|||
$(CXX) $(CFLAGS) profiler.cpp -o $@ -DXBYAK_USE_VTUNE -I /opt/intel/vtune_amplifier/include/ -L /opt/intel/vtune_amplifier/lib64 -ljitprofiling -ldl
|
||||
|
||||
clean:
|
||||
rm -rf *.o $(TARGET) *.exe profiler profiler-vtune
|
||||
rm -rf $(TARGET) profiler profiler-vtune
|
||||
|
||||
test : test0.cpp $(XBYAK_INC)
|
||||
test64: test0.cpp $(XBYAK_INC)
|
||||
|
|
2
externals/xbyak/sample/quantize.cpp
vendored
2
externals/xbyak/sample/quantize.cpp
vendored
|
@ -199,7 +199,7 @@ int main(int argc, char *argv[])
|
|||
quantize2(dest2, src, qTbl);
|
||||
for (int i = 0; i < N; i++) {
|
||||
if (dest[i] != dest2[i]) {
|
||||
printf("err[%d] %d %d\n", i, dest[i], dest2[i]);
|
||||
printf("err[%d] %u %u\n", i, dest[i], dest2[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
7
externals/xbyak/sample/test_util.cpp
vendored
7
externals/xbyak/sample/test_util.cpp
vendored
|
@ -89,6 +89,13 @@ void putCPUinfo(bool onlyCpuidFeature)
|
|||
{ Cpu::tMOVDIRI, "movdiri" },
|
||||
{ Cpu::tMOVDIR64B, "movdir64b" },
|
||||
{ Cpu::tCLZERO, "clzero" },
|
||||
{ Cpu::tAMX_FP16, "amx_fp16" },
|
||||
{ Cpu::tAVX_VNNI_INT8, "avx_vnni_int8" },
|
||||
{ Cpu::tAVX_NE_CONVERT, "avx_ne_convert" },
|
||||
{ Cpu::tAVX_IFMA, "avx_ifma" },
|
||||
{ Cpu::tRAO_INT, "rao-int" },
|
||||
{ Cpu::tCMPCCXADD, "cmpccxadd" },
|
||||
{ Cpu::tPREFETCHITI, "prefetchiti" },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
if (cpu.has(tbl[i].type)) printf(" %s", tbl[i].str);
|
||||
|
|
8
externals/xbyak/sample/toyvm.cpp
vendored
8
externals/xbyak/sample/toyvm.cpp
vendored
|
@ -5,8 +5,8 @@
|
|||
|
||||
mem_ 4byte x 65536
|
||||
|
||||
すべての命令は4byte固定
|
||||
即値は全て16bit
|
||||
all instructions are fixed at 4 bytes.
|
||||
all immediate values are 16-bit.
|
||||
|
||||
R = A or B
|
||||
vldiR, imm ; R = imm
|
||||
|
@ -109,7 +109,7 @@ public:
|
|||
reg[r] -= imm;
|
||||
break;
|
||||
case PUT:
|
||||
printf("%c %8d(0x%08x)\n", 'A' + r, reg[r], reg[r]);
|
||||
printf("%c %8u(0x%08x)\n", 'A' + r, reg[r], reg[r]);
|
||||
break;
|
||||
case JNZ:
|
||||
if (reg[r] != 0) pc += static_cast<signed short>(imm);
|
||||
|
@ -294,7 +294,7 @@ lp:
|
|||
p = t;
|
||||
n--;
|
||||
if (n != 0) goto lp;
|
||||
printf("c=%d(0x%08x)\n", c, c);
|
||||
printf("c=%u(0x%08x)\n", c, c);
|
||||
}
|
||||
|
||||
int main()
|
||||
|
|
49
externals/xbyak/test/Makefile
vendored
49
externals/xbyak/test/Makefile
vendored
|
@ -1,6 +1,9 @@
|
|||
TARGET = make_nm normalize_prefix bad_address misc cvt_test cvt_test32 noexception misc32
|
||||
TARGET = make_nm normalize_prefix bad_address misc cvt_test cvt_test32 noexception misc32 detect_x32
|
||||
XBYAK_INC=../xbyak/xbyak.h
|
||||
UNAME_S=$(shell uname -s)
|
||||
ifeq ($(shell ./detect_x32),x32)
|
||||
X32?=1
|
||||
endif
|
||||
BIT=32
|
||||
ifeq ($(shell uname -m),x86_64)
|
||||
BIT=64
|
||||
|
@ -20,9 +23,9 @@ endif
|
|||
|
||||
all: $(TARGET)
|
||||
|
||||
CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith
|
||||
CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wwrite-strings -Wfloat-equal -Wpointer-arith
|
||||
|
||||
CFLAGS=-O2 -fomit-frame-pointer -Wall -fno-operator-names -I../ -I./ $(CFLAGS_WARN) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) #-std=c++0x
|
||||
CFLAGS=-O2 -Wall -I../ -I./ $(CFLAGS_WARN) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) #-std=c++0x
|
||||
make_nm:
|
||||
$(CXX) $(CFLAGS) make_nm.cpp -o $@
|
||||
normalize_prefix: normalize_prefix.cpp ../xbyak/xbyak.h
|
||||
|
@ -53,12 +56,11 @@ noexception: noexception.cpp ../xbyak/xbyak.h
|
|||
test_nm: normalize_prefix $(TARGET)
|
||||
$(MAKE) -C ../gen
|
||||
ifneq ($(ONLY_64BIT),1)
|
||||
./test_nm.sh
|
||||
./test_nm.sh noexcept
|
||||
./noexception
|
||||
./test_nm.sh Y
|
||||
./test_nm.sh avx512
|
||||
./test_address.sh
|
||||
CXX=$(CXX) ./test_nm.sh
|
||||
CXX=$(CXX) ./test_nm.sh noexcept
|
||||
CXX=$(CXX) ./test_nm.sh Y
|
||||
CXX=$(CXX) ./test_nm.sh avx512
|
||||
CXX=$(CXX) ./test_address.sh
|
||||
./jmp
|
||||
./cvt_test32
|
||||
endif
|
||||
|
@ -67,32 +69,38 @@ endif
|
|||
./misc32
|
||||
./cvt_test
|
||||
ifeq ($(BIT),64)
|
||||
./test_address.sh 64
|
||||
./test_nm.sh 64
|
||||
./test_nm.sh Y64
|
||||
CXX=$(CXX) ./test_address.sh 64
|
||||
ifneq ($(X32),1)
|
||||
CXX=$(CXX) ./test_nm.sh 64
|
||||
CXX=$(CXX) ./test_nm.sh Y64
|
||||
endif
|
||||
./jmp64
|
||||
endif
|
||||
|
||||
test_avx: normalize_prefix
|
||||
ifneq ($(ONLY_64BIT),0)
|
||||
./test_avx.sh
|
||||
./test_avx.sh Y
|
||||
CXX=$(CXX) ./test_avx.sh
|
||||
CXX=$(CXX) ./test_avx.sh Y
|
||||
endif
|
||||
ifeq ($(BIT),64)
|
||||
./test_address.sh 64
|
||||
./test_avx.sh 64
|
||||
./test_avx.sh Y64
|
||||
CXX=$(CXX) ./test_avx.sh 64
|
||||
ifneq ($(X32),1)
|
||||
CXX=$(CXX) ./test_avx.sh Y64
|
||||
endif
|
||||
endif
|
||||
|
||||
test_avx512: normalize_prefix
|
||||
ifneq ($(ONLY_64BIT),0)
|
||||
./test_avx512.sh
|
||||
CXX=$(CXX) ./test_avx512.sh
|
||||
endif
|
||||
ifeq ($(BIT),64)
|
||||
./test_avx512.sh 64
|
||||
CXX=$(CXX) ./test_avx512.sh 64
|
||||
endif
|
||||
|
||||
test:
|
||||
detect_x32: detect_x32.c
|
||||
$(CC) $< -o $@
|
||||
|
||||
test: detect_x32
|
||||
$(MAKE) test_nm
|
||||
$(MAKE) test_avx
|
||||
$(MAKE) test_avx512
|
||||
|
@ -104,4 +112,3 @@ lib_run: lib_test.cpp lib_run.cpp lib.h
|
|||
$(CXX) $(CFLAGS) lib_run.cpp lib_test.cpp -o lib_run
|
||||
make_nm: make_nm.cpp $(XBYAK_INC)
|
||||
|
||||
|
||||
|
|
2
externals/xbyak/test/Makefile.win
vendored
2
externals/xbyak/test/Makefile.win
vendored
|
@ -1,4 +1,4 @@
|
|||
OPT=/EHsc -I../xbyak /W4 -D_CRT_SECURE_NO_WARNINGS
|
||||
OPT=/EHsc -I../xbyak /W4 -D_CRT_SECURE_NO_WARNINGS -I ../
|
||||
../xbyak/xbyak_mnemonic.h: ../gen/gen_code.exe ../gen/gen_avx512.exe
|
||||
../gen/gen_code.exe > $@
|
||||
../gen/gen_avx512.exe >> $@
|
||||
|
|
8
externals/xbyak/test/detect_x32.c
vendored
Normal file
8
externals/xbyak/test/detect_x32.c
vendored
Normal file
|
@ -0,0 +1,8 @@
|
|||
#include <stdio.h>
|
||||
|
||||
int main()
|
||||
{
|
||||
#if defined(__x86_64__) && defined(__ILP32__)
|
||||
puts("x32");
|
||||
#endif
|
||||
}
|
54
externals/xbyak/test/make_512.cpp
vendored
54
externals/xbyak/test/make_512.cpp
vendored
|
@ -1807,44 +1807,44 @@ public:
|
|||
put("vpmovd2m", K, _XMM | _YMM | _ZMM);
|
||||
put("vpmovq2m", K, _XMM | _YMM | _ZMM);
|
||||
|
||||
put("vpmovqb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
||||
put("vpmovsqb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
||||
put("vpmovusqb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
||||
put("vpmovqb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||
put("vpmovsqb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||
put("vpmovusqb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||
|
||||
put("vpmovqw", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
||||
put("vpmovsqw", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
||||
put("vpmovusqw", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
||||
put("vpmovqw", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||
put("vpmovsqw", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||
put("vpmovusqw", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||
|
||||
put("vpmovqd", XMM_KZ | _MEM, _XMM | _YMM);
|
||||
put("vpmovqd", YMM_KZ | _MEM, _ZMM);
|
||||
put("vpmovqd", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||
put("vpmovqd", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||
|
||||
put("vpmovsqd", XMM_KZ | _MEM, _XMM | _YMM);
|
||||
put("vpmovsqd", YMM_KZ | _MEM, _ZMM);
|
||||
put("vpmovsqd", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||
put("vpmovsqd", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||
|
||||
put("vpmovusqd", XMM_KZ | _MEM, _XMM | _YMM);
|
||||
put("vpmovusqd", YMM_KZ | _MEM, _ZMM);
|
||||
put("vpmovusqd", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||
put("vpmovusqd", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||
|
||||
put("vpmovdb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
||||
put("vpmovsdb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
||||
put("vpmovusdb", XMM_KZ | _MEM, _XMM | _YMM | _ZMM);
|
||||
put("vpmovdb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||
put("vpmovsdb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||
put("vpmovusdb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM | _ZMM);
|
||||
|
||||
put("vpmovdw", XMM_KZ | _MEM, _XMM | _YMM);
|
||||
put("vpmovdw", YMM_KZ | _MEM, _ZMM);
|
||||
put("vpmovdw", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||
put("vpmovdw", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||
|
||||
put("vpmovsdw", XMM_KZ | _MEM, _XMM | _YMM);
|
||||
put("vpmovsdw", YMM_KZ | _MEM, _ZMM);
|
||||
put("vpmovsdw", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||
put("vpmovsdw", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||
|
||||
put("vpmovusdw", XMM_KZ | _MEM, _XMM | _YMM);
|
||||
put("vpmovusdw", YMM_KZ | _MEM, _ZMM);
|
||||
put("vpmovusdw", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||
put("vpmovusdw", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||
|
||||
put("vpmovwb", XMM_KZ | _MEM, _XMM | _YMM);
|
||||
put("vpmovwb", YMM_KZ | _MEM, _ZMM);
|
||||
put("vpmovwb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||
put("vpmovwb", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||
|
||||
put("vpmovswb", XMM_KZ | _MEM, _XMM | _YMM);
|
||||
put("vpmovswb", YMM_KZ | _MEM, _ZMM);
|
||||
put("vpmovswb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||
put("vpmovswb", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||
|
||||
put("vpmovuswb", XMM_KZ | _MEM, _XMM | _YMM);
|
||||
put("vpmovuswb", YMM_KZ | _MEM, _ZMM);
|
||||
put("vpmovuswb", XMM_KZ | _MEM | MEM_K, _XMM | _YMM);
|
||||
put("vpmovuswb", YMM_KZ | _MEM | MEM_K, _ZMM);
|
||||
}
|
||||
void putRot()
|
||||
{
|
||||
|
|
13
externals/xbyak/test/make_nm.cpp
vendored
13
externals/xbyak/test/make_nm.cpp
vendored
|
@ -533,6 +533,7 @@ class Test {
|
|||
"nop",
|
||||
|
||||
"sahf",
|
||||
"serialize",
|
||||
"stc",
|
||||
"std",
|
||||
"sti",
|
||||
|
@ -1017,9 +1018,7 @@ class Test {
|
|||
}
|
||||
void putCmov() const
|
||||
{
|
||||
const struct {
|
||||
const char *s;
|
||||
} tbl[] = {
|
||||
const char tbl[][4] = {
|
||||
"o",
|
||||
"no",
|
||||
"b",
|
||||
|
@ -1053,11 +1052,11 @@ class Test {
|
|||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
char buf[32];
|
||||
snprintf(buf, sizeof(buf), "cmov%s", tbl[i].s);
|
||||
snprintf(buf, sizeof(buf), "cmov%s", tbl[i]);
|
||||
put(buf, REG16, REG16|MEM);
|
||||
put(buf, REG32, REG32|MEM);
|
||||
put(buf, REG64, REG64|MEM);
|
||||
snprintf(buf, sizeof(buf), "set%s", tbl[i].s);
|
||||
snprintf(buf, sizeof(buf), "set%s", tbl[i]);
|
||||
put(buf, REG8|REG8_3|MEM);
|
||||
}
|
||||
}
|
||||
|
@ -1294,7 +1293,7 @@ class Test {
|
|||
put(p, REG64, "0x1234567890abcdefLL", "0x1234567890abcdef");
|
||||
put("movbe", REG16|REG32e, MEM);
|
||||
put("movbe", MEM, REG16|REG32e);
|
||||
#ifdef XBYAK64
|
||||
#if defined(XBYAK64) && !defined(__ILP32__)
|
||||
put(p, RAX|EAX|AX|AL, "ptr [0x1234567890abcdefLL]", "[qword 0x1234567890abcdef]");
|
||||
put(p, "ptr [0x1234567890abcdefLL]", "[qword 0x1234567890abcdef]", RAX|EAX|AX|AL);
|
||||
put(p, "qword [rax], 0");
|
||||
|
@ -2608,7 +2607,7 @@ public:
|
|||
putMPX();
|
||||
#endif
|
||||
|
||||
#ifdef XBYAK64
|
||||
#if defined(XBYAK64) && !defined(__ILP32__)
|
||||
|
||||
#ifdef USE_YASM
|
||||
putRip();
|
||||
|
|
183
externals/xbyak/test/misc.cpp
vendored
183
externals/xbyak/test/misc.cpp
vendored
|
@ -5,6 +5,7 @@
|
|||
#include <xbyak/xbyak_util.h>
|
||||
#include <cybozu/inttype.hpp>
|
||||
#include <cybozu/test.hpp>
|
||||
#include <algorithm>
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
|
@ -97,13 +98,17 @@ CYBOZU_TEST_AUTO(mov_const)
|
|||
}
|
||||
#ifdef XBYAK64
|
||||
CYBOZU_TEST_NO_EXCEPTION(mov(rax, ptr[(void*)0x7fffffff]));
|
||||
if (sizeof(void*) != 4) { // sizeof(void*) == 4 on x32
|
||||
CYBOZU_TEST_EXCEPTION(mov(rax, ptr[(void*)0x17fffffff]), Xbyak::Error);
|
||||
}
|
||||
#ifdef XBYAK_OLD_DISP_CHECK
|
||||
CYBOZU_TEST_NO_EXCEPTION(mov(rax, ptr[(void*)0x80000000]));
|
||||
CYBOZU_TEST_NO_EXCEPTION(mov(rax, ptr[(void*)0xffffffff]));
|
||||
#else
|
||||
if (sizeof(void*) != 4) { // sizeof(void*) == 4 on x32
|
||||
CYBOZU_TEST_EXCEPTION(mov(rax, ptr[(void*)0x80000000ull]), Xbyak::Error);
|
||||
CYBOZU_TEST_EXCEPTION(mov(rax, ptr[(void*)0xffffffffull]), Xbyak::Error);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
@ -875,6 +880,10 @@ CYBOZU_TEST_AUTO(vnni)
|
|||
vpdpbusd(xm0, xm1, xm2);
|
||||
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // EVEX
|
||||
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX
|
||||
setDefaultEncoding(VexEncoding);
|
||||
vpdpbusd(xm0, xm1, xm2); // VEX
|
||||
setDefaultEncoding(EvexEncoding);
|
||||
vpdpbusd(xm0, xm1, xm2); // EVEX
|
||||
}
|
||||
void badVex()
|
||||
{
|
||||
|
@ -885,6 +894,8 @@ CYBOZU_TEST_AUTO(vnni)
|
|||
0x62, 0xF2, 0x75, 0x08, 0x50, 0xC2,
|
||||
0x62, 0xF2, 0x75, 0x08, 0x50, 0xC2,
|
||||
0xC4, 0xE2, 0x71, 0x50, 0xC2,
|
||||
0xC4, 0xE2, 0x71, 0x50, 0xC2,
|
||||
0x62, 0xF2, 0x75, 0x08, 0x50, 0xC2,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
|
@ -1975,3 +1986,175 @@ CYBOZU_TEST_AUTO(cpu)
|
|||
Cpu cpu;
|
||||
CYBOZU_TEST_EQUAL(cpu.has(Cpu::tINTEL) && cpu.has(Cpu::tAMD), cpu.has(Cpu::tINTEL | Cpu::tAMD));
|
||||
}
|
||||
|
||||
CYBOZU_TEST_AUTO(minmax)
|
||||
{
|
||||
using namespace Xbyak::util;
|
||||
CYBOZU_TEST_EQUAL((std::min)(3, 4), local::min_(3, 4));
|
||||
CYBOZU_TEST_EQUAL((std::max)(3, 4), local::max_(3, 4));
|
||||
}
|
||||
|
||||
CYBOZU_TEST_AUTO(rao_int)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
#ifdef XBYAK64
|
||||
aadd(ptr[rax], ecx);
|
||||
aadd(ptr[eax], ecx);
|
||||
aadd(ptr[rax], r10);
|
||||
aand(ptr[rax], ecx);
|
||||
aand(ptr[eax], ecx);
|
||||
aand(ptr[rax], r10);
|
||||
aor(ptr[rax], ecx);
|
||||
aor(ptr[eax], ecx);
|
||||
aor(ptr[rax], r10);
|
||||
axor(ptr[rax], ecx);
|
||||
axor(ptr[eax], ecx);
|
||||
axor(ptr[rax], r10);
|
||||
#else
|
||||
aadd(ptr[eax], ecx);
|
||||
aand(ptr[eax], ecx);
|
||||
aor(ptr[eax], ecx);
|
||||
axor(ptr[eax], ecx);
|
||||
#endif
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
#ifdef XBYAK64
|
||||
// aadd
|
||||
0x0f, 0x38, 0xfc, 0x08,
|
||||
0x67, 0x0f, 0x38, 0xfc, 0x08,
|
||||
0x4c, 0x0f, 0x38, 0xfc, 0x10,
|
||||
|
||||
// aand
|
||||
0x66, 0x0f, 0x38, 0xfc, 0x08,
|
||||
0x66, 0x67, 0x0f, 0x38, 0xfc, 0x08,
|
||||
0x66, 0x4c, 0x0f, 0x38, 0xfc, 0x10,
|
||||
|
||||
// aor
|
||||
0xf2, 0x0f, 0x38, 0xfc, 0x08,
|
||||
0xf2, 0x67, 0x0f, 0x38, 0xfc, 0x08,
|
||||
0xf2, 0x4c, 0x0f, 0x38, 0xfc, 0x10,
|
||||
|
||||
// axor
|
||||
0xf3, 0x0f, 0x38, 0xfc, 0x08,
|
||||
0xf3, 0x67, 0x0f, 0x38, 0xfc, 0x08,
|
||||
0xf3, 0x4c, 0x0f, 0x38, 0xfc, 0x10,
|
||||
#else
|
||||
// aadd
|
||||
0x0f, 0x38, 0xfc, 0x08,
|
||||
// aand
|
||||
0x66, 0x0f, 0x38, 0xfc, 0x08,
|
||||
// aor
|
||||
0xf2, 0x0f, 0x38, 0xfc, 0x08,
|
||||
// axor
|
||||
0xf3, 0x0f, 0x38, 0xfc, 0x08,
|
||||
#endif
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
|
||||
#ifdef XBYAK64
|
||||
CYBOZU_TEST_AUTO(CMPccXADD)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
// 32bit reg
|
||||
cmpbexadd(ptr[rax+r10*4], ecx, edx);
|
||||
cmpbxadd(ptr[rax+r10*4], ecx, edx);
|
||||
cmplexadd(ptr[rax+r10*4], ecx, edx);
|
||||
cmplxadd(ptr[rax+r10*4], ecx, edx);
|
||||
cmpnbexadd(ptr[rax+r10*4], ecx, edx);
|
||||
cmpnbxadd(ptr[rax+r10*4], ecx, edx);
|
||||
cmpnlexadd(ptr[rax+r10*4], ecx, edx);
|
||||
cmpnlxadd(ptr[rax+r10*4], ecx, edx);
|
||||
cmpnoxadd(ptr[rax+r10*4], ecx, edx);
|
||||
cmpnpxadd(ptr[rax+r10*4], ecx, edx);
|
||||
cmpnsxadd(ptr[rax+r10*4], ecx, edx);
|
||||
cmpnzxadd(ptr[rax+r10*4], ecx, edx);
|
||||
cmpoxadd(ptr[rax+r10*4], ecx, edx);
|
||||
cmppxadd(ptr[rax+r10*4], ecx, edx);
|
||||
cmpsxadd(ptr[rax+r10*4], ecx, edx);
|
||||
cmpzxadd(ptr[rax+r10*4], ecx, edx);
|
||||
// 64bit reg
|
||||
cmpbexadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpbxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmplexadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmplxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpnbexadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpnbxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpnlexadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpnlxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpnoxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpnpxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpnsxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpnzxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpoxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmppxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpsxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
cmpzxadd(ptr[rax+r10*4], rcx, rdx);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
// 32bit reg
|
||||
0xc4, 0xa2, 0x69, 0xe6, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0x69, 0xe2, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0x69, 0xee, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0x69, 0xec, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0x69, 0xe7, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0x69, 0xe3, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0x69, 0xef, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0x69, 0xed, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0x69, 0xe1, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0x69, 0xeb, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0x69, 0xe9, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0x69, 0xe5, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0x69, 0xe0, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0x69, 0xea, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0x69, 0xe8, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0x69, 0xe4, 0x0c, 0x90,
|
||||
// 64bit reg
|
||||
0xc4, 0xa2, 0xe9, 0xe6, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0xe9, 0xe2, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0xe9, 0xee, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0xe9, 0xec, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0xe9, 0xe7, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0xe9, 0xe3, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0xe9, 0xef, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0xe9, 0xed, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0xe9, 0xe1, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0xe9, 0xeb, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0xe9, 0xe9, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0xe9, 0xe5, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0xe9, 0xe0, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0xe9, 0xea, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0xe9, 0xe8, 0x0c, 0x90,
|
||||
0xc4, 0xa2, 0xe9, 0xe4, 0x0c, 0x90,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
|
||||
CYBOZU_TEST_AUTO(prefetchiti)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
prefetchit0(ptr[rax]);
|
||||
prefetchit1(ptr[rax]);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
0x0f, 0x18, 0x38,
|
||||
0x0f, 0x18, 0x30
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
#endif
|
||||
|
|
2
externals/xbyak/test/noexception.cpp
vendored
2
externals/xbyak/test/noexception.cpp
vendored
|
@ -56,7 +56,7 @@ void test2()
|
|||
void test3()
|
||||
{
|
||||
static struct EmptyAllocator : Xbyak::Allocator {
|
||||
uint8_t *alloc() { return 0; }
|
||||
uint8_t *alloc(size_t) { return 0; }
|
||||
} emptyAllocator;
|
||||
struct Code : CodeGenerator {
|
||||
Code() : CodeGenerator(8, 0, &emptyAllocator)
|
||||
|
|
10
externals/xbyak/test/test_address.sh
vendored
10
externals/xbyak/test/test_address.sh
vendored
|
@ -1,13 +1,17 @@
|
|||
#!/bin/sh
|
||||
|
||||
set -e
|
||||
|
||||
FILTER="grep -v warning"
|
||||
|
||||
sub()
|
||||
{
|
||||
|
||||
CFLAGS="-Wall -fno-operator-names -I../ $OPT2"
|
||||
CFLAGS="-Wall -I../ $OPT2"
|
||||
CXX=${CXX:=g++}
|
||||
|
||||
echo "compile address.cpp"
|
||||
g++ $CFLAGS address.cpp -o address
|
||||
$CXX $CFLAGS address.cpp -o address
|
||||
|
||||
./address $1 > a.asm
|
||||
echo "asm"
|
||||
|
@ -17,7 +21,7 @@ awk '{printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER > ok.lst
|
|||
echo "xbyak"
|
||||
./address $1 jit > nm.cpp
|
||||
echo "compile nm_frame.cpp"
|
||||
g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
|
||||
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
|
||||
./nm_frame > x.lst
|
||||
diff ok.lst x.lst && echo "ok"
|
||||
|
||||
|
|
9
externals/xbyak/test/test_avx.sh
vendored
9
externals/xbyak/test/test_avx.sh
vendored
|
@ -1,6 +1,9 @@
|
|||
#!/bin/sh
|
||||
|
||||
set -e
|
||||
|
||||
FILTER="grep -v warning"
|
||||
CXX=${CXX:=g++}
|
||||
|
||||
case $1 in
|
||||
Y)
|
||||
|
@ -31,9 +34,9 @@ Y64)
|
|||
;;
|
||||
esac
|
||||
|
||||
CFLAGS="-Wall -fno-operator-names -I../ $OPT2 -DUSE_AVX"
|
||||
CFLAGS="-Wall -I../ $OPT2 -DUSE_AVX"
|
||||
echo "compile make_nm.cpp"
|
||||
g++ $CFLAGS make_nm.cpp -o make_nm
|
||||
$CXX $CFLAGS make_nm.cpp -o make_nm
|
||||
|
||||
./make_nm > a.asm
|
||||
echo "asm"
|
||||
|
@ -43,6 +46,6 @@ awk '$3 != "1+1" {printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER
|
|||
echo "xbyak"
|
||||
./make_nm jit > nm.cpp
|
||||
echo "compile nm_frame.cpp"
|
||||
g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
|
||||
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
|
||||
./nm_frame | $FILTER > x.lst
|
||||
diff -B ok.lst x.lst && echo "ok"
|
||||
|
|
9
externals/xbyak/test/test_avx512.sh
vendored
9
externals/xbyak/test/test_avx512.sh
vendored
|
@ -1,6 +1,9 @@
|
|||
#!/bin/sh
|
||||
|
||||
set -e
|
||||
|
||||
FILTER="grep -v warning"
|
||||
CXX=${CXX:=g++}
|
||||
|
||||
case $1 in
|
||||
64)
|
||||
|
@ -18,9 +21,9 @@ case $1 in
|
|||
;;
|
||||
esac
|
||||
|
||||
CFLAGS="-Wall -fno-operator-names -I../ $OPT2 -DUSE_AVX512"
|
||||
CFLAGS="-Wall -I../ $OPT2 -DUSE_AVX512"
|
||||
echo "compile make_512.cpp"
|
||||
g++ $CFLAGS make_512.cpp -o make_512
|
||||
$CXX $CFLAGS make_512.cpp -o make_512
|
||||
|
||||
./make_512 > a.asm
|
||||
echo "asm"
|
||||
|
@ -30,6 +33,6 @@ awk '{printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER > ok.lst
|
|||
echo "xbyak"
|
||||
./make_512 jit > nm.cpp
|
||||
echo "compile nm_frame.cpp"
|
||||
g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame -DXBYAK_AVX512
|
||||
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame -DXBYAK_AVX512
|
||||
./nm_frame | $FILTER > x.lst
|
||||
diff -B ok.lst x.lst && echo "ok"
|
||||
|
|
9
externals/xbyak/test/test_nm.sh
vendored
9
externals/xbyak/test/test_nm.sh
vendored
|
@ -1,6 +1,9 @@
|
|||
#!/bin/sh
|
||||
|
||||
set -e
|
||||
|
||||
FILTER=cat
|
||||
CXX=${CXX:=g++}
|
||||
|
||||
case $1 in
|
||||
Y)
|
||||
|
@ -44,9 +47,9 @@ noexcept)
|
|||
;;
|
||||
esac
|
||||
|
||||
CFLAGS="-Wall -fno-operator-names -I../ $OPT2"
|
||||
CFLAGS="-Wall -I../ $OPT2"
|
||||
echo "compile make_nm.cpp with $CFLAGS"
|
||||
g++ $CFLAGS make_nm.cpp -o make_nm
|
||||
$CXX $CFLAGS make_nm.cpp -o make_nm
|
||||
|
||||
./make_nm > a.asm
|
||||
echo "asm"
|
||||
|
@ -56,6 +59,6 @@ awk '$3 != "1+1" {printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER
|
|||
echo "xbyak"
|
||||
./make_nm jit > nm.cpp
|
||||
echo "compile nm_frame.cpp"
|
||||
g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
|
||||
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
|
||||
./nm_frame | $FILTER > x.lst
|
||||
diff -B ok.lst x.lst && echo "ok"
|
||||
|
|
37
externals/xbyak/xbyak/xbyak.h
vendored
37
externals/xbyak/xbyak/xbyak.h
vendored
|
@ -118,7 +118,7 @@
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if (__cplusplus >= 201103) || (defined(_MSC_VER) && _MSC_VER >= 1800)
|
||||
#if (__cplusplus >= 201103) || (defined(_MSC_VER) && _MSC_VER >= 1900)
|
||||
#undef XBYAK_TLS
|
||||
#define XBYAK_TLS thread_local
|
||||
#define XBYAK_VARIADIC_TEMPLATE
|
||||
|
@ -144,11 +144,18 @@
|
|||
#pragma warning(disable : 4127) /* constant expresison */
|
||||
#endif
|
||||
|
||||
// disable -Warray-bounds because it may be a bug of gcc. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104603
|
||||
#if defined(__GNUC__) && !defined(__clang__)
|
||||
#define XBYAK_DISABLE_WARNING_ARRAY_BOUNDS
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Warray-bounds"
|
||||
#endif
|
||||
|
||||
namespace Xbyak {
|
||||
|
||||
enum {
|
||||
DEFAULT_MAX_CODE_SIZE = 4096,
|
||||
VERSION = 0x6610 /* 0xABCD = A.BC(.D) */
|
||||
VERSION = 0x6680 /* 0xABCD = A.BC(.D) */
|
||||
};
|
||||
|
||||
#ifndef MIE_INTEGER_TYPE_DEFINED
|
||||
|
@ -371,7 +378,7 @@ inline bool IsInInt32(uint64_t x) { return ~uint64_t(0x7fffffffu) <= x || x <= 0
|
|||
|
||||
inline uint32_t VerifyInInt32(uint64_t x)
|
||||
{
|
||||
#ifdef XBYAK64
|
||||
#if defined(XBYAK64) && !defined(__ILP32__)
|
||||
if (!IsInInt32(x)) XBYAK_THROW_RET(ERR_OFFSET_IS_TOO_BIG, 0)
|
||||
#endif
|
||||
return static_cast<uint32_t>(x);
|
||||
|
@ -1478,7 +1485,6 @@ public:
|
|||
clabelDefList_.clear();
|
||||
clabelUndefList_.clear();
|
||||
resetLabelPtrList();
|
||||
ClearError();
|
||||
}
|
||||
void enterLocal()
|
||||
{
|
||||
|
@ -1820,7 +1826,7 @@ private:
|
|||
void setSIB(const RegExp& e, int reg, int disp8N = 0)
|
||||
{
|
||||
uint64_t disp64 = e.getDisp();
|
||||
#ifdef XBYAK64
|
||||
#if defined(XBYAK64) && !defined(__ILP32__)
|
||||
#ifdef XBYAK_OLD_DISP_CHECK
|
||||
// treat 0xffffffff as 0xffffffffffffffff
|
||||
uint64_t high = disp64 >> 32;
|
||||
|
@ -2412,18 +2418,21 @@ private:
|
|||
if (addr.getRegExp().getIndex().getKind() != kind) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
|
||||
opVex(x, 0, addr, type, code);
|
||||
}
|
||||
void opVnni(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int code0, PreferredEncoding encoding)
|
||||
void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int code0, PreferredEncoding encoding)
|
||||
{
|
||||
opAVX_X_X_XM(x1, x2, op, type | orEvexIf(encoding), code0);
|
||||
}
|
||||
int orEvexIf(PreferredEncoding encoding) {
|
||||
if (encoding == DefaultEncoding) {
|
||||
encoding = EvexEncoding;
|
||||
encoding = defaultEncoding_;
|
||||
}
|
||||
if (encoding == EvexEncoding) {
|
||||
#ifdef XBYAK_DISABLE_AVX512
|
||||
XBYAK_THROW(ERR_EVEX_IS_INVALID)
|
||||
#endif
|
||||
type |= T_MUST_EVEX;
|
||||
return T_MUST_EVEX;
|
||||
}
|
||||
opAVX_X_X_XM(x1, x2, op, type, code0);
|
||||
return 0;
|
||||
}
|
||||
void opInOut(const Reg& a, const Reg& d, uint8_t code)
|
||||
{
|
||||
|
@ -2508,6 +2517,7 @@ public:
|
|||
#endif
|
||||
private:
|
||||
bool isDefaultJmpNEAR_;
|
||||
PreferredEncoding defaultEncoding_;
|
||||
public:
|
||||
void L(const std::string& label) { labelMgr_.defineSlabel(label); }
|
||||
void L(Label& label) { labelMgr_.defineClabel(label); }
|
||||
|
@ -2787,11 +2797,13 @@ public:
|
|||
, es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs)
|
||||
#endif
|
||||
, isDefaultJmpNEAR_(false)
|
||||
, defaultEncoding_(EvexEncoding)
|
||||
{
|
||||
labelMgr_.set(this);
|
||||
}
|
||||
void reset()
|
||||
{
|
||||
ClearError();
|
||||
resetSize();
|
||||
labelMgr_.reset();
|
||||
labelMgr_.set(this);
|
||||
|
@ -2823,6 +2835,9 @@ public:
|
|||
#undef jnl
|
||||
#endif
|
||||
|
||||
// set default encoding to select Vex or Evex
|
||||
void setDefaultEncoding(PreferredEncoding encoding) { defaultEncoding_ = encoding; }
|
||||
|
||||
/*
|
||||
use single byte nop if useMultiByteNop = false
|
||||
*/
|
||||
|
@ -2927,6 +2942,10 @@ static const XBYAK_CONSTEXPR Segment es(Segment::es), cs(Segment::cs), ss(Segmen
|
|||
#pragma warning(pop)
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__) && !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
} // end of namespace
|
||||
|
||||
#endif // XBYAK_XBYAK_H_
|
||||
|
|
88
externals/xbyak/xbyak/xbyak_mnemonic.h
vendored
88
externals/xbyak/xbyak/xbyak_mnemonic.h
vendored
|
@ -1,4 +1,6 @@
|
|||
const char *getVersionString() const { return "6.61"; }
|
||||
const char *getVersionString() const { return "6.68"; }
|
||||
void aadd(const Address& addr, const Reg32e ®) { opModM(addr, reg, 0x0F, 0x38, 0x0FC); }
|
||||
void aand(const Address& addr, const Reg32e ®) { db(0x66); opModM(addr, reg, 0x0F, 0x38, 0x0FC); }
|
||||
void adc(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x10, 2); }
|
||||
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
|
||||
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
|
||||
|
@ -24,6 +26,8 @@ void andnpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x66, isXM
|
|||
void andnps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x100, isXMM_XMMorMEM); }
|
||||
void andpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x66, isXMM_XMMorMEM); }
|
||||
void andps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x100, isXMM_XMMorMEM); }
|
||||
void aor(const Address& addr, const Reg32e ®) { db(0xF2); opModM(addr, reg, 0x0F, 0x38, 0x0FC); }
|
||||
void axor(const Address& addr, const Reg32e ®) { db(0xF3); opModM(addr, reg, 0x0F, 0x38, 0x0FC); }
|
||||
void bextr(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_0F38, 0xf7, false); }
|
||||
void blendpd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0D, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
|
||||
void blendps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0C, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
|
||||
|
@ -654,6 +658,8 @@ void pmuludq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF4); }
|
|||
void popcnt(const Reg®, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xB8); }
|
||||
void popf() { db(0x9D); }
|
||||
void por(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEB); }
|
||||
void prefetchit0(const Address& addr) { opModM(addr, Reg32(7), 0x0F, 0x18); }
|
||||
void prefetchit1(const Address& addr) { opModM(addr, Reg32(6), 0x0F, 0x18); }
|
||||
void prefetchnta(const Address& addr) { opModM(addr, Reg32(0), 0x0F, 0x18); }
|
||||
void prefetcht0(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0x18); }
|
||||
void prefetcht1(const Address& addr) { opModM(addr, Reg32(2), 0x0F, 0x18); }
|
||||
|
@ -747,6 +753,7 @@ void sbb(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x18); }
|
|||
void scasb() { db(0xAE); }
|
||||
void scasd() { db(0xAF); }
|
||||
void scasw() { db(0x66); db(0xAF); }
|
||||
void serialize() { db(0x0F); db(0x01); db(0xE8); }
|
||||
void seta(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 7); }//-V524
|
||||
void setae(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 3); }//-V524
|
||||
void setb(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 2); }//-V524
|
||||
|
@ -844,6 +851,8 @@ void vandnpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
|
|||
void vandnps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x55); }
|
||||
void vandpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x54); }
|
||||
void vandps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x54); }
|
||||
void vbcstnebf162ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F3 | T_0F38 | T_W0 | T_YMM | T_B16, 0xB1); }
|
||||
void vbcstnesh2ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_66 | T_0F38 | T_W0 | T_YMM | T_B16, 0xB1); }
|
||||
void vblendpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0D, imm); }
|
||||
void vblendps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0C, imm); }
|
||||
void vblendvpd(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4B, x4.getIdx() << 4); }
|
||||
|
@ -988,6 +997,11 @@ void vcomisd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8 | T
|
|||
void vcomiss(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N4 | T_0F | T_EW0 | T_EVEX | T_SAE_X, 0x2F); }
|
||||
void vcvtdq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_F3 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0xE6); }
|
||||
void vcvtdq2ps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5B); }
|
||||
void vcvtneebf162ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F3 | T_0F38 | T_W0 | T_YMM, 0xB0); }
|
||||
void vcvtneeph2ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_66 | T_0F38 | T_W0 | T_YMM, 0xB0); }
|
||||
void vcvtneobf162ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F2 | T_0F38 | T_W0 | T_YMM, 0xB0); }
|
||||
void vcvtneoph2ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_0F38 | T_W0 | T_YMM, 0xB0); }
|
||||
void vcvtneps2bf16(const Xmm& x, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32 | orEvexIf(encoding), 0x72); }
|
||||
void vcvtpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0xE6); }
|
||||
void vcvtpd2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5A); }
|
||||
void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }
|
||||
|
@ -1191,10 +1205,16 @@ void vpcmpgtq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1
|
|||
void vpcmpgtw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x65); }
|
||||
void vpcmpistri(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x63, imm); }
|
||||
void vpcmpistrm(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x62, imm); }
|
||||
void vpdpbusd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x50, encoding); }
|
||||
void vpdpbusds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x51, encoding); }
|
||||
void vpdpwssd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x52, encoding); }
|
||||
void vpdpwssds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x53, encoding); }
|
||||
void vpdpbssd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_W0 | T_YMM, 0x50); }
|
||||
void vpdpbssds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_W0 | T_YMM, 0x51); }
|
||||
void vpdpbsud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_W0 | T_YMM, 0x50); }
|
||||
void vpdpbsuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_W0 | T_YMM, 0x51); }
|
||||
void vpdpbusd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x50, encoding); }
|
||||
void vpdpbusds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x51, encoding); }
|
||||
void vpdpbuud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_W0 | T_YMM, 0x50); }
|
||||
void vpdpbuuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38 | T_W0 | T_YMM, 0x51); }
|
||||
void vpdpwssd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x52, encoding); }
|
||||
void vpdpwssds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x53, encoding); }
|
||||
void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }
|
||||
void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }
|
||||
void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x36); }
|
||||
|
@ -1226,6 +1246,8 @@ void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if
|
|||
void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x22, imm); }
|
||||
void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(64) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x22, imm); }
|
||||
void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F | T_66 | T_EVEX | T_N2, 0xC4, imm); }
|
||||
void vpmadd52huq(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_B64, 0xB5, encoding); }
|
||||
void vpmadd52luq(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_B64, 0xB4, encoding); }
|
||||
void vpmaddubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x04); }
|
||||
void vpmaddwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF5); }
|
||||
void vpmaskmovd(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x8E); }
|
||||
|
@ -1642,6 +1664,22 @@ void vcvtsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx())
|
|||
void vcvttsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_SAE_X, 0x2C); }
|
||||
void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); }
|
||||
void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); }
|
||||
void cmpbexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE6, false); }
|
||||
void cmpbxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE2, false); }
|
||||
void cmplexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xEE, false); }
|
||||
void cmplxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xEC, false); }
|
||||
void cmpnbexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE7, false); }
|
||||
void cmpnbxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE3, false); }
|
||||
void cmpnlexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xEF, false); }
|
||||
void cmpnlxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xED, false); }
|
||||
void cmpnoxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE1, false); }
|
||||
void cmpnpxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xEB, false); }
|
||||
void cmpnsxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE9, false); }
|
||||
void cmpnzxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE5, false); }
|
||||
void cmpoxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE0, false); }
|
||||
void cmppxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xEA, false); }
|
||||
void cmpsxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE8, false); }
|
||||
void cmpzxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opGpr(r1, addr, r2, T_66 | T_0F38, 0xE4, false); }
|
||||
void ldtilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_0F38 | T_W0, 0x49); }
|
||||
void sttilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); }
|
||||
void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2 | T_0F38 | T_W0, 0x4b); }
|
||||
|
@ -1653,6 +1691,7 @@ void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T
|
|||
void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }
|
||||
void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }
|
||||
void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38 | T_W0, 0x5e); }
|
||||
void tdpfp16ps(const Tmm &x1, const Tmm &x2, const Tmm &x3) { opVex(x1, &x3, x2, T_F2 | T_0F38 | T_W0, 0x5c); }
|
||||
void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5c); }
|
||||
#else
|
||||
void jcxz(std::string label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); }
|
||||
|
@ -1907,7 +1946,6 @@ void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 |
|
|||
void vcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x63); }
|
||||
void vcvtdq2ph(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16 | T_N_VL | T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x5B); }
|
||||
void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
|
||||
void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
|
||||
void vcvtpd2ph(const Xmm& x, const Operand& op) { opCvt5(x, op, T_N16 | T_N_VL | T_66 | T_MAP5 | T_EW1 | T_ER_Z | T_MUST_EVEX | T_B64, 0x5A); }
|
||||
void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7B); }
|
||||
void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x79); }
|
||||
|
@ -2141,38 +2179,36 @@ void vpgatherqd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T
|
|||
void vpgatherqq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x91, 0); }
|
||||
void vplzcntd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x44); }
|
||||
void vplzcntq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x44); }
|
||||
void vpmadd52huq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xB5); }
|
||||
void vpmadd52luq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xB4); }
|
||||
void vpmaxsq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x3D); }
|
||||
void vpmaxuq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x3F); }
|
||||
void vpminsq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x39); }
|
||||
void vpminuq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x3B); }
|
||||
void vpmovb2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x29); }
|
||||
void vpmovd2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x39); }
|
||||
void vpmovdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x31, false); }
|
||||
void vpmovdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x33, true); }
|
||||
void vpmovdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x31, false); }
|
||||
void vpmovdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x33, true); }
|
||||
void vpmovm2b(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x28); }
|
||||
void vpmovm2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x38); }
|
||||
void vpmovm2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x38); }
|
||||
void vpmovm2w(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x28); }
|
||||
void vpmovq2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x39); }
|
||||
void vpmovqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x32, false); }
|
||||
void vpmovqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x35, true); }
|
||||
void vpmovqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x34, false); }
|
||||
void vpmovsdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x21, false); }
|
||||
void vpmovsdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x23, true); }
|
||||
void vpmovsqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x22, false); }
|
||||
void vpmovsqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x25, true); }
|
||||
void vpmovsqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x24, false); }
|
||||
void vpmovswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x20, true); }
|
||||
void vpmovusdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x11, false); }
|
||||
void vpmovusdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x13, true); }
|
||||
void vpmovusqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x12, false); }
|
||||
void vpmovusqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x15, true); }
|
||||
void vpmovusqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x14, false); }
|
||||
void vpmovuswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x10, true); }
|
||||
void vpmovqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x32, false); }
|
||||
void vpmovqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x35, true); }
|
||||
void vpmovqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x34, false); }
|
||||
void vpmovsdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x21, false); }
|
||||
void vpmovsdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x23, true); }
|
||||
void vpmovsqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x22, false); }
|
||||
void vpmovsqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x25, true); }
|
||||
void vpmovsqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x24, false); }
|
||||
void vpmovswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x20, true); }
|
||||
void vpmovusdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x11, false); }
|
||||
void vpmovusdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x13, true); }
|
||||
void vpmovusqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x12, false); }
|
||||
void vpmovusqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x15, true); }
|
||||
void vpmovusqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x14, false); }
|
||||
void vpmovuswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x10, true); }
|
||||
void vpmovw2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x29); }
|
||||
void vpmovwb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x30, true); }
|
||||
void vpmovwb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0x30, true); }
|
||||
void vpmullq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x40); }
|
||||
void vpmultishiftqb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x83); }
|
||||
void vpopcntb(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x54); }
|
||||
|
|
32
externals/xbyak/xbyak/xbyak_util.h
vendored
32
externals/xbyak/xbyak/xbyak_util.h
vendored
|
@ -4,7 +4,6 @@
|
|||
#ifdef XBYAK_ONLY_CLASS_CPU
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <algorithm>
|
||||
#include <assert.h>
|
||||
#ifndef XBYAK_THROW
|
||||
#define XBYAK_THROW(x) ;
|
||||
|
@ -96,6 +95,11 @@ struct TypeT {
|
|||
template<uint64_t L1, uint64_t H1, uint64_t L2, uint64_t H2>
|
||||
TypeT<L1 | L2, H1 | H2> operator|(TypeT<L1, H1>, TypeT<L2, H2>) { return TypeT<L1 | L2, H1 | H2>(); }
|
||||
|
||||
template<typename T>
|
||||
inline T max_(T x, T y) { return x >= y ? x : y; }
|
||||
template<typename T>
|
||||
inline T min_(T x, T y) { return x < y ? x : y; }
|
||||
|
||||
} // local
|
||||
|
||||
/**
|
||||
|
@ -193,8 +197,8 @@ private:
|
|||
/*
|
||||
Fallback values in case a hypervisor has 0xB leaf zeroed-out.
|
||||
*/
|
||||
numCores_[SmtLevel - 1] = (std::max)(1u, numCores_[SmtLevel - 1]);
|
||||
numCores_[CoreLevel - 1] = (std::max)(numCores_[SmtLevel - 1], numCores_[CoreLevel - 1]);
|
||||
numCores_[SmtLevel - 1] = local::max_(1u, numCores_[SmtLevel - 1]);
|
||||
numCores_[CoreLevel - 1] = local::max_(numCores_[SmtLevel - 1], numCores_[CoreLevel - 1]);
|
||||
} else {
|
||||
/*
|
||||
Failed to deremine num of cores without x2APIC support.
|
||||
|
@ -237,7 +241,7 @@ private:
|
|||
if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
|
||||
uint32_t actual_logical_cores = extractBit(data[0], 14, 25) + 1;
|
||||
if (logical_cores != 0) { // true only if leaf 0xB is supported and valid
|
||||
actual_logical_cores = (std::min)(actual_logical_cores, logical_cores);
|
||||
actual_logical_cores = local::min_(actual_logical_cores, logical_cores);
|
||||
}
|
||||
assert(actual_logical_cores != 0);
|
||||
dataCacheSize_[dataCacheLevels_] =
|
||||
|
@ -247,7 +251,7 @@ private:
|
|||
* (data[2] + 1);
|
||||
if (cacheType == DATA_CACHE && smt_width == 0) smt_width = actual_logical_cores;
|
||||
assert(smt_width != 0);
|
||||
coresSharignDataCache_[dataCacheLevels_] = (std::max)(actual_logical_cores / smt_width, 1u);
|
||||
coresSharignDataCache_[dataCacheLevels_] = local::max_(actual_logical_cores / smt_width, 1u);
|
||||
dataCacheLevels_++;
|
||||
}
|
||||
}
|
||||
|
@ -302,7 +306,7 @@ public:
|
|||
static inline void getCpuidEx(uint32_t eaxIn, uint32_t ecxIn, uint32_t data[4])
|
||||
{
|
||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _WIN32
|
||||
__cpuidex(reinterpret_cast<int*>(data), eaxIn, ecxIn);
|
||||
#else
|
||||
__cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]);
|
||||
|
@ -406,6 +410,13 @@ public:
|
|||
XBYAK_DEFINE_TYPE(65, tMOVDIRI);
|
||||
XBYAK_DEFINE_TYPE(66, tMOVDIR64B);
|
||||
XBYAK_DEFINE_TYPE(67, tCLZERO); // AMD Zen
|
||||
XBYAK_DEFINE_TYPE(68, tAMX_FP16);
|
||||
XBYAK_DEFINE_TYPE(69, tAVX_VNNI_INT8);
|
||||
XBYAK_DEFINE_TYPE(70, tAVX_NE_CONVERT);
|
||||
XBYAK_DEFINE_TYPE(71, tAVX_IFMA);
|
||||
XBYAK_DEFINE_TYPE(72, tRAO_INT);
|
||||
XBYAK_DEFINE_TYPE(73, tCMPCCXADD);
|
||||
XBYAK_DEFINE_TYPE(74, tPREFETCHITI);
|
||||
|
||||
#undef XBYAK_SPLIT_ID
|
||||
#undef XBYAK_DEFINE_TYPE
|
||||
|
@ -545,10 +556,17 @@ public:
|
|||
if (EDX & (1U << 22)) type_ |= tAMX_BF16;
|
||||
if (maxNumSubLeaves >= 1) {
|
||||
getCpuidEx(7, 1, data);
|
||||
if (EAX & (1U << 3)) type_ |= tRAO_INT;
|
||||
if (EAX & (1U << 4)) type_ |= tAVX_VNNI;
|
||||
if (type_ & tAVX512F) {
|
||||
if (EAX & (1U << 5)) type_ |= tAVX512_BF16;
|
||||
}
|
||||
if (EAX & (1U << 7)) type_ |= tCMPCCXADD;
|
||||
if (EAX & (1U << 21)) type_ |= tAMX_FP16;
|
||||
if (EAX & (1U << 23)) type_ |= tAVX_IFMA;
|
||||
if (EDX & (1U << 4)) type_ |= tAVX_VNNI_INT8;
|
||||
if (EDX & (1U << 5)) type_ |= tAVX_NE_CONVERT;
|
||||
if (EDX & (1U << 14)) type_ |= tPREFETCHITI;
|
||||
}
|
||||
}
|
||||
setFamily();
|
||||
|
@ -771,7 +789,7 @@ public:
|
|||
const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0);
|
||||
if (tNum_ < 0 || allRegNum > maxRegNum) XBYAK_THROW(ERR_BAD_TNUM)
|
||||
const Reg64& _rsp = code->rsp;
|
||||
saveNum_ = (std::max)(0, allRegNum - noSaveNum);
|
||||
saveNum_ = local::max_(0, allRegNum - noSaveNum);
|
||||
const int *tbl = getOrderTbl() + noSaveNum;
|
||||
for (int i = 0; i < saveNum_; i++) {
|
||||
code->push(Reg64(tbl[i]));
|
||||
|
|
Loading…
Reference in a new issue