externals: Update xbyak to v7.05
Merge commit 'fdf626b74f35deedce0e6196c36b8c9f846c038a'
This commit is contained in:
commit
213fe7a452
62 changed files with 5179 additions and 2059 deletions
|
@ -149,7 +149,7 @@ if ("arm64" IN_LIST ARCHITECTURE OR DYNARMIC_TESTS)
|
|||
endif()
|
||||
|
||||
if ("x86_64" IN_LIST ARCHITECTURE)
|
||||
find_package(xbyak 6 CONFIG)
|
||||
find_package(xbyak 7 CONFIG)
|
||||
find_package(Zydis 4 CONFIG)
|
||||
endif()
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ if (NOT @BUILD_SHARED_LIBS@)
|
|||
endif()
|
||||
|
||||
if ("x86_64" IN_LIST ARCHITECTURE)
|
||||
find_dependency(xbyak 6)
|
||||
find_dependency(xbyak 7)
|
||||
find_dependency(Zydis 4)
|
||||
endif()
|
||||
|
||||
|
|
78
externals/xbyak/CMakeLists.txt
vendored
78
externals/xbyak/CMakeLists.txt
vendored
|
@ -1,53 +1,49 @@
|
|||
cmake_minimum_required(VERSION 2.6...3.0.2)
|
||||
cmake_minimum_required(VERSION 3.5)
|
||||
|
||||
project(xbyak LANGUAGES CXX VERSION 6.68)
|
||||
project(xbyak LANGUAGES CXX VERSION 7.05)
|
||||
|
||||
file(GLOB headers xbyak/*.h)
|
||||
|
||||
if (DEFINED CMAKE_VERSION AND CMAKE_VERSION VERSION_GREATER_EQUAL 3.0.2)
|
||||
include(GNUInstallDirs)
|
||||
add_library(${PROJECT_NAME} INTERFACE)
|
||||
add_library(${PROJECT_NAME}::${PROJECT_NAME} ALIAS ${PROJECT_NAME})
|
||||
|
||||
target_include_directories(
|
||||
${PROJECT_NAME} INTERFACE
|
||||
"$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>"
|
||||
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
|
||||
)
|
||||
include(GNUInstallDirs)
|
||||
add_library(${PROJECT_NAME} INTERFACE)
|
||||
add_library(${PROJECT_NAME}::${PROJECT_NAME} ALIAS ${PROJECT_NAME})
|
||||
|
||||
install(
|
||||
TARGETS ${PROJECT_NAME}
|
||||
EXPORT ${PROJECT_NAME}-targets
|
||||
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}
|
||||
)
|
||||
target_include_directories(
|
||||
${PROJECT_NAME} INTERFACE
|
||||
"$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>"
|
||||
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
|
||||
)
|
||||
|
||||
include(CMakePackageConfigHelpers)
|
||||
configure_package_config_file(
|
||||
cmake/config.cmake.in
|
||||
install(
|
||||
TARGETS ${PROJECT_NAME}
|
||||
EXPORT ${PROJECT_NAME}-targets
|
||||
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}
|
||||
)
|
||||
|
||||
include(CMakePackageConfigHelpers)
|
||||
configure_package_config_file(
|
||||
cmake/config.cmake.in
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config.cmake"
|
||||
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
|
||||
)
|
||||
write_basic_package_version_file(
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config-version.cmake"
|
||||
COMPATIBILITY SameMajorVersion
|
||||
)
|
||||
|
||||
install(
|
||||
FILES
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config.cmake"
|
||||
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
|
||||
)
|
||||
write_basic_package_version_file(
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config-version.cmake"
|
||||
COMPATIBILITY SameMajorVersion
|
||||
)
|
||||
DESTINATION
|
||||
${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
|
||||
)
|
||||
|
||||
install(
|
||||
FILES
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config.cmake"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config-version.cmake"
|
||||
DESTINATION
|
||||
${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
|
||||
)
|
||||
|
||||
install(
|
||||
EXPORT ${PROJECT_NAME}-targets
|
||||
NAMESPACE ${PROJECT_NAME}::
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
|
||||
)
|
||||
elseif(NOT DEFINED CMAKE_INSTALL_INCLUDEDIR)
|
||||
set(CMAKE_INSTALL_INCLUDEDIR "include")
|
||||
endif()
|
||||
install(
|
||||
EXPORT ${PROJECT_NAME}-targets
|
||||
NAMESPACE ${PROJECT_NAME}::
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
|
||||
)
|
||||
|
||||
install(
|
||||
FILES ${headers}
|
||||
|
|
13
externals/xbyak/doc/changelog.md
vendored
13
externals/xbyak/doc/changelog.md
vendored
|
@ -1,5 +1,18 @@
|
|||
# History
|
||||
|
||||
* 2024/Jan/03 ver 7.05 support RAO-INT for APX
|
||||
* 2023/Dec/28 ver 7.04 rex2 supports two-byte opcode
|
||||
* 2023/Dec/26 ver 7.03 set the default value of dfv to 0
|
||||
* 2023/Dec/20 ver 7.02 SHA* support APX
|
||||
* 2023/Dec/19 ver 7.01 support AESKLE, WIDE_KL, KEYLOCKER, KEYLOCKER_WIDE, detection of APX10/APX
|
||||
* 2023/Dec/01 ver 7.00 support APX
|
||||
* 2023/Aug/07 ver 6.73 add sha512/sm3/sm4/avx-vnni-int16
|
||||
* 2023/Aug/02 ver 6.72 add xbegin/xabort/xend
|
||||
* 2023/Jul/27 ver 6.71 Allocator supports huge page
|
||||
* 2023/Jul/05 ver 6.70 add alias of vclmulqdq, correct alias of pclmulqdq
|
||||
* 2023/Jun/27 ver 6.69.2 add constexpr to `TypeT operator|` (thanks to Wunkolo)
|
||||
* 2023/Mar/23 ver 6.69.1 add detection of xsave (thanks to Wunkolo)
|
||||
* 2023/Feb/20 ver 6.69 util::Cpu supports AMD CPUs. support UINTR
|
||||
* 2022/Dec/07 ver 6.68 support prefetchit{0,1}
|
||||
* 2022/Nov/30 ver 6.67 support CMPccXADD
|
||||
* 2022/Nov/25 ver 6.66 support RAO-INT
|
||||
|
|
28
externals/xbyak/doc/usage.md
vendored
28
externals/xbyak/doc/usage.md
vendored
|
@ -128,6 +128,34 @@ vpdpbusd(xm0, xm1, xm2); // VEX encoding
|
|||
* use `ptr_b` for broadcast `{1toX}`. X is automatically determined.
|
||||
* specify `xword`/`yword`/`zword(_b)` for m128/m256/m512 if necessary.
|
||||
|
||||
## APX
|
||||
[Advanced Performance Extensions (APX) Architecture Specification](https://www.intel.com/content/www/us/en/content-details/786223/intel-advanced-performance-extensions-intel-apx-architecture-specification.html)
|
||||
- Support 16 additional 64-bit GPRs (general-purpose registers) r16, ..., r31
|
||||
- 32-bit regs are r16d, ..., r31d
|
||||
- 16-bit regs are r16w, ..., r31w
|
||||
- 8-bit regs are r16b, ..., r31b
|
||||
- `add(r20, r21);`
|
||||
- `lea(r30, ptr[r29+r31]);`
|
||||
- Support three-operand instructions
|
||||
- `add(r20, r21, r23);`
|
||||
- `add(r20, ptr[rax + rcx * 8 + 0x1234], r23);`
|
||||
- Support T_nf for NF=1 (status flags update suppression)
|
||||
- `add(r20|T_nf, r21, r23);` // Set EVEX.NF=1
|
||||
- Support T_zu for ND=ZU (zero upper) for imul and setcc
|
||||
- `imul(ax|T_zu, cx, 0x1234);` // Set ND=ZU
|
||||
- `imul(ax|T_zu|T_nf, cx, 0x1234);` // Set ND=ZU and EVEX.NF=1
|
||||
- `setb(r31b|T_zu);` // same as setb(r31b); movzx(r31, r31b);
|
||||
- See [sample/zero_upper.cpp](../sample/zero_upper.cpp)
|
||||
|
||||
### ccmpSCC and ctestSCC
|
||||
|
||||
- ccmpSCC(op1, op2, dfv = 0); // eflags = eflags == SCC ? cmp(op1, op2) : dfv
|
||||
- ctestSCC(op1, op2, dfv = 0); // eflags = eflags == SCC ? test(op1, op2) : dfv
|
||||
- SCC means source condition code such as z, a, gt.
|
||||
- See [sample/ccmp.cpp](../sample/ccmp.cpp)
|
||||
- Specify the union of T_of(=8), T_sf(=4), T_zf(=2), or T_cf(=1) for dfv.
|
||||
|
||||
|
||||
## Label
|
||||
Two kinds of Label are supported. (String literal and Label class).
|
||||
|
||||
|
|
13
externals/xbyak/gen/Makefile
vendored
13
externals/xbyak/gen/Makefile
vendored
|
@ -1,12 +1,17 @@
|
|||
TARGET=../xbyak/xbyak_mnemonic.h
|
||||
BIN=sortline gen_code gen_avx512
|
||||
CFLAGS=-I../ -O2 -Wall -Wextra -Wno-missing-field-initializers $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS)
|
||||
CFLAGS=-I../ -I ./ -Wall -Wextra -Wno-missing-field-initializers $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS)
|
||||
all: $(TARGET) ../CMakeLists.txt ../meson.build ../readme.md ../readme.txt
|
||||
|
||||
avx_type_def.h: ../xbyak/xbyak.h
|
||||
sed -n '/@@@begin of avx_type_def.h/,/@@@end of avx_type_def.h/p' $< > $@
|
||||
|
||||
avx_type.hpp: avx_type_def.h
|
||||
sortline: sortline.cpp
|
||||
$(CXX) $(CFLAGS) $< -o $@
|
||||
gen_code: gen_code.cpp ../xbyak/xbyak.h avx_type.hpp
|
||||
gen_code: gen_code.cpp avx_type.hpp
|
||||
$(CXX) $(CFLAGS) $< -o $@
|
||||
gen_avx512: gen_avx512.cpp ../xbyak/xbyak.h avx_type.hpp
|
||||
gen_avx512: gen_avx512.cpp avx_type.hpp
|
||||
$(CXX) $(CFLAGS) $< -o $@
|
||||
|
||||
$(TARGET): $(BIN)
|
||||
|
@ -36,4 +41,4 @@ VER=$(shell head -n 1 ../xbyak/xbyak_mnemonic.h|grep -o "[0-9.]*")
|
|||
sed -l 2 -i -e "s/Xbyak [0-9.]*/Xbyak $(VER)/" $@
|
||||
|
||||
clean:
|
||||
$(RM) $(BIN) $(TARGET)
|
||||
$(RM) $(BIN) $(TARGET) avx_type_def.h
|
||||
|
|
210
externals/xbyak/gen/avx_type.hpp
vendored
210
externals/xbyak/gen/avx_type.hpp
vendored
|
@ -1,190 +1,72 @@
|
|||
#include <assert.h>
|
||||
// copy CodeGenerator::AVXtype
|
||||
enum AVXtype {
|
||||
// low 3 bit
|
||||
T_N1 = 1,
|
||||
T_N2 = 2,
|
||||
T_N4 = 3,
|
||||
T_N8 = 4,
|
||||
T_N16 = 5,
|
||||
T_N32 = 6,
|
||||
T_NX_MASK = 7,
|
||||
//
|
||||
T_N_VL = 1 << 3, // N * (1, 2, 4) for VL
|
||||
T_DUP = 1 << 4, // N = (8, 32, 64)
|
||||
T_66 = 1 << 5, // pp = 1
|
||||
T_F3 = 1 << 6, // pp = 2
|
||||
T_F2 = T_66 | T_F3, // pp = 3
|
||||
T_ER_R = 1 << 7, // reg{er}
|
||||
T_0F = 1 << 8,
|
||||
T_0F38 = 1 << 9,
|
||||
T_0F3A = 1 << 10,
|
||||
T_L0 = 1 << 11,
|
||||
T_L1 = 1 << 12,
|
||||
T_W0 = 1 << 13,
|
||||
T_W1 = 1 << 14,
|
||||
T_EW0 = 1 << 15,
|
||||
T_EW1 = 1 << 16,
|
||||
T_YMM = 1 << 17, // support YMM, ZMM
|
||||
T_EVEX = 1 << 18,
|
||||
T_ER_X = 1 << 19, // xmm{er}
|
||||
T_ER_Y = 1 << 20, // ymm{er}
|
||||
T_ER_Z = 1 << 21, // zmm{er}
|
||||
T_SAE_X = 1 << 22, // xmm{sae}
|
||||
T_SAE_Y = 1 << 23, // ymm{sae}
|
||||
T_SAE_Z = 1 << 24, // zmm{sae}
|
||||
T_MUST_EVEX = 1 << 25, // contains T_EVEX
|
||||
T_B32 = 1 << 26, // m32bcst
|
||||
T_B64 = 1 << 27, // m64bcst
|
||||
T_B16 = T_B32 | T_B64, // m16bcst
|
||||
T_M_K = 1 << 28, // mem{k}
|
||||
T_VSIB = 1 << 29,
|
||||
T_MEM_EVEX = 1 << 30, // use evex if mem
|
||||
T_FP16 = 1 << 31,
|
||||
T_MAP5 = T_FP16 | T_0F,
|
||||
T_MAP6 = T_FP16 | T_0F38,
|
||||
T_XXX
|
||||
};
|
||||
// T_66 = 1, T_F3 = 2, T_F2 = 3
|
||||
uint32_t getPP(int type) { return (type >> 5) & 3; }
|
||||
|
||||
#include "avx_type_def.h"
|
||||
|
||||
const int NONE = 256; // same as Xbyak::CodeGenerator::NONE
|
||||
|
||||
std::string type2String(int type)
|
||||
std::string type2String(uint64_t type)
|
||||
{
|
||||
if (type == 0) return "T_NONE";
|
||||
std::string str;
|
||||
int low = type & T_NX_MASK;
|
||||
if (0 < low) {
|
||||
if (0 < low && low < 7) {
|
||||
const char *tbl[8] = {
|
||||
"T_N1", "T_N2", "T_N4", "T_N8", "T_N16", "T_N32"
|
||||
};
|
||||
assert(low < int(sizeof(tbl) / sizeof(tbl[0])));
|
||||
str = tbl[low - 1];
|
||||
}
|
||||
if (type & T_N_VL) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_N_VL";
|
||||
}
|
||||
if (type & T_DUP) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_DUP";
|
||||
}
|
||||
if (type & T_F2) {
|
||||
if (!str.empty()) str += " | ";
|
||||
switch (type & T_F2) {
|
||||
case T_66: str += "T_66"; break;
|
||||
case T_F3: str += "T_F3"; break;
|
||||
case T_F2: str += "T_F2"; break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
if (type & T_N_VL) str += "|T_N_VL";
|
||||
if (type & T_APX) str += "|T_APX";
|
||||
if ((type & T_NX_MASK) == T_DUP) str += "|T_DUP";
|
||||
if (type & T_66) str += "|T_66";
|
||||
if (type & T_F3) str += "|T_F3";
|
||||
if (type & T_F2) str += "|T_F2";
|
||||
if (type & T_0F) {
|
||||
if (!str.empty()) str += " | ";
|
||||
if (type & T_FP16) {
|
||||
str += "T_MAP5";
|
||||
str += "|T_MAP5";
|
||||
} else {
|
||||
str += "T_0F";
|
||||
str += "|T_0F";
|
||||
}
|
||||
}
|
||||
if (type & T_0F38) {
|
||||
if (!str.empty()) str += " | ";
|
||||
if (type & T_FP16) {
|
||||
str += "T_MAP6";
|
||||
str += "|T_MAP6";
|
||||
} else {
|
||||
str += "T_0F38";
|
||||
str += "|T_0F38";
|
||||
}
|
||||
}
|
||||
if (type & T_0F3A) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_0F3A";
|
||||
}
|
||||
if (type & T_L0) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "VEZ_L0";
|
||||
}
|
||||
if (type & T_L1) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "VEZ_L1";
|
||||
}
|
||||
if (type & T_W0) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_W0";
|
||||
}
|
||||
if (type & T_W1) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_W1";
|
||||
}
|
||||
if (type & T_EW0) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_EW0";
|
||||
}
|
||||
if (type & T_EW1) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_EW1";
|
||||
}
|
||||
if (type & T_YMM) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_YMM";
|
||||
}
|
||||
if (type & T_EVEX) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_EVEX";
|
||||
}
|
||||
if (type & T_ER_X) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_ER_X";
|
||||
}
|
||||
if (type & T_ER_Y) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_ER_Y";
|
||||
}
|
||||
if (type & T_ER_Z) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_ER_Z";
|
||||
}
|
||||
if (type & T_ER_R) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_ER_R";
|
||||
}
|
||||
if (type & T_SAE_X) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_SAE_X";
|
||||
}
|
||||
if (type & T_SAE_Y) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_SAE_Y";
|
||||
}
|
||||
if (type & T_SAE_Z) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_SAE_Z";
|
||||
}
|
||||
if (type & T_MUST_EVEX) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_MUST_EVEX";
|
||||
}
|
||||
if (type & T_B32) {
|
||||
if (!str.empty()) str += " | ";
|
||||
if (type & T_B64) {
|
||||
str += "T_B16"; // T_B16 = T_B32 | T_B64
|
||||
} else {
|
||||
str += "T_B32";
|
||||
}
|
||||
} else if (type & T_B64) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_B64";
|
||||
}
|
||||
if (type & T_M_K) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_M_K";
|
||||
}
|
||||
if (type & T_VSIB) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_VSIB";
|
||||
}
|
||||
if (type & T_MEM_EVEX) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_MEM_EVEX";
|
||||
if (type & T_0F3A) str += "|T_0F3A";
|
||||
if (type & T_L0) str += "|T_L0";
|
||||
if (type & T_L1) str += "|T_L1";
|
||||
if (type & T_W0) str += "|T_W0";
|
||||
if (type & T_W1) str += "|T_W1";
|
||||
if (type & T_EW0) str += "|T_EW0";
|
||||
if (type & T_EW1) str += "|T_EW1";
|
||||
if (type & T_YMM) str += "|T_YMM";
|
||||
if (type & T_EVEX) str += "|T_EVEX";
|
||||
if (type & T_ER_X) str += "|T_ER_X";
|
||||
if (type & T_ER_Y) str += "|T_ER_Y";
|
||||
if (type & T_ER_Z) str += "|T_ER_Z";
|
||||
if (type & T_ER_R) str += "|T_ER_R";
|
||||
if (type & T_SAE_X) str += "|T_SAE_X";
|
||||
if (type & T_SAE_Y) str += "|T_SAE_Y";
|
||||
if (type & T_SAE_Z) str += "|T_SAE_Z";
|
||||
if (type & T_MUST_EVEX) str += "|T_MUST_EVEX";
|
||||
|
||||
switch (type & T_B16) { // T_B16 = T_B32 | T_B64
|
||||
case T_B16: str += "|T_B16"; break;
|
||||
case T_B32: str += "|T_B32"; break;
|
||||
case T_B64: str += "|T_B64"; break;
|
||||
default: break;
|
||||
}
|
||||
if (type & T_M_K) str += "|T_M_K";
|
||||
if (type & T_VSIB) str += "|T_VSIB";
|
||||
if (type & T_MEM_EVEX) str += "|T_MEM_EVEX";
|
||||
if (type & T_NF) str += "|T_NF";
|
||||
if (type & T_CODE1_IF1) str += "|T_CODE1_IF1";
|
||||
if (type & T_ND1) str += "|T_ND1";
|
||||
if (type & T_ZU) str += "|T_ZU";
|
||||
|
||||
if (str[0] == '|') str = str.substr(1);
|
||||
return str;
|
||||
}
|
||||
|
|
52
externals/xbyak/gen/avx_type_def.h
vendored
Normal file
52
externals/xbyak/gen/avx_type_def.h
vendored
Normal file
|
@ -0,0 +1,52 @@
|
|||
// @@@begin of avx_type_def.h
|
||||
static const uint64_t T_NONE = 0ull;
|
||||
// low 3 bit
|
||||
static const uint64_t T_N1 = 1ull;
|
||||
static const uint64_t T_N2 = 2ull;
|
||||
static const uint64_t T_N4 = 3ull;
|
||||
static const uint64_t T_N8 = 4ull;
|
||||
static const uint64_t T_N16 = 5ull;
|
||||
static const uint64_t T_N32 = 6ull;
|
||||
static const uint64_t T_NX_MASK = 7ull;
|
||||
static const uint64_t T_DUP = T_NX_MASK;//1 << 4, // N = (8, 32, 64)
|
||||
static const uint64_t T_N_VL = 1ull << 3; // N * (1, 2, 4) for VL
|
||||
static const uint64_t T_APX = 1ull << 4;
|
||||
static const uint64_t T_66 = 1ull << 5; // pp = 1
|
||||
static const uint64_t T_F3 = 1ull << 6; // pp = 2
|
||||
static const uint64_t T_ER_R = 1ull << 7; // reg{er}
|
||||
static const uint64_t T_0F = 1ull << 8;
|
||||
static const uint64_t T_0F38 = 1ull << 9;
|
||||
static const uint64_t T_0F3A = 1ull << 10;
|
||||
static const uint64_t T_L0 = 1ull << 11;
|
||||
static const uint64_t T_L1 = 1ull << 12;
|
||||
static const uint64_t T_W0 = 1ull << 13;
|
||||
static const uint64_t T_W1 = 1ull << 14;
|
||||
static const uint64_t T_EW0 = 1ull << 15;
|
||||
static const uint64_t T_EW1 = 1ull << 16;
|
||||
static const uint64_t T_YMM = 1ull << 17; // support YMM, ZMM
|
||||
static const uint64_t T_EVEX = 1ull << 18;
|
||||
static const uint64_t T_ER_X = 1ull << 19; // xmm{er}
|
||||
static const uint64_t T_ER_Y = 1ull << 20; // ymm{er}
|
||||
static const uint64_t T_ER_Z = 1ull << 21; // zmm{er}
|
||||
static const uint64_t T_SAE_X = 1ull << 22; // xmm{sae}
|
||||
static const uint64_t T_SAE_Y = 1ull << 23; // ymm{sae}
|
||||
static const uint64_t T_SAE_Z = 1ull << 24; // zmm{sae}
|
||||
static const uint64_t T_MUST_EVEX = 1ull << 25; // contains T_EVEX
|
||||
static const uint64_t T_B32 = 1ull << 26; // m32bcst
|
||||
static const uint64_t T_B64 = 1ull << 27; // m64bcst
|
||||
static const uint64_t T_B16 = T_B32 | T_B64; // m16bcst (Be careful)
|
||||
static const uint64_t T_M_K = 1ull << 28; // mem{k}
|
||||
static const uint64_t T_VSIB = 1ull << 29;
|
||||
static const uint64_t T_MEM_EVEX = 1ull << 30; // use evex if mem
|
||||
static const uint64_t T_FP16 = 1ull << 31; // avx512-fp16
|
||||
static const uint64_t T_MAP5 = T_FP16 | T_0F;
|
||||
static const uint64_t T_MAP6 = T_FP16 | T_0F38;
|
||||
static const uint64_t T_NF = 1ull << 32; // T_nf
|
||||
static const uint64_t T_CODE1_IF1 = 1ull << 33; // code|=1 if !r.isBit(8)
|
||||
|
||||
static const uint64_t T_ND1 = 1ull << 35; // ND=1
|
||||
static const uint64_t T_ZU = 1ull << 36; // ND=ZU
|
||||
static const uint64_t T_F2 = 1ull << 37; // pp = 3
|
||||
// T_66 = 1, T_F3 = 2, T_F2 = 3
|
||||
static inline uint32_t getPP(uint64_t type) { return (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0; }
|
||||
// @@@end of avx_type_def.h
|
169
externals/xbyak/gen/gen_avx512.cpp
vendored
169
externals/xbyak/gen/gen_avx512.cpp
vendored
|
@ -15,8 +15,7 @@ using namespace Xbyak;
|
|||
void putOpmask(bool only64bit)
|
||||
{
|
||||
if (only64bit) {
|
||||
puts("void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); }");
|
||||
puts("void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); }");
|
||||
puts("void kmovq(const Reg64& r, const Opmask& k) { opKmov(k, r, true, 64); }");
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -76,22 +75,14 @@ void putOpmask(bool only64bit)
|
|||
printf("void %sd(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code + 1);
|
||||
}
|
||||
}
|
||||
puts("void kmovw(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); }");
|
||||
puts("void kmovq(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); }");
|
||||
puts("void kmovb(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); }");
|
||||
puts("void kmovd(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); }");
|
||||
|
||||
puts("void kmovw(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W0, 0x91); }");
|
||||
puts("void kmovq(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W1, 0x91); }");
|
||||
puts("void kmovb(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W0, 0x91); }");
|
||||
puts("void kmovd(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W1, 0x91); }");
|
||||
|
||||
puts("void kmovw(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_W0, 0x92); }");
|
||||
puts("void kmovw(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_W0, 0x93); }");
|
||||
puts("void kmovb(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_66 | T_W0, 0x92); }");
|
||||
puts("void kmovb(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_66 | T_W0, 0x93); }");
|
||||
puts("void kmovd(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W0, 0x92); }");
|
||||
puts("void kmovd(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W0, 0x93); }");
|
||||
for (int i = 0; i < 4; i++) {
|
||||
const char tbl[] = "bwdq";
|
||||
const int bitTbl[] = { 8, 16, 32, 64 };
|
||||
int bit = bitTbl[i];
|
||||
printf("void kmov%c(const Opmask& k, const Operand& op) { opKmov(k, op, false, %d); }\n", tbl[i], bit);
|
||||
printf("void kmov%c(const Address& addr, const Opmask& k) { opKmov(k, addr, true, %d); }\n", tbl[i], bit);
|
||||
if (i < 3) printf("void kmov%c(const Reg32& r, const Opmask& k) { opKmov(k, r, true, %d); }\n", tbl[i], bit);
|
||||
}
|
||||
}
|
||||
|
||||
// vcmppd(k, x, op)
|
||||
|
@ -100,7 +91,7 @@ void putVcmp()
|
|||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
uint64_t type;
|
||||
bool hasIMM;
|
||||
} tbl[] = {
|
||||
{ 0xC2, "vcmppd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_YMM | T_66 | T_B64, true },
|
||||
|
@ -142,9 +133,9 @@ void putVcmp()
|
|||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
std::string type = type2String(p->type);
|
||||
std::string s = type2String(p->type);
|
||||
printf("void %s(const Opmask& k, const Xmm& x, const Operand& op%s) { opAVX_K_X_XM(k, x, op, %s, 0x%02X%s); }\n"
|
||||
, p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
|
||||
, p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : "");
|
||||
}
|
||||
puts("void vcomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2F); }");
|
||||
puts("void vucomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2E); }");
|
||||
|
@ -173,7 +164,7 @@ void putX_XM()
|
|||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
uint64_t type;
|
||||
} tbl[] = {
|
||||
{ 0x6F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z },
|
||||
{ 0x6F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z },
|
||||
|
@ -210,8 +201,8 @@ void putX_XM()
|
|||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
std::string type = type2String(p->type);
|
||||
printf("void %s(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code);
|
||||
std::string s = type2String(p->type);
|
||||
printf("void %s(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
|
||||
}
|
||||
puts("void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); }");
|
||||
|
||||
|
@ -229,7 +220,7 @@ void putM_X()
|
|||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
uint64_t type;
|
||||
} tbl[] = {
|
||||
{ 0x7F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||
{ 0x7F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||
|
@ -242,8 +233,8 @@ void putM_X()
|
|||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
std::string type = type2String(p->type);
|
||||
printf("void %s(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, %s, 0x%02X); }\n", p->name, type.c_str(), p->code);
|
||||
std::string s = type2String(p->type);
|
||||
printf("void %s(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -252,7 +243,7 @@ void putXM_X()
|
|||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
uint64_t type;
|
||||
} tbl[] = {
|
||||
{ 0x8A, "vcompresspd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },
|
||||
{ 0x8A, "vcompressps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 },
|
||||
|
@ -265,8 +256,8 @@ void putXM_X()
|
|||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
std::string type = type2String(p->type);
|
||||
printf("void %s(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code);
|
||||
std::string s = type2String(p->type);
|
||||
printf("void %s(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, s.c_str(), p->code);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -275,7 +266,7 @@ void putX_X_XM_IMM()
|
|||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
uint64_t type;
|
||||
bool hasIMM;
|
||||
} tbl[] = {
|
||||
{ 0x03, "valignd", T_MUST_EVEX | T_66 | T_0F3A | T_EW0 | T_YMM, true },
|
||||
|
@ -413,9 +404,9 @@ void putX_X_XM_IMM()
|
|||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
std::string type = type2String(p->type);
|
||||
std::string s = type2String(p->type);
|
||||
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X%s); }\n"
|
||||
, p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
|
||||
, p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : "");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -425,7 +416,7 @@ void putShift()
|
|||
const char *name;
|
||||
uint8_t code;
|
||||
int idx;
|
||||
int type;
|
||||
uint64_t type;
|
||||
} tbl[] = {
|
||||
{ "vpsraq", 0x72, 4, T_0F | T_66 | T_YMM | T_MUST_EVEX |T_EW1 | T_B64 },
|
||||
{ "vprold", 0x72, 1, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 },
|
||||
|
@ -435,8 +426,8 @@ void putShift()
|
|||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type);
|
||||
printf("void %s(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, type.c_str(), p.code);
|
||||
std::string s = type2String(p.type);
|
||||
printf("void %s(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, s.c_str(), p.code);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -446,7 +437,7 @@ void putExtractInsert()
|
|||
const struct Tbl {
|
||||
const char *name;
|
||||
uint8_t code;
|
||||
int type;
|
||||
uint64_t type;
|
||||
bool isZMM;
|
||||
} tbl[] = {
|
||||
{ "vextractf32x4", 0x19, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false },
|
||||
|
@ -461,16 +452,16 @@ void putExtractInsert()
|
|||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type);
|
||||
std::string s = type2String(p.type);
|
||||
const char *kind = p.isZMM ? "Operand::MEM | Operand::YMM" : "Operand::MEM | Operand::XMM";
|
||||
printf("void %s(const Operand& op, const %s& r, uint8_t imm) { if (!op.is(%s)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, %s, 0x%2X, imm); }\n", p.name, p.isZMM ? "Zmm" : "Ymm", kind, type.c_str(), p.code);
|
||||
printf("void %s(const Operand& op, const %s& r, uint8_t imm) { if (!op.is(%s)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, %s, 0x%2X, imm); }\n", p.name, p.isZMM ? "Zmm" : "Ymm", kind, s.c_str(), p.code);
|
||||
}
|
||||
}
|
||||
{
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
uint8_t code;
|
||||
int type;
|
||||
uint64_t type;
|
||||
bool isZMM;
|
||||
} tbl[] = {
|
||||
{ "vinsertf32x4", 0x18, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false },
|
||||
|
@ -485,12 +476,12 @@ void putExtractInsert()
|
|||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type);
|
||||
std::string s = type2String(p.type);
|
||||
const char *x = p.isZMM ? "Zmm" : "Ymm";
|
||||
const char *cond = p.isZMM ? "op.is(Operand::MEM | Operand::YMM)" : "(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))";
|
||||
printf("void %s(const %s& r1, const %s& r2, const Operand& op, uint8_t imm) {"
|
||||
"if (!%s) XBYAK_THROW(ERR_BAD_COMBINATION) "
|
||||
"opVex(r1, &r2, op, %s, 0x%2X, imm); }\n", p.name, x, x, cond, type.c_str(), p.code);
|
||||
"opVex(r1, &r2, op, %s, 0x%2X, imm); }\n", p.name, x, x, cond, s.c_str(), p.code);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -501,7 +492,7 @@ void putBroadcast(bool only64bit)
|
|||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
uint64_t type;
|
||||
int reg;
|
||||
} tbl[] = {
|
||||
{ 0x7A, "vpbroadcastb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 8 },
|
||||
|
@ -511,9 +502,9 @@ void putBroadcast(bool only64bit)
|
|||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type);
|
||||
std::string s = type2String(p.type);
|
||||
if ((only64bit && p.reg == 64) || (!only64bit && p.reg != 64)) {
|
||||
printf("void %s(const Xmm& x, const Reg%d& r) { opVex(x, 0, r, %s, 0x%02X); }\n", p.name, p.reg, type.c_str(), p.code);
|
||||
printf("void %s(const Xmm& x, const Reg%d& r) { opVex(x, 0, r, %s, 0x%02X); }\n", p.name, p.reg, s.c_str(), p.code);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -536,7 +527,7 @@ void putCvt()
|
|||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
uint64_t type;
|
||||
int ptn;
|
||||
} tbl[] = {
|
||||
{ 0x79, "vcvtsd2usi", T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_ER_X, 0 },
|
||||
|
@ -583,28 +574,28 @@ void putCvt()
|
|||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type);
|
||||
std::string s = type2String(p.type);
|
||||
switch (p.ptn) {
|
||||
case 0:
|
||||
printf("void %s(const Reg32e& r, const Operand& op) { int type = (%s) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||
printf("void %s(const Reg32e& r, const Operand& op) { uint64_t type = (%s) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x%02X); }\n", p.name, s.c_str(), p.code);
|
||||
break;
|
||||
case 1:
|
||||
printf("void %s(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||
printf("void %s(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code);
|
||||
break;
|
||||
case 2:
|
||||
printf("void %s(const Xmm& x, const Operand& op) { opCvt2(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||
printf("void %s(const Xmm& x, const Operand& op) { opCvt2(x, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code);
|
||||
break;
|
||||
case 3:
|
||||
printf("void %s(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||
printf("void %s(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code);
|
||||
break;
|
||||
case 4:
|
||||
printf("void %s(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||
printf("void %s(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code);
|
||||
break;
|
||||
case 5:
|
||||
printf("void %s(const Xmm& x, const Operand& op) { opCvt5(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||
printf("void %s(const Xmm& x, const Operand& op) { opCvt5(x, op, %s, 0x%02X); }\n", p.name, s.c_str(), p.code);
|
||||
break;
|
||||
case 6:
|
||||
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { if (!(x1.isXMM() && x2.isXMM() && op.isBit(32|64))) XBYAK_THROW(ERR_BAD_COMBINATION) int type = (%s) | (op.isBit(32) ? (T_EW0 | T_N4) : (T_EW1 | T_N8)); opVex(x1, &x2, op, type, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { if (!(x1.isXMM() && x2.isXMM() && op.isBit(32|64))) XBYAK_THROW(ERR_BAD_COMBINATION) uint64_t type = (%s) | (op.isBit(32) ? (T_EW0 | T_N4) : (T_EW1 | T_N8)); opVex(x1, &x2, op, type, 0x%02X); }\n", p.name, s.c_str(), p.code);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -621,7 +612,7 @@ void putGather()
|
|||
{
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
int type;
|
||||
uint64_t type;
|
||||
uint8_t code;
|
||||
int mode;
|
||||
} tbl[] = {
|
||||
|
@ -636,15 +627,15 @@ void putGather()
|
|||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type | T_VSIB);
|
||||
printf("void %s(const Xmm& x, const Address& addr) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode);
|
||||
std::string s = type2String(p.type | T_VSIB);
|
||||
printf("void %s(const Xmm& x, const Address& addr) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, s.c_str(), p.code, p.mode);
|
||||
}
|
||||
}
|
||||
void putScatter()
|
||||
{
|
||||
const struct Tbl {
|
||||
const char *name;
|
||||
int type;
|
||||
uint64_t type;
|
||||
uint8_t code;
|
||||
int mode; // reverse of gather
|
||||
} tbl[] = {
|
||||
|
@ -660,8 +651,8 @@ void putScatter()
|
|||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type | T_VSIB);
|
||||
printf("void %s(const Address& addr, const Xmm& x) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode);
|
||||
std::string s = type2String(p.type | T_VSIB);
|
||||
printf("void %s(const Address& addr, const Xmm& x) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, s.c_str(), p.code, p.mode);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -689,7 +680,7 @@ void putMov()
|
|||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
uint64_t type;
|
||||
int mode;
|
||||
} tbl[] = {
|
||||
{ 0x32, "vpmovqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL | T_M_K, false },
|
||||
|
@ -718,8 +709,8 @@ void putMov()
|
|||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type);
|
||||
printf("void %s(const Operand& op, const Xmm& x) { opVmov(op, x, %s, 0x%02X, %s); }\n", p.name, type.c_str(), p.code, p.mode ? "true" : "false");
|
||||
std::string s = type2String(p.type);
|
||||
printf("void %s(const Operand& op, const Xmm& x) { opVmov(op, x, %s, 0x%02X, %s); }\n", p.name, s.c_str(), p.code, p.mode ? "true" : "false");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -729,7 +720,7 @@ void putX_XM_IMM()
|
|||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
uint64_t type;
|
||||
bool hasIMM;
|
||||
} tbl[] = {
|
||||
{ 0x26, "vgetmantpd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
|
||||
|
@ -770,9 +761,9 @@ void putX_XM_IMM()
|
|||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
std::string type = type2String(p->type);
|
||||
std::string s = type2String(p->type);
|
||||
printf("void %s(const Xmm& x, const Operand& op%s) { opAVX_X_XM_IMM(x, op, %s, 0x%02X%s); }\n"
|
||||
, p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
|
||||
, p->name, p->hasIMM ? ", uint8_t imm" : "", s.c_str(), p->code, p->hasIMM ? ", imm" : "");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -784,7 +775,7 @@ void putMisc()
|
|||
const struct Tbl {
|
||||
const char *name;
|
||||
int zm;
|
||||
int type;
|
||||
uint64_t type;
|
||||
uint8_t code;
|
||||
bool isZmm;
|
||||
} tbl[] = {
|
||||
|
@ -810,9 +801,9 @@ void putMisc()
|
|||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type | T_66 | T_0F38 | T_MUST_EVEX | T_M_K | T_VSIB);
|
||||
std::string s = type2String(p.type | T_66 | T_0F38 | T_MUST_EVEX | T_M_K | T_VSIB);
|
||||
printf("void %s(const Address& addr) { opGatherFetch(addr, zm%d, %s, 0x%2X, Operand::%s); }\n"
|
||||
, p.name, p.zm, type.c_str(), p.code, p.isZmm ? "ZMM" : "YMM");
|
||||
, p.name, p.zm, s.c_str(), p.code, p.isZmm ? "ZMM" : "YMM");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -887,18 +878,18 @@ void putFP16_FMA()
|
|||
{ "213", 0xA0 },
|
||||
{ "231", 0xB0 },
|
||||
};
|
||||
int t = T_66 | T_MAP6 | T_EW0 | T_MUST_EVEX;
|
||||
uint64_t type = T_66 | T_MAP6 | T_EW0 | T_MUST_EVEX;
|
||||
const char *suf = 0;
|
||||
if (tbl[i].isPH) {
|
||||
t |= T_ER_Z | T_YMM | T_B16;
|
||||
type |= T_ER_Z | T_YMM | T_B16;
|
||||
suf = "ph";
|
||||
} else {
|
||||
t |= T_ER_X | T_N2;
|
||||
type |= T_ER_X | T_N2;
|
||||
suf = "sh";
|
||||
}
|
||||
std::string type = type2String(t);
|
||||
std::string s = type2String(type);
|
||||
printf("void %s%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n"
|
||||
, tbl[i].name, ord[k].str, suf, type.c_str(), tbl[i].code | ord[k].code);
|
||||
, tbl[i].name, ord[k].str, suf, s.c_str(), tbl[i].code | ord[k].code);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -914,23 +905,23 @@ void putFP16_FMA2()
|
|||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
for (int j = 0; j < 2; j++) {
|
||||
int t = T_MAP6 | T_EW0 | T_MUST_EVEX;
|
||||
uint64_t type = T_MAP6 | T_EW0 | T_MUST_EVEX;
|
||||
if (j == 0) {
|
||||
t |= T_F2;
|
||||
type |= T_F2;
|
||||
} else {
|
||||
t |= T_F3;
|
||||
type |= T_F3;
|
||||
}
|
||||
const char *suf = 0;
|
||||
if (tbl[i].isPH) {
|
||||
t |= T_ER_Z | T_YMM | T_B32;
|
||||
type |= T_ER_Z | T_YMM | T_B32;
|
||||
suf = "ph";
|
||||
} else {
|
||||
t |= T_ER_X | T_N2;
|
||||
type |= T_ER_X | T_N2;
|
||||
suf = "sh";
|
||||
}
|
||||
std::string type = type2String(t);
|
||||
std::string s = type2String(type);
|
||||
printf("void vf%s%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n"
|
||||
, j == 0 ? "c" : "", tbl[i].name, suf, type.c_str(), tbl[i].code);
|
||||
, j == 0 ? "c" : "", tbl[i].name, suf, s.c_str(), tbl[i].code);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -938,16 +929,16 @@ void putFP16_FMA2()
|
|||
void putFP16_2()
|
||||
{
|
||||
{
|
||||
int t = T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2;
|
||||
std::string type = type2String(t);
|
||||
printf("void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", type.c_str());
|
||||
printf("void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, %s, 0x10); }\n", type.c_str());
|
||||
uint64_t type = T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2;
|
||||
std::string s = type2String(type);
|
||||
printf("void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", s.c_str());
|
||||
printf("void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, %s, 0x10); }\n", s.c_str());
|
||||
}
|
||||
{
|
||||
int t = T_66 | T_MAP5 | T_MUST_EVEX | T_N2;
|
||||
std::string type = type2String(t);
|
||||
printf("void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, %s, 0x6E); }\n", type.c_str());
|
||||
printf("void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, %s, 0x7E); }\n", type.c_str());
|
||||
uint64_t type = T_66 | T_MAP5 | T_MUST_EVEX | T_N2;
|
||||
std::string s = type2String(type);
|
||||
printf("void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, %s, 0x6E); }\n", s.c_str());
|
||||
printf("void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, %s, 0x7E); }\n", s.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
762
externals/xbyak/gen/gen_code.cpp
vendored
762
externals/xbyak/gen/gen_code.cpp
vendored
File diff suppressed because it is too large
Load diff
2
externals/xbyak/meson.build
vendored
2
externals/xbyak/meson.build
vendored
|
@ -5,7 +5,7 @@
|
|||
project(
|
||||
'xbyak',
|
||||
'cpp',
|
||||
version: '6.68',
|
||||
version: '7.05',
|
||||
license: 'BSD-3-Clause',
|
||||
default_options: 'b_ndebug=if-release'
|
||||
)
|
||||
|
|
10
externals/xbyak/readme.md
vendored
10
externals/xbyak/readme.md
vendored
|
@ -1,5 +1,5 @@
|
|||
|
||||
# Xbyak 6.68 [![Badge Build]][Build Status]
|
||||
# Xbyak 7.05 [![Badge Build]][Build Status]
|
||||
|
||||
*A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)*
|
||||
|
||||
|
@ -21,13 +21,21 @@ It is named from a Japanese word [開闢](https://translate.google.com/?hl=ja&sl
|
|||
- header file only
|
||||
- Intel/MASM like syntax
|
||||
- fully support AVX-512
|
||||
- support APX/AVX10
|
||||
|
||||
**Note**:
|
||||
Use `and_()`, `or_()`, ... instead of `and()`, `or()`.
|
||||
If you want to use `and()`, `or()`, ... themselves, specify the `-fno-operator-names` option to gcc/clang.
|
||||
|
||||
### Derived Projects
|
||||
- [Xbyak_aarch64](https://github.com/fujitsu/xbyak_aarch64/) : for AArch64
|
||||
- [Xbyak_riscv](https://github.com/herumi/xbyak_riscv) : for RISC-V
|
||||
|
||||
### News
|
||||
|
||||
- support RAO-INT for APX
|
||||
- support AVX10 detection, AESKLE, WIDE_KL, KEYLOCKER, KEYLOCKER_WIDE
|
||||
- support APX except for a few instructions
|
||||
- add amx_fp16/avx_vnni_int8/avx_ne_convert/avx-ifma
|
||||
- add movdiri, movdir64b, clwb, cldemote
|
||||
- WAITPKG instructions (tpause, umonitor, umwait) are supported.
|
||||
|
|
17
externals/xbyak/readme.txt
vendored
17
externals/xbyak/readme.txt
vendored
|
@ -1,5 +1,5 @@
|
|||
|
||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.68
|
||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 7.05
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎概要
|
||||
|
@ -46,6 +46,8 @@ Linuxではmake installで/usr/local/include/xbyakにコピーされます。
|
|||
-----------------------------------------------------------------------------
|
||||
◎新機能
|
||||
|
||||
APX/AVX10対応
|
||||
|
||||
例外なしモード追加
|
||||
XBYAK_NO_EXCEPTIONを定義してコンパイルするとgcc/clangで-fno-exceptionsオプションでコンパイルできます。
|
||||
エラーは例外の代わりに`Xbyak::GetError()`で通達されます。
|
||||
|
@ -402,6 +404,19 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
|
|||
-----------------------------------------------------------------------------
|
||||
◎履歴
|
||||
|
||||
2024/01/03 ver 7.05 APX対応RAO-INT
|
||||
2023/12/28 ver 7.04 2バイトオペコードのrex2対応
|
||||
2023/12/26 ver 7.03 dfvのデフォルト値を0に設定
|
||||
2023/12/20 ver 7.02 SHA*のAPX対応
|
||||
2023/12/19 ver 7.01 AESKLE, WIDE_KL, KEYLOCKER, KEYLOCKER_WIDE対応 AVX10/APX判定対応
|
||||
2023/12/01 ver 7.00 APX対応
|
||||
2023/08/07 ver 6.73 sha512/sm3/sm4/avx-vnni-int16追加
|
||||
2023/08/02 ver 6.72 xabort, xbegin, xend追加
|
||||
2023/07/27 ver 6.71 Allocatorでhuge pageを考慮する。
|
||||
2023/07/05 ver 6.70 vpclmulqdqのalias追加
|
||||
2023/06/27 ver 6.69.2 `TypeT operator|`にconstexpr追加(thanks to Wunkolo)
|
||||
2023/03/23 ver 6.69.1 xsave判定追加(thanks to Wunkolo)
|
||||
2023/02/20 ver 6.69 util::CpuがAMD対応 UINTR命令対応
|
||||
2022/12/07 ver 6.68 prefetchit{0,1}サポート
|
||||
2022/11/30 ver 6.67 CMPccXADDサポート
|
||||
2022/11/25 ver 6.66 RAO-INTサポート
|
||||
|
|
16
externals/xbyak/sample/Makefile
vendored
16
externals/xbyak/sample/Makefile
vendored
|
@ -30,7 +30,7 @@ else
|
|||
endif
|
||||
|
||||
ifeq ($(BIT),64)
|
||||
TARGET += test64 bf64 memfunc64 test_util64 jmp_table64
|
||||
TARGET += test64 bf64 memfunc64 test_util64 jmp_table64 zero_upper ccmp no_flags
|
||||
ifeq ($(BOOST_EXIST),1)
|
||||
TARGET += calc64 #calc2_64
|
||||
endif
|
||||
|
@ -103,6 +103,18 @@ profiler: profiler.cpp ../xbyak/xbyak_util.h
|
|||
$(CXX) $(CFLAGS) profiler.cpp -o $@
|
||||
profiler-vtune: profiler.cpp ../xbyak/xbyak_util.h
|
||||
$(CXX) $(CFLAGS) profiler.cpp -o $@ -DXBYAK_USE_VTUNE -I /opt/intel/vtune_amplifier/include/ -L /opt/intel/vtune_amplifier/lib64 -ljitprofiling -ldl
|
||||
zero_upper: zero_upper.cpp $(XBYAK_INC)
|
||||
$(CXX) $(CFLAGS) zero_upper.cpp -o $@
|
||||
test_zero_upper: zero_upper
|
||||
sde -future -- ./zero_upper
|
||||
ccmp: ccmp.cpp $(XBYAK_INC)
|
||||
$(CXX) $(CFLAGS) ccmp.cpp -o $@
|
||||
test_ccmp: ccmp
|
||||
sde -future -- ./ccmp
|
||||
no_flags: no_flags.cpp $(XBYAK_INC)
|
||||
$(CXX) $(CFLAGS) no_flags.cpp -o $@
|
||||
test_no_flags: no_flags
|
||||
sde -future -- ./no_flags
|
||||
|
||||
clean:
|
||||
rm -rf $(TARGET) profiler profiler-vtune
|
||||
|
@ -122,7 +134,7 @@ toyvm : toyvm.cpp $(XBYAK_INC)
|
|||
static_buf: static_buf.cpp $(XBYAK_INC)
|
||||
static_buf64: static_buf.cpp $(XBYAK_INC)
|
||||
test_util : test_util.cpp $(XBYAK_INC) ../xbyak/xbyak_util.h
|
||||
test_util2 : test_util.cpp $(XBYAK_INC) ../xbyak/xbyak_util.h
|
||||
test_util64 : test_util.cpp $(XBYAK_INC) ../xbyak/xbyak_util.h
|
||||
jmp_table: jmp_table.cpp $(XBYAK_INC)
|
||||
jmp_table64: jmp_table.cpp $(XBYAK_INC)
|
||||
memfd: memfd.cpp $(XBYAK_INC)
|
||||
|
|
68
externals/xbyak/sample/ccmp.cpp
vendored
Normal file
68
externals/xbyak/sample/ccmp.cpp
vendored
Normal file
|
@ -0,0 +1,68 @@
|
|||
/*
|
||||
An example of ccmp
|
||||
> g++ ccmp.cpp -I ../
|
||||
> sde -future -- ./a.out
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <xbyak/xbyak.h>
|
||||
#include <xbyak/xbyak_util.h>
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
// Generates a two-argument function whose result is the predicate
// (arg0 > 3) && ((arg1 & 1) == 0), built from cmp + ctesta (APX conditional test).
// NOTE(review): the function epilogue/ret is presumably emitted by StackFrame
// when it goes out of scope - confirm in xbyak_util.h.
struct Code1 : Xbyak::CodeGenerator {
	Code1()
	{
		Xbyak::util::StackFrame frame(this, 2);
		const auto& lhs = frame.p[0];
		const auto& rhs = frame.p[1];
		// Flag value used when the condition of ctesta does not hold.
		const int defaultFlags = 0;
		cmp(lhs, 3);
		// eflags = (lhs > 3) ? ((rhs & 1) == 0) : defaultFlags;
		ctesta(rhs, 1, defaultFlags);
		// Materialise ZF into al (T_zu variant of setz).
		setz(al|T_zu);
	}
};
|
||||
|
||||
// Generates a three-argument function whose result is the predicate
// (arg0 == 1) && (arg1 == 2) && (arg2 == 3), built by chaining cmp with two
// ccmpe (APX conditional compare) instructions.
// NOTE(review): the function epilogue/ret is presumably emitted by StackFrame
// when it goes out of scope - confirm in xbyak_util.h.
struct Code2 : Xbyak::CodeGenerator {
	Code2()
	{
		Xbyak::util::StackFrame frame(this, 3);
		const auto& first = frame.p[0];
		const auto& second = frame.p[1];
		const auto& third = frame.p[2];
		// Flag value used when the condition of a ccmpe does not hold.
		const int defaultFlags = 0;
		cmp(first, 1);
		// eflags = (first == 1) ? (second == 2) : defaultFlags;
		ccmpe(second, 2, defaultFlags);
		// eflags = (first == 1 && second == 2) ? (third == 3) : defaultFlags;
		ccmpe(third, 3, defaultFlags);
		// al = (first == 1 && second == 2 && third == 3)
		setz(al|T_zu);
	}
};
|
||||
|
||||
|
||||
int main()
|
||||
try
|
||||
{
|
||||
{
|
||||
puts("(p1 > 3) && ((p2 & 1) == 0)");
|
||||
Code1 c;
|
||||
auto f = c.getCode<int (*)(int, int)>();
|
||||
for (int p1 = 2; p1 < 5; p1++) {
|
||||
for (int p2 = 0; p2 < 3; p2++) {
|
||||
printf("p1=%d p2=%d ret=%d (%d)\n", p1, p2, f(p1, p2), p1 > 3 && ((p2&1) == 0));
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
puts("p1 == 1 && p2 == 2 && p3 == 3");
|
||||
Code2 c;
|
||||
auto f = c.getCode<int (*)(int, int, int)>();
|
||||
for (int p1 = 0; p1 < 3; p1++) {
|
||||
for (int p2 = 1; p2 < 4; p2++) {
|
||||
for (int p3 = 2; p3 < 5; p3++) {
|
||||
printf("p1=%d p2=%d p3=%d ret=%d (%d)\n", p1, p2, p3, f(p1, p2, p3), p1==1 && p2==2 && p3==3);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (std::exception& e) {
|
||||
printf("ERR %s\n", e.what());
|
||||
}
|
2
externals/xbyak/sample/cpuid/adl.txt
vendored
2
externals/xbyak/sample/cpuid/adl.txt
vendored
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx_vnni waitpkg clflushopt cldemote movdiri movdir64b
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote clwb movdiri movdir64b serialize aeskle wide_kl keylocker keylocker_wide
|
||||
|
|
2
externals/xbyak/sample/cpuid/arl.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/arl.txt
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote clwb movdiri movdir64b uintr serialize avx_vnni_int8 avx_ne_convert avx_ifma cmpccxadd sha512 sm3 sm4 avx_vnni_int16 aeskle wide_kl keylocker keylocker_wide
|
2
externals/xbyak/sample/cpuid/bdw.txt
vendored
2
externals/xbyak/sample/cpuid/bdw.txt
vendored
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe
|
||||
|
|
2
externals/xbyak/sample/cpuid/clx.txt
vendored
2
externals/xbyak/sample/cpuid/clx.txt
vendored
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe avx512f avx512dq avx512cd avx512bw avx512vl avx512_vnni clflushopt
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe avx512f avx512dq avx512cd avx512bw avx512vl avx512_vnni clflushopt clwb
|
||||
|
|
2
externals/xbyak/sample/cpuid/cnl.txt
vendored
2
externals/xbyak/sample/cpuid/cnl.txt
vendored
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi clflushopt
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi clflushopt
|
||||
|
|
20
externals/xbyak/sample/cpuid/cpuid.sh
vendored
20
externals/xbyak/sample/cpuid/cpuid.sh
vendored
|
@ -1,11 +1,25 @@
|
|||
#!/bin/bash
|
||||
|
||||
UPDATE=0
|
||||
if [ $# -eq 1 ]; then
|
||||
UPDATE=1
|
||||
fi
|
||||
|
||||
if [ $UPDATE == 1 ]; then
|
||||
echo "UPDATE"
|
||||
fi
|
||||
|
||||
make -C ../ test_util64
|
||||
|
||||
cpus=(p4p mrm pnr nhm wsm snb ivb hsw bdw slt slm glm glp tnt skl cnl icl skx clx cpx icx knl knm tgl adl spr)
|
||||
cpus=(p4p mrm pnr nhm wsm snb ivb hsw bdw slt slm glm glp tnt skl cnl icl skx clx cpx icx knl knm tgl adl mtl rpl spr gnr srf grr arl lnl)
|
||||
|
||||
for cpu in ${cpus[@]} ; do
|
||||
echo $cpu
|
||||
~/bin/sde -$cpu -- ../test_util64 -cpuid > tmp.txt
|
||||
diff tmp.txt $cpu.txt
|
||||
if [ $UPDATE == 1 ]; then
|
||||
~/bin/sde -$cpu -- ../test_util64 -cpuid > $cpu.txt
|
||||
else
|
||||
~/bin/sde -$cpu -- ../test_util64 -cpuid > tmp.txt
|
||||
diff $cpu.txt tmp.txt
|
||||
fi
|
||||
done
|
||||
|
||||
|
|
2
externals/xbyak/sample/cpuid/cpx.txt
vendored
2
externals/xbyak/sample/cpuid/cpx.txt
vendored
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe avx512f avx512dq avx512cd avx512bw avx512vl avx512_vnni avx512_bf16 clflushopt
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe avx512f avx512dq avx512cd avx512bw avx512vl avx512_vnni avx512_bf16 clflushopt clwb
|
||||
|
|
2
externals/xbyak/sample/cpuid/glm.txt
vendored
2
externals/xbyak/sample/cpuid/glm.txt
vendored
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq enh_rep rdrand rdseed smap sha movbe clflushopt
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq enh_rep rdrand rdseed smap sha movbe clflushopt
|
||||
|
|
2
externals/xbyak/sample/cpuid/glp.txt
vendored
2
externals/xbyak/sample/cpuid/glp.txt
vendored
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq enh_rep rdrand rdseed smap sha movbe clflushopt
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq enh_rep rdrand rdseed smap sha movbe clflushopt
|
||||
|
|
2
externals/xbyak/sample/cpuid/gnr.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/gnr.txt
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_bf16 amx(tile) amx(int8) amx(bf16) avx_vnni avx512_fp16 waitpkg clflushopt cldemote clwb movdiri movdir64b uintr serialize amx_fp16 prefetchiti avx10
|
2
externals/xbyak/sample/cpuid/grr.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/grr.txt
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote clwb movdiri movdir64b uintr serialize avx_vnni_int8 avx_ne_convert avx_ifma rao-int cmpccxadd aeskle wide_kl keylocker keylocker_wide
|
2
externals/xbyak/sample/cpuid/hsw.txt
vendored
2
externals/xbyak/sample/cpuid/hsw.txt
vendored
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand f16c movbe
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand f16c movbe
|
||||
|
|
2
externals/xbyak/sample/cpuid/icl.txt
vendored
2
externals/xbyak/sample/cpuid/icl.txt
vendored
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq clflushopt
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq clflushopt clwb
|
||||
|
|
2
externals/xbyak/sample/cpuid/icx.txt
vendored
2
externals/xbyak/sample/cpuid/icx.txt
vendored
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq clflushopt
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq clflushopt clwb
|
||||
|
|
2
externals/xbyak/sample/cpuid/ivb.txt
vendored
2
externals/xbyak/sample/cpuid/ivb.txt
vendored
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx enh_rep rdrand f16c
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx enh_rep rdrand f16c
|
||||
|
|
2
externals/xbyak/sample/cpuid/knl.txt
vendored
2
externals/xbyak/sample/cpuid/knl.txt
vendored
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed prefetchwt1 f16c movbe avx512f avx512pf avx512er avx512cd
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed prefetchwt1 f16c movbe avx512f avx512pf avx512er avx512cd
|
||||
|
|
2
externals/xbyak/sample/cpuid/knm.txt
vendored
2
externals/xbyak/sample/cpuid/knm.txt
vendored
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed prefetchwt1 f16c movbe avx512f avx512pf avx512er avx512cd avx512_4vnniw avx512_4fmaps avx512_vpopcntdq
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed prefetchwt1 f16c movbe avx512f avx512pf avx512er avx512cd avx512_4vnniw avx512_4fmaps avx512_vpopcntdq
|
||||
|
|
2
externals/xbyak/sample/cpuid/lnl.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/lnl.txt
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote clwb movdiri movdir64b uintr serialize avx_vnni_int8 avx_ne_convert avx_ifma cmpccxadd sha512 sm3 sm4 avx_vnni_int16 aeskle wide_kl keylocker keylocker_wide
|
2
externals/xbyak/sample/cpuid/mtl.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/mtl.txt
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote clwb movdiri movdir64b serialize aeskle wide_kl keylocker keylocker_wide
|
2
externals/xbyak/sample/cpuid/rpl.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/rpl.txt
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote clwb movdiri movdir64b serialize aeskle wide_kl keylocker keylocker_wide
|
2
externals/xbyak/sample/cpuid/skl.txt
vendored
2
externals/xbyak/sample/cpuid/skl.txt
vendored
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe clflushopt
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe clflushopt
|
||||
|
|
2
externals/xbyak/sample/cpuid/skx.txt
vendored
2
externals/xbyak/sample/cpuid/skx.txt
vendored
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe avx512f avx512dq avx512cd avx512bw avx512vl clflushopt
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap f16c movbe avx512f avx512dq avx512cd avx512bw avx512vl clflushopt clwb
|
||||
|
|
2
externals/xbyak/sample/cpuid/snb.txt
vendored
2
externals/xbyak/sample/cpuid/snb.txt
vendored
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx
|
||||
|
|
2
externals/xbyak/sample/cpuid/spr.txt
vendored
2
externals/xbyak/sample/cpuid/spr.txt
vendored
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_bf16 avx512_vp2intersect amx(tile) amx(int8) amx(bf16) avx_vnni avx512_fp16 waitpkg clflushopt cldemote movdiri movdir64b
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_bf16 amx(tile) amx(int8) amx(bf16) avx_vnni avx512_fp16 waitpkg clflushopt cldemote clwb movdiri movdir64b uintr serialize
|
||||
|
|
2
externals/xbyak/sample/cpuid/srf.txt
vendored
Normal file
2
externals/xbyak/sample/cpuid/srf.txt
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt enh_rep rdrand adx rdseed smap sha f16c movbe gfni vaes vpclmulqdq avx_vnni waitpkg clflushopt cldemote clwb movdiri movdir64b uintr serialize avx_vnni_int8 avx_ne_convert avx_ifma cmpccxadd aeskle wide_kl keylocker keylocker_wide
|
2
externals/xbyak/sample/cpuid/tgl.txt
vendored
2
externals/xbyak/sample/cpuid/tgl.txt
vendored
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_vp2intersect clflushopt movdiri movdir64b
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_vp2intersect clflushopt clwb movdiri movdir64b aeskle wide_kl keylocker keylocker_wide
|
||||
|
|
2
externals/xbyak/sample/cpuid/tmp.txt
vendored
2
externals/xbyak/sample/cpuid/tmp.txt
vendored
|
@ -1,2 +0,0 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq avx fma avx2 bmi1 bmi2 lzcnt prefetchw enh_rep rdrand adx rdseed smap sha f16c movbe avx512f avx512dq avx512_ifma avx512cd avx512bw avx512vl avx512_vbmi avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq avx512_bf16 avx512_vp2intersect amx(tile) amx(int8) amx(bf16) avx_vnni avx512_fp16 waitpkg clflushopt cldemote movdiri movdir64b
|
2
externals/xbyak/sample/cpuid/tnt.txt
vendored
2
externals/xbyak/sample/cpuid/tnt.txt
vendored
|
@ -1,2 +1,2 @@
|
|||
vendor intel
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp osxsave(xgetvb) pclmulqdq enh_rep rdrand rdseed smap sha movbe clflushopt
|
||||
mmx mmx2 cmov sse sse2 sse3 ssse3 sse41 sse42 popcnt aesni rdtscp xsave(xgetvb) osxsave pclmulqdq enh_rep rdrand rdseed smap sha movbe gfni clflushopt cldemote clwb
|
||||
|
|
10
externals/xbyak/sample/cpuid/update-txt.sh
vendored
10
externals/xbyak/sample/cpuid/update-txt.sh
vendored
|
@ -1,10 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
make -C ../ test_util64
|
||||
|
||||
cpus=(p4p mrm pnr nhm wsm snb ivb hsw bdw slt slm glm glp tnt skl cnl icl skx clx cpx icx knl knm tgl adl spr)
|
||||
for cpu in ${cpus[@]} ; do
|
||||
echo $cpu
|
||||
~/bin/sde -$cpu -- ../test_util64 -cpuid > $cpu.txt
|
||||
done
|
||||
|
25
externals/xbyak/sample/no_flags.cpp
vendored
Normal file
25
externals/xbyak/sample/no_flags.cpp
vendored
Normal file
|
@ -0,0 +1,25 @@
|
|||
#include <stdio.h>
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
// Sample generator contrasting the three-operand add with and without T_nf.
// The generated function takes no arguments; per the inline notes it returns
// the carry flag left by the add (0 or 1) in eax.
struct Code : Xbyak::CodeGenerator {
	// nf == true  : emit the add with T_nf
	// nf == false : emit the plain add
	Code(bool nf) {
		xor_(eax, eax); // CF = 0
		mov(eax, -1);
		// announce which variant is about to be emitted
		puts(nf ? "no flags (with T_nf)" : "change flags (without T_nf)");
		if (nf) {
			add(eax|T_nf, eax, 1); // does not change CF
		} else {
			add(eax, eax, 1); // CF = 1
		}
		adc(eax, 0); // eax = CF ? 1 : 0
		ret();
	}
};
|
||||
|
||||
int main() {
|
||||
for (int i = 0; i < 2; i++) {
|
||||
Code c(i);
|
||||
printf("i=%d ret=%d\n", i, c.getCode<int(*)()>()());
|
||||
}
|
||||
}
|
21
externals/xbyak/sample/test_util.cpp
vendored
21
externals/xbyak/sample/test_util.cpp
vendored
|
@ -31,12 +31,14 @@ void putCPUinfo(bool onlyCpuidFeature)
|
|||
{ Cpu::tSSSE3, "ssse3" },
|
||||
{ Cpu::tSSE41, "sse41" },
|
||||
{ Cpu::tSSE42, "sse42" },
|
||||
{ Cpu::tSSE4a, "sse4a" },
|
||||
{ Cpu::tPOPCNT, "popcnt" },
|
||||
{ Cpu::t3DN, "3dn" },
|
||||
{ Cpu::tE3DN, "e3dn" },
|
||||
{ Cpu::tAESNI, "aesni" },
|
||||
{ Cpu::tRDTSCP, "rdtscp" },
|
||||
{ Cpu::tOSXSAVE, "osxsave(xgetvb)" },
|
||||
{ Cpu::tXSAVE, "xsave(xgetvb)" },
|
||||
{ Cpu::tOSXSAVE, "osxsave" },
|
||||
{ Cpu::tPCLMULQDQ, "pclmulqdq" },
|
||||
{ Cpu::tAVX, "avx" },
|
||||
{ Cpu::tFMA, "fma" },
|
||||
|
@ -86,8 +88,11 @@ void putCPUinfo(bool onlyCpuidFeature)
|
|||
{ Cpu::tWAITPKG, "waitpkg" },
|
||||
{ Cpu::tCLFLUSHOPT, "clflushopt" },
|
||||
{ Cpu::tCLDEMOTE, "cldemote" },
|
||||
{ Cpu::tCLWB, "clwb" },
|
||||
{ Cpu::tMOVDIRI, "movdiri" },
|
||||
{ Cpu::tMOVDIR64B, "movdir64b" },
|
||||
{ Cpu::tUINTR, "uintr" },
|
||||
{ Cpu::tSERIALIZE, "serialize" },
|
||||
{ Cpu::tCLZERO, "clzero" },
|
||||
{ Cpu::tAMX_FP16, "amx_fp16" },
|
||||
{ Cpu::tAVX_VNNI_INT8, "avx_vnni_int8" },
|
||||
|
@ -96,12 +101,25 @@ void putCPUinfo(bool onlyCpuidFeature)
|
|||
{ Cpu::tRAO_INT, "rao-int" },
|
||||
{ Cpu::tCMPCCXADD, "cmpccxadd" },
|
||||
{ Cpu::tPREFETCHITI, "prefetchiti" },
|
||||
{ Cpu::tSHA512, "sha512" },
|
||||
{ Cpu::tSM3, "sm3" },
|
||||
{ Cpu::tSM4, "sm4" },
|
||||
{ Cpu::tAVX_VNNI_INT16, "avx_vnni_int16" },
|
||||
{ Cpu::tAPX_F, "apx_f" },
|
||||
{ Cpu::tAVX10, "avx10" },
|
||||
{ Cpu::tAESKLE, "aeskle" },
|
||||
{ Cpu::tWIDE_KL, "wide_kl" },
|
||||
{ Cpu::tKEYLOCKER, "keylocker" },
|
||||
{ Cpu::tKEYLOCKER_WIDE, "keylocker_wide" },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
if (cpu.has(tbl[i].type)) printf(" %s", tbl[i].str);
|
||||
}
|
||||
printf("\n");
|
||||
if (onlyCpuidFeature) return;
|
||||
if (cpu.has(Cpu::tAVX10)) {
|
||||
printf("AVX10 version %d\n", cpu.getAVX10version());
|
||||
}
|
||||
if (cpu.has(Cpu::tPOPCNT)) {
|
||||
const int n = 0x12345678; // bitcount = 13
|
||||
const int ok = 13;
|
||||
|
@ -127,7 +145,6 @@ void putCPUinfo(bool onlyCpuidFeature)
|
|||
Core i7-3930K 6 2D
|
||||
*/
|
||||
cpu.putFamily();
|
||||
if (!cpu.has(Cpu::tINTEL)) return;
|
||||
for (unsigned int i = 0; i < cpu.getDataCacheLevels(); i++) {
|
||||
printf("cache level=%u data cache size=%u cores sharing data cache=%u\n", i, cpu.getDataCacheSize(i), cpu.getCoresSharingDataCache(i));
|
||||
}
|
||||
|
|
48
externals/xbyak/sample/zero_upper.cpp
vendored
Normal file
48
externals/xbyak/sample/zero_upper.cpp
vendored
Normal file
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
An example of T_zu (zero upper) flag
|
||||
> g++ zero_upper.cpp -I ../
|
||||
> sde -future -- ./a.out
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
// Sample generator for the T_zu (zero upper) flag.
// Every variant loads 0x12345678 into eax and sets ZF, then emits one
// instruction chosen by `mode` before returning:
//   0: imul ax      1: imul ax with T_zu
//   2: setz al      3: setz al with T_zu
// Any other mode emits nothing between the setup and ret.
struct Code : Xbyak::CodeGenerator {
	Code(int mode)
	{
		mov(eax, 0x12345678);
		cmp(eax, eax); // ZF=1
		if (mode == 0) {
			puts("imul");
			imul(ax,ax, 0x1234);
		} else if (mode == 1) {
			puts("imul+zu");
			imul(ax|T_zu, ax, 0x1234);
		} else if (mode == 2) {
			puts("setz");
			setz(al);
		} else if (mode == 3) {
			puts("setz+zu");
			setz(al|T_zu);
		}
		ret();
	}
};
|
||||
|
||||
int main()
|
||||
try
|
||||
{
|
||||
for (int mode = 0; mode < 4; mode++) {
|
||||
Code c(mode);
|
||||
auto f = c.getCode<int (*)()>();
|
||||
printf("ret=%08x\n", f());
|
||||
}
|
||||
} catch (std::exception& e) {
|
||||
printf("ERR %s\n", e.what());
|
||||
}
|
37
externals/xbyak/test/Makefile
vendored
37
externals/xbyak/test/Makefile
vendored
|
@ -1,5 +1,5 @@
|
|||
TARGET = make_nm normalize_prefix bad_address misc cvt_test cvt_test32 noexception misc32 detect_x32
|
||||
XBYAK_INC=../xbyak/xbyak.h
|
||||
XBYAK_INC=../xbyak/xbyak.h ../xbyak/xbyak_mnemonic.h
|
||||
UNAME_S=$(shell uname -s)
|
||||
ifeq ($(shell ./detect_x32),x32)
|
||||
X32?=1
|
||||
|
@ -13,45 +13,50 @@ ifeq ($(UNAME_S),Darwin)
|
|||
# 32-bit binary is not supported
|
||||
ONLY_64BIT=1
|
||||
endif
|
||||
ifeq ($(findstring MINGW64,$(UNAME_S)),MINGW64)
|
||||
ONLY_64BIT=1
|
||||
endif
|
||||
ifeq ($(ONLY_64BIT),0)
|
||||
TARGET += jmp address
|
||||
endif
|
||||
|
||||
ifeq ($(BIT),64)
|
||||
TARGET += jmp64 address64
|
||||
TARGET += jmp64 address64 apx
|
||||
endif
|
||||
|
||||
all: $(TARGET)
|
||||
|
||||
CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wwrite-strings -Wfloat-equal -Wpointer-arith
|
||||
|
||||
CFLAGS=-O2 -Wall -I../ -I./ $(CFLAGS_WARN) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) #-std=c++0x
|
||||
CFLAGS=-O2 -Wall -I.. -I. $(CFLAGS_WARN) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) #-std=c++0x
|
||||
make_nm:
|
||||
$(CXX) $(CFLAGS) make_nm.cpp -o $@
|
||||
normalize_prefix: normalize_prefix.cpp ../xbyak/xbyak.h
|
||||
normalize_prefix: normalize_prefix.cpp $(XBYAK_INC)
|
||||
$(CXX) $(CFLAGS) normalize_prefix.cpp -o $@
|
||||
test_mmx: test_mmx.cpp ../xbyak/xbyak.h
|
||||
test_mmx: test_mmx.cpp $(XBYAK_INC)
|
||||
$(CXX) $(CFLAGS) test_mmx.cpp -o $@ -lpthread
|
||||
jmp: jmp.cpp ../xbyak/xbyak.h
|
||||
jmp: jmp.cpp $(XBYAK_INC)
|
||||
$(CXX) $(CFLAGS) jmp.cpp -o $@ -m32
|
||||
jmp64: jmp.cpp ../xbyak/xbyak.h
|
||||
jmp64: jmp.cpp $(XBYAK_INC)
|
||||
$(CXX) $(CFLAGS) jmp.cpp -o $@ -m64
|
||||
address: address.cpp ../xbyak/xbyak.h
|
||||
address: address.cpp $(XBYAK_INC)
|
||||
$(CXX) $(CFLAGS) address.cpp -o $@ -m32
|
||||
address64: address.cpp ../xbyak/xbyak.h
|
||||
address64: address.cpp $(XBYAK_INC)
|
||||
$(CXX) $(CFLAGS) address.cpp -o $@ -m64
|
||||
bad_address: bad_address.cpp ../xbyak/xbyak.h
|
||||
bad_address: bad_address.cpp $(XBYAK_INC)
|
||||
$(CXX) $(CFLAGS) bad_address.cpp -o $@
|
||||
misc: misc.cpp ../xbyak/xbyak.h
|
||||
misc: misc.cpp $(XBYAK_INC)
|
||||
$(CXX) $(CFLAGS) misc.cpp -o $@
|
||||
misc32: misc.cpp ../xbyak/xbyak.h
|
||||
misc32: misc.cpp $(XBYAK_INC)
|
||||
$(CXX) $(CFLAGS) misc.cpp -o $@ -DXBYAK32
|
||||
cvt_test: cvt_test.cpp ../xbyak/xbyak.h
|
||||
cvt_test: cvt_test.cpp $(XBYAK_INC)
|
||||
$(CXX) $(CFLAGS) $< -o $@
|
||||
cvt_test32: cvt_test.cpp ../xbyak/xbyak.h
|
||||
cvt_test32: cvt_test.cpp $(XBYAK_INC)
|
||||
$(CXX) $(CFLAGS) $< -o $@ -DXBYAK32
|
||||
noexception: noexception.cpp ../xbyak/xbyak.h
|
||||
noexception: noexception.cpp $(XBYAK_INC)
|
||||
$(CXX) $(CFLAGS) $< -o $@ -fno-exceptions
|
||||
apx: apx.cpp $(XBYAK_INC)
|
||||
$(CXX) $(CFLAGS) apx.cpp -o $@
|
||||
|
||||
test_nm: normalize_prefix $(TARGET)
|
||||
$(MAKE) -C ../gen
|
||||
|
@ -75,6 +80,7 @@ ifneq ($(X32),1)
|
|||
CXX=$(CXX) ./test_nm.sh Y64
|
||||
endif
|
||||
./jmp64
|
||||
./apx
|
||||
endif
|
||||
|
||||
test_avx: normalize_prefix
|
||||
|
@ -112,3 +118,4 @@ lib_run: lib_test.cpp lib_run.cpp lib.h
|
|||
$(CXX) $(CFLAGS) lib_run.cpp lib_test.cpp -o lib_run
|
||||
make_nm: make_nm.cpp $(XBYAK_INC)
|
||||
|
||||
.PHONY: test
|
||||
|
|
1964
externals/xbyak/test/apx.cpp
vendored
Normal file
1964
externals/xbyak/test/apx.cpp
vendored
Normal file
File diff suppressed because it is too large
Load diff
4
externals/xbyak/test/cvt_test.cpp
vendored
4
externals/xbyak/test/cvt_test.cpp
vendored
|
@ -109,8 +109,8 @@ CYBOZU_TEST_AUTO(changeBit)
|
|||
{ &dil, &di, &edi, &rdi, &xmm7, &ymm7, &zmm7 },
|
||||
{ &r8b, &r8w, &r8d, &r8, &xmm8, &ymm8, &zmm8 },
|
||||
{ &r15b, &r15w, &r15d, &r15, &xmm15, &ymm15, &zmm15 },
|
||||
{ 0, 0, 0, 0, &xmm16, &ymm16, &zmm16 },
|
||||
{ 0, 0, 0, 0, &xmm31, &ymm31, &zmm31 },
|
||||
{ &r16b, &r16w, &r16d, &r16, &xmm16, &ymm16, &zmm16 },
|
||||
{ &r31b, &r31w, &r31d, &r31, &xmm31, &ymm31, &zmm31 },
|
||||
};
|
||||
const int bitTbl[N] = { 8, 16, 32, 64, 128, 256, 512 };
|
||||
#else
|
||||
|
|
40
externals/xbyak/test/make_nm.cpp
vendored
40
externals/xbyak/test/make_nm.cpp
vendored
|
@ -558,6 +558,7 @@ class Test {
|
|||
"wbinvd",
|
||||
"wrmsr",
|
||||
"xlatb",
|
||||
"xend",
|
||||
|
||||
"popf",
|
||||
"pushf",
|
||||
|
@ -1050,6 +1051,10 @@ class Test {
|
|||
"nle",
|
||||
"g",
|
||||
};
|
||||
#if defined(__GNUC__) && !defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wformat-truncation" // wrong detection
|
||||
#endif
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
char buf[32];
|
||||
snprintf(buf, sizeof(buf), "cmov%s", tbl[i]);
|
||||
|
@ -1059,6 +1064,9 @@ class Test {
|
|||
snprintf(buf, sizeof(buf), "set%s", tbl[i]);
|
||||
put(buf, REG8|REG8_3|MEM);
|
||||
}
|
||||
#if defined(__GNUC__) && !defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
}
|
||||
void putReg1() const
|
||||
{
|
||||
|
@ -1326,6 +1334,7 @@ class Test {
|
|||
#ifdef XBYAK64
|
||||
put("cmpxchg16b", MEM);
|
||||
put("fxrstor64", MEM);
|
||||
put("xbegin", "0x12345678");
|
||||
#endif
|
||||
{
|
||||
const char tbl[][8] = {
|
||||
|
@ -1348,6 +1357,7 @@ class Test {
|
|||
put("xchg", EAX|REG32, EAX|REG32|MEM);
|
||||
put("xchg", MEM, EAX|REG32);
|
||||
put("xchg", REG64, REG64|MEM);
|
||||
put("xabort", IMM8);
|
||||
}
|
||||
void putShift() const
|
||||
{
|
||||
|
@ -1493,18 +1503,6 @@ class Test {
|
|||
put(p, XMM, XMM|MEM, IMM);
|
||||
}
|
||||
}
|
||||
{
|
||||
const char tbl[][16] = {
|
||||
"pclmullqlqdq",
|
||||
"pclmulhqlqdq",
|
||||
// "pclmullqhdq", // QQQ : not supported by nasm/yasm
|
||||
// "pclmulhqhdq",
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const char *p = tbl[i];
|
||||
put(p, XMM, XMM|MEM);
|
||||
}
|
||||
}
|
||||
put("extractps", REG32e|MEM, XMM, IMM);
|
||||
put("pextrw", REG32e|MEM, XMM, IMM); // pextrw for REG32 is for MMX2
|
||||
put("pextrb", REG32e|MEM, XMM, IMM);
|
||||
|
@ -1522,6 +1520,23 @@ class Test {
|
|||
#endif
|
||||
|
||||
}
|
||||
void putVpclmulqdq()
|
||||
{
|
||||
const char tbl[][16] = {
|
||||
"vpclmullqlqdq",
|
||||
"vpclmulhqlqdq",
|
||||
"vpclmullqhqdq",
|
||||
"vpclmulhqhqdq",
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const char *p = tbl[i] + 1; // remove the top 'v'
|
||||
put(p, XMM, XMM|MEM);
|
||||
p = tbl[i]; // use the top 'v'
|
||||
put(p, XMM, XMM, XMM|MEM);
|
||||
put(p, YMM, YMM, YMM|MEM);
|
||||
put(p, ZMM, ZMM, ZMM|MEM);
|
||||
}
|
||||
}
|
||||
void putSHA() const
|
||||
{
|
||||
put("sha1rnds4", XMM, XMM|MEM, IMM);
|
||||
|
@ -2569,6 +2584,7 @@ public:
|
|||
putPushPop8_16();
|
||||
#else
|
||||
putSIMPLE();
|
||||
putVpclmulqdq();
|
||||
putReg1();
|
||||
putBt();
|
||||
putRorM();
|
||||
|
|
124
externals/xbyak/test/misc.cpp
vendored
124
externals/xbyak/test/misc.cpp
vendored
|
@ -1949,6 +1949,12 @@ CYBOZU_TEST_AUTO(misc)
|
|||
movdiri(ptr[rax+r12], r9);
|
||||
movdiri(ptr[rax+r12*2+4], r9d);
|
||||
movdir64b(r10, ptr[r8]);
|
||||
clui();
|
||||
senduipi(rax);
|
||||
senduipi(r10);
|
||||
stui();
|
||||
testui();
|
||||
uiret();
|
||||
#endif
|
||||
}
|
||||
} c;
|
||||
|
@ -1972,6 +1978,12 @@ CYBOZU_TEST_AUTO(misc)
|
|||
0x4e, 0x0f, 0x38, 0xf9, 0x0c, 0x20, // movdiri
|
||||
0x46, 0x0f, 0x38, 0xf9, 0x4c, 0x60, 0x04, // movdiri
|
||||
0x66, 0x45, 0x0f, 0x38, 0xf8, 0x10, // movdir64b
|
||||
0xf3, 0x0f, 0x01, 0xee, // clui
|
||||
0xf3, 0x0f, 0xc7, 0xf0, // senduipi rax
|
||||
0xf3, 0x41, 0x0f, 0xc7, 0xf2, // senduipi r10
|
||||
0xf3, 0x0f, 0x01, 0xef, // stui
|
||||
0xf3, 0x0f, 0x01, 0xed, // testui
|
||||
0xf3, 0x0f, 0x01, 0xec, // uiret
|
||||
#endif
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
|
@ -2157,4 +2169,116 @@ CYBOZU_TEST_AUTO(prefetchiti)
|
|||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
|
||||
// Encoding test for the sha512 / sm3 / sm4 mnemonics: the bytes emitted by
// the generator must match the expected table exactly, in emission order.
CYBOZU_TEST_AUTO(crypto)
{
	struct Code : Xbyak::CodeGenerator {
		Code()
		{
			// sha512
			vsha512msg1(ymm3, xmm5);
			vsha512msg2(ymm9, ymm10);
			vsha512rnds2(ymm1, ymm3, xmm2);

			// sm3
			vsm3msg1(xmm1, xmm2, xmm3);
			vsm3msg1(xmm1, xmm2, ptr [rax]);
			vsm3msg2(xmm5, xmm7, xmm3);
			vsm3msg2(xmm5, xmm6, ptr [rax]);
			vsm3rnds2(xmm5, xmm7, xmm3, 0x12);
			vsm3rnds2(xmm5, xmm7, ptr [rcx], 0x34);

			// sm4
			vsm4key4(xmm1, xmm2, xmm3);
			vsm4key4(xmm1, xmm2, ptr [rdx]);
			vsm4rnds4(xmm1, xmm2, xmm3);
			vsm4rnds4(xmm5, xmm6, ptr [rcx+rax*4]);
		}
	} c;
	// one row per instruction above, in the same order
	const uint8_t expected[] = {
		// sha512
		0xc4, 0xe2, 0x7f, 0xcc, 0xdd,
		0xc4, 0x42, 0x7f, 0xcd, 0xca,
		0xc4, 0xe2, 0x67, 0xcb, 0xca,

		// sm3
		0xC4, 0xE2, 0x68, 0xDA, 0xCB,
		0xC4, 0xE2, 0x68, 0xDA, 0x08,
		0xC4, 0xE2, 0x41, 0xDA, 0xEB,
		0xC4, 0xE2, 0x49, 0xDA, 0x28,
		0xC4, 0xE3, 0x41, 0xDE, 0xEB, 0x12,
		0xC4, 0xE3, 0x41, 0xDE, 0x29, 0x34,

		// sm4
		0xc4, 0xe2, 0x6a, 0xda, 0xcb,
		0xc4, 0xe2, 0x6a, 0xda, 0x0a,
		0xc4, 0xe2, 0x6b, 0xda, 0xcb,
		0xc4, 0xe2, 0x4b, 0xda, 0x2c, 0x81,
	};
	const size_t expectedSize = sizeof(expected) / sizeof(expected[0]);
	CYBOZU_TEST_EQUAL(c.getSize(), expectedSize);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), expected, expectedSize);
}
|
||||
|
||||
// Encoding test for the vpdpb* / vpdpw* mnemonics. Each mnemonic is emitted
// twice (xmm register form, then ymm form with a [rax] memory operand) and
// the generated bytes must match the expected table exactly.
CYBOZU_TEST_AUTO(avx_vnni_int)
{
	struct Code : Xbyak::CodeGenerator {
		Code()
		{
			// byte variants
			vpdpbssd(xmm1, xmm2, xmm3);
			vpdpbssd(ymm1, ymm2, ptr [rax]);
			vpdpbssds(xmm1, xmm2, xmm3);
			vpdpbssds(ymm1, ymm2, ptr [rax]);
			vpdpbsud(xmm1, xmm2, xmm3);
			vpdpbsud(ymm1, ymm2, ptr [rax]);
			vpdpbsuds(xmm1, xmm2, xmm3);
			vpdpbsuds(ymm1, ymm2, ptr [rax]);
			vpdpbuud(xmm1, xmm2, xmm3);
			vpdpbuud(ymm1, ymm2, ptr [rax]);
			vpdpbuuds(xmm1, xmm2, xmm3);
			vpdpbuuds(ymm1, ymm2, ptr [rax]);

			// word variants
			vpdpwsud(xmm1, xmm2, xmm3);
			vpdpwsud(ymm1, ymm2, ptr [rax]);
			vpdpwsuds(xmm1, xmm2, xmm3);
			vpdpwsuds(ymm1, ymm2, ptr [rax]);
			vpdpwusd(xmm1, xmm2, xmm3);
			vpdpwusd(ymm1, ymm2, ptr [rax]);
			vpdpwusds(xmm1, xmm2, xmm3);
			vpdpwusds(ymm1, ymm2, ptr [rax]);
			vpdpwuud(xmm1, xmm2, xmm3);
			vpdpwuud(ymm1, ymm2, ptr [rax]);
			vpdpwuuds(xmm1, xmm2, xmm3);
			vpdpwuuds(ymm1, ymm2, ptr [rax]);
		}
	} c;
	// one row per instruction above, in the same order
	const uint8_t expected[] = {
		0xc4, 0xe2, 0x6b, 0x50, 0xcb,
		0xc4, 0xe2, 0x6f, 0x50, 0x08,
		0xc4, 0xe2, 0x6b, 0x51, 0xcb,
		0xc4, 0xe2, 0x6f, 0x51, 0x08,
		0xc4, 0xe2, 0x6a, 0x50, 0xcb,
		0xc4, 0xe2, 0x6e, 0x50, 0x08,
		0xc4, 0xe2, 0x6a, 0x51, 0xcb,
		0xc4, 0xe2, 0x6e, 0x51, 0x08,
		0xc4, 0xe2, 0x68, 0x50, 0xcb,
		0xc4, 0xe2, 0x6c, 0x50, 0x08,
		0xc4, 0xe2, 0x68, 0x51, 0xcb,
		0xc4, 0xe2, 0x6c, 0x51, 0x08,
		0xc4, 0xe2, 0x6a, 0xd2, 0xcb,
		0xc4, 0xe2, 0x6e, 0xd2, 0x08,
		0xc4, 0xe2, 0x6a, 0xd3, 0xcb,
		0xc4, 0xe2, 0x6e, 0xd3, 0x08,
		0xc4, 0xe2, 0x69, 0xd2, 0xcb,
		0xc4, 0xe2, 0x6d, 0xd2, 0x08,
		0xc4, 0xe2, 0x69, 0xd3, 0xcb,
		0xc4, 0xe2, 0x6d, 0xd3, 0x08,
		0xc4, 0xe2, 0x68, 0xd2, 0xcb,
		0xc4, 0xe2, 0x6c, 0xd2, 0x08,
		0xc4, 0xe2, 0x68, 0xd3, 0xcb,
		0xc4, 0xe2, 0x6c, 0xd3, 0x08,
	};
	const size_t expectedSize = sizeof(expected) / sizeof(expected[0]);
	CYBOZU_TEST_EQUAL(c.getSize(), expectedSize);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), expected, expectedSize);
}
|
||||
|
||||
|
||||
#endif
|
||||
|
|
15
externals/xbyak/test/normalize_prefix.cpp
vendored
15
externals/xbyak/test/normalize_prefix.cpp
vendored
|
@ -8,14 +8,25 @@
|
|||
|
||||
typedef unsigned char uint8_t;
|
||||
|
||||
std::string normalize(const std::string& line)
|
||||
std::string normalize(std::string line)
|
||||
{
|
||||
size_t pos = line.find('(');
|
||||
/* nasm generates byte codes containing () for xbegin, so remove it. */
|
||||
if (pos != std::string::npos) {
|
||||
line.erase(pos, 1);
|
||||
pos = line.find(')');
|
||||
if (pos == std::string::npos) {
|
||||
fprintf(stderr, "line error {%s}\n", line.c_str());
|
||||
return "";
|
||||
}
|
||||
line.erase(pos, 1);
|
||||
}
|
||||
static const char tbl[][3] = { "66", "67", "F2", "F3" };
|
||||
size_t tblNum = sizeof(tbl) / sizeof(tbl[0]);
|
||||
typedef std::set<std::string> StringSet;
|
||||
StringSet suf;
|
||||
|
||||
size_t pos = 0;
|
||||
pos = 0;
|
||||
for (; pos < line.size(); pos += 2) {
|
||||
bool found = false;
|
||||
for (size_t i = 0; i < tblNum; i++) {
|
||||
|
|
2
externals/xbyak/test/test_address.sh
vendored
2
externals/xbyak/test/test_address.sh
vendored
|
@ -23,7 +23,7 @@ echo "xbyak"
|
|||
echo "compile nm_frame.cpp"
|
||||
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
|
||||
./nm_frame > x.lst
|
||||
diff ok.lst x.lst && echo "ok"
|
||||
diff -bB ok.lst x.lst && echo "ok"
|
||||
|
||||
}
|
||||
|
||||
|
|
6
externals/xbyak/test/test_all.bat
vendored
6
externals/xbyak/test/test_all.bat
vendored
|
@ -5,4 +5,10 @@ call test_address
|
|||
call test_address 64
|
||||
echo *** test jmp address ***
|
||||
call test_jmp
|
||||
echo *** test misc ***
|
||||
set FILE=misc
|
||||
call test_misc
|
||||
echo *** test APX ***
|
||||
set FILE=apx
|
||||
call test_misc
|
||||
echo *** all test end ***
|
||||
|
|
2
externals/xbyak/test/test_avx.sh
vendored
2
externals/xbyak/test/test_avx.sh
vendored
|
@ -48,4 +48,4 @@ echo "xbyak"
|
|||
echo "compile nm_frame.cpp"
|
||||
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
|
||||
./nm_frame | $FILTER > x.lst
|
||||
diff -B ok.lst x.lst && echo "ok"
|
||||
diff -bB ok.lst x.lst && echo "ok"
|
||||
|
|
2
externals/xbyak/test/test_avx512.sh
vendored
2
externals/xbyak/test/test_avx512.sh
vendored
|
@ -35,4 +35,4 @@ echo "xbyak"
|
|||
echo "compile nm_frame.cpp"
|
||||
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame -DXBYAK_AVX512
|
||||
./nm_frame | $FILTER > x.lst
|
||||
diff -B ok.lst x.lst && echo "ok"
|
||||
diff -bB ok.lst x.lst && echo "ok"
|
||||
|
|
4
externals/xbyak/test/test_misc.bat
vendored
4
externals/xbyak/test/test_misc.bat
vendored
|
@ -1,4 +1,4 @@
|
|||
call set_opt
|
||||
bmake -f Makefile.win all
|
||||
cl -I../ -I./ -DXBYAK_TEST misc.cpp %OPT% /Od /Zi
|
||||
misc
|
||||
cl -I../ -I./ -DXBYAK_TEST %FILE%.cpp %OPT% /Od /Zi
|
||||
%FILE%
|
||||
|
|
2
externals/xbyak/test/test_nm.sh
vendored
2
externals/xbyak/test/test_nm.sh
vendored
|
@ -61,4 +61,4 @@ echo "xbyak"
|
|||
echo "compile nm_frame.cpp"
|
||||
$CXX $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
|
||||
./nm_frame | $FILTER > x.lst
|
||||
diff -B ok.lst x.lst && echo "ok"
|
||||
diff -bB ok.lst x.lst && echo "ok"
|
||||
|
|
851
externals/xbyak/xbyak/xbyak.h
vendored
851
externals/xbyak/xbyak/xbyak.h
vendored
File diff suppressed because it is too large
Load diff
2445
externals/xbyak/xbyak/xbyak_mnemonic.h
vendored
2445
externals/xbyak/xbyak/xbyak_mnemonic.h
vendored
File diff suppressed because it is too large
Load diff
120
externals/xbyak/xbyak/xbyak_util.h
vendored
120
externals/xbyak/xbyak/xbyak_util.h
vendored
|
@ -9,6 +9,13 @@
|
|||
#define XBYAK_THROW(x) ;
|
||||
#define XBYAK_THROW_RET(x, y) return y;
|
||||
#endif
|
||||
#ifndef XBYAK_CONSTEXPR
|
||||
#if ((__cplusplus >= 201402L) && !(!defined(__clang__) && defined(__GNUC__) && (__GNUC__ <= 5))) || (defined(_MSC_VER) && _MSC_VER >= 1910)
|
||||
#define XBYAK_CONSTEXPR constexpr
|
||||
#else
|
||||
#define XBYAK_CONSTEXPR
|
||||
#endif
|
||||
#endif
|
||||
#else
|
||||
#include <string.h>
|
||||
|
||||
|
@ -93,7 +100,7 @@ struct TypeT {
|
|||
};
|
||||
|
||||
template<uint64_t L1, uint64_t H1, uint64_t L2, uint64_t H2>
|
||||
TypeT<L1 | L2, H1 | H2> operator|(TypeT<L1, H1>, TypeT<L2, H2>) { return TypeT<L1 | L2, H1 | H2>(); }
|
||||
XBYAK_CONSTEXPR TypeT<L1 | L2, H1 | H2> operator|(TypeT<L1, H1>, TypeT<L2, H2>) { return TypeT<L1 | L2, H1 | H2>(); }
|
||||
|
||||
template<typename T>
|
||||
inline T max_(T x, T y) { return x >= y ? x : y; }
|
||||
|
@ -137,6 +144,7 @@ private:
|
|||
uint32_t dataCacheSize_[maxNumberCacheLevels];
|
||||
uint32_t coresSharignDataCache_[maxNumberCacheLevels];
|
||||
uint32_t dataCacheLevels_;
|
||||
uint32_t avx10version_;
|
||||
|
||||
uint32_t get32bitAsBE(const char *x) const
|
||||
{
|
||||
|
@ -173,11 +181,9 @@ private:
|
|||
}
|
||||
void setNumCores()
|
||||
{
|
||||
if (!has(tINTEL)) return;
|
||||
if (!has(tINTEL) && !has(tAMD)) return;
|
||||
|
||||
uint32_t data[4] = {};
|
||||
|
||||
/* CAUTION: These numbers are configuration as shipped by Intel. */
|
||||
getCpuidEx(0x0, 0, data);
|
||||
if (data[0] >= 0xB) {
|
||||
/*
|
||||
|
@ -211,7 +217,48 @@ private:
|
|||
}
|
||||
void setCacheHierarchy()
|
||||
{
|
||||
if (!has(tINTEL)) return;
|
||||
if (!has(tINTEL) && !has(tAMD)) return;
|
||||
|
||||
// https://github.com/amd/ZenDNN/blob/a08bf9a9efc160a69147cdecfb61cc85cc0d4928/src/cpu/x64/xbyak/xbyak_util.h#L236-L288
|
||||
if (has(tAMD)) {
|
||||
// There are 3 Data Cache Levels (L1, L2, L3)
|
||||
dataCacheLevels_ = 3;
|
||||
const uint32_t leaf = 0x8000001D; // for modern AMD CPus
|
||||
// Sub leaf value ranges from 0 to 3
|
||||
// Sub leaf value 0 refers to L1 Data Cache
|
||||
// Sub leaf value 1 refers to L1 Instruction Cache
|
||||
// Sub leaf value 2 refers to L2 Cache
|
||||
// Sub leaf value 3 refers to L3 Cache
|
||||
// For legacy AMD CPU, use leaf 0x80000005 for L1 cache
|
||||
// and 0x80000006 for L2 and L3 cache
|
||||
int cache_index = 0;
|
||||
for (uint32_t sub_leaf = 0; sub_leaf <= dataCacheLevels_; sub_leaf++) {
|
||||
// Skip sub_leaf = 1 as it refers to
|
||||
// L1 Instruction Cache (not required)
|
||||
if (sub_leaf == 1) {
|
||||
continue;
|
||||
}
|
||||
uint32_t data[4] = {};
|
||||
getCpuidEx(leaf, sub_leaf, data);
|
||||
// Cache Size = Line Size * Partitions * Associativity * Cache Sets
|
||||
dataCacheSize_[cache_index] =
|
||||
(extractBit(data[1], 22, 31) + 1) // Associativity-1
|
||||
* (extractBit(data[1], 12, 21) + 1) // Partitions-1
|
||||
* (extractBit(data[1], 0, 11) + 1) // Line Size
|
||||
* (data[2] + 1);
|
||||
// Calculate the number of cores sharing the current data cache
|
||||
int smt_width = numCores_[0];
|
||||
int logical_cores = numCores_[1];
|
||||
int actual_logical_cores = extractBit(data[0], 14, 25) /* # of cores * # of threads */ + 1;
|
||||
if (logical_cores != 0) {
|
||||
actual_logical_cores = local::min_(actual_logical_cores, logical_cores);
|
||||
}
|
||||
coresSharignDataCache_[cache_index] = local::max_(actual_logical_cores / smt_width, 1);
|
||||
++cache_index;
|
||||
}
|
||||
return;
|
||||
}
|
||||
// intel
|
||||
const uint32_t NO_CACHE = 0;
|
||||
const uint32_t DATA_CACHE = 1;
|
||||
// const uint32_t INSTRUCTION_CACHE = 2;
|
||||
|
@ -417,6 +464,21 @@ public:
|
|||
XBYAK_DEFINE_TYPE(72, tRAO_INT);
|
||||
XBYAK_DEFINE_TYPE(73, tCMPCCXADD);
|
||||
XBYAK_DEFINE_TYPE(74, tPREFETCHITI);
|
||||
XBYAK_DEFINE_TYPE(75, tSERIALIZE);
|
||||
XBYAK_DEFINE_TYPE(76, tUINTR);
|
||||
XBYAK_DEFINE_TYPE(77, tXSAVE);
|
||||
XBYAK_DEFINE_TYPE(78, tSHA512);
|
||||
XBYAK_DEFINE_TYPE(79, tSM3);
|
||||
XBYAK_DEFINE_TYPE(80, tSM4);
|
||||
XBYAK_DEFINE_TYPE(81, tAVX_VNNI_INT16);
|
||||
XBYAK_DEFINE_TYPE(82, tAPX_F);
|
||||
XBYAK_DEFINE_TYPE(83, tAVX10);
|
||||
XBYAK_DEFINE_TYPE(84, tAESKLE);
|
||||
XBYAK_DEFINE_TYPE(85, tWIDE_KL);
|
||||
XBYAK_DEFINE_TYPE(86, tKEYLOCKER);
|
||||
XBYAK_DEFINE_TYPE(87, tKEYLOCKER_WIDE);
|
||||
XBYAK_DEFINE_TYPE(88, tSSE4a);
|
||||
XBYAK_DEFINE_TYPE(89, tCLWB);
|
||||
|
||||
#undef XBYAK_SPLIT_ID
|
||||
#undef XBYAK_DEFINE_TYPE
|
||||
|
@ -428,6 +490,7 @@ public:
|
|||
, dataCacheSize_()
|
||||
, coresSharignDataCache_()
|
||||
, dataCacheLevels_(0)
|
||||
, avx10version_(0)
|
||||
{
|
||||
uint32_t data[4] = {};
|
||||
const uint32_t& EAX = data[0];
|
||||
|
@ -462,13 +525,14 @@ public:
|
|||
if (maxExtendedNum >= 0x80000001) {
|
||||
getCpuid(0x80000001, data);
|
||||
|
||||
if (EDX & (1U << 31)) type_ |= t3DN;
|
||||
if (EDX & (1U << 30)) type_ |= tE3DN;
|
||||
if (EDX & (1U << 27)) type_ |= tRDTSCP;
|
||||
if (EDX & (1U << 22)) type_ |= tMMX2;
|
||||
if (EDX & (1U << 15)) type_ |= tCMOV;
|
||||
if (ECX & (1U << 5)) type_ |= tLZCNT;
|
||||
if (ECX & (1U << 6)) type_ |= tSSE4a;
|
||||
if (ECX & (1U << 8)) type_ |= tPREFETCHW;
|
||||
if (EDX & (1U << 15)) type_ |= tCMOV;
|
||||
if (EDX & (1U << 22)) type_ |= tMMX2;
|
||||
if (EDX & (1U << 27)) type_ |= tRDTSCP;
|
||||
if (EDX & (1U << 30)) type_ |= tE3DN;
|
||||
if (EDX & (1U << 31)) type_ |= t3DN;
|
||||
}
|
||||
|
||||
if (maxExtendedNum >= 0x80000008) {
|
||||
|
@ -478,16 +542,17 @@ public:
|
|||
|
||||
getCpuid(1, data);
|
||||
if (ECX & (1U << 0)) type_ |= tSSE3;
|
||||
if (ECX & (1U << 1)) type_ |= tPCLMULQDQ;
|
||||
if (ECX & (1U << 9)) type_ |= tSSSE3;
|
||||
if (ECX & (1U << 19)) type_ |= tSSE41;
|
||||
if (ECX & (1U << 20)) type_ |= tSSE42;
|
||||
if (ECX & (1U << 22)) type_ |= tMOVBE;
|
||||
if (ECX & (1U << 23)) type_ |= tPOPCNT;
|
||||
if (ECX & (1U << 25)) type_ |= tAESNI;
|
||||
if (ECX & (1U << 1)) type_ |= tPCLMULQDQ;
|
||||
if (ECX & (1U << 26)) type_ |= tXSAVE;
|
||||
if (ECX & (1U << 27)) type_ |= tOSXSAVE;
|
||||
if (ECX & (1U << 30)) type_ |= tRDRAND;
|
||||
if (ECX & (1U << 29)) type_ |= tF16C;
|
||||
if (ECX & (1U << 30)) type_ |= tRDRAND;
|
||||
|
||||
if (EDX & (1U << 15)) type_ |= tCMOV;
|
||||
if (EDX & (1U << 23)) type_ |= tMMX;
|
||||
|
@ -498,8 +563,8 @@ public:
|
|||
// check XFEATURE_ENABLED_MASK[2:1] = '11b'
|
||||
uint64_t bv = getXfeature();
|
||||
if ((bv & 6) == 6) {
|
||||
if (ECX & (1U << 28)) type_ |= tAVX;
|
||||
if (ECX & (1U << 12)) type_ |= tFMA;
|
||||
if (ECX & (1U << 28)) type_ |= tAVX;
|
||||
// do *not* check AVX-512 state on macOS because it has on-demand AVX-512 support
|
||||
#if !defined(__APPLE__)
|
||||
if (((bv >> 5) & 7) == 7)
|
||||
|
@ -533,29 +598,36 @@ public:
|
|||
const uint32_t maxNumSubLeaves = EAX;
|
||||
if (type_ & tAVX && (EBX & (1U << 5))) type_ |= tAVX2;
|
||||
if (EBX & (1U << 3)) type_ |= tBMI1;
|
||||
if (EBX & (1U << 4)) type_ |= tHLE;
|
||||
if (EBX & (1U << 8)) type_ |= tBMI2;
|
||||
if (EBX & (1U << 9)) type_ |= tENHANCED_REP;
|
||||
if (EBX & (1U << 11)) type_ |= tRTM;
|
||||
if (EBX & (1U << 14)) type_ |= tMPX;
|
||||
if (EBX & (1U << 18)) type_ |= tRDSEED;
|
||||
if (EBX & (1U << 19)) type_ |= tADX;
|
||||
if (EBX & (1U << 20)) type_ |= tSMAP;
|
||||
if (EBX & (1U << 23)) type_ |= tCLFLUSHOPT;
|
||||
if (EBX & (1U << 4)) type_ |= tHLE;
|
||||
if (EBX & (1U << 11)) type_ |= tRTM;
|
||||
if (EBX & (1U << 14)) type_ |= tMPX;
|
||||
if (EBX & (1U << 24)) type_ |= tCLWB;
|
||||
if (EBX & (1U << 29)) type_ |= tSHA;
|
||||
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
|
||||
if (ECX & (1U << 5)) type_ |= tWAITPKG;
|
||||
if (ECX & (1U << 8)) type_ |= tGFNI;
|
||||
if (ECX & (1U << 9)) type_ |= tVAES;
|
||||
if (ECX & (1U << 10)) type_ |= tVPCLMULQDQ;
|
||||
if (ECX & (1U << 23)) type_ |= tKEYLOCKER;
|
||||
if (ECX & (1U << 25)) type_ |= tCLDEMOTE;
|
||||
if (ECX & (1U << 27)) type_ |= tMOVDIRI;
|
||||
if (ECX & (1U << 28)) type_ |= tMOVDIR64B;
|
||||
if (EDX & (1U << 5)) type_ |= tUINTR;
|
||||
if (EDX & (1U << 14)) type_ |= tSERIALIZE;
|
||||
if (EDX & (1U << 22)) type_ |= tAMX_BF16;
|
||||
if (EDX & (1U << 24)) type_ |= tAMX_TILE;
|
||||
if (EDX & (1U << 25)) type_ |= tAMX_INT8;
|
||||
if (EDX & (1U << 22)) type_ |= tAMX_BF16;
|
||||
if (maxNumSubLeaves >= 1) {
|
||||
getCpuidEx(7, 1, data);
|
||||
if (EAX & (1U << 0)) type_ |= tSHA512;
|
||||
if (EAX & (1U << 1)) type_ |= tSM3;
|
||||
if (EAX & (1U << 2)) type_ |= tSM4;
|
||||
if (EAX & (1U << 3)) type_ |= tRAO_INT;
|
||||
if (EAX & (1U << 4)) type_ |= tAVX_VNNI;
|
||||
if (type_ & tAVX512F) {
|
||||
|
@ -566,9 +638,22 @@ public:
|
|||
if (EAX & (1U << 23)) type_ |= tAVX_IFMA;
|
||||
if (EDX & (1U << 4)) type_ |= tAVX_VNNI_INT8;
|
||||
if (EDX & (1U << 5)) type_ |= tAVX_NE_CONVERT;
|
||||
if (EDX & (1U << 10)) type_ |= tAVX_VNNI_INT16;
|
||||
if (EDX & (1U << 14)) type_ |= tPREFETCHITI;
|
||||
if (EDX & (1U << 19)) type_ |= tAVX10;
|
||||
if (EDX & (1U << 21)) type_ |= tAPX_F;
|
||||
}
|
||||
}
|
||||
if (maxNum >= 0x19) {
|
||||
getCpuidEx(0x19, 0, data);
|
||||
if (EBX & (1U << 0)) type_ |= tAESKLE;
|
||||
if (EBX & (1U << 2)) type_ |= tWIDE_KL;
|
||||
if (type_ & (tKEYLOCKER|tAESKLE|tWIDE_KL)) type_ |= tKEYLOCKER_WIDE;
|
||||
}
|
||||
if (has(tAVX10) && maxNum >= 0x24) {
|
||||
getCpuidEx(0x24, 0, data);
|
||||
avx10version_ = EBX & mask(7);
|
||||
}
|
||||
setFamily();
|
||||
setNumCores();
|
||||
setCacheHierarchy();
|
||||
|
@ -585,6 +670,7 @@ public:
|
|||
{
|
||||
return (type & type_) == type;
|
||||
}
|
||||
// Returns the AVX10 version recorded by the constructor. The value stays 0
// unless tAVX10 was detected and CPUID leaf 0x24 is available, in which case
// it is taken from EBX of that leaf.
int getAVX10version() const { return avx10version_; }
|
||||
};
|
||||
|
||||
#ifndef XBYAK_ONLY_CLASS_CPU
|
||||
|
|
Loading…
Reference in a new issue