externals: Update xbyak to v6.06
Merge commit 'c926d9f40978bee57fbf245cf1c6eb347943e4d9'
This commit is contained in:
commit
39e21920db
29 changed files with 2936 additions and 861 deletions
1
externals/xbyak/.github/FUNDING.yml
vendored
Normal file
1
externals/xbyak/.github/FUNDING.yml
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
github: herumi
|
2
externals/xbyak/.github/workflows/main.yml
vendored
2
externals/xbyak/.github/workflows/main.yml
vendored
|
@ -7,5 +7,7 @@ jobs:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
|
- run: sudo apt update
|
||||||
- run: sudo apt install nasm yasm g++-multilib tcsh
|
- run: sudo apt install nasm yasm g++-multilib tcsh
|
||||||
- run: make test
|
- run: make test
|
||||||
|
- run: make -C sample CXXFLAGS="-DXBYAK_NO_EXCEPTION"
|
||||||
|
|
21
externals/xbyak/CMakeLists.txt
vendored
21
externals/xbyak/CMakeLists.txt
vendored
|
@ -1,6 +1,6 @@
|
||||||
cmake_minimum_required(VERSION 2.6...3.0.2)
|
cmake_minimum_required(VERSION 2.6...3.0.2)
|
||||||
|
|
||||||
project(xbyak CXX)
|
project(xbyak LANGUAGES CXX VERSION 6.06)
|
||||||
|
|
||||||
file(GLOB headers xbyak/*.h)
|
file(GLOB headers xbyak/*.h)
|
||||||
|
|
||||||
|
@ -18,17 +18,26 @@ if (DEFINED CMAKE_VERSION AND CMAKE_VERSION VERSION_GREATER_EQUAL 3.0.2)
|
||||||
install(
|
install(
|
||||||
TARGETS ${PROJECT_NAME}
|
TARGETS ${PROJECT_NAME}
|
||||||
EXPORT ${PROJECT_NAME}-targets
|
EXPORT ${PROJECT_NAME}-targets
|
||||||
|
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}
|
||||||
)
|
)
|
||||||
|
|
||||||
configure_file(
|
include(CMakePackageConfigHelpers)
|
||||||
|
configure_package_config_file(
|
||||||
cmake/config.cmake.in
|
cmake/config.cmake.in
|
||||||
${PROJECT_NAME}Config.cmake
|
"${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config.cmake"
|
||||||
@ONLY
|
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
|
||||||
|
)
|
||||||
|
write_basic_package_version_file(
|
||||||
|
"${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config-version.cmake"
|
||||||
|
COMPATIBILITY SameMajorVersion
|
||||||
)
|
)
|
||||||
|
|
||||||
install(
|
install(
|
||||||
FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake
|
FILES
|
||||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
|
"${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config.cmake"
|
||||||
|
"${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config-version.cmake"
|
||||||
|
DESTINATION
|
||||||
|
${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
|
||||||
)
|
)
|
||||||
|
|
||||||
install(
|
install(
|
||||||
|
|
20
externals/xbyak/COPYRIGHT
vendored
20
externals/xbyak/COPYRIGHT
vendored
|
@ -25,23 +25,3 @@ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||||
THE POSSIBILITY OF SUCH DAMAGE.
|
THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
ソースコード形式かバイナリ形式か、変更するかしないかを問わず、以下の条件を満た
|
|
||||||
す場合に限り、再頒布および使用が許可されます。
|
|
||||||
|
|
||||||
ソースコードを再頒布する場合、上記の著作権表示、本条件一覧、および下記免責条項
|
|
||||||
を含めること。
|
|
||||||
バイナリ形式で再頒布する場合、頒布物に付属のドキュメント等の資料に、上記の著作
|
|
||||||
権表示、本条件一覧、および下記免責条項を含めること。
|
|
||||||
書面による特別の許可なしに、本ソフトウェアから派生した製品の宣伝または販売促進
|
|
||||||
に、著作権者の名前またはコントリビューターの名前を使用してはならない。
|
|
||||||
本ソフトウェアは、著作権者およびコントリビューターによって「現状のまま」提供さ
|
|
||||||
れており、明示黙示を問わず、商業的な使用可能性、および特定の目的に対する適合性
|
|
||||||
に関する暗黙の保証も含め、またそれに限定されない、いかなる保証もありません。
|
|
||||||
著作権者もコントリビューターも、事由のいかんを問わず、 損害発生の原因いかんを
|
|
||||||
問わず、かつ責任の根拠が契約であるか厳格責任であるか(過失その他の)不法行為で
|
|
||||||
あるかを問わず、仮にそのような損害が発生する可能性を知らされていたとしても、
|
|
||||||
本ソフトウェアの使用によって発生した(代替品または代用サービスの調達、使用の
|
|
||||||
喪失、データの喪失、利益の喪失、業務の中断も含め、またそれに限定されない)直接
|
|
||||||
損害、間接損害、偶発的な損害、特別損害、懲罰的損害、または結果損害について、
|
|
||||||
一切責任を負わないものとします。
|
|
||||||
|
|
2
externals/xbyak/cmake/config.cmake.in
vendored
2
externals/xbyak/cmake/config.cmake.in
vendored
|
@ -1 +1,3 @@
|
||||||
|
@PACKAGE_INIT@
|
||||||
|
|
||||||
include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@-targets.cmake")
|
include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@-targets.cmake")
|
||||||
|
|
8
externals/xbyak/cmake/meson-config.cmake.in
vendored
Normal file
8
externals/xbyak/cmake/meson-config.cmake.in
vendored
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
@PACKAGE_INIT@
|
||||||
|
|
||||||
|
if(NOT TARGET @TARGET_NAME@)
|
||||||
|
add_library(@TARGET_NAME@ INTERFACE IMPORTED)
|
||||||
|
set_target_properties(@TARGET_NAME@ PROPERTIES
|
||||||
|
INTERFACE_INCLUDE_DIRECTORIES "@ABSOLUTE_INCLUDE_DIR@"
|
||||||
|
)
|
||||||
|
endif()
|
180
externals/xbyak/doc/changelog.md
vendored
Normal file
180
externals/xbyak/doc/changelog.md
vendored
Normal file
|
@ -0,0 +1,180 @@
|
||||||
|
# History
|
||||||
|
|
||||||
|
* 2022/Jun/01 ver 6.06 refactor Cpu::Type class and improve MmapAllocator when XBYAK_USE_MEMFD is defined.
|
||||||
|
* 2022/Mar/20 ver 6.052 add Cpu::operator==()
|
||||||
|
* 2022/Mar/13 ver 6.051 fix compile error when XBYAK_NO_EXCEPTION is defined
|
||||||
|
* 2022/Mar/12 ver 6.05 add movdiri, movdir64b, clwb, cldemote
|
||||||
|
* 2022/Apr/22 ver 6.041 consider Android and mingw
|
||||||
|
* 2022/Apr/05 ver 6.04 add tpause, umonitor, umwait
|
||||||
|
* 2022/Mar/08 ver 6.03 MmapAllocator supports memfd with user-defined strings.
|
||||||
|
* 2022/Jan/28 ver 6.02 strictly check the range of 32-bit displacement
|
||||||
|
* 2021/Dec/14 ver 6.01 support T_FAR jump/call and retf
|
||||||
|
* 2021/Sep/14 ver 6.00 fully support AVX512-FP16
|
||||||
|
* 2021/Sep/09 ver 5.997 fix vrndscale* to support {sae}
|
||||||
|
* 2021/Sep/03 ver 5.996 fix v{add,sub,mul,div,max,min}{sd,ss} to support T_rd_sae.
|
||||||
|
* 2021/Aug/15 ver 5.995 add a label to /proc/self/maps if XBYAK_USE_MEMFD is defined on Linux
|
||||||
|
* 2021/Jun/17 ver 5.994 add alias of vcmpXX{ps,pd,ss,sd} with mask register
|
||||||
|
* 2021/Jun/06 ver 5.993 strict check of gather/scatter register combination
|
||||||
|
* 2021/May/09 ver 5.992 support endbr32 and endbr64
|
||||||
|
* 2020/Nov/16 ver 5.991 disable constexpr for gcc-5 with -std=c++14
|
||||||
|
* 2020/Oct/19 ver 5.99 support VNNI instructions(Thanks to akharito)
|
||||||
|
* 2020/Oct/17 ver 5.98 support the form of [scale * reg]
|
||||||
|
* 2020/Sep/08 ver 5.97 replace uint32 with uint32_t etc.
|
||||||
|
* 2020/Aug/28 ver 5.95 some constructors of register classes support constexpr if C++14 or later
|
||||||
|
* 2020/Aug/04 ver 5.941 `CodeGenerator::reset()` calls `ClearError()`.
|
||||||
|
* 2020/Jul/28 ver 5.94 remove #include <winsock2.h> (only windows)
|
||||||
|
* 2020/Jul/21 ver 5.93 support exception-less mode
|
||||||
|
* 2020/Jun/30 ver 5.92 support Intel AMX instruction set (Thanks to nshustrov)
|
||||||
|
* 2020/Jun/22 ver 5.913 fix mov(r64, imm64) on 32-bit env with XBYAK64
|
||||||
|
* 2020/Jun/19 ver 5.912 define MAP_JIT on macOS regardless of Xcode version (Thanks to rsdubtso)
|
||||||
|
* 2020/May/10 ver 5.911 XBYAK_USE_MMAP_ALLOCATOR is defined unless XBYAK_DONT_USE_MMAP_ALLOCATOR is defined.
|
||||||
|
* 2020/Apr/20 ver 5.91 accept mask register k0 (it means no mask)
|
||||||
|
* 2020/Apr/09 ver 5.90 kmov{b,d,w,q} throws exception for an unsupported register
|
||||||
|
* 2020/Feb/26 ver 5.891 fix typo of type
|
||||||
|
* 2020/Jan/03 ver 5.89 fix error of vfpclasspd
|
||||||
|
* 2019/Dec/20 ver 5.88 fix compile error on Windows
|
||||||
|
* 2019/Dec/19 ver 5.87 add setDefaultJmpNEAR(), which deals with `jmp` of an undefined label as T_NEAR if no type is specified.
|
||||||
|
* 2019/Dec/13 ver 5.86 [changed] revert to the behavior before v5.84 if -fno-operator-names is defined (and() is available)
|
||||||
|
* 2019/Dec/07 ver 5.85 append MAP_JIT flag to mmap for macOS mojave or later
|
||||||
|
* 2019/Nov/29 ver 5.84 [changed] XBYAK_NO_OP_NAMES is defined unless XBYAK_USE_OP_NAMES is defined
|
||||||
|
* 2019/Oct/12 ver 5.83 exit(1) was removed
|
||||||
|
* 2019/Sep/23 ver 5.82 support monitorx, mwaitx, clzero (thanks to @MagurosanTeam)
|
||||||
|
* 2019/Sep/14 ver 5.81 support some generic mnemonics.
|
||||||
|
* 2019/Aug/01 ver 5.802 fix detection of AVX512_BF16 (thanks to vpirogov)
|
||||||
|
* 2019/May/27 support vp2intersectd, vp2intersectq (not tested)
|
||||||
|
* 2019/May/26 ver 5.80 support vcvtne2ps2bf16, vcvtneps2bf16, vdpbf16ps
|
||||||
|
* 2019/Apr/27 ver 5.79 vcmppd/vcmpps supports ptr_b(thanks to jkopinsky)
|
||||||
|
* 2019/Apr/15 ver 5.78 rewrite Reg::changeBit() (thanks to MerryMage)
|
||||||
|
* 2019/Mar/06 ver 5.77 fix number of cores that share LLC cache by densamoilov
|
||||||
|
* 2019/Jan/17 ver 5.76 add Cpu::getNumCores() by shelleygoel
|
||||||
|
* 2018/Oct/31 ver 5.751 recover Xbyak::CastTo for compatibility
|
||||||
|
* 2018/Oct/29 ver 5.75 unlink LabelManager from Label when msg is destroyed
|
||||||
|
* 2018/Oct/21 ver 5.74 support RegRip +/- int. Xbyak::CastTo is removed
|
||||||
|
* 2018/Oct/15 util::AddressFrame uses push/pop instead of mov
|
||||||
|
* 2018/Sep/19 ver 5.73 fix evex encoding of vpslld, vpslldq, vpsllw, etc for (reg, mem, imm8)
|
||||||
|
* 2018/Sep/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday)
|
||||||
|
* 2018/Sep/04 ver 5.71 L() returns a new label instance
|
||||||
|
* 2018/Aug/27 ver 5.70 support setProtectMode() and DontUseProtect for read/exec setting
|
||||||
|
* 2018/Aug/24 ver 5.68 fix wrong VSIB encoding with vector index >= 16(thanks to petercaday)
|
||||||
|
* 2018/Aug/14 ver 5.67 remove mutable in Address ; fix setCacheHierarchy for cloud vm
|
||||||
|
* 2018/Jul/26 ver 5.661 support mingw64
|
||||||
|
* 2018/Jul/24 ver 5.66 add CodeArray::PROTECT_RE to mode of protect()
|
||||||
|
* 2018/Jun/26 ver 5.65 fix push(qword [mem])
|
||||||
|
* 2018/Mar/07 ver 5.64 fix zero division in Cpu() on some cpu
|
||||||
|
* 2018/Feb/14 ver 5.63 fix Cpu::setCacheHierarchy() and fix EvexModifierZero for clang<3.9(thanks to mgouicem)
|
||||||
|
* 2018/Feb/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso
|
||||||
|
* 2018/Feb/07 ver 5.61 vmov* supports mem{k}{z}(I forgot it)
|
||||||
|
* 2018/Jan/24 ver 5.601 add xword, yword, etc. into Xbyak::util namespace
|
||||||
|
* 2018/Jan/05 ver 5.60 support AVX-512 for Ice lake(319433-030.pdf)
|
||||||
|
* 2017/Aug/22 ver 5.53 fix mpx encoding, add bnd() prefix
|
||||||
|
* 2017/Aug/18 ver 5.52 fix align (thanks to MerryMage)
|
||||||
|
* 2017/Aug/17 ver 5.51 add multi-byte nop and align() uses it(thanks to inolen)
|
||||||
|
* 2017/Aug/08 ver 5.50 add mpx(thanks to magurosan)
|
||||||
|
* 2017/Aug/08 ver 5.45 add sha(thanks to magurosan)
|
||||||
|
* 2017/Aug/08 ver 5.44 add prefetchw(thanks to rsdubtso)
|
||||||
|
* 2017/Jul/12 ver 5.432 reduce warnings of PVS studio
|
||||||
|
* 2017/Jul/09 ver 5.431 fix hasRex() (no effect) (thanks to drillsar)
|
||||||
|
* 2017/May/14 ver 5.43 fix CodeGenerator::resetSize() (thanks to gibbed)
|
||||||
|
* 2017/May/13 ver 5.42 add movs{b,w,d,q}
|
||||||
|
* 2017/Jan/26 ver 5.41 add prefetchwt1 and support for scale == 0(thanks to rsdubtso)
|
||||||
|
* 2016/Dec/14 ver 5.40 add Label::getAddress() method to get the pointer specified by the label
|
||||||
|
* 2016/Dec/09 ver 5.34 fix handling of negative offsets when encoding disp8N(thanks to rsdubtso)
|
||||||
|
* 2016/Dec/08 ver 5.33 fix encoding of vpbroadcast{b,w,d,q}, vpinsr{b,w}, vpextr{b,w} for disp8N
|
||||||
|
* 2016/Dec/01 ver 5.32 rename __xgetbv() to _xgetbv() to support clang for Visual Studio(thanks to freiro)
|
||||||
|
* 2016/Nov/27 ver 5.31 rename AVX512_4VNNI to AVX512_4VNNIW
|
||||||
|
* 2016/Nov/27 ver 5.30 add AVX512_4VNNI, AVX512_4FMAPS instructions(thanks to rsdubtso)
|
||||||
|
* 2016/Nov/26 ver 5.20 add detection of AVX512_4VNNI and AVX512_4FMAPS(thanks to rsdubtso)
|
||||||
|
* 2016/Nov/20 ver 5.11 lost vptest for ymm(thanks to gregory38)
|
||||||
|
* 2016/Nov/20 ver 5.10 add addressing [rip+&var]
|
||||||
|
* 2016/Sep/29 ver 5.03 fix detection ERR_INVALID_OPMASK_WITH_MEMORY(thanks to PVS-Studio)
|
||||||
|
* 2016/Aug/15 ver 5.02 xbyak does not include xbyak_bin2hex.h
|
||||||
|
* 2016/Aug/15 ver 5.011 fix detection of version of gcc 5.4
|
||||||
|
* 2016/Aug/03 ver 5.01 disable omitted operand
|
||||||
|
* 2016/Jun/24 ver 5.00 support avx-512 instruction set
|
||||||
|
* 2016/Jun/13 avx-512 add mask instructions
|
||||||
|
* 2016/May/05 ver 4.91 add detection of AVX-512 to Xbyak::util::Cpu
|
||||||
|
* 2016/Mar/14 ver 4.901 comment to ready() function(thanks to skmp)
|
||||||
|
* 2016/Feb/04 ver 4.90 add jcc(const void *addr);
|
||||||
|
* 2016/Jan/30 ver 4.89 vpblendvb supports ymm reg(thanks to John Funnell)
|
||||||
|
* 2016/Jan/24 ver 4.88 lea, cmov supports 16-bit register(thanks to whyisthisfieldhere)
|
||||||
|
* 2015/Oct/05 ver 4.87 support segment selectors
|
||||||
|
* 2015/Aug/18 ver 4.86 fix [rip + label] addressing with immediate value(thanks to whyisthisfieldhere)
|
||||||
|
* 2015/Aug/10 ver 4.85 Address::operator==() is not correct(thanks to inolen)
|
||||||
|
* 2015/Jun/22 ver 4.84 call() support variadic template if available(thanks to randomstuff)
|
||||||
|
* 2015/Jun/16 ver 4.83 support movbe(thanks to benvanik)
|
||||||
|
* 2015/May/24 ver 4.82 support detection of F16C
|
||||||
|
* 2015/Apr/25 ver 4.81 fix the condition to throw exception for setSize(thanks to whyisthisfieldhere)
|
||||||
|
* 2015/Apr/22 ver 4.80 rip supports label(thanks to whyisthisfieldhere)
|
||||||
|
* 2015/Jan/28 ver 4.71 support adcx, adox, cmpxchg, rdseed, stac
|
||||||
|
* 2014/Oct/14 ver 4.70 support MmapAllocator
|
||||||
|
* 2014/Jun/13 ver 4.62 disable warning of VC2014
|
||||||
|
* 2014/May/30 ver 4.61 support bt, bts, btr, btc
|
||||||
|
* 2014/May/28 ver 4.60 support vcvtph2ps, vcvtps2ph
|
||||||
|
* 2014/Apr/11 ver 4.52 add detection of rdrand
|
||||||
|
* 2014/Mar/25 ver 4.51 remove state information of unreferenced labels
|
||||||
|
* 2014/Mar/16 ver 4.50 support new Label
|
||||||
|
* 2014/Mar/05 ver 4.40 fix wrong detection of BMI/enhanced rep on VirtualBox
|
||||||
|
* 2013/Dec/03 ver 4.30 support Reg::cvt8(), cvt16(), cvt32(), cvt64()
|
||||||
|
* 2013/Oct/16 ver 4.21 label support std::string
|
||||||
|
* 2013/Jul/30 ver 4.20 [break backward compatibility] split Reg32e class into RegExp(base+index*scale+disp) and Reg32e(means Reg32 or Reg64)
|
||||||
|
* 2013/Jul/04 ver 4.10 [break backward compatibility] change the type of Xbyak::Error from enum to a class
|
||||||
|
* 2013/Jun/21 ver 4.02 add putL(LABEL) function to put the address of the label
|
||||||
|
* 2013/Jun/21 ver 4.01 vpsllw, vpslld, vpsllq, vpsraw, vpsrad, vpsrlw, vpsrld, vpsrlq support (ymm, ymm, xmm). support vpbroadcastb, vpbroadcastw, vpbroadcastd, vpbroadcastq(thanks to Gabest).
|
||||||
|
* 2013/May/30 ver 4.00 support AVX2, VEX-encoded GPR-instructions
|
||||||
|
* 2013/Mar/27 ver 3.80 support mov(reg, "label");
|
||||||
|
* 2013/Mar/13 ver 3.76 add cqo(), jcxz(), jecxz(), jrcxz()
|
||||||
|
* 2013/Jan/15 ver 3.75 add setSize() to modify generated code
|
||||||
|
* 2013/Jan/12 ver 3.74 add CodeGenerator::reset() ; add Allocator::useProtect()
|
||||||
|
* 2013/Jan/06 ver 3.73 use unordered_map if possible
|
||||||
|
* 2012/Dec/04 ver 3.72 eax, ebx, ... are member variables of CodeGenerator(revert), Xbyak::util::eax, ... are static const.
|
||||||
|
* 2012/Nov/17 ver 3.71 and_(), or_(), xor_(), not_() are available if XBYAK_NO_OP_NAMES is not defined.
|
||||||
|
* 2012/Nov/17 change eax, ebx, ptr and so on in CodeGenerator as static member and alias of them are defined in Xbyak::util.
|
||||||
|
* 2012/Nov/09 ver 3.70 XBYAK_NO_OP_NAMES macro is added to use and_() instead of and() (thanks to Mattias)
|
||||||
|
* 2012/Nov/01 ver 3.62 add fwait/fnwait/finit/fninit
|
||||||
|
* 2012/Nov/01 ver 3.61 add fldcw/fstcw
|
||||||
|
* 2012/May/03 ver 3.60 change interface of Allocator
|
||||||
|
* 2012/Mar/23 ver 3.51 fix userPtr mode
|
||||||
|
* 2012/Mar/19 ver 3.50 support AutoGrow mode
|
||||||
|
* 2011/Nov/09 ver 3.05 fix bit property of rip addressing / support movsxd
|
||||||
|
* 2011/Aug/15 ver 3.04 fix dealing with imm8 such as add(dword [ebp-8], 0xda); (thanks to lolcat)
|
||||||
|
* 2011/Jun/16 ver 3.03 fix __GNUC_PREREQ macro for Mac gcc(thanks to t_teruya)
|
||||||
|
* 2011/Apr/28 ver 3.02 do not use xgetbv on Mac gcc
|
||||||
|
* 2011/May/24 ver 3.01 fix typo of OSXSAVE
|
||||||
|
* 2011/May/23 ver 3.00 add vcmpeqps and so on
|
||||||
|
* 2011/Feb/16 ver 2.994 beta add vmovq for 32-bit mode(I forgot it)
|
||||||
|
* 2011/Feb/16 ver 2.993 beta remove cvtReg to avoid thread unsafe
|
||||||
|
* 2011/Feb/10 ver 2.992 beta support one argument syntax for fadd like nasm
|
||||||
|
* 2011/Feb/07 ver 2.991 beta fix pextrw reg, xmm, imm(Thanks to Gabest)
|
||||||
|
* 2011/Feb/04 ver 2.99 beta support AVX
|
||||||
|
* 2010/Dec/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp
|
||||||
|
* 2010/Oct/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist
|
||||||
|
* 2010/Jun/07 ver 2.29 fix call(<label>)
|
||||||
|
* 2010/Jun/17 ver 2.28 move some member functions to public
|
||||||
|
* 2010/Jun/01 ver 2.27 support encoding of mov(reg64, imm) like yasm(not nasm)
|
||||||
|
* 2010/May/24 ver 2.26 fix sub(rsp, 1000)
|
||||||
|
* 2010/Apr/26 ver 2.25 add jc/jnc(I forgot to implement them...)
|
||||||
|
* 2010/Apr/16 ver 2.24 change the prototype of rewrite() method
|
||||||
|
* 2010/Apr/15 ver 2.23 fix align() and xbyak_util.h for Mac
|
||||||
|
* 2010/Feb/16 ver 2.22 fix inLocalLabel()/outLocalLabel()
|
||||||
|
* 2009/Dec/09 ver 2.21 support cygwin(gcc 4.3.2)
|
||||||
|
* 2009/Nov/28 support a part of FPU
|
||||||
|
* 2009/Jun/25 fix mov(qword[rax], imm); (thanks to Martin)
|
||||||
|
* 2009/Mar/10 fix redundant REX.W prefix on jmp/call reg64
|
||||||
|
* 2009/Feb/24 add movq reg64, mmx/xmm; movq mmx/xmm, reg64
|
||||||
|
* 2009/Feb/13 movd(xmm7, dword[eax]) drops 0x66 prefix (thanks to Gabest)
|
||||||
|
* 2008/Dec/30 fix call in short relative address(thanks to kato san)
|
||||||
|
* 2008/Sep/18 support @@, @f, @b and localization of label(thanks to nobu-q san)
|
||||||
|
* 2008/Sep/18 support (ptr[rip + 32bit offset]) (thanks to Dango-Chu san)
|
||||||
|
* 2008/Jun/03 fix align(). mov(ptr[eax],1) throws ERR_MEM_SIZE_IS_NOT_SPECIFIED.
|
||||||
|
* 2008/Jun/02 support memory interface allocated by user
|
||||||
|
* 2008/May/26 fix protect() to avoid invalid setting(thanks to shinichiro_h san)
|
||||||
|
* 2008/Apr/30 add cmpxchg16b, cdqe
|
||||||
|
* 2008/Apr/29 support x64
|
||||||
|
* 2008/Apr/14 code refactoring
|
||||||
|
* 2008/Mar/12 add bsr/bsf
|
||||||
|
* 2008/Feb/14 fix output of sub eax, 1234 (thanks to Robert)
|
||||||
|
* 2007/Nov/5 support lock, xadd, xchg
|
||||||
|
* 2007/Nov/2 support SSSE3/SSE4 (thanks to Dango-Chu san)
|
||||||
|
* 2007/Feb/4 fix the bug that exception doesn't occur under the condition which the offset of jmp mnemonic without T_NEAR is over 127.
|
||||||
|
* 2007/Jan/21 fix the bug to create address like [disp] select smaller representation for mov (eax|ax|al, [disp])
|
||||||
|
* 2007/Jan/4 first version
|
14
externals/xbyak/doc/install.md
vendored
Normal file
14
externals/xbyak/doc/install.md
vendored
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
# Install
|
||||||
|
|
||||||
|
The following files are necessary. Please add the path to your compile directory.
|
||||||
|
|
||||||
|
* xbyak.h
|
||||||
|
* xbyak_mnemonic.h
|
||||||
|
* xbyak_util.h
|
||||||
|
|
||||||
|
Linux:
|
||||||
|
```
|
||||||
|
make install
|
||||||
|
```
|
||||||
|
|
||||||
|
These files are copied into `/usr/local/include/xbyak`.
|
409
externals/xbyak/doc/usage.md
vendored
Normal file
409
externals/xbyak/doc/usage.md
vendored
Normal file
|
@ -0,0 +1,409 @@
|
||||||
|
# Usage
|
||||||
|
|
||||||
|
Inherit `Xbyak::CodeGenerator` class and make the class method.
|
||||||
|
```
|
||||||
|
#include <xbyak/xbyak.h>
|
||||||
|
|
||||||
|
struct Code : Xbyak::CodeGenerator {
|
||||||
|
Code(int x)
|
||||||
|
{
|
||||||
|
mov(eax, x);
|
||||||
|
ret();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
```
|
||||||
|
Or you can pass the instance of CodeGenerator without inheriting.
|
||||||
|
```
|
||||||
|
void genCode(Xbyak::CodeGenerator& code, int x) {
|
||||||
|
using namespace Xbyak::util;
|
||||||
|
code.mov(eax, x);
|
||||||
|
code.ret();
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Make an instance of the class and get the function
|
||||||
|
pointer by calling `getCode()` and call it.
|
||||||
|
```
|
||||||
|
Code c(5);
|
||||||
|
int (*f)() = c.getCode<int (*)()>();
|
||||||
|
printf("ret=%d\n", f()); // ret = 5
|
||||||
|
```
|
||||||
|
|
||||||
|
## Syntax
|
||||||
|
Similar to MASM/NASM syntax with parentheses.
|
||||||
|
|
||||||
|
```
|
||||||
|
NASM Xbyak
|
||||||
|
mov eax, ebx --> mov(eax, ebx);
|
||||||
|
inc ecx inc(ecx);
|
||||||
|
ret --> ret();
|
||||||
|
```
|
||||||
|
|
||||||
|
## Addressing
|
||||||
|
Use `qword`, `dword`, `word` and `byte` if it is necessary to specify the size of memory,
|
||||||
|
otherwise use `ptr`.
|
||||||
|
|
||||||
|
```
|
||||||
|
(ptr|qword|dword|word|byte) [base + index * (1|2|4|8) + displacement]
|
||||||
|
[rip + 32bit disp] ; x64 only
|
||||||
|
|
||||||
|
NASM Xbyak
|
||||||
|
mov eax, [ebx+ecx] --> mov(eax, ptr [ebx+ecx]);
|
||||||
|
mov al, [ebx+ecx] --> mov(al, ptr [ebx + ecx]);
|
||||||
|
test byte [esp], 4 --> test(byte [esp], 4);
|
||||||
|
inc qword [rax] --> inc(qword [rax]);
|
||||||
|
```
|
||||||
|
**Note**: `qword`, ... are member variables, so don't use `dword` as the name of an unsigned int type.
|
||||||
|
|
||||||
|
### How to use Selector (Segment Register)
|
||||||
|
```
|
||||||
|
mov eax, [fs:eax] --> putSeg(fs);
|
||||||
|
mov(eax, ptr [eax]);
|
||||||
|
mov ax, cs --> mov(ax, cs);
|
||||||
|
```
|
||||||
|
**Note**: Segment class is not derived from `Operand`.
|
||||||
|
|
||||||
|
## AVX
|
||||||
|
|
||||||
|
```
|
||||||
|
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
|
||||||
|
vaddps(xmm2, xmm3, ptr [rax]); // use ptr to access memory
|
||||||
|
vgatherdpd(xmm1, ptr [ebp + 256 + xmm2*4], xmm3);
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note**:
|
||||||
|
If `XBYAK_ENABLE_OMITTED_OPERAND` is defined, then you can use two operand version for backward compatibility.
|
||||||
|
But the newer version will not support it.
|
||||||
|
```
|
||||||
|
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
|
||||||
|
```
|
||||||
|
|
||||||
|
## AVX-512
|
||||||
|
|
||||||
|
```
|
||||||
|
vaddpd zmm2, zmm5, zmm30 --> vaddpd(zmm2, zmm5, zmm30);
|
||||||
|
vaddpd xmm30, xmm20, [rax] --> vaddpd(xmm30, xmm20, ptr [rax]);
|
||||||
|
vaddps xmm30, xmm20, [rax] --> vaddps(xmm30, xmm20, ptr [rax]);
|
||||||
|
vaddpd zmm2{k5}, zmm4, zmm2 --> vaddpd(zmm2 | k5, zmm4, zmm2);
|
||||||
|
vaddpd zmm2{k5}{z}, zmm4, zmm2 --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2);
|
||||||
|
vaddpd zmm2{k5}{z}, zmm4, zmm2,{rd-sae} --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2 | T_rd_sae);
|
||||||
|
vaddpd(zmm2 | k5 | T_z | T_rd_sae, zmm4, zmm2); // the position of `|` is arbitrary.
|
||||||
|
vcmppd k4{k3}, zmm1, zmm2, {sae}, 5 --> vcmppd(k4 | k3, zmm1, zmm2 | T_sae, 5);
|
||||||
|
|
||||||
|
vaddpd xmm1, xmm2, [rax+256] --> vaddpd(xmm1, xmm2, ptr [rax+256]);
|
||||||
|
vaddpd xmm1, xmm2, [rax+256]{1to2} --> vaddpd(xmm1, xmm2, ptr_b [rax+256]);
|
||||||
|
vaddpd ymm1, ymm2, [rax+256]{1to4} --> vaddpd(ymm1, ymm2, ptr_b [rax+256]);
|
||||||
|
vaddpd zmm1, zmm2, [rax+256]{1to8} --> vaddpd(zmm1, zmm2, ptr_b [rax+256]);
|
||||||
|
vaddps zmm1, zmm2, [rax+rcx*8+8]{1to16} --> vaddps(zmm1, zmm2, ptr_b [rax+rcx*8+8]);
|
||||||
|
vmovsd [rax]{k1}, xmm4 --> vmovsd(ptr [rax] | k1, xmm4);
|
||||||
|
|
||||||
|
vcvtpd2dq xmm16, oword [eax+33] --> vcvtpd2dq(xmm16, xword [eax+33]); // use xword for m128 instead of oword
|
||||||
|
vcvtpd2dq(xmm16, ptr [eax+33]); // default xword
|
||||||
|
vcvtpd2dq xmm21, [eax+32]{1to2} --> vcvtpd2dq(xmm21, ptr_b [eax+32]);
|
||||||
|
vcvtpd2dq xmm0, yword [eax+33] --> vcvtpd2dq(xmm0, yword [eax+33]); // use yword for m256
|
||||||
|
vcvtpd2dq xmm19, [eax+32]{1to4} --> vcvtpd2dq(xmm19, yword_b [eax+32]); // use yword_b to broadcast
|
||||||
|
|
||||||
|
vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5); // specify m512
|
||||||
|
vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit
|
||||||
|
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit
|
||||||
|
|
||||||
|
vpdpbusd(xm0, xm1, xm2); // default encoding is EVEX
|
||||||
|
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
|
||||||
|
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX encoding
|
||||||
|
```
|
||||||
|
### Remark
|
||||||
|
* `k1`, ..., `k7` are opmask registers.
|
||||||
|
- `k0` is dealt as no mask.
|
||||||
|
- e.g. `vmovaps(zmm0|k0, ptr[rax]);` and `vmovaps(zmm0|T_z, ptr[rax]);` are the same as `vmovaps(zmm0, ptr[rax]);`.
|
||||||
|
* use `| T_z`, `| T_sae`, `| T_rn_sae`, `| T_rd_sae`, `| T_ru_sae`, `| T_rz_sae` instead of `,{z}`, `,{sae}`, `,{rn-sae}`, `,{rd-sae}`, `,{ru-sae}`, `,{rz-sae}` respectively.
|
||||||
|
* `k4 | k3` is different from `k3 | k4`.
|
||||||
|
* use `ptr_b` for broadcast `{1toX}`. X is automatically determined.
|
||||||
|
* specify `xword`/`yword`/`zword(_b)` for m128/m256/m512 if necessary.
|
||||||
|
|
||||||
|
## Label
|
||||||
|
Two kinds of Label are supported. (String literal and Label class).
|
||||||
|
|
||||||
|
### String literal
|
||||||
|
```
|
||||||
|
L("L1");
|
||||||
|
jmp("L1");
|
||||||
|
|
||||||
|
jmp("L2");
|
||||||
|
...
|
||||||
|
a few mnemonics (8-bit displacement jmp)
|
||||||
|
...
|
||||||
|
L("L2");
|
||||||
|
|
||||||
|
jmp("L3", T_NEAR);
|
||||||
|
...
|
||||||
|
a lot of mnemonics (32-bit displacement jmp)
|
||||||
|
...
|
||||||
|
L("L3");
|
||||||
|
```
|
||||||
|
|
||||||
|
* Call `hasUndefinedLabel()` to verify your code has no undefined label.
|
||||||
|
* You can use a label as the immediate value of mov, such as `mov(eax, "L2")`.
|
||||||
|
|
||||||
|
### Support `@@`, `@f`, `@b` like MASM
|
||||||
|
|
||||||
|
```
|
||||||
|
L("@@"); // <A>
|
||||||
|
jmp("@b"); // jmp to <A>
|
||||||
|
jmp("@f"); // jmp to <B>
|
||||||
|
L("@@"); // <B>
|
||||||
|
jmp("@b"); // jmp to <B>
|
||||||
|
mov(eax, "@b");
|
||||||
|
jmp(eax); // jmp to <B>
|
||||||
|
```
|
||||||
|
|
||||||
|
### Local label
|
||||||
|
|
||||||
|
Label symbols beginning with a period between `inLocalLabel()` and `outLocalLabel()`
|
||||||
|
are treated as a local label.
|
||||||
|
`inLocalLabel()` and `outLocalLabel()` can be nested.
|
||||||
|
|
||||||
|
```
|
||||||
|
void func1()
|
||||||
|
{
|
||||||
|
inLocalLabel();
|
||||||
|
L(".lp"); // <A> ; local label
|
||||||
|
...
|
||||||
|
jmp(".lp"); // jmp to <A>
|
||||||
|
L("aaa"); // global label <C>
|
||||||
|
outLocalLabel();
|
||||||
|
|
||||||
|
inLocalLabel();
|
||||||
|
L(".lp"); // <B> ; local label
|
||||||
|
func1();
|
||||||
|
jmp(".lp"); // jmp to <B>
|
||||||
|
inLocalLabel();
|
||||||
|
jmp("aaa"); // jmp to <C>
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### short and long jump
|
||||||
|
Xbyak deals with jump mnemonics of an undefined label as short jump if no type is specified.
|
||||||
|
So if the distance between jmp and label is larger than 127 bytes, then xbyak will cause an error.
|
||||||
|
|
||||||
|
```
|
||||||
|
jmp("short-jmp"); // short jmp
|
||||||
|
// small code
|
||||||
|
L("short-jmp");
|
||||||
|
|
||||||
|
jmp("long-jmp");
|
||||||
|
// long code
|
||||||
|
L("long-jmp"); // throw exception
|
||||||
|
```
|
||||||
|
Then specify T_NEAR for jmp.
|
||||||
|
```
|
||||||
|
jmp("long-jmp", T_NEAR); // long jmp
|
||||||
|
// long code
|
||||||
|
L("long-jmp");
|
||||||
|
```
|
||||||
|
Or call `setDefaultJmpNEAR(true);` once, then the default type is set to T_NEAR.
|
||||||
|
```
|
||||||
|
jmp("long-jmp"); // long jmp
|
||||||
|
// long code
|
||||||
|
L("long-jmp");
|
||||||
|
```
|
||||||
|
|
||||||
|
### Label class
|
||||||
|
|
||||||
|
`L()` and `jxx()` support Label class.
|
||||||
|
|
||||||
|
```
|
||||||
|
Xbyak::Label label1, label2;
|
||||||
|
L(label1);
|
||||||
|
...
|
||||||
|
jmp(label1);
|
||||||
|
...
|
||||||
|
jmp(label2);
|
||||||
|
...
|
||||||
|
L(label2);
|
||||||
|
```
|
||||||
|
|
||||||
|
Use `putL` for jmp table
|
||||||
|
```
|
||||||
|
Label labelTbl, L0, L1, L2;
|
||||||
|
mov(rax, labelTbl);
|
||||||
|
// rdx is an index of jump table
|
||||||
|
jmp(ptr [rax + rdx * sizeof(void*)]);
|
||||||
|
L(labelTbl);
|
||||||
|
putL(L0);
|
||||||
|
putL(L1);
|
||||||
|
putL(L2);
|
||||||
|
L(L0);
|
||||||
|
....
|
||||||
|
L(L1);
|
||||||
|
....
|
||||||
|
```
|
||||||
|
|
||||||
|
`assignL(dstLabel, srcLabel)` binds dstLabel with srcLabel.
|
||||||
|
|
||||||
|
```
|
||||||
|
Label label2;
|
||||||
|
Label label1 = L(); // make label1 ; same as Label label1; L(label1);
|
||||||
|
...
|
||||||
|
jmp(label2); // label2 is not determined here
|
||||||
|
...
|
||||||
|
assignL(label2, label1); // label2 <- label1
|
||||||
|
```
|
||||||
|
The `jmp` in the above code jumps to label1 assigned by `assignL`.
|
||||||
|
|
||||||
|
**Note**:
|
||||||
|
* srcLabel must be used in `L()`.
|
||||||
|
* dstLabel must not be used in `L()`.
|
||||||
|
|
||||||
|
`Label::getAddress()` returns the address specified by the label instance and 0 if not specified.
|
||||||
|
```
|
||||||
|
// not AutoGrow mode
|
||||||
|
Label label;
|
||||||
|
assert(label.getAddress() == 0);
|
||||||
|
L(label);
|
||||||
|
assert(label.getAddress() == getCurr());
|
||||||
|
```
|
||||||
|
|
||||||
|
### Rip ; relative addressing
|
||||||
|
```
|
||||||
|
Label label;
|
||||||
|
mov(eax, ptr [rip + label]); // eax = 4
|
||||||
|
...
|
||||||
|
|
||||||
|
L(label);
|
||||||
|
dd(4);
|
||||||
|
```
|
||||||
|
```
|
||||||
|
int x;
|
||||||
|
...
|
||||||
|
mov(eax, ptr[rip + &x]); // throw exception if the difference between &x and current position is larger than 2GiB
|
||||||
|
```
|
||||||
|
|
||||||
|
## Far jump
|
||||||
|
|
||||||
|
Use `word|dword|qword` instead of `ptr` to specify the address size.
|
||||||
|
|
||||||
|
### 32 bit mode
|
||||||
|
```
|
||||||
|
jmp(word[eax], T_FAR); // jmp m16:16(FF /5)
|
||||||
|
jmp(dword[eax], T_FAR); // jmp m16:32(FF /5)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 64 bit mode
|
||||||
|
```
|
||||||
|
jmp(word[rax], T_FAR); // jmp m16:16(FF /5)
|
||||||
|
jmp(dword[rax], T_FAR); // jmp m16:32(FF /5)
|
||||||
|
jmp(qword[rax], T_FAR); // jmp m16:64(REX.W FF /5)
|
||||||
|
```
|
||||||
|
The same applies to `call`.
|
||||||
|
|
||||||
|
## Code size
|
||||||
|
The default max code size is 4096 bytes.
|
||||||
|
Specify the size in constructor of `CodeGenerator()` if necessary.
|
||||||
|
|
||||||
|
```
|
||||||
|
class Quantize : public Xbyak::CodeGenerator {
|
||||||
|
public:
|
||||||
|
Quantize()
|
||||||
|
: CodeGenerator(8192)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
...
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
## User allocated memory
|
||||||
|
|
||||||
|
You can make jit code on prepared memory.
|
||||||
|
|
||||||
|
Call `setProtectModeRE` yourself to change memory mode if using the prepared memory.
|
||||||
|
|
||||||
|
```
|
||||||
|
uint8_t alignas(4096) buf[8192]; // C++11 or later
|
||||||
|
|
||||||
|
struct Code : Xbyak::CodeGenerator {
|
||||||
|
Code() : Xbyak::CodeGenerator(sizeof(buf), buf)
|
||||||
|
{
|
||||||
|
mov(rax, 123);
|
||||||
|
ret();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
Code c;
|
||||||
|
c.setProtectModeRE(); // set memory to Read/Exec
|
||||||
|
printf("%d\n", c.getCode<int(*)()>()());
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note**: See [../sample/test0.cpp](../sample/test0.cpp).
|
||||||
|
|
||||||
|
### AutoGrow
|
||||||
|
|
||||||
|
The memory region for jit is automatically extended if necessary when `AutoGrow` is specified in a constructor of `CodeGenerator`.
|
||||||
|
|
||||||
|
Call `ready()` or `readyRE()` before calling `getCode()` to fix jump address.
|
||||||
|
```
|
||||||
|
struct Code : Xbyak::CodeGenerator {
|
||||||
|
Code()
|
||||||
|
: Xbyak::CodeGenerator(<default memory size>, Xbyak::AutoGrow)
|
||||||
|
{
|
||||||
|
...
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Code c;
|
||||||
|
// generate code for jit
|
||||||
|
c.ready(); // mode = Read/Write/Exec
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note**:
|
||||||
|
* Don't use the address returned by `getCurr()` before calling `ready()` because it may be an invalid address.
|
||||||
|
|
||||||
|
### Read/Exec mode
|
||||||
|
Xbyak sets the memory to Read/Write/Exec mode to run jit code.
|
||||||
|
If you want to use Read/Exec mode for security, then specify `DontSetProtectRWE` for `CodeGenerator` and
|
||||||
|
call `setProtectModeRE()` after generating jit code.
|
||||||
|
|
||||||
|
```
|
||||||
|
struct Code : Xbyak::CodeGenerator {
|
||||||
|
Code()
|
||||||
|
: Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE)
|
||||||
|
{
|
||||||
|
mov(eax, 123);
|
||||||
|
ret();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Code c;
|
||||||
|
c.setProtectModeRE();
|
||||||
|
...
|
||||||
|
|
||||||
|
```
|
||||||
|
Call `readyRE()` instead of `ready()` when using `AutoGrow` mode.
|
||||||
|
See [protect-re.cpp](../sample/protect-re.cpp).
|
||||||
|
|
||||||
|
## Exception-less mode
|
||||||
|
If `XBYAK_NO_EXCEPTION` is defined, then gcc/clang can compile xbyak with `-fno-exceptions`.
|
||||||
|
Instead of throwing an exception, `Xbyak::GetError()` returns a non-zero value (e.g. `ERR_BAD_ADDRESSING`) if there is something wrong.
|
||||||
|
The status is not cleared automatically, so you should reset it by calling `Xbyak::ClearError()`.
|
||||||
|
`CodeGenerator::reset()` calls `ClearError()`.
|
||||||
|
|
||||||
|
## Macro
|
||||||
|
|
||||||
|
* **XBYAK32** is defined on 32bit.
|
||||||
|
* **XBYAK64** is defined on 64bit.
|
||||||
|
* **XBYAK64_WIN** is defined on 64bit Windows(VC).
|
||||||
|
* **XBYAK64_GCC** is defined on 64bit gcc, cygwin.
|
||||||
|
* define **XBYAK_USE_OP_NAMES** on gcc with `-fno-operator-names` if you want to use `and()`, ....
|
||||||
|
* define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(deprecated in the future).
|
||||||
|
* define **XBYAK_UNDEF_JNL** if Bessel function jnl is defined as macro.
|
||||||
|
* define **XBYAK_NO_EXCEPTION** for a compiler option `-fno-exceptions`.
|
||||||
|
* define **XBYAK_USE_MEMFD** on Linux so that /proc/self/maps shows the area used by xbyak.
|
||||||
|
* define **XBYAK_OLD_DISP_CHECK** if the old disp check is necessary (deprecated in the future).
|
||||||
|
|
||||||
|
## Sample
|
||||||
|
|
||||||
|
* [test0.cpp](../sample/test0.cpp) ; tiny sample (x86, x64)
|
||||||
|
* [quantize.cpp](../sample/quantize.cpp) ; JIT optimized quantization by fast division (x86 only)
|
||||||
|
* [calc.cpp](../sample/calc.cpp) ; assemble and estimate a given polynomial (x86, x64)
|
||||||
|
* [bf.cpp](../sample/bf.cpp) ; JIT brainfuck (x86, x64)
|
17
externals/xbyak/gen/Makefile
vendored
17
externals/xbyak/gen/Makefile
vendored
|
@ -1,7 +1,7 @@
|
||||||
TARGET=../xbyak/xbyak_mnemonic.h
|
TARGET=../xbyak/xbyak_mnemonic.h
|
||||||
BIN=sortline gen_code gen_avx512
|
BIN=sortline gen_code gen_avx512
|
||||||
CFLAGS=-I../ -O2 -DXBYAK_NO_OP_NAMES -Wall -Wextra -Wno-missing-field-initializers
|
CFLAGS=-I../ -O2 -DXBYAK_NO_OP_NAMES -Wall -Wextra -Wno-missing-field-initializers $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS)
|
||||||
all: $(TARGET)
|
all: $(TARGET) ../CMakeLists.txt ../meson.build ../readme.md ../readme.txt
|
||||||
sortline: sortline.cpp
|
sortline: sortline.cpp
|
||||||
$(CXX) $(CFLAGS) $< -o $@
|
$(CXX) $(CFLAGS) $< -o $@
|
||||||
gen_code: gen_code.cpp ../xbyak/xbyak.h avx_type.hpp
|
gen_code: gen_code.cpp ../xbyak/xbyak.h avx_type.hpp
|
||||||
|
@ -22,5 +22,18 @@ $(TARGET): $(BIN)
|
||||||
echo "#endif" >> $@
|
echo "#endif" >> $@
|
||||||
echo "#endif" >> $@
|
echo "#endif" >> $@
|
||||||
|
|
||||||
|
VER=$(shell head -n 1 ../xbyak/xbyak_mnemonic.h|grep -o "[0-9.]*")
|
||||||
|
../CMakeLists.txt: $(TARGET)
|
||||||
|
sed -i -e 's/CXX VERSION [0-9.]*/CXX VERSION $(VER)/' $@
|
||||||
|
|
||||||
|
../meson.build: $(TARGET)
|
||||||
|
sed -i -e "s/version: '[0-9.]*',/version: '$(VER)',/" $@
|
||||||
|
|
||||||
|
../readme.md: $(TARGET)
|
||||||
|
sed -l 2 -i -e "s/# Xbyak [0-9.]*/# Xbyak $(VER)/" $@
|
||||||
|
|
||||||
|
../readme.txt: $(TARGET)
|
||||||
|
sed -l 2 -i -e "s/Xbyak [0-9.]*/Xbyak $(VER)/" $@
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
$(RM) $(BIN) $(TARGET)
|
$(RM) $(BIN) $(TARGET)
|
||||||
|
|
54
externals/xbyak/gen/avx_type.hpp
vendored
54
externals/xbyak/gen/avx_type.hpp
vendored
|
@ -12,9 +12,10 @@
|
||||||
//
|
//
|
||||||
T_N_VL = 1 << 3, // N * (1, 2, 4) for VL
|
T_N_VL = 1 << 3, // N * (1, 2, 4) for VL
|
||||||
T_DUP = 1 << 4, // N = (8, 32, 64)
|
T_DUP = 1 << 4, // N = (8, 32, 64)
|
||||||
T_66 = 1 << 5,
|
T_66 = 1 << 5, // pp = 1
|
||||||
T_F3 = 1 << 6,
|
T_F3 = 1 << 6, // pp = 2
|
||||||
T_F2 = 1 << 7,
|
T_F2 = T_66 | T_F3, // pp = 3
|
||||||
|
T_ER_R = 1 << 7, // reg{er}
|
||||||
T_0F = 1 << 8,
|
T_0F = 1 << 8,
|
||||||
T_0F38 = 1 << 9,
|
T_0F38 = 1 << 9,
|
||||||
T_0F3A = 1 << 10,
|
T_0F3A = 1 << 10,
|
||||||
|
@ -35,11 +36,18 @@
|
||||||
T_MUST_EVEX = 1 << 25, // contains T_EVEX
|
T_MUST_EVEX = 1 << 25, // contains T_EVEX
|
||||||
T_B32 = 1 << 26, // m32bcst
|
T_B32 = 1 << 26, // m32bcst
|
||||||
T_B64 = 1 << 27, // m64bcst
|
T_B64 = 1 << 27, // m64bcst
|
||||||
|
T_B16 = T_B32 | T_B64, // m16bcst
|
||||||
T_M_K = 1 << 28, // mem{k}
|
T_M_K = 1 << 28, // mem{k}
|
||||||
T_VSIB = 1 << 29,
|
T_VSIB = 1 << 29,
|
||||||
T_MEM_EVEX = 1 << 30, // use evex if mem
|
T_MEM_EVEX = 1 << 30, // use evex if mem
|
||||||
|
T_FP16 = 1 << 31,
|
||||||
|
T_MAP5 = T_FP16 | T_0F,
|
||||||
|
T_MAP6 = T_FP16 | T_0F38,
|
||||||
T_XXX
|
T_XXX
|
||||||
};
|
};
|
||||||
|
// T_66 = 1, T_F3 = 2, T_F2 = 3
|
||||||
|
uint32_t getPP(int type) { return (type >> 5) & 3; }
|
||||||
|
|
||||||
|
|
||||||
const int NONE = 256; // same as Xbyak::CodeGenerator::NONE
|
const int NONE = 256; // same as Xbyak::CodeGenerator::NONE
|
||||||
|
|
||||||
|
@ -62,25 +70,30 @@ std::string type2String(int type)
|
||||||
if (!str.empty()) str += " | ";
|
if (!str.empty()) str += " | ";
|
||||||
str += "T_DUP";
|
str += "T_DUP";
|
||||||
}
|
}
|
||||||
if (type & T_66) {
|
|
||||||
if (!str.empty()) str += " | ";
|
|
||||||
str += "T_66";
|
|
||||||
}
|
|
||||||
if (type & T_F3) {
|
|
||||||
if (!str.empty()) str += " | ";
|
|
||||||
str += "T_F3";
|
|
||||||
}
|
|
||||||
if (type & T_F2) {
|
if (type & T_F2) {
|
||||||
if (!str.empty()) str += " | ";
|
if (!str.empty()) str += " | ";
|
||||||
str += "T_F2";
|
switch (type & T_F2) {
|
||||||
|
case T_66: str += "T_66"; break;
|
||||||
|
case T_F3: str += "T_F3"; break;
|
||||||
|
case T_F2: str += "T_F2"; break;
|
||||||
|
default: break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (type & T_0F) {
|
if (type & T_0F) {
|
||||||
if (!str.empty()) str += " | ";
|
if (!str.empty()) str += " | ";
|
||||||
str += "T_0F";
|
if (type & T_FP16) {
|
||||||
|
str += "T_MAP5";
|
||||||
|
} else {
|
||||||
|
str += "T_0F";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (type & T_0F38) {
|
if (type & T_0F38) {
|
||||||
if (!str.empty()) str += " | ";
|
if (!str.empty()) str += " | ";
|
||||||
str += "T_0F38";
|
if (type & T_FP16) {
|
||||||
|
str += "T_MAP6";
|
||||||
|
} else {
|
||||||
|
str += "T_0F38";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (type & T_0F3A) {
|
if (type & T_0F3A) {
|
||||||
if (!str.empty()) str += " | ";
|
if (!str.empty()) str += " | ";
|
||||||
|
@ -130,6 +143,10 @@ std::string type2String(int type)
|
||||||
if (!str.empty()) str += " | ";
|
if (!str.empty()) str += " | ";
|
||||||
str += "T_ER_Z";
|
str += "T_ER_Z";
|
||||||
}
|
}
|
||||||
|
if (type & T_ER_R) {
|
||||||
|
if (!str.empty()) str += " | ";
|
||||||
|
str += "T_ER_R";
|
||||||
|
}
|
||||||
if (type & T_SAE_X) {
|
if (type & T_SAE_X) {
|
||||||
if (!str.empty()) str += " | ";
|
if (!str.empty()) str += " | ";
|
||||||
str += "T_SAE_X";
|
str += "T_SAE_X";
|
||||||
|
@ -148,9 +165,12 @@ std::string type2String(int type)
|
||||||
}
|
}
|
||||||
if (type & T_B32) {
|
if (type & T_B32) {
|
||||||
if (!str.empty()) str += " | ";
|
if (!str.empty()) str += " | ";
|
||||||
str += "T_B32";
|
if (type & T_B64) {
|
||||||
}
|
str += "T_B16"; // T_B16 = T_B32 | T_B64
|
||||||
if (type & T_B64) {
|
} else {
|
||||||
|
str += "T_B32";
|
||||||
|
}
|
||||||
|
} else if (type & T_B64) {
|
||||||
if (!str.empty()) str += " | ";
|
if (!str.empty()) str += " | ";
|
||||||
str += "T_B64";
|
str += "T_B64";
|
||||||
}
|
}
|
||||||
|
|
275
externals/xbyak/gen/gen_avx512.cpp
vendored
275
externals/xbyak/gen/gen_avx512.cpp
vendored
|
@ -107,6 +107,8 @@ void putVcmp()
|
||||||
{ 0xC2, "vcmpps", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_YMM | T_B32, true },
|
{ 0xC2, "vcmpps", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_YMM | T_B32, true },
|
||||||
{ 0xC2, "vcmpsd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_F2 | T_N8, true },
|
{ 0xC2, "vcmpsd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_F2 | T_N8, true },
|
||||||
{ 0xC2, "vcmpss", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_F3 | T_N4, true },
|
{ 0xC2, "vcmpss", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_F3 | T_N4, true },
|
||||||
|
{ 0xC2, "vcmpph", T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_YMM | T_B16, true },
|
||||||
|
{ 0xC2, "vcmpsh", T_F3 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, true },
|
||||||
|
|
||||||
{ 0x74, "vpcmpeqb", T_66 | T_0F | T_MUST_EVEX | T_YMM, false },
|
{ 0x74, "vpcmpeqb", T_66 | T_0F | T_MUST_EVEX | T_YMM, false },
|
||||||
{ 0x75, "vpcmpeqw", T_66 | T_0F | T_MUST_EVEX | T_YMM, false },
|
{ 0x75, "vpcmpeqw", T_66 | T_0F | T_MUST_EVEX | T_YMM, false },
|
||||||
|
@ -144,6 +146,25 @@ void putVcmp()
|
||||||
printf("void %s(const Opmask& k, const Xmm& x, const Operand& op%s) { opAVX_K_X_XM(k, x, op, %s, 0x%02X%s); }\n"
|
printf("void %s(const Opmask& k, const Xmm& x, const Operand& op%s) { opAVX_K_X_XM(k, x, op, %s, 0x%02X%s); }\n"
|
||||||
, p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
|
, p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
|
||||||
}
|
}
|
||||||
|
puts("void vcomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2F); }");
|
||||||
|
puts("void vucomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2E); }");
|
||||||
|
}
|
||||||
|
|
||||||
|
void putVcmpAlias()
|
||||||
|
{
|
||||||
|
const char pred[32][16] = {
|
||||||
|
"eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord",
|
||||||
|
"eq_uq", "nge", "ngt", "false", "neq_oq", "ge", "gt",
|
||||||
|
"true", "eq_os", "lt_oq", "le_oq", "unord_s", "neq_us", "nlt_uq", "nle_uq", "ord_s",
|
||||||
|
"eq_us", "nge_uq", "ngt_uq", "false_os", "neq_os", "ge_oq", "gt_oq", "true_us"
|
||||||
|
};
|
||||||
|
const char suf[][4] = { "pd", "ps", "sd", "ss" };
|
||||||
|
for (int i = 0; i < 4; i++) {
|
||||||
|
const char *s = suf[i];
|
||||||
|
for (int j = 0; j < 32; j++) {
|
||||||
|
printf("void vcmp%s%s(const Opmask& k, const Xmm& x, const Operand& op) { vcmp%s(k, x, op, %d); }\n", pred[j], s, s, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// XM_X
|
// XM_X
|
||||||
|
@ -178,6 +199,14 @@ void putX_XM()
|
||||||
{ 0x89, "vpexpandq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },
|
{ 0x89, "vpexpandq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },
|
||||||
{ 0x42, "vgetexppd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z },
|
{ 0x42, "vgetexppd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z },
|
||||||
{ 0x42, "vgetexpps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z },
|
{ 0x42, "vgetexpps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z },
|
||||||
|
{ 0x42, "vgetexpph", T_66 | T_MAP6 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_SAE_Z },
|
||||||
|
|
||||||
|
{ 0x7D, "vcvtph2uw", T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z },
|
||||||
|
{ 0x7D, "vcvtph2w", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z },
|
||||||
|
{ 0x7C, "vcvttph2uw", T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_SAE_Z },
|
||||||
|
{ 0x7C, "vcvttph2w", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_SAE_Z },
|
||||||
|
{ 0x7D, "vcvtuw2ph", T_F2 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z },
|
||||||
|
{ 0x7D, "vcvtw2ph", T_F3 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z },
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const Tbl *p = &tbl[i];
|
const Tbl *p = &tbl[i];
|
||||||
|
@ -208,6 +237,8 @@ void putM_X()
|
||||||
{ 0x7F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
{ 0x7F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||||
{ 0x7F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
{ 0x7F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||||
{ 0x7F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
{ 0x7F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||||
|
{ 0x11, "vmovsh", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2 | T_M_K },
|
||||||
|
{ 0x7E, "vmovw", T_66 | T_MAP5 | T_MUST_EVEX | T_N2 },
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const Tbl *p = &tbl[i];
|
const Tbl *p = &tbl[i];
|
||||||
|
@ -299,8 +330,10 @@ void putX_X_XM_IMM()
|
||||||
|
|
||||||
{ 0x43, "vgetexpsd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, false },
|
{ 0x43, "vgetexpsd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, false },
|
||||||
{ 0x43, "vgetexpss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, false },
|
{ 0x43, "vgetexpss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, false },
|
||||||
|
{ 0x43, "vgetexpsh", T_66 | T_MAP6 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, false },
|
||||||
{ 0x27, "vgetmantsd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true },
|
{ 0x27, "vgetmantsd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true },
|
||||||
{ 0x27, "vgetmantss", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true },
|
{ 0x27, "vgetmantss", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true },
|
||||||
|
{ 0x27, "vgetmantsh", T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, true },
|
||||||
|
|
||||||
{ 0x54, "vfixupimmpd", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, true },
|
{ 0x54, "vfixupimmpd", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, true },
|
||||||
{ 0x54, "vfixupimmps", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, true },
|
{ 0x54, "vfixupimmps", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, true },
|
||||||
|
@ -310,17 +343,26 @@ void putX_X_XM_IMM()
|
||||||
{ 0x4D, "vrcp14sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8, false },
|
{ 0x4D, "vrcp14sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8, false },
|
||||||
{ 0x4D, "vrcp14ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4, false },
|
{ 0x4D, "vrcp14ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4, false },
|
||||||
|
|
||||||
|
{ 0x4D, "vrcpsh", T_66 | T_MAP6 | T_MUST_EVEX | T_EW0 | T_N2, false },
|
||||||
|
|
||||||
{ 0x4F, "vrsqrt14sd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, false },
|
{ 0x4F, "vrsqrt14sd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, false },
|
||||||
{ 0x4F, "vrsqrt14ss", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, false },
|
{ 0x4F, "vrsqrt14ss", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, false },
|
||||||
|
|
||||||
{ 0x0B, "vrndscalesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, true },
|
{ 0x4F, "vrsqrtsh", T_66 | T_MAP6 | T_MUST_EVEX | T_EW0 | T_N2, false },
|
||||||
{ 0x0A, "vrndscaless", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, true },
|
{ 0x51, "vsqrtsh", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N2, false },
|
||||||
|
|
||||||
|
{ 0x0B, "vrndscalesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8 | T_SAE_X, true },
|
||||||
|
{ 0x0A, "vrndscaless", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4 | T_SAE_X, true },
|
||||||
|
{ 0x0A, "vrndscalesh", T_0F3A | T_MUST_EVEX | T_EW0 | T_N2 | T_SAE_X, true },
|
||||||
|
|
||||||
{ 0x2C, "vscalefpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, false },
|
{ 0x2C, "vscalefpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, false },
|
||||||
{ 0x2C, "vscalefps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z, false },
|
{ 0x2C, "vscalefps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z, false },
|
||||||
{ 0x2D, "vscalefsd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_ER_X | T_N8, false },
|
{ 0x2D, "vscalefsd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_ER_X | T_N8, false },
|
||||||
{ 0x2D, "vscalefss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N4, false },
|
{ 0x2D, "vscalefss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N4, false },
|
||||||
|
|
||||||
|
{ 0x2C, "vscalefph", T_66 | T_MAP6 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_ER_Z, false },
|
||||||
|
{ 0x2D, "vscalefsh", T_66 | T_MAP6 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N2, false },
|
||||||
|
|
||||||
{ 0x42, "vdbpsadbw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0, true },
|
{ 0x42, "vdbpsadbw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0, true },
|
||||||
{ 0x83, "vpmultishiftqb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
{ 0x83, "vpmultishiftqb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||||
|
|
||||||
|
@ -343,6 +385,7 @@ void putX_X_XM_IMM()
|
||||||
|
|
||||||
{ 0x57, "vreducesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true },
|
{ 0x57, "vreducesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true },
|
||||||
{ 0x57, "vreducess", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true },
|
{ 0x57, "vreducess", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true },
|
||||||
|
{ 0x57, "vreducesh", T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, true },
|
||||||
|
|
||||||
{ 0xB4, "vpmadd52luq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
{ 0xB4, "vpmadd52luq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||||
{ 0xB5, "vpmadd52huq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
{ 0xB5, "vpmadd52huq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||||
|
@ -365,6 +408,11 @@ void putX_X_XM_IMM()
|
||||||
|
|
||||||
{ 0x72, "vcvtne2ps2bf16", T_F2 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
|
{ 0x72, "vcvtne2ps2bf16", T_F2 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
|
||||||
{ 0x52, "vdpbf16ps", T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
|
{ 0x52, "vdpbf16ps", T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
|
||||||
|
|
||||||
|
{ 0x5A, "vcvtsd2sh", T_F2 | T_MAP5 | T_MUST_EVEX | T_EW1 | T_ER_X | T_N8, false },
|
||||||
|
{ 0x5A, "vcvtsh2sd", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, false },
|
||||||
|
{ 0x13, "vcvtsh2ss", T_MAP6 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, false },
|
||||||
|
{ 0x1D, "vcvtss2sh", T_MAP5 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N4, false },
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const Tbl *p = &tbl[i];
|
const Tbl *p = &tbl[i];
|
||||||
|
@ -488,20 +536,81 @@ void putBroadcast(bool only64bit)
|
||||||
|
|
||||||
void putCvt()
|
void putCvt()
|
||||||
{
|
{
|
||||||
puts("void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x79); }");
|
const struct Tbl {
|
||||||
puts("void vcvtps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 0x7B); }");
|
uint8_t code;
|
||||||
puts("void vcvtps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 0x79); }");
|
const char *name;
|
||||||
puts("void vcvtqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5B); }");
|
int type;
|
||||||
puts("void vcvttpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, 0x78); }");
|
int ptn;
|
||||||
puts("void vcvttps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x7A); }");
|
} tbl[] = {
|
||||||
puts("void vcvttps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x78); }");
|
{ 0x79, "vcvtsd2usi", T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_ER_X, 0 },
|
||||||
puts("void vcvtudq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_F3 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0x7A); }");
|
{ 0x79, "vcvtss2usi", T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_ER_X, 0 },
|
||||||
|
{ 0x78, "vcvttsd2usi", T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_SAE_X, 0 },
|
||||||
|
{ 0x78, "vcvttss2usi", T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_SAE_X, 0 },
|
||||||
|
{ 0x2D, "vcvtsh2si", T_F3 | T_MAP5 | T_MUST_EVEX | T_N2 | T_ER_X, 0 },
|
||||||
|
{ 0x79, "vcvtsh2usi", T_F3 | T_MAP5 | T_MUST_EVEX | T_N2 | T_ER_X, 0 },
|
||||||
|
{ 0x2C, "vcvttsh2si", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2 | T_SAE_X, 0 },
|
||||||
|
{ 0x78, "vcvttsh2usi", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2 | T_SAE_X, 0 },
|
||||||
|
|
||||||
puts("void vcvtsd2usi(const Reg32e& r, const Operand& op) { int type = (T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_ER_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x79); }");
|
{ 0x7B, "vcvtps2qq", T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 1 },
|
||||||
puts("void vcvtss2usi(const Reg32e& r, const Operand& op) { int type = (T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_ER_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x79); }");
|
{ 0x79, "vcvtps2uqq", T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 1 },
|
||||||
puts("void vcvttsd2usi(const Reg32e& r, const Operand& op) { int type = (T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_SAE_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x78); }");
|
{ 0x7A, "vcvttps2qq", T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 1 },
|
||||||
puts("void vcvttss2usi(const Reg32e& r, const Operand& op) { int type = (T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_SAE_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x78); }");
|
{ 0x78, "vcvttps2uqq", T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 1 },
|
||||||
puts("void vcvtuqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F2 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x7A); }");
|
{ 0x7A, "vcvtudq2pd", T_F3 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 1 },
|
||||||
|
{ 0x5B, "vcvtph2dq", T_66 | T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_ER_Y | T_N8 | T_N_VL, 1 },
|
||||||
|
{ 0x13, "vcvtph2psx", T_66 | T_MAP6 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Y | T_N8 | T_N_VL, 1 },
|
||||||
|
{ 0x79, "vcvtph2udq", T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_ER_Y | T_N8 | T_N_VL, 1 },
|
||||||
|
{ 0x5B, "vcvttph2dq", T_F3 | T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Y | T_N8 | T_N_VL, 1 },
|
||||||
|
{ 0x78, "vcvttph2udq", T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Y | T_N8 | T_N_VL, 1 },
|
||||||
|
|
||||||
|
{ 0x79, "vcvtpd2udq", T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 2 },
|
||||||
|
{ 0x5B, "vcvtqq2ps", T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 2 },
|
||||||
|
{ 0x78, "vcvttpd2udq", T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, 2 },
|
||||||
|
{ 0x7A, "vcvtuqq2ps", T_F2 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 2 },
|
||||||
|
|
||||||
|
{ 0x5A, "vcvtph2pd", T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_SAE_X, 3 },
|
||||||
|
{ 0x7B, "vcvtph2qq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_ER_X, 3 },
|
||||||
|
{ 0x79, "vcvtph2uqq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_ER_X, 3 },
|
||||||
|
{ 0x78, "vcvttph2uqq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_SAE_X, 3 },
|
||||||
|
{ 0x7A, "vcvttph2qq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_SAE_X, 3 },
|
||||||
|
|
||||||
|
{ 0x5B, "vcvtdq2ph", T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z | T_N16 | T_N_VL, 4 },
|
||||||
|
{ 0x1D, "vcvtps2phx", T_66 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z | T_N16 | T_N_VL, 4 },
|
||||||
|
{ 0x7A, "vcvtudq2ph", T_F2 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z | T_N16 | T_N_VL, 4 },
|
||||||
|
|
||||||
|
{ 0x5A, "vcvtpd2ph", T_66 | T_MAP5 | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z | T_N16 | T_N_VL, 5 },
|
||||||
|
{ 0x5B, "vcvtqq2ph", T_MAP5 | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z | T_N16 | T_N_VL, 5 },
|
||||||
|
{ 0x7A, "vcvtuqq2ph", T_F2 | T_MAP5 | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z | T_N16 | T_N_VL, 5 },
|
||||||
|
|
||||||
|
{ 0x2A, "vcvtsi2sh", T_F3 | T_MAP5 | T_MUST_EVEX | T_ER_R | T_M_K, 6 },
|
||||||
|
{ 0x7B, "vcvtusi2sh", T_F3 | T_MAP5 | T_MUST_EVEX | T_ER_R | T_M_K, 6 },
|
||||||
|
};
|
||||||
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
|
const Tbl& p = tbl[i];
|
||||||
|
std::string type = type2String(p.type);
|
||||||
|
switch (p.ptn) {
|
||||||
|
case 0:
|
||||||
|
printf("void %s(const Reg32e& r, const Operand& op) { int type = (%s) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
printf("void %s(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
printf("void %s(const Xmm& x, const Operand& op) { opCvt2(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
printf("void %s(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
printf("void %s(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||||
|
break;
|
||||||
|
case 5:
|
||||||
|
printf("void %s(const Xmm& x, const Operand& op) { opCvt5(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||||
|
break;
|
||||||
|
case 6:
|
||||||
|
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { if (!(x1.isXMM() && x2.isXMM() && op.isBit(32|64))) XBYAK_THROW(ERR_BAD_COMBINATION) int type = (%s) | (op.isBit(32) ? (T_EW0 | T_N4) : (T_EW1 | T_N8)); opVex(x1, &x2, op, type, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
puts("void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }");
|
puts("void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }");
|
||||||
puts("void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }");
|
puts("void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }");
|
||||||
}
|
}
|
||||||
|
@ -628,14 +737,21 @@ void putX_XM_IMM()
|
||||||
} tbl[] = {
|
} tbl[] = {
|
||||||
{ 0x26, "vgetmantpd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
|
{ 0x26, "vgetmantpd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
|
||||||
{ 0x26, "vgetmantps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true },
|
{ 0x26, "vgetmantps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true },
|
||||||
|
{ 0x26, "vgetmantph", T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Z, true },
|
||||||
{ 0x4C, "vrcp14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
{ 0x4C, "vrcp14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||||
{ 0x4C, "vrcp14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
{ 0x4C, "vrcp14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||||
|
|
||||||
|
{ 0x4C, "vrcpph", T_66 | T_MAP6 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16, false },
|
||||||
|
|
||||||
{ 0x4E, "vrsqrt14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
{ 0x4E, "vrsqrt14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||||
{ 0x4E, "vrsqrt14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
{ 0x4E, "vrsqrt14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||||
|
|
||||||
{ 0x09, "vrndscalepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, true },
|
{ 0x4E, "vrsqrtph", T_66 | T_MAP6 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16, false },
|
||||||
{ 0x08, "vrndscaleps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, true },
|
{ 0x51, "vsqrtph", T_MAP5| T_YMM | T_MUST_EVEX | T_EW0 | T_ER_Z | T_B16, false },
|
||||||
|
|
||||||
|
{ 0x09, "vrndscalepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
|
||||||
|
{ 0x08, "vrndscaleps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true },
|
||||||
|
{ 0x08, "vrndscaleph", T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Z, true },
|
||||||
|
|
||||||
{ 0xC4, "vpconflictd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
{ 0xC4, "vpconflictd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||||
{ 0xC4, "vpconflictq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
{ 0xC4, "vpconflictq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||||
|
@ -645,6 +761,7 @@ void putX_XM_IMM()
|
||||||
|
|
||||||
{ 0x56, "vreducepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
|
{ 0x56, "vreducepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
|
||||||
{ 0x56, "vreduceps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true },
|
{ 0x56, "vreduceps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true },
|
||||||
|
{ 0x56, "vreduceph", T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Z, true },
|
||||||
|
|
||||||
{ 0x54, "vpopcntb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z, false },
|
{ 0x54, "vpopcntb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z, false },
|
||||||
{ 0x54, "vpopcntw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false },
|
{ 0x54, "vpopcntw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false },
|
||||||
|
@ -704,8 +821,10 @@ void putMisc()
|
||||||
|
|
||||||
puts("void vfpclasspd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }");
|
puts("void vfpclasspd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }");
|
||||||
puts("void vfpclassps(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }");
|
puts("void vfpclassps(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }");
|
||||||
|
puts("void vfpclassph(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B16, 0x66, imm); }");
|
||||||
puts("void vfpclasssd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }");
|
puts("void vfpclasssd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }");
|
||||||
puts("void vfpclassss(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }");
|
puts("void vfpclassss(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }");
|
||||||
|
puts("void vfpclasssh(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_0F3A | T_MUST_EVEX | T_EW0 | T_N2, 0x67, imm); }");
|
||||||
|
|
||||||
puts("void vpshufbitqmb(const Opmask& k, const Xmm& x, const Operand& op) { opVex(k, &x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8F); }");
|
puts("void vpshufbitqmb(const Opmask& k, const Xmm& x, const Operand& op) { opVex(k, &x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8F); }");
|
||||||
puts("void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }");
|
puts("void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }");
|
||||||
|
@ -724,6 +843,126 @@ void putV4FMA()
|
||||||
puts("void vp4dpwssds(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x53); }");
|
puts("void vp4dpwssds(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x53); }");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void putFP16_1()
|
||||||
|
{
|
||||||
|
const struct Tbl {
|
||||||
|
uint8_t code;
|
||||||
|
const char *name;
|
||||||
|
} tbl[] = {
|
||||||
|
{ 0x58, "add" },
|
||||||
|
{ 0x5C, "sub" },
|
||||||
|
{ 0x59, "mul" },
|
||||||
|
{ 0x5E, "div" },
|
||||||
|
{ 0x5F, "max" },
|
||||||
|
{ 0x5D, "min" },
|
||||||
|
};
|
||||||
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
|
const Tbl *p = &tbl[i];
|
||||||
|
printf("void v%sph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x%02X); }\n", p->name, p->code);
|
||||||
|
printf("void v%ssh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x%02X); }\n", p->name, p->code);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void putFP16_FMA()
|
||||||
|
{
|
||||||
|
const struct Tbl {
|
||||||
|
uint8_t code;
|
||||||
|
const char *name;
|
||||||
|
bool isPH;
|
||||||
|
} tbl[] = {
|
||||||
|
{ 0x06, "vfmaddsub", true },
|
||||||
|
{ 0x07, "vfmsubadd", true },
|
||||||
|
{ 0x08, "vfmadd", true },
|
||||||
|
{ 0x0C, "vfnmadd", true },
|
||||||
|
{ 0x0A, "vfmsub", true },
|
||||||
|
{ 0x0E, "vfnmsub", true },
|
||||||
|
{ 0x09, "vfmadd", false },
|
||||||
|
{ 0x0D, "vfnmadd", false },
|
||||||
|
{ 0x0B, "vfmsub", false },
|
||||||
|
{ 0x0F, "vfnmsub", false },
|
||||||
|
};
|
||||||
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
|
for (int k = 0; k < 3; k++) {
|
||||||
|
const struct Ord {
|
||||||
|
const char *str;
|
||||||
|
uint8_t code;
|
||||||
|
} ord[] = {
|
||||||
|
{ "132", 0x90 },
|
||||||
|
{ "213", 0xA0 },
|
||||||
|
{ "231", 0xB0 },
|
||||||
|
};
|
||||||
|
int t = T_66 | T_MAP6 | T_EW0 | T_MUST_EVEX;
|
||||||
|
const char *suf = 0;
|
||||||
|
if (tbl[i].isPH) {
|
||||||
|
t |= T_ER_Z | T_YMM | T_B16;
|
||||||
|
suf = "ph";
|
||||||
|
} else {
|
||||||
|
t |= T_ER_X | T_N2;
|
||||||
|
suf = "sh";
|
||||||
|
}
|
||||||
|
std::string type = type2String(t);
|
||||||
|
printf("void %s%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n"
|
||||||
|
, tbl[i].name, ord[k].str, suf, type.c_str(), tbl[i].code | ord[k].code);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
void putFP16_FMA2()
|
||||||
|
{
|
||||||
|
const struct Tbl {
|
||||||
|
uint8_t code;
|
||||||
|
const char *name;
|
||||||
|
bool isPH;
|
||||||
|
} tbl[] = {
|
||||||
|
{ 0x56, "maddc", true },
|
||||||
|
{ 0xD6, "mulc", true },
|
||||||
|
};
|
||||||
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
|
for (int j = 0; j < 2; j++) {
|
||||||
|
int t = T_MAP6 | T_EW0 | T_MUST_EVEX;
|
||||||
|
if (j == 0) {
|
||||||
|
t |= T_F2;
|
||||||
|
} else {
|
||||||
|
t |= T_F3;
|
||||||
|
}
|
||||||
|
const char *suf = 0;
|
||||||
|
if (tbl[i].isPH) {
|
||||||
|
t |= T_ER_Z | T_YMM | T_B32;
|
||||||
|
suf = "ph";
|
||||||
|
} else {
|
||||||
|
t |= T_ER_X | T_N2;
|
||||||
|
suf = "sh";
|
||||||
|
}
|
||||||
|
std::string type = type2String(t);
|
||||||
|
printf("void vf%s%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n"
|
||||||
|
, j == 0 ? "c" : "", tbl[i].name, suf, type.c_str(), tbl[i].code);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void putFP16_2()
|
||||||
|
{
|
||||||
|
{
|
||||||
|
int t = T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2;
|
||||||
|
std::string type = type2String(t);
|
||||||
|
printf("void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", type.c_str());
|
||||||
|
printf("void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, %s, 0x10); }\n", type.c_str());
|
||||||
|
}
|
||||||
|
{
|
||||||
|
int t = T_66 | T_MAP5 | T_MUST_EVEX | T_N2;
|
||||||
|
std::string type = type2String(t);
|
||||||
|
printf("void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, %s, 0x6E); }\n", type.c_str());
|
||||||
|
printf("void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, %s, 0x7E); }\n", type.c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void putFP16()
|
||||||
|
{
|
||||||
|
putFP16_1();
|
||||||
|
putFP16_FMA();
|
||||||
|
putFP16_FMA2();
|
||||||
|
putFP16_2();
|
||||||
|
}
|
||||||
|
|
||||||
int main(int argc, char *[])
|
int main(int argc, char *[])
|
||||||
{
|
{
|
||||||
bool only64bit = argc == 2;
|
bool only64bit = argc == 2;
|
||||||
|
@ -733,6 +972,7 @@ int main(int argc, char *[])
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
putVcmp();
|
putVcmp();
|
||||||
|
putVcmpAlias();
|
||||||
putX_XM();
|
putX_XM();
|
||||||
putM_X();
|
putM_X();
|
||||||
putXM_X();
|
putXM_X();
|
||||||
|
@ -747,4 +987,5 @@ int main(int argc, char *[])
|
||||||
putMisc();
|
putMisc();
|
||||||
putScatter();
|
putScatter();
|
||||||
putV4FMA();
|
putV4FMA();
|
||||||
|
putFP16();
|
||||||
}
|
}
|
||||||
|
|
25
externals/xbyak/gen/gen_code.cpp
vendored
25
externals/xbyak/gen/gen_code.cpp
vendored
|
@ -26,6 +26,7 @@ struct GenericTbl {
|
||||||
uint8_t code1;
|
uint8_t code1;
|
||||||
uint8_t code2;
|
uint8_t code2;
|
||||||
uint8_t code3;
|
uint8_t code3;
|
||||||
|
uint8_t code4;
|
||||||
};
|
};
|
||||||
|
|
||||||
void putGeneric(const GenericTbl *p, size_t n)
|
void putGeneric(const GenericTbl *p, size_t n)
|
||||||
|
@ -34,6 +35,7 @@ void putGeneric(const GenericTbl *p, size_t n)
|
||||||
printf("void %s() { db(0x%02X); ", p->name, p->code1);
|
printf("void %s() { db(0x%02X); ", p->name, p->code1);
|
||||||
if (p->code2) printf("db(0x%02X); ", p->code2);
|
if (p->code2) printf("db(0x%02X); ", p->code2);
|
||||||
if (p->code3) printf("db(0x%02X); ", p->code3);
|
if (p->code3) printf("db(0x%02X); ", p->code3);
|
||||||
|
if (p->code4) printf("db(0x%02X); ", p->code4);
|
||||||
printf("}\n");
|
printf("}\n");
|
||||||
p++;
|
p++;
|
||||||
}
|
}
|
||||||
|
@ -661,6 +663,9 @@ void put()
|
||||||
{ "cmpsb", 0xA6 },
|
{ "cmpsb", 0xA6 },
|
||||||
{ "cmpsw", 0x66, 0xA7 },
|
{ "cmpsw", 0x66, 0xA7 },
|
||||||
{ "cmpsd", 0xA7 },
|
{ "cmpsd", 0xA7 },
|
||||||
|
{ "endbr32", 0xF3, 0x0F, 0x1E, 0xFB },
|
||||||
|
{ "endbr64", 0xF3, 0x0F, 0x1E, 0xFA },
|
||||||
|
{ "hlt", 0xF4 },
|
||||||
{ "int3", 0xCC },
|
{ "int3", 0xCC },
|
||||||
{ "scasb", 0xAE },
|
{ "scasb", 0xAE },
|
||||||
{ "scasw", 0x66, 0xAF },
|
{ "scasw", 0x66, 0xAF },
|
||||||
|
@ -1040,11 +1045,14 @@ void put()
|
||||||
puts("void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModM(addr, reg, 0x8D); }");
|
puts("void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModM(addr, reg, 0x8D); }");
|
||||||
puts("void bswap(const Reg32e& reg) { opModR(Reg32(1), reg, 0x0F); }");
|
puts("void bswap(const Reg32e& reg) { opModR(Reg32(1), reg, 0x0F); }");
|
||||||
puts("void ret(int imm = 0) { if (imm) { db(0xC2); dw(imm); } else { db(0xC3); } }");
|
puts("void ret(int imm = 0) { if (imm) { db(0xC2); dw(imm); } else { db(0xC3); } }");
|
||||||
|
puts("void retf(int imm = 0) { if (imm) { db(0xCA); dw(imm); } else { db(0xCB); } }");
|
||||||
|
|
||||||
puts("void xadd(const Operand& op, const Reg& reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xC0 | (reg.isBit(8) ? 0 : 1)); }");
|
puts("void xadd(const Operand& op, const Reg& reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xC0 | (reg.isBit(8) ? 0 : 1)); }");
|
||||||
puts("void cmpxchg(const Operand& op, const Reg& reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xB0 | (reg.isBit(8) ? 0 : 1)); }");
|
puts("void cmpxchg(const Operand& op, const Reg& reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xB0 | (reg.isBit(8) ? 0 : 1)); }");
|
||||||
puts("void movbe(const Reg& reg, const Address& addr) { opModM(addr, reg, 0x0F, 0x38, 0xF0); }");
|
puts("void movbe(const Reg& reg, const Address& addr) { opModM(addr, reg, 0x0F, 0x38, 0xF0); }");
|
||||||
puts("void movbe(const Address& addr, const Reg& reg) { opModM(addr, reg, 0x0F, 0x38, 0xF1); }");
|
puts("void movbe(const Address& addr, const Reg& reg) { opModM(addr, reg, 0x0F, 0x38, 0xF1); }");
|
||||||
|
puts("void movdiri(const Address& addr, const Reg32e& reg) { opModM(addr, reg, 0x0F, 0x38, 0xF9); }");
|
||||||
|
puts("void movdir64b(const Reg& reg, const Address& addr) { db(0x66); opModM(addr, reg.cvt32(), 0x0F, 0x38, 0xF8); }");
|
||||||
puts("void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }");
|
puts("void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }");
|
||||||
puts("void adox(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0xF3, isREG32_REG32orMEM, NONE, 0x38); }");
|
puts("void adox(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0xF3, isREG32_REG32orMEM, NONE, 0x38); }");
|
||||||
puts("void cmpxchg8b(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0xC7); }");
|
puts("void cmpxchg8b(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0xC7); }");
|
||||||
|
@ -1079,6 +1087,11 @@ void put()
|
||||||
puts("void rdrand(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModR(Reg(6, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }");
|
puts("void rdrand(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModR(Reg(6, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }");
|
||||||
puts("void rdseed(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModR(Reg(7, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }");
|
puts("void rdseed(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModR(Reg(7, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }");
|
||||||
puts("void crc32(const Reg32e& reg, const Operand& op) { if (reg.isBit(32) && op.isBit(16)) db(0x66); db(0xF2); opModRM(reg, op, op.isREG(), op.isMEM(), 0x0F, 0x38, 0xF0 | (op.isBit(8) ? 0 : 1)); }");
|
puts("void crc32(const Reg32e& reg, const Operand& op) { if (reg.isBit(32) && op.isBit(16)) db(0x66); db(0xF2); opModRM(reg, op, op.isREG(), op.isMEM(), 0x0F, 0x38, 0xF0 | (op.isBit(8) ? 0 : 1)); }");
|
||||||
|
puts("void tpause(const Reg32& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) db(0x66); db(0x0F); db(0xAE); setModRM(3, 6, idx); }");
|
||||||
|
puts("void umonitor(const Reg& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) int bit = r.getBit(); if (BIT != bit) { if ((BIT == 32 && bit == 16) || (BIT == 64 && bit == 32)) { db(0x67); } else { XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) } } db(0xF3); db(0x0F); db(0xAE); setModRM(3, 6, idx); }");
|
||||||
|
puts("void umwait(const Reg32& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) db(0xF2); db(0x0F); db(0xAE); setModRM(3, 6, idx); }");
|
||||||
|
puts("void clwb(const Address& addr) { db(0x66); opMIB(addr, esi, 0x0F, 0xAE); }");
|
||||||
|
puts("void cldemote(const Address& addr) { opMIB(addr, eax, 0x0F, 0x1C); }");
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
const struct Tbl {
|
const struct Tbl {
|
||||||
|
@ -1207,8 +1220,8 @@ void put()
|
||||||
printf("void v%spd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x%02X); }\n", p->name, p->code);
|
printf("void v%spd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x%02X); }\n", p->name, p->code);
|
||||||
printf("void v%sps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x%02X); }\n", p->name, p->code);
|
printf("void v%sps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x%02X); }\n", p->name, p->code);
|
||||||
if (p->only_pd_ps) continue;
|
if (p->only_pd_ps) continue;
|
||||||
printf("void v%ssd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x%02X); }\n", p->name, p->code);
|
printf("void v%ssd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x%02X); }\n", p->name, p->code);
|
||||||
printf("void v%sss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x%02X); }\n", p->name, p->code);
|
printf("void v%sss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X | T_N4, 0x%02X); }\n", p->name, p->code);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
putX_X_XM(false);
|
putX_X_XM(false);
|
||||||
|
@ -1299,7 +1312,8 @@ void put()
|
||||||
if (p->mode & 1) {
|
if (p->mode & 1) {
|
||||||
const char *immS1 = p->hasIMM ? ", uint8_t imm" : "";
|
const char *immS1 = p->hasIMM ? ", uint8_t imm" : "";
|
||||||
const char *immS2 = p->hasIMM ? ", imm" : ", NONE";
|
const char *immS2 = p->hasIMM ? ", imm" : ", NONE";
|
||||||
const char *pref = p->type & T_66 ? "0x66" : p->type & T_F2 ? "0xF2" : p->type & T_F3 ? "0xF3" : "NONE";
|
const char *prefTbl[5] = { "NONE", "0x66", "0xF3", "0xF2" };
|
||||||
|
const char *pref = prefTbl[getPP(p->type)];
|
||||||
const char *suf = p->type & T_0F38 ? "0x38" : p->type & T_0F3A ? "0x3A" : "NONE";
|
const char *suf = p->type & T_0F38 ? "0x38" : p->type & T_0F3A ? "0x3A" : "NONE";
|
||||||
printf("void %s(const Xmm& xmm, const Operand& op%s) { opGen(xmm, op, 0x%02X, %s, isXMM_XMMorMEM%s, %s); }\n", p->name, immS1, p->code, pref, immS2, suf);
|
printf("void %s(const Xmm& xmm, const Operand& op%s) { opGen(xmm, op, 0x%02X, %s, isXMM_XMMorMEM%s, %s); }\n", p->name, immS1, p->code, pref, immS2, suf);
|
||||||
}
|
}
|
||||||
|
@ -1350,11 +1364,12 @@ void put()
|
||||||
{ 0xDE, "aesdec", T_0F38 | T_66 | T_YMM | T_EVEX, 3 },
|
{ 0xDE, "aesdec", T_0F38 | T_66 | T_YMM | T_EVEX, 3 },
|
||||||
{ 0xDF, "aesdeclast", T_0F38 | T_66 | T_YMM | T_EVEX, 3 },
|
{ 0xDF, "aesdeclast", T_0F38 | T_66 | T_YMM | T_EVEX, 3 },
|
||||||
};
|
};
|
||||||
|
const uint8_t ppTbl[] = { 0, 0x66, 0xf3, 0xf2 };
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const Tbl *p = &tbl[i];
|
const Tbl *p = &tbl[i];
|
||||||
std::string type = type2String(p->type);
|
std::string type = type2String(p->type);
|
||||||
if (p->mode & 1) {
|
if (p->mode & 1) {
|
||||||
uint8_t pref = p->type & T_66 ? 0x66 : p->type & T_F2 ? 0xF2 : p->type & T_F3 ? 0xF3 : 0;
|
uint8_t pref = ppTbl[getPP(p->type)];
|
||||||
printf("void %s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%02X, 0x%02X, isXMM_XMMorMEM%s); }\n", p->name, p->code, pref, p->type & T_0F38 ? ", NONE, 0x38" : "");
|
printf("void %s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%02X, 0x%02X, isXMM_XMMorMEM%s); }\n", p->name, p->code, pref, p->type & T_0F38 ? ", NONE, 0x38" : "");
|
||||||
}
|
}
|
||||||
if (p->mode & 2) {
|
if (p->mode & 2) {
|
||||||
|
@ -1648,7 +1663,7 @@ void put()
|
||||||
puts("void vcvttpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_66 | T_0F | T_YMM | T_EVEX |T_EW1 | T_B64 | T_ER_Z, 0xE6); }");
|
puts("void vcvttpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_66 | T_0F | T_YMM | T_EVEX |T_EW1 | T_B64 | T_ER_Z, 0xE6); }");
|
||||||
|
|
||||||
puts("void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }");
|
puts("void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }");
|
||||||
puts("void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x1D, imm); }");
|
puts("void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y | T_M_K, 0x1D, imm); }");
|
||||||
|
|
||||||
}
|
}
|
||||||
// haswell gpr(reg, reg, r/m)
|
// haswell gpr(reg, reg, r/m)
|
||||||
|
|
45
externals/xbyak/meson.build
vendored
Normal file
45
externals/xbyak/meson.build
vendored
Normal file
|
@ -0,0 +1,45 @@
|
||||||
|
# SPDX-FileCopyrightText: 2021 Andrea Pappacoda
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: BSD-3-Clause
|
||||||
|
|
||||||
|
project(
	'xbyak',
	'cpp',
	version: '6.06',
	license: 'BSD-3-Clause',
	default_options: 'b_ndebug=if-release'
)

# Names reused by the pkg-config and CMake package generation below.
xbyak_name = meson.project_name()
xbyak_version = meson.project_version()
meson_version = meson.version()

# Copy the xbyak/ directory into the configured include directory.
install_subdir(
	'xbyak',
	install_dir: get_option('includedir')
)

# Dependency object exposing this directory as an include path.
xbyak_inc = include_directories('.')
xbyak_dep = declare_dependency(include_directories: xbyak_inc)

# override_dependency() is only called on Meson 0.54.0 or newer.
if meson_version.version_compare('>=0.54.0')
	meson.override_dependency(xbyak_name, xbyak_dep)
endif

# pkg-config file
pkgconfig = import('pkgconfig')
pkgconfig.generate(
	name: xbyak_name,
	description: 'JIT assembler for x86(IA32), x64(AMD64, x86-64)',
	version: xbyak_version,
	url: 'https://github.com/herumi/xbyak'
)

# CMake package files; the cmake module is only used on Meson 0.50.0 or newer.
if meson_version.version_compare('>=0.50.0')
	cmake = import('cmake')

	cmake.write_basic_package_version_file(
		name: xbyak_name,
		version: xbyak_version
	)

	# Values substituted into cmake/meson-config.cmake.in
	cmake_conf = configuration_data()
	cmake_conf.set('TARGET_NAME', '@0@::@0@'.format(xbyak_name))
	cmake_conf.set(
		'ABSOLUTE_INCLUDE_DIR',
		join_paths(get_option('prefix'), get_option('includedir'))
	)

	cmake.configure_package_config_file(
		name: xbyak_name,
		input: join_paths('cmake', 'meson-config.cmake.in'),
		configuration: cmake_conf
	)
endif
|
619
externals/xbyak/readme.md
vendored
619
externals/xbyak/readme.md
vendored
|
@ -1,6 +1,13 @@
|
||||||
[![Build Status](https://github.com/herumi/xbyak/actions/workflows/main.yml/badge.svg)](https://github.com/herumi/xbyak/actions/workflows/main.yml)
|
|
||||||
|
|
||||||
# Xbyak 5.991 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
|
# Xbyak 6.06 [![Badge Build]][Build Status]
|
||||||
|
|
||||||
|
*A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)*
|
||||||
|
|
||||||
|
## Menu
|
||||||
|
|
||||||
|
- [Install]
|
||||||
|
- [Usage]
|
||||||
|
- [Changelog]
|
||||||
|
|
||||||
## Abstract
|
## Abstract
|
||||||
|
|
||||||
|
@ -10,15 +17,23 @@ The pronunciation of Xbyak is `kəi-bja-k`.
|
||||||
It is named from a Japanese word [開闢](https://translate.google.com/?hl=ja&sl=ja&tl=en&text=%E9%96%8B%E9%97%A2&op=translate), which means the beginning of the world.
|
It is named from a Japanese word [開闢](https://translate.google.com/?hl=ja&sl=ja&tl=en&text=%E9%96%8B%E9%97%A2&op=translate), which means the beginning of the world.
|
||||||
|
|
||||||
## Feature
|
## Feature
|
||||||
* header file only
|
|
||||||
* Intel/MASM like syntax
|
- header file only
|
||||||
* fully support AVX-512
|
- Intel/MASM like syntax
|
||||||
|
- fully support AVX-512
|
||||||
|
|
||||||
**Note**:
|
**Note**:
|
||||||
Use `and_()`, `or_()`, ... instead of `and()`, `or()`.
|
Use `and_()`, `or_()`, ... instead of `and()`, `or()`.
|
||||||
If you want to use them, then specify `-fno-operator-names` option to gcc/clang.
|
If you want to use them, then specify `-fno-operator-names` option to gcc/clang.
|
||||||
|
|
||||||
### News
|
### News
|
||||||
|
|
||||||
|
- add movdiri, movdir64b, clwb, cldemote
|
||||||
|
- WAITPKG instructions (tpause, umonitor, umwait) are supported.
|
||||||
|
- MmapAllocator supports memfd with user-defined strings. see sample/memfd.cpp
|
||||||
|
- strictly check address offset disp32 in a signed 32-bit integer. e.g., `ptr[(void*)0xffffffff]` causes an error.
|
||||||
|
- define `XBYAK_OLD_DISP_CHECK` if you need the old check, but the option will be removed.
|
||||||
|
- add `jmp(mem, T_FAR)`, `call(mem, T_FAR)`, and `retf()` for far absolute indirect jumps.
|
||||||
- vnni instructions such as vpdpbusd supports vex encoding.
|
- vnni instructions such as vpdpbusd supports vex encoding.
|
||||||
- (break backward compatibility) `push(byte, imm)` (resp. `push(word, imm)`) forces to cast `imm` to 8(resp. 16) bit.
|
- (break backward compatibility) `push(byte, imm)` (resp. `push(word, imm)`) forces to cast `imm` to 8(resp. 16) bit.
|
||||||
- (Windows) `#include <winsock2.h>` has been removed from xbyak.h, so add it explicitly if you need it.
|
- (Windows) `#include <winsock2.h>` has been removed from xbyak.h, so add it explicitly if you need it.
|
||||||
|
@ -27,590 +42,34 @@ If you want to use them, then specify `-fno-operator-names` option to gcc/clang.
|
||||||
|
|
||||||
### Supported OS
|
### Supported OS
|
||||||
|
|
||||||
* Windows Xp, Vista, Windows 7, Windows 10(32bit, 64bit)
|
- Windows (Xp, Vista, 7, 10, 11) (32 / 64 bit)
|
||||||
* Linux(32bit, 64bit)
|
- Linux (32 / 64 bit)
|
||||||
* Intel macOS
|
- macOS (Intel CPU)
|
||||||
|
|
||||||
### Supported Compilers
|
### Supported Compilers
|
||||||
|
|
||||||
Almost C++03 or later compilers for x86/x64 such as Visual Studio, g++, clang++, Intel C++ compiler and g++ on mingw/cygwin.
|
Almost C++03 or later compilers for x86/x64 such as Visual Studio, g++, clang++, Intel C++ compiler and g++ on mingw/cygwin.
|
||||||
|
|
||||||
## Install
|
|
||||||
|
|
||||||
The following files are necessary. Please add the path to your compile directory.
|
|
||||||
|
|
||||||
* xbyak.h
|
|
||||||
* xbyak_mnemonic.h
|
|
||||||
* xbyak_util.h
|
|
||||||
|
|
||||||
Linux:
|
|
||||||
```
|
|
||||||
make install
|
|
||||||
```
|
|
||||||
|
|
||||||
These files are copied into `/usr/local/include/xbyak`.
|
|
||||||
|
|
||||||
## How to use it
|
|
||||||
|
|
||||||
Inherit `Xbyak::CodeGenerator` class and make the class method.
|
|
||||||
```
|
|
||||||
#include <xbyak/xbyak.h>
|
|
||||||
|
|
||||||
struct Code : Xbyak::CodeGenerator {
|
|
||||||
Code(int x)
|
|
||||||
{
|
|
||||||
mov(eax, x);
|
|
||||||
ret();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
```
|
|
||||||
Or you can pass the instance of CodeGenerator without inheriting.
|
|
||||||
```
|
|
||||||
void genCode(Xbyak::CodeGenerator& code, int x) {
|
|
||||||
using namespace Xbyak::util;
|
|
||||||
code.mov(eax, x);
|
|
||||||
code.ret();
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
Make an instance of the class and get the function
|
|
||||||
pointer by calling `getCode()` and call it.
|
|
||||||
```
|
|
||||||
Code c(5);
|
|
||||||
int (*f)() = c.getCode<int (*)()>();
|
|
||||||
printf("ret=%d\n", f()); // ret = 5
|
|
||||||
```
|
|
||||||
|
|
||||||
## Syntax
|
|
||||||
Similar to MASM/NASM syntax with parentheses.
|
|
||||||
|
|
||||||
```
|
|
||||||
NASM Xbyak
|
|
||||||
mov eax, ebx --> mov(eax, ebx);
|
|
||||||
inc ecx inc(ecx);
|
|
||||||
ret --> ret();
|
|
||||||
```
|
|
||||||
|
|
||||||
## Addressing
|
|
||||||
Use `qword`, `dword`, `word` and `byte` if it is necessary to specify the size of memory,
|
|
||||||
otherwise use `ptr`.
|
|
||||||
|
|
||||||
```
|
|
||||||
(ptr|qword|dword|word|byte) [base + index * (1|2|4|8) + displacement]
|
|
||||||
[rip + 32bit disp] ; x64 only
|
|
||||||
|
|
||||||
NASM Xbyak
|
|
||||||
mov eax, [ebx+ecx] --> mov(eax, ptr [ebx+ecx]);
|
|
||||||
mov al, [ebx+ecx] --> mov(al, ptr [ebx + ecx]);
|
|
||||||
test byte [esp], 4 --> test(byte [esp], 4);
|
|
||||||
inc qword [rax] --> inc(qword [rax]);
|
|
||||||
```
|
|
||||||
**Note**: `qword`, ... are member variables, then don't use `dword` as unsigned int type.
|
|
||||||
|
|
||||||
### How to use Selector (Segment Register)
|
|
||||||
```
|
|
||||||
mov eax, [fs:eax] --> putSeg(fs);
|
|
||||||
mov(eax, ptr [eax]);
|
|
||||||
mov ax, cs --> mov(ax, cs);
|
|
||||||
```
|
|
||||||
**Note**: Segment class is not derived from `Operand`.
|
|
||||||
|
|
||||||
## AVX
|
|
||||||
|
|
||||||
```
|
|
||||||
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
|
|
||||||
vaddps(xmm2, xmm3, ptr [rax]); // use ptr to access memory
|
|
||||||
vgatherdpd(xmm1, ptr [ebp + 256 + xmm2*4], xmm3);
|
|
||||||
```
|
|
||||||
|
|
||||||
**Note**:
|
|
||||||
If `XBYAK_ENABLE_OMITTED_OPERAND` is defined, then you can use two operand version for backward compatibility.
|
|
||||||
But the newer version will not support it.
|
|
||||||
```
|
|
||||||
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
|
|
||||||
```
|
|
||||||
|
|
||||||
## AVX-512
|
|
||||||
|
|
||||||
```
|
|
||||||
vaddpd zmm2, zmm5, zmm30 --> vaddpd(zmm2, zmm5, zmm30);
|
|
||||||
vaddpd xmm30, xmm20, [rax] --> vaddpd(xmm30, xmm20, ptr [rax]);
|
|
||||||
vaddps xmm30, xmm20, [rax] --> vaddps(xmm30, xmm20, ptr [rax]);
|
|
||||||
vaddpd zmm2{k5}, zmm4, zmm2 --> vaddpd(zmm2 | k5, zmm4, zmm2);
|
|
||||||
vaddpd zmm2{k5}{z}, zmm4, zmm2 --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2);
|
|
||||||
vaddpd zmm2{k5}{z}, zmm4, zmm2,{rd-sae} --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2 | T_rd_sae);
|
|
||||||
vaddpd(zmm2 | k5 | T_z | T_rd_sae, zmm4, zmm2); // the position of `|` is arbitrary.
|
|
||||||
vcmppd k4{k3}, zmm1, zmm2, {sae}, 5 --> vcmppd(k4 | k3, zmm1, zmm2 | T_sae, 5);
|
|
||||||
|
|
||||||
vaddpd xmm1, xmm2, [rax+256] --> vaddpd(xmm1, xmm2, ptr [rax+256]);
|
|
||||||
vaddpd xmm1, xmm2, [rax+256]{1to2} --> vaddpd(xmm1, xmm2, ptr_b [rax+256]);
|
|
||||||
vaddpd ymm1, ymm2, [rax+256]{1to4} --> vaddpd(ymm1, ymm2, ptr_b [rax+256]);
|
|
||||||
vaddpd zmm1, zmm2, [rax+256]{1to8} --> vaddpd(zmm1, zmm2, ptr_b [rax+256]);
|
|
||||||
vaddps zmm1, zmm2, [rax+rcx*8+8]{1to16} --> vaddps(zmm1, zmm2, ptr_b [rax+rcx*8+8]);
|
|
||||||
vmovsd [rax]{k1}, xmm4 --> vmovsd(ptr [rax] | k1, xmm4);
|
|
||||||
|
|
||||||
vcvtpd2dq xmm16, oword [eax+33] --> vcvtpd2dq(xmm16, xword [eax+33]); // use xword for m128 instead of oword
|
|
||||||
vcvtpd2dq(xmm16, ptr [eax+33]); // default xword
|
|
||||||
vcvtpd2dq xmm21, [eax+32]{1to2} --> vcvtpd2dq(xmm21, ptr_b [eax+32]);
|
|
||||||
vcvtpd2dq xmm0, yword [eax+33] --> vcvtpd2dq(xmm0, yword [eax+33]); // use yword for m256
|
|
||||||
vcvtpd2dq xmm19, [eax+32]{1to4} --> vcvtpd2dq(xmm19, yword_b [eax+32]); // use yword_b to broadcast
|
|
||||||
|
|
||||||
vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5); // specify m512
|
|
||||||
vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit
|
|
||||||
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit
|
|
||||||
|
|
||||||
vpdpbusd(xm0, xm1, xm2); // default encoding is EVEX
|
|
||||||
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
|
|
||||||
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX encoding
|
|
||||||
```
|
|
||||||
### Remark
|
|
||||||
* `k1`, ..., `k7` are opmask registers.
|
|
||||||
- `k0` is dealt as no mask.
|
|
||||||
- e.g. `vmovaps(zmm0|k0, ptr[rax]);` and `vmovaps(zmm0|T_z, ptr[rax]);` are same to `vmovaps(zmm0, ptr[rax]);`.
|
|
||||||
* use `| T_z`, `| T_sae`, `| T_rn_sae`, `| T_rd_sae`, `| T_ru_sae`, `| T_rz_sae` instead of `,{z}`, `,{sae}`, `,{rn-sae}`, `,{rd-sae}`, `,{ru-sae}`, `,{rz-sae}` respectively.
|
|
||||||
* `k4 | k3` is different from `k3 | k4`.
|
|
||||||
* use `ptr_b` for broadcast `{1toX}`. X is automatically determined.
|
|
||||||
* specify `xword`/`yword`/`zword(_b)` for m128/m256/m512 if necessary.
|
|
||||||
|
|
||||||
## Label
|
|
||||||
Two kinds of Label are supported. (String literal and Label class).
|
|
||||||
|
|
||||||
### String literal
|
|
||||||
```
|
|
||||||
L("L1");
|
|
||||||
jmp("L1");
|
|
||||||
|
|
||||||
jmp("L2");
|
|
||||||
...
|
|
||||||
a few mnemonics (8-bit displacement jmp)
|
|
||||||
...
|
|
||||||
L("L2");
|
|
||||||
|
|
||||||
jmp("L3", T_NEAR);
|
|
||||||
...
|
|
||||||
a lot of mnemonics (32-bit displacement jmp)
|
|
||||||
...
|
|
||||||
L("L3");
|
|
||||||
```
|
|
||||||
|
|
||||||
* Call `hasUndefinedLabel()` to verify your code has no undefined label.
|
|
||||||
* you can use a label for immediate value of mov like as `mov(eax, "L2")`.
|
|
||||||
|
|
||||||
### Support `@@`, `@f`, `@b` like MASM
|
|
||||||
|
|
||||||
```
|
|
||||||
L("@@"); // <A>
|
|
||||||
jmp("@b"); // jmp to <A>
|
|
||||||
jmp("@f"); // jmp to <B>
|
|
||||||
L("@@"); // <B>
|
|
||||||
jmp("@b"); // jmp to <B>
|
|
||||||
mov(eax, "@b");
|
|
||||||
jmp(eax); // jmp to <B>
|
|
||||||
```
|
|
||||||
|
|
||||||
### Local label
|
|
||||||
|
|
||||||
Label symbols beginning with a period between `inLocalLabel()` and `outLocalLabel()`
|
|
||||||
are treated as a local label.
|
|
||||||
`inLocalLabel()` and `outLocalLabel()` can be nested.
|
|
||||||
|
|
||||||
```
|
|
||||||
void func1()
|
|
||||||
{
|
|
||||||
inLocalLabel();
|
|
||||||
L(".lp"); // <A> ; local label
|
|
||||||
...
|
|
||||||
jmp(".lp"); // jmp to <A>
|
|
||||||
L("aaa"); // global label <C>
|
|
||||||
outLocalLabel();
|
|
||||||
|
|
||||||
inLocalLabel();
|
|
||||||
L(".lp"); // <B> ; local label
|
|
||||||
func1();
|
|
||||||
jmp(".lp"); // jmp to <B>
|
|
||||||
inLocalLabel();
|
|
||||||
jmp("aaa"); // jmp to <C>
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### short and long jump
|
|
||||||
Xbyak deals with jump mnemonics of an undefined label as short jump if no type is specified.
|
|
||||||
So if the size between jmp and label is larger than 127 bytes, then xbyak will cause an error.
|
|
||||||
|
|
||||||
```
|
|
||||||
jmp("short-jmp"); // short jmp
|
|
||||||
// small code
|
|
||||||
L("short-jmp");
|
|
||||||
|
|
||||||
jmp("long-jmp");
|
|
||||||
// long code
|
|
||||||
L("long-jmp"); // throw exception
|
|
||||||
```
|
|
||||||
Then specify T_NEAR for jmp.
|
|
||||||
```
|
|
||||||
jmp("long-jmp", T_NEAR); // long jmp
|
|
||||||
// long code
|
|
||||||
L("long-jmp");
|
|
||||||
```
|
|
||||||
Or call `setDefaultJmpNEAR(true);` once, then the default type is set to T_NEAR.
|
|
||||||
```
|
|
||||||
jmp("long-jmp"); // long jmp
|
|
||||||
// long code
|
|
||||||
L("long-jmp");
|
|
||||||
```
|
|
||||||
|
|
||||||
### Label class
|
|
||||||
|
|
||||||
`L()` and `jxx()` support Label class.
|
|
||||||
|
|
||||||
```
|
|
||||||
Xbyak::Label label1, label2;
|
|
||||||
L(label1);
|
|
||||||
...
|
|
||||||
jmp(label1);
|
|
||||||
...
|
|
||||||
jmp(label2);
|
|
||||||
...
|
|
||||||
L(label2);
|
|
||||||
```
|
|
||||||
|
|
||||||
Use `putL` for jmp table
|
|
||||||
```
|
|
||||||
Label labelTbl, L0, L1, L2;
|
|
||||||
mov(rax, labelTbl);
|
|
||||||
// rdx is an index of jump table
|
|
||||||
jmp(ptr [rax + rdx * sizeof(void*)]);
|
|
||||||
L(labelTbl);
|
|
||||||
putL(L0);
|
|
||||||
putL(L1);
|
|
||||||
putL(L2);
|
|
||||||
L(L0);
|
|
||||||
....
|
|
||||||
L(L1);
|
|
||||||
....
|
|
||||||
```
|
|
||||||
|
|
||||||
`assignL(dstLabel, srcLabel)` binds dstLabel with srcLabel.
|
|
||||||
|
|
||||||
```
|
|
||||||
Label label2;
|
|
||||||
Label label1 = L(); // make label1 ; same to Label label1; L(label1);
|
|
||||||
...
|
|
||||||
jmp(label2); // label2 is not determined here
|
|
||||||
...
|
|
||||||
assignL(label2, label1); // label2 <- label1
|
|
||||||
```
|
|
||||||
The `jmp` in the above code jumps to label1 assigned by `assignL`.
|
|
||||||
|
|
||||||
**Note**:
|
|
||||||
* srcLabel must be used in `L()`.
|
|
||||||
* dstLabel must not be used in `L()`.
|
|
||||||
|
|
||||||
`Label::getAddress()` returns the address specified by the label instance and 0 if not specified.
|
|
||||||
```
|
|
||||||
// not AutoGrow mode
|
|
||||||
Label label;
|
|
||||||
assert(label.getAddress() == 0);
|
|
||||||
L(label);
|
|
||||||
assert(label.getAddress() == getCurr());
|
|
||||||
```
|
|
||||||
|
|
||||||
### Rip ; relative addressing
|
|
||||||
```
|
|
||||||
Label label;
|
|
||||||
mov(eax, ptr [rip + label]); // eax = 4
|
|
||||||
...
|
|
||||||
|
|
||||||
L(label);
|
|
||||||
dd(4);
|
|
||||||
```
|
|
||||||
```
|
|
||||||
int x;
|
|
||||||
...
|
|
||||||
mov(eax, ptr[rip + &x]); // throw exception if the difference between &x and current position is larger than 2GiB
|
|
||||||
```
|
|
||||||
|
|
||||||
## Code size
|
|
||||||
The default max code size is 4096 bytes.
|
|
||||||
Specify the size in constructor of `CodeGenerator()` if necessary.
|
|
||||||
|
|
||||||
```
|
|
||||||
class Quantize : public Xbyak::CodeGenerator {
|
|
||||||
public:
|
|
||||||
Quantize()
|
|
||||||
: CodeGenerator(8192)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
...
|
|
||||||
};
|
|
||||||
```
|
|
||||||
|
|
||||||
## User allocated memory
|
|
||||||
|
|
||||||
You can make jit code on prepared memory.
|
|
||||||
|
|
||||||
Call `setProtectModeRE` yourself to change memory mode if using the prepared memory.
|
|
||||||
|
|
||||||
```
|
|
||||||
uint8_t alignas(4096) buf[8192]; // C++11 or later
|
|
||||||
|
|
||||||
struct Code : Xbyak::CodeGenerator {
|
|
||||||
Code() : Xbyak::CodeGenerator(sizeof(buf), buf)
|
|
||||||
{
|
|
||||||
mov(rax, 123);
|
|
||||||
ret();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
int main()
|
|
||||||
{
|
|
||||||
Code c;
|
|
||||||
c.setProtectModeRE(); // set memory to Read/Exec
|
|
||||||
printf("%d\n", c.getCode<int(*)()>()());
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Note**: See [sample/test0.cpp](sample/test0.cpp).
|
|
||||||
|
|
||||||
### AutoGrow
|
|
||||||
|
|
||||||
The memory region for jit is automatically extended if necessary when `AutoGrow` is specified in a constructor of `CodeGenerator`.
|
|
||||||
|
|
||||||
Call `ready()` or `readyRE()` before calling `getCode()` to fix jump address.
|
|
||||||
```
|
|
||||||
struct Code : Xbyak::CodeGenerator {
|
|
||||||
Code()
|
|
||||||
: Xbyak::CodeGenerator(<default memory size>, Xbyak::AutoGrow)
|
|
||||||
{
|
|
||||||
...
|
|
||||||
}
|
|
||||||
};
|
|
||||||
Code c;
|
|
||||||
// generate code for jit
|
|
||||||
c.ready(); // mode = Read/Write/Exec
|
|
||||||
```
|
|
||||||
|
|
||||||
**Note**:
|
|
||||||
* Don't use the address returned by `getCurr()` before calling `ready()` because it may be an invalid address.
|
|
||||||
|
|
||||||
### Read/Exec mode
|
|
||||||
Xbyak sets Read/Write/Exec mode to memory to run jit code.
|
|
||||||
If you want to use Read/Exec mode for security, then specify `DontSetProtectRWE` for `CodeGenerator` and
|
|
||||||
call `setProtectModeRE()` after generating jit code.
|
|
||||||
|
|
||||||
```
|
|
||||||
struct Code : Xbyak::CodeGenerator {
|
|
||||||
Code()
|
|
||||||
: Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE)
|
|
||||||
{
|
|
||||||
mov(eax, 123);
|
|
||||||
ret();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
Code c;
|
|
||||||
c.setProtectModeRE();
|
|
||||||
...
|
|
||||||
|
|
||||||
```
|
|
||||||
Call `readyRE()` instead of `ready()` when using `AutoGrow` mode.
|
|
||||||
See [protect-re.cpp](sample/protect-re.cpp).
|
|
||||||
|
|
||||||
## Exception-less mode
|
|
||||||
If `XBYAK_NO_EXCEPTION` is defined, then gcc/clang can compile xbyak with `-fno-exceptions`.
|
|
||||||
Instead of throwing an exception, `Xbyak::GetError()` returns a non-zero value (e.g. `ERR_BAD_ADDRESSING`) if there is something wrong.
|
|
||||||
The status will not be changed automatically, so you should reset it by calling `Xbyak::ClearError()`.
|
|
||||||
`CodeGenerator::reset()` calls `ClearError()`.
|
|
||||||
|
|
||||||
## Macro
|
|
||||||
|
|
||||||
* **XBYAK32** is defined on 32bit.
|
|
||||||
* **XBYAK64** is defined on 64bit.
|
|
||||||
* **XBYAK64_WIN** is defined on 64bit Windows(VC).
|
|
||||||
* **XBYAK64_GCC** is defined on 64bit gcc, cygwin.
|
|
||||||
* define **XBYAK_USE_OP_NAMES** on gcc with `-fno-operator-names` if you want to use `and()`, ....
|
|
||||||
* define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(deprecated in the future).
|
|
||||||
* define **XBYAK_UNDEF_JNL** if Bessel function jnl is defined as macro.
|
|
||||||
* define **XBYAK_NO_EXCEPTION** for a compiler option `-fno-exceptions`.
|
|
||||||
|
|
||||||
## Sample
|
|
||||||
|
|
||||||
* [test0.cpp](sample/test0.cpp) ; tiny sample (x86, x64)
|
|
||||||
* [quantize.cpp](sample/quantize.cpp) ; JIT optimized quantization by fast division (x86 only)
|
|
||||||
* [calc.cpp](sample/calc.cpp) ; assemble and estimate a given polynomial (x86, x64)
|
|
||||||
* [bf.cpp](sample/bf.cpp) ; JIT brainfuck (x86, x64)
|
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
modified new BSD License
|
[BSD-3-Clause License](http://opensource.org/licenses/BSD-3-Clause)
|
||||||
http://opensource.org/licenses/BSD-3-Clause
|
|
||||||
|
|
||||||
## History
|
|
||||||
* 2020/Nov/16 ver 5.991 disable constexpr for gcc-5 with -std=c++-14
|
|
||||||
* 2020/Oct/19 ver 5.99 support VNNI instructions(Thanks to akharito)
|
|
||||||
* 2020/Oct/17 ver 5.98 support the form of [scale * reg]
|
|
||||||
* 2020/Sep/08 ver 5.97 replace uint32 with uint32_t etc.
|
|
||||||
* 2020/Aug/28 ver 5.95 some constructors of register classes support constexpr if C++14 or later
|
|
||||||
* 2020/Aug/04 ver 5.941 `CodeGenerator::reset()` calls `ClearError()`.
|
|
||||||
* 2020/Jul/28 ver 5.94 remove #include <winsock2.h> (only windows)
|
|
||||||
* 2020/Jul/21 ver 5.93 support exception-less mode
|
|
||||||
* 2020/Jun/30 ver 5.92 support Intel AMX instruction set (Thanks to nshustrov)
|
|
||||||
* 2020/Jun/22 ver 5.913 fix mov(r64, imm64) on 32-bit env with XBYAK64
|
|
||||||
* 2020/Jun/19 ver 5.912 define MAP_JIT on macOS regardless of Xcode version (Thanks to rsdubtso)
|
|
||||||
* 2020/May/10 ver 5.911 XBYAK_USE_MMAP_ALLOCATOR is defined unless XBYAK_DONT_USE_MMAP_ALLOCATOR is defined.
|
|
||||||
* 2020/Apr/20 ver 5.91 accept mask register k0 (it means no mask)
|
|
||||||
* 2020/Apr/09 ver 5.90 kmov{b,d,w,q} throws exception for an unsupported register
|
|
||||||
* 2020/Feb/26 ver 5.891 fix typo of type
|
|
||||||
* 2020/Jan/03 ver 5.89 fix error of vfpclasspd
|
|
||||||
* 2019/Dec/20 ver 5.88 fix compile error on Windows
|
|
||||||
* 2019/Dec/19 ver 5.87 add setDefaultJmpNEAR(), which deals with `jmp` of an undefined label as T_NEAR if no type is specified.
|
|
||||||
* 2019/Dec/13 ver 5.86 [changed] revert to the behavior before v5.84 if -fno-operator-names is defined (and() is available)
|
|
||||||
* 2019/Dec/07 ver 5.85 append MAP_JIT flag to mmap for macOS mojave or later
|
|
||||||
* 2019/Nov/29 ver 5.84 [changed] XBYAK_NO_OP_NAMES is defined unless XBYAK_USE_OP_NAMES is defined
|
|
||||||
* 2019/Oct/12 ver 5.83 exit(1) was removed
|
|
||||||
* 2019/Sep/23 ver 5.82 support monitorx, mwaitx, clzero (thanks to @MagurosanTeam)
|
|
||||||
* 2019/Sep/14 ver 5.81 support some generic mnemonics.
|
|
||||||
* 2019/Aug/01 ver 5.802 fix detection of AVX512_BF16 (thanks to vpirogov)
|
|
||||||
* 2019/May/27 support vp2intersectd, vp2intersectq (not tested)
|
|
||||||
* 2019/May/26 ver 5.80 support vcvtne2ps2bf16, vcvtneps2bf16, vdpbf16ps
|
|
||||||
* 2019/Apr/27 ver 5.79 vcmppd/vcmpps supports ptr_b(thanks to jkopinsky)
|
|
||||||
* 2019/Apr/15 ver 5.78 rewrite Reg::changeBit() (thanks to MerryMage)
|
|
||||||
* 2019/Mar/06 ver 5.77 fix number of cores that share LLC cache by densamoilov
|
|
||||||
* 2019/Jan/17 ver 5.76 add Cpu::getNumCores() by shelleygoel
|
|
||||||
* 2018/Oct/31 ver 5.751 recover Xbyak::CastTo for compatibility
|
|
||||||
* 2018/Oct/29 ver 5.75 unlink LabelManager from Label when msg is destroyed
|
|
||||||
* 2018/Oct/21 ver 5.74 support RegRip +/- int. Xbyak::CastTo is removed
|
|
||||||
* 2018/Oct/15 util::AddressFrame uses push/pop instead of mov
|
|
||||||
* 2018/Sep/19 ver 5.73 fix evex encoding of vpslld, vpslldq, vpsllw, etc for (reg, mem, imm8)
|
|
||||||
* 2018/Sep/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday)
|
|
||||||
* 2018/Sep/04 ver 5.71 L() returns a new label instance
|
|
||||||
* 2018/Aug/27 ver 5.70 support setProtectMode() and DontUseProtect for read/exec setting
|
|
||||||
* 2018/Aug/24 ver 5.68 fix wrong VSIB encoding with vector index >= 16(thanks to petercaday)
|
|
||||||
* 2018/Aug/14 ver 5.67 remove mutable in Address ; fix setCacheHierarchy for cloud vm
|
|
||||||
* 2018/Jul/26 ver 5.661 support mingw64
|
|
||||||
* 2018/Jul/24 ver 5.66 add CodeArray::PROTECT_RE to mode of protect()
|
|
||||||
* 2018/Jun/26 ver 5.65 fix push(qword [mem])
|
|
||||||
* 2018/Mar/07 ver 5.64 fix zero division in Cpu() on some cpu
|
|
||||||
* 2018/Feb/14 ver 5.63 fix Cpu::setCacheHierarchy() and fix EvexModifierZero for clang<3.9(thanks to mgouicem)
|
|
||||||
* 2018/Feb/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso
|
|
||||||
* 2018/Feb/07 ver 5.61 vmov* supports mem{k}{z}(I forgot it)
|
|
||||||
* 2018/Jan/24 ver 5.601 add xword, yword, etc. into Xbyak::util namespace
|
|
||||||
* 2018/Jan/05 ver 5.60 support AVX-512 for Ice lake(319433-030.pdf)
|
|
||||||
* 2017/Aug/22 ver 5.53 fix mpx encoding, add bnd() prefix
|
|
||||||
* 2017/Aug/18 ver 5.52 fix align (thanks to MerryMage)
|
|
||||||
* 2017/Aug/17 ver 5.51 add multi-byte nop and align() uses it(thanks to inolen)
|
|
||||||
* 2017/Aug/08 ver 5.50 add mpx(thanks to magurosan)
|
|
||||||
* 2017/Aug/08 ver 5.45 add sha(thanks to magurosan)
|
|
||||||
* 2017/Aug/08 ver 5.44 add prefetchw(thanks to rsdubtso)
|
|
||||||
* 2017/Jul/12 ver 5.432 reduce warnings of PVS studio
|
|
||||||
* 2017/Jul/09 ver 5.431 fix hasRex() (no affect) (thanks to drillsar)
|
|
||||||
* 2017/May/14 ver 5.43 fix CodeGenerator::resetSize() (thanks to gibbed)
|
|
||||||
* 2017/May/13 ver 5.42 add movs{b,w,d,q}
|
|
||||||
* 2017/Jan/26 ver 5.41 add prefetchwt1 and support for scale == 0(thanks to rsdubtso)
|
|
||||||
* 2016/Dec/14 ver 5.40 add Label::getAddress() method to get the pointer specified by the label
|
|
||||||
* 2016/Dec/09 ver 5.34 fix handling of negative offsets when encoding disp8N(thanks to rsdubtso)
|
|
||||||
* 2016/Dec/08 ver 5.33 fix encoding of vpbroadcast{b,w,d,q}, vpinsr{b,w}, vpextr{b,w} for disp8N
|
|
||||||
* 2016/Dec/01 ver 5.32 rename __xgetbv() to _xgetbv() to support clang for Visual Studio(thanks to freiro)
|
|
||||||
* 2016/Nov/27 ver 5.31 rename AVX512_4VNNI to AVX512_4VNNIW
|
|
||||||
* 2016/Nov/27 ver 5.30 add AVX512_4VNNI, AVX512_4FMAPS instructions(thanks to rsdubtso)
|
|
||||||
* 2016/Nov/26 ver 5.20 add detection of AVX512_4VNNI and AVX512_4FMAPS(thanks to rsdubtso)
|
|
||||||
* 2016/Nov/20 ver 5.11 lost vptest for ymm(thanks to gregory38)
|
|
||||||
* 2016/Nov/20 ver 5.10 add addressing [rip+&var]
|
|
||||||
* 2016/Sep/29 ver 5.03 fix detection ERR_INVALID_OPMASK_WITH_MEMORY(thanks to PVS-Studio)
|
|
||||||
* 2016/Aug/15 ver 5.02 xbyak does not include xbyak_bin2hex.h
|
|
||||||
* 2016/Aug/15 ver 5.011 fix detection of version of gcc 5.4
|
|
||||||
* 2016/Aug/03 ver 5.01 disable omitted operand
|
|
||||||
* 2016/Jun/24 ver 5.00 support avx-512 instruction set
|
|
||||||
* 2016/Jun/13 avx-512 add mask instructions
|
|
||||||
* 2016/May/05 ver 4.91 add detection of AVX-512 to Xbyak::util::Cpu
|
|
||||||
* 2016/Mar/14 ver 4.901 comment to ready() function(thanks to skmp)
|
|
||||||
* 2016/Feb/04 ver 4.90 add jcc(const void *addr);
|
|
||||||
* 2016/Jan/30 ver 4.89 vpblendvb supports ymm reg(thanks to John Funnell)
|
|
||||||
* 2016/Jan/24 ver 4.88 lea, cmov supports 16-bit register(thanks to whyisthisfieldhere)
|
|
||||||
* 2015/Oct/05 ver 4.87 support segment selectors
|
|
||||||
* 2015/Aug/18 ver 4.86 fix [rip + label] addressing with immediate value(thanks to whyisthisfieldhere)
|
|
||||||
* 2015/Aug/10 ver 4.85 Address::operator==() is not correct(thanks to inolen)
|
|
||||||
* 2015/Jun/22 ver 4.84 call() support variadic template if available(thanks to randomstuff)
|
|
||||||
* 2015/Jun/16 ver 4.83 support movbe(thanks to benvanik)
|
|
||||||
* 2015/May/24 ver 4.82 support detection of F16C
|
|
||||||
* 2015/Apr/25 ver 4.81 fix the condition to throw exception for setSize(thanks to whyisthisfieldhere)
|
|
||||||
* 2015/Apr/22 ver 4.80 rip supports label(thanks to whyisthisfieldhere)
|
|
||||||
* 2015/Jan/28 ver 4.71 support adcx, adox, cmpxchg, rdseed, stac
|
|
||||||
* 2014/Oct/14 ver 4.70 support MmapAllocator
|
|
||||||
* 2014/Jun/13 ver 4.62 disable warning of VC2014
|
|
||||||
* 2014/May/30 ver 4.61 support bt, bts, btr, btc
|
|
||||||
* 2014/May/28 ver 4.60 support vcvtph2ps, vcvtps2ph
|
|
||||||
* 2014/Apr/11 ver 4.52 add detection of rdrand
|
|
||||||
* 2014/Mar/25 ver 4.51 remove state information of unreferenced labels
|
|
||||||
* 2014/Mar/16 ver 4.50 support new Label
|
|
||||||
* 2014/Mar/05 ver 4.40 fix wrong detection of BMI/enhanced rep on VirtualBox
|
|
||||||
* 2013/Dec/03 ver 4.30 support Reg::cvt8(), cvt16(), cvt32(), cvt64()
|
|
||||||
* 2013/Oct/16 ver 4.21 label support std::string
|
|
||||||
* 2013/Jul/30 ver 4.20 [break backward compatibility] split Reg32e class into RegExp(base+index*scale+disp) and Reg32e(means Reg32 or Reg64)
|
|
||||||
* 2013/Jul/04 ver 4.10 [break backward compatibility] change the type of Xbyak::Error from enum to a class
|
|
||||||
* 2013/Jun/21 ver 4.02 add putL(LABEL) function to put the address of the label
|
|
||||||
* 2013/Jun/21 ver 4.01 vpsllw, vpslld, vpsllq, vpsraw, vpsrad, vpsrlw, vpsrld, vpsrlq support (ymm, ymm, xmm). support vpbroadcastb, vpbroadcastw, vpbroadcastd, vpbroadcastq(thanks to Gabest).
|
|
||||||
* 2013/May/30 ver 4.00 support AVX2, VEX-encoded GPR-instructions
|
|
||||||
* 2013/Mar/27 ver 3.80 support mov(reg, "label");
|
|
||||||
* 2013/Mar/13 ver 3.76 add cqo(), jcxz(), jecxz(), jrcxz()
|
|
||||||
* 2013/Jan/15 ver 3.75 add setSize() to modify generated code
|
|
||||||
* 2013/Jan/12 ver 3.74 add CodeGenerator::reset() ; add Allocator::useProtect()
|
|
||||||
* 2013/Jan/06 ver 3.73 use unordered_map if possible
|
|
||||||
* 2012/Dec/04 ver 3.72 eax, ebx, ... are member variables of CodeGenerator(revert), Xbyak::util::eax, ... are static const.
|
|
||||||
* 2012/Nov/17 ver 3.71 and_(), or_(), xor_(), not_() are available if XBYAK_NO_OP_NAMES is not defined.
|
|
||||||
* 2012/Nov/17 change eax, ebx, ptr and so on in CodeGenerator as static member and alias of them are defined in Xbyak::util.
|
|
||||||
* 2012/Nov/09 ver 3.70 XBYAK_NO_OP_NAMES macro is added to use and_() instead of and() (thanks to Mattias)
|
|
||||||
* 2012/Nov/01 ver 3.62 add fwait/fnwait/finit/fninit
|
|
||||||
* 2012/Nov/01 ver 3.61 add fldcw/fstcw
|
|
||||||
* 2012/May/03 ver 3.60 change interface of Allocator
|
|
||||||
* 2012/Mar/23 ver 3.51 fix userPtr mode
|
|
||||||
* 2012/Mar/19 ver 3.50 support AutoGrow mode
|
|
||||||
* 2011/Nov/09 ver 3.05 fix bit property of rip addressing / support movsxd
|
|
||||||
* 2011/Aug/15 ver 3.04 fix dealing with imm8 such as add(dword [ebp-8], 0xda); (thanks to lolcat)
|
|
||||||
* 2011/Jun/16 ver 3.03 fix __GNUC_PREREQ macro for Mac gcc(thanks to t_teruya)
|
|
||||||
* 2011/Apr/28 ver 3.02 do not use xgetbv on Mac gcc
|
|
||||||
* 2011/May/24 ver 3.01 fix typo of OSXSAVE
|
|
||||||
* 2011/May/23 ver 3.00 add vcmpeqps and so on
|
|
||||||
* 2011/Feb/16 ver 2.994 beta add vmovq for 32-bit mode(I forgot it)
|
|
||||||
* 2011/Feb/16 ver 2.993 beta remove cvtReg to avoid thread unsafe
|
|
||||||
* 2011/Feb/10 ver 2.992 beta support one argument syntax for fadd like nasm
|
|
||||||
* 2011/Feb/07 ver 2.991 beta fix pextrw reg, xmm, imm(Thanks to Gabest)
|
|
||||||
* 2011/Feb/04 ver 2.99 beta support AVX
|
|
||||||
* 2010/Dec/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp
|
|
||||||
* 2010/Oct/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist
|
|
||||||
* 2010/Jun/07 ver 2.29 fix call(<label>)
|
|
||||||
* 2010/Jun/17 ver 2.28 move some member functions to public
|
|
||||||
* 2010/Jun/01 ver 2.27 support encoding of mov(reg64, imm) like yasm(not nasm)
|
|
||||||
* 2010/May/24 ver 2.26 fix sub(rsp, 1000)
|
|
||||||
* 2010/Apr/26 ver 2.25 add jc/jnc(I forgot to implement them...)
|
|
||||||
* 2010/Apr/16 ver 2.24 change the prototype of rewrite() method
|
|
||||||
* 2010/Apr/15 ver 2.23 fix align() and xbyak_util.h for Mac
|
|
||||||
* 2010/Feb/16 ver 2.22 fix inLocalLabel()/outLocalLabel()
|
|
||||||
* 2009/Dec/09 ver 2.21 support cygwin(gcc 4.3.2)
|
|
||||||
* 2009/Nov/28 support a part of FPU
|
|
||||||
* 2009/Jun/25 fix mov(qword[rax], imm); (thanks to Martin)
|
|
||||||
* 2009/Mar/10 fix redundant REX.W prefix on jmp/call reg64
|
|
||||||
* 2009/Feb/24 add movq reg64, mmx/xmm; movq mmx/xmm, reg64
|
|
||||||
* 2009/Feb/13 movd(xmm7, dword[eax]) drops 0x66 prefix (thanks to Gabest)
|
|
||||||
* 2008/Dec/30 fix call in short relative address(thanks to kato san)
|
|
||||||
* 2008/Sep/18 support @@, @f, @b and localization of label(thanks to nobu-q san)
|
|
||||||
* 2008/Sep/18 support (ptr[rip + 32bit offset]) (thanks to Dango-Chu san)
|
|
||||||
* 2008/Jun/03 fix align(). mov(ptr[eax],1) throws ERR_MEM_SIZE_IS_NOT_SPECIFIED.
|
|
||||||
* 2008/Jun/02 support memory interface allocated by user
|
|
||||||
* 2008/May/26 fix protect() to avoid invalid setting(thanks to shinichiro_h san)
|
|
||||||
* 2008/Apr/30 add cmpxchg16b, cdqe
|
|
||||||
* 2008/Apr/29 support x64
|
|
||||||
* 2008/Apr/14 code refactoring
|
|
||||||
* 2008/Mar/12 add bsr/bsf
|
|
||||||
* 2008/Feb/14 fix output of sub eax, 1234 (thanks to Robert)
|
|
||||||
* 2007/Nov/5 support lock, xadd, xchg
|
|
||||||
* 2007/Nov/2 support SSSE3/SSE4 (thanks to Dango-Chu san)
|
|
||||||
* 2007/Feb/4 fix the bug that exception doesn't occur under the condition which the offset of jmp mnemonic without T_NEAR is over 127.
|
|
||||||
* 2007/Jan/21 fix the bug to create address like [disp] select smaller representation for mov (eax|ax|al, [disp])
|
|
||||||
* 2007/Jan/4 first version
|
|
||||||
|
|
||||||
## Author
|
## Author
|
||||||
MITSUNARI Shigeo(herumi@nifty.com)
|
|
||||||
|
#### 光成滋生 Mitsunari Shigeo
|
||||||
|
[GitHub](https://github.com/herumi) | [Website (Japanese)](http://herumi.in.coocan.jp/) | [herumi@nifty.com](mailto:herumi@nifty.com)
|
||||||
|
|
||||||
## Sponsors welcome
|
## Sponsors welcome
|
||||||
[GitHub Sponsor](https://github.com/sponsors/herumi)
|
[GitHub Sponsor](https://github.com/sponsors/herumi)
|
||||||
|
|
||||||
|
<!----------------------------------------------------------------------------->
|
||||||
|
|
||||||
|
[Badge Build]: https://github.com/herumi/xbyak/actions/workflows/main.yml/badge.svg
|
||||||
|
[Build Status]: https://github.com/herumi/xbyak/actions/workflows/main.yml
|
||||||
|
|
||||||
|
[License]: COPYRIGHT
|
||||||
|
|
||||||
|
[Changelog]: doc/changelog.md
|
||||||
|
[Install]: doc/install.md
|
||||||
|
[Usage]: doc/usage.md
|
||||||
|
|
||||||
|
|
35
externals/xbyak/readme.txt
vendored
35
externals/xbyak/readme.txt
vendored
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.991
|
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.06
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
◎概要
|
◎概要
|
||||||
|
@ -277,6 +277,24 @@ L(label);
|
||||||
assert(label.getAddress(), getCurr());
|
assert(label.getAddress(), getCurr());
|
||||||
```
|
```
|
||||||
|
|
||||||
|
4. farジャンプ
|
||||||
|
|
||||||
|
`jmp(mem, T_FAR)`, `call(mem, T_FAR)`, `retf()`をサポートします。
|
||||||
|
サイズを明示するために`ptr`の代わりに`word|dword|qword`を利用してください。
|
||||||
|
|
||||||
|
32bit
|
||||||
|
```
|
||||||
|
jmp(word[eax], T_FAR); // jmp m16:16(FF /5)
|
||||||
|
jmp(dword[eax], T_FAR); // jmp m16:32(FF /5)
|
||||||
|
```
|
||||||
|
|
||||||
|
64bit
|
||||||
|
```
|
||||||
|
jmp(word[rax], T_FAR); // jmp m16:16(FF /5)
|
||||||
|
jmp(dword[rax], T_FAR); // jmp m16:32(FF /5)
|
||||||
|
jmp(qword[rax], T_FAR); // jmp m16:64(REX.W FF /5)
|
||||||
|
```
|
||||||
|
|
||||||
・Xbyak::CodeGenerator()コンストラクタインタフェース
|
・Xbyak::CodeGenerator()コンストラクタインタフェース
|
||||||
|
|
||||||
@param maxSize [in] コード生成最大サイズ(デフォルト4096byte)
|
@param maxSize [in] コード生成最大サイズ(デフォルト4096byte)
|
||||||
|
@ -382,6 +400,21 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
◎履歴
|
◎履歴
|
||||||
|
|
||||||
|
2022/06/01 ver 6.06 Cpu::TypeクラスのリファクタリングとXBYAK_USE_MEMFDが定義されたときのMmapAllocatorの改善
|
||||||
|
2022/05/20 ver 6.052 Cpu::operator==()を正しく定義
|
||||||
|
2022/05/13 ver 6.051 XBYAK_NO_EXCEPTIONを定義したときのCpuクラスのコンパイルエラー修正
|
||||||
|
2022/05/12 ver 6.05 movdiri, movdir64b, clwb, cldemoteを追加
|
||||||
|
2022/04/05 ver 6.04 tpause, umonitor, umwaitを追加
|
||||||
|
2022/03/08 ver 6.03 MmapAllocatorがmemfd用のユーザ定義文字列をサポート
|
||||||
|
2022/01/28 ver 6.02 displacementの32bit範囲チェックの厳密化
|
||||||
|
2021/12/14 ver 6.01 T_FAR jump/callとretfをサポート
|
||||||
|
2021/09/14 ver 6.00 AVX512-FP16を完全サポート
|
||||||
|
2021/09/09 ver 5.997 vrndscale*を{sae}をサポートするよう修正
|
||||||
|
2021/09/03 ver 5.996 v{add,sub,mul,div,max,min}{sd,ss}をT_rd_saeなどをサポートするよう修正
|
||||||
|
2021/08/15 ver 5.995 Linux上でXBYAK_USE_MEMFDが定義されたなら/proc/self/mapsにラベル追加
|
||||||
|
2021/06/17 ver 5.994 マスクレジスタ用のvcmpXX{ps,pd,ss,sd}のalias追加
|
||||||
|
2021/06/06 ver 5.993 gather/scatterのレジスタの組み合わせの厳密なチェック
|
||||||
|
2021/05/09 ver 5.992 endbr32とendbr64のサポート
|
||||||
2020/11/16 ver 5.991 g++-5のC++14でconstexpr機能の抑制
|
2020/11/16 ver 5.991 g++-5のC++14でconstexpr機能の抑制
|
||||||
2020/10/19 ver 5.99 VNNI命令サポート(Thanks to akharito)
|
2020/10/19 ver 5.99 VNNI命令サポート(Thanks to akharito)
|
||||||
2020/10/17 ver 5.98 [scale * reg]のサポート
|
2020/10/17 ver 5.98 [scale * reg]のサポート
|
||||||
|
|
6
externals/xbyak/sample/Makefile
vendored
6
externals/xbyak/sample/Makefile
vendored
|
@ -37,6 +37,7 @@ endif
|
||||||
|
|
||||||
ifneq ($(OS),mac)
|
ifneq ($(OS),mac)
|
||||||
TARGET += static_buf64
|
TARGET += static_buf64
|
||||||
|
TARGET += memfd
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
@ -51,7 +52,7 @@ all: $(TARGET)
|
||||||
|
|
||||||
CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith #-pedantic
|
CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith #-pedantic
|
||||||
|
|
||||||
CFLAGS=-g -O2 -fomit-frame-pointer -Wall -I../ $(CFLAGS_WARN)
|
CFLAGS=-g -O2 -fomit-frame-pointer -Wall -I../ $(CFLAGS_WARN) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS)
|
||||||
|
|
||||||
test:
|
test:
|
||||||
$(CXX) $(CFLAGS) test0.cpp -o $@ -m32
|
$(CXX) $(CFLAGS) test0.cpp -o $@ -m32
|
||||||
|
@ -95,6 +96,8 @@ jmp_table:
|
||||||
$(CXX) $(CFLAGS) jmp_table.cpp -o $@ -m32
|
$(CXX) $(CFLAGS) jmp_table.cpp -o $@ -m32
|
||||||
jmp_table64:
|
jmp_table64:
|
||||||
$(CXX) $(CFLAGS) jmp_table.cpp -o $@ -m64
|
$(CXX) $(CFLAGS) jmp_table.cpp -o $@ -m64
|
||||||
|
memfd:
|
||||||
|
$(CXX) $(CFLAGS) memfd.cpp -o $@ -m64
|
||||||
profiler: profiler.cpp ../xbyak/xbyak_util.h
|
profiler: profiler.cpp ../xbyak/xbyak_util.h
|
||||||
$(CXX) $(CFLAGS) profiler.cpp -o $@
|
$(CXX) $(CFLAGS) profiler.cpp -o $@
|
||||||
profiler-vtune: profiler.cpp ../xbyak/xbyak_util.h
|
profiler-vtune: profiler.cpp ../xbyak/xbyak_util.h
|
||||||
|
@ -121,3 +124,4 @@ test_util : test_util.cpp $(XBYAK_INC) ../xbyak/xbyak_util.h
|
||||||
test_util2 : test_util.cpp $(XBYAK_INC) ../xbyak/xbyak_util.h
|
test_util2 : test_util.cpp $(XBYAK_INC) ../xbyak/xbyak_util.h
|
||||||
jmp_table: jmp_table.cpp $(XBYAK_INC)
|
jmp_table: jmp_table.cpp $(XBYAK_INC)
|
||||||
jmp_table64: jmp_table.cpp $(XBYAK_INC)
|
jmp_table64: jmp_table.cpp $(XBYAK_INC)
|
||||||
|
memfd: memfd.cpp $(XBYAK_INC)
|
||||||
|
|
39
externals/xbyak/sample/memfd.cpp
vendored
Normal file
39
externals/xbyak/sample/memfd.cpp
vendored
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
/*
|
||||||
|
a sample to use MmapAllocator with a user-defined name
|
||||||
|
cat /proc/`pidof ./memfd`/maps
|
||||||
|
|
||||||
|
7fca70b44000-7fca70b4a000 rw-p 00000000 00:00 0
|
||||||
|
7fca70b67000-7fca70b68000 rwxs 00000000 00:05 19960170 /memfd:xyz (deleted)
|
||||||
|
7fca70b68000-7fca70b69000 rwxs 00000000 00:05 19960169 /memfd:abc (deleted)
|
||||||
|
7fca70b69000-7fca70b6a000 r--p 00029000 103:03 19136541 /lib/x86_64-linux-gnu/ld-2.27.so
|
||||||
|
7fca70b6a000-7fca70b6b000 rw-p 0002a000 103:03 19136541 /lib/x86_64-linux-gnu/ld-2.27.so
|
||||||
|
*/
|
||||||
|
#define XBYAK_USE_MEMFD
|
||||||
|
#include <xbyak/xbyak.h>
|
||||||
|
#include <fstream>
|
||||||
|
|
||||||
|
class Code : Xbyak::MmapAllocator, public Xbyak::CodeGenerator {
|
||||||
|
public:
|
||||||
|
Code(const char *name, int v)
|
||||||
|
: Xbyak::MmapAllocator(name)
|
||||||
|
, Xbyak::CodeGenerator(4096, nullptr, this /* specify external MmapAllocator */)
|
||||||
|
{
|
||||||
|
mov(eax, v);
|
||||||
|
ret();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
Code c1("Xbyak::abc", 123);
|
||||||
|
Code c2("Xbyak::xyz", 456);
|
||||||
|
printf("c1 %d\n", c1.getCode<int (*)()>()());
|
||||||
|
printf("c2 %d\n", c2.getCode<int (*)()>()());
|
||||||
|
std::ifstream ifs("/proc/self/maps", std::ios::binary);
|
||||||
|
if (ifs) {
|
||||||
|
std::string line;
|
||||||
|
while (std::getline(ifs, line)) {
|
||||||
|
printf("%s\n", line.c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
24
externals/xbyak/sample/test_util.cpp
vendored
24
externals/xbyak/sample/test_util.cpp
vendored
|
@ -7,14 +7,13 @@ struct PopCountTest : public Xbyak::CodeGenerator {
|
||||||
PopCountTest(int n)
|
PopCountTest(int n)
|
||||||
: Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE)
|
: Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE)
|
||||||
{
|
{
|
||||||
ret();
|
|
||||||
mov(eax, n);
|
mov(eax, n);
|
||||||
popcnt(eax, eax);
|
popcnt(eax, eax);
|
||||||
ret();
|
ret();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
void putCPUinfo()
|
void putCPUinfo(bool onlyCpuidFeature)
|
||||||
{
|
{
|
||||||
using namespace Xbyak::util;
|
using namespace Xbyak::util;
|
||||||
Cpu cpu;
|
Cpu cpu;
|
||||||
|
@ -35,8 +34,6 @@ void putCPUinfo()
|
||||||
{ Cpu::tPOPCNT, "popcnt" },
|
{ Cpu::tPOPCNT, "popcnt" },
|
||||||
{ Cpu::t3DN, "3dn" },
|
{ Cpu::t3DN, "3dn" },
|
||||||
{ Cpu::tE3DN, "e3dn" },
|
{ Cpu::tE3DN, "e3dn" },
|
||||||
{ Cpu::tSSE4a, "sse4a" },
|
|
||||||
{ Cpu::tSSE5, "sse5" },
|
|
||||||
{ Cpu::tAESNI, "aesni" },
|
{ Cpu::tAESNI, "aesni" },
|
||||||
{ Cpu::tRDTSCP, "rdtscp" },
|
{ Cpu::tRDTSCP, "rdtscp" },
|
||||||
{ Cpu::tOSXSAVE, "osxsave(xgetvb)" },
|
{ Cpu::tOSXSAVE, "osxsave(xgetvb)" },
|
||||||
|
@ -85,11 +82,19 @@ void putCPUinfo()
|
||||||
{ Cpu::tAMX_INT8, "amx(int8)" },
|
{ Cpu::tAMX_INT8, "amx(int8)" },
|
||||||
{ Cpu::tAMX_BF16, "amx(bf16)" },
|
{ Cpu::tAMX_BF16, "amx(bf16)" },
|
||||||
{ Cpu::tAVX_VNNI, "avx_vnni" },
|
{ Cpu::tAVX_VNNI, "avx_vnni" },
|
||||||
|
{ Cpu::tAVX512_FP16, "avx512_fp16" },
|
||||||
|
{ Cpu::tWAITPKG, "waitpkg" },
|
||||||
|
{ Cpu::tCLFLUSHOPT, "clflushopt" },
|
||||||
|
{ Cpu::tCLDEMOTE, "cldemote" },
|
||||||
|
{ Cpu::tMOVDIRI, "movdiri" },
|
||||||
|
{ Cpu::tMOVDIR64B, "movdir64b" },
|
||||||
|
{ Cpu::tCLZERO, "clzero" },
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
if (cpu.has(tbl[i].type)) printf(" %s", tbl[i].str);
|
if (cpu.has(tbl[i].type)) printf(" %s", tbl[i].str);
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
if (onlyCpuidFeature) return;
|
||||||
if (cpu.has(Cpu::tPOPCNT)) {
|
if (cpu.has(Cpu::tPOPCNT)) {
|
||||||
const int n = 0x12345678; // bitcount = 13
|
const int n = 0x12345678; // bitcount = 13
|
||||||
const int ok = 13;
|
const int ok = 13;
|
||||||
|
@ -123,12 +128,15 @@ void putCPUinfo()
|
||||||
printf("CoreLevel=%u\n", cpu.getNumCores(Xbyak::util::CoreLevel));
|
printf("CoreLevel=%u\n", cpu.getNumCores(Xbyak::util::CoreLevel));
|
||||||
}
|
}
|
||||||
|
|
||||||
int main()
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
|
bool onlyCpuidFeature = argc == 2 && strcmp(argv[1], "-cpuid") == 0;
|
||||||
|
if (!onlyCpuidFeature) {
|
||||||
#ifdef XBYAK32
|
#ifdef XBYAK32
|
||||||
puts("32bit");
|
puts("32bit");
|
||||||
#else
|
#else
|
||||||
puts("64bit");
|
puts("64bit");
|
||||||
#endif
|
#endif
|
||||||
putCPUinfo();
|
}
|
||||||
|
putCPUinfo(onlyCpuidFeature);
|
||||||
}
|
}
|
||||||
|
|
9
externals/xbyak/test/Makefile
vendored
9
externals/xbyak/test/Makefile
vendored
|
@ -1,4 +1,4 @@
|
||||||
TARGET = make_nm normalize_prefix bad_address misc cvt_test cvt_test32 noexception
|
TARGET = make_nm normalize_prefix bad_address misc cvt_test cvt_test32 noexception misc32
|
||||||
XBYAK_INC=../xbyak/xbyak.h
|
XBYAK_INC=../xbyak/xbyak.h
|
||||||
UNAME_S=$(shell uname -s)
|
UNAME_S=$(shell uname -s)
|
||||||
BIT=32
|
BIT=32
|
||||||
|
@ -22,7 +22,7 @@ all: $(TARGET)
|
||||||
|
|
||||||
CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith
|
CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith
|
||||||
|
|
||||||
CFLAGS=-O2 -fomit-frame-pointer -Wall -fno-operator-names -I../ -I./ $(CFLAGS_WARN) #-std=c++0x
|
CFLAGS=-O2 -fomit-frame-pointer -Wall -fno-operator-names -I../ -I./ $(CFLAGS_WARN) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) #-std=c++0x
|
||||||
make_nm:
|
make_nm:
|
||||||
$(CXX) $(CFLAGS) make_nm.cpp -o $@
|
$(CXX) $(CFLAGS) make_nm.cpp -o $@
|
||||||
normalize_prefix: normalize_prefix.cpp ../xbyak/xbyak.h
|
normalize_prefix: normalize_prefix.cpp ../xbyak/xbyak.h
|
||||||
|
@ -41,6 +41,8 @@ bad_address: bad_address.cpp ../xbyak/xbyak.h
|
||||||
$(CXX) $(CFLAGS) bad_address.cpp -o $@
|
$(CXX) $(CFLAGS) bad_address.cpp -o $@
|
||||||
misc: misc.cpp ../xbyak/xbyak.h
|
misc: misc.cpp ../xbyak/xbyak.h
|
||||||
$(CXX) $(CFLAGS) misc.cpp -o $@
|
$(CXX) $(CFLAGS) misc.cpp -o $@
|
||||||
|
misc32: misc.cpp ../xbyak/xbyak.h
|
||||||
|
$(CXX) $(CFLAGS) misc.cpp -o $@ -DXBYAK32
|
||||||
cvt_test: cvt_test.cpp ../xbyak/xbyak.h
|
cvt_test: cvt_test.cpp ../xbyak/xbyak.h
|
||||||
$(CXX) $(CFLAGS) $< -o $@
|
$(CXX) $(CFLAGS) $< -o $@
|
||||||
cvt_test32: cvt_test.cpp ../xbyak/xbyak.h
|
cvt_test32: cvt_test.cpp ../xbyak/xbyak.h
|
||||||
|
@ -62,6 +64,7 @@ ifneq ($(ONLY_64BIT),1)
|
||||||
endif
|
endif
|
||||||
./bad_address
|
./bad_address
|
||||||
./misc
|
./misc
|
||||||
|
./misc32
|
||||||
./cvt_test
|
./cvt_test
|
||||||
ifeq ($(BIT),64)
|
ifeq ($(BIT),64)
|
||||||
./test_address.sh 64
|
./test_address.sh 64
|
||||||
|
@ -95,7 +98,7 @@ test:
|
||||||
$(MAKE) test_avx512
|
$(MAKE) test_avx512
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf *.o $(TARGET) lib_run nm.cpp nm_frame make_512
|
$(RM) a.asm *.lst *.obj *.o $(TARGET) lib_run nm.cpp nm_frame make_512
|
||||||
|
|
||||||
lib_run: lib_test.cpp lib_run.cpp lib.h
|
lib_run: lib_test.cpp lib_run.cpp lib.h
|
||||||
$(CXX) $(CFLAGS) lib_run.cpp lib_test.cpp -o lib_run
|
$(CXX) $(CFLAGS) lib_run.cpp lib_test.cpp -o lib_run
|
||||||
|
|
6
externals/xbyak/test/address.cpp
vendored
6
externals/xbyak/test/address.cpp
vendored
|
@ -31,7 +31,7 @@ void genVsib(bool isJIT)
|
||||||
"xmm3 * 8 + edi + 123",
|
"xmm3 * 8 + edi + 123",
|
||||||
"xmm2 * 2 + 5",
|
"xmm2 * 2 + 5",
|
||||||
"eax + xmm0",
|
"eax + xmm0",
|
||||||
"esp + xmm4",
|
"esp + xmm2",
|
||||||
};
|
};
|
||||||
const char *vm32yTbl[] = {
|
const char *vm32yTbl[] = {
|
||||||
"ymm0",
|
"ymm0",
|
||||||
|
@ -42,7 +42,7 @@ void genVsib(bool isJIT)
|
||||||
"ymm3 * 8 + edi + 123",
|
"ymm3 * 8 + edi + 123",
|
||||||
"ymm2 * 2 + 5",
|
"ymm2 * 2 + 5",
|
||||||
"eax + ymm0",
|
"eax + ymm0",
|
||||||
"esp + ymm4",
|
"esp + ymm2",
|
||||||
};
|
};
|
||||||
genVsibSub(isJIT, "vgatherdpd", vm32xTbl, NUM_OF_ARRAY(vm32xTbl));
|
genVsibSub(isJIT, "vgatherdpd", vm32xTbl, NUM_OF_ARRAY(vm32xTbl));
|
||||||
genVsibSub(isJIT, "vgatherqpd", vm32yTbl, NUM_OF_ARRAY(vm32yTbl));
|
genVsibSub(isJIT, "vgatherqpd", vm32yTbl, NUM_OF_ARRAY(vm32yTbl));
|
||||||
|
@ -93,7 +93,7 @@ void genAddress(bool isJIT, const char regTbl[][5], size_t regTblNum)
|
||||||
}
|
}
|
||||||
if (isFirst) {
|
if (isFirst) {
|
||||||
if (isJIT) printf("(void*)");
|
if (isJIT) printf("(void*)");
|
||||||
printf("0x%08X", disp);
|
printf("%d", disp);
|
||||||
} else {
|
} else {
|
||||||
if (disp >= 0) {
|
if (disp >= 0) {
|
||||||
putchar('+');
|
putchar('+');
|
||||||
|
|
15
externals/xbyak/test/jmp.cpp
vendored
15
externals/xbyak/test/jmp.cpp
vendored
|
@ -1383,3 +1383,18 @@ CYBOZU_TEST_AUTO(setDefaultJmpNEAR)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CYBOZU_TEST_AUTO(ambiguousFarJmp)
|
||||||
|
{
|
||||||
|
struct Code : Xbyak::CodeGenerator {
|
||||||
|
#ifdef XBYAK32
|
||||||
|
void genJmp() { jmp(ptr[eax], T_FAR); }
|
||||||
|
void genCall() { call(ptr[eax], T_FAR); }
|
||||||
|
#else
|
||||||
|
void genJmp() { jmp(ptr[rax], T_FAR); }
|
||||||
|
void genCall() { call(ptr[rax], T_FAR); }
|
||||||
|
#endif
|
||||||
|
} code;
|
||||||
|
CYBOZU_TEST_EXCEPTION(code.genJmp(), std::exception);
|
||||||
|
CYBOZU_TEST_EXCEPTION(code.genCall(), std::exception);
|
||||||
|
}
|
||||||
|
|
8
externals/xbyak/test/make_512.cpp
vendored
8
externals/xbyak/test/make_512.cpp
vendored
|
@ -1366,6 +1366,8 @@ public:
|
||||||
put(p, _ZMM, _ZMM, mem | _MEM);
|
put(p, _ZMM, _ZMM, mem | _MEM);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
put("vaddss", XMM, _XMM, XMM_ER);
|
||||||
|
put("vaddsd", XMM, _XMM, XMM_ER);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
void putAVX1()
|
void putAVX1()
|
||||||
|
@ -1949,14 +1951,16 @@ public:
|
||||||
put("vrndscalepd", XMM_KZ, _XMM | _MEM | M_1to2, IMM8);
|
put("vrndscalepd", XMM_KZ, _XMM | _MEM | M_1to2, IMM8);
|
||||||
put("vrndscalepd", YMM_KZ, _YMM | _MEM | M_1to4, IMM8);
|
put("vrndscalepd", YMM_KZ, _YMM | _MEM | M_1to4, IMM8);
|
||||||
put("vrndscalepd", ZMM_KZ, _ZMM | _MEM | M_1to8, IMM8);
|
put("vrndscalepd", ZMM_KZ, _ZMM | _MEM | M_1to8, IMM8);
|
||||||
|
put("vrndscalepd", ZMM_KZ, _ZMM | ZMM_SAE, IMM8);
|
||||||
|
|
||||||
put("vrndscaleps", XMM_KZ, _XMM | _MEM | M_1to4, IMM8);
|
put("vrndscaleps", XMM_KZ, _XMM | _MEM | M_1to4, IMM8);
|
||||||
put("vrndscaleps", YMM_KZ, _YMM | _MEM | M_1to8, IMM8);
|
put("vrndscaleps", YMM_KZ, _YMM | _MEM | M_1to8, IMM8);
|
||||||
put("vrndscaleps", ZMM_KZ, _ZMM | _MEM | M_1to16, IMM8);
|
put("vrndscaleps", ZMM_KZ, _ZMM | _MEM | M_1to16, IMM8);
|
||||||
|
put("vrndscaleps", ZMM_KZ, _ZMM | ZMM_SAE, IMM8);
|
||||||
|
|
||||||
put("vrndscalesd", XMM_KZ, _XMM, _XMM | _MEM, IMM8);
|
put("vrndscalesd", XMM_KZ, _XMM, _XMM | _MEM | XMM_SAE, IMM8);
|
||||||
|
|
||||||
put("vrndscaless", XMM_KZ, _XMM, _XMM | _MEM, IMM8);
|
put("vrndscaless", XMM_KZ, _XMM, _XMM | _MEM | XMM_SAE, IMM8);
|
||||||
|
|
||||||
put("vscalefpd", XMM_KZ, _XMM, _XMM | _MEM | M_1to2);
|
put("vscalefpd", XMM_KZ, _XMM, _XMM | _MEM | M_1to2);
|
||||||
put("vscalefpd", YMM_KZ, _YMM, _YMM | _MEM | M_1to4);
|
put("vscalefpd", YMM_KZ, _YMM, _YMM | _MEM | M_1to4);
|
||||||
|
|
50
externals/xbyak/test/make_nm.cpp
vendored
50
externals/xbyak/test/make_nm.cpp
vendored
|
@ -179,6 +179,19 @@ class Test {
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
void put(const char *nm, const char *para1, uint64_t op2, const char *para3) const
|
||||||
|
{
|
||||||
|
for (int j = 0; j < bitEnd; j++) {
|
||||||
|
if ((op2 & (1ULL << j)) == 0) continue;
|
||||||
|
printf("%s ", nm);
|
||||||
|
if (isXbyak_) printf("(");
|
||||||
|
printf("%s", para1);
|
||||||
|
if (!(op2 & NOPARA)) printf(", %s", get(1ULL << j));
|
||||||
|
printf(", %s", para3);
|
||||||
|
if (isXbyak_) printf("); dump();");
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
const char *get(uint64_t type) const
|
const char *get(uint64_t type) const
|
||||||
{
|
{
|
||||||
int idx = (rand() / 31) & 7;
|
int idx = (rand() / 31) & 7;
|
||||||
|
@ -499,6 +512,7 @@ class Test {
|
||||||
"cmpsb",
|
"cmpsb",
|
||||||
"cmpsw",
|
"cmpsw",
|
||||||
"cmpsd",
|
"cmpsd",
|
||||||
|
"hlt",
|
||||||
"int3",
|
"int3",
|
||||||
"leave",
|
"leave",
|
||||||
"lodsb",
|
"lodsb",
|
||||||
|
@ -623,6 +637,7 @@ class Test {
|
||||||
"fstsw",
|
"fstsw",
|
||||||
"fnstsw",
|
"fnstsw",
|
||||||
"fxrstor",
|
"fxrstor",
|
||||||
|
"clwb",
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(memTbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(memTbl); i++) {
|
||||||
put(memTbl[i], MEM);
|
put(memTbl[i], MEM);
|
||||||
|
@ -685,6 +700,24 @@ class Test {
|
||||||
puts("pshufb xmm14, [rel label0]");
|
puts("pshufb xmm14, [rel label0]");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
void putFarJmp() const
|
||||||
|
{
|
||||||
|
#ifdef XBYAK64
|
||||||
|
put("jmp", "word[rax],T_FAR", "far word [rax]");
|
||||||
|
put("jmp", "dword[rax],T_FAR", "far dword [rax]");
|
||||||
|
put("jmp", "qword[rax],T_FAR", "far qword [rax]");
|
||||||
|
|
||||||
|
put("call", "word[rax],T_FAR", "far word [rax]");
|
||||||
|
put("call", "dword[rax],T_FAR", "far dword [rax]");
|
||||||
|
put("call", "qword[rax],T_FAR", "far qword [rax]");
|
||||||
|
#else
|
||||||
|
put("jmp", "dword[eax],T_FAR", "far dword [eax]");
|
||||||
|
put("jmp", "word[eax],T_FAR", "far word [eax]");
|
||||||
|
|
||||||
|
put("call", "dword[eax],T_FAR", "far dword [eax]");
|
||||||
|
put("call", "word[eax],T_FAR", "far word [eax]");
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
void putMMX1() const
|
void putMMX1() const
|
||||||
|
@ -1237,6 +1270,10 @@ class Test {
|
||||||
put("mov", REG64, "0x12345678", "0x12345678");
|
put("mov", REG64, "0x12345678", "0x12345678");
|
||||||
put("mov", REG64, "0xffffffff12345678LL", "0xffffffff12345678");
|
put("mov", REG64, "0xffffffff12345678LL", "0xffffffff12345678");
|
||||||
put("mov", REG32e|REG16|REG8|RAX|EAX|AX|AL, IMM);
|
put("mov", REG32e|REG16|REG8|RAX|EAX|AX|AL, IMM);
|
||||||
|
|
||||||
|
put("mov", EAX, "ptr[(void*)-1]", "[-1]");
|
||||||
|
put("mov", EAX, "ptr[(void*)0x7fffffff]", "[0x7fffffff]");
|
||||||
|
put("mov", EAX, "ptr[(void*)0xffffffffffffffff]", "[0xffffffffffffffff]");
|
||||||
}
|
}
|
||||||
void putEtc() const
|
void putEtc() const
|
||||||
{
|
{
|
||||||
|
@ -1244,6 +1281,9 @@ class Test {
|
||||||
const char *p = "ret";
|
const char *p = "ret";
|
||||||
put(p);
|
put(p);
|
||||||
put(p, IMM);
|
put(p, IMM);
|
||||||
|
p = "retf";
|
||||||
|
put(p);
|
||||||
|
put(p, IMM);
|
||||||
p = "mov";
|
p = "mov";
|
||||||
put(p, EAX|REG32|MEM|MEM_ONLY_DISP, REG32|EAX);
|
put(p, EAX|REG32|MEM|MEM_ONLY_DISP, REG32|EAX);
|
||||||
put(p, REG64|MEM|MEM_ONLY_DISP, REG64|RAX);
|
put(p, REG64|MEM|MEM_ONLY_DISP, REG64|RAX);
|
||||||
|
@ -1480,6 +1520,7 @@ class Test {
|
||||||
put("pextrq", REG64|MEM, XMM, IMM);
|
put("pextrq", REG64|MEM, XMM, IMM);
|
||||||
put("pinsrq", XMM, REG64|MEM, IMM);
|
put("pinsrq", XMM, REG64|MEM, IMM);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
void putSHA() const
|
void putSHA() const
|
||||||
{
|
{
|
||||||
|
@ -2361,16 +2402,16 @@ public:
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const Tbl& p = tbl[i];
|
const Tbl& p = tbl[i];
|
||||||
const char *name = p.name;
|
const char *name = p.name;
|
||||||
put(name, XMM, VM32X, XMM);
|
put(name, "xmm3", VM32X, "xmm5");
|
||||||
switch (p.mode) {
|
switch (p.mode) {
|
||||||
case y_vx_y:
|
case y_vx_y:
|
||||||
put(name, YMM, VM32X, YMM);
|
put(name, "ymm3", VM32X, "ymm5");
|
||||||
break;
|
break;
|
||||||
case y_vy_y:
|
case y_vy_y:
|
||||||
put(name, YMM, VM32Y, YMM);
|
put(name, "ymm3", VM32Y, "ymm5");
|
||||||
break;
|
break;
|
||||||
case x_vy_x:
|
case x_vy_x:
|
||||||
put(name, XMM, VM32Y, XMM);
|
put(name, "xmm3", VM32Y, "xmm5");
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
printf("ERR mode=%d\n", p.mode);
|
printf("ERR mode=%d\n", p.mode);
|
||||||
|
@ -2516,6 +2557,7 @@ public:
|
||||||
#else // USE_AVX
|
#else // USE_AVX
|
||||||
|
|
||||||
putJmp();
|
putJmp();
|
||||||
|
putFarJmp();
|
||||||
|
|
||||||
#ifdef USE_YASM
|
#ifdef USE_YASM
|
||||||
|
|
||||||
|
|
1133
externals/xbyak/test/misc.cpp
vendored
1133
externals/xbyak/test/misc.cpp
vendored
File diff suppressed because it is too large
Load diff
4
externals/xbyak/test/sf_test.cpp
vendored
4
externals/xbyak/test/sf_test.cpp
vendored
|
@ -218,7 +218,7 @@ void check(int x, int y)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void verify(const Xbyak::uint8_t *f, int pNum)
|
void verify(const uint8_t *f, int pNum)
|
||||||
{
|
{
|
||||||
switch (pNum) {
|
switch (pNum) {
|
||||||
case 0:
|
case 0:
|
||||||
|
@ -264,7 +264,7 @@ void testAll()
|
||||||
}
|
}
|
||||||
for (int tNum = 0; tNum < maxNum; tNum++) {
|
for (int tNum = 0; tNum < maxNum; tNum++) {
|
||||||
// printf("pNum=%d, tNum=%d, stackSize=%d\n", pNum, tNum | opt, stackSize);
|
// printf("pNum=%d, tNum=%d, stackSize=%d\n", pNum, tNum | opt, stackSize);
|
||||||
const Xbyak::uint8_t *f = code.getCurr();
|
const uint8_t *f = code.getCurr();
|
||||||
code.gen(pNum, tNum | opt, stackSize);
|
code.gen(pNum, tNum | opt, stackSize);
|
||||||
verify(f, pNum);
|
verify(f, pNum);
|
||||||
/*
|
/*
|
||||||
|
|
163
externals/xbyak/xbyak/xbyak.h
vendored
163
externals/xbyak/xbyak/xbyak.h
vendored
|
@ -95,6 +95,12 @@
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// MFD_CLOEXEC defined only linux 3.17 or later.
|
||||||
|
// Android wraps the memfd_create syscall from API version 30.
|
||||||
|
#if !defined(MFD_CLOEXEC) || (defined(__ANDROID__) && __ANDROID_API__ < 30)
|
||||||
|
#undef XBYAK_USE_MEMFD
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(_WIN64) || defined(__MINGW64__) || (defined(__CYGWIN__) && defined(__x86_64__))
|
#if defined(_WIN64) || defined(__MINGW64__) || (defined(__CYGWIN__) && defined(__x86_64__))
|
||||||
#define XBYAK64_WIN
|
#define XBYAK64_WIN
|
||||||
#elif defined(__x86_64__)
|
#elif defined(__x86_64__)
|
||||||
|
@ -138,7 +144,7 @@ namespace Xbyak {
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
DEFAULT_MAX_CODE_SIZE = 4096,
|
DEFAULT_MAX_CODE_SIZE = 4096,
|
||||||
VERSION = 0x5991 /* 0xABCD = A.BC(D) */
|
VERSION = 0x6060 /* 0xABCD = A.BC(D) */
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifndef MIE_INTEGER_TYPE_DEFINED
|
#ifndef MIE_INTEGER_TYPE_DEFINED
|
||||||
|
@ -209,6 +215,7 @@ enum {
|
||||||
ERR_INVALID_MIB_ADDRESS,
|
ERR_INVALID_MIB_ADDRESS,
|
||||||
ERR_X2APIC_IS_NOT_SUPPORTED,
|
ERR_X2APIC_IS_NOT_SUPPORTED,
|
||||||
ERR_NOT_SUPPORTED,
|
ERR_NOT_SUPPORTED,
|
||||||
|
ERR_SAME_REGS_ARE_INVALID,
|
||||||
ERR_INTERNAL // Put it at last.
|
ERR_INTERNAL // Put it at last.
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -261,6 +268,7 @@ inline const char *ConvertErrorToString(int err)
|
||||||
"invalid mib address",
|
"invalid mib address",
|
||||||
"x2APIC is not supported",
|
"x2APIC is not supported",
|
||||||
"not supported",
|
"not supported",
|
||||||
|
"same regs are invalid",
|
||||||
"internal error"
|
"internal error"
|
||||||
};
|
};
|
||||||
assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl));
|
assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl));
|
||||||
|
@ -285,10 +293,10 @@ inline void SetError(int err) {
|
||||||
inline void ClearError() {
|
inline void ClearError() {
|
||||||
local::GetErrorRef() = 0;
|
local::GetErrorRef() = 0;
|
||||||
}
|
}
|
||||||
inline int GetError() { return local::GetErrorRef(); }
|
inline int GetError() { return Xbyak::local::GetErrorRef(); }
|
||||||
|
|
||||||
#define XBYAK_THROW(err) { local::SetError(err); return; }
|
#define XBYAK_THROW(err) { Xbyak::local::SetError(err); return; }
|
||||||
#define XBYAK_THROW_RET(err, r) { local::SetError(err); return r; }
|
#define XBYAK_THROW_RET(err, r) { Xbyak::local::SetError(err); return r; }
|
||||||
|
|
||||||
#else
|
#else
|
||||||
class Error : public std::exception {
|
class Error : public std::exception {
|
||||||
|
@ -377,6 +385,7 @@ enum LabelMode {
|
||||||
custom allocator
|
custom allocator
|
||||||
*/
|
*/
|
||||||
struct Allocator {
|
struct Allocator {
|
||||||
|
explicit Allocator(const std::string& = "") {} // same interface with MmapAllocator
|
||||||
virtual uint8_t *alloc(size_t size) { return reinterpret_cast<uint8_t*>(AlignedMalloc(size, inner::ALIGN_PAGE_SIZE)); }
|
virtual uint8_t *alloc(size_t size) { return reinterpret_cast<uint8_t*>(AlignedMalloc(size, inner::ALIGN_PAGE_SIZE)); }
|
||||||
virtual void free(uint8_t *p) { AlignedFree(p); }
|
virtual void free(uint8_t *p) { AlignedFree(p); }
|
||||||
virtual ~Allocator() {}
|
virtual ~Allocator() {}
|
||||||
|
@ -408,10 +417,21 @@ inline int getMacOsVersion()
|
||||||
|
|
||||||
} // util
|
} // util
|
||||||
#endif
|
#endif
|
||||||
class MmapAllocator : Allocator {
|
class MmapAllocator : public Allocator {
|
||||||
typedef XBYAK_STD_UNORDERED_MAP<uintptr_t, size_t> SizeList;
|
struct Allocation {
|
||||||
SizeList sizeList_;
|
size_t size;
|
||||||
|
#if defined(XBYAK_USE_MEMFD)
|
||||||
|
// fd_ is only used with XBYAK_USE_MEMFD. We keep the file open
|
||||||
|
// during the lifetime of each allocation in order to support
|
||||||
|
// checkpoint/restore by unprivileged users.
|
||||||
|
int fd;
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
const std::string name_; // only used with XBYAK_USE_MEMFD
|
||||||
|
typedef XBYAK_STD_UNORDERED_MAP<uintptr_t, Allocation> AllocationList;
|
||||||
|
AllocationList allocList_;
|
||||||
public:
|
public:
|
||||||
|
explicit MmapAllocator(const std::string& name = "xbyak") : name_(name) {}
|
||||||
uint8_t *alloc(size_t size)
|
uint8_t *alloc(size_t size)
|
||||||
{
|
{
|
||||||
const size_t alignedSizeM1 = inner::ALIGN_PAGE_SIZE - 1;
|
const size_t alignedSizeM1 = inner::ALIGN_PAGE_SIZE - 1;
|
||||||
|
@ -427,21 +447,44 @@ public:
|
||||||
const int mojaveVersion = 18;
|
const int mojaveVersion = 18;
|
||||||
if (util::getMacOsVersion() >= mojaveVersion) mode |= MAP_JIT;
|
if (util::getMacOsVersion() >= mojaveVersion) mode |= MAP_JIT;
|
||||||
#endif
|
#endif
|
||||||
void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, mode, -1, 0);
|
int fd = -1;
|
||||||
if (p == MAP_FAILED) XBYAK_THROW_RET(ERR_CANT_ALLOC, 0)
|
#if defined(XBYAK_USE_MEMFD)
|
||||||
|
fd = memfd_create(name_.c_str(), MFD_CLOEXEC);
|
||||||
|
if (fd != -1) {
|
||||||
|
mode = MAP_SHARED;
|
||||||
|
if (ftruncate(fd, size) != 0) {
|
||||||
|
close(fd);
|
||||||
|
XBYAK_THROW_RET(ERR_CANT_ALLOC, 0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, mode, fd, 0);
|
||||||
|
if (p == MAP_FAILED) {
|
||||||
|
if (fd != -1) close(fd);
|
||||||
|
XBYAK_THROW_RET(ERR_CANT_ALLOC, 0)
|
||||||
|
}
|
||||||
assert(p);
|
assert(p);
|
||||||
sizeList_[(uintptr_t)p] = size;
|
Allocation &alloc = allocList_[(uintptr_t)p];
|
||||||
|
alloc.size = size;
|
||||||
|
#if defined(XBYAK_USE_MEMFD)
|
||||||
|
alloc.fd = fd;
|
||||||
|
#endif
|
||||||
return (uint8_t*)p;
|
return (uint8_t*)p;
|
||||||
}
|
}
|
||||||
void free(uint8_t *p)
|
void free(uint8_t *p)
|
||||||
{
|
{
|
||||||
if (p == 0) return;
|
if (p == 0) return;
|
||||||
SizeList::iterator i = sizeList_.find((uintptr_t)p);
|
AllocationList::iterator i = allocList_.find((uintptr_t)p);
|
||||||
if (i == sizeList_.end()) XBYAK_THROW(ERR_BAD_PARAMETER)
|
if (i == allocList_.end()) XBYAK_THROW(ERR_BAD_PARAMETER)
|
||||||
if (munmap((void*)i->first, i->second) < 0) XBYAK_THROW(ERR_MUNMAP)
|
if (munmap((void*)i->first, i->second.size) < 0) XBYAK_THROW(ERR_MUNMAP)
|
||||||
sizeList_.erase(i);
|
#if defined(XBYAK_USE_MEMFD)
|
||||||
|
if (i->second.fd != -1) close(i->second.fd);
|
||||||
|
#endif
|
||||||
|
allocList_.erase(i);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
#else
|
||||||
|
typedef Allocator MmapAllocator;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
class Address;
|
class Address;
|
||||||
|
@ -1557,6 +1600,7 @@ public:
|
||||||
enum LabelType {
|
enum LabelType {
|
||||||
T_SHORT,
|
T_SHORT,
|
||||||
T_NEAR,
|
T_NEAR,
|
||||||
|
T_FAR, // far jump
|
||||||
T_AUTO // T_SHORT if possible
|
T_AUTO // T_SHORT if possible
|
||||||
};
|
};
|
||||||
private:
|
private:
|
||||||
|
@ -1605,6 +1649,11 @@ private:
|
||||||
{
|
{
|
||||||
return op1.isREG(i32e) && ((op2.isREG(i32e) && op1.getBit() == op2.getBit()) || op2.isMEM());
|
return op1.isREG(i32e) && ((op2.isREG(i32e) && op1.getBit() == op2.getBit()) || op2.isMEM());
|
||||||
}
|
}
|
||||||
|
static inline bool isValidSSE(const Operand& op1)
|
||||||
|
{
|
||||||
|
// SSE instructions do not support XMM16 - XMM31
|
||||||
|
return !(op1.isXMM() && op1.getIdx() >= 16);
|
||||||
|
}
|
||||||
void rex(const Operand& op1, const Operand& op2 = Operand())
|
void rex(const Operand& op1, const Operand& op2 = Operand())
|
||||||
{
|
{
|
||||||
uint8_t rex = 0;
|
uint8_t rex = 0;
|
||||||
|
@ -1635,9 +1684,10 @@ private:
|
||||||
//
|
//
|
||||||
T_N_VL = 1 << 3, // N * (1, 2, 4) for VL
|
T_N_VL = 1 << 3, // N * (1, 2, 4) for VL
|
||||||
T_DUP = 1 << 4, // N = (8, 32, 64)
|
T_DUP = 1 << 4, // N = (8, 32, 64)
|
||||||
T_66 = 1 << 5,
|
T_66 = 1 << 5, // pp = 1
|
||||||
T_F3 = 1 << 6,
|
T_F3 = 1 << 6, // pp = 2
|
||||||
T_F2 = 1 << 7,
|
T_F2 = T_66 | T_F3, // pp = 3
|
||||||
|
T_ER_R = 1 << 7, // reg{er}
|
||||||
T_0F = 1 << 8,
|
T_0F = 1 << 8,
|
||||||
T_0F38 = 1 << 9,
|
T_0F38 = 1 << 9,
|
||||||
T_0F3A = 1 << 10,
|
T_0F3A = 1 << 10,
|
||||||
|
@ -1658,11 +1708,17 @@ private:
|
||||||
T_MUST_EVEX = 1 << 25, // contains T_EVEX
|
T_MUST_EVEX = 1 << 25, // contains T_EVEX
|
||||||
T_B32 = 1 << 26, // m32bcst
|
T_B32 = 1 << 26, // m32bcst
|
||||||
T_B64 = 1 << 27, // m64bcst
|
T_B64 = 1 << 27, // m64bcst
|
||||||
|
T_B16 = T_B32 | T_B64, // m16bcst (Be careful)
|
||||||
T_M_K = 1 << 28, // mem{k}
|
T_M_K = 1 << 28, // mem{k}
|
||||||
T_VSIB = 1 << 29,
|
T_VSIB = 1 << 29,
|
||||||
T_MEM_EVEX = 1 << 30, // use evex if mem
|
T_MEM_EVEX = 1 << 30, // use evex if mem
|
||||||
|
T_FP16 = 1 << 31, // avx512-fp16
|
||||||
|
T_MAP5 = T_FP16 | T_0F,
|
||||||
|
T_MAP6 = T_FP16 | T_0F38,
|
||||||
T_XXX
|
T_XXX
|
||||||
};
|
};
|
||||||
|
// T_66 = 1, T_F3 = 2, T_F2 = 3
|
||||||
|
uint32_t getPP(int type) const { return (type >> 5) & 3; }
|
||||||
void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
|
void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
|
||||||
{
|
{
|
||||||
int w = (type & T_W1) ? 1 : 0;
|
int w = (type & T_W1) ? 1 : 0;
|
||||||
|
@ -1671,7 +1727,7 @@ private:
|
||||||
bool b = base.isExtIdx();
|
bool b = base.isExtIdx();
|
||||||
int idx = v ? v->getIdx() : 0;
|
int idx = v ? v->getIdx() : 0;
|
||||||
if ((idx | reg.getIdx() | base.getIdx()) >= 16) XBYAK_THROW(ERR_BAD_COMBINATION)
|
if ((idx | reg.getIdx() | base.getIdx()) >= 16) XBYAK_THROW(ERR_BAD_COMBINATION)
|
||||||
uint32_t pp = (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0;
|
uint32_t pp = getPP(type);
|
||||||
uint32_t vvvv = (((~idx) & 15) << 3) | (is256 ? 4 : 0) | pp;
|
uint32_t vvvv = (((~idx) & 15) << 3) | (is256 ? 4 : 0) | pp;
|
||||||
if (!b && !x && !w && (type & T_0F)) {
|
if (!b && !x && !w && (type & T_0F)) {
|
||||||
db(0xC5); db((r ? 0 : 0x80) | vvvv);
|
db(0xC5); db((r ? 0 : 0x80) | vvvv);
|
||||||
|
@ -1688,6 +1744,7 @@ private:
|
||||||
}
|
}
|
||||||
void verifyER(const Reg& r, int type) const
|
void verifyER(const Reg& r, int type) const
|
||||||
{
|
{
|
||||||
|
if ((type & T_ER_R) && r.isREG(32|64)) return;
|
||||||
if (((type & T_ER_X) && r.isXMM()) || ((type & T_ER_Y) && r.isYMM()) || ((type & T_ER_Z) && r.isZMM())) return;
|
if (((type & T_ER_X) && r.isXMM()) || ((type & T_ER_Y) && r.isYMM()) || ((type & T_ER_Z) && r.isZMM())) return;
|
||||||
XBYAK_THROW(ERR_ER_IS_INVALID)
|
XBYAK_THROW(ERR_ER_IS_INVALID)
|
||||||
}
|
}
|
||||||
|
@ -1702,9 +1759,9 @@ private:
|
||||||
{
|
{
|
||||||
if (!(type & (T_EVEX | T_MUST_EVEX))) XBYAK_THROW_RET(ERR_EVEX_IS_INVALID, 0)
|
if (!(type & (T_EVEX | T_MUST_EVEX))) XBYAK_THROW_RET(ERR_EVEX_IS_INVALID, 0)
|
||||||
int w = (type & T_EW1) ? 1 : 0;
|
int w = (type & T_EW1) ? 1 : 0;
|
||||||
uint32_t mm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0;
|
uint32_t mmm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0;
|
||||||
uint32_t pp = (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0;
|
if (type & T_FP16) mmm |= 4;
|
||||||
|
uint32_t pp = getPP(type);
|
||||||
int idx = v ? v->getIdx() : 0;
|
int idx = v ? v->getIdx() : 0;
|
||||||
uint32_t vvvv = ~idx;
|
uint32_t vvvv = ~idx;
|
||||||
|
|
||||||
|
@ -1727,7 +1784,7 @@ private:
|
||||||
VL = (std::max)((std::max)(reg.getBit(), base.getBit()), VL);
|
VL = (std::max)((std::max)(reg.getBit(), base.getBit()), VL);
|
||||||
LL = (VL == 512) ? 2 : (VL == 256) ? 1 : 0;
|
LL = (VL == 512) ? 2 : (VL == 256) ? 1 : 0;
|
||||||
if (b) {
|
if (b) {
|
||||||
disp8N = (type & T_B32) ? 4 : 8;
|
disp8N = ((type & T_B16) == T_B16) ? 2 : (type & T_B32) ? 4 : 8;
|
||||||
} else if (type & T_DUP) {
|
} else if (type & T_DUP) {
|
||||||
disp8N = VL == 128 ? 8 : VL == 256 ? 32 : 64;
|
disp8N = VL == 128 ? 8 : VL == 256 ? 32 : 64;
|
||||||
} else {
|
} else {
|
||||||
|
@ -1746,7 +1803,7 @@ private:
|
||||||
if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET);
|
if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET);
|
||||||
if (aaa == 0) z = 0; // clear T_z if mask is not set
|
if (aaa == 0) z = 0; // clear T_z if mask is not set
|
||||||
db(0x62);
|
db(0x62);
|
||||||
db((R ? 0x80 : 0) | (X ? 0x40 : 0) | (B ? 0x20 : 0) | (Rp ? 0x10 : 0) | (mm & 3));
|
db((R ? 0x80 : 0) | (X ? 0x40 : 0) | (B ? 0x20 : 0) | (Rp ? 0x10 : 0) | mmm);
|
||||||
db((w == 1 ? 0x80 : 0) | ((vvvv & 15) << 3) | 4 | (pp & 3));
|
db((w == 1 ? 0x80 : 0) | ((vvvv & 15) << 3) | 4 | (pp & 3));
|
||||||
db((z ? 0x80 : 0) | ((LL & 3) << 5) | (b ? 0x10 : 0) | (Vp ? 8 : 0) | (aaa & 7));
|
db((z ? 0x80 : 0) | ((LL & 3) << 5) | (b ? 0x10 : 0) | (Vp ? 8 : 0) | (aaa & 7));
|
||||||
db(code);
|
db(code);
|
||||||
|
@ -1760,8 +1817,15 @@ private:
|
||||||
{
|
{
|
||||||
uint64_t disp64 = e.getDisp();
|
uint64_t disp64 = e.getDisp();
|
||||||
#ifdef XBYAK64
|
#ifdef XBYAK64
|
||||||
|
#ifdef XBYAK_OLD_DISP_CHECK
|
||||||
|
// treat 0xffffffff as 0xffffffffffffffff
|
||||||
uint64_t high = disp64 >> 32;
|
uint64_t high = disp64 >> 32;
|
||||||
if (high != 0 && high != 0xFFFFFFFF) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG)
|
if (high != 0 && high != 0xFFFFFFFF) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG)
|
||||||
|
#else
|
||||||
|
// displacement should be a signed 32-bit value, so also check sign bit
|
||||||
|
uint64_t high = disp64 >> 31;
|
||||||
|
if (high != 0 && high != 0x1FFFFFFFF) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG)
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
uint32_t disp = static_cast<uint32_t>(disp64);
|
uint32_t disp = static_cast<uint32_t>(disp64);
|
||||||
const Reg& base = e.getBase();
|
const Reg& base = e.getBase();
|
||||||
|
@ -1862,6 +1926,7 @@ private:
|
||||||
template<class T>
|
template<class T>
|
||||||
void opJmp(T& label, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref)
|
void opJmp(T& label, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref)
|
||||||
{
|
{
|
||||||
|
if (type == T_FAR) XBYAK_THROW(ERR_NOT_SUPPORTED)
|
||||||
if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory(); /* avoid splitting code of jmp */
|
if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory(); /* avoid splitting code of jmp */
|
||||||
size_t offset = 0;
|
size_t offset = 0;
|
||||||
if (labelMgr_.getOffset(&offset, label)) { /* label exists */
|
if (labelMgr_.getOffset(&offset, label)) { /* label exists */
|
||||||
|
@ -1882,6 +1947,7 @@ private:
|
||||||
}
|
}
|
||||||
void opJmpAbs(const void *addr, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref = 0)
|
void opJmpAbs(const void *addr, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref = 0)
|
||||||
{
|
{
|
||||||
|
if (type == T_FAR) XBYAK_THROW(ERR_NOT_SUPPORTED)
|
||||||
if (isAutoGrow()) {
|
if (isAutoGrow()) {
|
||||||
if (!isNEAR(type)) XBYAK_THROW(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW)
|
if (!isNEAR(type)) XBYAK_THROW(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW)
|
||||||
if (size_ + 16 >= maxSize_) growMemory();
|
if (size_ + 16 >= maxSize_) growMemory();
|
||||||
|
@ -1894,6 +1960,16 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
void opJmpOp(const Operand& op, LabelType type, int ext)
|
||||||
|
{
|
||||||
|
const int bit = 16|i32e;
|
||||||
|
if (type == T_FAR) {
|
||||||
|
if (!op.isMEM(bit)) XBYAK_THROW(ERR_NOT_SUPPORTED)
|
||||||
|
opR_ModM(op, bit, ext + 1, 0xFF, NONE, NONE, false);
|
||||||
|
} else {
|
||||||
|
opR_ModM(op, bit, ext, 0xFF, NONE, NONE, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
// reg is reg field of ModRM
|
// reg is reg field of ModRM
|
||||||
// immSize is the size for immediate value
|
// immSize is the size for immediate value
|
||||||
// disp8N = 0(normal), disp8N = 1(force disp32), disp8N = {2, 4, 8} ; compressed displacement
|
// disp8N = 0(normal), disp8N = 1(force disp32), disp8N = {2, 4, 8} ; compressed displacement
|
||||||
|
@ -1920,6 +1996,7 @@ private:
|
||||||
void opGen(const Operand& reg, const Operand& op, int code, int pref, bool isValid(const Operand&, const Operand&), int imm8 = NONE, int preCode = NONE)
|
void opGen(const Operand& reg, const Operand& op, int code, int pref, bool isValid(const Operand&, const Operand&), int imm8 = NONE, int preCode = NONE)
|
||||||
{
|
{
|
||||||
if (isValid && !isValid(reg, op)) XBYAK_THROW(ERR_BAD_COMBINATION)
|
if (isValid && !isValid(reg, op)) XBYAK_THROW(ERR_BAD_COMBINATION)
|
||||||
|
if (!isValidSSE(reg) || !isValidSSE(op)) XBYAK_THROW(ERR_NOT_SUPPORTED)
|
||||||
if (pref != NONE) db(pref);
|
if (pref != NONE) db(pref);
|
||||||
if (op.isMEM()) {
|
if (op.isMEM()) {
|
||||||
opModM(op.getAddress(), reg.getReg(), 0x0F, preCode, code, (imm8 != NONE) ? 1 : 0);
|
opModM(op.getAddress(), reg.getReg(), 0x0F, preCode, code, (imm8 != NONE) ? 1 : 0);
|
||||||
|
@ -1930,6 +2007,7 @@ private:
|
||||||
}
|
}
|
||||||
void opMMX_IMM(const Mmx& mmx, int imm8, int code, int ext)
|
void opMMX_IMM(const Mmx& mmx, int imm8, int code, int ext)
|
||||||
{
|
{
|
||||||
|
if (!isValidSSE(mmx)) XBYAK_THROW(ERR_NOT_SUPPORTED)
|
||||||
if (mmx.isXMM()) db(0x66);
|
if (mmx.isXMM()) db(0x66);
|
||||||
opModR(Reg32(ext), mmx, 0x0F, code);
|
opModR(Reg32(ext), mmx, 0x0F, code);
|
||||||
db(imm8);
|
db(imm8);
|
||||||
|
@ -1940,6 +2018,7 @@ private:
|
||||||
}
|
}
|
||||||
void opMovXMM(const Operand& op1, const Operand& op2, int code, int pref)
|
void opMovXMM(const Operand& op1, const Operand& op2, int code, int pref)
|
||||||
{
|
{
|
||||||
|
if (!isValidSSE(op1) || !isValidSSE(op2)) XBYAK_THROW(ERR_NOT_SUPPORTED)
|
||||||
if (pref != NONE) db(pref);
|
if (pref != NONE) db(pref);
|
||||||
if (op1.isXMM() && op2.isMEM()) {
|
if (op1.isXMM() && op2.isMEM()) {
|
||||||
opModM(op2.getAddress(), op1.getReg(), 0x0F, code);
|
opModM(op2.getAddress(), op1.getReg(), 0x0F, code);
|
||||||
|
@ -1951,6 +2030,7 @@ private:
|
||||||
}
|
}
|
||||||
void opExt(const Operand& op, const Mmx& mmx, int code, int imm, bool hasMMX2 = false)
|
void opExt(const Operand& op, const Mmx& mmx, int code, int imm, bool hasMMX2 = false)
|
||||||
{
|
{
|
||||||
|
if (!isValidSSE(op) || !isValidSSE(mmx)) XBYAK_THROW(ERR_NOT_SUPPORTED)
|
||||||
if (hasMMX2 && op.isREG(i32e)) { /* pextrw is special */
|
if (hasMMX2 && op.isREG(i32e)) { /* pextrw is special */
|
||||||
if (mmx.isXMM()) db(0x66);
|
if (mmx.isXMM()) db(0x66);
|
||||||
opModR(op.getReg(), mmx, 0x0F, 0xC5); db(imm);
|
opModR(op.getReg(), mmx, 0x0F, 0xC5); db(imm);
|
||||||
|
@ -2211,11 +2291,15 @@ private:
|
||||||
{
|
{
|
||||||
if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) XBYAK_THROW(ERR_BAD_COMBINATION)
|
if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) XBYAK_THROW(ERR_BAD_COMBINATION)
|
||||||
}
|
}
|
||||||
|
void opCvt(const Xmm& x, const Operand& op, int type, int code)
|
||||||
|
{
|
||||||
|
Operand::Kind kind = x.isXMM() ? (op.isBit(256) ? Operand::YMM : Operand::XMM) : Operand::ZMM;
|
||||||
|
opVex(x.copyAndSetKind(kind), &xm0, op, type, code);
|
||||||
|
}
|
||||||
void opCvt2(const Xmm& x, const Operand& op, int type, int code)
|
void opCvt2(const Xmm& x, const Operand& op, int type, int code)
|
||||||
{
|
{
|
||||||
checkCvt2(x, op);
|
checkCvt2(x, op);
|
||||||
Operand::Kind kind = x.isXMM() ? (op.isBit(256) ? Operand::YMM : Operand::XMM) : Operand::ZMM;
|
opCvt(x, op, type, code);
|
||||||
opVex(x.copyAndSetKind(kind), &xm0, op, type, code);
|
|
||||||
}
|
}
|
||||||
void opCvt3(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int type64, int type32, uint8_t code)
|
void opCvt3(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int type64, int type32, uint8_t code)
|
||||||
{
|
{
|
||||||
|
@ -2224,6 +2308,18 @@ private:
|
||||||
const Operand *p = op.isREG() ? &x : &op;
|
const Operand *p = op.isREG() ? &x : &op;
|
||||||
opVex(x1, &x2, *p, type | (op.isBit(64) ? type64 : type32), code);
|
opVex(x1, &x2, *p, type | (op.isBit(64) ? type64 : type32), code);
|
||||||
}
|
}
|
||||||
|
// (x, x/y/xword/yword), (y, z/m)
|
||||||
|
void checkCvt4(const Xmm& x, const Operand& op) const
|
||||||
|
{
|
||||||
|
if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM) && op.isBit(128|256)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) XBYAK_THROW(ERR_BAD_COMBINATION)
|
||||||
|
}
|
||||||
|
// (x, x/y/z/xword/yword/zword)
|
||||||
|
void opCvt5(const Xmm& x, const Operand& op, int type, int code)
|
||||||
|
{
|
||||||
|
if (!(x.isXMM() && op.isBit(128|256|512))) XBYAK_THROW(ERR_BAD_COMBINATION)
|
||||||
|
Operand::Kind kind = op.isBit(128) ? Operand::XMM : op.isBit(256) ? Operand::YMM : Operand::ZMM;
|
||||||
|
opVex(x.copyAndSetKind(kind), &xm0, op, type, code);
|
||||||
|
}
|
||||||
const Xmm& cvtIdx0(const Operand& x) const
|
const Xmm& cvtIdx0(const Operand& x) const
|
||||||
{
|
{
|
||||||
return x.isZMM() ? zm0 : x.isYMM() ? ym0 : xm0;
|
return x.isZMM() ? zm0 : x.isYMM() ? ym0 : xm0;
|
||||||
|
@ -2261,7 +2357,11 @@ private:
|
||||||
}
|
}
|
||||||
if (!isOK) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
|
if (!isOK) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
|
||||||
}
|
}
|
||||||
opAVX_X_X_XM(isAddrYMM ? Ymm(x1.getIdx()) : x1, isAddrYMM ? Ymm(x2.getIdx()) : x2, addr, type, code);
|
int i1 = x1.getIdx();
|
||||||
|
int i2 = regExp.getIndex().getIdx();
|
||||||
|
int i3 = x2.getIdx();
|
||||||
|
if (i1 == i2 || i1 == i3 || i2 == i3) XBYAK_THROW(ERR_SAME_REGS_ARE_INVALID);
|
||||||
|
opAVX_X_X_XM(isAddrYMM ? Ymm(i1) : x1, isAddrYMM ? Ymm(i3) : x2, addr, type, code);
|
||||||
}
|
}
|
||||||
enum {
|
enum {
|
||||||
xx_yy_zz = 0,
|
xx_yy_zz = 0,
|
||||||
|
@ -2284,7 +2384,12 @@ private:
|
||||||
void opGather2(const Xmm& x, const Address& addr, int type, uint8_t code, int mode)
|
void opGather2(const Xmm& x, const Address& addr, int type, uint8_t code, int mode)
|
||||||
{
|
{
|
||||||
if (x.hasZero()) XBYAK_THROW(ERR_INVALID_ZERO)
|
if (x.hasZero()) XBYAK_THROW(ERR_INVALID_ZERO)
|
||||||
checkGather2(x, addr.getRegExp().getIndex(), mode);
|
const RegExp& regExp = addr.getRegExp();
|
||||||
|
checkGather2(x, regExp.getIndex(), mode);
|
||||||
|
int maskIdx = x.getOpmaskIdx();
|
||||||
|
if ((type & T_M_K) && addr.getOpmaskIdx()) maskIdx = addr.getOpmaskIdx();
|
||||||
|
if (maskIdx == 0) XBYAK_THROW(ERR_K0_IS_INVALID);
|
||||||
|
if (!(type & T_M_K) && x.getIdx() == regExp.getIndex().getIdx()) XBYAK_THROW(ERR_SAME_REGS_ARE_INVALID);
|
||||||
opVex(x, 0, addr, type, code);
|
opVex(x, 0, addr, type, code);
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
|
@ -2424,13 +2529,13 @@ public:
|
||||||
|
|
||||||
// set default type of `jmp` of undefined label to T_NEAR
|
// set default type of `jmp` of undefined label to T_NEAR
|
||||||
void setDefaultJmpNEAR(bool isNear) { isDefaultJmpNEAR_ = isNear; }
|
void setDefaultJmpNEAR(bool isNear) { isDefaultJmpNEAR_ = isNear; }
|
||||||
void jmp(const Operand& op) { opR_ModM(op, BIT, 4, 0xFF, NONE, NONE, true); }
|
void jmp(const Operand& op, LabelType type = T_AUTO) { opJmpOp(op, type, 4); }
|
||||||
void jmp(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); }
|
void jmp(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); }
|
||||||
void jmp(const char *label, LabelType type = T_AUTO) { jmp(std::string(label), type); }
|
void jmp(const char *label, LabelType type = T_AUTO) { jmp(std::string(label), type); }
|
||||||
void jmp(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); }
|
void jmp(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); }
|
||||||
void jmp(const void *addr, LabelType type = T_AUTO) { opJmpAbs(addr, type, 0xEB, 0xE9); }
|
void jmp(const void *addr, LabelType type = T_AUTO) { opJmpAbs(addr, type, 0xEB, 0xE9); }
|
||||||
|
|
||||||
void call(const Operand& op) { opR_ModM(op, 16 | i32e, 2, 0xFF, NONE, NONE, true); }
|
void call(const Operand& op, LabelType type = T_AUTO) { opJmpOp(op, type, 2); }
|
||||||
// call(string label), not const std::string&
|
// call(string label), not const std::string&
|
||||||
void call(std::string label) { opJmp(label, T_NEAR, 0, 0xE8, 0); }
|
void call(std::string label) { opJmp(label, T_NEAR, 0, 0xE8, 0); }
|
||||||
void call(const char *label) { call(std::string(label)); }
|
void call(const char *label) { call(std::string(label)); }
|
||||||
|
|
307
externals/xbyak/xbyak/xbyak_mnemonic.h
vendored
307
externals/xbyak/xbyak/xbyak_mnemonic.h
vendored
|
@ -1,4 +1,4 @@
|
||||||
const char *getVersionString() const { return "5.991"; }
|
const char *getVersionString() const { return "6.06"; }
|
||||||
void adc(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x10, 2); }
|
void adc(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x10, 2); }
|
||||||
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
|
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
|
||||||
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
|
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
|
||||||
|
@ -57,9 +57,11 @@ void cbw() { db(0x66); db(0x98); }
|
||||||
void cdq() { db(0x99); }
|
void cdq() { db(0x99); }
|
||||||
void clc() { db(0xF8); }
|
void clc() { db(0xF8); }
|
||||||
void cld() { db(0xFC); }
|
void cld() { db(0xFC); }
|
||||||
|
void cldemote(const Address& addr) { opMIB(addr, eax, 0x0F, 0x1C); }
|
||||||
void clflush(const Address& addr) { opModM(addr, Reg32(7), 0x0F, 0xAE); }
|
void clflush(const Address& addr) { opModM(addr, Reg32(7), 0x0F, 0xAE); }
|
||||||
void clflushopt(const Address& addr) { db(0x66); opModM(addr, Reg32(7), 0x0F, 0xAE); }
|
void clflushopt(const Address& addr) { db(0x66); opModM(addr, Reg32(7), 0x0F, 0xAE); }
|
||||||
void cli() { db(0xFA); }
|
void cli() { db(0xFA); }
|
||||||
|
void clwb(const Address& addr) { db(0x66); opMIB(addr, esi, 0x0F, 0xAE); }
|
||||||
void clzero() { db(0x0F); db(0x01); db(0xFC); }
|
void clzero() { db(0x0F); db(0x01); db(0xFC); }
|
||||||
void cmc() { db(0xF5); }
|
void cmc() { db(0xF5); }
|
||||||
void cmova(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 7); }//-V524
|
void cmova(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 7); }//-V524
|
||||||
|
@ -172,6 +174,8 @@ void divss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0xF3, isXMM
|
||||||
void dppd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
|
void dppd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
|
||||||
void dpps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
|
void dpps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
|
||||||
void emms() { db(0x0F); db(0x77); }
|
void emms() { db(0x0F); db(0x77); }
|
||||||
|
void endbr32() { db(0xF3); db(0x0F); db(0x1E); db(0xFB); }
|
||||||
|
void endbr64() { db(0xF3); db(0x0F); db(0x1E); db(0xFA); }
|
||||||
void enter(uint16_t x, uint8_t y) { db(0xC8); dw(x); db(y); }
|
void enter(uint16_t x, uint8_t y) { db(0xC8); dw(x); db(y); }
|
||||||
void extractps(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x17, imm); }
|
void extractps(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x17, imm); }
|
||||||
void f2xm1() { db(0xD9); db(0xF0); }
|
void f2xm1() { db(0xD9); db(0xF0); }
|
||||||
|
@ -321,6 +325,7 @@ void gf2p8affineqb(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op,
|
||||||
void gf2p8mulb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xCF, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
void gf2p8mulb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xCF, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||||
void haddpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0x66, isXMM_XMMorMEM); }
|
void haddpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0x66, isXMM_XMMorMEM); }
|
||||||
void haddps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0xF2, isXMM_XMMorMEM); }
|
void haddps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0xF2, isXMM_XMMorMEM); }
|
||||||
|
void hlt() { db(0xF4); }
|
||||||
void hsubpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0x66, isXMM_XMMorMEM); }
|
void hsubpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0x66, isXMM_XMMorMEM); }
|
||||||
void hsubps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0xF2, isXMM_XMMorMEM); }
|
void hsubps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0xF2, isXMM_XMMorMEM); }
|
||||||
void idiv(const Operand& op) { opR_ModM(op, 0, 7, 0xF6); }
|
void idiv(const Operand& op) { opR_ModM(op, 0, 7, 0xF6); }
|
||||||
|
@ -498,6 +503,8 @@ void movd(const Mmx& mmx, const Address& addr) { if (mmx.isXMM()) db(0x66); opMo
|
||||||
void movd(const Mmx& mmx, const Reg32& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }
|
void movd(const Mmx& mmx, const Reg32& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }
|
||||||
void movd(const Reg32& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }
|
void movd(const Reg32& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }
|
||||||
void movddup(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x12, 0xF2, isXMM_XMMorMEM, NONE, NONE); }
|
void movddup(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x12, 0xF2, isXMM_XMMorMEM, NONE, NONE); }
|
||||||
|
void movdir64b(const Reg& reg, const Address& addr) { db(0x66); opModM(addr, reg.cvt32(), 0x0F, 0x38, 0xF8); }
|
||||||
|
void movdiri(const Address& addr, const Reg32e& reg) { opModM(addr, reg, 0x0F, 0x38, 0xF9); }
|
||||||
void movdq2q(const Mmx& mmx, const Xmm& xmm) { db(0xF2); opModR(mmx, xmm, 0x0F, 0xD6); }
|
void movdq2q(const Mmx& mmx, const Xmm& xmm) { db(0xF2); opModR(mmx, xmm, 0x0F, 0xD6); }
|
||||||
void movdqa(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x7F); }
|
void movdqa(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x7F); }
|
||||||
void movdqa(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, 0x66); }
|
void movdqa(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, 0x66); }
|
||||||
|
@ -717,6 +724,7 @@ void repne() { db(0xF2); }
|
||||||
void repnz() { db(0xF2); }
|
void repnz() { db(0xF2); }
|
||||||
void repz() { db(0xF3); }
|
void repz() { db(0xF3); }
|
||||||
void ret(int imm = 0) { if (imm) { db(0xC2); dw(imm); } else { db(0xC3); } }
|
void ret(int imm = 0) { if (imm) { db(0xC2); dw(imm); } else { db(0xC3); } }
|
||||||
|
void retf(int imm = 0) { if (imm) { db(0xCA); dw(imm); } else { db(0xCB); } }
|
||||||
void rol(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 0); }
|
void rol(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 0); }
|
||||||
void rol(const Operand& op, int imm) { opShift(op, imm, 0); }
|
void rol(const Operand& op, int imm) { opShift(op, imm, 0); }
|
||||||
void ror(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 1); }
|
void ror(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 1); }
|
||||||
|
@ -809,18 +817,21 @@ void subsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0xF2, isXMM
|
||||||
void subss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0xF3, isXMM_XMMorMEM); }
|
void subss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0xF3, isXMM_XMMorMEM); }
|
||||||
void sysenter() { db(0x0F); db(0x34); }
|
void sysenter() { db(0x0F); db(0x34); }
|
||||||
void sysexit() { db(0x0F); db(0x35); }
|
void sysexit() { db(0x0F); db(0x35); }
|
||||||
|
void tpause(const Reg32& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) db(0x66); db(0x0F); db(0xAE); setModRM(3, 6, idx); }
|
||||||
void tzcnt(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBC); }
|
void tzcnt(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBC); }
|
||||||
void ucomisd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2E, 0x66, isXMM_XMMorMEM); }
|
void ucomisd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2E, 0x66, isXMM_XMMorMEM); }
|
||||||
void ucomiss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2E, 0x100, isXMM_XMMorMEM); }
|
void ucomiss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2E, 0x100, isXMM_XMMorMEM); }
|
||||||
void ud2() { db(0x0F); db(0x0B); }
|
void ud2() { db(0x0F); db(0x0B); }
|
||||||
|
void umonitor(const Reg& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) int bit = r.getBit(); if (BIT != bit) { if ((BIT == 32 && bit == 16) || (BIT == 64 && bit == 32)) { db(0x67); } else { XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) } } db(0xF3); db(0x0F); db(0xAE); setModRM(3, 6, idx); }
|
||||||
|
void umwait(const Reg32& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) db(0xF2); db(0x0F); db(0xAE); setModRM(3, 6, idx); }
|
||||||
void unpckhpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x66, isXMM_XMMorMEM); }
|
void unpckhpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x66, isXMM_XMMorMEM); }
|
||||||
void unpckhps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x100, isXMM_XMMorMEM); }
|
void unpckhps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x100, isXMM_XMMorMEM); }
|
||||||
void unpcklpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x66, isXMM_XMMorMEM); }
|
void unpcklpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x66, isXMM_XMMorMEM); }
|
||||||
void unpcklps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x100, isXMM_XMMorMEM); }
|
void unpcklps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x100, isXMM_XMMorMEM); }
|
||||||
void vaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x58); }
|
void vaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x58); }
|
||||||
void vaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x58); }
|
void vaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x58); }
|
||||||
void vaddsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x58); }
|
void vaddsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x58); }
|
||||||
void vaddss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x58); }
|
void vaddss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X | T_N4, 0x58); }
|
||||||
void vaddsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0xD0); }
|
void vaddsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0xD0); }
|
||||||
void vaddsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0xD0); }
|
void vaddsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0xD0); }
|
||||||
void vaesdec(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_YMM | T_EVEX, 0xDE); }
|
void vaesdec(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_YMM | T_EVEX, 0xDE); }
|
||||||
|
@ -982,7 +993,7 @@ void vcvtpd2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_66 | T_
|
||||||
void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }
|
void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }
|
||||||
void vcvtps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5B); }
|
void vcvtps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5B); }
|
||||||
void vcvtps2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x5A); }
|
void vcvtps2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x5A); }
|
||||||
void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x1D, imm); }
|
void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y | T_M_K, 0x1D, imm); }
|
||||||
void vcvtsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W0 | T_EVEX | T_EW0 | T_N4 | T_ER_X, 0x2D); }
|
void vcvtsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W0 | T_EVEX | T_EW0 | T_N4 | T_ER_X, 0x2D); }
|
||||||
void vcvtsd2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX | T_ER_X, 0x5A); }
|
void vcvtsd2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX | T_ER_X, 0x5A); }
|
||||||
void vcvtsi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_0F | T_F2 | T_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x2A); }
|
void vcvtsi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_0F | T_F2 | T_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x2A); }
|
||||||
|
@ -995,8 +1006,8 @@ void vcvttsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()
|
||||||
void vcvttss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W0 | T_EVEX | T_EW0 | T_SAE_X | T_N8, 0x2C); }
|
void vcvttss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W0 | T_EVEX | T_EW0 | T_SAE_X | T_N8, 0x2C); }
|
||||||
void vdivpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5E); }
|
void vdivpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5E); }
|
||||||
void vdivps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5E); }
|
void vdivps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5E); }
|
||||||
void vdivsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x5E); }
|
void vdivsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x5E); }
|
||||||
void vdivss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x5E); }
|
void vdivss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X | T_N4, 0x5E); }
|
||||||
void vdppd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x41, imm); }
|
void vdppd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x41, imm); }
|
||||||
void vdpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x40, imm); }
|
void vdpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x40, imm); }
|
||||||
void vextractf128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x19, imm); }
|
void vextractf128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x19, imm); }
|
||||||
|
@ -1085,12 +1096,12 @@ void vmaskmovps(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_X
|
||||||
void vmaskmovps(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x2C); }
|
void vmaskmovps(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x2C); }
|
||||||
void vmaxpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5F); }
|
void vmaxpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5F); }
|
||||||
void vmaxps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5F); }
|
void vmaxps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5F); }
|
||||||
void vmaxsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x5F); }
|
void vmaxsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x5F); }
|
||||||
void vmaxss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x5F); }
|
void vmaxss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X | T_N4, 0x5F); }
|
||||||
void vminpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5D); }
|
void vminpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5D); }
|
||||||
void vminps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5D); }
|
void vminps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5D); }
|
||||||
void vminsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x5D); }
|
void vminsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x5D); }
|
||||||
void vminss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x5D); }
|
void vminss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X | T_N4, 0x5D); }
|
||||||
void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_M_K, 0x29); }
|
void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_M_K, 0x29); }
|
||||||
void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x28); }
|
void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x28); }
|
||||||
void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX | T_M_K, 0x29); }
|
void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX | T_M_K, 0x29); }
|
||||||
|
@ -1136,8 +1147,8 @@ void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T
|
||||||
void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x42, imm); }
|
void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x42, imm); }
|
||||||
void vmulpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x59); }
|
void vmulpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x59); }
|
||||||
void vmulps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x59); }
|
void vmulps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x59); }
|
||||||
void vmulsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x59); }
|
void vmulsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x59); }
|
||||||
void vmulss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x59); }
|
void vmulss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X | T_N4, 0x59); }
|
||||||
void vorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x56); }
|
void vorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x56); }
|
||||||
void vorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x56); }
|
void vorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x56); }
|
||||||
void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x1C); }
|
void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x1C); }
|
||||||
|
@ -1320,8 +1331,8 @@ void vsqrtss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1,
|
||||||
void vstmxcsr(const Address& addr) { opAVX_X_X_XM(xm3, xm0, addr, T_0F, 0xAE); }
|
void vstmxcsr(const Address& addr) { opAVX_X_X_XM(xm3, xm0, addr, T_0F, 0xAE); }
|
||||||
void vsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5C); }
|
void vsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5C); }
|
||||||
void vsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5C); }
|
void vsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5C); }
|
||||||
void vsubsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x5C); }
|
void vsubsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x5C); }
|
||||||
void vsubss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x5C); }
|
void vsubss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X | T_N4, 0x5C); }
|
||||||
void vtestpd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM, 0x0F); }
|
void vtestpd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM, 0x0F); }
|
||||||
void vtestps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM, 0x0E); }
|
void vtestps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM, 0x0E); }
|
||||||
void vucomisd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8 | T_66 | T_0F | T_EW1 | T_EVEX | T_SAE_X, 0x2E); }
|
void vucomisd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8 | T_66 | T_0F | T_EW1 | T_EVEX | T_SAE_X, 0x2E); }
|
||||||
|
@ -1739,6 +1750,8 @@ void v4fmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM
|
||||||
void v4fmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0x9B); }
|
void v4fmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0x9B); }
|
||||||
void v4fnmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0xAA); }
|
void v4fnmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0xAA); }
|
||||||
void v4fnmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0xAB); }
|
void v4fnmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0xAB); }
|
||||||
|
// vaddph mnemonic: hands all three operands to opAVX_X_X_XM together with the
// MAP5 / EW0 flag set (T_YMM, T_MUST_EVEX, T_ER_Z, T_B16) and code 0x58.
// op2 defaults to an empty Operand() when the caller passes only two operands.
void vaddph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
{
	const int flags = T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16;
	opAVX_X_X_XM(xmm, op1, op2, flags, 0x58);
}
|
||||||
|
// vaddsh mnemonic: forwards to opAVX_X_X_XM with the MAP5 / F3 / EW0 flag set
// (T_MUST_EVEX, T_ER_X, T_N2) and code 0x58.
// op2 defaults to an empty Operand() when the caller passes only two operands.
void vaddsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
{
	const int flags = T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2;
	opAVX_X_X_XM(xmm, op1, op2, flags, 0x58);
}
|
||||||
void valignd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x03, imm); }
|
void valignd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x03, imm); }
|
||||||
void valignq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x03, imm); }
|
void valignq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x03, imm); }
|
||||||
void vblendmpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x65); }
|
void vblendmpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x65); }
|
||||||
|
@ -1753,41 +1766,206 @@ void vbroadcasti32x4(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_
|
||||||
void vbroadcasti32x8(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32, 0x5B); }
|
void vbroadcasti32x8(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32, 0x5B); }
|
||||||
void vbroadcasti64x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x5A); }
|
void vbroadcasti64x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x5A); }
|
||||||
void vbroadcasti64x4(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x5B); }
|
void vbroadcasti64x4(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x5B); }
|
||||||
|
void vcmpeq_ospd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 16); }
|
||||||
|
void vcmpeq_osps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 16); }
|
||||||
|
void vcmpeq_ossd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 16); }
|
||||||
|
void vcmpeq_osss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 16); }
|
||||||
|
void vcmpeq_uqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 8); }
|
||||||
|
void vcmpeq_uqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 8); }
|
||||||
|
void vcmpeq_uqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 8); }
|
||||||
|
void vcmpeq_uqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 8); }
|
||||||
|
void vcmpeq_uspd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 24); }
|
||||||
|
void vcmpeq_usps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 24); }
|
||||||
|
void vcmpeq_ussd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 24); }
|
||||||
|
void vcmpeq_usss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 24); }
|
||||||
|
void vcmpeqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 0); }
|
||||||
|
void vcmpeqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 0); }
|
||||||
|
void vcmpeqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 0); }
|
||||||
|
void vcmpeqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 0); }
|
||||||
|
void vcmpfalse_ospd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 27); }
|
||||||
|
void vcmpfalse_osps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 27); }
|
||||||
|
void vcmpfalse_ossd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 27); }
|
||||||
|
void vcmpfalse_osss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 27); }
|
||||||
|
void vcmpfalsepd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 11); }
|
||||||
|
void vcmpfalseps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 11); }
|
||||||
|
void vcmpfalsesd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 11); }
|
||||||
|
void vcmpfalsess(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 11); }
|
||||||
|
void vcmpge_oqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 29); }
|
||||||
|
void vcmpge_oqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 29); }
|
||||||
|
void vcmpge_oqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 29); }
|
||||||
|
void vcmpge_oqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 29); }
|
||||||
|
void vcmpgepd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 13); }
|
||||||
|
void vcmpgeps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 13); }
|
||||||
|
void vcmpgesd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 13); }
|
||||||
|
void vcmpgess(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 13); }
|
||||||
|
void vcmpgt_oqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 30); }
|
||||||
|
void vcmpgt_oqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 30); }
|
||||||
|
void vcmpgt_oqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 30); }
|
||||||
|
void vcmpgt_oqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 30); }
|
||||||
|
void vcmpgtpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 14); }
|
||||||
|
void vcmpgtps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 14); }
|
||||||
|
void vcmpgtsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 14); }
|
||||||
|
void vcmpgtss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 14); }
|
||||||
|
void vcmple_oqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 18); }
|
||||||
|
void vcmple_oqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 18); }
|
||||||
|
void vcmple_oqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 18); }
|
||||||
|
void vcmple_oqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 18); }
|
||||||
|
void vcmplepd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 2); }
|
||||||
|
void vcmpleps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 2); }
|
||||||
|
void vcmplesd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 2); }
|
||||||
|
void vcmpless(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 2); }
|
||||||
|
void vcmplt_oqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 17); }
|
||||||
|
void vcmplt_oqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 17); }
|
||||||
|
void vcmplt_oqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 17); }
|
||||||
|
void vcmplt_oqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 17); }
|
||||||
|
void vcmpltpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 1); }
|
||||||
|
void vcmpltps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 1); }
|
||||||
|
void vcmpltsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 1); }
|
||||||
|
void vcmpltss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 1); }
|
||||||
|
void vcmpneq_oqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 12); }
|
||||||
|
void vcmpneq_oqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 12); }
|
||||||
|
void vcmpneq_oqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 12); }
|
||||||
|
void vcmpneq_oqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 12); }
|
||||||
|
void vcmpneq_ospd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 28); }
|
||||||
|
void vcmpneq_osps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 28); }
|
||||||
|
void vcmpneq_ossd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 28); }
|
||||||
|
void vcmpneq_osss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 28); }
|
||||||
|
void vcmpneq_uspd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 20); }
|
||||||
|
void vcmpneq_usps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 20); }
|
||||||
|
void vcmpneq_ussd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 20); }
|
||||||
|
void vcmpneq_usss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 20); }
|
||||||
|
void vcmpneqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 4); }
|
||||||
|
void vcmpneqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 4); }
|
||||||
|
void vcmpneqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 4); }
|
||||||
|
void vcmpneqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 4); }
|
||||||
|
void vcmpnge_uqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 25); }
|
||||||
|
void vcmpnge_uqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 25); }
|
||||||
|
void vcmpnge_uqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 25); }
|
||||||
|
void vcmpnge_uqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 25); }
|
||||||
|
void vcmpngepd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 9); }
|
||||||
|
void vcmpngeps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 9); }
|
||||||
|
void vcmpngesd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 9); }
|
||||||
|
void vcmpngess(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 9); }
|
||||||
|
void vcmpngt_uqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 26); }
|
||||||
|
void vcmpngt_uqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 26); }
|
||||||
|
void vcmpngt_uqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 26); }
|
||||||
|
void vcmpngt_uqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 26); }
|
||||||
|
void vcmpngtpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 10); }
|
||||||
|
void vcmpngtps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 10); }
|
||||||
|
void vcmpngtsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 10); }
|
||||||
|
void vcmpngtss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 10); }
|
||||||
|
void vcmpnle_uqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 22); }
|
||||||
|
void vcmpnle_uqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 22); }
|
||||||
|
void vcmpnle_uqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 22); }
|
||||||
|
void vcmpnle_uqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 22); }
|
||||||
|
void vcmpnlepd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 6); }
|
||||||
|
void vcmpnleps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 6); }
|
||||||
|
void vcmpnlesd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 6); }
|
||||||
|
void vcmpnless(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 6); }
|
||||||
|
void vcmpnlt_uqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 21); }
|
||||||
|
void vcmpnlt_uqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 21); }
|
||||||
|
void vcmpnlt_uqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 21); }
|
||||||
|
void vcmpnlt_uqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 21); }
|
||||||
|
void vcmpnltpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 5); }
|
||||||
|
void vcmpnltps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 5); }
|
||||||
|
void vcmpnltsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 5); }
|
||||||
|
void vcmpnltss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 5); }
|
||||||
|
void vcmpord_spd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 23); }
|
||||||
|
void vcmpord_sps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 23); }
|
||||||
|
void vcmpord_ssd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 23); }
|
||||||
|
void vcmpord_sss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 23); }
|
||||||
|
void vcmpordpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 7); }
|
||||||
|
void vcmpordps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 7); }
|
||||||
|
void vcmpordsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 7); }
|
||||||
|
void vcmpordss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 7); }
|
||||||
void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0xC2, imm); }
|
void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0xC2, imm); }
|
||||||
|
// vcmpph mnemonic: compare into opmask k. Forwards to opAVX_K_X_XM with the
// 0F3A / EW0 flag set (T_YMM, T_SAE_Z, T_MUST_EVEX, T_B16), code 0xC2 and the
// caller-supplied immediate imm.
void vcmpph(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm)
{
	const int flags = T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B16;
	opAVX_K_X_XM(k, x, op, flags, 0xC2, imm);
}
|
||||||
void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0xC2, imm); }
|
void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0xC2, imm); }
|
||||||
void vcmpsd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_N8 | T_F2 | T_0F | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
|
void vcmpsd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_N8 | T_F2 | T_0F | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
|
||||||
|
// vcmpsh mnemonic: compare into opmask k. Forwards to opAVX_K_X_XM with the
// N2 / F3 / 0F3A / EW0 flag set (T_SAE_X, T_MUST_EVEX), code 0xC2 and the
// caller-supplied immediate imm.
void vcmpsh(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm)
{
	const int flags = T_N2 | T_F3 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX;
	opAVX_K_X_XM(k, x, op, flags, 0xC2, imm);
}
|
||||||
void vcmpss(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_N4 | T_F3 | T_0F | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
|
void vcmpss(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_N4 | T_F3 | T_0F | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
|
||||||
|
void vcmptrue_uspd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 31); }
|
||||||
|
void vcmptrue_usps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 31); }
|
||||||
|
void vcmptrue_ussd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 31); }
|
||||||
|
void vcmptrue_usss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 31); }
|
||||||
|
void vcmptruepd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 15); }
|
||||||
|
void vcmptrueps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 15); }
|
||||||
|
void vcmptruesd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 15); }
|
||||||
|
void vcmptruess(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 15); }
|
||||||
|
void vcmpunord_spd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 19); }
|
||||||
|
void vcmpunord_sps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 19); }
|
||||||
|
void vcmpunord_ssd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 19); }
|
||||||
|
void vcmpunord_sss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 19); }
|
||||||
|
void vcmpunordpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 3); }
|
||||||
|
void vcmpunordps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 3); }
|
||||||
|
void vcmpunordsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 3); }
|
||||||
|
void vcmpunordss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 3); }
|
||||||
|
// vcomish mnemonic: forwards to opAVX_X_XM_IMM with the MAP5 / EW0 flag set
// (T_MUST_EVEX, T_SAE_X, T_N2) and code 0x2F; no immediate is passed.
void vcomish(const Xmm& x, const Operand& op)
{
	const int flags = T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2;
	opAVX_X_XM_IMM(x, op, flags, 0x2F);
}
|
||||||
void vcompressb(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N1 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x63); }
|
void vcompressb(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N1 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x63); }
|
||||||
void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8A); }
|
void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8A); }
|
||||||
void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8A); }
|
void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8A); }
|
||||||
void vcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x63); }
|
void vcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x63); }
|
||||||
|
// vcvtdq2ph mnemonic: the operand pair is first passed through checkCvt4, then
// emitted via opCvt with the N16 / N_VL / MAP5 / EW0 flag set
// (T_YMM, T_ER_Z, T_MUST_EVEX, T_B32) and code 0x5B.
void vcvtdq2ph(const Xmm& x, const Operand& op)
{
	checkCvt4(x, op);
	const int flags = T_N16 | T_N_VL | T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32;
	opCvt(x, op, flags, 0x5B);
}
|
||||||
void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
|
void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
|
||||||
void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
|
void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
|
||||||
|
// vcvtpd2ph mnemonic: forwards to opCvt5 with the N16 / N_VL / 66 / MAP5 / EW1
// flag set (T_ER_Z, T_MUST_EVEX, T_B64) and code 0x5A.
void vcvtpd2ph(const Xmm& x, const Operand& op)
{
	const int flags = T_N16 | T_N_VL | T_66 | T_MAP5 | T_EW1 | T_ER_Z | T_MUST_EVEX | T_B64;
	opCvt5(x, op, flags, 0x5A);
}
|
||||||
void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7B); }
|
void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7B); }
|
||||||
void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x79); }
|
void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x79); }
|
||||||
void vcvtpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x79); }
|
void vcvtpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x79); }
|
||||||
void vcvtps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 0x7B); }
|
// vcvtph2dq mnemonic: runs checkCvt1 on the operand pair, then emits through
// opVex (second argument 0) with the N8 / N_VL / 66 / MAP5 / EW0 flag set
// (T_YMM, T_ER_Y, T_MUST_EVEX, T_B16) and code 0x5B.
void vcvtph2dq(const Xmm& x, const Operand& op)
{
	checkCvt1(x, op);
	const int flags = T_N8 | T_N_VL | T_66 | T_MAP5 | T_EW0 | T_YMM | T_ER_Y | T_MUST_EVEX | T_B16;
	opVex(x, 0, op, flags, 0x5B);
}
|
||||||
|
void vcvtph2pd(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4 | T_N_VL | T_MAP5 | T_EW0 | T_YMM | T_SAE_X | T_MUST_EVEX | T_B16, 0x5A); }
|
||||||
|
// vcvtph2psx mnemonic: runs checkCvt1 on the operand pair, then emits through
// opVex (second argument 0) with the N8 / N_VL / 66 / MAP6 / EW0 flag set
// (T_YMM, T_SAE_Y, T_MUST_EVEX, T_B16) and code 0x13.
void vcvtph2psx(const Xmm& x, const Operand& op)
{
	checkCvt1(x, op);
	const int flags = T_N8 | T_N_VL | T_66 | T_MAP6 | T_EW0 | T_YMM | T_SAE_Y | T_MUST_EVEX | T_B16;
	opVex(x, 0, op, flags, 0x13);
}
|
||||||
|
void vcvtph2qq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4 | T_N_VL | T_66 | T_MAP5 | T_EW0 | T_YMM | T_ER_X | T_MUST_EVEX | T_B16, 0x7B); }
|
||||||
|
// vcvtph2udq mnemonic: runs checkCvt1 on the operand pair, then emits through
// opVex (second argument 0) with the N8 / N_VL / MAP5 / EW0 flag set
// (T_YMM, T_ER_Y, T_MUST_EVEX, T_B16) and code 0x79.
void vcvtph2udq(const Xmm& x, const Operand& op)
{
	checkCvt1(x, op);
	const int flags = T_N8 | T_N_VL | T_MAP5 | T_EW0 | T_YMM | T_ER_Y | T_MUST_EVEX | T_B16;
	opVex(x, 0, op, flags, 0x79);
}
|
||||||
|
void vcvtph2uqq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4 | T_N_VL | T_66 | T_MAP5 | T_EW0 | T_YMM | T_ER_X | T_MUST_EVEX | T_B16, 0x79); }
|
||||||
|
// vcvtph2uw mnemonic: forwards to opAVX_X_XM_IMM with the MAP5 / EW0 flag set
// (T_YMM, T_ER_Z, T_MUST_EVEX, T_B16) and code 0x7D; no immediate is passed.
void vcvtph2uw(const Xmm& x, const Operand& op)
{
	const int flags = T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16;
	opAVX_X_XM_IMM(x, op, flags, 0x7D);
}
|
||||||
|
// vcvtph2w mnemonic: forwards to opAVX_X_XM_IMM with the 66 / MAP5 / EW0 flag
// set (T_YMM, T_ER_Z, T_MUST_EVEX, T_B16) and code 0x7D; no immediate is passed.
void vcvtph2w(const Xmm& x, const Operand& op)
{
	const int flags = T_66 | T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16;
	opAVX_X_XM_IMM(x, op, flags, 0x7D);
}
|
||||||
|
// vcvtps2phx mnemonic: the operand pair is first passed through checkCvt4, then
// emitted via opCvt with the N16 / N_VL / 66 / MAP5 / EW0 flag set
// (T_ER_Z, T_MUST_EVEX, T_B32) and code 0x1D.
void vcvtps2phx(const Xmm& x, const Operand& op)
{
	checkCvt4(x, op);
	const int flags = T_N16 | T_N_VL | T_66 | T_MAP5 | T_EW0 | T_ER_Z | T_MUST_EVEX | T_B32;
	opCvt(x, op, flags, 0x1D);
}
|
||||||
|
void vcvtps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_66 | T_0F | T_EW0 | T_YMM | T_ER_Y | T_MUST_EVEX | T_B32, 0x7B); }
|
||||||
void vcvtps2udq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_0F | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x79); }
|
void vcvtps2udq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_0F | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x79); }
|
||||||
void vcvtps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 0x79); }
|
void vcvtps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_66 | T_0F | T_EW0 | T_YMM | T_ER_Y | T_MUST_EVEX | T_B32, 0x79); }
|
||||||
void vcvtqq2pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0xE6); }
|
void vcvtqq2pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0xE6); }
|
||||||
void vcvtqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5B); }
|
// vcvtqq2ph mnemonic: forwards to opCvt5 with the N16 / N_VL / MAP5 / EW1 flag
// set (T_ER_Z, T_MUST_EVEX, T_B64) and code 0x5B.
void vcvtqq2ph(const Xmm& x, const Operand& op)
{
	const int flags = T_N16 | T_N_VL | T_MAP5 | T_EW1 | T_ER_Z | T_MUST_EVEX | T_B64;
	opCvt5(x, op, flags, 0x5B);
}
|
||||||
void vcvtsd2usi(const Reg32e& r, const Operand& op) { int type = (T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_ER_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x79); }
|
void vcvtqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x5B); }
|
||||||
void vcvtss2usi(const Reg32e& r, const Operand& op) { int type = (T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_ER_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x79); }
|
// vcvtsd2sh mnemonic: forwards to opAVX_X_X_XM with the N8 / F2 / MAP5 / EW1
// flag set (T_ER_X, T_MUST_EVEX) and code 0x5A.
void vcvtsd2sh(const Xmm& x1, const Xmm& x2, const Operand& op)
{
	const int flags = T_N8 | T_F2 | T_MAP5 | T_EW1 | T_ER_X | T_MUST_EVEX;
	opAVX_X_X_XM(x1, x2, op, flags, 0x5A);
}
|
||||||
|
// vcvtsd2usi mnemonic: destination is a general-purpose register. The width
// flag is T_EW1 when r is a 64-bit register and T_EW0 otherwise; it is combined
// with N8 / F2 / 0F / T_ER_X / T_MUST_EVEX and emitted via opVex with xm0 as
// the second operand and code 0x79.
void vcvtsd2usi(const Reg32e& r, const Operand& op)
{
	const int widthFlag = r.isREG(64) ? T_EW1 : T_EW0;
	const int type = (T_N8 | T_F2 | T_0F | T_ER_X | T_MUST_EVEX) | widthFlag;
	opVex(r, &xm0, op, type, 0x79);
}
|
||||||
|
// vcvtsh2sd mnemonic: forwards to opAVX_X_X_XM with the N2 / F3 / MAP5 / EW0
// flag set (T_SAE_X, T_MUST_EVEX) and code 0x5A.
void vcvtsh2sd(const Xmm& x1, const Xmm& x2, const Operand& op)
{
	const int flags = T_N2 | T_F3 | T_MAP5 | T_EW0 | T_SAE_X | T_MUST_EVEX;
	opAVX_X_X_XM(x1, x2, op, flags, 0x5A);
}
|
||||||
|
// vcvtsh2si mnemonic: destination is a general-purpose register. The width
// flag is T_EW1 when r is a 64-bit register and T_EW0 otherwise; it is combined
// with N2 / F3 / MAP5 / T_ER_X / T_MUST_EVEX and emitted via opVex with xm0 as
// the second operand and code 0x2D.
void vcvtsh2si(const Reg32e& r, const Operand& op)
{
	const int widthFlag = r.isREG(64) ? T_EW1 : T_EW0;
	const int type = (T_N2 | T_F3 | T_MAP5 | T_ER_X | T_MUST_EVEX) | widthFlag;
	opVex(r, &xm0, op, type, 0x2D);
}
|
||||||
|
// vcvtsh2ss mnemonic: forwards to opAVX_X_X_XM with the N2 / MAP6 / EW0 flag
// set (T_SAE_X, T_MUST_EVEX) and code 0x13.
void vcvtsh2ss(const Xmm& x1, const Xmm& x2, const Operand& op)
{
	const int flags = T_N2 | T_MAP6 | T_EW0 | T_SAE_X | T_MUST_EVEX;
	opAVX_X_X_XM(x1, x2, op, flags, 0x13);
}
|
||||||
|
// vcvtsh2usi mnemonic: destination is a general-purpose register. The width
// flag is T_EW1 when r is a 64-bit register and T_EW0 otherwise; it is combined
// with N2 / F3 / MAP5 / T_ER_X / T_MUST_EVEX and emitted via opVex with xm0 as
// the second operand and code 0x79.
void vcvtsh2usi(const Reg32e& r, const Operand& op)
{
	const int widthFlag = r.isREG(64) ? T_EW1 : T_EW0;
	const int type = (T_N2 | T_F3 | T_MAP5 | T_ER_X | T_MUST_EVEX) | widthFlag;
	opVex(r, &xm0, op, type, 0x79);
}
|
||||||
|
void vcvtsi2sh(const Xmm& x1, const Xmm& x2, const Operand& op) { if (!(x1.isXMM() && x2.isXMM() && op.isBit(32|64))) XBYAK_THROW(ERR_BAD_COMBINATION) int type = (T_F3 | T_MAP5 | T_ER_R | T_MUST_EVEX | T_M_K) | (op.isBit(32) ? (T_EW0 | T_N4) : (T_EW1 | T_N8)); opVex(x1, &x2, op, type, 0x2A); }
|
||||||
|
// vcvtss2sh mnemonic: forwards to opAVX_X_X_XM with the N4 / MAP5 / EW0 flag
// set (T_ER_X, T_MUST_EVEX) and code 0x1D.
void vcvtss2sh(const Xmm& x1, const Xmm& x2, const Operand& op)
{
	const int flags = T_N4 | T_MAP5 | T_EW0 | T_ER_X | T_MUST_EVEX;
	opAVX_X_X_XM(x1, x2, op, flags, 0x1D);
}
|
||||||
|
// vcvtss2usi mnemonic: destination is a general-purpose register. The width
// flag is T_EW1 when r is a 64-bit register and T_EW0 otherwise; it is combined
// with N4 / F3 / 0F / T_ER_X / T_MUST_EVEX and emitted via opVex with xm0 as
// the second operand and code 0x79.
void vcvtss2usi(const Reg32e& r, const Operand& op)
{
	const int widthFlag = r.isREG(64) ? T_EW1 : T_EW0;
	const int type = (T_N4 | T_F3 | T_0F | T_ER_X | T_MUST_EVEX) | widthFlag;
	opVex(r, &xm0, op, type, 0x79);
}
|
||||||
void vcvttpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x7A); }
|
void vcvttpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x7A); }
|
||||||
void vcvttpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, 0x78); }
|
void vcvttpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x78); }
|
||||||
void vcvttpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x78); }
|
void vcvttpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x78); }
|
||||||
void vcvttps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x7A); }
|
// vcvttph2dq mnemonic: runs checkCvt1 on the operand pair, then emits through
// opVex (second argument 0) with the N8 / N_VL / F3 / MAP5 / EW0 flag set
// (T_YMM, T_SAE_Y, T_MUST_EVEX, T_B16) and code 0x5B.
void vcvttph2dq(const Xmm& x, const Operand& op)
{
	checkCvt1(x, op);
	const int flags = T_N8 | T_N_VL | T_F3 | T_MAP5 | T_EW0 | T_YMM | T_SAE_Y | T_MUST_EVEX | T_B16;
	opVex(x, 0, op, flags, 0x5B);
}
|
||||||
|
void vcvttph2qq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4 | T_N_VL | T_66 | T_MAP5 | T_EW0 | T_YMM | T_SAE_X | T_MUST_EVEX | T_B16, 0x7A); }
|
||||||
|
// vcvttph2udq mnemonic: runs checkCvt1 on the operand pair, then emits through
// opVex (second argument 0) with the N8 / N_VL / MAP5 / EW0 flag set
// (T_YMM, T_SAE_Y, T_MUST_EVEX, T_B16) and code 0x78.
void vcvttph2udq(const Xmm& x, const Operand& op)
{
	checkCvt1(x, op);
	const int flags = T_N8 | T_N_VL | T_MAP5 | T_EW0 | T_YMM | T_SAE_Y | T_MUST_EVEX | T_B16;
	opVex(x, 0, op, flags, 0x78);
}
|
||||||
|
void vcvttph2uqq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4 | T_N_VL | T_66 | T_MAP5 | T_EW0 | T_YMM | T_SAE_X | T_MUST_EVEX | T_B16, 0x78); }
|
||||||
|
// vcvttph2uw mnemonic: forwards to opAVX_X_XM_IMM with the MAP5 / EW0 flag set
// (T_YMM, T_SAE_Z, T_MUST_EVEX, T_B16) and code 0x7C; no immediate is passed.
void vcvttph2uw(const Xmm& x, const Operand& op)
{
	const int flags = T_MAP5 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B16;
	opAVX_X_XM_IMM(x, op, flags, 0x7C);
}
|
||||||
|
// vcvttph2w mnemonic: forwards to opAVX_X_XM_IMM with the 66 / MAP5 / EW0 flag
// set (T_YMM, T_SAE_Z, T_MUST_EVEX, T_B16) and code 0x7C; no immediate is passed.
void vcvttph2w(const Xmm& x, const Operand& op)
{
	const int flags = T_66 | T_MAP5 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B16;
	opAVX_X_XM_IMM(x, op, flags, 0x7C);
}
|
||||||
|
void vcvttps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_66 | T_0F | T_EW0 | T_YMM | T_SAE_Y | T_MUST_EVEX | T_B32, 0x7A); }
|
||||||
void vcvttps2udq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x78); }
|
void vcvttps2udq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x78); }
|
||||||
void vcvttps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x78); }
|
void vcvttps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_66 | T_0F | T_EW0 | T_YMM | T_SAE_Y | T_MUST_EVEX | T_B32, 0x78); }
|
||||||
void vcvttsd2usi(const Reg32e& r, const Operand& op) { int type = (T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_SAE_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x78); }
|
// vcvttsd2usi mnemonic: destination is a general-purpose register. The width
// flag is T_EW1 when r is a 64-bit register and T_EW0 otherwise; it is combined
// with N8 / F2 / 0F / T_SAE_X / T_MUST_EVEX and emitted via opVex with xm0 as
// the second operand and code 0x78.
void vcvttsd2usi(const Reg32e& r, const Operand& op)
{
	const int widthFlag = r.isREG(64) ? T_EW1 : T_EW0;
	const int type = (T_N8 | T_F2 | T_0F | T_SAE_X | T_MUST_EVEX) | widthFlag;
	opVex(r, &xm0, op, type, 0x78);
}
|
||||||
void vcvttss2usi(const Reg32e& r, const Operand& op) { int type = (T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_SAE_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x78); }
|
// vcvttsh2si mnemonic: destination is a general-purpose register. The width
// flag is chosen solely from the destination: T_EW1 when r is a 64-bit register,
// T_EW0 otherwise. It is combined with N2 / F3 / MAP5 / T_SAE_X / T_MUST_EVEX
// and emitted via opVex with xm0 as the second operand and code 0x2C.
//
// The base mask intentionally does NOT contain T_EW0: hard-coding it there made
// a 64-bit destination carry both T_EW0 and T_EW1, unlike the sibling
// vcvtsh2si / vcvtsh2usi wrappers, which select exactly one width flag.
// NOTE(review): presumably the encoder only tests T_EW1, in which case this is
// a pure consistency cleanup; confirm against the EVEX emitter.
void vcvttsh2si(const Reg32e& r, const Operand& op)
{
	const int widthFlag = r.isREG(64) ? T_EW1 : T_EW0;
	const int type = (T_N2 | T_F3 | T_MAP5 | T_SAE_X | T_MUST_EVEX) | widthFlag;
	opVex(r, &xm0, op, type, 0x2C);
}
|
||||||
void vcvtudq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_F3 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0x7A); }
|
// vcvttsh2usi mnemonic: destination is a general-purpose register. The width
// flag is chosen solely from the destination: T_EW1 when r is a 64-bit register,
// T_EW0 otherwise. It is combined with N2 / F3 / MAP5 / T_SAE_X / T_MUST_EVEX
// and emitted via opVex with xm0 as the second operand and code 0x78.
//
// The base mask intentionally does NOT contain T_EW0: hard-coding it there made
// a 64-bit destination carry both T_EW0 and T_EW1, unlike the sibling
// vcvtsh2si / vcvtsh2usi wrappers, which select exactly one width flag.
// NOTE(review): presumably the encoder only tests T_EW1, in which case this is
// a pure consistency cleanup; confirm against the EVEX emitter.
void vcvttsh2usi(const Reg32e& r, const Operand& op)
{
	const int widthFlag = r.isREG(64) ? T_EW1 : T_EW0;
	const int type = (T_N2 | T_F3 | T_MAP5 | T_SAE_X | T_MUST_EVEX) | widthFlag;
	opVex(r, &xm0, op, type, 0x78);
}
|
||||||
|
// vcvttss2usi mnemonic: destination is a general-purpose register. The width
// flag is T_EW1 when r is a 64-bit register and T_EW0 otherwise; it is combined
// with N4 / F3 / 0F / T_SAE_X / T_MUST_EVEX and emitted via opVex with xm0 as
// the second operand and code 0x78.
void vcvttss2usi(const Reg32e& r, const Operand& op)
{
	const int widthFlag = r.isREG(64) ? T_EW1 : T_EW0;
	const int type = (T_N4 | T_F3 | T_0F | T_SAE_X | T_MUST_EVEX) | widthFlag;
	opVex(r, &xm0, op, type, 0x78);
}
|
||||||
|
void vcvtudq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_F3 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x7A); }
|
||||||
|
// vcvtudq2ph mnemonic: the operand pair is first passed through checkCvt4, then
// emitted via opCvt with the N16 / N_VL / F2 / MAP5 / EW0 flag set
// (T_ER_Z, T_MUST_EVEX, T_B32) and code 0x7A.
void vcvtudq2ph(const Xmm& x, const Operand& op)
{
	checkCvt4(x, op);
	const int flags = T_N16 | T_N_VL | T_F2 | T_MAP5 | T_EW0 | T_ER_Z | T_MUST_EVEX | T_B32;
	opCvt(x, op, flags, 0x7A);
}
|
||||||
void vcvtudq2ps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x7A); }
|
void vcvtudq2ps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x7A); }
|
||||||
void vcvtuqq2pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7A); }
|
void vcvtuqq2pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7A); }
|
||||||
void vcvtuqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F2 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x7A); }
|
// vcvtuqq2ph mnemonic: forwards to opCvt5 with the N16 / N_VL / F2 / MAP5 / EW1
// flag set (T_ER_Z, T_MUST_EVEX, T_B64) and code 0x7A.
void vcvtuqq2ph(const Xmm& x, const Operand& op)
{
	const int flags = T_N16 | T_N_VL | T_F2 | T_MAP5 | T_EW1 | T_ER_Z | T_MUST_EVEX | T_B64;
	opCvt5(x, op, flags, 0x7A);
}
|
||||||
|
void vcvtuqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F2 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7A); }
|
||||||
void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }
|
void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }
|
||||||
|
void vcvtusi2sh(const Xmm& x1, const Xmm& x2, const Operand& op) { if (!(x1.isXMM() && x2.isXMM() && op.isBit(32|64))) XBYAK_THROW(ERR_BAD_COMBINATION) int type = (T_F3 | T_MAP5 | T_ER_R | T_MUST_EVEX | T_M_K) | (op.isBit(32) ? (T_EW0 | T_N4) : (T_EW1 | T_N8)); opVex(x1, &x2, op, type, 0x7B); }
|
||||||
void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }
|
void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }
|
||||||
|
void vcvtuw2ph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x7D); }
|
||||||
|
void vcvtw2ph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x7D); }
|
||||||
void vdbpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x42, imm); }
|
void vdbpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x42, imm); }
|
||||||
|
void vdivph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5E); }
|
||||||
|
void vdivsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5E); }
|
||||||
void vdpbf16ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x52); }
|
void vdpbf16ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x52); }
|
||||||
void vexp2pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xC8); }
|
void vexp2pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xC8); }
|
||||||
void vexp2ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xC8); }
|
void vexp2ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xC8); }
|
||||||
|
@ -1801,13 +1979,49 @@ void vextracti32x4(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Op
|
||||||
void vextracti32x8(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3B, imm); }
|
void vextracti32x8(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3B, imm); }
|
||||||
void vextracti64x2(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x39, imm); }
|
void vextracti64x2(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x39, imm); }
|
||||||
void vextracti64x4(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3B, imm); }
|
void vextracti64x4(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3B, imm); }
|
||||||
|
void vfcmaddcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x56); }
|
||||||
|
void vfcmulcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0xD6); }
|
||||||
void vfixupimmpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x54, imm); }
|
void vfixupimmpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x54, imm); }
|
||||||
void vfixupimmps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x54, imm); }
|
void vfixupimmps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x54, imm); }
|
||||||
void vfixupimmsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); }
|
void vfixupimmsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); }
|
||||||
void vfixupimmss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); }
|
void vfixupimmss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); }
|
||||||
|
void vfmadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x98); }
|
||||||
|
void vfmadd132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x99); }
|
||||||
|
void vfmadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xA8); }
|
||||||
|
void vfmadd213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0xA9); }
|
||||||
|
void vfmadd231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xB8); }
|
||||||
|
void vfmadd231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0xB9); }
|
||||||
|
void vfmaddcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x56); }
|
||||||
|
void vfmaddsub132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x96); }
|
||||||
|
void vfmaddsub213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xA6); }
|
||||||
|
void vfmaddsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xB6); }
|
||||||
|
void vfmsub132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x9A); }
|
||||||
|
void vfmsub132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x9B); }
|
||||||
|
void vfmsub213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xAA); }
|
||||||
|
void vfmsub213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0xAB); }
|
||||||
|
void vfmsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xBA); }
|
||||||
|
void vfmsub231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0xBB); }
|
||||||
|
void vfmsubadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x97); }
|
||||||
|
void vfmsubadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xA7); }
|
||||||
|
void vfmsubadd231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xB7); }
|
||||||
|
void vfmulcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0xD6); }
|
||||||
|
void vfnmadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x9C); }
|
||||||
|
void vfnmadd132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x9D); }
|
||||||
|
void vfnmadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xAC); }
|
||||||
|
void vfnmadd213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0xAD); }
|
||||||
|
void vfnmadd231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xBC); }
|
||||||
|
void vfnmadd231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0xBD); }
|
||||||
|
void vfnmsub132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x9E); }
|
||||||
|
void vfnmsub132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x9F); }
|
||||||
|
void vfnmsub213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xAE); }
|
||||||
|
void vfnmsub213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0xAF); }
|
||||||
|
void vfnmsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xBE); }
|
||||||
|
void vfnmsub231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0xBF); }
|
||||||
void vfpclasspd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }
|
void vfpclasspd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }
|
||||||
|
void vfpclassph(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B16, 0x66, imm); }
|
||||||
void vfpclassps(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }
|
void vfpclassps(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }
|
||||||
void vfpclasssd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }
|
void vfpclasssd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }
|
||||||
|
void vfpclasssh(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_0F3A | T_MUST_EVEX | T_EW0 | T_N2, 0x67, imm); }
|
||||||
void vfpclassss(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }
|
void vfpclassss(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }
|
||||||
void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 1); }
|
void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 1); }
|
||||||
void vgatherdps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 0); }
|
void vgatherdps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 0); }
|
||||||
|
@ -1822,12 +2036,16 @@ void vgatherpf1qps(const Address& addr) { opGatherFetch(addr, zm2, T_N4 | T_66 |
|
||||||
void vgatherqpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x93, 0); }
|
void vgatherqpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x93, 0); }
|
||||||
void vgatherqps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x93, 2); }
|
void vgatherqps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x93, 2); }
|
||||||
void vgetexppd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x42); }
|
void vgetexppd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x42); }
|
||||||
|
void vgetexpph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B16, 0x42); }
|
||||||
void vgetexpps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x42); }
|
void vgetexpps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x42); }
|
||||||
void vgetexpsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x43); }
|
void vgetexpsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x43); }
|
||||||
|
void vgetexpsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x43); }
|
||||||
void vgetexpss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x43); }
|
void vgetexpss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x43); }
|
||||||
void vgetmantpd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x26, imm); }
|
void vgetmantpd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x26, imm); }
|
||||||
|
void vgetmantph(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B16, 0x26, imm); }
|
||||||
void vgetmantps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x26, imm); }
|
void vgetmantps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x26, imm); }
|
||||||
void vgetmantsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x27, imm); }
|
void vgetmantsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x27, imm); }
|
||||||
|
void vgetmantsh(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x27, imm); }
|
||||||
void vgetmantss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x27, imm); }
|
void vgetmantss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x27, imm); }
|
||||||
void vinsertf32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x18, imm); }
|
void vinsertf32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x18, imm); }
|
||||||
void vinsertf32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x1A, imm); }
|
void vinsertf32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x1A, imm); }
|
||||||
|
@ -1837,6 +2055,10 @@ void vinserti32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm)
|
||||||
void vinserti32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3A, imm); }
|
void vinserti32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3A, imm); }
|
||||||
void vinserti64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x38, imm); }
|
void vinserti64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x38, imm); }
|
||||||
void vinserti64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3A, imm); }
|
void vinserti64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3A, imm); }
|
||||||
|
void vmaxph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5F); }
|
||||||
|
void vmaxsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5F); }
|
||||||
|
void vminph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5D); }
|
||||||
|
void vminsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5D); }
|
||||||
void vmovdqa32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
|
void vmovdqa32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
|
||||||
void vmovdqa32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
void vmovdqa32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
||||||
void vmovdqa64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
|
void vmovdqa64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
|
||||||
|
@ -1849,6 +2071,14 @@ void vmovdqu64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3
|
||||||
void vmovdqu64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
void vmovdqu64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
||||||
void vmovdqu8(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
|
void vmovdqu8(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
|
||||||
void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
||||||
|
void vmovsh(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2 | T_F3 | T_MAP5 | T_EW0 | T_MUST_EVEX | T_M_K, 0x11); }
|
||||||
|
void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N2 | T_F3 | T_MAP5 | T_EW0 | T_MUST_EVEX, 0x10); }
|
||||||
|
void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, T_N2 | T_F3 | T_MAP5 | T_EW0 | T_MUST_EVEX, 0x10); }
|
||||||
|
void vmovw(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2 | T_66 | T_MAP5 | T_MUST_EVEX, 0x7E); }
|
||||||
|
void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, T_N2 | T_66 | T_MAP5 | T_MUST_EVEX, 0x7E); }
|
||||||
|
void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_N2 | T_66 | T_MAP5 | T_MUST_EVEX, 0x6E); }
|
||||||
|
void vmulph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x59); }
|
||||||
|
void vmulsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x59); }
|
||||||
void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }
|
void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }
|
||||||
void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); }
|
void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); }
|
||||||
void vp4dpwssd(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x52); }
|
void vp4dpwssd(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x52); }
|
||||||
|
@ -2006,14 +2236,20 @@ void vrcp28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_
|
||||||
void vrcp28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCA); }
|
void vrcp28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCA); }
|
||||||
void vrcp28sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0xCB); }
|
void vrcp28sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0xCB); }
|
||||||
void vrcp28ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0xCB); }
|
void vrcp28ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0xCB); }
|
||||||
|
void vrcpph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_MUST_EVEX | T_B16, 0x4C); }
|
||||||
|
void vrcpsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_MUST_EVEX, 0x4D); }
|
||||||
void vreducepd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x56, imm); }
|
void vreducepd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x56, imm); }
|
||||||
|
void vreduceph(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B16, 0x56, imm); }
|
||||||
void vreduceps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x56, imm); }
|
void vreduceps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x56, imm); }
|
||||||
void vreducesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x57, imm); }
|
void vreducesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x57, imm); }
|
||||||
|
void vreducesh(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x57, imm); }
|
||||||
void vreducess(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x57, imm); }
|
void vreducess(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x57, imm); }
|
||||||
void vrndscalepd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x09, imm); }
|
void vrndscalepd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x09, imm); }
|
||||||
void vrndscaleps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x08, imm); }
|
void vrndscaleph(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B16, 0x08, imm); }
|
||||||
void vrndscalesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_MUST_EVEX, 0x0B, imm); }
|
void vrndscaleps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x08, imm); }
|
||||||
void vrndscaless(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_MUST_EVEX, 0x0A, imm); }
|
void vrndscalesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x0B, imm); }
|
||||||
|
void vrndscalesh(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x0A, imm); }
|
||||||
|
void vrndscaless(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x0A, imm); }
|
||||||
void vrsqrt14pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x4E); }
|
void vrsqrt14pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x4E); }
|
||||||
void vrsqrt14ps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x4E); }
|
void vrsqrt14ps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x4E); }
|
||||||
void vrsqrt14sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x4F); }
|
void vrsqrt14sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x4F); }
|
||||||
|
@ -2022,9 +2258,13 @@ void vrsqrt28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 |
|
||||||
void vrsqrt28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCC); }
|
void vrsqrt28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCC); }
|
||||||
void vrsqrt28sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0xCD); }
|
void vrsqrt28sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0xCD); }
|
||||||
void vrsqrt28ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0xCD); }
|
void vrsqrt28ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0xCD); }
|
||||||
|
void vrsqrtph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_MUST_EVEX | T_B16, 0x4E); }
|
||||||
|
void vrsqrtsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_MUST_EVEX, 0x4F); }
|
||||||
void vscalefpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x2C); }
|
void vscalefpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x2C); }
|
||||||
|
void vscalefph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x2C); }
|
||||||
void vscalefps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x2C); }
|
void vscalefps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x2C); }
|
||||||
void vscalefsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_ER_X | T_MUST_EVEX, 0x2D); }
|
void vscalefsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_ER_X | T_MUST_EVEX, 0x2D); }
|
||||||
|
void vscalefsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x2D); }
|
||||||
void vscalefss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x2D); }
|
void vscalefss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x2D); }
|
||||||
void vscatterdpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA2, 1); }
|
void vscatterdpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA2, 1); }
|
||||||
void vscatterdps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA2, 0); }
|
void vscatterdps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA2, 0); }
|
||||||
|
@ -2042,6 +2282,11 @@ void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) {
|
||||||
void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }
|
void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }
|
||||||
void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }
|
void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }
|
||||||
void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }
|
void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }
|
||||||
|
void vsqrtph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x51); }
|
||||||
|
void vsqrtsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_F3 | T_MAP5 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x51); }
|
||||||
|
void vsubph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5C); }
|
||||||
|
void vsubsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5C); }
|
||||||
|
void vucomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2E); }
|
||||||
#ifdef XBYAK64
|
#ifdef XBYAK64
|
||||||
void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); }
|
void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); }
|
||||||
void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); }
|
void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); }
|
||||||
|
|
306
externals/xbyak/xbyak/xbyak_util.h
vendored
306
externals/xbyak/xbyak/xbyak_util.h
vendored
|
@ -17,7 +17,6 @@
|
||||||
utility class and functions for Xbyak
|
utility class and functions for Xbyak
|
||||||
Xbyak::util::Clock ; rdtsc timer
|
Xbyak::util::Clock ; rdtsc timer
|
||||||
Xbyak::util::Cpu ; detect CPU
|
Xbyak::util::Cpu ; detect CPU
|
||||||
@note this header is UNDER CONSTRUCTION!
|
|
||||||
*/
|
*/
|
||||||
#include "xbyak.h"
|
#include "xbyak.h"
|
||||||
#endif // XBYAK_ONLY_CLASS_CPU
|
#endif // XBYAK_ONLY_CLASS_CPU
|
||||||
|
@ -27,8 +26,8 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||||
#ifdef _MSC_VER
|
#ifdef _WIN32
|
||||||
#if (_MSC_VER < 1400) && defined(XBYAK32)
|
#if defined(_MSC_VER) && (_MSC_VER < 1400) && defined(XBYAK32)
|
||||||
static inline __declspec(naked) void __cpuid(int[4], int)
|
static inline __declspec(naked) void __cpuid(int[4], int)
|
||||||
{
|
{
|
||||||
__asm {
|
__asm {
|
||||||
|
@ -88,32 +87,64 @@ typedef enum {
|
||||||
CoreLevel = 2
|
CoreLevel = 2
|
||||||
} IntelCpuTopologyLevel;
|
} IntelCpuTopologyLevel;
|
||||||
|
|
||||||
|
namespace local {
|
||||||
|
|
||||||
|
template<uint64_t L, uint64_t H = 0>
|
||||||
|
struct TypeT {
|
||||||
|
};
|
||||||
|
|
||||||
|
template<uint64_t L1, uint64_t H1, uint64_t L2, uint64_t H2>
|
||||||
|
TypeT<L1 | L2, H1 | H2> operator|(TypeT<L1, H1>, TypeT<L2, H2>) { return TypeT<L1 | L2, H1 | H2>(); }
|
||||||
|
|
||||||
|
} // local
|
||||||
|
|
||||||
/**
|
/**
|
||||||
CPU detection class
|
CPU detection class
|
||||||
|
@note static inline const member is supported by c++17 or later, so use template hack
|
||||||
*/
|
*/
|
||||||
class Cpu {
|
class Cpu {
|
||||||
uint64_t type_;
|
public:
|
||||||
|
class Type {
|
||||||
|
uint64_t L;
|
||||||
|
uint64_t H;
|
||||||
|
public:
|
||||||
|
Type(uint64_t L = 0, uint64_t H = 0) : L(L), H(H) { }
|
||||||
|
template<uint64_t L_, uint64_t H_>
|
||||||
|
Type(local::TypeT<L_, H_>) : L(L_), H(H_) {}
|
||||||
|
Type& operator&=(const Type& rhs) { L &= rhs.L; H &= rhs.H; return *this; }
|
||||||
|
Type& operator|=(const Type& rhs) { L |= rhs.L; H |= rhs.H; return *this; }
|
||||||
|
Type operator&(const Type& rhs) const { Type t = *this; t &= rhs; return t; }
|
||||||
|
Type operator|(const Type& rhs) const { Type t = *this; t |= rhs; return t; }
|
||||||
|
bool operator==(const Type& rhs) const { return H == rhs.H && L == rhs.L; }
|
||||||
|
bool operator!=(const Type& rhs) const { return !operator==(rhs); }
|
||||||
|
// without explicit because backward compatilibity
|
||||||
|
operator bool() const { return (H | L) != 0; }
|
||||||
|
uint64_t getL() const { return L; }
|
||||||
|
uint64_t getH() const { return H; }
|
||||||
|
};
|
||||||
|
private:
|
||||||
|
Type type_;
|
||||||
//system topology
|
//system topology
|
||||||
bool x2APIC_supported_;
|
bool x2APIC_supported_;
|
||||||
static const size_t maxTopologyLevels = 2;
|
static const size_t maxTopologyLevels = 2;
|
||||||
unsigned int numCores_[maxTopologyLevels];
|
uint32_t numCores_[maxTopologyLevels];
|
||||||
|
|
||||||
static const unsigned int maxNumberCacheLevels = 10;
|
static const uint32_t maxNumberCacheLevels = 10;
|
||||||
unsigned int dataCacheSize_[maxNumberCacheLevels];
|
uint32_t dataCacheSize_[maxNumberCacheLevels];
|
||||||
unsigned int coresSharignDataCache_[maxNumberCacheLevels];
|
uint32_t coresSharignDataCache_[maxNumberCacheLevels];
|
||||||
unsigned int dataCacheLevels_;
|
uint32_t dataCacheLevels_;
|
||||||
|
|
||||||
unsigned int get32bitAsBE(const char *x) const
|
uint32_t get32bitAsBE(const char *x) const
|
||||||
{
|
{
|
||||||
return x[0] | (x[1] << 8) | (x[2] << 16) | (x[3] << 24);
|
return x[0] | (x[1] << 8) | (x[2] << 16) | (x[3] << 24);
|
||||||
}
|
}
|
||||||
unsigned int mask(int n) const
|
uint32_t mask(int n) const
|
||||||
{
|
{
|
||||||
return (1U << n) - 1;
|
return (1U << n) - 1;
|
||||||
}
|
}
|
||||||
void setFamily()
|
void setFamily()
|
||||||
{
|
{
|
||||||
unsigned int data[4] = {};
|
uint32_t data[4] = {};
|
||||||
getCpuid(1, data);
|
getCpuid(1, data);
|
||||||
stepping = data[0] & mask(4);
|
stepping = data[0] & mask(4);
|
||||||
model = (data[0] >> 4) & mask(4);
|
model = (data[0] >> 4) & mask(4);
|
||||||
|
@ -132,15 +163,15 @@ class Cpu {
|
||||||
displayModel = model;
|
displayModel = model;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
unsigned int extractBit(unsigned int val, unsigned int base, unsigned int end)
|
uint32_t extractBit(uint32_t val, uint32_t base, uint32_t end)
|
||||||
{
|
{
|
||||||
return (val >> base) & ((1u << (end - base)) - 1);
|
return (val >> base) & ((1u << (end - base)) - 1);
|
||||||
}
|
}
|
||||||
void setNumCores()
|
void setNumCores()
|
||||||
{
|
{
|
||||||
if ((type_ & tINTEL) == 0) return;
|
if (!has(tINTEL)) return;
|
||||||
|
|
||||||
unsigned int data[4] = {};
|
uint32_t data[4] = {};
|
||||||
|
|
||||||
/* CAUTION: These numbers are configuration as shipped by Intel. */
|
/* CAUTION: These numbers are configuration as shipped by Intel. */
|
||||||
getCpuidEx(0x0, 0, data);
|
getCpuidEx(0x0, 0, data);
|
||||||
|
@ -152,7 +183,7 @@ class Cpu {
|
||||||
leaf 0xB can be zeroed-out by a hypervisor
|
leaf 0xB can be zeroed-out by a hypervisor
|
||||||
*/
|
*/
|
||||||
x2APIC_supported_ = true;
|
x2APIC_supported_ = true;
|
||||||
for (unsigned int i = 0; i < maxTopologyLevels; i++) {
|
for (uint32_t i = 0; i < maxTopologyLevels; i++) {
|
||||||
getCpuidEx(0xB, i, data);
|
getCpuidEx(0xB, i, data);
|
||||||
IntelCpuTopologyLevel level = (IntelCpuTopologyLevel)extractBit(data[2], 8, 15);
|
IntelCpuTopologyLevel level = (IntelCpuTopologyLevel)extractBit(data[2], 8, 15);
|
||||||
if (level == SmtLevel || level == CoreLevel) {
|
if (level == SmtLevel || level == CoreLevel) {
|
||||||
|
@ -176,14 +207,14 @@ class Cpu {
|
||||||
}
|
}
|
||||||
void setCacheHierarchy()
|
void setCacheHierarchy()
|
||||||
{
|
{
|
||||||
if ((type_ & tINTEL) == 0) return;
|
if (!has(tINTEL)) return;
|
||||||
const unsigned int NO_CACHE = 0;
|
const uint32_t NO_CACHE = 0;
|
||||||
const unsigned int DATA_CACHE = 1;
|
const uint32_t DATA_CACHE = 1;
|
||||||
// const unsigned int INSTRUCTION_CACHE = 2;
|
// const uint32_t INSTRUCTION_CACHE = 2;
|
||||||
const unsigned int UNIFIED_CACHE = 3;
|
const uint32_t UNIFIED_CACHE = 3;
|
||||||
unsigned int smt_width = 0;
|
uint32_t smt_width = 0;
|
||||||
unsigned int logical_cores = 0;
|
uint32_t logical_cores = 0;
|
||||||
unsigned int data[4] = {};
|
uint32_t data[4] = {};
|
||||||
|
|
||||||
if (x2APIC_supported_) {
|
if (x2APIC_supported_) {
|
||||||
smt_width = numCores_[0];
|
smt_width = numCores_[0];
|
||||||
|
@ -201,10 +232,10 @@ class Cpu {
|
||||||
*/
|
*/
|
||||||
for (int i = 0; dataCacheLevels_ < maxNumberCacheLevels; i++) {
|
for (int i = 0; dataCacheLevels_ < maxNumberCacheLevels; i++) {
|
||||||
getCpuidEx(0x4, i, data);
|
getCpuidEx(0x4, i, data);
|
||||||
unsigned int cacheType = extractBit(data[0], 0, 4);
|
uint32_t cacheType = extractBit(data[0], 0, 4);
|
||||||
if (cacheType == NO_CACHE) break;
|
if (cacheType == NO_CACHE) break;
|
||||||
if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
|
if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
|
||||||
unsigned int actual_logical_cores = extractBit(data[0], 14, 25) + 1;
|
uint32_t actual_logical_cores = extractBit(data[0], 14, 25) + 1;
|
||||||
if (logical_cores != 0) { // true only if leaf 0xB is supported and valid
|
if (logical_cores != 0) { // true only if leaf 0xB is supported and valid
|
||||||
actual_logical_cores = (std::min)(actual_logical_cores, logical_cores);
|
actual_logical_cores = (std::min)(actual_logical_cores, logical_cores);
|
||||||
}
|
}
|
||||||
|
@ -231,7 +262,7 @@ public:
|
||||||
int displayFamily; // family + extFamily
|
int displayFamily; // family + extFamily
|
||||||
int displayModel; // model + extModel
|
int displayModel; // model + extModel
|
||||||
|
|
||||||
unsigned int getNumCores(IntelCpuTopologyLevel level) const {
|
uint32_t getNumCores(IntelCpuTopologyLevel level) const {
|
||||||
if (!x2APIC_supported_) XBYAK_THROW_RET(ERR_X2APIC_IS_NOT_SUPPORTED, 0)
|
if (!x2APIC_supported_) XBYAK_THROW_RET(ERR_X2APIC_IS_NOT_SUPPORTED, 0)
|
||||||
switch (level) {
|
switch (level) {
|
||||||
case SmtLevel: return numCores_[level - 1];
|
case SmtLevel: return numCores_[level - 1];
|
||||||
|
@ -240,13 +271,13 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int getDataCacheLevels() const { return dataCacheLevels_; }
|
uint32_t getDataCacheLevels() const { return dataCacheLevels_; }
|
||||||
unsigned int getCoresSharingDataCache(unsigned int i) const
|
uint32_t getCoresSharingDataCache(uint32_t i) const
|
||||||
{
|
{
|
||||||
if (i >= dataCacheLevels_) XBYAK_THROW_RET(ERR_BAD_PARAMETER, 0)
|
if (i >= dataCacheLevels_) XBYAK_THROW_RET(ERR_BAD_PARAMETER, 0)
|
||||||
return coresSharignDataCache_[i];
|
return coresSharignDataCache_[i];
|
||||||
}
|
}
|
||||||
unsigned int getDataCacheSize(unsigned int i) const
|
uint32_t getDataCacheSize(uint32_t i) const
|
||||||
{
|
{
|
||||||
if (i >= dataCacheLevels_) XBYAK_THROW_RET(ERR_BAD_PARAMETER, 0)
|
if (i >= dataCacheLevels_) XBYAK_THROW_RET(ERR_BAD_PARAMETER, 0)
|
||||||
return dataCacheSize_[i];
|
return dataCacheSize_[i];
|
||||||
|
@ -255,10 +286,10 @@ public:
|
||||||
/*
|
/*
|
||||||
data[] = { eax, ebx, ecx, edx }
|
data[] = { eax, ebx, ecx, edx }
|
||||||
*/
|
*/
|
||||||
static inline void getCpuid(unsigned int eaxIn, unsigned int data[4])
|
static inline void getCpuid(uint32_t eaxIn, uint32_t data[4])
|
||||||
{
|
{
|
||||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||||
#ifdef _MSC_VER
|
#ifdef _WIN32
|
||||||
__cpuid(reinterpret_cast<int*>(data), eaxIn);
|
__cpuid(reinterpret_cast<int*>(data), eaxIn);
|
||||||
#else
|
#else
|
||||||
__cpuid(eaxIn, data[0], data[1], data[2], data[3]);
|
__cpuid(eaxIn, data[0], data[1], data[2], data[3]);
|
||||||
|
@ -268,7 +299,7 @@ public:
|
||||||
(void)data;
|
(void)data;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
static inline void getCpuidEx(unsigned int eaxIn, unsigned int ecxIn, unsigned int data[4])
|
static inline void getCpuidEx(uint32_t eaxIn, uint32_t ecxIn, uint32_t data[4])
|
||||||
{
|
{
|
||||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
|
@ -288,7 +319,7 @@ public:
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
return _xgetbv(0);
|
return _xgetbv(0);
|
||||||
#else
|
#else
|
||||||
unsigned int eax, edx;
|
uint32_t eax, edx;
|
||||||
// xgetvb is not support on gcc 4.2
|
// xgetvb is not support on gcc 4.2
|
||||||
// __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
|
// __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
|
||||||
__asm__ volatile(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0));
|
__asm__ volatile(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0));
|
||||||
|
@ -298,77 +329,78 @@ public:
|
||||||
return 0;
|
return 0;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
typedef uint64_t Type;
|
|
||||||
|
|
||||||
static const Type NONE = 0;
|
static const local::TypeT<0> NONE;
|
||||||
static const Type tMMX = 1 << 0;
|
static const local::TypeT<1 << 0> tMMX;
|
||||||
static const Type tMMX2 = 1 << 1;
|
static const local::TypeT<1 << 1> tMMX2;
|
||||||
static const Type tCMOV = 1 << 2;
|
static const local::TypeT<1 << 2> tCMOV;
|
||||||
static const Type tSSE = 1 << 3;
|
static const local::TypeT<1 << 3> tSSE;
|
||||||
static const Type tSSE2 = 1 << 4;
|
static const local::TypeT<1 << 4> tSSE2;
|
||||||
static const Type tSSE3 = 1 << 5;
|
static const local::TypeT<1 << 5> tSSE3;
|
||||||
static const Type tSSSE3 = 1 << 6;
|
static const local::TypeT<1 << 6> tSSSE3;
|
||||||
static const Type tSSE41 = 1 << 7;
|
static const local::TypeT<1 << 7> tSSE41;
|
||||||
static const Type tSSE42 = 1 << 8;
|
static const local::TypeT<1 << 8> tSSE42;
|
||||||
static const Type tPOPCNT = 1 << 9;
|
static const local::TypeT<1 << 9> tPOPCNT;
|
||||||
static const Type tAESNI = 1 << 10;
|
static const local::TypeT<1 << 10> tAESNI;
|
||||||
static const Type tSSE5 = 1 << 11;
|
static const local::TypeT<1 << 11> tAVX512_FP16;
|
||||||
static const Type tOSXSAVE = 1 << 12;
|
static const local::TypeT<1 << 12> tOSXSAVE;
|
||||||
static const Type tPCLMULQDQ = 1 << 13;
|
static const local::TypeT<1 << 13> tPCLMULQDQ;
|
||||||
static const Type tAVX = 1 << 14;
|
static const local::TypeT<1 << 14> tAVX;
|
||||||
static const Type tFMA = 1 << 15;
|
static const local::TypeT<1 << 15> tFMA;
|
||||||
|
static const local::TypeT<1 << 16> t3DN;
|
||||||
static const Type t3DN = 1 << 16;
|
static const local::TypeT<1 << 17> tE3DN;
|
||||||
static const Type tE3DN = 1 << 17;
|
static const local::TypeT<1 << 18> tWAITPKG;
|
||||||
static const Type tSSE4a = 1 << 18;
|
static const local::TypeT<1 << 19> tRDTSCP;
|
||||||
static const Type tRDTSCP = 1 << 19;
|
static const local::TypeT<1 << 20> tAVX2;
|
||||||
static const Type tAVX2 = 1 << 20;
|
static const local::TypeT<1 << 21> tBMI1; // andn, bextr, blsi, blsmsk, blsr, tzcnt
|
||||||
static const Type tBMI1 = 1 << 21; // andn, bextr, blsi, blsmsk, blsr, tzcnt
|
static const local::TypeT<1 << 22> tBMI2; // bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx
|
||||||
static const Type tBMI2 = 1 << 22; // bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx
|
static const local::TypeT<1 << 23> tLZCNT;
|
||||||
static const Type tLZCNT = 1 << 23;
|
static const local::TypeT<1 << 24> tINTEL;
|
||||||
|
static const local::TypeT<1 << 25> tAMD;
|
||||||
static const Type tINTEL = 1 << 24;
|
static const local::TypeT<1 << 26> tENHANCED_REP; // enhanced rep movsb/stosb
|
||||||
static const Type tAMD = 1 << 25;
|
static const local::TypeT<1 << 27> tRDRAND;
|
||||||
|
static const local::TypeT<1 << 28> tADX; // adcx, adox
|
||||||
static const Type tENHANCED_REP = 1 << 26; // enhanced rep movsb/stosb
|
static const local::TypeT<1 << 29> tRDSEED; // rdseed
|
||||||
static const Type tRDRAND = 1 << 27;
|
static const local::TypeT<1 << 30> tSMAP; // stac
|
||||||
static const Type tADX = 1 << 28; // adcx, adox
|
static const local::TypeT<uint64_t(1) << 31> tHLE; // xacquire, xrelease, xtest
|
||||||
static const Type tRDSEED = 1 << 29; // rdseed
|
static const local::TypeT<uint64_t(1) << 32> tRTM; // xbegin, xend, xabort
|
||||||
static const Type tSMAP = 1 << 30; // stac
|
static const local::TypeT<uint64_t(1) << 33> tF16C; // vcvtph2ps, vcvtps2ph
|
||||||
static const Type tHLE = uint64_t(1) << 31; // xacquire, xrelease, xtest
|
static const local::TypeT<uint64_t(1) << 34> tMOVBE; // mobve
|
||||||
static const Type tRTM = uint64_t(1) << 32; // xbegin, xend, xabort
|
static const local::TypeT<uint64_t(1) << 35> tAVX512F;
|
||||||
static const Type tF16C = uint64_t(1) << 33; // vcvtph2ps, vcvtps2ph
|
static const local::TypeT<uint64_t(1) << 36> tAVX512DQ;
|
||||||
static const Type tMOVBE = uint64_t(1) << 34; // mobve
|
static const local::TypeT<uint64_t(1) << 37> tAVX512_IFMA;
|
||||||
static const Type tAVX512F = uint64_t(1) << 35;
|
static const local::TypeT<uint64_t(1) << 37> tAVX512IFMA;// = tAVX512_IFMA;
|
||||||
static const Type tAVX512DQ = uint64_t(1) << 36;
|
static const local::TypeT<uint64_t(1) << 38> tAVX512PF;
|
||||||
static const Type tAVX512_IFMA = uint64_t(1) << 37;
|
static const local::TypeT<uint64_t(1) << 39> tAVX512ER;
|
||||||
static const Type tAVX512IFMA = tAVX512_IFMA;
|
static const local::TypeT<uint64_t(1) << 40> tAVX512CD;
|
||||||
static const Type tAVX512PF = uint64_t(1) << 38;
|
static const local::TypeT<uint64_t(1) << 41> tAVX512BW;
|
||||||
static const Type tAVX512ER = uint64_t(1) << 39;
|
static const local::TypeT<uint64_t(1) << 42> tAVX512VL;
|
||||||
static const Type tAVX512CD = uint64_t(1) << 40;
|
static const local::TypeT<uint64_t(1) << 43> tAVX512_VBMI;
|
||||||
static const Type tAVX512BW = uint64_t(1) << 41;
|
static const local::TypeT<uint64_t(1) << 43> tAVX512VBMI; // = tAVX512_VBMI; // changed by Intel's manual
|
||||||
static const Type tAVX512VL = uint64_t(1) << 42;
|
static const local::TypeT<uint64_t(1) << 44> tAVX512_4VNNIW;
|
||||||
static const Type tAVX512_VBMI = uint64_t(1) << 43;
|
static const local::TypeT<uint64_t(1) << 45> tAVX512_4FMAPS;
|
||||||
static const Type tAVX512VBMI = tAVX512_VBMI; // changed by Intel's manual
|
static const local::TypeT<uint64_t(1) << 46> tPREFETCHWT1;
|
||||||
static const Type tAVX512_4VNNIW = uint64_t(1) << 44;
|
static const local::TypeT<uint64_t(1) << 47> tPREFETCHW;
|
||||||
static const Type tAVX512_4FMAPS = uint64_t(1) << 45;
|
static const local::TypeT<uint64_t(1) << 48> tSHA;
|
||||||
static const Type tPREFETCHWT1 = uint64_t(1) << 46;
|
static const local::TypeT<uint64_t(1) << 49> tMPX;
|
||||||
static const Type tPREFETCHW = uint64_t(1) << 47;
|
static const local::TypeT<uint64_t(1) << 50> tAVX512_VBMI2;
|
||||||
static const Type tSHA = uint64_t(1) << 48;
|
static const local::TypeT<uint64_t(1) << 51> tGFNI;
|
||||||
static const Type tMPX = uint64_t(1) << 49;
|
static const local::TypeT<uint64_t(1) << 52> tVAES;
|
||||||
static const Type tAVX512_VBMI2 = uint64_t(1) << 50;
|
static const local::TypeT<uint64_t(1) << 53> tVPCLMULQDQ;
|
||||||
static const Type tGFNI = uint64_t(1) << 51;
|
static const local::TypeT<uint64_t(1) << 54> tAVX512_VNNI;
|
||||||
static const Type tVAES = uint64_t(1) << 52;
|
static const local::TypeT<uint64_t(1) << 55> tAVX512_BITALG;
|
||||||
static const Type tVPCLMULQDQ = uint64_t(1) << 53;
|
static const local::TypeT<uint64_t(1) << 56> tAVX512_VPOPCNTDQ;
|
||||||
static const Type tAVX512_VNNI = uint64_t(1) << 54;
|
static const local::TypeT<uint64_t(1) << 57> tAVX512_BF16;
|
||||||
static const Type tAVX512_BITALG = uint64_t(1) << 55;
|
static const local::TypeT<uint64_t(1) << 58> tAVX512_VP2INTERSECT;
|
||||||
static const Type tAVX512_VPOPCNTDQ = uint64_t(1) << 56;
|
static const local::TypeT<uint64_t(1) << 59> tAMX_TILE;
|
||||||
static const Type tAVX512_BF16 = uint64_t(1) << 57;
|
static const local::TypeT<uint64_t(1) << 60> tAMX_INT8;
|
||||||
static const Type tAVX512_VP2INTERSECT = uint64_t(1) << 58;
|
static const local::TypeT<uint64_t(1) << 61> tAMX_BF16;
|
||||||
static const Type tAMX_TILE = uint64_t(1) << 59;
|
static const local::TypeT<uint64_t(1) << 62> tAVX_VNNI;
|
||||||
static const Type tAMX_INT8 = uint64_t(1) << 60;
|
static const local::TypeT<uint64_t(1) << 63> tCLFLUSHOPT;
|
||||||
static const Type tAMX_BF16 = uint64_t(1) << 61;
|
static const local::TypeT<0, 1 << 0> tCLDEMOTE;
|
||||||
static const Type tAVX_VNNI = uint64_t(1) << 62;
|
static const local::TypeT<0, 1 << 1> tMOVDIRI;
|
||||||
|
static const local::TypeT<0, 1 << 2> tMOVDIR64B;
|
||||||
|
static const local::TypeT<0, 1 << 3> tCLZERO; // AMD Zen
|
||||||
|
|
||||||
Cpu()
|
Cpu()
|
||||||
: type_(NONE)
|
: type_(NONE)
|
||||||
|
@ -378,13 +410,13 @@ public:
|
||||||
, coresSharignDataCache_()
|
, coresSharignDataCache_()
|
||||||
, dataCacheLevels_(0)
|
, dataCacheLevels_(0)
|
||||||
{
|
{
|
||||||
unsigned int data[4] = {};
|
uint32_t data[4] = {};
|
||||||
const unsigned int& EAX = data[0];
|
const uint32_t& EAX = data[0];
|
||||||
const unsigned int& EBX = data[1];
|
const uint32_t& EBX = data[1];
|
||||||
const unsigned int& ECX = data[2];
|
const uint32_t& ECX = data[2];
|
||||||
const unsigned int& EDX = data[3];
|
const uint32_t& EDX = data[3];
|
||||||
getCpuid(0, data);
|
getCpuid(0, data);
|
||||||
const unsigned int maxNum = EAX;
|
const uint32_t maxNum = EAX;
|
||||||
static const char intel[] = "ntel";
|
static const char intel[] = "ntel";
|
||||||
static const char amd[] = "cAMD";
|
static const char amd[] = "cAMD";
|
||||||
if (ECX == get32bitAsBE(amd)) {
|
if (ECX == get32bitAsBE(amd)) {
|
||||||
|
@ -407,7 +439,8 @@ public:
|
||||||
|
|
||||||
// Extended flags information
|
// Extended flags information
|
||||||
getCpuid(0x80000000, data);
|
getCpuid(0x80000000, data);
|
||||||
if (EAX >= 0x80000001) {
|
const uint32_t maxExtendedNum = EAX;
|
||||||
|
if (maxExtendedNum >= 0x80000001) {
|
||||||
getCpuid(0x80000001, data);
|
getCpuid(0x80000001, data);
|
||||||
|
|
||||||
if (EDX & (1U << 31)) type_ |= t3DN;
|
if (EDX & (1U << 31)) type_ |= t3DN;
|
||||||
|
@ -419,6 +452,11 @@ public:
|
||||||
if (ECX & (1U << 8)) type_ |= tPREFETCHW;
|
if (ECX & (1U << 8)) type_ |= tPREFETCHW;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (maxExtendedNum >= 0x80000008) {
|
||||||
|
getCpuid(0x80000008, data);
|
||||||
|
if (EBX & (1U << 0)) type_ |= tCLZERO;
|
||||||
|
}
|
||||||
|
|
||||||
getCpuid(1, data);
|
getCpuid(1, data);
|
||||||
if (ECX & (1U << 0)) type_ |= tSSE3;
|
if (ECX & (1U << 0)) type_ |= tSSE3;
|
||||||
if (ECX & (1U << 9)) type_ |= tSSSE3;
|
if (ECX & (1U << 9)) type_ |= tSSSE3;
|
||||||
|
@ -469,6 +507,7 @@ public:
|
||||||
if (EDX & (1U << 2)) type_ |= tAVX512_4VNNIW;
|
if (EDX & (1U << 2)) type_ |= tAVX512_4VNNIW;
|
||||||
if (EDX & (1U << 3)) type_ |= tAVX512_4FMAPS;
|
if (EDX & (1U << 3)) type_ |= tAVX512_4FMAPS;
|
||||||
if (EDX & (1U << 8)) type_ |= tAVX512_VP2INTERSECT;
|
if (EDX & (1U << 8)) type_ |= tAVX512_VP2INTERSECT;
|
||||||
|
if ((type_ & tAVX512BW) && (EDX & (1U << 23))) type_ |= tAVX512_FP16;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -483,11 +522,16 @@ public:
|
||||||
if (EBX & (1U << 18)) type_ |= tRDSEED;
|
if (EBX & (1U << 18)) type_ |= tRDSEED;
|
||||||
if (EBX & (1U << 19)) type_ |= tADX;
|
if (EBX & (1U << 19)) type_ |= tADX;
|
||||||
if (EBX & (1U << 20)) type_ |= tSMAP;
|
if (EBX & (1U << 20)) type_ |= tSMAP;
|
||||||
|
if (EBX & (1U << 23)) type_ |= tCLFLUSHOPT;
|
||||||
if (EBX & (1U << 4)) type_ |= tHLE;
|
if (EBX & (1U << 4)) type_ |= tHLE;
|
||||||
if (EBX & (1U << 11)) type_ |= tRTM;
|
if (EBX & (1U << 11)) type_ |= tRTM;
|
||||||
if (EBX & (1U << 14)) type_ |= tMPX;
|
if (EBX & (1U << 14)) type_ |= tMPX;
|
||||||
if (EBX & (1U << 29)) type_ |= tSHA;
|
if (EBX & (1U << 29)) type_ |= tSHA;
|
||||||
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
|
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
|
||||||
|
if (ECX & (1U << 5)) type_ |= tWAITPKG;
|
||||||
|
if (ECX & (1U << 25)) type_ |= tCLDEMOTE;
|
||||||
|
if (ECX & (1U << 27)) type_ |= tMOVDIRI;
|
||||||
|
if (ECX & (1U << 28)) type_ |= tMOVDIR64B;
|
||||||
if (EDX & (1U << 24)) type_ |= tAMX_TILE;
|
if (EDX & (1U << 24)) type_ |= tAMX_TILE;
|
||||||
if (EDX & (1U << 25)) type_ |= tAMX_INT8;
|
if (EDX & (1U << 25)) type_ |= tAMX_INT8;
|
||||||
if (EDX & (1U << 22)) type_ |= tAMX_BF16;
|
if (EDX & (1U << 22)) type_ |= tAMX_BF16;
|
||||||
|
@ -511,9 +555,9 @@ public:
|
||||||
printf("display:family=%X, model=%X\n", displayFamily, displayModel);
|
printf("display:family=%X, model=%X\n", displayFamily, displayModel);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
bool has(Type type) const
|
bool has(const Type& type) const
|
||||||
{
|
{
|
||||||
return (type & type_) != 0;
|
return (type & type_) == type;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -526,7 +570,7 @@ public:
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
return __rdtsc();
|
return __rdtsc();
|
||||||
#else
|
#else
|
||||||
unsigned int eax, edx;
|
uint32_t eax, edx;
|
||||||
__asm__ volatile("rdtsc" : "=a"(eax), "=d"(edx));
|
__asm__ volatile("rdtsc" : "=a"(eax), "=d"(edx));
|
||||||
return ((uint64_t)edx << 32) | eax;
|
return ((uint64_t)edx << 32) | eax;
|
||||||
#endif
|
#endif
|
||||||
|
@ -563,7 +607,7 @@ const int UseRDX = 1 << 7;
|
||||||
|
|
||||||
class Pack {
|
class Pack {
|
||||||
static const size_t maxTblNum = 15;
|
static const size_t maxTblNum = 15;
|
||||||
const Xbyak::Reg64 *tbl_[maxTblNum];
|
Xbyak::Reg64 tbl_[maxTblNum];
|
||||||
size_t n_;
|
size_t n_;
|
||||||
public:
|
public:
|
||||||
Pack() : tbl_(), n_(0) {}
|
Pack() : tbl_(), n_(0) {}
|
||||||
|
@ -580,32 +624,36 @@ public:
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
Pack(const Xbyak::Reg64& t0)
|
Pack(const Xbyak::Reg64& t0)
|
||||||
{ n_ = 1; tbl_[0] = &t0; }
|
{ n_ = 1; tbl_[0] = t0; }
|
||||||
Pack(const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
Pack(const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
||||||
{ n_ = 2; tbl_[0] = &t0; tbl_[1] = &t1; }
|
{ n_ = 2; tbl_[0] = t0; tbl_[1] = t1; }
|
||||||
Pack(const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
Pack(const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
||||||
{ n_ = 3; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; }
|
{ n_ = 3; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; }
|
||||||
Pack(const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
Pack(const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
||||||
{ n_ = 4; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; }
|
{ n_ = 4; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; }
|
||||||
Pack(const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
Pack(const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
||||||
{ n_ = 5; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; }
|
{ n_ = 5; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; }
|
||||||
Pack(const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
Pack(const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
||||||
{ n_ = 6; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; }
|
{ n_ = 6; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; }
|
||||||
Pack(const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
Pack(const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
||||||
{ n_ = 7; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; }
|
{ n_ = 7; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; tbl_[6] = t6; }
|
||||||
Pack(const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
Pack(const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
||||||
{ n_ = 8; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; }
|
{ n_ = 8; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; tbl_[6] = t6; tbl_[7] = t7; }
|
||||||
Pack(const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
Pack(const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
||||||
{ n_ = 9; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; tbl_[8] = &t8; }
|
{ n_ = 9; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; tbl_[6] = t6; tbl_[7] = t7; tbl_[8] = t8; }
|
||||||
Pack(const Xbyak::Reg64& t9, const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
Pack(const Xbyak::Reg64& t9, const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
||||||
{ n_ = 10; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; tbl_[8] = &t8; tbl_[9] = &t9; }
|
{ n_ = 10; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; tbl_[6] = t6; tbl_[7] = t7; tbl_[8] = t8; tbl_[9] = t9; }
|
||||||
|
Pack(const Xbyak::Reg64& ta, const Xbyak::Reg64& t9, const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
||||||
|
{ n_ = 11; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; tbl_[6] = t6; tbl_[7] = t7; tbl_[8] = t8; tbl_[9] = t9; tbl_[10] = ta; }
|
||||||
|
Pack(const Xbyak::Reg64& tb, const Xbyak::Reg64& ta, const Xbyak::Reg64& t9, const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
||||||
|
{ n_ = 12; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; tbl_[6] = t6; tbl_[7] = t7; tbl_[8] = t8; tbl_[9] = t9; tbl_[10] = ta; tbl_[11] = tb; }
|
||||||
Pack& append(const Xbyak::Reg64& t)
|
Pack& append(const Xbyak::Reg64& t)
|
||||||
{
|
{
|
||||||
if (n_ == maxTblNum) {
|
if (n_ == maxTblNum) {
|
||||||
fprintf(stderr, "ERR Pack::can't append\n");
|
fprintf(stderr, "ERR Pack::can't append\n");
|
||||||
XBYAK_THROW_RET(ERR_BAD_PARAMETER, *this)
|
XBYAK_THROW_RET(ERR_BAD_PARAMETER, *this)
|
||||||
}
|
}
|
||||||
tbl_[n_++] = &t;
|
tbl_[n_++] = t;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
void init(const Xbyak::Reg64 *tbl, size_t n)
|
void init(const Xbyak::Reg64 *tbl, size_t n)
|
||||||
|
@ -616,7 +664,7 @@ public:
|
||||||
}
|
}
|
||||||
n_ = n;
|
n_ = n;
|
||||||
for (size_t i = 0; i < n; i++) {
|
for (size_t i = 0; i < n; i++) {
|
||||||
tbl_[i] = &tbl[i];
|
tbl_[i] = tbl[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const Xbyak::Reg64& operator[](size_t n) const
|
const Xbyak::Reg64& operator[](size_t n) const
|
||||||
|
@ -625,7 +673,7 @@ public:
|
||||||
fprintf(stderr, "ERR Pack bad n=%d(%d)\n", (int)n, (int)n_);
|
fprintf(stderr, "ERR Pack bad n=%d(%d)\n", (int)n, (int)n_);
|
||||||
XBYAK_THROW_RET(ERR_BAD_PARAMETER, rax)
|
XBYAK_THROW_RET(ERR_BAD_PARAMETER, rax)
|
||||||
}
|
}
|
||||||
return *tbl_[n];
|
return tbl_[n];
|
||||||
}
|
}
|
||||||
size_t size() const { return n_; }
|
size_t size() const { return n_; }
|
||||||
/*
|
/*
|
||||||
|
@ -648,7 +696,7 @@ public:
|
||||||
void put() const
|
void put() const
|
||||||
{
|
{
|
||||||
for (size_t i = 0; i < n_; i++) {
|
for (size_t i = 0; i < n_; i++) {
|
||||||
printf("%s ", tbl_[i]->toString());
|
printf("%s ", tbl_[i].toString());
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue