Squashed 'externals/xbyak/' changes from d512551e..2794cde7
2794cde7 add xword, yword, etc. in Xbyak::util fb9c04e4 fix document for vfpclassps a51be78b fix test dependency 04fdfb1e update version e6354f8b add vgf2p8mulb 09a12642 add gf2p8affineqb d171ba0e add gf2p8affineinvqb 457f4fd0 add vpshufbitqmb 5af0ba39 add vpexpand{b,w} e450f965 vpopcnt{d,q} supports ptr_b 48499eb1 add vpdpbusd(s), vpdpwssd(s) 9c745109 add vpdpbusd, vpdpbusds 0e1a11b4 add vpopcnt{b,w,d,q} 9acfc132 add vpshrd(v){w,d,q} ac8de850 add vpshld(v){w,d,q} f181c259 add vcompressb, vcompressw 5a402477 vpclmulqdq supports AVX-512 9e16b40b vaes* supports AVX-512 7fde08e0 add flags for intel's manual 319433-030.pdf c5da3778 add test of v4fmaddps, vp4dpwssd, etc. e4fc9d8a fix mpx encoding d0b2fb62 add bnd(0xf2) prefix for MPX f12b5678 use db for array cd74ab44 remove bat file git-subtree-dir: externals/xbyak git-subtree-split: 2794cde79eb71e86490061cac9622ad0067b8d15
This commit is contained in:
parent
4ed09fda06
commit
9fb82036ca
16 changed files with 935 additions and 173 deletions
|
@ -228,6 +228,9 @@ void putXM_X()
|
|||
|
||||
{ 0x8B, "vpcompressd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 },
|
||||
{ 0x8B, "vpcompressq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },
|
||||
|
||||
{ 0x63, "vcompressb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N1 },
|
||||
{ 0x63, "vcompressw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N2 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
|
@ -343,6 +346,28 @@ void putX_X_XM_IMM()
|
|||
|
||||
{ 0xB4, "vpmadd52luq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
{ 0xB5, "vpmadd52huq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
|
||||
{ 0x70, "vpshldw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, true },
|
||||
{ 0x71, "vpshldd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, true },
|
||||
{ 0x71, "vpshldq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, true },
|
||||
|
||||
{ 0x70, "vpshldvw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false },
|
||||
{ 0x71, "vpshldvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
|
||||
{ 0x71, "vpshldvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, false },
|
||||
|
||||
{ 0x72, "vpshrdw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, true },
|
||||
{ 0x73, "vpshrdd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, true },
|
||||
{ 0x73, "vpshrdq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, true },
|
||||
|
||||
{ 0x72, "vpshrdvw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false },
|
||||
{ 0x73, "vpshrdvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
|
||||
{ 0x73, "vpshrdvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, false },
|
||||
|
||||
{ 0x50, "vpdpbusd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
|
||||
{ 0x51, "vpdpbusds", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
|
||||
|
||||
{ 0x52, "vpdpwssd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
|
||||
{ 0x53, "vpdpwssds", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
|
@ -623,6 +648,14 @@ void putX_XM_IMM()
|
|||
|
||||
{ 0x56, "vreducepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
|
||||
{ 0x56, "vreduceps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true },
|
||||
|
||||
{ 0x54, "vpopcntb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z, false },
|
||||
{ 0x54, "vpopcntw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false },
|
||||
{ 0x55, "vpopcntd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
|
||||
{ 0x55, "vpopcntq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, false },
|
||||
|
||||
{ 0x62, "vpexpandb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_N1, false },
|
||||
{ 0x62, "vpexpandw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_N2, false },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
|
@ -661,6 +694,8 @@ void putMisc()
|
|||
puts("void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); Reg x = k; x.setBit(op.getBit()); opVex(x, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }");
|
||||
puts("void vfpclasssd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }");
|
||||
puts("void vfpclassss(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }");
|
||||
|
||||
puts("void vpshufbitqmb(const Opmask& k, const Xmm& x, const Operand& op) { opVex(k, &x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8F); }");
|
||||
}
|
||||
|
||||
void putV4FMA()
|
||||
|
|
|
@ -60,7 +60,7 @@ void putX_X_XM(bool omitOnly)
|
|||
{ 0x02, "pblendd", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 2 },
|
||||
{ 0x0B, "roundsd", T_0F3A | T_66 | T_W0, true, true, 3 },
|
||||
{ 0x0A, "roundss", T_0F3A | T_66 | T_W0, true, true, 3 },
|
||||
{ 0x44, "pclmulqdq", T_0F3A | T_66 | T_W0, true, true, 3 },
|
||||
{ 0x44, "pclmulqdq", T_0F3A | T_66 | T_W0 | T_YMM | T_EVEX, true, true, 3 },
|
||||
{ 0x0C, "permilps", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false, 2 },
|
||||
{ 0x0D, "permilpd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_B64, false, false, 2 },
|
||||
|
||||
|
@ -202,6 +202,10 @@ void putX_X_XM(bool omitOnly)
|
|||
|
||||
{ 0x14, "unpcklpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 },
|
||||
{ 0x14, "unpcklps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 },
|
||||
|
||||
{ 0xCF, "gf2p8affineinvqb", T_66 | T_0F3A | T_W1 | T_EVEX | T_YMM | T_EW1 | T_SAE_Z | T_B64, true, false, 3 },
|
||||
{ 0xCE, "gf2p8affineqb", T_66 | T_0F3A | T_W1 | T_EVEX | T_YMM | T_EW1 | T_SAE_Z | T_B64, true, false, 3 },
|
||||
{ 0xCF, "gf2p8mulb", T_66 | T_0F38 | T_W0 | T_EVEX | T_YMM | T_EW0 | T_SAE_Z, false, false, 3 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
|
@ -615,6 +619,7 @@ void put()
|
|||
////////////////////////////////////////////////////////////////
|
||||
{
|
||||
const GenericTbl tbl[] = {
|
||||
{ "bnd", 0xf2 }, /* 0xf2 prefix for MPX */
|
||||
{ "cbw", 0x66, 0x98 },
|
||||
{ "cdq", 0x99 },
|
||||
{ "clc", 0xF8 },
|
||||
|
@ -941,11 +946,11 @@ void put()
|
|||
puts("void bndcl(const BoundsReg& bnd, const Operand& op) { db(0xF3); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1A, NONE, !op.isMEM()); }");
|
||||
puts("void bndcu(const BoundsReg& bnd, const Operand& op) { db(0xF2); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1A, NONE, !op.isMEM()); }");
|
||||
puts("void bndcn(const BoundsReg& bnd, const Operand& op) { db(0xF2); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1B, NONE, !op.isMEM()); }");
|
||||
puts("void bndldx(const BoundsReg& bnd, const Address& addr) { opModM(addr, bnd, 0x0F, 0x1A); }");
|
||||
puts("void bndldx(const BoundsReg& bnd, const Address& addr) { opMIB(addr, bnd, 0x0F, 0x1A); }");
|
||||
puts("void bndmk(const BoundsReg& bnd, const Address& addr) { db(0xF3); opModM(addr, bnd, 0x0F, 0x1B); }");
|
||||
puts("void bndmov(const BoundsReg& bnd, const Operand& op) { db(0x66); opModRM(bnd, op, op.isBNDREG(), op.isMEM(), 0x0F, 0x1A); }");
|
||||
puts("void bndmov(const Address& addr, const BoundsReg& bnd) { db(0x66); opModM(addr, bnd, 0x0F, 0x1B); }");
|
||||
puts("void bndstx(const Address& addr, const BoundsReg& bnd) { opModM(addr, bnd, 0x0F, 0x1B); }");
|
||||
puts("void bndstx(const Address& addr, const BoundsReg& bnd) { opMIB(addr, bnd, 0x0F, 0x1B); }");
|
||||
}
|
||||
// misc
|
||||
{
|
||||
|
@ -1257,10 +1262,10 @@ void put()
|
|||
{ 0x7D, "hsubpd", T_0F | T_66 | T_YMM, 3 },
|
||||
{ 0x7D, "hsubps", T_0F | T_F2 | T_YMM, 3 },
|
||||
|
||||
{ 0xDC, "aesenc", T_0F38 | T_66 | T_W0, 3 },
|
||||
{ 0xDD, "aesenclast", T_0F38 | T_66 | T_W0, 3 },
|
||||
{ 0xDE, "aesdec", T_0F38 | T_66 | T_W0, 3 },
|
||||
{ 0xDF, "aesdeclast", T_0F38 | T_66 | T_W0, 3 },
|
||||
{ 0xDC, "aesenc", T_0F38 | T_66 | T_YMM | T_EVEX, 3 },
|
||||
{ 0xDD, "aesenclast", T_0F38 | T_66 | T_YMM | T_EVEX, 3 },
|
||||
{ 0xDE, "aesdec", T_0F38 | T_66 | T_YMM | T_EVEX, 3 },
|
||||
{ 0xDF, "aesdeclast", T_0F38 | T_66 | T_YMM | T_EVEX, 3 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
Xbyak 5.52 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
|
||||
Xbyak 5.601 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
|
||||
=============
|
||||
|
||||
Abstract
|
||||
|
@ -128,7 +128,7 @@ vcvtpd2dq xmm19, [eax+32]{1to4} --> vcvtpd2dq(xmm19, yword_b [eax+32]);
|
|||
|
||||
vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5); // specify m512
|
||||
vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit
|
||||
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, xword_b [rax+64], 5); // broadcast 32-bit to 128-bit
|
||||
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit
|
||||
```
|
||||
Remark
|
||||
* k1, ..., k7 are new opmask registers.
|
||||
|
@ -333,6 +333,9 @@ The header files under xbyak/ are independent of cybozulib.
|
|||
|
||||
History
|
||||
-------------
|
||||
* 2018/Jan/24 ver 5.601 add xword, yword, etc. into Xbyak::util namespace
|
||||
* 2018/Jan/05 ver 5.60 support AVX-512 for Ice lake(319433-030.pdf)
|
||||
* 2017/Aug/22 ver 5.53 fix mpx encoding, add bnd() prefix
|
||||
* 2017/Aug/18 ver 5.52 fix align (thanks to MerryMage)
|
||||
* 2017/Aug/17 ver 5.51 add multi-byte nop and align() uses it(thanks to inolen)
|
||||
* 2017/Aug/08 ver 5.50 add mpx(thanks to magurosan)
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.52
|
||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.601
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎概要
|
||||
|
@ -155,7 +155,7 @@ vcvtpd2dq xmm19, [eax+32]{1to4} --> vcvtpd2dq(xmm19, yword_b [eax+32]);
|
|||
|
||||
vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5); // specify m512
|
||||
vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit
|
||||
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, xword_b [rax+64], 5); // broadcast 32-bit to 128-bit
|
||||
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 256-bit
|
||||
|
||||
|
||||
注意
|
||||
|
@ -343,6 +343,9 @@ cybozulibは単体テストでのみ利用されていて、xbyak/ディレク
|
|||
-----------------------------------------------------------------------------
|
||||
◎履歴
|
||||
|
||||
2018/01/24 ver 5.601 xword, ywordなどをXbyak::util名前空間に追加
|
||||
2018/01/05 ver 5.60 Ice lake系命令対応(319433-030.pdf)
|
||||
2017/08/22 ver 5.53 mpxエンコーディングバグ修正, bnd()プレフィクス追加
|
||||
2017/08/18 ver 5.52 align修正(thanks to MerryMage)
|
||||
2017/08/17 ver 5.51 multi-byte nop追加 align()はそれを使用する(thanks to inolen)
|
||||
2017/08/08 ver 5.50 mpx追加(thanks to magurosan)
|
||||
|
|
|
@ -61,15 +61,23 @@ void putCPUinfo()
|
|||
{ Cpu::tMOVBE, "movbe" },
|
||||
{ Cpu::tAVX512F, "avx512f" },
|
||||
{ Cpu::tAVX512DQ, "avx512dq" },
|
||||
{ Cpu::tAVX512IFMA, "avx512ifma" },
|
||||
{ Cpu::tAVX512IFMA, "avx512_ifma" },
|
||||
{ Cpu::tAVX512PF, "avx512pf" },
|
||||
{ Cpu::tAVX512ER, "avx512er" },
|
||||
{ Cpu::tAVX512CD, "avx512cd" },
|
||||
{ Cpu::tAVX512BW, "avx512bw" },
|
||||
{ Cpu::tAVX512VL, "avx512vl" },
|
||||
{ Cpu::tAVX512VBMI, "avx512vbmi" },
|
||||
{ Cpu::tAVX512VBMI, "avx512_vbmi" },
|
||||
{ Cpu::tAVX512_4VNNIW, "avx512_4vnniw" },
|
||||
{ Cpu::tAVX512_4FMAPS, "avx512_4fmaps" },
|
||||
|
||||
{ Cpu::tAVX512_VBMI2, "avx512_vbmi2" },
|
||||
{ Cpu::tGFNI, "gfni" },
|
||||
{ Cpu::tVAES, "vaes" },
|
||||
{ Cpu::tVPCLMULQDQ, "vpclmulqdq" },
|
||||
{ Cpu::tAVX512_VNNI, "avx512_vnni" },
|
||||
{ Cpu::tAVX512_BITALG, "avx512_bitalg" },
|
||||
{ Cpu::tAVX512_VPOPCNTDQ, "avx512_vpopcntdq" },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
if (cpu.has(tbl[i].type)) printf(" %s", tbl[i].str);
|
||||
|
|
|
@ -1,8 +0,0 @@
|
|||
@echo off
|
||||
rm a.lst b.lst
|
||||
echo nasm
|
||||
nasm -l a.lst -f win64 test.asm
|
||||
cat a.lst
|
||||
echo yasm
|
||||
yasm -l b.lst -f win64 test.asm
|
||||
cat b.lst
|
|
@ -1,11 +1,11 @@
|
|||
TARGET = make_nm normalize_prefix jmp address nm_frame bad_address misc
|
||||
TARGET = make_nm normalize_prefix jmp address bad_address misc
|
||||
XBYAK_INC=../xbyak/xbyak.h
|
||||
BIT=32
|
||||
ifeq ($(shell uname -m),x86_64)
|
||||
BIT=64
|
||||
endif
|
||||
|
||||
ifeq ($(MODE_BIT),64)
|
||||
ifeq ($(BIT),64)
|
||||
TARGET += jmp64 address64
|
||||
endif
|
||||
|
||||
|
@ -28,14 +28,12 @@ address: address.cpp ../xbyak/xbyak.h
|
|||
$(CXX) $(CFLAGS) address.cpp -o $@ -m32
|
||||
address64: address.cpp ../xbyak/xbyak.h
|
||||
$(CXX) $(CFLAGS) address.cpp -o $@ -m64
|
||||
nm_frame: nm_frame.cpp ../xbyak/xbyak.h
|
||||
$(CXX) $(CFLAGS) nm_frame.cpp -o $@ -m32
|
||||
bad_address: bad_address.cpp ../xbyak/xbyak.h
|
||||
$(CXX) $(CFLAGS) bad_address.cpp -o $@
|
||||
misc: misc.cpp ../xbyak/xbyak.h
|
||||
$(CXX) $(CFLAGS) misc.cpp -o $@
|
||||
|
||||
test: normalize_prefix jmp bad_address
|
||||
test: normalize_prefix jmp bad_address $(TARGET)
|
||||
$(MAKE) -C ../gen
|
||||
./test_nm.sh
|
||||
./test_nm.sh Y
|
||||
|
@ -65,7 +63,7 @@ ifeq ($(BIT),64)
|
|||
./test_avx512.sh 64
|
||||
endif
|
||||
clean:
|
||||
rm -rf *.o $(TARGET) lib_run
|
||||
rm -rf *.o $(TARGET) lib_run nm.cpp nm_frame make_512
|
||||
|
||||
lib_run: lib_test.cpp lib_run.cpp lib.h
|
||||
$(CXX) $(CFLAGS) lib_run.cpp lib_test.cpp -o lib_run
|
||||
|
|
|
@ -86,13 +86,15 @@ public:
|
|||
}
|
||||
fflush(stdout);
|
||||
if (msg.empty()) {
|
||||
int err = ngCount_ + exceptionCount_;
|
||||
int total = okCount_ + err;
|
||||
std::cout << "ctest:name=" << getBaseName(*argv)
|
||||
<< ", module=" << list_.size()
|
||||
<< ", total=" << (okCount_ + ngCount_ + exceptionCount_)
|
||||
<< ", total=" << total
|
||||
<< ", ok=" << okCount_
|
||||
<< ", ng=" << ngCount_
|
||||
<< ", exception=" << exceptionCount_ << std::endl;
|
||||
return 0;
|
||||
return err > 0 ? 1 : 0;
|
||||
} else {
|
||||
std::cout << msg << std::endl;
|
||||
return 1;
|
||||
|
@ -128,6 +130,15 @@ bool isEqual(const T& lhs, const U& rhs)
|
|||
return lhs == rhs;
|
||||
}
|
||||
|
||||
// avoid warning of comparision of integers of different signs
|
||||
inline bool isEqual(size_t lhs, int rhs)
|
||||
{
|
||||
return lhs == size_t(rhs);
|
||||
}
|
||||
inline bool isEqual(int lhs, size_t rhs)
|
||||
{
|
||||
return size_t(lhs) == rhs;
|
||||
}
|
||||
inline bool isEqual(const char *lhs, const char *rhs)
|
||||
{
|
||||
return strcmp(lhs, rhs) == 0;
|
||||
|
@ -188,9 +199,9 @@ int main(int argc, char *argv[])
|
|||
@param y [in]
|
||||
*/
|
||||
#define CYBOZU_TEST_EQUAL(x, y) { \
|
||||
bool eq = cybozu::test::isEqual(x, y); \
|
||||
cybozu::test::test(eq, "CYBOZU_TEST_EQUAL", #x ", " #y, __FILE__, __LINE__); \
|
||||
if (!eq) { \
|
||||
bool _cybozu_eq = cybozu::test::isEqual(x, y); \
|
||||
cybozu::test::test(_cybozu_eq, "CYBOZU_TEST_EQUAL", #x ", " #y, __FILE__, __LINE__); \
|
||||
if (!_cybozu_eq) { \
|
||||
std::cout << "ctest: lhs=" << (x) << std::endl; \
|
||||
std::cout << "ctest: rhs=" << (y) << std::endl; \
|
||||
} \
|
||||
|
@ -201,22 +212,39 @@ int main(int argc, char *argv[])
|
|||
@param y [in]
|
||||
*/
|
||||
#define CYBOZU_TEST_NEAR(x, y, eps) { \
|
||||
bool isNear = fabs((x) - (y)) < eps; \
|
||||
cybozu::test::test(isNear, "CYBOZU_TEST_NEAR", #x ", " #y, __FILE__, __LINE__); \
|
||||
if (!isNear) { \
|
||||
bool _cybozu_isNear = fabs((x) - (y)) < eps; \
|
||||
cybozu::test::test(_cybozu_isNear, "CYBOZU_TEST_NEAR", #x ", " #y, __FILE__, __LINE__); \
|
||||
if (!_cybozu_isNear) { \
|
||||
std::cout << "ctest: lhs=" << (x) << std::endl; \
|
||||
std::cout << "ctest: rhs=" << (y) << std::endl; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define CYBOZU_TEST_EQUAL_POINTER(x, y) { \
|
||||
bool eq = x == y; \
|
||||
cybozu::test::test(eq, "CYBOZU_TEST_EQUAL_POINTER", #x ", " #y, __FILE__, __LINE__); \
|
||||
if (!eq) { \
|
||||
bool _cybozu_eq = x == y; \
|
||||
cybozu::test::test(_cybozu_eq, "CYBOZU_TEST_EQUAL_POINTER", #x ", " #y, __FILE__, __LINE__); \
|
||||
if (!_cybozu_eq) { \
|
||||
std::cout << "ctest: lhs=" << static_cast<const void*>(x) << std::endl; \
|
||||
std::cout << "ctest: rhs=" << static_cast<const void*>(y) << std::endl; \
|
||||
} \
|
||||
}
|
||||
/**
|
||||
alert if x[] != y[]
|
||||
@param x [in]
|
||||
@param y [in]
|
||||
@param n [in]
|
||||
*/
|
||||
#define CYBOZU_TEST_EQUAL_ARRAY(x, y, n) { \
|
||||
for (size_t _cybozu_test_i = 0, _cybozu_ie = (size_t)(n); _cybozu_test_i < _cybozu_ie; _cybozu_test_i++) { \
|
||||
bool _cybozu_eq = cybozu::test::isEqual((x)[_cybozu_test_i], (y)[_cybozu_test_i]); \
|
||||
cybozu::test::test(_cybozu_eq, "CYBOZU_TEST_EQUAL_ARRAY", #x ", " #y ", " #n, __FILE__, __LINE__); \
|
||||
if (!_cybozu_eq) { \
|
||||
std::cout << "ctest: i=" << _cybozu_test_i << std::endl; \
|
||||
std::cout << "ctest: lhs=" << (x)[_cybozu_test_i] << std::endl; \
|
||||
std::cout << "ctest: rhs=" << (y)[_cybozu_test_i] << std::endl; \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
/**
|
||||
always alert
|
||||
|
@ -229,25 +257,25 @@ int main(int argc, char *argv[])
|
|||
*/
|
||||
#define CYBOZU_TEST_EXCEPTION_MESSAGE(statement, Exception, msg) \
|
||||
{ \
|
||||
int ret = 0; \
|
||||
std::string errMsg; \
|
||||
int _cybozu_ret = 0; \
|
||||
std::string _cybozu_errMsg; \
|
||||
try { \
|
||||
statement; \
|
||||
ret = 1; \
|
||||
} catch (const Exception& e) { \
|
||||
errMsg = e.what(); \
|
||||
if (errMsg.find(msg) == std::string::npos) { \
|
||||
ret = 2; \
|
||||
_cybozu_ret = 1; \
|
||||
} catch (const Exception& _cybozu_e) { \
|
||||
_cybozu_errMsg = _cybozu_e.what(); \
|
||||
if (_cybozu_errMsg.find(msg) == std::string::npos) { \
|
||||
_cybozu_ret = 2; \
|
||||
} \
|
||||
} catch (...) { \
|
||||
ret = 3; \
|
||||
_cybozu_ret = 3; \
|
||||
} \
|
||||
if (ret) { \
|
||||
if (_cybozu_ret) { \
|
||||
cybozu::test::test(false, "CYBOZU_TEST_EXCEPTION_MESSAGE", #statement ", " #Exception ", " #msg, __FILE__, __LINE__); \
|
||||
if (ret == 1) { \
|
||||
if (_cybozu_ret == 1) { \
|
||||
std::cout << "ctest: no exception" << std::endl; \
|
||||
} else if (ret == 2) { \
|
||||
std::cout << "ctest: bad exception msg:" << errMsg << std::endl; \
|
||||
} else if (_cybozu_ret == 2) { \
|
||||
std::cout << "ctest: bad exception msg:" << _cybozu_errMsg << std::endl; \
|
||||
} else { \
|
||||
std::cout << "ctest: unexpected exception" << std::endl; \
|
||||
} \
|
||||
|
@ -258,17 +286,17 @@ int main(int argc, char *argv[])
|
|||
|
||||
#define CYBOZU_TEST_EXCEPTION(statement, Exception) \
|
||||
{ \
|
||||
int ret = 0; \
|
||||
int _cybozu_ret = 0; \
|
||||
try { \
|
||||
statement; \
|
||||
ret = 1; \
|
||||
_cybozu_ret = 1; \
|
||||
} catch (const Exception&) { \
|
||||
} catch (...) { \
|
||||
ret = 2; \
|
||||
_cybozu_ret = 2; \
|
||||
} \
|
||||
if (ret) { \
|
||||
if (_cybozu_ret) { \
|
||||
cybozu::test::test(false, "CYBOZU_TEST_EXCEPTION", #statement ", " #Exception, __FILE__, __LINE__); \
|
||||
if (ret == 1) { \
|
||||
if (_cybozu_ret == 1) { \
|
||||
std::cout << "ctest: no exception" << std::endl; \
|
||||
} else { \
|
||||
std::cout << "ctest: unexpected exception" << std::endl; \
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include <xbyak/xbyak.h>
|
||||
#include <cybozu/inttype.hpp>
|
||||
#include <cybozu/test.hpp>
|
||||
|
@ -401,7 +402,7 @@ CYBOZU_TEST_AUTO(test5)
|
|||
using namespace Xbyak;
|
||||
inLocalLabel();
|
||||
mov(ecx, count);
|
||||
xor(eax, eax);
|
||||
xor_(eax, eax);
|
||||
L(".lp");
|
||||
for (int i = 0; i < count; i++) {
|
||||
L(Label::toStr(i));
|
||||
|
|
|
@ -1363,6 +1363,22 @@ class Test {
|
|||
put("bndmk", BNDREG, MEM);
|
||||
put("bndmov", BNDREG, BNDREG|MEM);
|
||||
put("bndstx", MEM, BNDREG);
|
||||
put("bndstx", "ptr [eax]", "[eax]", BNDREG);
|
||||
put("bndstx", "ptr [eax+5]", "[eax+5]", BNDREG);
|
||||
put("bndstx", "ptr [eax+500]", "[eax+500]", BNDREG);
|
||||
put("bndstx", "ptr [eax+ecx]", "[eax+ecx]", BNDREG);
|
||||
put("bndstx", "ptr [ecx+eax]", "[ecx+eax]", BNDREG);
|
||||
put("bndstx", "ptr [eax+esp]", "[eax+esp]", BNDREG);
|
||||
put("bndstx", "ptr [esp+eax]", "[esp+eax]", BNDREG);
|
||||
put("bndstx", "ptr [eax+ecx*2]", "[eax+ecx*2]", BNDREG);
|
||||
put("bndstx", "ptr [ecx+ecx]", "[ecx+ecx]", BNDREG);
|
||||
put("bndstx", "ptr [ecx*2]", "[ecx*2]", BNDREG);
|
||||
put("bndstx", "ptr [eax+ecx*2+500]", "[eax+ecx*2+500]", BNDREG);
|
||||
#ifdef XBYAK64
|
||||
put("bndstx", "ptr [rax+rcx*2]", "[rax+rcx*2]", BNDREG);
|
||||
put("bndstx", "ptr [r9*2]", "[r9*2]", BNDREG);
|
||||
put("bndstx", "ptr [r9*2+r15]", "[r9*2+r15]", BNDREG);
|
||||
#endif
|
||||
}
|
||||
void putFpuMem16_32() const
|
||||
{
|
||||
|
|
580
test/misc.cpp
580
test/misc.cpp
|
@ -103,3 +103,583 @@ CYBOZU_TEST_AUTO(align)
|
|||
}
|
||||
} c;
|
||||
}
|
||||
|
||||
#ifdef XBYAK64
|
||||
CYBOZU_TEST_AUTO(vfmaddps)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
v4fmaddps(zmm1, zmm8, ptr [rdx + 64]);
|
||||
v4fmaddss(xmm15, xmm8, ptr [rax + 64]);
|
||||
v4fnmaddps(zmm5 | k5, zmm2, ptr [rcx + 0x80]);
|
||||
v4fnmaddss(xmm31, xmm2, ptr [rsp + 0x80]);
|
||||
vp4dpwssd(zmm23 | k7 | T_z, zmm1, ptr [rax + 64]);
|
||||
vp4dpwssds(zmm10 | k4, zmm3, ptr [rsp + rax * 4 + 64]);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
0x62, 0xf2, 0x3f, 0x48, 0x9a, 0x4a, 0x04,
|
||||
0x62, 0x72, 0x3f, 0x08, 0x9b, 0x78, 0x04,
|
||||
0x62, 0xf2, 0x6f, 0x4d, 0xaa, 0x69, 0x08,
|
||||
0x62, 0x62, 0x6f, 0x08, 0xab, 0x7c, 0x24, 0x08,
|
||||
0x62, 0xe2, 0x77, 0xcf, 0x52, 0x78, 0x04,
|
||||
0x62, 0x72, 0x67, 0x4c, 0x53, 0x54, 0x84, 0x04,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
CYBOZU_TEST_AUTO(vaes)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
vaesdec(xmm20, xmm30, ptr [rcx + 64]);
|
||||
vaesdec(ymm1, ymm2, ptr [rcx + 64]);
|
||||
vaesdec(zmm1, zmm2, ptr [rcx + 64]);
|
||||
|
||||
vaesdeclast(xmm20, xmm30, ptr [rax + 64]);
|
||||
vaesdeclast(ymm20, ymm30, ptr [rax + 64]);
|
||||
vaesdeclast(zmm20, zmm30, ptr [rax + 64]);
|
||||
|
||||
vaesenc(xmm20, xmm30, ptr [rcx + 64]);
|
||||
vaesenc(ymm1, ymm2, ptr [rcx + 64]);
|
||||
vaesenc(zmm1, zmm2, ptr [rcx + 64]);
|
||||
|
||||
vaesenclast(xmm20, xmm30, ptr [rax + 64]);
|
||||
vaesenclast(ymm20, ymm30, ptr [rax + 64]);
|
||||
vaesenclast(zmm20, zmm30, ptr [rax + 64]);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
0x62, 0xE2, 0x0D, 0x00, 0xDE, 0x61, 0x04,
|
||||
0xC4, 0xE2, 0x6D, 0xDE, 0x49, 0x40,
|
||||
0x62, 0xF2, 0x6D, 0x48, 0xDE, 0x49, 0x01,
|
||||
|
||||
0x62, 0xE2, 0x0D, 0x00, 0xDF, 0x60, 0x04,
|
||||
0x62, 0xE2, 0x0D, 0x20, 0xDF, 0x60, 0x02,
|
||||
0x62, 0xE2, 0x0D, 0x40, 0xDF, 0x60, 0x01,
|
||||
|
||||
0x62, 0xE2, 0x0D, 0x00, 0xDC, 0x61, 0x04,
|
||||
0xC4, 0xE2, 0x6D, 0xDC, 0x49, 0x40,
|
||||
0x62, 0xF2, 0x6D, 0x48, 0xDC, 0x49, 0x01,
|
||||
|
||||
0x62, 0xE2, 0x0D, 0x00, 0xDD, 0x60, 0x04,
|
||||
0x62, 0xE2, 0x0D, 0x20, 0xDD, 0x60, 0x02,
|
||||
0x62, 0xE2, 0x0D, 0x40, 0xDD, 0x60, 0x01,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
CYBOZU_TEST_AUTO(vpclmulqdq)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
vpclmulqdq(xmm2, xmm3, ptr [rax + 64], 3);
|
||||
vpclmulqdq(ymm2, ymm3, ptr [rax + 64], 3);
|
||||
vpclmulqdq(zmm2, zmm3, ptr [rax + 64], 3);
|
||||
|
||||
vpclmulqdq(xmm20, xmm3, ptr [rax + 64], 3);
|
||||
vpclmulqdq(ymm20, ymm3, ptr [rax + 64], 3);
|
||||
vpclmulqdq(zmm20, zmm3, ptr [rax + 64], 3);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
0xc4, 0xe3, 0x61, 0x44, 0x50, 0x40, 0x03,
|
||||
0xc4, 0xe3, 0x65, 0x44, 0x50, 0x40, 0x03,
|
||||
0x62, 0xf3, 0x65, 0x48, 0x44, 0x50, 0x01, 0x03,
|
||||
0x62, 0xe3, 0x65, 0x08, 0x44, 0x60, 0x04, 0x03,
|
||||
0x62, 0xe3, 0x65, 0x28, 0x44, 0x60, 0x02, 0x03,
|
||||
0x62, 0xe3, 0x65, 0x48, 0x44, 0x60, 0x01, 0x03,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
CYBOZU_TEST_AUTO(vcompressb_w)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
vcompressb(ptr[rax + 64], xmm1);
|
||||
vcompressb(xmm30 | k5, xmm1);
|
||||
vcompressb(ptr[rax + 64], ymm1);
|
||||
vcompressb(ymm30 | k3 |T_z, ymm1);
|
||||
vcompressb(ptr[rax + 64], zmm1);
|
||||
vcompressb(zmm30 | k2 |T_z, zmm1);
|
||||
|
||||
vcompressw(ptr[rax + 64], xmm1);
|
||||
vcompressw(xmm30 | k5, xmm1);
|
||||
vcompressw(ptr[rax + 64], ymm1);
|
||||
vcompressw(ymm30 | k3 |T_z, ymm1);
|
||||
vcompressw(ptr[rax + 64], zmm1);
|
||||
vcompressw(zmm30 | k2 |T_z, zmm1);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
0x62, 0xf2, 0x7d, 0x08, 0x63, 0x48, 0x40,
|
||||
0x62, 0x92, 0x7d, 0x0d, 0x63, 0xce,
|
||||
0x62, 0xf2, 0x7d, 0x28, 0x63, 0x48, 0x40,
|
||||
0x62, 0x92, 0x7d, 0xab, 0x63, 0xce,
|
||||
0x62, 0xf2, 0x7d, 0x48, 0x63, 0x48, 0x40,
|
||||
0x62, 0x92, 0x7d, 0xca, 0x63, 0xce,
|
||||
|
||||
0x62, 0xf2, 0xfd, 0x08, 0x63, 0x48, 0x20,
|
||||
0x62, 0x92, 0xfd, 0x0d, 0x63, 0xce,
|
||||
0x62, 0xf2, 0xfd, 0x28, 0x63, 0x48, 0x20,
|
||||
0x62, 0x92, 0xfd, 0xab, 0x63, 0xce,
|
||||
0x62, 0xf2, 0xfd, 0x48, 0x63, 0x48, 0x20,
|
||||
0x62, 0x92, 0xfd, 0xca, 0x63, 0xce,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
CYBOZU_TEST_AUTO(shld)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
vpshldw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshldw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshldw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
|
||||
vpshldd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshldd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshldd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
|
||||
vpshldq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshldq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshldq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
|
||||
vpshldvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshldvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshldvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
|
||||
vpshldvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshldvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshldvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
|
||||
vpshldvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshldvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshldvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
0x62, 0xf3, 0xed, 0x8b, 0x70, 0x68, 0x04, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xab, 0x70, 0x68, 0x02, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xcb, 0x70, 0x68, 0x01, 0x05,
|
||||
|
||||
0x62, 0xf3, 0x6d, 0x8b, 0x71, 0x68, 0x04, 0x05,
|
||||
0x62, 0xf3, 0x6d, 0xab, 0x71, 0x68, 0x02, 0x05,
|
||||
0x62, 0xf3, 0x6d, 0xcb, 0x71, 0x68, 0x01, 0x05,
|
||||
|
||||
0x62, 0xf3, 0xed, 0x8b, 0x71, 0x68, 0x04, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xab, 0x71, 0x68, 0x02, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xcb, 0x71, 0x68, 0x01, 0x05,
|
||||
|
||||
0x62, 0xf2, 0xed, 0x8b, 0x70, 0x68, 0x04,
|
||||
0x62, 0xf2, 0xed, 0xab, 0x70, 0x68, 0x02,
|
||||
0x62, 0xf2, 0xed, 0xcb, 0x70, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0x6d, 0x8b, 0x71, 0x68, 0x04,
|
||||
0x62, 0xf2, 0x6d, 0xab, 0x71, 0x68, 0x02,
|
||||
0x62, 0xf2, 0x6d, 0xcb, 0x71, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0xed, 0x8b, 0x71, 0x68, 0x04,
|
||||
0x62, 0xf2, 0xed, 0xab, 0x71, 0x68, 0x02,
|
||||
0x62, 0xf2, 0xed, 0xcb, 0x71, 0x68, 0x01,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
CYBOZU_TEST_AUTO(shrd)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
vpshrdw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshrdw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshrdw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
|
||||
vpshrdd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshrdd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshrdd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
|
||||
vpshrdq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshrdq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshrdq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
|
||||
vpshrdvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshrdvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshrdvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
|
||||
vpshrdvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshrdvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshrdvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
|
||||
vpshrdvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshrdvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshrdvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
|
||||
vpshrdd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
|
||||
vpshrdd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
|
||||
vpshrdd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);
|
||||
|
||||
vpshrdq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
|
||||
vpshrdq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
|
||||
vpshrdq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);
|
||||
|
||||
vpshrdvd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
|
||||
vpshrdvd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
|
||||
vpshrdvd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);
|
||||
|
||||
vpshrdvq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
|
||||
vpshrdvq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
|
||||
vpshrdvq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
0x62, 0xf3, 0xed, 0x8b, 0x72, 0x68, 0x04, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xab, 0x72, 0x68, 0x02, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xcb, 0x72, 0x68, 0x01, 0x05,
|
||||
|
||||
0x62, 0xf3, 0x6d, 0x8b, 0x73, 0x68, 0x04, 0x05,
|
||||
0x62, 0xf3, 0x6d, 0xab, 0x73, 0x68, 0x02, 0x05,
|
||||
0x62, 0xf3, 0x6d, 0xcb, 0x73, 0x68, 0x01, 0x05,
|
||||
|
||||
0x62, 0xf3, 0xed, 0x8b, 0x73, 0x68, 0x04, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xab, 0x73, 0x68, 0x02, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xcb, 0x73, 0x68, 0x01, 0x05,
|
||||
|
||||
0x62, 0xf2, 0xed, 0x8b, 0x72, 0x68, 0x04,
|
||||
0x62, 0xf2, 0xed, 0xab, 0x72, 0x68, 0x02,
|
||||
0x62, 0xf2, 0xed, 0xcb, 0x72, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0x6d, 0x8b, 0x73, 0x68, 0x04,
|
||||
0x62, 0xf2, 0x6d, 0xab, 0x73, 0x68, 0x02,
|
||||
0x62, 0xf2, 0x6d, 0xcb, 0x73, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0xed, 0x8b, 0x73, 0x68, 0x04,
|
||||
0x62, 0xf2, 0xed, 0xab, 0x73, 0x68, 0x02,
|
||||
0x62, 0xf2, 0xed, 0xcb, 0x73, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf3, 0x6d, 0x9b, 0x73, 0x68, 0x10, 0x05,
|
||||
0x62, 0xf3, 0x6d, 0xbb, 0x73, 0x68, 0x10, 0x05,
|
||||
0x62, 0xf3, 0x6d, 0xdb, 0x73, 0x68, 0x10, 0x05,
|
||||
|
||||
0x62, 0xf3, 0xed, 0x9b, 0x73, 0x68, 0x08, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xbb, 0x73, 0x68, 0x08, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xdb, 0x73, 0x68, 0x08, 0x05,
|
||||
|
||||
0x62, 0xf2, 0x6d, 0x9b, 0x73, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x6d, 0xbb, 0x73, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x6d, 0xdb, 0x73, 0x68, 0x10,
|
||||
|
||||
0x62, 0xf2, 0xed, 0x9b, 0x73, 0x68, 0x08,
|
||||
0x62, 0xf2, 0xed, 0xbb, 0x73, 0x68, 0x08,
|
||||
0x62, 0xf2, 0xed, 0xdb, 0x73, 0x68, 0x08,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
CYBOZU_TEST_AUTO(vpopcnt)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
vpopcntb(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntb(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntb(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
|
||||
vpopcntw(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntw(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntw(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
|
||||
vpopcntd(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntd(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntd(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
|
||||
vpopcntd(xmm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
vpopcntd(ymm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
vpopcntd(zmm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
|
||||
vpopcntq(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntq(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntq(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
|
||||
vpopcntq(xmm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
vpopcntq(ymm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
vpopcntq(zmm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
0x62, 0xf2, 0x7d, 0x8b, 0x54, 0x68, 0x04,
|
||||
0x62, 0xf2, 0x7d, 0xab, 0x54, 0x68, 0x02,
|
||||
0x62, 0xf2, 0x7d, 0xcb, 0x54, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0xfd, 0x8b, 0x54, 0x68, 0x04,
|
||||
0x62, 0xf2, 0xfd, 0xab, 0x54, 0x68, 0x02,
|
||||
0x62, 0xf2, 0xfd, 0xcb, 0x54, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0x7d, 0x8b, 0x55, 0x68, 0x04,
|
||||
0x62, 0xf2, 0x7d, 0xab, 0x55, 0x68, 0x02,
|
||||
0x62, 0xf2, 0x7d, 0xcb, 0x55, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0x7d, 0x9b, 0x55, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x7d, 0xbb, 0x55, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x7d, 0xdb, 0x55, 0x68, 0x10,
|
||||
|
||||
0x62, 0xf2, 0xfd, 0x8b, 0x55, 0x68, 0x04,
|
||||
0x62, 0xf2, 0xfd, 0xab, 0x55, 0x68, 0x02,
|
||||
0x62, 0xf2, 0xfd, 0xcb, 0x55, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0xfd, 0x9b, 0x55, 0x68, 0x08,
|
||||
0x62, 0xf2, 0xfd, 0xbb, 0x55, 0x68, 0x08,
|
||||
0x62, 0xf2, 0xfd, 0xdb, 0x55, 0x68, 0x08,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
CYBOZU_TEST_AUTO(vpdpbus)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
vpdpbusd(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
|
||||
vpdpbusd(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
|
||||
vpdpbusd(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
|
||||
|
||||
vpdpbusd(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
|
||||
vpdpbusd(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
|
||||
vpdpbusd(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
|
||||
|
||||
vpdpbusds(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
|
||||
vpdpbusds(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
|
||||
vpdpbusds(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
|
||||
|
||||
vpdpbusds(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
|
||||
vpdpbusds(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
|
||||
vpdpbusds(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
|
||||
|
||||
vpdpwssd(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
|
||||
vpdpwssd(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
|
||||
vpdpwssd(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
|
||||
|
||||
vpdpwssd(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
|
||||
vpdpwssd(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
|
||||
vpdpwssd(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
|
||||
|
||||
vpdpwssds(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
|
||||
vpdpwssds(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
|
||||
vpdpwssds(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
|
||||
|
||||
vpdpwssds(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
|
||||
vpdpwssds(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
|
||||
vpdpwssds(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
0x62, 0xf2, 0x5d, 0x83, 0x50, 0x68, 0x04,
|
||||
0x62, 0xf2, 0x5d, 0xa3, 0x50, 0x68, 0x02,
|
||||
0x62, 0xf2, 0x5d, 0xc3, 0x50, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0x5d, 0x93, 0x50, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x5d, 0xb3, 0x50, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x5d, 0xd3, 0x50, 0x68, 0x10,
|
||||
|
||||
0x62, 0xf2, 0x5d, 0x83, 0x51, 0x68, 0x04,
|
||||
0x62, 0xf2, 0x5d, 0xa3, 0x51, 0x68, 0x02,
|
||||
0x62, 0xf2, 0x5d, 0xc3, 0x51, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0x5d, 0x93, 0x51, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x5d, 0xb3, 0x51, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x5d, 0xd3, 0x51, 0x68, 0x10,
|
||||
|
||||
0x62, 0xf2, 0x5d, 0x83, 0x52, 0x68, 0x04,
|
||||
0x62, 0xf2, 0x5d, 0xa3, 0x52, 0x68, 0x02,
|
||||
0x62, 0xf2, 0x5d, 0xc3, 0x52, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0x5d, 0x93, 0x52, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x5d, 0xb3, 0x52, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x5d, 0xd3, 0x52, 0x68, 0x10,
|
||||
|
||||
0x62, 0xf2, 0x5d, 0x83, 0x53, 0x68, 0x04,
|
||||
0x62, 0xf2, 0x5d, 0xa3, 0x53, 0x68, 0x02,
|
||||
0x62, 0xf2, 0x5d, 0xc3, 0x53, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0x5d, 0x93, 0x53, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x5d, 0xb3, 0x53, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x5d, 0xd3, 0x53, 0x68, 0x10,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
CYBOZU_TEST_AUTO(vexpand_vpshufbitqmb)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
vpexpandb(xmm5|k3|T_z, xmm30);
|
||||
vpexpandb(ymm5|k3|T_z, ymm30);
|
||||
vpexpandb(zmm5|k3|T_z, zmm30);
|
||||
vpexpandb(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpexpandb(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpexpandb(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
|
||||
vpexpandw(xmm5|k3|T_z, xmm30);
|
||||
vpexpandw(ymm5|k3|T_z, ymm30);
|
||||
vpexpandw(zmm5|k3|T_z, zmm30);
|
||||
vpexpandw(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpexpandw(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpexpandw(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
|
||||
vpshufbitqmb(k1|k2, xmm2, ptr [rax + 0x40]);
|
||||
vpshufbitqmb(k1|k2, ymm2, ptr [rax + 0x40]);
|
||||
vpshufbitqmb(k1|k2, zmm2, ptr [rax + 0x40]);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
0x62, 0x92, 0x7d, 0x8b, 0x62, 0xee,
|
||||
0x62, 0x92, 0x7d, 0xab, 0x62, 0xee,
|
||||
0x62, 0x92, 0x7d, 0xcb, 0x62, 0xee,
|
||||
0x62, 0xf2, 0x7d, 0x8b, 0x62, 0x68, 0x40,
|
||||
0x62, 0xf2, 0x7d, 0xab, 0x62, 0x68, 0x40,
|
||||
0x62, 0xf2, 0x7d, 0xcb, 0x62, 0x68, 0x40,
|
||||
|
||||
0x62, 0x92, 0xfd, 0x8b, 0x62, 0xee,
|
||||
0x62, 0x92, 0xfd, 0xab, 0x62, 0xee,
|
||||
0x62, 0x92, 0xfd, 0xcb, 0x62, 0xee,
|
||||
0x62, 0xf2, 0xfd, 0x8b, 0x62, 0x68, 0x20,
|
||||
0x62, 0xf2, 0xfd, 0xab, 0x62, 0x68, 0x20,
|
||||
0x62, 0xf2, 0xfd, 0xcb, 0x62, 0x68, 0x20,
|
||||
|
||||
0x62, 0xf2, 0x6d, 0x0a, 0x8f, 0x48, 0x04,
|
||||
0x62, 0xf2, 0x6d, 0x2a, 0x8f, 0x48, 0x02,
|
||||
0x62, 0xf2, 0x6d, 0x4a, 0x8f, 0x48, 0x01,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
CYBOZU_TEST_AUTO(gf2)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
///
|
||||
gf2p8affineinvqb(xmm1, xmm2, 3);
|
||||
gf2p8affineinvqb(xmm1, ptr [rax + 0x40], 3);
|
||||
|
||||
vgf2p8affineinvqb(xmm1, xmm5, xmm2, 3);
|
||||
vgf2p8affineinvqb(ymm1, ymm5, ymm2, 3);
|
||||
vgf2p8affineinvqb(xmm1, xmm5, ptr [rax + 0x40], 3);
|
||||
vgf2p8affineinvqb(ymm1, ymm5, ptr [rax + 0x40], 3);
|
||||
|
||||
vgf2p8affineinvqb(xmm30, xmm31, xmm4, 5);
|
||||
vgf2p8affineinvqb(ymm30, ymm31, ymm4, 5);
|
||||
vgf2p8affineinvqb(zmm30, zmm31, zmm4, 5);
|
||||
|
||||
vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
|
||||
vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
|
||||
vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);
|
||||
|
||||
vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
|
||||
vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
|
||||
vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);
|
||||
///
|
||||
gf2p8affineqb(xmm1, xmm2, 3);
|
||||
gf2p8affineqb(xmm1, ptr [rax + 0x40], 3);
|
||||
|
||||
vgf2p8affineqb(xmm1, xmm5, xmm2, 3);
|
||||
vgf2p8affineqb(ymm1, ymm5, ymm2, 3);
|
||||
vgf2p8affineqb(xmm1, xmm5, ptr [rax + 0x40], 3);
|
||||
vgf2p8affineqb(ymm1, ymm5, ptr [rax + 0x40], 3);
|
||||
|
||||
vgf2p8affineqb(xmm30, xmm31, xmm4, 5);
|
||||
vgf2p8affineqb(ymm30, ymm31, ymm4, 5);
|
||||
vgf2p8affineqb(zmm30, zmm31, zmm4, 5);
|
||||
|
||||
vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
|
||||
vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
|
||||
vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);
|
||||
|
||||
vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
|
||||
vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
|
||||
vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);
|
||||
///
|
||||
gf2p8mulb(xmm1, xmm2);
|
||||
gf2p8mulb(xmm1, ptr [rax + 0x40]);
|
||||
|
||||
vgf2p8mulb(xmm1, xmm5, xmm2);
|
||||
vgf2p8mulb(ymm1, ymm5, ymm2);
|
||||
vgf2p8mulb(xmm1, xmm5, ptr [rax + 0x40]);
|
||||
vgf2p8mulb(ymm1, ymm5, ptr [rax + 0x40]);
|
||||
|
||||
vgf2p8mulb(xmm30, xmm31, xmm4);
|
||||
vgf2p8mulb(ymm30, ymm31, ymm4);
|
||||
vgf2p8mulb(zmm30, zmm31, zmm4);
|
||||
|
||||
vgf2p8mulb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40]);
|
||||
vgf2p8mulb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40]);
|
||||
vgf2p8mulb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40]);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
0x66, 0x0f, 0x3a, 0xcf, 0xca, 0x03,
|
||||
0x66, 0x0f, 0x3a, 0xcf, 0x48, 0x40, 0x03,
|
||||
0xc4, 0xe3, 0xd1, 0xcf, 0xca, 0x03,
|
||||
0xc4, 0xe3, 0xd5, 0xcf, 0xca, 0x03,
|
||||
0xc4, 0xe3, 0xd1, 0xcf, 0x48, 0x40, 0x03,
|
||||
0xc4, 0xe3, 0xd5, 0xcf, 0x48, 0x40, 0x03,
|
||||
0x62, 0x63, 0x85, 0x00, 0xcf, 0xf4, 0x05,
|
||||
0x62, 0x63, 0x85, 0x20, 0xcf, 0xf4, 0x05,
|
||||
0x62, 0x63, 0x85, 0x40, 0xcf, 0xf4, 0x05,
|
||||
0x62, 0x63, 0xd5, 0x89, 0xcf, 0x70, 0x04, 0x05,
|
||||
0x62, 0x63, 0xd5, 0xa9, 0xcf, 0x70, 0x02, 0x05,
|
||||
0x62, 0x63, 0xd5, 0xc9, 0xcf, 0x70, 0x01, 0x05,
|
||||
0x62, 0x63, 0xd5, 0x99, 0xcf, 0x70, 0x08, 0x05,
|
||||
0x62, 0x63, 0xd5, 0xb9, 0xcf, 0x70, 0x08, 0x05,
|
||||
0x62, 0x63, 0xd5, 0xd9, 0xcf, 0x70, 0x08, 0x05,
|
||||
|
||||
0x66, 0x0f, 0x3a, 0xce, 0xca, 0x03,
|
||||
0x66, 0x0f, 0x3a, 0xce, 0x48, 0x40, 0x03,
|
||||
0xc4, 0xe3, 0xd1, 0xce, 0xca, 0x03,
|
||||
0xc4, 0xe3, 0xd5, 0xce, 0xca, 0x03,
|
||||
0xc4, 0xe3, 0xd1, 0xce, 0x48, 0x40, 0x03,
|
||||
0xc4, 0xe3, 0xd5, 0xce, 0x48, 0x40, 0x03,
|
||||
0x62, 0x63, 0x85, 0x00, 0xce, 0xf4, 0x05,
|
||||
0x62, 0x63, 0x85, 0x20, 0xce, 0xf4, 0x05,
|
||||
0x62, 0x63, 0x85, 0x40, 0xce, 0xf4, 0x05,
|
||||
0x62, 0x63, 0xd5, 0x89, 0xce, 0x70, 0x04, 0x05,
|
||||
0x62, 0x63, 0xd5, 0xa9, 0xce, 0x70, 0x02, 0x05,
|
||||
0x62, 0x63, 0xd5, 0xc9, 0xce, 0x70, 0x01, 0x05,
|
||||
0x62, 0x63, 0xd5, 0x99, 0xce, 0x70, 0x08, 0x05,
|
||||
0x62, 0x63, 0xd5, 0xb9, 0xce, 0x70, 0x08, 0x05,
|
||||
0x62, 0x63, 0xd5, 0xd9, 0xce, 0x70, 0x08, 0x05,
|
||||
|
||||
0x66, 0x0f, 0x38, 0xcf, 0xca,
|
||||
0x66, 0x0f, 0x38, 0xcf, 0x48, 0x40,
|
||||
0xc4, 0xe2, 0x51, 0xcf, 0xca,
|
||||
0xc4, 0xe2, 0x55, 0xcf, 0xca,
|
||||
0xc4, 0xe2, 0x51, 0xcf, 0x48, 0x40,
|
||||
0xc4, 0xe2, 0x55, 0xcf, 0x48, 0x40,
|
||||
0x62, 0x62, 0x05, 0x00, 0xcf, 0xf4,
|
||||
0x62, 0x62, 0x05, 0x20, 0xcf, 0xf4,
|
||||
0x62, 0x62, 0x05, 0x40, 0xcf, 0xf4,
|
||||
0x62, 0x62, 0x55, 0x89, 0xcf, 0x70, 0x04,
|
||||
0x62, 0x62, 0x55, 0xa9, 0xcf, 0x70, 0x02,
|
||||
0x62, 0x62, 0x55, 0xc9, 0xcf, 0x70, 0x01,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -6,6 +6,7 @@ using namespace Xbyak;
|
|||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable : 4245)
|
||||
#pragma warning(disable : 4312)
|
||||
#endif
|
||||
class Sample : public CodeGenerator {
|
||||
void operator=(const Sample&);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
@echo off
|
||||
set FILTER=cat
|
||||
set FILTER=grep -v warning
|
||||
if /i "%1"=="64" (
|
||||
set OPT2=-DXBYAK64
|
||||
set OPT3=win64
|
||||
|
|
150
xbyak/xbyak.h
150
xbyak/xbyak.h
|
@ -105,7 +105,7 @@ namespace Xbyak {
|
|||
|
||||
enum {
|
||||
DEFAULT_MAX_CODE_SIZE = 4096,
|
||||
VERSION = 0x5520 /* 0xABCD = A.BC(D) */
|
||||
VERSION = 0x5601 /* 0xABCD = A.BC(D) */
|
||||
};
|
||||
|
||||
#ifndef MIE_INTEGER_TYPE_DEFINED
|
||||
|
@ -177,6 +177,7 @@ enum {
|
|||
ERR_INVALID_OPMASK_WITH_MEMORY,
|
||||
ERR_INVALID_ZERO,
|
||||
ERR_INVALID_RIP_IN_AUTO_GROW,
|
||||
ERR_INVALID_MIB_ADDRESS,
|
||||
ERR_INTERNAL
|
||||
};
|
||||
|
||||
|
@ -237,6 +238,7 @@ public:
|
|||
"invalid opmask with memory",
|
||||
"invalid zero",
|
||||
"invalid rip in AutoGrow",
|
||||
"invalid mib address",
|
||||
"internal error",
|
||||
};
|
||||
assert((size_t)err_ < sizeof(errTbl) / sizeof(*errTbl));
|
||||
|
@ -345,6 +347,9 @@ public:
|
|||
};
|
||||
#endif
|
||||
|
||||
class Address;
|
||||
class Reg;
|
||||
|
||||
class Operand {
|
||||
static const uint8 EXT8BIT = 0x20;
|
||||
unsigned int idx_:6; // 0..31 + EXT8BIT = 1 if spl/bpl/sil/dil
|
||||
|
@ -497,6 +502,8 @@ public:
|
|||
bool isEqualIfNotInherited(const Operand& rhs) const { return idx_ == rhs.idx_ && kind_ == rhs.kind_ && bit_ == rhs.bit_ && zero_ == rhs.zero_ && mask_ == rhs.mask_ && rounding_ == rhs.rounding_; }
|
||||
bool operator==(const Operand& rhs) const;
|
||||
bool operator!=(const Operand& rhs) const { return !operator==(rhs); }
|
||||
const Address& getAddress() const;
|
||||
const Reg& getReg() const;
|
||||
};
|
||||
|
||||
class Label;
|
||||
|
@ -530,6 +537,12 @@ public:
|
|||
#endif
|
||||
};
|
||||
|
||||
inline const Reg& Operand::getReg() const
|
||||
{
|
||||
assert(!isMEM());
|
||||
return static_cast<const Reg&>(*this);
|
||||
}
|
||||
|
||||
struct Reg8 : public Reg {
|
||||
explicit Reg8(int idx = 0, bool ext8bit = false) : Reg(idx, Operand::REG, 8, ext8bit) { }
|
||||
};
|
||||
|
@ -543,6 +556,13 @@ struct Mmx : public Reg {
|
|||
};
|
||||
|
||||
struct EvexModifierRounding {
|
||||
enum {
|
||||
T_RN_SAE = 1,
|
||||
T_RD_SAE = 2,
|
||||
T_RU_SAE = 3,
|
||||
T_RZ_SAE = 4,
|
||||
T_SAE = 5
|
||||
};
|
||||
explicit EvexModifierRounding(int rounding) : rounding(rounding) {}
|
||||
int rounding;
|
||||
};
|
||||
|
@ -689,13 +709,15 @@ public:
|
|||
}
|
||||
}
|
||||
bool isVsib(int bit = 128 | 256 | 512) const { return index_.isBit(bit); }
|
||||
void optimize()
|
||||
RegExp optimize() const
|
||||
{
|
||||
RegExp exp = *this;
|
||||
// [reg * 2] => [reg + reg]
|
||||
if (index_.isBit(i32e) && !base_.getBit() && index_.getBit() && scale_ == 2) {
|
||||
base_ = index_;
|
||||
scale_ = 1;
|
||||
if (index_.isBit(i32e) && !base_.getBit() && scale_ == 2) {
|
||||
exp.base_ = index_;
|
||||
exp.scale_ = 1;
|
||||
}
|
||||
return exp;
|
||||
}
|
||||
bool operator==(const RegExp& rhs) const
|
||||
{
|
||||
|
@ -715,6 +737,11 @@ public:
|
|||
}
|
||||
friend RegExp operator+(const RegExp& a, const RegExp& b);
|
||||
friend RegExp operator-(const RegExp& e, size_t disp);
|
||||
uint8 getRex() const
|
||||
{
|
||||
uint8 rex = index_.getRexX() | base_.getRexB();
|
||||
return rex ? uint8(rex | 0x40) : 0;
|
||||
}
|
||||
private:
|
||||
/*
|
||||
[base_ + index_ * scale_ + disp_]
|
||||
|
@ -975,7 +1002,6 @@ public:
|
|||
: Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), permitVsib_(false), broadcast_(broadcast)
|
||||
{
|
||||
e_.verify();
|
||||
e_.optimize();
|
||||
}
|
||||
#ifdef XBYAK64
|
||||
explicit Address(size_t disp)
|
||||
|
@ -984,7 +1010,10 @@ public:
|
|||
: Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), permitVsib_(false), broadcast_(broadcast) { }
|
||||
#endif
|
||||
void permitVsib() const { permitVsib_ = true; }
|
||||
const RegExp& getRegExp() const { return e_; }
|
||||
RegExp getRegExp(bool optimize = true) const
|
||||
{
|
||||
return optimize ? e_.optimize() : e_;
|
||||
}
|
||||
Mode getMode() const { return mode_; }
|
||||
bool is32bit() const { verify(); return e_.getBase().getBit() == 32 || e_.getIndex().getBit() == 32; }
|
||||
bool isOnlyDisp() const { verify(); return !e_.getBase().getBit() && !e_.getIndex().getBit(); } // for mov eax
|
||||
|
@ -993,9 +1022,7 @@ public:
|
|||
{
|
||||
verify();
|
||||
if (mode_ != M_ModRM) return 0;
|
||||
uint8 rex = e_.getIndex().getRexX() | e_.getBase().getRexB();
|
||||
if (rex) rex |= 0x40;
|
||||
return rex;
|
||||
return getRegExp().getRex();
|
||||
}
|
||||
bool is64bitDisp() const { verify(); return mode_ == M_64bitDisp; } // for moffset
|
||||
bool isBroadcast() const { return broadcast_; }
|
||||
|
@ -1014,9 +1041,15 @@ private:
|
|||
void verify() const { if (e_.isVsib() && !permitVsib_) throw Error(ERR_BAD_VSIB_ADDRESSING); }
|
||||
};
|
||||
|
||||
inline const Address& Operand::getAddress() const
|
||||
{
|
||||
assert(isMEM());
|
||||
return static_cast<const Address&>(*this);
|
||||
}
|
||||
|
||||
inline bool Operand::operator==(const Operand& rhs) const
|
||||
{
|
||||
if (isMEM() && rhs.isMEM()) return static_cast<const Address&>(*this) == static_cast<const Address&>(rhs);
|
||||
if (isMEM() && rhs.isMEM()) return this->getAddress() == rhs.getAddress();
|
||||
return isEqualIfNotInherited(rhs);
|
||||
}
|
||||
|
||||
|
@ -1363,12 +1396,12 @@ private:
|
|||
if (p1->isMEM()) std::swap(p1, p2);
|
||||
if (p1->isMEM()) throw Error(ERR_BAD_COMBINATION);
|
||||
if (p2->isMEM()) {
|
||||
const Address& addr = static_cast<const Address&>(*p2);
|
||||
const Address& addr = p2->getAddress();
|
||||
if (BIT == 64 && addr.is32bit()) db(0x67);
|
||||
rex = addr.getRex() | static_cast<const Reg&>(*p1).getRex();
|
||||
rex = addr.getRex() | p1->getReg().getRex();
|
||||
} else {
|
||||
// ModRM(reg, base);
|
||||
rex = static_cast<const Reg&>(op2).getRex(static_cast<const Reg&>(op1));
|
||||
rex = op2.getReg().getRex(op1.getReg());
|
||||
}
|
||||
// except movsx(16bit, 32/64bit)
|
||||
if ((op1.isBit(16) && !op2.isBit(i32e)) || (op2.isBit(16) && !op1.isBit(i32e))) db(0x66);
|
||||
|
@ -1447,13 +1480,6 @@ private:
|
|||
if ((a > 0 && a != v) + (b > 0 && b != v) + (c > 0 && c != v) > 0) return Error(err);
|
||||
return v;
|
||||
}
|
||||
enum {
|
||||
T_RN_SAE = 1,
|
||||
T_RD_SAE = 2,
|
||||
T_RU_SAE = 3,
|
||||
T_RZ_SAE = 4,
|
||||
T_SAE = 5
|
||||
};
|
||||
int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32 VL = 0)
|
||||
{
|
||||
if (!(type & (T_EVEX | T_MUST_EVEX))) throw Error(ERR_EVEX_IS_INVALID);
|
||||
|
@ -1472,7 +1498,7 @@ private:
|
|||
int rounding = verifyDuplicate(reg.getRounding(), base.getRounding(), v ? v->getRounding() : 0, ERR_ROUNDING_IS_ALREADY_SET);
|
||||
int disp8N = 1;
|
||||
if (rounding) {
|
||||
if (rounding == T_SAE){
|
||||
if (rounding == EvexModifierRounding::T_SAE) {
|
||||
verifySAE(base, type); LL = 0;
|
||||
} else {
|
||||
verifyER(base, type); LL = rounding - 1;
|
||||
|
@ -1581,6 +1607,17 @@ private:
|
|||
db(code0 | (reg.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2);
|
||||
opAddr(addr, reg.getIdx(), immSize);
|
||||
}
|
||||
void opMIB(const Address& addr, const Reg& reg, int code0, int code1)
|
||||
{
|
||||
if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP);
|
||||
if (addr.getMode() != Address::M_ModRM) throw Error(ERR_INVALID_MIB_ADDRESS);
|
||||
if (BIT == 64 && addr.is32bit()) db(0x67);
|
||||
const RegExp& regExp = addr.getRegExp(false);
|
||||
uint8 rex = regExp.getRex();
|
||||
if (rex) db(rex);
|
||||
db(code0); db(code1);
|
||||
setSIB(regExp, reg.getIdx());
|
||||
}
|
||||
void makeJmp(uint32 disp, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref)
|
||||
{
|
||||
const int shortJmpSize = 2;
|
||||
|
@ -1656,9 +1693,9 @@ private:
|
|||
if (isValid && !isValid(reg, op)) throw Error(ERR_BAD_COMBINATION);
|
||||
if (pref != NONE) db(pref);
|
||||
if (op.isMEM()) {
|
||||
opModM(static_cast<const Address&>(op), static_cast<const Reg&>(reg), 0x0F, preCode, code, (imm8 != NONE) ? 1 : 0);
|
||||
opModM(op.getAddress(), reg.getReg(), 0x0F, preCode, code, (imm8 != NONE) ? 1 : 0);
|
||||
} else {
|
||||
opModR(static_cast<const Reg&>(reg), static_cast<const Reg&>(op), 0x0F, preCode, code);
|
||||
opModR(reg.getReg(), op.getReg(), 0x0F, preCode, code);
|
||||
}
|
||||
if (imm8 != NONE) db(imm8);
|
||||
}
|
||||
|
@ -1676,9 +1713,9 @@ private:
|
|||
{
|
||||
if (pref != NONE) db(pref);
|
||||
if (op1.isXMM() && op2.isMEM()) {
|
||||
opModM(static_cast<const Address&>(op2), static_cast<const Reg&>(op1), 0x0F, code);
|
||||
opModM(op2.getAddress(), op1.getReg(), 0x0F, code);
|
||||
} else if (op1.isMEM() && op2.isXMM()) {
|
||||
opModM(static_cast<const Address&>(op1), static_cast<const Reg&>(op2), 0x0F, code | 1);
|
||||
opModM(op1.getAddress(), op2.getReg(), 0x0F, code | 1);
|
||||
} else {
|
||||
throw Error(ERR_BAD_COMBINATION);
|
||||
}
|
||||
|
@ -1687,7 +1724,7 @@ private:
|
|||
{
|
||||
if (hasMMX2 && op.isREG(i32e)) { /* pextrw is special */
|
||||
if (mmx.isXMM()) db(0x66);
|
||||
opModR(static_cast<const Reg&>(op), mmx, 0x0F, 0xC5); db(imm);
|
||||
opModR(op.getReg(), mmx, 0x0F, 0xC5); db(imm);
|
||||
} else {
|
||||
opGen(mmx, op, code, 0x66, isXMM_REG32orMEM, imm, 0x3A);
|
||||
}
|
||||
|
@ -1697,9 +1734,9 @@ private:
|
|||
int opBit = op.getBit();
|
||||
if (disableRex && opBit == 64) opBit = 32;
|
||||
if (op.isREG(bit)) {
|
||||
opModR(Reg(ext, Operand::REG, opBit), static_cast<const Reg&>(op).changeBit(opBit), code0, code1, code2);
|
||||
opModR(Reg(ext, Operand::REG, opBit), op.getReg().changeBit(opBit), code0, code1, code2);
|
||||
} else if (op.isMEM()) {
|
||||
opModM(static_cast<const Address&>(op), Reg(ext, Operand::REG, opBit), code0, code1, code2, immSize);
|
||||
opModM(op.getAddress(), Reg(ext, Operand::REG, opBit), code0, code1, code2, immSize);
|
||||
} else {
|
||||
throw Error(ERR_BAD_COMBINATION);
|
||||
}
|
||||
|
@ -1718,9 +1755,9 @@ private:
|
|||
void opModRM(const Operand& op1, const Operand& op2, bool condR, bool condM, int code0, int code1 = NONE, int code2 = NONE, int immSize = 0)
|
||||
{
|
||||
if (condR) {
|
||||
opModR(static_cast<const Reg&>(op1), static_cast<const Reg&>(op2), code0, code1, code2);
|
||||
opModR(op1.getReg(), op2.getReg(), code0, code1, code2);
|
||||
} else if (condM) {
|
||||
opModM(static_cast<const Address&>(op2), static_cast<const Reg&>(op1), code0, code1, code2, immSize);
|
||||
opModM(op2.getAddress(), op1.getReg(), code0, code1, code2, immSize);
|
||||
} else {
|
||||
throw Error(ERR_BAD_COMBINATION);
|
||||
}
|
||||
|
@ -1735,7 +1772,7 @@ private:
|
|||
void opRM_RM(const Operand& op1, const Operand& op2, int code)
|
||||
{
|
||||
if (op1.isREG() && op2.isMEM()) {
|
||||
opModM(static_cast<const Address&>(op2), static_cast<const Reg&>(op1), code | 2);
|
||||
opModM(op2.getAddress(), op1.getReg(), code | 2);
|
||||
} else {
|
||||
opModRM(op2, op1, op1.isREG() && op1.getKind() == op2.getKind(), op1.isMEM() && op2.isREG(), code);
|
||||
}
|
||||
|
@ -1768,19 +1805,19 @@ private:
|
|||
#endif
|
||||
code = 0xFE;
|
||||
if (op.isREG()) {
|
||||
opModR(Reg(ext, Operand::REG, op.getBit()), static_cast<const Reg&>(op), code);
|
||||
opModR(Reg(ext, Operand::REG, op.getBit()), op.getReg(), code);
|
||||
} else {
|
||||
opModM(static_cast<const Address&>(op), Reg(ext, Operand::REG, op.getBit()), code);
|
||||
opModM(op.getAddress(), Reg(ext, Operand::REG, op.getBit()), code);
|
||||
}
|
||||
}
|
||||
void opPushPop(const Operand& op, int code, int ext, int alt)
|
||||
{
|
||||
if (op.isREG()) {
|
||||
if (op.isBit(16)) db(0x66);
|
||||
if (static_cast<const Reg&>(op).getIdx() >= 8) db(0x41);
|
||||
if (op.getReg().getIdx() >= 8) db(0x41);
|
||||
db(alt | (op.getIdx() & 7));
|
||||
} else if (op.isMEM()) {
|
||||
opModM(static_cast<const Address&>(op), Reg(ext, Operand::REG, op.getBit()), code);
|
||||
opModM(op.getAddress(), Reg(ext, Operand::REG, op.getBit()), code);
|
||||
} else {
|
||||
throw Error(ERR_BAD_COMBINATION);
|
||||
}
|
||||
|
@ -1869,11 +1906,12 @@ private:
|
|||
void opVex(const Reg& r, const Operand *p1, const Operand& op2, int type, int code, int imm8 = NONE)
|
||||
{
|
||||
if (op2.isMEM()) {
|
||||
const Address& addr = static_cast<const Address&>(op2);
|
||||
const Reg& base = addr.getRegExp().getBase();
|
||||
const Address& addr = op2.getAddress();
|
||||
const RegExp& regExp = addr.getRegExp();
|
||||
const Reg& base = regExp.getBase();
|
||||
if (BIT == 64 && addr.is32bit()) db(0x67);
|
||||
int disp8N = 0;
|
||||
bool x = addr.getRegExp().getIndex().isExtIdx();
|
||||
bool x = regExp.getIndex().isExtIdx();
|
||||
if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) {
|
||||
int aaa = addr.getOpmaskIdx();
|
||||
if (aaa && !(type & T_M_K)) throw Error(ERR_INVALID_OPMASK_WITH_MEMORY);
|
||||
|
@ -1882,14 +1920,14 @@ private:
|
|||
if (!(type & (T_B32 | T_B64))) throw Error(ERR_INVALID_BROADCAST);
|
||||
b = true;
|
||||
}
|
||||
int VL = addr.getRegExp().isVsib() ? addr.getRegExp().getIndex().getBit() : 0;
|
||||
int VL = regExp.isVsib() ? regExp.getIndex().getBit() : 0;
|
||||
disp8N = evex(r, base, p1, type, code, x, b, aaa, VL);
|
||||
} else {
|
||||
vex(r, base, p1, type, code, x);
|
||||
}
|
||||
opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0, disp8N);
|
||||
} else {
|
||||
const Reg& base = static_cast<const Reg&>(op2);
|
||||
const Reg& base = op2.getReg();
|
||||
if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || base.hasEvex()) {
|
||||
evex(r, base, p1, type, code);
|
||||
} else {
|
||||
|
@ -1971,11 +2009,12 @@ private:
|
|||
}
|
||||
void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, int type, uint8 code, int mode)
|
||||
{
|
||||
if (!addr.getRegExp().isVsib(128 | 256)) throw Error(ERR_BAD_VSIB_ADDRESSING);
|
||||
const RegExp& regExp = addr.getRegExp();
|
||||
if (!regExp.isVsib(128 | 256)) throw Error(ERR_BAD_VSIB_ADDRESSING);
|
||||
const int y_vx_y = 0;
|
||||
const int y_vy_y = 1;
|
||||
// const int x_vy_x = 2;
|
||||
const bool isAddrYMM = addr.getRegExp().getIndex().getBit() == 256;
|
||||
const bool isAddrYMM = regExp.getIndex().getBit() == 256;
|
||||
if (!x1.isXMM() || isAddrYMM || !x2.isXMM()) {
|
||||
bool isOK = false;
|
||||
if (mode == y_vx_y) {
|
||||
|
@ -2174,13 +2213,13 @@ public:
|
|||
const Address *addr = 0;
|
||||
uint8 code = 0;
|
||||
if (reg1.isREG() && reg1.getIdx() == 0 && reg2.isMEM()) { // mov eax|ax|al, [disp]
|
||||
reg = &static_cast<const Reg&>(reg1);
|
||||
addr= &static_cast<const Address&>(reg2);
|
||||
reg = ®1.getReg();
|
||||
addr= ®2.getAddress();
|
||||
code = 0xA0;
|
||||
} else
|
||||
if (reg1.isMEM() && reg2.isREG() && reg2.getIdx() == 0) { // mov [disp], eax|ax|al
|
||||
reg = &static_cast<const Reg&>(reg2);
|
||||
addr= &static_cast<const Address&>(reg1);
|
||||
reg = ®2.getReg();
|
||||
addr= ®1.getAddress();
|
||||
code = 0xA2;
|
||||
}
|
||||
#ifdef XBYAK64
|
||||
|
@ -2207,7 +2246,7 @@ public:
|
|||
void mov(const Operand& op, size_t imm)
|
||||
{
|
||||
if (op.isREG()) {
|
||||
const int size = mov_imm(static_cast<const Reg&>(op), imm);
|
||||
const int size = mov_imm(op.getReg(), imm);
|
||||
db(imm, size);
|
||||
} else if (op.isMEM()) {
|
||||
verifyMemHasSize(op);
|
||||
|
@ -2219,7 +2258,7 @@ public:
|
|||
if (!inner::IsInInt32(imm)) throw Error(ERR_IMM_IS_TOO_BIG);
|
||||
immSize = 4;
|
||||
}
|
||||
opModM(static_cast<const Address&>(op), Reg(0, Operand::REG, op.getBit()), 0xC6, NONE, NONE, immSize);
|
||||
opModM(op.getAddress(), Reg(0, Operand::REG, op.getBit()), 0xC6, NONE, NONE, immSize);
|
||||
db(static_cast<uint32>(imm), immSize);
|
||||
} else {
|
||||
throw Error(ERR_BAD_COMBINATION);
|
||||
|
@ -2303,7 +2342,7 @@ public:
|
|||
}
|
||||
void mov(const Segment& seg, const Operand& op)
|
||||
{
|
||||
opModRM(Reg8(seg.getIdx()), op.isREG(16|i32e) ? static_cast<const Operand&>(static_cast<const Reg&>(op).cvt32()) : op, op.isREG(16|i32e), op.isMEM(), 0x8E);
|
||||
opModRM(Reg8(seg.getIdx()), op.isREG(16|i32e) ? static_cast<const Operand&>(op.getReg().cvt32()) : op, op.isREG(16|i32e), op.isMEM(), 0x8E);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -2328,7 +2367,7 @@ public:
|
|||
, st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7)
|
||||
, k0(0), k1(1), k2(2), k3(3), k4(4), k5(5), k6(6), k7(7)
|
||||
, bnd0(0), bnd1(1), bnd2(2), bnd3(3)
|
||||
, T_sae(T_SAE), T_rn_sae(T_RN_SAE), T_rd_sae(T_RD_SAE), T_ru_sae(T_RU_SAE), T_rz_sae(T_RZ_SAE)
|
||||
, T_sae(EvexModifierRounding::T_SAE), T_rn_sae(EvexModifierRounding::T_RN_SAE), T_rd_sae(EvexModifierRounding::T_RD_SAE), T_ru_sae(EvexModifierRounding::T_RU_SAE), T_rz_sae(EvexModifierRounding::T_RZ_SAE)
|
||||
, T_z()
|
||||
#ifdef XBYAK64
|
||||
, rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15)
|
||||
|
@ -2423,9 +2462,7 @@ public:
|
|||
while (size > 0) {
|
||||
size_t len = (std::min)(n, size);
|
||||
const uint8 *seq = nopTbl[len - 1];
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
db(seq[i]);
|
||||
}
|
||||
db(seq, len);
|
||||
size -= len;
|
||||
}
|
||||
}
|
||||
|
@ -2456,10 +2493,13 @@ static const Zmm zmm0(0), zmm1(1), zmm2(2), zmm3(3), zmm4(4), zmm5(5), zmm6(6),
|
|||
static const Reg32 eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI);
|
||||
static const Reg16 ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI);
|
||||
static const Reg8 al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH);
|
||||
static const AddressFrame ptr(0), byte(8), word(16), dword(32), qword(64);
|
||||
static const AddressFrame ptr(0), byte(8), word(16), dword(32), qword(64), xword(128), yword(256), zword(512);
|
||||
static const AddressFrame ptr_b(0, true), xword_b(128, true), yword_b(256, true), zword_b(512, true);
|
||||
static const Fpu st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7);
|
||||
static const Opmask k0(0), k1(1), k2(2), k3(3), k4(4), k5(5), k6(6), k7(7);
|
||||
static const BoundsReg bnd0(0), bnd1(1), bnd2(2), bnd3(3);
|
||||
static const EvexModifierRounding T_sae(EvexModifierRounding::T_SAE), T_rn_sae(EvexModifierRounding::T_RN_SAE), T_rd_sae(EvexModifierRounding::T_RD_SAE), T_ru_sae(EvexModifierRounding::T_RU_SAE), T_rz_sae(EvexModifierRounding::T_RZ_SAE);
|
||||
static const EvexModifierZero T_z;
|
||||
#ifdef XBYAK64
|
||||
static const Reg64 rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15);
|
||||
static const Reg32 r8d(8), r9d(9), r10d(10), r11d(11), r12d(12), r13d(13), r14d(14), r15d(15);
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
const char *getVersionString() const { return "5.52"; }
|
||||
const char *getVersionString() const { return "5.601"; }
|
||||
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
|
||||
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
|
||||
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
|
||||
|
@ -32,14 +32,15 @@ void blendvps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x66, is
|
|||
void blsi(const Reg32e& r, const Operand& op) { opGpr(Reg32e(3, r.getBit()), op, r, T_0F38, 0xf3, false); }
|
||||
void blsmsk(const Reg32e& r, const Operand& op) { opGpr(Reg32e(2, r.getBit()), op, r, T_0F38, 0xf3, false); }
|
||||
void blsr(const Reg32e& r, const Operand& op) { opGpr(Reg32e(1, r.getBit()), op, r, T_0F38, 0xf3, false); }
|
||||
void bnd() { db(0xF2); }
|
||||
void bndcl(const BoundsReg& bnd, const Operand& op) { db(0xF3); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1A, NONE, !op.isMEM()); }
|
||||
void bndcn(const BoundsReg& bnd, const Operand& op) { db(0xF2); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1B, NONE, !op.isMEM()); }
|
||||
void bndcu(const BoundsReg& bnd, const Operand& op) { db(0xF2); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1A, NONE, !op.isMEM()); }
|
||||
void bndldx(const BoundsReg& bnd, const Address& addr) { opModM(addr, bnd, 0x0F, 0x1A); }
|
||||
void bndldx(const BoundsReg& bnd, const Address& addr) { opMIB(addr, bnd, 0x0F, 0x1A); }
|
||||
void bndmk(const BoundsReg& bnd, const Address& addr) { db(0xF3); opModM(addr, bnd, 0x0F, 0x1B); }
|
||||
void bndmov(const Address& addr, const BoundsReg& bnd) { db(0x66); opModM(addr, bnd, 0x0F, 0x1B); }
|
||||
void bndmov(const BoundsReg& bnd, const Operand& op) { db(0x66); opModRM(bnd, op, op.isBNDREG(), op.isMEM(), 0x0F, 0x1A); }
|
||||
void bndstx(const Address& addr, const BoundsReg& bnd) { opModM(addr, bnd, 0x0F, 0x1B); }
|
||||
void bndstx(const Address& addr, const BoundsReg& bnd) { opMIB(addr, bnd, 0x0F, 0x1B); }
|
||||
void bsf(const Reg®, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBC); }
|
||||
void bsr(const Reg®, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBD); }
|
||||
void bswap(const Reg32e& reg) { opModR(Reg32(1), reg, 0x0F); }
|
||||
|
@ -293,6 +294,9 @@ void fxch(const Fpu& reg) { opFpu(reg, 0xD9, 0xC8); }
|
|||
void fxtract() { db(0xD9); db(0xF4); }
|
||||
void fyl2x() { db(0xD9); db(0xF1); }
|
||||
void fyl2xp1() { db(0xD9); db(0xF9); }
|
||||
void gf2p8affineinvqb(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0xCF, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||
void gf2p8affineqb(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0xCE, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||
void gf2p8mulb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xCF, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void haddpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0x66, isXMM_XMMorMEM); }
|
||||
void haddps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0xF2, isXMM_XMMorMEM); }
|
||||
void hsubpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0x66, isXMM_XMMorMEM); }
|
||||
|
@ -758,10 +762,10 @@ void vaddsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
|
|||
void vaddss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x58); }
|
||||
void vaddsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0xD0); }
|
||||
void vaddsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0xD0); }
|
||||
void vaesdec(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_W0, 0xDE); }
|
||||
void vaesdeclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_W0, 0xDF); }
|
||||
void vaesenc(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_W0, 0xDC); }
|
||||
void vaesenclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_W0, 0xDD); }
|
||||
void vaesdec(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_YMM | T_EVEX, 0xDE); }
|
||||
void vaesdeclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_YMM | T_EVEX, 0xDF); }
|
||||
void vaesenc(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_YMM | T_EVEX, 0xDC); }
|
||||
void vaesenclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_YMM | T_EVEX, 0xDD); }
|
||||
void vaesimc(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_W0, 0xDB); }
|
||||
void vaeskeygenassist(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0xDF, imm); }
|
||||
void vandnpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x55); }
|
||||
|
@ -1001,6 +1005,9 @@ void vgatherdpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1
|
|||
void vgatherdps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x92, 1); }
|
||||
void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x93, 1); }
|
||||
void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x93, 2); }
|
||||
void vgf2p8affineinvqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCF, imm); }
|
||||
void vgf2p8affineqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCE, imm); }
|
||||
void vgf2p8mulb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_SAE_Z, 0xCF); }
|
||||
void vhaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0x7C); }
|
||||
void vhaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0x7C); }
|
||||
void vhsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0x7D); }
|
||||
|
@ -1099,7 +1106,7 @@ void vpbroadcastb(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isME
|
|||
void vpbroadcastd(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x58); }
|
||||
void vpbroadcastq(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_W0 | T_EW1 | T_YMM | T_EVEX, 0x59); }
|
||||
void vpbroadcastw(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x79); }
|
||||
void vpclmulqdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x44, imm); }
|
||||
void vpclmulqdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM | T_EVEX, 0x44, imm); }
|
||||
void vpcmpeqb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x74); }
|
||||
void vpcmpeqd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x76); }
|
||||
void vpcmpeqq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x29); }
|
||||
|
@ -1661,8 +1668,10 @@ void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX
|
|||
void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
|
||||
void vcmpsd(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_N8 | T_F2 | T_0F | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
|
||||
void vcmpss(const Opmask& k, const Xmm& x, const Operand& op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_N4 | T_F3 | T_0F | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
|
||||
void vcompressb(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N1 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x63); }
|
||||
void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8A); }
|
||||
void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8A); }
|
||||
void vcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x63); }
|
||||
void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7B); }
|
||||
void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x79); }
|
||||
void vcvtpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x79); }
|
||||
|
@ -1784,6 +1793,10 @@ void vpcompressd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 |
|
|||
void vpcompressq(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8B); }
|
||||
void vpconflictd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xC4); }
|
||||
void vpconflictq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xC4); }
|
||||
void vpdpbusd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x50); }
|
||||
void vpdpbusds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x51); }
|
||||
void vpdpwssd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x52); }
|
||||
void vpdpwssds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x53); }
|
||||
void vpermb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8D); }
|
||||
void vpermi2b(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x75); }
|
||||
void vpermi2d(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x76); }
|
||||
|
@ -1798,8 +1811,10 @@ void vpermt2ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x
|
|||
void vpermt2q(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x7E); }
|
||||
void vpermt2w(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x7D); }
|
||||
void vpermw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8D); }
|
||||
void vpexpandb(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N1 | T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x62); }
|
||||
void vpexpandd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x89); }
|
||||
void vpexpandq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x89); }
|
||||
void vpexpandw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x62); }
|
||||
void vpgatherdd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x90, 0); }
|
||||
void vpgatherdq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x90, 1); }
|
||||
void vpgatherqd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x91, 2); }
|
||||
|
@ -1840,6 +1855,10 @@ void vpmovw2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_
|
|||
void vpmovwb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x30, true); }
|
||||
void vpmullq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x40); }
|
||||
void vpmultishiftqb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x83); }
|
||||
void vpopcntb(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x54); }
|
||||
void vpopcntd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x55); }
|
||||
void vpopcntq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x55); }
|
||||
void vpopcntw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x54); }
|
||||
void vpord(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xEB); }
|
||||
void vporq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xEB); }
|
||||
void vprold(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 1), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x72, imm); }
|
||||
|
@ -1854,6 +1873,19 @@ void vpscatterdd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 |
|
|||
void vpscatterdq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K, 0xA0, 1); }
|
||||
void vpscatterqd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0xA1, 2); }
|
||||
void vpscatterqq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K, 0xA1, 0); }
|
||||
void vpshldd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71, imm); }
|
||||
void vpshldq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x71, imm); }
|
||||
void vpshldvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71); }
|
||||
void vpshldvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x71); }
|
||||
void vpshldvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x70); }
|
||||
void vpshldw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x70, imm); }
|
||||
void vpshrdd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x73, imm); }
|
||||
void vpshrdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x73, imm); }
|
||||
void vpshrdvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x73); }
|
||||
void vpshrdvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x73); }
|
||||
void vpshrdvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x72); }
|
||||
void vpshrdw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x72, imm); }
|
||||
void vpshufbitqmb(const Opmask& k, const Xmm& x, const Operand& op) { opVex(k, &x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8F); }
|
||||
void vpsllvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x12); }
|
||||
void vpsraq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); }
|
||||
void vpsraq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX, 0xE2); }
|
||||
|
|
|
@ -165,100 +165,120 @@ public:
|
|||
static const Type tMOVBE = uint64(1) << 34; // mobve
|
||||
static const Type tAVX512F = uint64(1) << 35;
|
||||
static const Type tAVX512DQ = uint64(1) << 36;
|
||||
static const Type tAVX512IFMA = uint64(1) << 37;
|
||||
static const Type tAVX512_IFMA = uint64(1) << 37;
|
||||
static const Type tAVX512IFMA = tAVX512_IFMA;
|
||||
static const Type tAVX512PF = uint64(1) << 38;
|
||||
static const Type tAVX512ER = uint64(1) << 39;
|
||||
static const Type tAVX512CD = uint64(1) << 40;
|
||||
static const Type tAVX512BW = uint64(1) << 41;
|
||||
static const Type tAVX512VL = uint64(1) << 42;
|
||||
static const Type tAVX512VBMI = uint64(1) << 43;
|
||||
static const Type tAVX512_VBMI = uint64(1) << 43;
|
||||
static const Type tAVX512VBMI = tAVX512_VBMI; // changed by Intel's manual
|
||||
static const Type tAVX512_4VNNIW = uint64(1) << 44;
|
||||
static const Type tAVX512_4FMAPS = uint64(1) << 45;
|
||||
static const Type tPREFETCHWT1 = uint64(1) << 46;
|
||||
static const Type tPREFETCHW = uint64(1) << 47;
|
||||
static const Type tSHA = uint64(1) << 48;
|
||||
static const Type tMPX = uint64(1) << 49;
|
||||
static const Type tAVX512_VBMI2 = uint64(1) << 50;
|
||||
static const Type tGFNI = uint64(1) << 51;
|
||||
static const Type tVAES = uint64(1) << 52;
|
||||
static const Type tVPCLMULQDQ = uint64(1) << 53;
|
||||
static const Type tAVX512_VNNI = uint64(1) << 54;
|
||||
static const Type tAVX512_BITALG = uint64(1) << 55;
|
||||
static const Type tAVX512_VPOPCNTDQ = uint64(1) << 56;
|
||||
|
||||
Cpu()
|
||||
: type_(NONE)
|
||||
{
|
||||
unsigned int data[4];
|
||||
const unsigned int& EAX = data[0];
|
||||
const unsigned int& EBX = data[1];
|
||||
const unsigned int& ECX = data[2];
|
||||
const unsigned int& EDX = data[3];
|
||||
getCpuid(0, data);
|
||||
const unsigned int maxNum = data[0];
|
||||
const unsigned int maxNum = EAX;
|
||||
static const char intel[] = "ntel";
|
||||
static const char amd[] = "cAMD";
|
||||
if (data[2] == get32bitAsBE(amd)) {
|
||||
if (ECX == get32bitAsBE(amd)) {
|
||||
type_ |= tAMD;
|
||||
getCpuid(0x80000001, data);
|
||||
if (data[3] & (1U << 31)) type_ |= t3DN;
|
||||
if (data[3] & (1U << 15)) type_ |= tCMOV;
|
||||
if (data[3] & (1U << 30)) type_ |= tE3DN;
|
||||
if (data[3] & (1U << 22)) type_ |= tMMX2;
|
||||
if (data[3] & (1U << 27)) type_ |= tRDTSCP;
|
||||
if (EDX & (1U << 31)) type_ |= t3DN;
|
||||
if (EDX & (1U << 15)) type_ |= tCMOV;
|
||||
if (EDX & (1U << 30)) type_ |= tE3DN;
|
||||
if (EDX & (1U << 22)) type_ |= tMMX2;
|
||||
if (EDX & (1U << 27)) type_ |= tRDTSCP;
|
||||
}
|
||||
if (data[2] == get32bitAsBE(intel)) {
|
||||
if (ECX == get32bitAsBE(intel)) {
|
||||
type_ |= tINTEL;
|
||||
getCpuid(0x80000001, data);
|
||||
if (data[3] & (1U << 27)) type_ |= tRDTSCP;
|
||||
if (data[2] & (1U << 5)) type_ |= tLZCNT;
|
||||
if (data[2] & (1U << 8)) type_ |= tPREFETCHW;
|
||||
if (EDX & (1U << 27)) type_ |= tRDTSCP;
|
||||
if (ECX & (1U << 5)) type_ |= tLZCNT;
|
||||
if (ECX & (1U << 8)) type_ |= tPREFETCHW;
|
||||
}
|
||||
getCpuid(1, data);
|
||||
if (data[2] & (1U << 0)) type_ |= tSSE3;
|
||||
if (data[2] & (1U << 9)) type_ |= tSSSE3;
|
||||
if (data[2] & (1U << 19)) type_ |= tSSE41;
|
||||
if (data[2] & (1U << 20)) type_ |= tSSE42;
|
||||
if (data[2] & (1U << 22)) type_ |= tMOVBE;
|
||||
if (data[2] & (1U << 23)) type_ |= tPOPCNT;
|
||||
if (data[2] & (1U << 25)) type_ |= tAESNI;
|
||||
if (data[2] & (1U << 1)) type_ |= tPCLMULQDQ;
|
||||
if (data[2] & (1U << 27)) type_ |= tOSXSAVE;
|
||||
if (data[2] & (1U << 30)) type_ |= tRDRAND;
|
||||
if (data[2] & (1U << 29)) type_ |= tF16C;
|
||||
if (ECX & (1U << 0)) type_ |= tSSE3;
|
||||
if (ECX & (1U << 9)) type_ |= tSSSE3;
|
||||
if (ECX & (1U << 19)) type_ |= tSSE41;
|
||||
if (ECX & (1U << 20)) type_ |= tSSE42;
|
||||
if (ECX & (1U << 22)) type_ |= tMOVBE;
|
||||
if (ECX & (1U << 23)) type_ |= tPOPCNT;
|
||||
if (ECX & (1U << 25)) type_ |= tAESNI;
|
||||
if (ECX & (1U << 1)) type_ |= tPCLMULQDQ;
|
||||
if (ECX & (1U << 27)) type_ |= tOSXSAVE;
|
||||
if (ECX & (1U << 30)) type_ |= tRDRAND;
|
||||
if (ECX & (1U << 29)) type_ |= tF16C;
|
||||
|
||||
if (data[3] & (1U << 15)) type_ |= tCMOV;
|
||||
if (data[3] & (1U << 23)) type_ |= tMMX;
|
||||
if (data[3] & (1U << 25)) type_ |= tMMX2 | tSSE;
|
||||
if (data[3] & (1U << 26)) type_ |= tSSE2;
|
||||
if (EDX & (1U << 15)) type_ |= tCMOV;
|
||||
if (EDX & (1U << 23)) type_ |= tMMX;
|
||||
if (EDX & (1U << 25)) type_ |= tMMX2 | tSSE;
|
||||
if (EDX & (1U << 26)) type_ |= tSSE2;
|
||||
|
||||
if (type_ & tOSXSAVE) {
|
||||
// check XFEATURE_ENABLED_MASK[2:1] = '11b'
|
||||
uint64 bv = getXfeature();
|
||||
if ((bv & 6) == 6) {
|
||||
if (data[2] & (1U << 28)) type_ |= tAVX;
|
||||
if (data[2] & (1U << 12)) type_ |= tFMA;
|
||||
if (ECX & (1U << 28)) type_ |= tAVX;
|
||||
if (ECX & (1U << 12)) type_ |= tFMA;
|
||||
if (((bv >> 5) & 7) == 7) {
|
||||
getCpuidEx(7, 0, data);
|
||||
if (data[1] & (1U << 16)) type_ |= tAVX512F;
|
||||
if (EBX & (1U << 16)) type_ |= tAVX512F;
|
||||
if (type_ & tAVX512F) {
|
||||
if (data[1] & (1U << 17)) type_ |= tAVX512DQ;
|
||||
if (data[1] & (1U << 21)) type_ |= tAVX512IFMA;
|
||||
if (data[1] & (1U << 26)) type_ |= tAVX512PF;
|
||||
if (data[1] & (1U << 27)) type_ |= tAVX512ER;
|
||||
if (data[1] & (1U << 28)) type_ |= tAVX512CD;
|
||||
if (data[1] & (1U << 30)) type_ |= tAVX512BW;
|
||||
if (data[1] & (1U << 31)) type_ |= tAVX512VL;
|
||||
if (data[2] & (1U << 1)) type_ |= tAVX512VBMI;
|
||||
if (data[3] & (1U << 2)) type_ |= tAVX512_4VNNIW;
|
||||
if (data[3] & (1U << 3)) type_ |= tAVX512_4FMAPS;
|
||||
if (EBX & (1U << 17)) type_ |= tAVX512DQ;
|
||||
if (EBX & (1U << 21)) type_ |= tAVX512_IFMA;
|
||||
if (EBX & (1U << 26)) type_ |= tAVX512PF;
|
||||
if (EBX & (1U << 27)) type_ |= tAVX512ER;
|
||||
if (EBX & (1U << 28)) type_ |= tAVX512CD;
|
||||
if (EBX & (1U << 30)) type_ |= tAVX512BW;
|
||||
if (EBX & (1U << 31)) type_ |= tAVX512VL;
|
||||
if (ECX & (1U << 1)) type_ |= tAVX512_VBMI;
|
||||
if (ECX & (1U << 6)) type_ |= tAVX512_VBMI2;
|
||||
if (ECX & (1U << 8)) type_ |= tGFNI;
|
||||
if (ECX & (1U << 9)) type_ |= tVAES;
|
||||
if (ECX & (1U << 10)) type_ |= tVPCLMULQDQ;
|
||||
if (ECX & (1U << 11)) type_ |= tAVX512_VNNI;
|
||||
if (ECX & (1U << 12)) type_ |= tAVX512_BITALG;
|
||||
if (ECX & (1U << 14)) type_ |= tAVX512_VPOPCNTDQ;
|
||||
if (EDX & (1U << 2)) type_ |= tAVX512_4VNNIW;
|
||||
if (EDX & (1U << 3)) type_ |= tAVX512_4FMAPS;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (maxNum >= 7) {
|
||||
getCpuidEx(7, 0, data);
|
||||
if (type_ & tAVX && data[1] & 0x20) type_ |= tAVX2;
|
||||
if (data[1] & (1U << 3)) type_ |= tBMI1;
|
||||
if (data[1] & (1U << 8)) type_ |= tBMI2;
|
||||
if (data[1] & (1U << 9)) type_ |= tENHANCED_REP;
|
||||
if (data[1] & (1U << 18)) type_ |= tRDSEED;
|
||||
if (data[1] & (1U << 19)) type_ |= tADX;
|
||||
if (data[1] & (1U << 20)) type_ |= tSMAP;
|
||||
if (data[1] & (1U << 4)) type_ |= tHLE;
|
||||
if (data[1] & (1U << 11)) type_ |= tRTM;
|
||||
if (data[1] & (1U << 14)) type_ |= tMPX;
|
||||
if (data[1] & (1U << 29)) type_ |= tSHA;
|
||||
if (data[2] & (1U << 0)) type_ |= tPREFETCHWT1;
|
||||
if (type_ & tAVX && (EBX & (1U << 5))) type_ |= tAVX2;
|
||||
if (EBX & (1U << 3)) type_ |= tBMI1;
|
||||
if (EBX & (1U << 8)) type_ |= tBMI2;
|
||||
if (EBX & (1U << 9)) type_ |= tENHANCED_REP;
|
||||
if (EBX & (1U << 18)) type_ |= tRDSEED;
|
||||
if (EBX & (1U << 19)) type_ |= tADX;
|
||||
if (EBX & (1U << 20)) type_ |= tSMAP;
|
||||
if (EBX & (1U << 4)) type_ |= tHLE;
|
||||
if (EBX & (1U << 11)) type_ |= tRTM;
|
||||
if (EBX & (1U << 14)) type_ |= tMPX;
|
||||
if (EBX & (1U << 29)) type_ |= tSHA;
|
||||
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
|
||||
}
|
||||
setFamily();
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue