Squashed 'externals/xbyak/' changes from 671fc805..4a6fac8a
4a6fac8a update version to 5.77 801cf3fd cosmetic change of getNumCores d397e824 fix number of cores that share LLC cache a669e092 support non-intel-cpu visual studio af5f422e Merge branch 'fenghaitao-guard_x86' into develop 9b98dc17 Guard x86 specific codes with "#if defined(__i386__) || defined(__x86_64__)" dd4173e1 move some member variables input private f72646a7 update version 4612528f format change 4b95e862 Merge branch 'shelleygoel-master' 4c262fa6 add functionality to get num of cores using x2APIC ID bc70e7e1 recover Xbyak::CastTo d09a230f unlink Label when LabelManager is destroyed 973e8597 update version afdb9fe9 Xbyak::CastTo is removed b011aca4 add RegRip +/- int acae93cd increase max temp regs for StackFrame ea4e3562 util::StackFrame uses push/pop instead of mov 42462ef9 use evex encoding for vpslld/vpslldq/vpsraw/...(reg, mem, imm); da9117a9 update version of readme.md d35f4fb7 fix the encoding of vinsertps for disp8N 1de435ed bf uses Label class 613922bd add Label L() for convenience 43e15583 fix typo 93579ee6 add protect-re.cpp 60004b5c fix url of protect-re.cpp 348b2709 fix typo of doc f34f6ed5 update manual 232110be update test 82b78bf0 add setProtectMode dd8b290f put warning message if pageSize != 4096 64775ca2 a little refactoring 7c3e7b85 fix wrong VSIB encoding with idx >= 16 git-subtree-dir: externals/xbyak git-subtree-split: 4a6fac8ade404f667b94170f713367fe7da2a852
This commit is contained in:
parent
dbb1f8cf37
commit
080b4b3aff
17 changed files with 994 additions and 489 deletions
|
@ -37,6 +37,7 @@
|
||||||
T_B64 = 1 << 27, // m64bcst
|
T_B64 = 1 << 27, // m64bcst
|
||||||
T_M_K = 1 << 28, // mem{k}
|
T_M_K = 1 << 28, // mem{k}
|
||||||
T_VSIB = 1 << 29,
|
T_VSIB = 1 << 29,
|
||||||
|
T_MEM_EVEX = 1 << 30, // use evex if mem
|
||||||
T_XXX
|
T_XXX
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -161,5 +162,9 @@ std::string type2String(int type)
|
||||||
if (!str.empty()) str += " | ";
|
if (!str.empty()) str += " | ";
|
||||||
str += "T_VSIB";
|
str += "T_VSIB";
|
||||||
}
|
}
|
||||||
|
if (type & T_MEM_EVEX) {
|
||||||
|
if (!str.empty()) str += " | ";
|
||||||
|
str += "T_MEM_EVEX";
|
||||||
|
}
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
|
@ -76,7 +76,7 @@ void putX_X_XM(bool omitOnly)
|
||||||
{ 0xC2, "cmpss", T_0F | T_F3, true, true, 2 },
|
{ 0xC2, "cmpss", T_0F | T_F3, true, true, 2 },
|
||||||
{ 0x5A, "cvtsd2ss", T_0F | T_F2 | T_EVEX | T_EW1 | T_N8 | T_ER_X, false, true, 2 },
|
{ 0x5A, "cvtsd2ss", T_0F | T_F2 | T_EVEX | T_EW1 | T_N8 | T_ER_X, false, true, 2 },
|
||||||
{ 0x5A, "cvtss2sd", T_0F | T_F3 | T_EVEX | T_EW0 | T_N4 | T_SAE_X, false, true, 2 },
|
{ 0x5A, "cvtss2sd", T_0F | T_F3 | T_EVEX | T_EW0 | T_N4 | T_SAE_X, false, true, 2 },
|
||||||
{ 0x21, "insertps", T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0, true, true, 2 },
|
{ 0x21, "insertps", T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, true, true, 2 },
|
||||||
{ 0x63, "packsswb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
|
{ 0x63, "packsswb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
|
||||||
{ 0x6B, "packssdw", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 },
|
{ 0x6B, "packssdw", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 },
|
||||||
{ 0x67, "packuswb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
|
{ 0x67, "packuswb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 },
|
||||||
|
@ -1491,16 +1491,16 @@ void put()
|
||||||
int idx;
|
int idx;
|
||||||
int type;
|
int type;
|
||||||
} tbl[] = {
|
} tbl[] = {
|
||||||
{ "pslldq", 0x73, 7, T_0F | T_66 | T_YMM | T_EVEX },
|
{ "pslldq", 0x73, 7, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
|
||||||
{ "psrldq", 0x73, 3, T_0F | T_66 | T_YMM | T_EVEX },
|
{ "psrldq", 0x73, 3, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
|
||||||
{ "psllw", 0x71, 6, T_0F | T_66 | T_YMM | T_EVEX },
|
{ "psllw", 0x71, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
|
||||||
{ "pslld", 0x72, 6, T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 },
|
{ "pslld", 0x72, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 },
|
||||||
{ "psllq", 0x73, 6, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 },
|
{ "psllq", 0x73, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW1 | T_B64 },
|
||||||
{ "psraw", 0x71, 4, T_0F | T_66 | T_YMM | T_EVEX },
|
{ "psraw", 0x71, 4, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
|
||||||
{ "psrad", 0x72, 4, T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 },
|
{ "psrad", 0x72, 4, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 },
|
||||||
{ "psrlw", 0x71, 2, T_0F | T_66 | T_YMM | T_EVEX },
|
{ "psrlw", 0x71, 2, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX },
|
||||||
{ "psrld", 0x72, 2, T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 },
|
{ "psrld", 0x72, 2, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 },
|
||||||
{ "psrlq", 0x73, 2, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 },
|
{ "psrlq", 0x73, 2, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW1 | T_B64 },
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const Tbl& p = tbl[i];
|
const Tbl& p = tbl[i];
|
||||||
|
|
479
readme.md
479
readme.md
|
@ -1,107 +1,121 @@
|
||||||
|
|
||||||
Xbyak 5.67 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
|
# Xbyak 5.77 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
|
||||||
=============
|
|
||||||
|
|
||||||
Abstract
|
## Abstract
|
||||||
-------------
|
|
||||||
|
|
||||||
This is a header file which enables dynamically to assemble x86(IA32), x64(AMD64, x86-64) mnemonic.
|
This is a header file which enables you to dynamically assemble x86(IA32), x64(AMD64, x86-64) mnemonics.
|
||||||
|
|
||||||
Feature
|
## Feature
|
||||||
-------------
|
* header file only
|
||||||
header file only
|
* Intel/MASM like syntax
|
||||||
you can use Xbyak's functions at once if xbyak.h is included.
|
* fully support AVX-512
|
||||||
|
|
||||||
### Supported Instructions Sets
|
**Note**: Xbyak uses and(), or(), xor(), not() functions, so `-fno-operator-names` option is necessary for gcc/clang.
|
||||||
|
|
||||||
MMX/MMX2/SSE/SSE2/SSE3/SSSE3/SSE4/FPU(*partial*)/AVX/AVX2/FMA/VEX-encoded GPR/AVX-512
|
Or define `XBYAK_NO_OP_NAMES` before including `xbyak.h` and use and_(), or_(), xor_(), not_() instead of them.
|
||||||
|
|
||||||
|
and_(), or_(), xor_(), not_() are always available.
|
||||||
|
|
||||||
|
`XBYAK_NO_OP_NAMES` will be defined in the future version.
|
||||||
|
|
||||||
### Supported OS
|
### Supported OS
|
||||||
|
|
||||||
* Windows Xp, Vista, Windows 7(32bit, 64bit)
|
* Windows Xp, Vista, Windows 7, Windows 10(32bit, 64bit)
|
||||||
* Linux(32bit, 64bit)
|
* Linux(32bit, 64bit)
|
||||||
* Intel Mac OSX
|
* Intel macOS
|
||||||
|
|
||||||
### Supported Compilers
|
### Supported Compilers
|
||||||
|
|
||||||
* Visual Studio C++ VC2012 or later
|
Almost all C++03 or later compilers for x86/x64 are supported, such as Visual Studio, g++, clang++, Intel C++ compiler and g++ on mingw/cygwin.
|
||||||
* gcc 4.7 or later
|
|
||||||
* clang 3.3
|
|
||||||
* cygwin gcc 4.5.3
|
|
||||||
* icc 7.2
|
|
||||||
|
|
||||||
>Note: Xbyak uses and(), or(), xor(), not() functions, so "-fno-operator-names" option is required on gcc.
|
## Install
|
||||||
Or define XBYAK_NO_OP_NAMES and use and_(), or_(), xor_(), not_() instead of them.
|
|
||||||
and_(), or_(), xor_(), not_() are always available.
|
|
||||||
|
|
||||||
Install
|
The following files are necessary. Please add the path to your compile directory.
|
||||||
-------------
|
|
||||||
|
|
||||||
The following files are necessary. Please add the path to your compile directories.
|
|
||||||
|
|
||||||
* xbyak.h
|
* xbyak.h
|
||||||
* xbyak_mnemonic.h
|
* xbyak_mnemonic.h
|
||||||
|
* xbyak_util.h
|
||||||
|
|
||||||
Linux:
|
Linux:
|
||||||
|
```
|
||||||
|
make install
|
||||||
|
```
|
||||||
|
|
||||||
make install
|
These files are copied into `/usr/local/include/xbyak`.
|
||||||
|
|
||||||
These files are copied into /usr/local/include/xbyak
|
## How to use it
|
||||||
|
|
||||||
New Feature
|
Inherit `Xbyak::CodeGenerator` class and make the class method.
|
||||||
-------------
|
```
|
||||||
|
#define XBYAK_NO_OP_NAMES
|
||||||
|
#include <xbyak/xbyak.h>
|
||||||
|
|
||||||
Add support for AVX-512 instruction set.
|
struct Code : Xbyak::CodeGenerator {
|
||||||
|
Code(int x)
|
||||||
|
{
|
||||||
|
mov(eax, x);
|
||||||
|
ret();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
```
|
||||||
|
Make an instance of the class and get the function
|
||||||
|
pointer by calling `getCode()` and call it.
|
||||||
|
```
|
||||||
|
Code c(5);
|
||||||
|
int (*f)() = c.getCode<int (*)()>();
|
||||||
|
printf("ret=%d\n", f()); // ret = 5
|
||||||
|
```
|
||||||
|
|
||||||
Syntax
|
## Syntax
|
||||||
-------------
|
Similar to MASM/NASM syntax with parentheses.
|
||||||
|
|
||||||
Make Xbyak::CodeGenerator and make the class method and get the function
|
|
||||||
pointer by calling cgetCode() and casting the return value.
|
|
||||||
|
|
||||||
NASM Xbyak
|
|
||||||
mov eax, ebx --> mov(eax, ebx);
|
|
||||||
inc ecx inc(ecx);
|
|
||||||
ret --> ret();
|
|
||||||
|
|
||||||
### Addressing
|
|
||||||
|
|
||||||
(ptr|dword|word|byte) [base + index * (1|2|4|8) + displacement]
|
|
||||||
[rip + 32bit disp] ; x64 only
|
|
||||||
|
|
||||||
NASM Xbyak
|
|
||||||
mov eax, [ebx+ecx] --> mov (eax, ptr[ebx+ecx]);
|
|
||||||
test byte [esp], 4 --> test (byte [esp], 4);
|
|
||||||
|
|
||||||
|
|
||||||
How to use Selector(Segment Register)
|
|
||||||
|
|
||||||
>Note: Segment class is not derived from Operand.
|
|
||||||
|
|
||||||
```
|
```
|
||||||
mov eax, [fs:eax] --> putSeg(fs); mov(eax, ptr [eax]);
|
NASM Xbyak
|
||||||
|
mov eax, ebx --> mov(eax, ebx);
|
||||||
|
inc ecx inc(ecx);
|
||||||
|
ret --> ret();
|
||||||
|
```
|
||||||
|
|
||||||
|
## Addressing
|
||||||
|
Use `qword`, `dword`, `word` and `byte` if it is necessary to specify the size of memory,
|
||||||
|
otherwise use `ptr`.
|
||||||
|
|
||||||
|
```
|
||||||
|
(ptr|qword|dword|word|byte) [base + index * (1|2|4|8) + displacement]
|
||||||
|
[rip + 32bit disp] ; x64 only
|
||||||
|
|
||||||
|
NASM Xbyak
|
||||||
|
mov eax, [ebx+ecx] --> mov(eax, ptr [ebx+ecx]);
|
||||||
|
mov al, [ebx+ecx] --> mov(al, ptr [ebx + ecx]);
|
||||||
|
test byte [esp], 4 --> test(byte [esp], 4);
|
||||||
|
inc qword [rax] --> inc(qword [rax]);
|
||||||
|
```
|
||||||
|
**Note**: `qword`, ... are member variables, so don't use `dword` as an unsigned int type.
|
||||||
|
|
||||||
|
### How to use Selector (Segment Register)
|
||||||
|
```
|
||||||
|
mov eax, [fs:eax] --> putSeg(fs);
|
||||||
|
mov(eax, ptr [eax]);
|
||||||
mov ax, cs --> mov(ax, cs);
|
mov ax, cs --> mov(ax, cs);
|
||||||
```
|
```
|
||||||
|
**Note**: Segment class is not derived from `Operand`.
|
||||||
|
|
||||||
>you can use ptr for almost memory access unless you specify the size of memory.
|
## AVX
|
||||||
|
|
||||||
>dword, word and byte are member variables, then don't use dword as unsigned int, for example.
|
|
||||||
|
|
||||||
### AVX
|
|
||||||
|
|
||||||
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
|
|
||||||
vaddps(xmm2, xmm3, ptr [rax]); // use ptr to access memory
|
|
||||||
vgatherdpd(xmm1, ptr [ebp+123+xmm2*4], xmm3);
|
|
||||||
|
|
||||||
*Remark*
|
|
||||||
The omitted destination syntax as the following ss disabled.
|
|
||||||
```
|
```
|
||||||
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
|
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
|
||||||
|
vaddps(xmm2, xmm3, ptr [rax]); // use ptr to access memory
|
||||||
|
vgatherdpd(xmm1, ptr [ebp + 256 + xmm2*4], xmm3);
|
||||||
```
|
```
|
||||||
define `XBYAK_ENABLE_OMITTED_OPERAND` if you use it for backward compatibility.
|
|
||||||
|
**Note**:
|
||||||
|
If `XBYAK_ENABLE_OMITTED_OPERAND` is defined, then you can use two operand version for backward compatibility.
|
||||||
But the newer version will not support it.
|
But the newer version will not support it.
|
||||||
|
```
|
||||||
|
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
|
||||||
|
```
|
||||||
|
|
||||||
### AVX-512
|
## AVX-512
|
||||||
|
|
||||||
```
|
```
|
||||||
vaddpd zmm2, zmm5, zmm30 --> vaddpd(zmm2, zmm5, zmm30);
|
vaddpd zmm2, zmm5, zmm30 --> vaddpd(zmm2, zmm5, zmm30);
|
||||||
|
@ -130,97 +144,122 @@ vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5)
|
||||||
vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit
|
vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit
|
||||||
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit
|
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit
|
||||||
```
|
```
|
||||||
Remark
|
### Remark
|
||||||
* k1, ..., k7 are new opmask registers.
|
* `k1`, ..., `k7` are opmask registers.
|
||||||
* use `| T_z`, `| T_sae`, `| T_rn_sae`, `| T_rd_sae`, `| T_ru_sae`, `| T_rz_sae` instead of `,{z}`, `,{sae}`, `,{rn-sae}`, `,{rd-sae}`, `,{ru-sae}`, `,{rz-sae}` respectively.
|
* use `| T_z`, `| T_sae`, `| T_rn_sae`, `| T_rd_sae`, `| T_ru_sae`, `| T_rz_sae` instead of `,{z}`, `,{sae}`, `,{rn-sae}`, `,{rd-sae}`, `,{ru-sae}`, `,{rz-sae}` respectively.
|
||||||
* `k4 | k3` is different from `k3 | k4`.
|
* `k4 | k3` is different from `k3 | k4`.
|
||||||
* use `ptr_b` for broadcast `{1toX}`. X is automatically determined.
|
* use `ptr_b` for broadcast `{1toX}`. X is automatically determined.
|
||||||
* specify xword/yword/zword(_b) for m128/m256/m512 if necessary.
|
* specify `xword`/`yword`/`zword(_b)` for m128/m256/m512 if necessary.
|
||||||
|
|
||||||
### Label
|
## Label
|
||||||
|
Two kinds of Label are supported. (String literal and Label class).
|
||||||
|
|
||||||
L("L1");
|
### String literal
|
||||||
jmp ("L1");
|
```
|
||||||
|
L("L1");
|
||||||
|
jmp("L1");
|
||||||
|
|
||||||
jmp ("L2");
|
jmp("L2");
|
||||||
...
|
...
|
||||||
a few mnemonics(8-bit displacement jmp)
|
a few mnemonics (8-bit displacement jmp)
|
||||||
...
|
...
|
||||||
L("L2");
|
L("L2");
|
||||||
|
|
||||||
jmp ("L3", T_NEAR);
|
jmp("L3", T_NEAR);
|
||||||
...
|
...
|
||||||
a lot of mnemonics(32-bit displacement jmp)
|
a lot of mnemonics (32-bit displacement jmp)
|
||||||
...
|
...
|
||||||
L("L3");
|
L("L3");
|
||||||
|
```
|
||||||
|
|
||||||
>Call hasUndefinedLabel() to verify your code has no undefined label.
|
* Call `hasUndefinedLabel()` to verify your code has no undefined label.
|
||||||
> you can use a label for immediate value of mov like as mov (eax, "L2");
|
* you can use a label as an immediate value of mov, like `mov(eax, "L2")`.
|
||||||
|
|
||||||
#### 1. support @@, @f, @b like MASM
|
### Support `@@`, `@f`, `@b` like MASM
|
||||||
|
|
||||||
L("@@"); // <A>
|
```
|
||||||
jmp("@b"); // jmp to <A>
|
L("@@"); // <A>
|
||||||
jmp("@f"); // jmp to <B>
|
jmp("@b"); // jmp to <A>
|
||||||
L("@@"); // <B>
|
jmp("@f"); // jmp to <B>
|
||||||
jmp("@b"); // jmp to <B>
|
L("@@"); // <B>
|
||||||
mov(eax, "@b");
|
jmp("@b"); // jmp to <B>
|
||||||
jmp(eax); // jmp to <B>
|
mov(eax, "@b");
|
||||||
|
jmp(eax); // jmp to <B>
|
||||||
|
```
|
||||||
|
|
||||||
#### 2. localization of label by calling inLocalLabel(), outLocallabel().
|
### Local label
|
||||||
|
|
||||||
labels begining of period between inLocalLabel() and outLocalLabel()
|
Label symbols beginning with a period between `inLocalLabel()` and `outLocalLabel()`
|
||||||
are dealed with local label.
|
are treated as a local label.
|
||||||
inLocalLabel() and outLocalLabel() can be nested.
|
`inLocalLabel()` and `outLocalLabel()` can be nested.
|
||||||
|
|
||||||
void func1()
|
```
|
||||||
{
|
void func1()
|
||||||
inLocalLabel();
|
{
|
||||||
L(".lp"); // <A> ; local label
|
inLocalLabel();
|
||||||
...
|
L(".lp"); // <A> ; local label
|
||||||
jmp(".lp"); // jmpt to <A>
|
...
|
||||||
L("aaa"); // global label
|
jmp(".lp"); // jmp to <A>
|
||||||
outLocalLabel();
|
L("aaa"); // global label <C>
|
||||||
}
|
outLocalLabel();
|
||||||
|
|
||||||
void func2()
|
inLocalLabel();
|
||||||
{
|
L(".lp"); // <B> ; local label
|
||||||
inLocalLabel();
|
func1();
|
||||||
L(".lp"); // <B> ; local label
|
jmp(".lp"); // jmp to <B>
|
||||||
func1();
|
inLocalLabel();
|
||||||
jmp(".lp"); // jmp to <B>
|
jmp("aaa"); // jmp to <C>
|
||||||
inLocalLabel();
|
}
|
||||||
}
|
```
|
||||||
|
|
||||||
### Label class
|
### Label class
|
||||||
|
|
||||||
L() and jxx() functions support a new Label class.
|
`L()` and `jxx()` support Label class.
|
||||||
|
|
||||||
Label label1, label2;
|
```
|
||||||
L(label1);
|
Xbyak::Label label1, label2;
|
||||||
...
|
L(label1);
|
||||||
jmp(label1);
|
...
|
||||||
...
|
jmp(label1);
|
||||||
jmp(label2);
|
...
|
||||||
...
|
jmp(label2);
|
||||||
L(label2);
|
...
|
||||||
|
L(label2);
|
||||||
|
```
|
||||||
|
|
||||||
Moreover, assignL(dstLabel, srcLabel) method binds dstLabel with srcLabel.
|
Use `putL` for jmp table
|
||||||
|
```
|
||||||
|
Label labelTbl, L0, L1, L2;
|
||||||
|
mov(rax, labelTbl);
|
||||||
|
// rdx is an index of jump table
|
||||||
|
jmp(ptr [rax + rdx * sizeof(void*)]);
|
||||||
|
L(labelTbl);
|
||||||
|
putL(L0);
|
||||||
|
putL(L1);
|
||||||
|
putL(L2);
|
||||||
|
L(L0);
|
||||||
|
....
|
||||||
|
L(L1);
|
||||||
|
....
|
||||||
|
```
|
||||||
|
|
||||||
Label label1, label2;
|
`assignL(dstLabel, srcLabel)` binds dstLabel with srcLabel.
|
||||||
L(label1);
|
|
||||||
...
|
|
||||||
jmp(label2);
|
|
||||||
...
|
|
||||||
assignL(label2, label1); // label2 <= label1
|
|
||||||
|
|
||||||
The above jmp opecode jumps label1.
|
```
|
||||||
|
Label label2;
|
||||||
|
Label label1 = L(); // make label1 ; same to Label label1; L(label1);
|
||||||
|
...
|
||||||
|
jmp(label2); // label2 is not determined here
|
||||||
|
...
|
||||||
|
assignL(label2, label1); // label2 <- label1
|
||||||
|
```
|
||||||
|
The `jmp` in the above code jumps to label1 assigned by `assignL`.
|
||||||
|
|
||||||
* Restriction:
|
**Note**:
|
||||||
* srcLabel must be used in L().
|
* srcLabel must be used in `L()`.
|
||||||
* dstLabel must not be used in L().
|
* dstLabel must not be used in `L()`.
|
||||||
|
|
||||||
Label::getAddress() returns the address specified by the label instance and 0 if not specified.
|
`Label::getAddress()` returns the address specified by the label instance and 0 if not specified.
|
||||||
```
|
```
|
||||||
// not AutoGrow mode
|
// not AutoGrow mode
|
||||||
Label label;
|
Label label;
|
||||||
|
@ -229,7 +268,7 @@ L(label);
|
||||||
assert(label.getAddress() == getCurr());
|
assert(label.getAddress() == getCurr());
|
||||||
```
|
```
|
||||||
|
|
||||||
### Rip
|
### Rip ; relative addressing
|
||||||
```
|
```
|
||||||
Label label;
|
Label label;
|
||||||
mov(eax, ptr [rip + label]); // eax = 4
|
mov(eax, ptr [rip + label]); // eax = 4
|
||||||
|
@ -243,92 +282,127 @@ int x;
|
||||||
...
|
...
|
||||||
mov(eax, ptr[rip + &x]); // throw exception if the difference between &x and current position is larger than 2GiB
|
mov(eax, ptr[rip + &x]); // throw exception if the difference between &x and current position is larger than 2GiB
|
||||||
```
|
```
|
||||||
### Code size
|
|
||||||
The default max code size is 4096 bytes. Please set it in constructor of CodeGenerator() if you want to use large size.
|
|
||||||
|
|
||||||
class Quantize : public Xbyak::CodeGenerator {
|
## Code size
|
||||||
public:
|
The default max code size is 4096 bytes.
|
||||||
Quantize()
|
Specify the size in constructor of `CodeGenerator()` if necessary.
|
||||||
: CodeGenerator(8192)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
...
|
|
||||||
};
|
|
||||||
|
|
||||||
### use user allocated memory
|
```
|
||||||
|
class Quantize : public Xbyak::CodeGenerator {
|
||||||
|
public:
|
||||||
|
Quantize()
|
||||||
|
: CodeGenerator(8192)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
...
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
## User allocated memory
|
||||||
|
|
||||||
You can make jit code on prepaired memory.
|
You can make jit code on prepared memory.
|
||||||
|
|
||||||
class Sample : public Xbyak::CodeGenerator {
|
Call `setProtectModeRE` yourself to change the memory mode if using the prepared memory.
|
||||||
public:
|
|
||||||
Sample(void *userPtr, size_t size)
|
|
||||||
: Xbyak::CodeGenerator(size, userPtr)
|
|
||||||
{
|
|
||||||
...
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
const size_t codeSize = 1024;
|
|
||||||
uint8 buf[codeSize + 16];
|
|
||||||
|
|
||||||
// get 16-byte aligned address
|
|
||||||
uint8 *p = Xbyak::CodeArray::getAlignedAddress(buf);
|
|
||||||
|
|
||||||
// append executable attribute to the memory
|
|
||||||
Xbyak::CodeArray::protect(p, codeSize, true);
|
|
||||||
|
|
||||||
// construct your jit code on the memory
|
|
||||||
Sample s(p, codeSize);
|
|
||||||
|
|
||||||
>See *sample/test0.cpp*
|
|
||||||
|
|
||||||
AutoGrow
|
|
||||||
-------------
|
|
||||||
|
|
||||||
Under `AutoGrow` mode, Xbyak extends memory automatically if necessary.
|
|
||||||
Call ready() before calling getCode() to calc address of jmp.
|
|
||||||
```
|
```
|
||||||
struct Code : Xbyak::CodeGenerator {
|
uint8_t alignas(4096) buf[8192]; // C++11 or later
|
||||||
Code()
|
|
||||||
: Xbyak::CodeGenerator(<default memory size>, Xbyak::AutoGrow)
|
struct Code : Xbyak::CodeGenerator {
|
||||||
{
|
Code() : Xbyak::CodeGenerator(sizeof(buf), buf)
|
||||||
...
|
{
|
||||||
}
|
mov(rax, 123);
|
||||||
};
|
ret();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
Code c;
|
Code c;
|
||||||
c.ready(); // Don't forget to call this function
|
c.setProtectModeRE(); // set memory to Read/Exec
|
||||||
|
printf("%d\n", c.getCode<int(*)()>()());
|
||||||
|
}
|
||||||
```
|
```
|
||||||
>Don't use the address returned by getCurr() before calling ready().
|
|
||||||
>It may be invalid address.
|
|
||||||
>RESTRICTION : rip addressing is not supported in AutoGrow
|
|
||||||
|
|
||||||
Macro
|
**Note**: See [sample/test0.cpp](sample/test0.cpp).
|
||||||
-------------
|
|
||||||
|
### AutoGrow
|
||||||
|
|
||||||
|
The memory region for jit is automatically extended if necessary when `AutoGrow` is specified in a constructor of `CodeGenerator`.
|
||||||
|
|
||||||
|
Call `ready()` or `readyRE()` before calling `getCode()` to fix jump address.
|
||||||
|
```
|
||||||
|
struct Code : Xbyak::CodeGenerator {
|
||||||
|
Code()
|
||||||
|
: Xbyak::CodeGenerator(<default memory size>, Xbyak::AutoGrow)
|
||||||
|
{
|
||||||
|
...
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Code c;
|
||||||
|
// generate code for jit
|
||||||
|
c.ready(); // mode = Read/Write/Exec
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note**:
|
||||||
|
* Don't use the address returned by `getCurr()` before calling `ready()` because it may be an invalid address.
|
||||||
|
|
||||||
|
### Read/Exec mode
|
||||||
|
Xbyak sets Read/Write/Exec mode on memory to run jit code.
|
||||||
|
If you want to use Read/Exec mode for security, then specify `DontSetProtectRWE` for `CodeGenerator` and
|
||||||
|
call `setProtectModeRE()` after generating jit code.
|
||||||
|
|
||||||
|
```
|
||||||
|
struct Code : Xbyak::CodeGenerator {
|
||||||
|
Code()
|
||||||
|
: Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE)
|
||||||
|
{
|
||||||
|
mov(eax, 123);
|
||||||
|
ret();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Code c;
|
||||||
|
c.setProtectModeRE();
|
||||||
|
...
|
||||||
|
|
||||||
|
```
|
||||||
|
Call `readyRE()` instead of `ready()` when using `AutoGrow` mode.
|
||||||
|
See [protect-re.cpp](sample/protect-re.cpp).
|
||||||
|
|
||||||
|
## Macro
|
||||||
|
|
||||||
* **XBYAK32** is defined on 32bit.
|
* **XBYAK32** is defined on 32bit.
|
||||||
* **XBYAK64** is defined on 64bit.
|
* **XBYAK64** is defined on 64bit.
|
||||||
* **XBYAK64_WIN** is defined on 64bit Windows(VC)
|
* **XBYAK64_WIN** is defined on 64bit Windows(VC)
|
||||||
* **XBYAK64_GCC** is defined on 64bit gcc, cygwin
|
* **XBYAK64_GCC** is defined on 64bit gcc, cygwin
|
||||||
* define **XBYAK_NO_OP_NAMES** on gcc without `-fno-operator-names`
|
* define **XBYAK_NO_OP_NAMES** on gcc without `-fno-operator-names`
|
||||||
* define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(duplicated in the future)
|
* define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(deprecated in the future)
|
||||||
* define **XBYAK_UNDEF_JNL** if Bessel function jnl is defined as macro
|
* define **XBYAK_UNDEF_JNL** if Bessel function jnl is defined as macro
|
||||||
|
|
||||||
Sample
|
## Sample
|
||||||
-------------
|
|
||||||
|
|
||||||
* test0.cpp ; tiny sample of Xbyak(x86, x64)
|
* [test0.cpp](sample/test0.cpp) ; tiny sample (x86, x64)
|
||||||
* quantize.cpp ; JIT optimized quantization by fast division(x86 only)
|
* [quantize.cpp](sample/quantize.cpp) ; JIT optimized quantization by fast division (x86 only)
|
||||||
* calc.cpp ; assemble and estimate a given polynomial(x86, x64)
|
* [calc.cpp](sample/calc.cpp) ; assemble and estimate a given polynomial (x86, x64)
|
||||||
* bf.cpp ; JIT brainfuck(x86, x64)
|
* [bf.cpp](sample/bf.cpp) ; JIT brainfuck (x86, x64)
|
||||||
|
|
||||||
License
|
## License
|
||||||
-------------
|
|
||||||
|
|
||||||
modified new BSD License
|
modified new BSD License
|
||||||
http://opensource.org/licenses/BSD-3-Clause
|
http://opensource.org/licenses/BSD-3-Clause
|
||||||
|
|
||||||
History
|
## History
|
||||||
-------------
|
* 2019/Mar/06 ver 5.77 fix number of cores that share LLC cache by densamoilov
|
||||||
|
* 2019/Jan/17 ver 5.76 add Cpu::getNumCores() by shelleygoel
|
||||||
|
* 2018/Oct/31 ver 5.751 recover Xbyak::CastTo for compatibility
|
||||||
|
* 2018/Oct/29 ver 5.75 unlink LabelManager from Label when LabelManager is destroyed
|
||||||
|
* 2018/Oct/21 ver 5.74 support RegRip +/- int. Xbyak::CastTo is removed
|
||||||
|
* 2018/Oct/15 util::StackFrame uses push/pop instead of mov
|
||||||
|
* 2018/Sep/19 ver 5.73 fix evex encoding of vpslld, vpslldq, vpsllw, etc for (reg, mem, imm8)
|
||||||
|
* 2018/Sep/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday)
|
||||||
|
* 2018/Sep/04 ver 5.71 L() returns a new label instance
|
||||||
|
* 2018/Aug/27 ver 5.70 support setProtectMode() and DontUseProtect for read/exec setting
|
||||||
|
* 2018/Aug/24 ver 5.68 fix wrong VSIB encoding with vector index >= 16(thanks to petercaday)
|
||||||
* 2018/Aug/14 ver 5.67 remove mutable in Address ; fix setCacheHierarchy for cloud vm
|
* 2018/Aug/14 ver 5.67 remove mutable in Address ; fix setCacheHierarchy for cloud vm
|
||||||
* 2018/Jul/26 ver 5.661 support mingw64
|
* 2018/Jul/26 ver 5.661 support mingw64
|
||||||
* 2018/Jul/24 ver 5.66 add CodeArray::PROTECT_RE to mode of protect()
|
* 2018/Jul/24 ver 5.66 add CodeArray::PROTECT_RE to mode of protect()
|
||||||
|
@ -392,8 +466,7 @@ History
|
||||||
* 2013/Jul/30 ver 4.20 [break backward compatibility] split Reg32e class into RegExp(base+index*scale+disp) and Reg32e(means Reg32 or Reg64)
|
* 2013/Jul/30 ver 4.20 [break backward compatibility] split Reg32e class into RegExp(base+index*scale+disp) and Reg32e(means Reg32 or Reg64)
|
||||||
* 2013/Jul/04 ver 4.10 [break backward compatibility] change the type of Xbyak::Error from enum to a class
|
* 2013/Jul/04 ver 4.10 [break backward compatibility] change the type of Xbyak::Error from enum to a class
|
||||||
* 2013/Jun/21 ver 4.02 add putL(LABEL) function to put the address of the label
|
* 2013/Jun/21 ver 4.02 add putL(LABEL) function to put the address of the label
|
||||||
* 2013/Jun/21 ver 4.01 vpsllw, vpslld, vpsllq, vpsraw, vpsrad, vpsrlw, vpsrld, vpsrlq support (ymm, ymm, xmm).
|
* 2013/Jun/21 ver 4.01 vpsllw, vpslld, vpsllq, vpsraw, vpsrad, vpsrlw, vpsrld, vpsrlq support (ymm, ymm, xmm). support vpbroadcastb, vpbroadcastw, vpbroadcastd, vpbroadcastq(thanks to Gabest).
|
||||||
support vpbroadcastb, vpbroadcastw, vpbroadcastd, vpbroadcastq(thanks to Gabest).
|
|
||||||
* 2013/May/30 ver 4.00 support AVX2, VEX-encoded GPR-instructions
|
* 2013/May/30 ver 4.00 support AVX2, VEX-encoded GPR-instructions
|
||||||
* 2013/Mar/27 ver 3.80 support mov(reg, "label");
|
* 2013/Mar/27 ver 3.80 support mov(reg, "label");
|
||||||
* 2013/Mar/13 ver 3.76 add cqo(), jcxz(), jecxz(), jrcxz()
|
* 2013/Mar/13 ver 3.76 add cqo(), jcxz(), jecxz(), jrcxz()
|
||||||
|
@ -453,8 +526,6 @@ History
|
||||||
* 2007/Jan/21 fix the bug to create address like [disp] select smaller representation for mov (eax|ax|al, [disp])
|
* 2007/Jan/21 fix the bug to create address like [disp] select smaller representation for mov (eax|ax|al, [disp])
|
||||||
* 2007/Jan/4 first version
|
* 2007/Jan/4 first version
|
||||||
|
|
||||||
Author
|
## Author
|
||||||
-------------
|
|
||||||
|
|
||||||
MITSUNARI Shigeo(herumi@nifty.com)
|
MITSUNARI Shigeo(herumi@nifty.com)
|
||||||
|
|
||||||
|
|
52
readme.txt
52
readme.txt
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.67
|
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.77
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
◎概要
|
◎概要
|
||||||
|
@ -245,8 +245,8 @@ void func2()
|
||||||
|
|
||||||
更にラベルの割り当てを行うassignL(dstLabel, srcLabel)という命令も追加されました。
|
更にラベルの割り当てを行うassignL(dstLabel, srcLabel)という命令も追加されました。
|
||||||
|
|
||||||
Label label1, label2;
|
Label label2;
|
||||||
L(label1);
|
Label label1 = L(); // Label label1; L(label1);と同じ意味
|
||||||
...
|
...
|
||||||
jmp(label2);
|
jmp(label2);
|
||||||
...
|
...
|
||||||
|
@ -309,6 +309,41 @@ bool CodeArray::protect(const void *addr, size_t size, bool canExec);
|
||||||
*/
|
*/
|
||||||
uint8 *CodeArray::getAlignedAddress(uint8 *addr, size_t alignedSize = ALIGN_SIZE);
|
uint8 *CodeArray::getAlignedAddress(uint8 *addr, size_t alignedSize = ALIGN_SIZE);
|
||||||
|
|
||||||
|
・read/execモード
|
||||||
|
デフォルトのCodeGeneratorはコンストラクト時にJIT用の領域をread/write/execモードに設定して利用します。
|
||||||
|
コード生成時はread/writeでコード実行時にはread/execにしたい場合、次のようにしてください。
|
||||||
|
|
||||||
|
struct Code : Xbyak::CodeGenerator {
|
||||||
|
Code()
|
||||||
|
: Xbyak::CodeGenerator(4096, Xbyak::DontUseProtect) // JIT領域をread/writeのままコード生成
|
||||||
|
{
|
||||||
|
mov(eax, 123);
|
||||||
|
ret();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Code c;
|
||||||
|
c.setProtectModeRE(); // read/execモードに変更
|
||||||
|
// JIT領域を実行
|
||||||
|
|
||||||
|
AutoGrowの場合はreadyの代わりにreadyRE()を呼んでください。
|
||||||
|
|
||||||
|
struct Code : Xbyak::CodeGenerator {
|
||||||
|
Code()
|
||||||
|
: Xbyak::CodeGenerator(4096, Xbyak::AutoGrow) // JIT領域をread/writeのままコード生成
|
||||||
|
{
|
||||||
|
mov(eax, 123);
|
||||||
|
ret();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Code c;
|
||||||
|
c.readyRE(); // read/exeモードに変更
|
||||||
|
// JIT領域を実行
|
||||||
|
|
||||||
|
setProtectModeRW()を呼ぶと領域が元のread/writeモードに戻ります。
|
||||||
|
|
||||||
|
|
||||||
その他詳細は各種サンプルを参照してください。
|
その他詳細は各種サンプルを参照してください。
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
◎マクロ
|
◎マクロ
|
||||||
|
@ -338,6 +373,17 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
◎履歴
|
◎履歴
|
||||||
|
|
||||||
|
2019/03/06 ver 5.77 LLCキャッシュを共有するCPU数の修正(by densamoilov)
|
||||||
|
2019/01/17 ver 5.76 Cpu::getNumCores()追加(by shelleygoel)
|
||||||
|
2018/10/31 ver 5.751 互換性のためにXbyak::CastToの復元
|
||||||
|
2018/10/29 ver 5.75 LabelManagerのデストラクタでLabelから参照を切り離す
|
||||||
|
2018/10/21 ver 5.74 RegRip +/- intの形をサポート Xbyak::CastToを削除
|
||||||
|
2018/10/15 util::StackFrameでmovの代わりにpush/popを使う
|
||||||
|
2018/09/19 ver 5.73 vpslld, vpslldq, vpsllwなどの(reg, mem, imm8)に対するevexエンコーディング修正
|
||||||
|
2018/09/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday)
|
||||||
|
2018/08/27 ver 5.71 新しいlabelインスタンスを返すL()を追加
|
||||||
|
2018/08/27 ver 5.70 read/exec設定のためのsetProtectMode()とDontUseProtectの追加
|
||||||
|
2018/08/24 ver 5.68 indexが16以上のVSIBエンコーディングのバグ修正(thanks to petercaday)
|
||||||
2018/08/14 ver 5.67 Addressクラス内のmutableを削除 ; fix setCacheHierarchy for cloud vm
|
2018/08/14 ver 5.67 Addressクラス内のmutableを削除 ; fix setCacheHierarchy for cloud vm
|
||||||
2018/07/26 ver 5.661 mingw64対応
|
2018/07/26 ver 5.661 mingw64対応
|
||||||
2018/07/24 ver 5.66 protect()のmodeにCodeArray::PROTECT_REを追加
|
2018/07/24 ver 5.66 protect()のmodeにCodeArray::PROTECT_REを追加
|
||||||
|
|
|
@ -10,12 +10,6 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
class Brainfuck : public Xbyak::CodeGenerator {
|
class Brainfuck : public Xbyak::CodeGenerator {
|
||||||
private:
|
|
||||||
enum Direction { B, F };
|
|
||||||
std::string toStr(int labelNo, Direction dir)
|
|
||||||
{
|
|
||||||
return Xbyak::Label::toStr(labelNo) + (dir == B ? 'B' : 'F');
|
|
||||||
}
|
|
||||||
public:
|
public:
|
||||||
int getContinuousChar(std::istream& is, char c)
|
int getContinuousChar(std::istream& is, char c)
|
||||||
{
|
{
|
||||||
|
@ -67,8 +61,7 @@ public:
|
||||||
mov(pGetchar, rsi); // getchar
|
mov(pGetchar, rsi); // getchar
|
||||||
mov(stack, rdx); // stack
|
mov(stack, rdx); // stack
|
||||||
#endif
|
#endif
|
||||||
int labelNo = 0;
|
std::stack<Label> labelF, labelB;
|
||||||
std::stack<int> keepLabelNo;
|
|
||||||
char c;
|
char c;
|
||||||
while (is >> c) {
|
while (is >> c) {
|
||||||
switch (c) {
|
switch (c) {
|
||||||
|
@ -116,17 +109,22 @@ public:
|
||||||
mov(cur, eax);
|
mov(cur, eax);
|
||||||
break;
|
break;
|
||||||
case '[':
|
case '[':
|
||||||
L(toStr(labelNo, B));
|
{
|
||||||
mov(eax, cur);
|
Label B = L();
|
||||||
test(eax, eax);
|
labelB.push(B);
|
||||||
jz(toStr(labelNo, F), T_NEAR);
|
mov(eax, cur);
|
||||||
keepLabelNo.push(labelNo++);
|
test(eax, eax);
|
||||||
|
Label F;
|
||||||
|
jz(F, T_NEAR);
|
||||||
|
labelF.push(F);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case ']':
|
case ']':
|
||||||
{
|
{
|
||||||
int no = keepLabelNo.top(); keepLabelNo.pop();
|
Label B = labelB.top(); labelB.pop();
|
||||||
jmp(toStr(no, B));
|
jmp(B);
|
||||||
L(toStr(no, F));
|
Label F = labelF.top(); labelF.pop();
|
||||||
|
L(F);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
@ -200,7 +198,7 @@ int main(int argc, char *argv[])
|
||||||
Brainfuck bf(ifs);
|
Brainfuck bf(ifs);
|
||||||
if (mode == 0) {
|
if (mode == 0) {
|
||||||
static int stack[128 * 1024];
|
static int stack[128 * 1024];
|
||||||
bf.getCode<void (*)(void*, void*, int *)>()(Xbyak::CastTo<void*>(putchar), Xbyak::CastTo<void*>(getchar), stack);
|
bf.getCode<void (*)(const void*, const void*, int *)>()(reinterpret_cast<const void*>(putchar), reinterpret_cast<const void*>(getchar), stack);
|
||||||
} else {
|
} else {
|
||||||
dump(bf.getCode(), bf.getSize());
|
dump(bf.getCode(), bf.getSize());
|
||||||
}
|
}
|
||||||
|
|
70
sample/protect-re.cpp
Normal file
70
sample/protect-re.cpp
Normal file
|
@ -0,0 +1,70 @@
|
||||||
|
#define XBYAK_NO_OP_NAMES
|
||||||
|
#include <xbyak/xbyak.h>
|
||||||
|
|
||||||
|
// Fixed-size (4096 byte) generator constructed with Xbyak::DontSetProtectRWE.
// NOTE(review): per the accompanying readme this leaves the JIT area
// read/write while code is generated, instead of read/write/exec.
struct Code1 : Xbyak::CodeGenerator {
|
||||||
|
Code1()
|
||||||
|
: Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE)
|
||||||
|
{
|
||||||
|
mov(eax, 123); // value the generated function returns (printed by test1)
|
||||||
|
ret();
|
||||||
|
}
|
||||||
|
// Emits additional data after construction; used by test1 to probe whether
// the buffer is currently writable.
// presumably db(0) appends a single zero byte to the code buffer - TODO confirm
void update()
|
||||||
|
{
|
||||||
|
db(0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Scenario 1 (fixed-size buffer): switch the buffer to read/exec, run the
// generated function, then switch back to read/write and modify the buffer.
// updateCode: when true, the buffer is also modified while it is read/exec,
//             which the author expects to crash (see inline comment).
void test1(bool updateCode)
|
||||||
|
{
|
||||||
|
Code1 c;
|
||||||
|
c.setProtectModeRE(); // buffer becomes read/exec
|
||||||
|
if (updateCode) c.update(); // expected segmentation fault: writing to a read/exec buffer
|
||||||
|
int (*f)() = c.getCode<int (*)()>();
|
||||||
|
printf("f=%d\n", f());
|
||||||
|
|
||||||
|
c.setProtectModeRW(); // make the buffer writable again
|
||||||
|
c.update(); // must succeed now that the buffer is read/write
|
||||||
|
puts("ok");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Same generated code as Code1, but the generator is constructed in
// Xbyak::AutoGrow mode (initial size 4096).
struct Code2 : Xbyak::CodeGenerator {
|
||||||
|
Code2()
|
||||||
|
: Xbyak::CodeGenerator(4096, Xbyak::AutoGrow)
|
||||||
|
{
|
||||||
|
mov(eax, 123); // value the generated function returns (printed by test2)
|
||||||
|
ret();
|
||||||
|
}
|
||||||
|
// Emits additional data after construction; used by test2 to probe whether
// the buffer is currently writable.
// presumably db(0) appends a single zero byte to the code buffer - TODO confirm
void update()
|
||||||
|
{
|
||||||
|
db(0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Scenario 2 (AutoGrow buffer): same flow as test1, except the buffer is
// finalized with readyRE(), which the readme describes as the AutoGrow
// replacement for ready() that also switches the buffer to read/exec.
// updateCode: when true, the buffer is also modified while it is read/exec,
//             which the author expects to crash (see inline comment).
void test2(bool updateCode)
|
||||||
|
{
|
||||||
|
Code2 c;
|
||||||
|
c.readyRE(); // finalize the AutoGrow buffer and make it read/exec
|
||||||
|
if (updateCode) c.update(); // expected segmentation fault: writing to a read/exec buffer
|
||||||
|
int (*f)() = c.getCode<int (*)()>();
|
||||||
|
printf("f=%d\n", f());
|
||||||
|
|
||||||
|
c.setProtectModeRW(); // make the buffer writable again
|
||||||
|
c.update(); // must succeed now that the buffer is read/write
|
||||||
|
puts("ok");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Command line driver: "<prog> <testNum> [update]".
//   testNum - 1 runs test1 (fixed-size buffer), 2 runs test2 (AutoGrow).
//   update  - optional; its value is never inspected, only its presence.
// Prints the usage line and returns 0 when no test number is given.
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
if (argc < 2) {
|
||||||
|
fprintf(stderr, "%s <testNum> [update]\n", argv[0]);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
bool update = argc == 3; // true only when exactly one extra argument follows testNum
|
||||||
|
int n = atoi(argv[1]); // non-numeric input yields 0 and hits the default case
|
||||||
|
printf("n=%d update=%d\n", n, update);
|
||||||
|
switch (n) {
|
||||||
|
case 1: test1(update); break;
|
||||||
|
case 2: test2(update); break;
|
||||||
|
default: fprintf(stderr, "no test %d\n", n); break;
|
||||||
|
}
|
||||||
|
}
|
|
@ -32,7 +32,7 @@ struct Code : Xbyak::CodeGenerator {
|
||||||
|
|
||||||
inline int add(int a, int b)
|
inline int add(int a, int b)
|
||||||
{
|
{
|
||||||
return Xbyak::CastTo<int (*)(int,int)>(buf)(a, b);
|
return reinterpret_cast<int (*)(int, int)>(buf)(a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
|
|
|
@ -77,7 +77,7 @@ public:
|
||||||
#ifdef XBYAK_VARIADIC_TEMPLATE
|
#ifdef XBYAK_VARIADIC_TEMPLATE
|
||||||
call(atoi);
|
call(atoi);
|
||||||
#else
|
#else
|
||||||
call(Xbyak::CastTo<void*>(atoi));
|
call(reinterpret_cast<const void*>(atoi));
|
||||||
#endif
|
#endif
|
||||||
add(esp, 4);
|
add(esp, 4);
|
||||||
#endif
|
#endif
|
||||||
|
@ -96,7 +96,7 @@ public:
|
||||||
mov(rax, (size_t)atoi);
|
mov(rax, (size_t)atoi);
|
||||||
jmp(rax);
|
jmp(rax);
|
||||||
#else
|
#else
|
||||||
jmp(Xbyak::CastTo<void*>(atoi));
|
jmp(reinterpret_cast<const void*>(atoi));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
int (*get() const)(const char *) { return getCode<int (*)(const char *)>(); }
|
int (*get() const)(const char *) { return getCode<int (*)(const char *)>(); }
|
||||||
|
@ -171,8 +171,9 @@ int main()
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
int (*func)(int) = s.getCode<int (*)(int)>();
|
int (*func)(int) = s.getCode<int (*)(int)>();
|
||||||
if (Xbyak::CastTo<uint8*>(func) != p) {
|
const uint8 *funcp = reinterpret_cast<const uint8*>(func);
|
||||||
fprintf(stderr, "internal error %p %p\n", p, Xbyak::CastTo<uint8*>(func));
|
if (funcp != p) {
|
||||||
|
fprintf(stderr, "internal error %p %p\n", p, funcp);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
printf("0 + ... + %d = %d\n", 100, func(100));
|
printf("0 + ... + %d = %d\n", 100, func(100));
|
||||||
|
|
|
@ -104,9 +104,12 @@ void putCPUinfo()
|
||||||
Core i7-3930K 6 2D
|
Core i7-3930K 6 2D
|
||||||
*/
|
*/
|
||||||
cpu.putFamily();
|
cpu.putFamily();
|
||||||
|
if (!cpu.has(Cpu::tINTEL)) return;
|
||||||
for (unsigned int i = 0; i < cpu.getDataCacheLevels(); i++) {
|
for (unsigned int i = 0; i < cpu.getDataCacheLevels(); i++) {
|
||||||
printf("cache level=%u data cache size=%u cores sharing data cache=%u\n", i, cpu.getDataCacheSize(i), cpu.getCoresSharingDataCache(i));
|
printf("cache level=%u data cache size=%u cores sharing data cache=%u\n", i, cpu.getDataCacheSize(i), cpu.getCoresSharingDataCache(i));
|
||||||
}
|
}
|
||||||
|
printf("SmtLevel =%u\n", cpu.getNumCores(Xbyak::util::SmtLevel));
|
||||||
|
printf("CoreLevel=%u\n", cpu.getNumCores(Xbyak::util::CoreLevel));
|
||||||
}
|
}
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
|
|
|
@ -204,7 +204,7 @@ public:
|
||||||
push(reg[r]);
|
push(reg[r]);
|
||||||
push('A' + r);
|
push('A' + r);
|
||||||
push((int)str);
|
push((int)str);
|
||||||
call(Xbyak::CastTo<void*>(printf));
|
call(reinterpret_cast<const void*>(printf));
|
||||||
add(esp, 4 * 4);
|
add(esp, 4 * 4);
|
||||||
pop(ecx);
|
pop(ecx);
|
||||||
pop(edx);
|
pop(edx);
|
||||||
|
|
123
test/jmp.cpp
123
test/jmp.cpp
|
@ -889,6 +889,34 @@ CYBOZU_TEST_AUTO(testNewLabel)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks the argument-less L(), used here as `Label L1 = L();`.
// NOTE(review): per the readme this is equivalent to `Label L1; L(L1);`.
// Expected control flow of the generated function:
//   eax = 0 -> test sets ZF, jnz not taken -> eax = 1 -> call L2 -> eax = 2
//   -> ret back -> jmp L1 -> test clears ZF, jnz exit taken -> eax = 3 -> ret.
CYBOZU_TEST_AUTO(returnLabel)
|
||||||
|
{
|
||||||
|
struct Code : Xbyak::CodeGenerator {
|
||||||
|
Code()
|
||||||
|
{
|
||||||
|
xor_(eax, eax);
|
||||||
|
Label L1 = L(); // label obtained from L(); target of the backward jmp below
|
||||||
|
test(eax, eax);
|
||||||
|
Label exit;
|
||||||
|
jnz(exit); // forward reference; exit is defined further down
|
||||||
|
inc(eax); // 1
|
||||||
|
Label L2;
|
||||||
|
call(L2); // forward reference; L2 is defined after the jmp
|
||||||
|
jmp(L1);
|
||||||
|
L(L2);
|
||||||
|
inc(eax); // 2
|
||||||
|
ret();
|
||||||
|
L(exit);
|
||||||
|
inc(eax); // 3
|
||||||
|
ret();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Code code;
|
||||||
|
int (*f)() = code.getCode<int (*)()>();
|
||||||
|
int r = f();
|
||||||
|
CYBOZU_TEST_EQUAL(r, 3);
|
||||||
|
}
|
||||||
|
|
||||||
CYBOZU_TEST_AUTO(testAssign)
|
CYBOZU_TEST_AUTO(testAssign)
|
||||||
{
|
{
|
||||||
struct Code : Xbyak::CodeGenerator {
|
struct Code : Xbyak::CodeGenerator {
|
||||||
|
@ -987,6 +1015,52 @@ struct GetAddressCode1 : Xbyak::CodeGenerator {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Generates a function int f(int idx) that dispatches through a table of
// label addresses embedded in the code: it loads the table address, jumps
// indirectly through entry idx, and each target returns ret0 / ret1 / ret2.
struct CodeLabelTable : Xbyak::CodeGenerator {
|
||||||
|
enum { ret0 = 3 };
|
||||||
|
enum { ret1 = 5 };
|
||||||
|
enum { ret2 = 8 };
|
||||||
|
CodeLabelTable()
|
||||||
|
{
|
||||||
|
using namespace Xbyak;
|
||||||
|
// p0: register holding the index argument, a: scratch/return register.
// The choice depends on the target: rcx for XBYAK64_WIN, rdi for
// XBYAK64_GCC, otherwise edx loaded from the stack.
#ifdef XBYAK64_WIN
|
||||||
|
const Reg64& p0 = rcx;
|
||||||
|
const Reg64& a = rax;
|
||||||
|
#elif defined (XBYAK64_GCC)
|
||||||
|
const Reg64& p0 = rdi;
|
||||||
|
const Reg64& a = rax;
|
||||||
|
#else
|
||||||
|
const Reg32& p0 = edx;
|
||||||
|
const Reg32& a = eax;
|
||||||
|
mov(edx, ptr [esp + 4]); // presumably the first stack argument (32-bit build) - TODO confirm calling convention
|
||||||
|
#endif
|
||||||
|
Label labelTbl, L0, L1, L2;
|
||||||
|
mov(a, labelTbl); // address of the table defined just below
|
||||||
|
jmp(ptr [a + p0 * sizeof(void*)]); // indirect jump through table entry p0
|
||||||
|
L(labelTbl);
|
||||||
|
// presumably each putL emits one pointer-sized label address - TODO confirm
putL(L0);
|
||||||
|
putL(L1);
|
||||||
|
putL(L2);
|
||||||
|
L(L0);
|
||||||
|
mov(a, ret0);
|
||||||
|
ret();
|
||||||
|
L(L1);
|
||||||
|
mov(a, ret1);
|
||||||
|
ret();
|
||||||
|
L(L2);
|
||||||
|
mov(a, ret2);
|
||||||
|
ret();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Runs the function generated by CodeLabelTable and checks that indices
// 0, 1 and 2 return ret0, ret1 and ret2 respectively.
CYBOZU_TEST_AUTO(LabelTable)
|
||||||
|
{
|
||||||
|
CodeLabelTable c;
|
||||||
|
int (*f)(int) = c.getCode<int (*)(int)>();
|
||||||
|
CYBOZU_TEST_EQUAL(f(0), c.ret0);
|
||||||
|
CYBOZU_TEST_EQUAL(f(1), c.ret1);
|
||||||
|
CYBOZU_TEST_EQUAL(f(2), c.ret2);
|
||||||
|
}
|
||||||
|
|
||||||
CYBOZU_TEST_AUTO(getAddress1)
|
CYBOZU_TEST_AUTO(getAddress1)
|
||||||
{
|
{
|
||||||
GetAddressCode1 c;
|
GetAddressCode1 c;
|
||||||
|
@ -1143,11 +1217,56 @@ CYBOZU_TEST_AUTO(rip_addr_with_fixed_buf)
|
||||||
ret();
|
ret();
|
||||||
}
|
}
|
||||||
} code;
|
} code;
|
||||||
Xbyak::CodeArray::protect(p, 4096, Xbyak::CodeArray::PROTECT_RE);
|
code.setProtectModeRE();
|
||||||
code.getCode<void (*)()>()();
|
code.getCode<void (*)()>()();
|
||||||
CYBOZU_TEST_EQUAL(*x0, 123);
|
CYBOZU_TEST_EQUAL(*x0, 123);
|
||||||
CYBOZU_TEST_EQUAL(*x1, 456);
|
CYBOZU_TEST_EQUAL(*x1, 456);
|
||||||
CYBOZU_TEST_EQUAL(buf[8], 99);
|
CYBOZU_TEST_EQUAL(buf[8], 99);
|
||||||
Xbyak::CodeArray::protect(p, 4096, Xbyak::CodeArray::PROTECT_RW);
|
code.setProtectModeRW();
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Helper generator for release_label_after_code. It uses three labels owned
// by the caller: L1 and L2 are defined with L(), L3 is only jumped to and
// never defined.
struct ReleaseTestCode : Xbyak::CodeGenerator {
|
||||||
|
ReleaseTestCode(Label& L1, Label& L2, Label& L3)
|
||||||
|
{
|
||||||
|
L(L1);
|
||||||
|
jmp(L1);
|
||||||
|
L(L2);
|
||||||
|
jmp(L3); // L3 is referenced here but never defined with L()
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
code must unlink label if code is destroyed
|
||||||
|
*/
|
||||||
|
// Labels declared outside a generator must be detached from it when the
// generator is destroyed. While the generator is alive, L1/L2 have a non-zero
// id and address, and L3 (never defined) has an id but address 0. After the
// generator goes out of scope, L1, L2, L4 and L5 must report id 0 and
// address 0; for L3 only the address is checked.
CYBOZU_TEST_AUTO(release_label_after_code)
|
||||||
|
{
|
||||||
|
puts("---");
|
||||||
|
{
|
||||||
|
Label L1, L2, L3, L4, L5;
|
||||||
|
{
|
||||||
|
ReleaseTestCode code(L1, L2, L3);
|
||||||
|
CYBOZU_TEST_ASSERT(L1.getId() > 0);
|
||||||
|
CYBOZU_TEST_ASSERT(L1.getAddress() != 0);
|
||||||
|
CYBOZU_TEST_ASSERT(L2.getId() > 0);
|
||||||
|
CYBOZU_TEST_ASSERT(L2.getAddress() != 0);
|
||||||
|
CYBOZU_TEST_ASSERT(L3.getId() > 0);
|
||||||
|
CYBOZU_TEST_ASSERT(L3.getAddress() == 0); // L3 is not assigned
|
||||||
|
code.assignL(L4, L1); // L4 is tied to L1 through the generator
|
||||||
|
L5 = L1; // L5 is a plain copy of L1
|
||||||
|
printf("id=%d %d %d %d %d\n", L1.getId(), L2.getId(), L3.getId(), L4.getId(), L5.getId());
|
||||||
|
} // generator destroyed here
|
||||||
|
puts("code is released");
|
||||||
|
CYBOZU_TEST_ASSERT(L1.getId() == 0);
|
||||||
|
CYBOZU_TEST_ASSERT(L1.getAddress() == 0);
|
||||||
|
CYBOZU_TEST_ASSERT(L2.getId() == 0);
|
||||||
|
CYBOZU_TEST_ASSERT(L2.getAddress() == 0);
|
||||||
|
// CYBOZU_TEST_ASSERT(L3.getId() == 0); // L3 is not assigned so not cleared
|
||||||
|
CYBOZU_TEST_ASSERT(L3.getAddress() == 0);
|
||||||
|
CYBOZU_TEST_ASSERT(L4.getId() == 0);
|
||||||
|
CYBOZU_TEST_ASSERT(L4.getAddress() == 0);
|
||||||
|
CYBOZU_TEST_ASSERT(L5.getId() == 0);
|
||||||
|
CYBOZU_TEST_ASSERT(L5.getAddress() == 0);
|
||||||
|
printf("id=%d %d %d %d %d\n", L1.getId(), L2.getId(), L3.getId(), L4.getId(), L5.getId());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -73,7 +73,6 @@ const uint64 YMM_ER = 1ULL << 36;
|
||||||
const uint64 VM32Y_K = 1ULL << 37;
|
const uint64 VM32Y_K = 1ULL << 37;
|
||||||
const uint64 IMM_2 = 1ULL << 38;
|
const uint64 IMM_2 = 1ULL << 38;
|
||||||
const uint64 IMM = IMM_1 | IMM_2;
|
const uint64 IMM = IMM_1 | IMM_2;
|
||||||
const uint64 XMM = _XMM | _XMM2;
|
|
||||||
const uint64 YMM = _YMM | _YMM2;
|
const uint64 YMM = _YMM | _YMM2;
|
||||||
const uint64 K = 1ULL << 43;
|
const uint64 K = 1ULL << 43;
|
||||||
const uint64 _ZMM = 1ULL << 44;
|
const uint64 _ZMM = 1ULL << 44;
|
||||||
|
@ -90,7 +89,10 @@ const uint64 ZMM_SAE = 1ULL << 48;
|
||||||
const uint64 ZMM_ER = 1ULL << 49;
|
const uint64 ZMM_ER = 1ULL << 49;
|
||||||
#ifdef XBYAK64
|
#ifdef XBYAK64
|
||||||
const uint64 _XMM3 = 1ULL << 50;
|
const uint64 _XMM3 = 1ULL << 50;
|
||||||
|
#else
|
||||||
|
const uint64 _XMM3 = 0;
|
||||||
#endif
|
#endif
|
||||||
|
const uint64 XMM = _XMM | _XMM2 | _XMM3;
|
||||||
const uint64 XMM_SAE = 1ULL << 51;
|
const uint64 XMM_SAE = 1ULL << 51;
|
||||||
#ifdef XBYAK64
|
#ifdef XBYAK64
|
||||||
const uint64 XMM_KZ = 1ULL << 52;
|
const uint64 XMM_KZ = 1ULL << 52;
|
||||||
|
@ -352,7 +354,8 @@ class Test {
|
||||||
case VM32Y_K:
|
case VM32Y_K:
|
||||||
return isXbyak_ ? "ptr [64+ymm13*2+r13] | k6" : "[64+ymm13*2+r13]{k6}";
|
return isXbyak_ ? "ptr [64+ymm13*2+r13] | k6" : "[64+ymm13*2+r13]{k6}";
|
||||||
case VM32Z_K:
|
case VM32Z_K:
|
||||||
return isXbyak_ ? "ptr [64+zmm13*2+r13] | k6" : "[64+zmm13*2+r13]{k6}";
|
if (idx & 1) return isXbyak_ ? "ptr [64+zmm10*8+r9] | k6" : "[64+zmm10*8+r9]{k6}";
|
||||||
|
return isXbyak_ ? "ptr [64+zmm30*2+r13] | k6" : "[64+zmm30*2+r13]{k6}";
|
||||||
case VM32Z:
|
case VM32Z:
|
||||||
return isXbyak_ ? "ptr [64+zmm13*2+rcx]" : "[64+zmm13*2+rcx]";
|
return isXbyak_ ? "ptr [64+zmm13*2+rcx]" : "[64+zmm13*2+rcx]";
|
||||||
case M_1to2: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to2}";
|
case M_1to2: return isXbyak_ ? "ptr_b [eax+32]" : "[eax+32]{1to2}";
|
||||||
|
@ -607,7 +610,7 @@ public:
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const Tbl *p = &tbl[i];
|
const Tbl *p = &tbl[i];
|
||||||
put(p->name, K, _XMM, _XMM | MEM, IMM8);
|
put(p->name, K, XMM, _XMM | MEM, IMM8);
|
||||||
if (!p->supportYMM) continue;
|
if (!p->supportYMM) continue;
|
||||||
put(p->name, K, _YMM, _YMM | MEM, IMM8);
|
put(p->name, K, _YMM, _YMM | MEM, IMM8);
|
||||||
put(p->name, K, _ZMM, _ZMM | MEM, IMM8);
|
put(p->name, K, _ZMM, _ZMM | MEM, IMM8);
|
||||||
|
@ -626,10 +629,10 @@ public:
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const Tbl *p = &tbl[i];
|
const Tbl *p = &tbl[i];
|
||||||
put(p->name, XMM | _XMM3, XMM_SAE | XMM | MEM);
|
put(p->name, XMM, XMM_SAE | XMM | MEM);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
put("vcomiss", _XMM3, XMM | MEM);
|
put("vcomiss", XMM, _XMM3 | MEM);
|
||||||
put("vcomiss", XMM, XMM_SAE);
|
put("vcomiss", XMM, XMM_SAE);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -673,10 +676,10 @@ public:
|
||||||
"vpbroadcastq",
|
"vpbroadcastq",
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
put(tbl[i], XMM_KZ | ZMM_KZ, _XMM | _MEM);
|
put(tbl[i], XMM_KZ | ZMM_KZ, XMM | _MEM);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
put("vbroadcasti32x2", XMM_KZ | YMM_KZ | ZMM_KZ, _XMM | _MEM);
|
put("vbroadcasti32x2", XMM_KZ | YMM_KZ | ZMM_KZ, XMM | _MEM);
|
||||||
put("vbroadcasti32x4", YMM_KZ | ZMM_KZ, _MEM);
|
put("vbroadcasti32x4", YMM_KZ | ZMM_KZ, _MEM);
|
||||||
put("vbroadcasti64x2", YMM_KZ | ZMM_KZ, _MEM);
|
put("vbroadcasti64x2", YMM_KZ | ZMM_KZ, _MEM);
|
||||||
put("vbroadcasti32x8", ZMM_KZ, _MEM);
|
put("vbroadcasti32x8", ZMM_KZ, _MEM);
|
||||||
|
@ -684,14 +687,14 @@ public:
|
||||||
}
|
}
|
||||||
void putMisc1()
|
void putMisc1()
|
||||||
{
|
{
|
||||||
put("vmaskmovps", XMM, XMM, MEM);
|
put("vmaskmovps", _XMM, _XMM, MEM);
|
||||||
put("vmaskmovps", YMM, YMM, MEM);
|
put("vmaskmovps", YMM, YMM, MEM);
|
||||||
|
|
||||||
put("vmaskmovpd", YMM, YMM, MEM);
|
put("vmaskmovpd", YMM, YMM, MEM);
|
||||||
put("vmaskmovpd", XMM, XMM, MEM);
|
put("vmaskmovpd", _XMM, _XMM, MEM);
|
||||||
|
|
||||||
put("vmaskmovps", MEM, XMM, XMM);
|
put("vmaskmovps", MEM, _XMM, _XMM);
|
||||||
put("vmaskmovpd", MEM, XMM, XMM);
|
put("vmaskmovpd", MEM, _XMM, _XMM);
|
||||||
|
|
||||||
put("vbroadcastf128", YMM, MEM);
|
put("vbroadcastf128", YMM, MEM);
|
||||||
put("vbroadcasti128", YMM, MEM);
|
put("vbroadcasti128", YMM, MEM);
|
||||||
|
@ -710,8 +713,8 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
put("vinsertf128", YMM, YMM, XMM | MEM, IMM8);
|
put("vinsertf128", YMM, YMM, _XMM | _XMM2 | MEM, IMM8);
|
||||||
put("vinserti128", YMM, YMM, XMM | MEM, IMM8);
|
put("vinserti128", YMM, YMM, _XMM | _XMM2 | MEM, IMM8);
|
||||||
put("vperm2f128", YMM, YMM, YMM | MEM, IMM8);
|
put("vperm2f128", YMM, YMM, YMM | MEM, IMM8);
|
||||||
put("vperm2i128", YMM, YMM, YMM | MEM, IMM8);
|
put("vperm2i128", YMM, YMM, YMM | MEM, IMM8);
|
||||||
|
|
||||||
|
@ -721,9 +724,9 @@ public:
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const char *name = tbl[i];
|
const char *name = tbl[i];
|
||||||
put(name, XMM, XMM, MEM);
|
put(name, _XMM, _XMM, MEM);
|
||||||
put(name, YMM, YMM, MEM);
|
put(name, YMM, YMM, MEM);
|
||||||
put(name, MEM, XMM, XMM);
|
put(name, MEM, _XMM, _XMM);
|
||||||
put(name, MEM, YMM, YMM);
|
put(name, MEM, YMM, YMM);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -760,29 +763,29 @@ public:
|
||||||
put(name, MEM, ZMM);
|
put(name, MEM, ZMM);
|
||||||
put(name, ZMM, MEM);
|
put(name, ZMM, MEM);
|
||||||
#ifdef XBYAK64
|
#ifdef XBYAK64
|
||||||
put(name, MEM, _XMM3);
|
put(name, MEM, XMM);
|
||||||
put(name, _XMM3, MEM);
|
put(name, XMM, MEM);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void put_vmov()
|
void put_vmov()
|
||||||
{
|
{
|
||||||
#ifdef XBYAK64
|
#ifdef XBYAK64
|
||||||
put("vmovd", _XMM3, MEM|REG32);
|
put("vmovd", XMM, MEM|REG32);
|
||||||
put("vmovd", MEM|REG32, _XMM3);
|
put("vmovd", MEM|REG32, XMM);
|
||||||
put("vmovq", _XMM3, MEM|REG64|XMM);
|
put("vmovq", XMM, MEM|REG64|XMM);
|
||||||
put("vmovq", MEM|REG64|XMM, _XMM3);
|
put("vmovq", MEM|REG64|XMM, XMM);
|
||||||
put("vmovhlps", _XMM3, _XMM3, _XMM3);
|
put("vmovhlps", XMM, _XMM3, _XMM3);
|
||||||
put("vmovlhps", _XMM3, _XMM3, _XMM3);
|
put("vmovlhps", XMM, _XMM3, _XMM3);
|
||||||
put("vmovntdqa", _XMM3|_YMM3|ZMM, MEM);
|
put("vmovntdqa", XMM|_YMM3|ZMM, MEM);
|
||||||
put("vmovntdq", MEM, _XMM3 | _YMM3 | ZMM);
|
put("vmovntdq", MEM, XMM | _YMM3 | ZMM);
|
||||||
put("vmovntpd", MEM, _XMM3 | _YMM3 | ZMM);
|
put("vmovntpd", MEM, XMM | _YMM3 | ZMM);
|
||||||
put("vmovntps", MEM, _XMM3 | _YMM3 | ZMM);
|
put("vmovntps", MEM, XMM | _YMM3 | ZMM);
|
||||||
|
|
||||||
put("vmovsd", XMM_KZ, _XMM3, _XMM3);
|
put("vmovsd", XMM_KZ, XMM, _XMM3);
|
||||||
put("vmovsd", XMM_KZ, MEM);
|
put("vmovsd", XMM_KZ, MEM);
|
||||||
put("vmovsd", MEM_K, XMM);
|
put("vmovsd", MEM_K, XMM);
|
||||||
put("vmovss", XMM_KZ, _XMM3, _XMM3);
|
put("vmovss", XMM_KZ, XMM, _XMM3);
|
||||||
put("vmovss", XMM_KZ, MEM);
|
put("vmovss", XMM_KZ, MEM);
|
||||||
put("vmovss", MEM_K, XMM);
|
put("vmovss", MEM_K, XMM);
|
||||||
|
|
||||||
|
@ -797,7 +800,7 @@ public:
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const char *name = tbl[i];
|
const char *name = tbl[i];
|
||||||
put(name, XMM_KZ, _XMM, _XMM | MEM, IMM);
|
put(name, XMM_KZ, XMM, _XMM | MEM, IMM);
|
||||||
put(name, _YMM3, _YMM3, _YMM3 | _MEM, IMM);
|
put(name, _YMM3, _YMM3, _YMM3 | _MEM, IMM);
|
||||||
put(name, _ZMM, _ZMM, _ZMM | _MEM, IMM);
|
put(name, _ZMM, _ZMM, _ZMM | _MEM, IMM);
|
||||||
}
|
}
|
||||||
|
@ -810,7 +813,7 @@ public:
|
||||||
"vmovlps",
|
"vmovlps",
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
put(tbl[i], _XMM3, _XMM3, MEM);
|
put(tbl[i], XMM, _XMM3, MEM);
|
||||||
put(tbl[i], MEM, _XMM3);
|
put(tbl[i], MEM, _XMM3);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -836,11 +839,11 @@ public:
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const Tbl& p = tbl[i];
|
const Tbl& p = tbl[i];
|
||||||
put(p.name, _XMM|XMM_KZ, _XMM|MEM);
|
put(p.name, XMM|XMM_KZ, _XMM|MEM);
|
||||||
put(p.name, _YMM|YMM_KZ, _YMM|MEM);
|
put(p.name, _YMM|YMM_KZ, _YMM|MEM);
|
||||||
put(p.name, _ZMM|ZMM_KZ, _ZMM|MEM);
|
put(p.name, _ZMM|ZMM_KZ, _ZMM|MEM);
|
||||||
if (!p.M_X) continue;
|
if (!p.M_X) continue;
|
||||||
put(p.name, MEM|MEM_K, _XMM);
|
put(p.name, MEM|MEM_K, XMM);
|
||||||
put(p.name, MEM|MEM_K, _YMM);
|
put(p.name, MEM|MEM_K, _YMM);
|
||||||
put(p.name, MEM|MEM_K, _ZMM);
|
put(p.name, MEM|MEM_K, _ZMM);
|
||||||
}
|
}
|
||||||
|
@ -857,7 +860,7 @@ public:
|
||||||
put("vpabsd", ZMM_KZ, M_1to16 | _MEM);
|
put("vpabsd", ZMM_KZ, M_1to16 | _MEM);
|
||||||
put("vpabsq", ZMM_KZ, M_1to8 | _MEM);
|
put("vpabsq", ZMM_KZ, M_1to8 | _MEM);
|
||||||
|
|
||||||
put("vbroadcastf32x2", YMM_KZ | ZMM_KZ, _XMM | _MEM);
|
put("vbroadcastf32x2", YMM_KZ | ZMM_KZ, XMM | _MEM);
|
||||||
put("vbroadcastf32x4", YMM_KZ | ZMM_KZ, _MEM);
|
put("vbroadcastf32x4", YMM_KZ | ZMM_KZ, _MEM);
|
||||||
|
|
||||||
put("vbroadcastf64x2", YMM_KZ | ZMM_KZ, _MEM);
|
put("vbroadcastf64x2", YMM_KZ | ZMM_KZ, _MEM);
|
||||||
|
@ -879,7 +882,7 @@ public:
|
||||||
};
|
};
|
||||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||||
const Tbl& p = tbl[i];
|
const Tbl& p = tbl[i];
|
||||||
put(p.name, XMM_KZ, _XMM, _XMM|p.mem);
|
put(p.name, XMM_KZ, XMM, _XMM|p.mem);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void put512_X3()
|
void put512_X3()
|
||||||
|
@ -891,54 +894,54 @@ public:
|
||||||
uint64_t x2;
|
uint64_t x2;
|
||||||
uint64_t xm;
|
uint64_t xm;
|
||||||
} tbl[] = {
|
} tbl[] = {
|
||||||
{ "vpacksswb", XMM_KZ, _XMM, _XMM | _MEM },
|
{ "vpacksswb", XMM_KZ, XMM, _XMM | _MEM },
|
||||||
{ "vpacksswb", YMM_KZ, _YMM, _YMM | _MEM },
|
{ "vpacksswb", YMM_KZ, _YMM, _YMM | _MEM },
|
||||||
{ "vpacksswb", ZMM_KZ, _ZMM, _ZMM | _MEM },
|
{ "vpacksswb", ZMM_KZ, _ZMM, _ZMM | _MEM },
|
||||||
|
|
||||||
{ "vpackssdw", XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
|
{ "vpackssdw", XMM_KZ, XMM, _XMM | M_1to4 | _MEM },
|
||||||
{ "vpackssdw", YMM_KZ, _YMM, _YMM | M_1to8 | _MEM },
|
{ "vpackssdw", YMM_KZ, _YMM, _YMM | M_1to8 | _MEM },
|
||||||
{ "vpackssdw", ZMM_KZ, _ZMM, _ZMM | M_1to16 | _MEM },
|
{ "vpackssdw", ZMM_KZ, _ZMM, _ZMM | M_1to16 | _MEM },
|
||||||
|
|
||||||
{ "vpackusdw", XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
|
{ "vpackusdw", XMM_KZ, XMM, _XMM | M_1to4 | _MEM },
|
||||||
{ "vpackusdw", YMM_KZ, _YMM, _YMM | M_1to8 | _MEM },
|
{ "vpackusdw", YMM_KZ, _YMM, _YMM | M_1to8 | _MEM },
|
||||||
{ "vpackusdw", ZMM_KZ, _ZMM, _ZMM | M_1to16 | _MEM },
|
{ "vpackusdw", ZMM_KZ, _ZMM, _ZMM | M_1to16 | _MEM },
|
||||||
|
|
||||||
{ "vpackuswb", XMM_KZ, _XMM, _XMM | _MEM },
|
{ "vpackuswb", XMM_KZ, XMM, _XMM | _MEM },
|
||||||
{ "vpackuswb", YMM_KZ, _YMM, _YMM | _MEM },
|
{ "vpackuswb", YMM_KZ, _YMM, _YMM | _MEM },
|
||||||
{ "vpackuswb", ZMM_KZ, _ZMM, _ZMM | _MEM },
|
{ "vpackuswb", ZMM_KZ, _ZMM, _ZMM | _MEM },
|
||||||
|
|
||||||
{ "vpaddb", XMM_KZ, _XMM, _XMM | _MEM },
|
{ "vpaddb", XMM_KZ, XMM, _XMM | _MEM },
|
||||||
{ "vpaddw", XMM_KZ, _XMM, _XMM | _MEM },
|
{ "vpaddw", XMM_KZ, _XMM, _XMM | _MEM },
|
||||||
{ "vpaddd", XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
|
{ "vpaddd", XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
|
||||||
{ "vpaddq", ZMM_KZ, _ZMM, M_1to8 | _MEM },
|
{ "vpaddq", ZMM_KZ, _ZMM, M_1to8 | _MEM },
|
||||||
|
|
||||||
{ "vpaddsb", XMM_KZ, _XMM, _XMM | _MEM },
|
{ "vpaddsb", XMM_KZ, XMM, _XMM | _MEM },
|
||||||
{ "vpaddsb", ZMM_KZ, _ZMM, _ZMM | _MEM },
|
{ "vpaddsb", ZMM_KZ, _ZMM, _ZMM | _MEM },
|
||||||
|
|
||||||
{ "vpaddsw", XMM_KZ, _XMM, _XMM | _MEM },
|
{ "vpaddsw", XMM_KZ, XMM, _XMM | _MEM },
|
||||||
{ "vpaddsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
|
{ "vpaddsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
|
||||||
|
|
||||||
{ "vpaddusb", XMM_KZ, _XMM, _XMM | MEM },
|
{ "vpaddusb", XMM_KZ, XMM, _XMM | MEM },
|
||||||
{ "vpaddusb", ZMM_KZ, _ZMM, _ZMM | MEM },
|
{ "vpaddusb", ZMM_KZ, _ZMM, _ZMM | MEM },
|
||||||
|
|
||||||
{ "vpaddusw", XMM_KZ, _XMM, _XMM | MEM },
|
{ "vpaddusw", XMM_KZ, XMM, _XMM | MEM },
|
||||||
{ "vpaddusw", ZMM_KZ, _ZMM, _ZMM | MEM },
|
{ "vpaddusw", ZMM_KZ, _ZMM, _ZMM | MEM },
|
||||||
|
|
||||||
{ "vpsubb", XMM_KZ, _XMM, _XMM | _MEM },
|
{ "vpsubb", XMM_KZ, XMM, _XMM | _MEM },
|
||||||
{ "vpsubw", XMM_KZ, _XMM, _XMM | _MEM },
|
{ "vpsubw", XMM_KZ, XMM, _XMM | _MEM },
|
||||||
{ "vpsubd", XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
|
{ "vpsubd", XMM_KZ, XMM, _XMM | M_1to4 | _MEM },
|
||||||
{ "vpsubq", ZMM_KZ, _ZMM, M_1to8 | _MEM },
|
{ "vpsubq", ZMM_KZ, _ZMM, M_1to8 | _MEM },
|
||||||
|
|
||||||
{ "vpsubsb", XMM_KZ, _XMM, _XMM | _MEM },
|
{ "vpsubsb", XMM_KZ, XMM, _XMM | _MEM },
|
||||||
{ "vpsubsb", ZMM_KZ, _ZMM, _ZMM | _MEM },
|
{ "vpsubsb", ZMM_KZ, _ZMM, _ZMM | _MEM },
|
||||||
|
|
||||||
{ "vpsubsw", XMM_KZ, _XMM, _XMM | _MEM },
|
{ "vpsubsw", XMM_KZ, XMM, _XMM | _MEM },
|
||||||
{ "vpsubsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
|
{ "vpsubsw", ZMM_KZ, _ZMM, _ZMM | _MEM },
|
||||||
|
|
||||||
{ "vpsubusb", XMM_KZ, _XMM, _XMM | MEM },
|
{ "vpsubusb", XMM_KZ, XMM, _XMM | MEM },
|
||||||
{ "vpsubusb", ZMM_KZ, _ZMM, _ZMM | MEM },
|
{ "vpsubusb", ZMM_KZ, _ZMM, _ZMM | MEM },
|
||||||
|
|
||||||
{ "vpsubusw", XMM_KZ, _XMM, _XMM | MEM },
|
{ "vpsubusw", XMM_KZ, XMM, _XMM | MEM },
|
||||||
{ "vpsubusw", ZMM_KZ, _ZMM, _ZMM | MEM },
|
{ "vpsubusw", ZMM_KZ, _ZMM, _ZMM | MEM },
|
||||||
|
|
||||||
{ "vpandd", ZMM_KZ, _ZMM, _ZMM | M_1to16 | _MEM },
|
{ "vpandd", ZMM_KZ, _ZMM, _ZMM | M_1to16 | _MEM },
|
||||||
|
@ -983,137 +986,137 @@ public:
|
||||||
{ "vpminud", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 },
|
{ "vpminud", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to16 },
|
||||||
{ "vpminuq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 },
|
{ "vpminuq", ZMM_KZ, _ZMM, _ZMM | _MEM | M_1to8 },
|
||||||
|
|
||||||
{ "vpslldq", _XMM3, _XMM3 | _MEM, IMM8 },
|
{ "vpslldq", XMM, _XMM3 | _MEM, IMM8 },
|
||||||
{ "vpslldq", _YMM3, _YMM3 | _MEM, IMM8 },
|
{ "vpslldq", _YMM3, _YMM3 | _MEM, IMM8 },
|
||||||
{ "vpslldq", _ZMM, _ZMM | _MEM, IMM8 },
|
{ "vpslldq", _ZMM, _ZMM | _MEM, IMM8 },
|
||||||
|
|
||||||
{ "vpsrldq", _XMM3, _XMM3 | _MEM, IMM8 },
|
{ "vpsrldq", XMM, _XMM3 | _MEM, IMM8 },
|
||||||
{ "vpsrldq", _YMM3, _YMM3 | _MEM, IMM8 },
|
{ "vpsrldq", _YMM3, _YMM3 | _MEM, IMM8 },
|
||||||
{ "vpsrldq", _ZMM, _ZMM | _MEM, IMM8 },
|
{ "vpsrldq", _ZMM, _ZMM | _MEM, IMM8 },
|
||||||
|
|
||||||
{ "vpsraw", XMM_KZ, _XMM | _MEM, IMM8 },
|
{ "vpsraw", XMM_KZ, XMM | _MEM, IMM8 },
|
||||||
{ "vpsraw", ZMM_KZ, _ZMM | _MEM, IMM8 },
|
{ "vpsraw", ZMM_KZ, _ZMM | _MEM, IMM8 },
|
||||||
|
|
||||||
{ "vpsrad", XMM_KZ, _XMM | M_1to4 | _MEM, IMM8 },
|
{ "vpsrad", XMM_KZ, XMM | M_1to4 | _MEM, IMM8 },
|
||||||
{ "vpsrad", ZMM_KZ, _ZMM | M_1to16 | _MEM, IMM8 },
|
{ "vpsrad", ZMM_KZ, _ZMM | M_1to16 | _MEM, IMM8 },
|
||||||
|
|
||||||
{ "vpsraq", XMM, XMM, IMM8 },
|
{ "vpsraq", XMM, XMM, IMM8 },
|
||||||
{ "vpsraq", XMM_KZ, _XMM | M_1to2 | _MEM, IMM8 },
|
{ "vpsraq", XMM_KZ, XMM | M_1to2 | _MEM, IMM8 },
|
||||||
{ "vpsraq", ZMM_KZ, _ZMM | M_1to8 | _MEM, IMM8 },
|
{ "vpsraq", ZMM_KZ, _ZMM | M_1to8 | _MEM, IMM8 },
|
||||||
|
|
||||||
{ "vpsllw", _XMM3, _XMM3 | _MEM, IMM8 },
|
{ "vpsllw", XMM, _XMM3 | _MEM, IMM8 },
|
||||||
{ "vpslld", _XMM3, _XMM3 | _MEM | M_1to4, IMM8 },
|
{ "vpslld", XMM, _XMM3 | _MEM | M_1to4, IMM8 },
|
||||||
{ "vpsllq", _XMM3, _XMM3 | _MEM | M_1to2, IMM8 },
|
{ "vpsllq", XMM, _XMM3 | _MEM | M_1to2, IMM8 },
|
||||||
|
|
||||||
{ "vpsrlw", XMM_KZ, _XMM | _MEM, IMM8 },
|
{ "vpsrlw", XMM_KZ, XMM | _MEM, IMM8 },
|
||||||
{ "vpsrlw", ZMM_KZ, _ZMM | _MEM, IMM8 },
|
{ "vpsrlw", ZMM_KZ, _ZMM | _MEM, IMM8 },
|
||||||
|
|
||||||
{ "vpsrld", XMM_KZ, _XMM | M_1to4 | _MEM, IMM8 },
|
{ "vpsrld", XMM_KZ, XMM | M_1to4 | _MEM, IMM8 },
|
||||||
{ "vpsrld", ZMM_KZ, _ZMM | M_1to16 | _MEM, IMM8 },
|
{ "vpsrld", ZMM_KZ, _ZMM | M_1to16 | _MEM, IMM8 },
|
||||||
|
|
||||||
{ "vpsrlq", _XMM3, _XMM3 | _MEM | M_1to2, IMM8 },
|
{ "vpsrlq", XMM, _XMM3 | _MEM | M_1to2, IMM8 },
|
||||||
{ "vpsrlq", _ZMM, _ZMM | _MEM | M_1to8, IMM8 },
|
{ "vpsrlq", _ZMM, _ZMM | _MEM | M_1to8, IMM8 },
|
||||||
|
|
||||||
{ "vpsravw", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
|
{ "vpsravw", XMM_KZ | XMM, _XMM, _XMM | _MEM },
|
||||||
{ "vpsravw", _ZMM, _ZMM, _MEM },
|
{ "vpsravw", _ZMM, _ZMM, _MEM },
|
||||||
|
|
||||||
{ "vpsravd", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
|
{ "vpsravd", XMM_KZ | XMM, _XMM, _XMM | _MEM },
|
||||||
{ "vpsravd", _ZMM, _ZMM, M_1to16 | _MEM },
|
{ "vpsravd", _ZMM, _ZMM, M_1to16 | _MEM },
|
||||||
|
|
||||||
{ "vpsravq", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
|
{ "vpsravq", XMM_KZ | XMM, _XMM, _XMM | _MEM },
|
||||||
{ "vpsravq", _ZMM, _ZMM, M_1to8 | _MEM },
|
{ "vpsravq", _ZMM, _ZMM, M_1to8 | _MEM },
|
||||||
|
|
||||||
{ "vpsllvw", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
|
{ "vpsllvw", XMM_KZ | XMM, _XMM, _XMM | _MEM },
|
||||||
{ "vpsllvw", _ZMM, _ZMM, _MEM },
|
{ "vpsllvw", _ZMM, _ZMM, _MEM },
|
||||||
|
|
||||||
{ "vpsllvd", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
|
{ "vpsllvd", XMM_KZ | XMM, _XMM, _XMM | _MEM },
|
||||||
{ "vpsllvd", _ZMM, _ZMM, M_1to16 | _MEM },
|
{ "vpsllvd", _ZMM, _ZMM, M_1to16 | _MEM },
|
||||||
|
|
||||||
{ "vpsllvq", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
|
{ "vpsllvq", XMM_KZ | XMM, _XMM, _XMM | _MEM },
|
||||||
{ "vpsllvq", _ZMM, _ZMM, M_1to8 | _MEM },
|
{ "vpsllvq", _ZMM, _ZMM, M_1to8 | _MEM },
|
||||||
|
|
||||||
{ "vpsrlvw", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
|
{ "vpsrlvw", XMM_KZ | XMM, _XMM, _XMM | _MEM },
|
||||||
{ "vpsrlvw", _ZMM, _ZMM, _MEM },
|
{ "vpsrlvw", _ZMM, _ZMM, _MEM },
|
||||||
|
|
||||||
{ "vpsrlvd", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
|
{ "vpsrlvd", XMM_KZ | XMM, _XMM, _XMM | _MEM },
|
||||||
{ "vpsrlvd", _ZMM, _ZMM, M_1to16 | _MEM },
|
{ "vpsrlvd", _ZMM, _ZMM, M_1to16 | _MEM },
|
||||||
|
|
||||||
{ "vpsrlvq", XMM_KZ | _XMM, _XMM, _XMM | _MEM },
|
{ "vpsrlvq", XMM_KZ | XMM, _XMM, _XMM | _MEM },
|
||||||
{ "vpsrlvq", _ZMM, _ZMM, M_1to8 | _MEM },
|
{ "vpsrlvq", _ZMM, _ZMM, M_1to8 | _MEM },
|
||||||
|
|
||||||
{ "vpshufb", _XMM | XMM_KZ, _XMM, _XMM | _MEM },
|
{ "vpshufb", XMM | XMM_KZ, _XMM, _XMM | _MEM },
|
||||||
{ "vpshufb", ZMM_KZ, _ZMM, _MEM },
|
{ "vpshufb", ZMM_KZ, _ZMM, _MEM },
|
||||||
|
|
||||||
{ "vpshufhw", _XMM | XMM_KZ, _XMM | _MEM, IMM8 },
|
{ "vpshufhw", XMM | XMM_KZ, _XMM | _MEM, IMM8 },
|
||||||
{ "vpshufhw", ZMM_KZ, _MEM, IMM8 },
|
{ "vpshufhw", ZMM_KZ, _MEM, IMM8 },
|
||||||
|
|
||||||
{ "vpshuflw", _XMM | XMM_KZ, _XMM | _MEM, IMM8 },
|
{ "vpshuflw", XMM | XMM_KZ, _XMM | _MEM, IMM8 },
|
||||||
{ "vpshuflw", ZMM_KZ, _MEM, IMM8 },
|
{ "vpshuflw", ZMM_KZ, _MEM, IMM8 },
|
||||||
|
|
||||||
{ "vpshufd", _XMM | XMM_KZ, _XMM | M_1to4 | _MEM, IMM8 },
|
{ "vpshufd", XMM | XMM_KZ, _XMM | M_1to4 | _MEM, IMM8 },
|
||||||
{ "vpshufd", _ZMM | ZMM_KZ, _ZMM | M_1to16 | _MEM, IMM8 },
|
{ "vpshufd", _ZMM | ZMM_KZ, _ZMM | M_1to16 | _MEM, IMM8 },
|
||||||
|
|
||||||
{ "vpord", _XMM | XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
|
{ "vpord", XMM | XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
|
||||||
{ "vpord", _ZMM | ZMM_KZ, _ZMM, M_1to16 | _MEM },
|
{ "vpord", _ZMM | ZMM_KZ, _ZMM, M_1to16 | _MEM },
|
||||||
|
|
||||||
{ "vporq", _XMM | XMM_KZ, _XMM, _XMM | M_1to2 | _MEM },
|
{ "vporq", XMM | XMM_KZ, _XMM, _XMM | M_1to2 | _MEM },
|
||||||
{ "vporq", _ZMM | ZMM_KZ, _ZMM, M_1to8 | _MEM },
|
{ "vporq", _ZMM | ZMM_KZ, _ZMM, M_1to8 | _MEM },
|
||||||
|
|
||||||
{ "vpxord", _XMM | XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
|
{ "vpxord", XMM | XMM_KZ, _XMM, _XMM | M_1to4 | _MEM },
|
||||||
{ "vpxord", _ZMM | ZMM_KZ, _ZMM, M_1to16 | _MEM },
|
{ "vpxord", _ZMM | ZMM_KZ, _ZMM, M_1to16 | _MEM },
|
||||||
|
|
||||||
{ "vpxorq", _XMM | XMM_KZ, _XMM, _XMM | M_1to2 | _MEM },
|
{ "vpxorq", XMM | XMM_KZ, _XMM, _XMM | M_1to2 | _MEM },
|
||||||
{ "vpxorq", _ZMM | ZMM_KZ, _ZMM, M_1to8 | _MEM },
|
{ "vpxorq", _ZMM | ZMM_KZ, _ZMM, M_1to8 | _MEM },
|
||||||
|
|
||||||
{ "vpsadbw", _XMM3, _XMM, _XMM | _MEM },
|
{ "vpsadbw", XMM, _XMM, _XMM | _MEM },
|
||||||
{ "vpsadbw", _ZMM, _ZMM, _MEM },
|
{ "vpsadbw", _ZMM, _ZMM, _MEM },
|
||||||
|
|
||||||
{ "vpmuldq", _XMM3, _XMM, _XMM | M_1to2 | _MEM },
|
{ "vpmuldq", XMM, _XMM, _XMM | M_1to2 | _MEM },
|
||||||
{ "vpmuldq", ZMM_KZ, _ZMM, M_1to8 | _MEM },
|
{ "vpmuldq", ZMM_KZ, _ZMM, M_1to8 | _MEM },
|
||||||
|
|
||||||
{ "vpmulhrsw", _XMM3, _XMM, _XMM | _MEM },
|
{ "vpmulhrsw", XMM, _XMM, _XMM | _MEM },
|
||||||
{ "vpmulhrsw", ZMM_KZ, _ZMM, _MEM },
|
{ "vpmulhrsw", ZMM_KZ, _ZMM, _MEM },
|
||||||
|
|
||||||
{ "vpmulhuw", _XMM3, _XMM, _XMM | _MEM },
|
{ "vpmulhuw", XMM, _XMM, _XMM | _MEM },
|
||||||
{ "vpmulhuw", ZMM_KZ, _ZMM, _MEM },
|
{ "vpmulhuw", ZMM_KZ, _ZMM, _MEM },
|
||||||
|
|
||||||
{ "vpmulhw", _XMM3, _XMM, _XMM | _MEM },
|
{ "vpmulhw", XMM, _XMM, _XMM | _MEM },
|
||||||
{ "vpmulhw", ZMM_KZ, _ZMM, _MEM },
|
{ "vpmulhw", ZMM_KZ, _ZMM, _MEM },
|
||||||
|
|
||||||
{ "vpmullw", _XMM3, _XMM, _XMM | _MEM },
|
{ "vpmullw", XMM, _XMM, _XMM | _MEM },
|
||||||
{ "vpmullw", ZMM_KZ, _ZMM, _MEM },
|
{ "vpmullw", ZMM_KZ, _ZMM, _MEM },
|
||||||
|
|
||||||
{ "vpmulld", _XMM3, _XMM, M_1to4 | _MEM },
|
{ "vpmulld", XMM, _XMM, M_1to4 | _MEM },
|
||||||
{ "vpmulld", ZMM_KZ, _ZMM, M_1to16 | _MEM },
|
{ "vpmulld", ZMM_KZ, _ZMM, M_1to16 | _MEM },
|
||||||
|
|
||||||
{ "vpmullq", _XMM3, _XMM, M_1to2 | _MEM },
|
{ "vpmullq", XMM, _XMM, M_1to2 | _MEM },
|
||||||
{ "vpmullq", ZMM_KZ, _ZMM, M_1to8 | _MEM },
|
{ "vpmullq", ZMM_KZ, _ZMM, M_1to8 | _MEM },
|
||||||
|
|
||||||
{ "vpmuludq", _XMM3, _XMM, M_1to2 | _MEM },
|
{ "vpmuludq", XMM, _XMM, M_1to2 | _MEM },
|
||||||
{ "vpmuludq", ZMM_KZ, _ZMM, M_1to8 | _MEM },
|
{ "vpmuludq", ZMM_KZ, _ZMM, M_1to8 | _MEM },
|
||||||
|
|
||||||
{ "vpunpckhbw", _XMM3, _XMM, _XMM | _MEM },
|
{ "vpunpckhbw", XMM, _XMM, _XMM | _MEM },
|
||||||
{ "vpunpckhbw", _ZMM, _ZMM, _MEM },
|
{ "vpunpckhbw", _ZMM, _ZMM, _MEM },
|
||||||
|
|
||||||
{ "vpunpckhwd", _XMM3, _XMM, _XMM | _MEM },
|
{ "vpunpckhwd", XMM, _XMM, _XMM | _MEM },
|
||||||
{ "vpunpckhwd", _ZMM, _ZMM, _MEM },
|
{ "vpunpckhwd", _ZMM, _ZMM, _MEM },
|
||||||
|
|
||||||
{ "vpunpckhdq", _XMM3, _XMM, M_1to4 | _MEM },
|
{ "vpunpckhdq", XMM, _XMM, M_1to4 | _MEM },
|
||||||
{ "vpunpckhdq", _ZMM, _ZMM, M_1to16 | _MEM },
|
{ "vpunpckhdq", _ZMM, _ZMM, M_1to16 | _MEM },
|
||||||
|
|
||||||
{ "vpunpckhqdq", _XMM3, _XMM, M_1to2 | _MEM },
|
{ "vpunpckhqdq", XMM, _XMM, M_1to2 | _MEM },
|
||||||
{ "vpunpckhqdq", _ZMM, _ZMM, M_1to8 | _MEM },
|
{ "vpunpckhqdq", _ZMM, _ZMM, M_1to8 | _MEM },
|
||||||
|
|
||||||
{ "vpunpcklbw", _XMM3, _XMM, _XMM | _MEM },
|
{ "vpunpcklbw", XMM, _XMM, _XMM | _MEM },
|
||||||
{ "vpunpcklbw", _ZMM, _ZMM, _MEM },
|
{ "vpunpcklbw", _ZMM, _ZMM, _MEM },
|
||||||
|
|
||||||
{ "vpunpcklwd", _XMM3, _XMM, _XMM | _MEM },
|
{ "vpunpcklwd", XMM, _XMM, _XMM | _MEM },
|
||||||
{ "vpunpcklwd", _ZMM, _ZMM, _MEM },
|
{ "vpunpcklwd", _ZMM, _ZMM, _MEM },
|
||||||
|
|
||||||
{ "vpunpckldq", _XMM3, _XMM, M_1to4 | _MEM },
|
{ "vpunpckldq", XMM, _XMM, M_1to4 | _MEM },
|
||||||
{ "vpunpckldq", _ZMM, _ZMM, M_1to16 | _MEM },
|
{ "vpunpckldq", _ZMM, _ZMM, M_1to16 | _MEM },
|
||||||
|
|
||||||
{ "vpunpcklqdq", _XMM3, _XMM, M_1to2 | _MEM },
|
{ "vpunpcklqdq", XMM, _XMM, M_1to2 | _MEM },
|
||||||
{ "vpunpcklqdq", _ZMM, _ZMM, M_1to8 | _MEM },
|
{ "vpunpcklqdq", _ZMM, _ZMM, M_1to8 | _MEM },
|
||||||
|
|
||||||
{ "vextractf32x4", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 },
|
{ "vextractf32x4", _XMM | XMM_KZ | _MEM, _YMM | _ZMM, IMM8 },
|
||||||
|
@ -1126,7 +1129,7 @@ public:
|
||||||
{ "vextracti32x8", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 },
|
{ "vextracti32x8", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 },
|
||||||
{ "vextracti64x4", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 },
|
{ "vextracti64x4", _YMM | YMM_KZ | _MEM, _ZMM, IMM8 },
|
||||||
|
|
||||||
{ "vextractps", REG32 | _MEM, _XMM3, IMM8 },
|
{ "vextractps", REG32 | _MEM, XMM, IMM8 },
|
||||||
|
|
||||||
{ "vpermb", XMM_KZ, _XMM, _XMM | _MEM },
|
{ "vpermb", XMM_KZ, _XMM, _XMM | _MEM },
|
||||||
{ "vpermb", ZMM_KZ, _ZMM, _ZMM | _MEM },
|
{ "vpermb", ZMM_KZ, _ZMM, _ZMM | _MEM },
|
||||||
|
@ -1175,7 +1178,7 @@ public:
|
||||||
uint64_t xm;
|
uint64_t xm;
|
||||||
} tbl[] = {
|
} tbl[] = {
|
||||||
#ifdef XBYAK64
|
#ifdef XBYAK64
|
||||||
{ "vinsertps", _XMM, _XMM, _XMM3 | _MEM },
|
{ "vinsertps", XMM, _XMM, _XMM3 | _MEM },
|
||||||
|
|
||||||
{ "vshufpd", XMM_KZ, _XMM, M_1to2 | _MEM },
|
{ "vshufpd", XMM_KZ, _XMM, M_1to2 | _MEM },
|
||||||
{ "vshufpd", ZMM_KZ, _ZMM, M_1to8 | _MEM },
|
{ "vshufpd", ZMM_KZ, _ZMM, M_1to8 | _MEM },
|
||||||
|
@ -1208,14 +1211,14 @@ public:
|
||||||
put(p.name, p.x1, p.x2, p.xm, IMM8);
|
put(p.name, p.x1, p.x2, p.xm, IMM8);
|
||||||
}
|
}
|
||||||
#ifdef XBYAK64
|
#ifdef XBYAK64
|
||||||
put("vpextrb", _REG64 | _MEM, _XMM3, IMM8);
|
put("vpextrb", _REG64 | _MEM, XMM, IMM8);
|
||||||
put("vpextrw", _REG64 | _MEM, _XMM3, IMM8);
|
put("vpextrw", _REG64 | _MEM, XMM, IMM8);
|
||||||
put("vpextrd", _REG32 | _MEM, _XMM3, IMM8);
|
put("vpextrd", _REG32 | _MEM, XMM, IMM8);
|
||||||
put("vpextrq", _REG64 | _MEM, _XMM3, IMM8);
|
put("vpextrq", _REG64 | _MEM, XMM, IMM8);
|
||||||
put("vpinsrb", _XMM3, _XMM3, _REG32 | _MEM, IMM8);
|
put("vpinsrb", XMM, _XMM3, _REG32 | _MEM, IMM8);
|
||||||
put("vpinsrw", _XMM3, _XMM3, _REG32 | _MEM, IMM8);
|
put("vpinsrw", XMM, _XMM3, _REG32 | _MEM, IMM8);
|
||||||
put("vpinsrd", _XMM3, _XMM3, _REG32 | _MEM, IMM8);
|
put("vpinsrd", XMM, _XMM3, _REG32 | _MEM, IMM8);
|
||||||
put("vpinsrq", _XMM3, _XMM3, _REG64 | _MEM, IMM8);
|
put("vpinsrq", XMM, _XMM3, _REG64 | _MEM, IMM8);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
void put512_FMA()
|
void put512_FMA()
|
||||||
|
@ -1345,7 +1348,7 @@ public:
|
||||||
} else if (suf == "ps") {
|
} else if (suf == "ps") {
|
||||||
mem = M_1to4;
|
mem = M_1to4;
|
||||||
}
|
}
|
||||||
put(p, _XMM3 | XMM_KZ, _XMM, mem | _MEM);
|
put(p, XMM | XMM_KZ, _XMM, mem | _MEM);
|
||||||
if (!sufTbl[j].supportYMM) continue;
|
if (!sufTbl[j].supportYMM) continue;
|
||||||
mem = 0;
|
mem = 0;
|
||||||
if (suf == "pd") {
|
if (suf == "pd") {
|
||||||
|
@ -1466,23 +1469,23 @@ public:
|
||||||
put("vcvtqq2ps", XMM_KZ, _YMM | M_yword | MY_1to4);
|
put("vcvtqq2ps", XMM_KZ, _YMM | M_yword | MY_1to4);
|
||||||
put("vcvtqq2ps", YMM_KZ, ZMM | _MEM | M_1to8 | ZMM_ER);
|
put("vcvtqq2ps", YMM_KZ, ZMM | _MEM | M_1to8 | ZMM_ER);
|
||||||
|
|
||||||
put("vcvtsd2si", REG32 | REG64, _XMM3 | _MEM | XMM_ER);
|
put("vcvtsd2si", REG32 | REG64, XMM | _MEM | XMM_ER);
|
||||||
|
|
||||||
put("vcvtsd2usi", REG32 | REG64, _XMM3 | _MEM | XMM_ER);
|
put("vcvtsd2usi", REG32 | REG64, XMM | _MEM | XMM_ER);
|
||||||
|
|
||||||
put("vcvtsd2ss", XMM_KZ, _XMM3, _XMM3 | _MEM | XMM_ER);
|
put("vcvtsd2ss", XMM_KZ, XMM, _XMM3 | _MEM | XMM_ER);
|
||||||
|
|
||||||
put("vcvtsi2sd", _XMM3, _XMM3, REG32 | REG64 | MEM32 | MEM64);
|
put("vcvtsi2sd", XMM, _XMM3, REG32 | REG64 | MEM32 | MEM64);
|
||||||
put("vcvtsi2sd", XMM, XMM_ER, REG64);
|
put("vcvtsi2sd", XMM, XMM_ER, REG64);
|
||||||
|
|
||||||
put("vcvtsi2ss", _XMM3, _XMM3, REG32 | REG64 | MEM32 | MEM64);
|
put("vcvtsi2ss", XMM, _XMM3, REG32 | REG64 | MEM32 | MEM64);
|
||||||
put("vcvtsi2ss", XMM, XMM_ER, REG32 | REG64);
|
put("vcvtsi2ss", XMM, XMM_ER, REG32 | REG64);
|
||||||
|
|
||||||
put("vcvtss2sd", XMM_KZ, _XMM3, _XMM3 | _MEM | XMM_SAE);
|
put("vcvtss2sd", XMM_KZ, XMM, _XMM3 | _MEM | XMM_SAE);
|
||||||
|
|
||||||
put("vcvtss2si", REG32 | REG64, _XMM3 | _MEM | XMM_ER);
|
put("vcvtss2si", REG32 | REG64, XMM | _MEM | XMM_ER);
|
||||||
|
|
||||||
put("vcvtss2usi", REG32 | REG64, _XMM3 | _MEM | XMM_ER);
|
put("vcvtss2usi", REG32 | REG64, XMM | _MEM | XMM_ER);
|
||||||
|
|
||||||
put("vcvtpd2dq", XMM_KZ, _XMM | M_xword | M_1to2);
|
put("vcvtpd2dq", XMM_KZ, _XMM | M_xword | M_1to2);
|
||||||
put("vcvtpd2dq", XMM_KZ, _YMM | M_yword | MY_1to4);
|
put("vcvtpd2dq", XMM_KZ, _YMM | M_yword | MY_1to4);
|
||||||
|
@ -1516,13 +1519,13 @@ public:
|
||||||
put("vcvttps2uqq", YMM_KZ, _XMM | _MEM | M_1to4);
|
put("vcvttps2uqq", YMM_KZ, _XMM | _MEM | M_1to4);
|
||||||
put("vcvttps2uqq", ZMM_KZ, _YMM | _MEM | M_1to8 | YMM_SAE);
|
put("vcvttps2uqq", ZMM_KZ, _YMM | _MEM | M_1to8 | YMM_SAE);
|
||||||
|
|
||||||
put("vcvttsd2si", REG32 | REG64, _XMM3 | _MEM | XMM_SAE);
|
put("vcvttsd2si", REG32 | REG64, XMM | _MEM | XMM_SAE);
|
||||||
|
|
||||||
put("vcvttsd2usi", REG32 | REG64, _XMM3 | _MEM | XMM_SAE);
|
put("vcvttsd2usi", REG32 | REG64, XMM | _MEM | XMM_SAE);
|
||||||
|
|
||||||
put("vcvttss2si", REG32 | REG64, _XMM3 | _MEM | XMM_SAE);
|
put("vcvttss2si", REG32 | REG64, XMM | _MEM | XMM_SAE);
|
||||||
|
|
||||||
put("vcvttss2usi", REG32 | REG64, _XMM3 | _MEM | XMM_SAE);
|
put("vcvttss2usi", REG32 | REG64, XMM | _MEM | XMM_SAE);
|
||||||
|
|
||||||
put("vcvtudq2pd", XMM_KZ, _XMM | _MEM | M_1to2);
|
put("vcvtudq2pd", XMM_KZ, _XMM | _MEM | M_1to2);
|
||||||
put("vcvtudq2pd", YMM_KZ, _XMM | _MEM | M_1to4);
|
put("vcvtudq2pd", YMM_KZ, _XMM | _MEM | M_1to4);
|
||||||
|
@ -1540,10 +1543,10 @@ public:
|
||||||
put("vcvtuqq2ps", XMM_KZ, _YMM | M_yword | MY_1to4);
|
put("vcvtuqq2ps", XMM_KZ, _YMM | M_yword | MY_1to4);
|
||||||
put("vcvtuqq2ps", YMM_KZ, ZMM | _MEM | M_1to8 | ZMM_ER);
|
put("vcvtuqq2ps", YMM_KZ, ZMM | _MEM | M_1to8 | ZMM_ER);
|
||||||
|
|
||||||
put("vcvtusi2sd", _XMM3, _XMM3, REG32 | REG64 | MEM32 | MEM64);
|
put("vcvtusi2sd", XMM, _XMM3, REG32 | REG64 | MEM32 | MEM64);
|
||||||
put("vcvtusi2sd", XMM, XMM_ER, REG64);
|
put("vcvtusi2sd", XMM, XMM_ER, REG64);
|
||||||
|
|
||||||
put("vcvtusi2ss", _XMM3, _XMM3, REG32 | REG64 | MEM32 | MEM64);
|
put("vcvtusi2ss", XMM, _XMM3, REG32 | REG64 | MEM32 | MEM64);
|
||||||
put("vcvtusi2ss", XMM, XMM_ER, REG32 | REG64);
|
put("vcvtusi2ss", XMM, XMM_ER, REG32 | REG64);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,8 +40,8 @@ struct Code : Xbyak::CodeGenerator {
|
||||||
cmpss(xmm0, ptr[rip + label], 0);
|
cmpss(xmm0, ptr[rip + label], 0);
|
||||||
test(dword[rip + label], 33);
|
test(dword[rip + label], 33);
|
||||||
bt(dword[rip + label ], 3);
|
bt(dword[rip + label ], 3);
|
||||||
vblendpd(xmm0, dword[rip + label], 3);
|
vblendpd(xmm0, xmm0, dword[rip + label], 3);
|
||||||
vpalignr(xmm0, qword[rip + label], 4);
|
vpalignr(xmm0, xmm0, qword[rip + label], 4);
|
||||||
vextractf128(dword[rip + label], ymm3, 12);
|
vextractf128(dword[rip + label], ymm3, 12);
|
||||||
vperm2i128(ymm0, ymm1, qword[rip + label], 13);
|
vperm2i128(ymm0, ymm1, qword[rip + label], 13);
|
||||||
vcvtps2ph(ptr[rip + label], xmm2, 44);
|
vcvtps2ph(ptr[rip + label], xmm2, 44);
|
||||||
|
|
|
@ -129,6 +129,55 @@ struct Code : public Xbyak::CodeGenerator {
|
||||||
add(rax, sf.p[2]);
|
add(rax, sf.p[2]);
|
||||||
add(rax, sf.p[3]);
|
add(rax, sf.p[3]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
int64_t f(const int64_t a[13]) { return sum-of-a[]; }
|
||||||
|
*/
|
||||||
|
void gen13()
|
||||||
|
{
|
||||||
|
StackFrame sf(this, 1, 13);
|
||||||
|
for (int i = 0; i < 13; i++) {
|
||||||
|
mov(sf.t[i], ptr[sf.p[0] + i * 8]);
|
||||||
|
}
|
||||||
|
mov(rax, sf.t[0]);
|
||||||
|
for (int i = 1; i < 13; i++) {
|
||||||
|
add(rax, sf.t[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
same as gen13
|
||||||
|
*/
|
||||||
|
void gen14()
|
||||||
|
{
|
||||||
|
StackFrame sf(this, 1, 11 | UseRCX | UseRDX);
|
||||||
|
Pack t = sf.t;
|
||||||
|
t.append(rcx);
|
||||||
|
t.append(rdx);
|
||||||
|
for (int i = 0; i < 13; i++) {
|
||||||
|
mov(t[i], ptr[sf.p[0] + i * 8]);
|
||||||
|
}
|
||||||
|
mov(rax, t[0]);
|
||||||
|
for (int i = 1; i < 13; i++) {
|
||||||
|
add(rax, t[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
return (1 << 15) - 1;
|
||||||
|
*/
|
||||||
|
void gen15()
|
||||||
|
{
|
||||||
|
StackFrame sf(this, 0, 14, 8);
|
||||||
|
Pack t = sf.t;
|
||||||
|
t.append(rax);
|
||||||
|
for (int i = 0; i < 15; i++) {
|
||||||
|
mov(t[i], 1 << i);
|
||||||
|
}
|
||||||
|
mov(qword[rsp], 0);
|
||||||
|
for (int i = 0; i < 15; i++) {
|
||||||
|
add(ptr[rsp], t[i]);
|
||||||
|
}
|
||||||
|
mov(rax, ptr[rsp]);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Code2 : Xbyak::CodeGenerator {
|
struct Code2 : Xbyak::CodeGenerator {
|
||||||
|
@ -152,8 +201,14 @@ struct Code2 : Xbyak::CodeGenerator {
|
||||||
add(rax, sf.p[i]);
|
add(rax, sf.p[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
void gen2(int pNum, int tNum, int stackSizeByte)
|
||||||
|
{
|
||||||
|
StackFrame sf(this, pNum, tNum, stackSizeByte);
|
||||||
|
mov(rax, rsp);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
static int errNum = 0;
|
static int errNum = 0;
|
||||||
void check(int x, int y)
|
void check(int x, int y)
|
||||||
{
|
{
|
||||||
|
@ -167,19 +222,19 @@ void verify(const Xbyak::uint8 *f, int pNum)
|
||||||
{
|
{
|
||||||
switch (pNum) {
|
switch (pNum) {
|
||||||
case 0:
|
case 0:
|
||||||
check(1, Xbyak::CastTo<int (*)()>(f)());
|
check(1, reinterpret_cast<int (*)()>(f)());
|
||||||
return;
|
return;
|
||||||
case 1:
|
case 1:
|
||||||
check(11, Xbyak::CastTo<int (*)(int)>(f)(10));
|
check(11, reinterpret_cast<int (*)(int)>(f)(10));
|
||||||
return;
|
return;
|
||||||
case 2:
|
case 2:
|
||||||
check(111, Xbyak::CastTo<int (*)(int, int)>(f)(10, 100));
|
check(111, reinterpret_cast<int (*)(int, int)>(f)(10, 100));
|
||||||
return;
|
return;
|
||||||
case 3:
|
case 3:
|
||||||
check(1111, Xbyak::CastTo<int (*)(int, int, int)>(f)(10, 100, 1000));
|
check(1111, reinterpret_cast<int (*)(int, int, int)>(f)(10, 100, 1000));
|
||||||
return;
|
return;
|
||||||
case 4:
|
case 4:
|
||||||
check(11111, Xbyak::CastTo<int (*)(int, int, int, int)>(f)(10, 100, 1000, 10000));
|
check(11111, reinterpret_cast<int (*)(int, int, int, int)>(f)(10, 100, 1000, 10000));
|
||||||
return;
|
return;
|
||||||
default:
|
default:
|
||||||
printf("ERR pNum=%d\n", pNum);
|
printf("ERR pNum=%d\n", pNum);
|
||||||
|
@ -212,6 +267,15 @@ void testAll()
|
||||||
const Xbyak::uint8 *f = code.getCurr();
|
const Xbyak::uint8 *f = code.getCurr();
|
||||||
code.gen(pNum, tNum | opt, stackSize);
|
code.gen(pNum, tNum | opt, stackSize);
|
||||||
verify(f, pNum);
|
verify(f, pNum);
|
||||||
|
/*
|
||||||
|
check rsp is 16-byte aligned if stackSize > 0
|
||||||
|
*/
|
||||||
|
if (stackSize > 0) {
|
||||||
|
Code2 c2;
|
||||||
|
c2.gen2(pNum, tNum | opt, stackSize);
|
||||||
|
uint64_t addr = c2.getCode<uint64_t (*)()>()();
|
||||||
|
check(addr % 16, 0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -268,6 +332,20 @@ void testPartial()
|
||||||
int (*f12)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
|
int (*f12)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
|
||||||
code.gen12();
|
code.gen12();
|
||||||
check(24, f12(3, 5, 7, 9));
|
check(24, f12(3, 5, 7, 9));
|
||||||
|
|
||||||
|
{
|
||||||
|
int64_t tbl[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 };
|
||||||
|
int64_t (*f13)(const int64_t*) = code.getCurr<int64_t (*)(const int64_t*)>();
|
||||||
|
code.gen13();
|
||||||
|
check(91, f13(tbl));
|
||||||
|
|
||||||
|
int64_t (*f14)(const int64_t*) = code.getCurr<int64_t (*)(const int64_t*)>();
|
||||||
|
code.gen14();
|
||||||
|
check(91, f14(tbl));
|
||||||
|
}
|
||||||
|
int (*f15)() = code.getCurr<int (*)()>();
|
||||||
|
code.gen15();
|
||||||
|
check((1 << 15) - 1, f15());
|
||||||
}
|
}
|
||||||
|
|
||||||
void put(const Xbyak::util::Pack& p)
|
void put(const Xbyak::util::Pack& p)
|
||||||
|
|
108
xbyak/xbyak.h
108
xbyak/xbyak.h
|
@ -40,6 +40,8 @@
|
||||||
// This covers -std=(gnu|c)++(0x|11|1y), -stdlib=libc++, and modern Microsoft.
|
// This covers -std=(gnu|c)++(0x|11|1y), -stdlib=libc++, and modern Microsoft.
|
||||||
#if ((defined(_MSC_VER) && (_MSC_VER >= 1600)) || defined(_LIBCPP_VERSION) ||\
|
#if ((defined(_MSC_VER) && (_MSC_VER >= 1600)) || defined(_LIBCPP_VERSION) ||\
|
||||||
((__cplusplus >= 201103) || defined(__GXX_EXPERIMENTAL_CXX0X__)))
|
((__cplusplus >= 201103) || defined(__GXX_EXPERIMENTAL_CXX0X__)))
|
||||||
|
#include <unordered_set>
|
||||||
|
#define XBYAK_STD_UNORDERED_SET std::unordered_set
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#define XBYAK_STD_UNORDERED_MAP std::unordered_map
|
#define XBYAK_STD_UNORDERED_MAP std::unordered_map
|
||||||
#define XBYAK_STD_UNORDERED_MULTIMAP std::unordered_multimap
|
#define XBYAK_STD_UNORDERED_MULTIMAP std::unordered_multimap
|
||||||
|
@ -49,16 +51,22 @@
|
||||||
libstdcxx 20070719 (from GCC 4.2.1, the last GPL 2 version).
|
libstdcxx 20070719 (from GCC 4.2.1, the last GPL 2 version).
|
||||||
*/
|
*/
|
||||||
#elif XBYAK_GNUC_PREREQ(4, 5) || (XBYAK_GNUC_PREREQ(4, 2) && __GLIBCXX__ >= 20070719) || defined(__INTEL_COMPILER) || defined(__llvm__)
|
#elif XBYAK_GNUC_PREREQ(4, 5) || (XBYAK_GNUC_PREREQ(4, 2) && __GLIBCXX__ >= 20070719) || defined(__INTEL_COMPILER) || defined(__llvm__)
|
||||||
|
#include <tr1/unordered_set>
|
||||||
|
#define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set
|
||||||
#include <tr1/unordered_map>
|
#include <tr1/unordered_map>
|
||||||
#define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map
|
#define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map
|
||||||
#define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap
|
#define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap
|
||||||
|
|
||||||
#elif defined(_MSC_VER) && (_MSC_VER >= 1500) && (_MSC_VER < 1600)
|
#elif defined(_MSC_VER) && (_MSC_VER >= 1500) && (_MSC_VER < 1600)
|
||||||
|
#include <unordered_set>
|
||||||
|
#define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map
|
#define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map
|
||||||
#define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap
|
#define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
#include <set>
|
||||||
|
#define XBYAK_STD_UNORDERED_SET std::set
|
||||||
#include <map>
|
#include <map>
|
||||||
#define XBYAK_STD_UNORDERED_MAP std::map
|
#define XBYAK_STD_UNORDERED_MAP std::map
|
||||||
#define XBYAK_STD_UNORDERED_MULTIMAP std::multimap
|
#define XBYAK_STD_UNORDERED_MULTIMAP std::multimap
|
||||||
|
@ -105,7 +113,7 @@ namespace Xbyak {
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
DEFAULT_MAX_CODE_SIZE = 4096,
|
DEFAULT_MAX_CODE_SIZE = 4096,
|
||||||
VERSION = 0x5670 /* 0xABCD = A.BC(D) */
|
VERSION = 0x5770 /* 0xABCD = A.BC(D) */
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifndef MIE_INTEGER_TYPE_DEFINED
|
#ifndef MIE_INTEGER_TYPE_DEFINED
|
||||||
|
@ -178,7 +186,8 @@ enum {
|
||||||
ERR_INVALID_ZERO,
|
ERR_INVALID_ZERO,
|
||||||
ERR_INVALID_RIP_IN_AUTO_GROW,
|
ERR_INVALID_RIP_IN_AUTO_GROW,
|
||||||
ERR_INVALID_MIB_ADDRESS,
|
ERR_INVALID_MIB_ADDRESS,
|
||||||
ERR_INTERNAL
|
ERR_INTERNAL,
|
||||||
|
ERR_X2APIC_IS_NOT_SUPPORTED
|
||||||
};
|
};
|
||||||
|
|
||||||
class Error : public std::exception {
|
class Error : public std::exception {
|
||||||
|
@ -240,6 +249,7 @@ public:
|
||||||
"invalid rip in AutoGrow",
|
"invalid rip in AutoGrow",
|
||||||
"invalid mib address",
|
"invalid mib address",
|
||||||
"internal error",
|
"internal error",
|
||||||
|
"x2APIC is not supported"
|
||||||
};
|
};
|
||||||
assert((size_t)err_ < sizeof(errTbl) / sizeof(*errTbl));
|
assert((size_t)err_ < sizeof(errTbl) / sizeof(*errTbl));
|
||||||
return errTbl[err_];
|
return errTbl[err_];
|
||||||
|
@ -617,6 +627,12 @@ struct RegRip {
|
||||||
const Label* label_;
|
const Label* label_;
|
||||||
bool isAddr_;
|
bool isAddr_;
|
||||||
explicit RegRip(sint64 disp = 0, const Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {}
|
explicit RegRip(sint64 disp = 0, const Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {}
|
||||||
|
// rip-relative expression whose displacement is raised by an int offset;
// label_ and isAddr_ are carried over unchanged
friend const RegRip operator+(const RegRip& r, int disp)
{
	RegRip moved(r.disp_ + disp, r.label_, r.isAddr_);
	return moved;
}
|
||||||
|
// rip-relative expression whose displacement is lowered by an int offset;
// label_ and isAddr_ are carried over unchanged
friend const RegRip operator-(const RegRip& r, int disp)
{
	RegRip moved(r.disp_ - disp, r.label_, r.isAddr_);
	return moved;
}
|
||||||
friend const RegRip operator+(const RegRip& r, sint64 disp) {
|
friend const RegRip operator+(const RegRip& r, sint64 disp) {
|
||||||
return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
|
return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
|
||||||
}
|
}
|
||||||
|
@ -786,6 +802,7 @@ inline RegExp operator-(const RegExp& e, size_t disp)
|
||||||
|
|
||||||
// 2nd parameter for constructor of CodeArray(maxSize, userPtr, alloc)
|
// 2nd parameter for constructor of CodeArray(maxSize, userPtr, alloc)
|
||||||
void *const AutoGrow = (void*)1; //-V566
|
void *const AutoGrow = (void*)1; //-V566
|
||||||
|
void *const DontSetProtectRWE = (void*)2; //-V566
|
||||||
|
|
||||||
class CodeArray {
|
class CodeArray {
|
||||||
enum Type {
|
enum Type {
|
||||||
|
@ -825,6 +842,7 @@ protected:
|
||||||
size_t size_;
|
size_t size_;
|
||||||
bool isCalledCalcJmpAddress_;
|
bool isCalledCalcJmpAddress_;
|
||||||
|
|
||||||
|
// forwards to the allocator's useProtect()
bool useProtect() const
{
	return alloc_->useProtect();
}
|
||||||
/*
|
/*
|
||||||
allocate new memory and copy old data to the new area
|
allocate new memory and copy old data to the new area
|
||||||
*/
|
*/
|
||||||
|
@ -848,7 +866,6 @@ protected:
|
||||||
uint64 disp = i->getVal(top_);
|
uint64 disp = i->getVal(top_);
|
||||||
rewrite(i->codeOffset, disp, i->jmpSize);
|
rewrite(i->codeOffset, disp, i->jmpSize);
|
||||||
}
|
}
|
||||||
if (alloc_->useProtect() && !protect(top_, size_, PROTECT_RWE)) throw Error(ERR_CANT_PROTECT);
|
|
||||||
isCalledCalcJmpAddress_ = true;
|
isCalledCalcJmpAddress_ = true;
|
||||||
}
|
}
|
||||||
public:
|
public:
|
||||||
|
@ -858,7 +875,7 @@ public:
|
||||||
PROTECT_RE = 2 // read/exec
|
PROTECT_RE = 2 // read/exec
|
||||||
};
|
};
|
||||||
explicit CodeArray(size_t maxSize, void *userPtr = 0, Allocator *allocator = 0)
|
explicit CodeArray(size_t maxSize, void *userPtr = 0, Allocator *allocator = 0)
|
||||||
: type_(userPtr == AutoGrow ? AUTO_GROW : userPtr ? USER_BUF : ALLOC_BUF)
|
: type_(userPtr == AutoGrow ? AUTO_GROW : (userPtr == 0 || userPtr == DontSetProtectRWE) ? ALLOC_BUF : USER_BUF)
|
||||||
, alloc_(allocator ? allocator : (Allocator*)&defaultAllocator_)
|
, alloc_(allocator ? allocator : (Allocator*)&defaultAllocator_)
|
||||||
, maxSize_(maxSize)
|
, maxSize_(maxSize)
|
||||||
, top_(type_ == USER_BUF ? reinterpret_cast<uint8*>(userPtr) : alloc_->alloc((std::max<size_t>)(maxSize, 1)))
|
, top_(type_ == USER_BUF ? reinterpret_cast<uint8*>(userPtr) : alloc_->alloc((std::max<size_t>)(maxSize, 1)))
|
||||||
|
@ -866,7 +883,7 @@ public:
|
||||||
, isCalledCalcJmpAddress_(false)
|
, isCalledCalcJmpAddress_(false)
|
||||||
{
|
{
|
||||||
if (maxSize_ > 0 && top_ == 0) throw Error(ERR_CANT_ALLOC);
|
if (maxSize_ > 0 && top_ == 0) throw Error(ERR_CANT_ALLOC);
|
||||||
if ((type_ == ALLOC_BUF && alloc_->useProtect()) && !protect(top_, maxSize, PROTECT_RWE)) {
|
if ((type_ == ALLOC_BUF && userPtr != DontSetProtectRWE && useProtect()) && !setProtectMode(PROTECT_RWE, false)) {
|
||||||
alloc_->free(top_);
|
alloc_->free(top_);
|
||||||
throw Error(ERR_CANT_PROTECT);
|
throw Error(ERR_CANT_PROTECT);
|
||||||
}
|
}
|
||||||
|
@ -874,10 +891,19 @@ public:
|
||||||
virtual ~CodeArray()
|
virtual ~CodeArray()
|
||||||
{
|
{
|
||||||
if (isAllocType()) {
|
if (isAllocType()) {
|
||||||
if (alloc_->useProtect()) protect(top_, maxSize_, PROTECT_RW);
|
if (useProtect()) setProtectModeRW(false);
|
||||||
alloc_->free(top_);
|
alloc_->free(top_);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/*
	Apply the protection mode to the whole buffer [top_, top_ + maxSize_).
	@param mode protection to request through protect()
	@param throwException if true, a failure throws Error(ERR_CANT_PROTECT)
	@return true on success; false on failure when throwException is false
*/
bool setProtectMode(ProtectMode mode, bool throwException = true)
{
	if (!protect(top_, maxSize_, mode)) {
		if (throwException) throw Error(ERR_CANT_PROTECT);
		return false;
	}
	return true;
}
|
||||||
|
// setProtectMode() with PROTECT_RE (read/exec)
bool setProtectModeRE(bool throwException = true)
{
	return setProtectMode(PROTECT_RE, throwException);
}
|
||||||
|
// setProtectMode() with PROTECT_RW
bool setProtectModeRW(bool throwException = true)
{
	return setProtectMode(PROTECT_RW, throwException);
}
|
||||||
void resetSize()
|
void resetSize()
|
||||||
{
|
{
|
||||||
size_ = 0;
|
size_ = 0;
|
||||||
|
@ -909,10 +935,10 @@ public:
|
||||||
void dq(uint64 code) { db(code, 8); }
|
void dq(uint64 code) { db(code, 8); }
|
||||||
const uint8 *getCode() const { return top_; }
|
const uint8 *getCode() const { return top_; }
|
||||||
template<class F>
|
template<class F>
|
||||||
const F getCode() const { return CastTo<F>(top_); }
|
const F getCode() const { return reinterpret_cast<F>(top_); }
|
||||||
const uint8 *getCurr() const { return &top_[size_]; }
|
const uint8 *getCurr() const { return &top_[size_]; }
|
||||||
template<class F>
|
template<class F>
|
||||||
const F getCurr() const { return CastTo<F>(&top_[size_]); }
|
const F getCurr() const { return reinterpret_cast<F>(&top_[size_]); }
|
||||||
size_t getSize() const { return size_; }
|
size_t getSize() const { return size_; }
|
||||||
void setSize(size_t size)
|
void setSize(size_t size)
|
||||||
{
|
{
|
||||||
|
@ -995,6 +1021,9 @@ public:
|
||||||
size_t pageSize = sysconf(_SC_PAGESIZE);
|
size_t pageSize = sysconf(_SC_PAGESIZE);
|
||||||
size_t iaddr = reinterpret_cast<size_t>(addr);
|
size_t iaddr = reinterpret_cast<size_t>(addr);
|
||||||
size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
|
size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
|
||||||
|
#ifndef NDEBUG
|
||||||
|
if (pageSize != 4096) fprintf(stderr, "large page(%zd) is used. not tested enough.\n", pageSize);
|
||||||
|
#endif
|
||||||
return mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode) == 0;
|
return mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode) == 0;
|
||||||
#else
|
#else
|
||||||
return true;
|
return true;
|
||||||
|
@ -1115,6 +1144,7 @@ public:
|
||||||
Label(const Label& rhs);
|
Label(const Label& rhs);
|
||||||
Label& operator=(const Label& rhs);
|
Label& operator=(const Label& rhs);
|
||||||
~Label();
|
~Label();
|
||||||
|
void clear() { mgr = 0; id = 0; }
|
||||||
int getId() const { return id; }
|
int getId() const { return id; }
|
||||||
const uint8 *getAddress() const;
|
const uint8 *getAddress() const;
|
||||||
|
|
||||||
|
@ -1153,6 +1183,7 @@ class LabelManager {
|
||||||
};
|
};
|
||||||
typedef XBYAK_STD_UNORDERED_MAP<int, ClabelVal> ClabelDefList;
|
typedef XBYAK_STD_UNORDERED_MAP<int, ClabelVal> ClabelDefList;
|
||||||
typedef XBYAK_STD_UNORDERED_MULTIMAP<int, const JmpLabel> ClabelUndefList;
|
typedef XBYAK_STD_UNORDERED_MULTIMAP<int, const JmpLabel> ClabelUndefList;
|
||||||
|
typedef XBYAK_STD_UNORDERED_SET<Label*> LabelPtrList;
|
||||||
|
|
||||||
CodeArray *base_;
|
CodeArray *base_;
|
||||||
// global : stateList_.front(), local : stateList_.back()
|
// global : stateList_.front(), local : stateList_.back()
|
||||||
|
@ -1160,6 +1191,7 @@ class LabelManager {
|
||||||
mutable int labelId_;
|
mutable int labelId_;
|
||||||
ClabelDefList clabelDefList_;
|
ClabelDefList clabelDefList_;
|
||||||
ClabelUndefList clabelUndefList_;
|
ClabelUndefList clabelUndefList_;
|
||||||
|
LabelPtrList labelPtrList_;
|
||||||
|
|
||||||
int getId(const Label& label) const
|
int getId(const Label& label) const
|
||||||
{
|
{
|
||||||
|
@ -1208,9 +1240,14 @@ class LabelManager {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
friend class Label;
|
friend class Label;
|
||||||
void incRefCount(int id) { clabelDefList_[id].refCount++; }
|
void incRefCount(int id, Label *label)
|
||||||
void decRefCount(int id)
|
|
||||||
{
|
{
|
||||||
|
clabelDefList_[id].refCount++;
|
||||||
|
labelPtrList_.insert(label);
|
||||||
|
}
|
||||||
|
void decRefCount(int id, Label *label)
|
||||||
|
{
|
||||||
|
labelPtrList_.erase(label);
|
||||||
ClabelDefList::iterator i = clabelDefList_.find(id);
|
ClabelDefList::iterator i = clabelDefList_.find(id);
|
||||||
if (i == clabelDefList_.end()) return;
|
if (i == clabelDefList_.end()) return;
|
||||||
if (i->second.refCount == 1) {
|
if (i->second.refCount == 1) {
|
||||||
|
@ -1229,11 +1266,23 @@ class LabelManager {
|
||||||
#endif
|
#endif
|
||||||
return !list.empty();
|
return !list.empty();
|
||||||
}
|
}
|
||||||
|
// detach all labels linked to LabelManager
|
||||||
|
void resetLabelPtrList()
|
||||||
|
{
|
||||||
|
for (LabelPtrList::iterator i = labelPtrList_.begin(), ie = labelPtrList_.end(); i != ie; ++i) {
|
||||||
|
(*i)->clear();
|
||||||
|
}
|
||||||
|
labelPtrList_.clear();
|
||||||
|
}
|
||||||
public:
|
public:
|
||||||
LabelManager()
|
LabelManager()
|
||||||
{
|
{
|
||||||
reset();
|
reset();
|
||||||
}
|
}
|
||||||
|
~LabelManager()
|
||||||
|
{
|
||||||
|
resetLabelPtrList();
|
||||||
|
}
|
||||||
void reset()
|
void reset()
|
||||||
{
|
{
|
||||||
base_ = 0;
|
base_ = 0;
|
||||||
|
@ -1243,6 +1292,7 @@ public:
|
||||||
stateList_.push_back(SlabelState());
|
stateList_.push_back(SlabelState());
|
||||||
clabelDefList_.clear();
|
clabelDefList_.clear();
|
||||||
clabelUndefList_.clear();
|
clabelUndefList_.clear();
|
||||||
|
resetLabelPtrList();
|
||||||
}
|
}
|
||||||
void enterLocal()
|
void enterLocal()
|
||||||
{
|
{
|
||||||
|
@ -1275,10 +1325,11 @@ public:
|
||||||
SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front();
|
SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front();
|
||||||
define_inner(st.defList, st.undefList, label, base_->getSize());
|
define_inner(st.defList, st.undefList, label, base_->getSize());
|
||||||
}
|
}
|
||||||
void defineClabel(const Label& label)
|
void defineClabel(Label& label)
|
||||||
{
|
{
|
||||||
define_inner(clabelDefList_, clabelUndefList_, getId(label), base_->getSize());
|
define_inner(clabelDefList_, clabelUndefList_, getId(label), base_->getSize());
|
||||||
label.mgr = this;
|
label.mgr = this;
|
||||||
|
labelPtrList_.insert(&label);
|
||||||
}
|
}
|
||||||
void assign(Label& dst, const Label& src)
|
void assign(Label& dst, const Label& src)
|
||||||
{
|
{
|
||||||
|
@ -1286,6 +1337,7 @@ public:
|
||||||
if (i == clabelDefList_.end()) throw Error(ERR_LABEL_ISNOT_SET_BY_L);
|
if (i == clabelDefList_.end()) throw Error(ERR_LABEL_ISNOT_SET_BY_L);
|
||||||
define_inner(clabelDefList_, clabelUndefList_, dst.id, i->second.offset);
|
define_inner(clabelDefList_, clabelUndefList_, dst.id, i->second.offset);
|
||||||
dst.mgr = this;
|
dst.mgr = this;
|
||||||
|
labelPtrList_.insert(&dst);
|
||||||
}
|
}
|
||||||
bool getOffset(size_t *offset, std::string& label) const
|
bool getOffset(size_t *offset, std::string& label) const
|
||||||
{
|
{
|
||||||
|
@ -1333,19 +1385,19 @@ inline Label::Label(const Label& rhs)
|
||||||
{
|
{
|
||||||
id = rhs.id;
|
id = rhs.id;
|
||||||
mgr = rhs.mgr;
|
mgr = rhs.mgr;
|
||||||
if (mgr) mgr->incRefCount(id);
|
if (mgr) mgr->incRefCount(id, this);
|
||||||
}
|
}
|
||||||
inline Label& Label::operator=(const Label& rhs)
|
inline Label& Label::operator=(const Label& rhs)
|
||||||
{
|
{
|
||||||
if (id) throw Error(ERR_LABEL_IS_ALREADY_SET_BY_L);
|
if (id) throw Error(ERR_LABEL_IS_ALREADY_SET_BY_L);
|
||||||
id = rhs.id;
|
id = rhs.id;
|
||||||
mgr = rhs.mgr;
|
mgr = rhs.mgr;
|
||||||
if (mgr) mgr->incRefCount(id);
|
if (mgr) mgr->incRefCount(id, this);
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
inline Label::~Label()
|
inline Label::~Label()
|
||||||
{
|
{
|
||||||
if (id && mgr) mgr->decRefCount(id);
|
if (id && mgr) mgr->decRefCount(id, this);
|
||||||
}
|
}
|
||||||
inline const uint8* Label::getAddress() const
|
inline const uint8* Label::getAddress() const
|
||||||
{
|
{
|
||||||
|
@ -1463,6 +1515,7 @@ private:
|
||||||
T_B64 = 1 << 27, // m64bcst
|
T_B64 = 1 << 27, // m64bcst
|
||||||
T_M_K = 1 << 28, // mem{k}
|
T_M_K = 1 << 28, // mem{k}
|
||||||
T_VSIB = 1 << 29,
|
T_VSIB = 1 << 29,
|
||||||
|
T_MEM_EVEX = 1 << 30, // use evex if mem
|
||||||
T_XXX
|
T_XXX
|
||||||
};
|
};
|
||||||
void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
|
void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
|
||||||
|
@ -1500,7 +1553,7 @@ private:
|
||||||
if ((a > 0 && a != v) + (b > 0 && b != v) + (c > 0 && c != v) > 0) return Error(err);
|
if ((a > 0 && a != v) + (b > 0 && b != v) + (c > 0 && c != v) > 0) return Error(err);
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32 VL = 0)
|
int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32 VL = 0, bool Hi16Vidx = false)
|
||||||
{
|
{
|
||||||
if (!(type & (T_EVEX | T_MUST_EVEX))) throw Error(ERR_EVEX_IS_INVALID);
|
if (!(type & (T_EVEX | T_MUST_EVEX))) throw Error(ERR_EVEX_IS_INVALID);
|
||||||
int w = (type & T_EW1) ? 1 : 0;
|
int w = (type & T_EW1) ? 1 : 0;
|
||||||
|
@ -1543,7 +1596,7 @@ private:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
bool Vp = !(v ? v->isExtIdx2() : 0);
|
bool Vp = !((v ? v->isExtIdx2() : 0) | Hi16Vidx);
|
||||||
bool z = reg.hasZero() || base.hasZero() || (v ? v->hasZero() : false);
|
bool z = reg.hasZero() || base.hasZero() || (v ? v->hasZero() : false);
|
||||||
if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET);
|
if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET);
|
||||||
db(0x62);
|
db(0x62);
|
||||||
|
@ -1935,10 +1988,11 @@ private:
|
||||||
const Address& addr = op2.getAddress();
|
const Address& addr = op2.getAddress();
|
||||||
const RegExp& regExp = addr.getRegExp();
|
const RegExp& regExp = addr.getRegExp();
|
||||||
const Reg& base = regExp.getBase();
|
const Reg& base = regExp.getBase();
|
||||||
|
const Reg& index = regExp.getIndex();
|
||||||
if (BIT == 64 && addr.is32bit()) db(0x67);
|
if (BIT == 64 && addr.is32bit()) db(0x67);
|
||||||
int disp8N = 0;
|
int disp8N = 0;
|
||||||
bool x = regExp.getIndex().isExtIdx();
|
bool x = index.isExtIdx();
|
||||||
if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) {
|
if ((type & (T_MUST_EVEX|T_MEM_EVEX)) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) {
|
||||||
int aaa = addr.getOpmaskIdx();
|
int aaa = addr.getOpmaskIdx();
|
||||||
if (aaa && !(type & T_M_K)) throw Error(ERR_INVALID_OPMASK_WITH_MEMORY);
|
if (aaa && !(type & T_M_K)) throw Error(ERR_INVALID_OPMASK_WITH_MEMORY);
|
||||||
bool b = false;
|
bool b = false;
|
||||||
|
@ -1946,8 +2000,8 @@ private:
|
||||||
if (!(type & (T_B32 | T_B64))) throw Error(ERR_INVALID_BROADCAST);
|
if (!(type & (T_B32 | T_B64))) throw Error(ERR_INVALID_BROADCAST);
|
||||||
b = true;
|
b = true;
|
||||||
}
|
}
|
||||||
int VL = regExp.isVsib() ? regExp.getIndex().getBit() : 0;
|
int VL = regExp.isVsib() ? index.getBit() : 0;
|
||||||
disp8N = evex(r, base, p1, type, code, x, b, aaa, VL);
|
disp8N = evex(r, base, p1, type, code, x, b, aaa, VL, index.isExtIdx2());
|
||||||
} else {
|
} else {
|
||||||
vex(r, base, p1, type, code, x);
|
vex(r, base, p1, type, code, x);
|
||||||
}
|
}
|
||||||
|
@ -2147,7 +2201,8 @@ public:
|
||||||
const Segment es, cs, ss, ds, fs, gs;
|
const Segment es, cs, ss, ds, fs, gs;
|
||||||
#endif
|
#endif
|
||||||
void L(const std::string& label) { labelMgr_.defineSlabel(label); }
|
void L(const std::string& label) { labelMgr_.defineSlabel(label); }
|
||||||
void L(const Label& label) { labelMgr_.defineClabel(label); }
|
void L(Label& label) { labelMgr_.defineClabel(label); }
|
||||||
|
Label L() { Label label; L(label); return label; }
|
||||||
void inLocalLabel() { labelMgr_.enterLocal(); }
|
void inLocalLabel() { labelMgr_.enterLocal(); }
|
||||||
void outLocalLabel() { labelMgr_.leaveLocal(); }
|
void outLocalLabel() { labelMgr_.leaveLocal(); }
|
||||||
/*
|
/*
|
||||||
|
@ -2178,7 +2233,7 @@ public:
|
||||||
// call(function pointer)
|
// call(function pointer)
|
||||||
#ifdef XBYAK_VARIADIC_TEMPLATE
|
#ifdef XBYAK_VARIADIC_TEMPLATE
|
||||||
template<class Ret, class... Params>
|
template<class Ret, class... Params>
|
||||||
void call(Ret(*func)(Params...)) { call(CastTo<const void*>(func)); }
|
void call(Ret(*func)(Params...)) { call(reinterpret_cast<const void*>(func)); }
|
||||||
#endif
|
#endif
|
||||||
void call(const void *addr) { opJmpAbs(addr, T_NEAR, 0, 0xE8); }
|
void call(const void *addr) { opJmpAbs(addr, T_NEAR, 0, 0xE8); }
|
||||||
|
|
||||||
|
@ -2436,11 +2491,16 @@ public:
|
||||||
MUST call ready() to complete generating code if you use AutoGrow mode.
|
MUST call ready() to complete generating code if you use AutoGrow mode.
|
||||||
It is not necessary for the other mode if hasUndefinedLabel() is true.
|
It is not necessary for the other mode if hasUndefinedLabel() is true.
|
||||||
*/
|
*/
|
||||||
void ready()
|
void ready(ProtectMode mode = PROTECT_RWE)
|
||||||
{
|
{
|
||||||
if (hasUndefinedLabel()) throw Error(ERR_LABEL_IS_NOT_FOUND);
|
if (hasUndefinedLabel()) throw Error(ERR_LABEL_IS_NOT_FOUND);
|
||||||
if (isAutoGrow()) calcJmpAddress();
|
if (isAutoGrow()) {
|
||||||
|
calcJmpAddress();
|
||||||
|
if (useProtect()) setProtectMode(mode);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
// set read/exec
|
||||||
|
void readyRE() { return ready(PROTECT_RE); }
|
||||||
#ifdef XBYAK_TEST
|
#ifdef XBYAK_TEST
|
||||||
void dump(bool doClear = true)
|
void dump(bool doClear = true)
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
const char *getVersionString() const { return "5.67"; }
|
const char *getVersionString() const { return "5.77"; }
|
||||||
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
|
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
|
||||||
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
|
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
|
||||||
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
|
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
|
||||||
|
@ -1023,7 +1023,7 @@ void vhsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
|
||||||
void vhsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0x7D); }
|
void vhsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0x7D); }
|
||||||
void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); }
|
void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); }
|
||||||
void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); }
|
void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); }
|
||||||
void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_EW0 | T_EVEX, 0x21, imm); }
|
void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_W0 | T_EW0 | T_EVEX, 0x21, imm); }
|
||||||
void vlddqu(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_F2 | T_W0 | T_YMM, 0xF0); }
|
void vlddqu(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_F2 | T_W0 | T_YMM, 0xF0); }
|
||||||
void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, T_0F, 0xAE); }
|
void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, T_0F, 0xAE); }
|
||||||
void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_66, 0xF7); }
|
void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_66, 0xF7); }
|
||||||
|
@ -1206,28 +1206,28 @@ void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm,
|
||||||
void vpsignb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x08); }
|
void vpsignb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x08); }
|
||||||
void vpsignd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x0A); }
|
void vpsignd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x0A); }
|
||||||
void vpsignw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x09); }
|
void vpsignw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x09); }
|
||||||
void vpslld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); }
|
void vpslld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
|
||||||
void vpslld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xF2); }
|
void vpslld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xF2); }
|
||||||
void vpslldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x73, imm); }
|
void vpslldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); }
|
||||||
void vpsllq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x73, imm); }
|
void vpsllq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); }
|
||||||
void vpsllq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xF3); }
|
void vpsllq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xF3); }
|
||||||
void vpsllvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x47); }
|
void vpsllvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x47); }
|
||||||
void vpsllvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x47); }
|
void vpsllvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x47); }
|
||||||
void vpsllw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); }
|
void vpsllw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
|
||||||
void vpsllw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xF1); }
|
void vpsllw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xF1); }
|
||||||
void vpsrad(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); }
|
void vpsrad(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
|
||||||
void vpsrad(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xE2); }
|
void vpsrad(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xE2); }
|
||||||
void vpsravd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x46); }
|
void vpsravd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x46); }
|
||||||
void vpsraw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); }
|
void vpsraw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
|
||||||
void vpsraw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xE1); }
|
void vpsraw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xE1); }
|
||||||
void vpsrld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); }
|
void vpsrld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
|
||||||
void vpsrld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xD2); }
|
void vpsrld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xD2); }
|
||||||
void vpsrldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x73, imm); }
|
void vpsrldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); }
|
||||||
void vpsrlq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x73, imm); }
|
void vpsrlq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); }
|
||||||
void vpsrlq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xD3); }
|
void vpsrlq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xD3); }
|
||||||
void vpsrlvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x45); }
|
void vpsrlvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x45); }
|
||||||
void vpsrlvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x45); }
|
void vpsrlvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x45); }
|
||||||
void vpsrlw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); }
|
void vpsrlw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
|
||||||
void vpsrlw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xD1); }
|
void vpsrlw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xD1); }
|
||||||
void vpsubb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF8); }
|
void vpsubb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF8); }
|
||||||
void vpsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xFA); }
|
void vpsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xFA); }
|
||||||
|
|
|
@ -9,6 +9,11 @@
|
||||||
*/
|
*/
|
||||||
#include "xbyak.h"
|
#include "xbyak.h"
|
||||||
|
|
||||||
|
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
|
||||||
|
#define XBYAK_INTEL_CPU_SPECIFIC
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
#if (_MSC_VER < 1400) && defined(XBYAK32)
|
#if (_MSC_VER < 1400) && defined(XBYAK32)
|
||||||
static inline __declspec(naked) void __cpuid(int[4], int)
|
static inline __declspec(naked) void __cpuid(int[4], int)
|
||||||
|
@ -47,14 +52,30 @@
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace Xbyak { namespace util {
|
namespace Xbyak { namespace util {
|
||||||
|
|
||||||
|
typedef enum {
|
||||||
|
SmtLevel = 1,
|
||||||
|
CoreLevel = 2
|
||||||
|
} IntelCpuTopologyLevel;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
CPU detection class
|
CPU detection class
|
||||||
*/
|
*/
|
||||||
class Cpu {
|
class Cpu {
|
||||||
uint64 type_;
|
uint64 type_;
|
||||||
|
//system topology
|
||||||
|
bool x2APIC_supported_;
|
||||||
|
static const size_t maxTopologyLevels = 2;
|
||||||
|
unsigned int numCores_[maxTopologyLevels];
|
||||||
|
|
||||||
|
static const unsigned int maxNumberCacheLevels = 10;
|
||||||
|
unsigned int dataCacheSize_[maxNumberCacheLevels];
|
||||||
|
unsigned int coresSharignDataCache_[maxNumberCacheLevels];
|
||||||
|
unsigned int dataCacheLevels_;
|
||||||
|
|
||||||
unsigned int get32bitAsBE(const char *x) const
|
unsigned int get32bitAsBE(const char *x) const
|
||||||
{
|
{
|
||||||
return x[0] | (x[1] << 8) | (x[2] << 16) | (x[3] << 24);
|
return x[0] | (x[1] << 8) | (x[2] << 16) | (x[3] << 24);
|
||||||
|
@ -65,7 +86,7 @@ class Cpu {
|
||||||
}
|
}
|
||||||
void setFamily()
|
void setFamily()
|
||||||
{
|
{
|
||||||
unsigned int data[4];
|
unsigned int data[4] = {};
|
||||||
getCpuid(1, data);
|
getCpuid(1, data);
|
||||||
stepping = data[0] & mask(4);
|
stepping = data[0] & mask(4);
|
||||||
model = (data[0] >> 4) & mask(4);
|
model = (data[0] >> 4) & mask(4);
|
||||||
|
@ -88,6 +109,39 @@ class Cpu {
|
||||||
{
|
{
|
||||||
return (val >> base) & ((1u << (end - base)) - 1);
|
return (val >> base) & ((1u << (end - base)) - 1);
|
||||||
}
|
}
|
||||||
|
void setNumCores()
|
||||||
|
{
|
||||||
|
if ((type_ & tINTEL) == 0) return;
|
||||||
|
|
||||||
|
unsigned int data[4] = {};
|
||||||
|
|
||||||
|
/* CAUTION: These numbers are configuration as shipped by Intel. */
|
||||||
|
getCpuidEx(0x0, 0, data);
|
||||||
|
if (data[0] >= 0xB) {
|
||||||
|
/*
|
||||||
|
if leaf 11 exists(x2APIC is supported),
|
||||||
|
we use it to get the number of smt cores and cores on socket
|
||||||
|
|
||||||
|
leaf 0xB can be zeroed-out by a hypervisor
|
||||||
|
*/
|
||||||
|
x2APIC_supported_ = true;
|
||||||
|
for (unsigned int i = 0; i < maxTopologyLevels; i++) {
|
||||||
|
getCpuidEx(0xB, i, data);
|
||||||
|
IntelCpuTopologyLevel level = (IntelCpuTopologyLevel)extractBit(data[2], 8, 15);
|
||||||
|
if (level == SmtLevel || level == CoreLevel) {
|
||||||
|
numCores_[level - 1] = extractBit(data[1], 0, 15);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
Failed to determine num of cores without x2APIC support.
|
||||||
|
TODO: USE initial APIC ID to determine ncores.
|
||||||
|
*/
|
||||||
|
numCores_[SmtLevel - 1] = 0;
|
||||||
|
numCores_[CoreLevel - 1] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
void setCacheHierarchy()
|
void setCacheHierarchy()
|
||||||
{
|
{
|
||||||
if ((type_ & tINTEL) == 0) return;
|
if ((type_ & tINTEL) == 0) return;
|
||||||
|
@ -96,21 +150,12 @@ class Cpu {
|
||||||
// const unsigned int INSTRUCTION_CACHE = 2;
|
// const unsigned int INSTRUCTION_CACHE = 2;
|
||||||
const unsigned int UNIFIED_CACHE = 3;
|
const unsigned int UNIFIED_CACHE = 3;
|
||||||
unsigned int smt_width = 0;
|
unsigned int smt_width = 0;
|
||||||
unsigned int n_cores = 0;
|
unsigned int logical_cores = 0;
|
||||||
unsigned int data[4];
|
unsigned int data[4] = {};
|
||||||
|
|
||||||
/*
|
if (x2APIC_supported_) {
|
||||||
if leaf 11 exists, we use it to get the number of smt cores and cores on socket
|
smt_width = numCores_[0];
|
||||||
If x2APIC is supported, these are the only correct numbers.
|
logical_cores = numCores_[1];
|
||||||
|
|
||||||
leaf 0xB can be zeroed-out by a hypervisor
|
|
||||||
*/
|
|
||||||
getCpuidEx(0x0, 0, data);
|
|
||||||
if (data[0] >= 0xB) {
|
|
||||||
getCpuidEx(0xB, 0, data); // CPUID for SMT Level
|
|
||||||
smt_width = data[1] & 0x7FFF;
|
|
||||||
getCpuidEx(0xB, 1, data); // CPUID for CORE Level
|
|
||||||
n_cores = data[1] & 0x7FFF;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -118,29 +163,29 @@ class Cpu {
|
||||||
the first level of data cache is not shared (which is the
|
the first level of data cache is not shared (which is the
|
||||||
case for every existing architecture) and use this to
|
case for every existing architecture) and use this to
|
||||||
determine the SMT width for arch not supporting leaf 11.
|
determine the SMT width for arch not supporting leaf 11.
|
||||||
when leaf 4 reports a number of core less than n_cores
|
when leaf 4 reports a number of core less than numCores_
|
||||||
on socket reported by leaf 11, then it is a correct number
|
on socket reported by leaf 11, then it is a correct number
|
||||||
of cores not an upperbound.
|
of cores not an upperbound.
|
||||||
*/
|
*/
|
||||||
for (int i = 0; data_cache_levels < maxNumberCacheLevels; i++) {
|
for (int i = 0; dataCacheLevels_ < maxNumberCacheLevels; i++) {
|
||||||
getCpuidEx(0x4, i, data);
|
getCpuidEx(0x4, i, data);
|
||||||
unsigned int cacheType = extractBit(data[0], 0, 4);
|
unsigned int cacheType = extractBit(data[0], 0, 4);
|
||||||
if (cacheType == NO_CACHE) break;
|
if (cacheType == NO_CACHE) break;
|
||||||
if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
|
if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
|
||||||
unsigned int nb_logical_cores = extractBit(data[0], 14, 25) + 1;
|
unsigned int actual_logical_cores = extractBit(data[0], 14, 25) + 1;
|
||||||
if (n_cores != 0) { // true only if leaf 0xB is supported and valid
|
if (logical_cores != 0) { // true only if leaf 0xB is supported and valid
|
||||||
nb_logical_cores = (std::min)(nb_logical_cores, n_cores);
|
actual_logical_cores = (std::min)(actual_logical_cores, logical_cores);
|
||||||
}
|
}
|
||||||
assert(nb_logical_cores != 0);
|
assert(actual_logical_cores != 0);
|
||||||
data_cache_size[data_cache_levels] =
|
dataCacheSize_[dataCacheLevels_] =
|
||||||
(extractBit(data[1], 22, 31) + 1)
|
(extractBit(data[1], 22, 31) + 1)
|
||||||
* (extractBit(data[1], 12, 21) + 1)
|
* (extractBit(data[1], 12, 21) + 1)
|
||||||
* (extractBit(data[1], 0, 11) + 1)
|
* (extractBit(data[1], 0, 11) + 1)
|
||||||
* (data[2] + 1);
|
* (data[2] + 1);
|
||||||
if (cacheType == DATA_CACHE && smt_width == 0) smt_width = nb_logical_cores;
|
if (cacheType == DATA_CACHE && smt_width == 0) smt_width = actual_logical_cores;
|
||||||
assert(smt_width != 0);
|
assert(smt_width != 0);
|
||||||
cores_sharing_data_cache[data_cache_levels] = (std::max)(nb_logical_cores / smt_width, 1u);
|
coresSharignDataCache_[dataCacheLevels_] = (std::max)(actual_logical_cores / smt_width, 1u);
|
||||||
data_cache_levels++;
|
dataCacheLevels_++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -154,22 +199,25 @@ public:
|
||||||
int displayFamily; // family + extFamily
|
int displayFamily; // family + extFamily
|
||||||
int displayModel; // model + extModel
|
int displayModel; // model + extModel
|
||||||
|
|
||||||
// may I move these members into private?
|
unsigned int getNumCores(IntelCpuTopologyLevel level) {
|
||||||
static const unsigned int maxNumberCacheLevels = 10;
|
if (!x2APIC_supported_) throw Error(ERR_X2APIC_IS_NOT_SUPPORTED);
|
||||||
unsigned int data_cache_size[maxNumberCacheLevels];
|
switch (level) {
|
||||||
unsigned int cores_sharing_data_cache[maxNumberCacheLevels];
|
case SmtLevel: return numCores_[level - 1];
|
||||||
unsigned int data_cache_levels;
|
case CoreLevel: return numCores_[level - 1] / numCores_[SmtLevel - 1];
|
||||||
|
default: throw Error(ERR_X2APIC_IS_NOT_SUPPORTED);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
unsigned int getDataCacheLevels() const { return data_cache_levels; }
|
unsigned int getDataCacheLevels() const { return dataCacheLevels_; }
|
||||||
unsigned int getCoresSharingDataCache(unsigned int i) const
|
unsigned int getCoresSharingDataCache(unsigned int i) const
|
||||||
{
|
{
|
||||||
if (i >= data_cache_levels) throw Error(ERR_BAD_PARAMETER);
|
if (i >= dataCacheLevels_) throw Error(ERR_BAD_PARAMETER);
|
||||||
return cores_sharing_data_cache[i];
|
return coresSharignDataCache_[i];
|
||||||
}
|
}
|
||||||
unsigned int getDataCacheSize(unsigned int i) const
|
unsigned int getDataCacheSize(unsigned int i) const
|
||||||
{
|
{
|
||||||
if (i >= data_cache_levels) throw Error(ERR_BAD_PARAMETER);
|
if (i >= dataCacheLevels_) throw Error(ERR_BAD_PARAMETER);
|
||||||
return data_cache_size[i];
|
return dataCacheSize_[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -177,30 +225,45 @@ public:
|
||||||
*/
|
*/
|
||||||
static inline void getCpuid(unsigned int eaxIn, unsigned int data[4])
|
static inline void getCpuid(unsigned int eaxIn, unsigned int data[4])
|
||||||
{
|
{
|
||||||
#ifdef _MSC_VER
|
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||||
|
#ifdef _MSC_VER
|
||||||
__cpuid(reinterpret_cast<int*>(data), eaxIn);
|
__cpuid(reinterpret_cast<int*>(data), eaxIn);
|
||||||
#else
|
#else
|
||||||
__cpuid(eaxIn, data[0], data[1], data[2], data[3]);
|
__cpuid(eaxIn, data[0], data[1], data[2], data[3]);
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
(void)eaxIn;
|
||||||
|
(void)data;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
static inline void getCpuidEx(unsigned int eaxIn, unsigned int ecxIn, unsigned int data[4])
|
static inline void getCpuidEx(unsigned int eaxIn, unsigned int ecxIn, unsigned int data[4])
|
||||||
{
|
{
|
||||||
#ifdef _MSC_VER
|
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||||
|
#ifdef _MSC_VER
|
||||||
__cpuidex(reinterpret_cast<int*>(data), eaxIn, ecxIn);
|
__cpuidex(reinterpret_cast<int*>(data), eaxIn, ecxIn);
|
||||||
#else
|
#else
|
||||||
__cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]);
|
__cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]);
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
(void)eaxIn;
|
||||||
|
(void)ecxIn;
|
||||||
|
(void)data;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
static inline uint64 getXfeature()
|
static inline uint64 getXfeature()
|
||||||
{
|
{
|
||||||
#ifdef _MSC_VER
|
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||||
|
#ifdef _MSC_VER
|
||||||
return _xgetbv(0);
|
return _xgetbv(0);
|
||||||
#else
|
#else
|
||||||
unsigned int eax, edx;
|
unsigned int eax, edx;
|
||||||
// xgetvb is not support on gcc 4.2
|
// xgetvb is not support on gcc 4.2
|
||||||
// __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
|
// __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
|
||||||
__asm__ volatile(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0));
|
__asm__ volatile(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0));
|
||||||
return ((uint64)edx << 32) | eax;
|
return ((uint64)edx << 32) | eax;
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
return 0;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
typedef uint64 Type;
|
typedef uint64 Type;
|
||||||
|
@ -271,9 +334,13 @@ public:
|
||||||
|
|
||||||
Cpu()
|
Cpu()
|
||||||
: type_(NONE)
|
: type_(NONE)
|
||||||
, data_cache_levels(0)
|
, x2APIC_supported_(false)
|
||||||
|
, numCores_()
|
||||||
|
, dataCacheSize_()
|
||||||
|
, coresSharignDataCache_()
|
||||||
|
, dataCacheLevels_(0)
|
||||||
{
|
{
|
||||||
unsigned int data[4];
|
unsigned int data[4] = {};
|
||||||
const unsigned int& EAX = data[0];
|
const unsigned int& EAX = data[0];
|
||||||
const unsigned int& EBX = data[1];
|
const unsigned int& EBX = data[1];
|
||||||
const unsigned int& ECX = data[2];
|
const unsigned int& ECX = data[2];
|
||||||
|
@ -363,6 +430,7 @@ public:
|
||||||
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
|
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
|
||||||
}
|
}
|
||||||
setFamily();
|
setFamily();
|
||||||
|
setNumCores();
|
||||||
setCacheHierarchy();
|
setCacheHierarchy();
|
||||||
}
|
}
|
||||||
void putFamily() const
|
void putFamily() const
|
||||||
|
@ -381,12 +449,17 @@ class Clock {
|
||||||
public:
|
public:
|
||||||
static inline uint64 getRdtsc()
|
static inline uint64 getRdtsc()
|
||||||
{
|
{
|
||||||
#ifdef _MSC_VER
|
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||||
|
#ifdef _MSC_VER
|
||||||
return __rdtsc();
|
return __rdtsc();
|
||||||
#else
|
#else
|
||||||
unsigned int eax, edx;
|
unsigned int eax, edx;
|
||||||
__asm__ volatile("rdtsc" : "=a"(eax), "=d"(edx));
|
__asm__ volatile("rdtsc" : "=a"(eax), "=d"(edx));
|
||||||
return ((uint64)edx << 32) | eax;
|
return ((uint64)edx << 32) | eax;
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
// TODO: Need another impl of Clock or rdtsc-equivalent for non-x86 cpu
|
||||||
|
return 0;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
Clock()
|
Clock()
|
||||||
|
@ -416,7 +489,7 @@ const int UseRCX = 1 << 6;
|
||||||
const int UseRDX = 1 << 7;
|
const int UseRDX = 1 << 7;
|
||||||
|
|
||||||
class Pack {
|
class Pack {
|
||||||
static const size_t maxTblNum = 10;
|
static const size_t maxTblNum = 15;
|
||||||
const Xbyak::Reg64 *tbl_[maxTblNum];
|
const Xbyak::Reg64 *tbl_[maxTblNum];
|
||||||
size_t n_;
|
size_t n_;
|
||||||
public:
|
public:
|
||||||
|
@ -476,7 +549,7 @@ public:
|
||||||
const Xbyak::Reg64& operator[](size_t n) const
|
const Xbyak::Reg64& operator[](size_t n) const
|
||||||
{
|
{
|
||||||
if (n >= n_) {
|
if (n >= n_) {
|
||||||
fprintf(stderr, "ERR Pack bad n=%d\n", (int)n);
|
fprintf(stderr, "ERR Pack bad n=%d(%d)\n", (int)n, (int)n_);
|
||||||
throw Error(ERR_BAD_PARAMETER);
|
throw Error(ERR_BAD_PARAMETER);
|
||||||
}
|
}
|
||||||
return *tbl_[n];
|
return *tbl_[n];
|
||||||
|
@ -518,6 +591,7 @@ class StackFrame {
|
||||||
static const int rcxPos = 3;
|
static const int rcxPos = 3;
|
||||||
static const int rdxPos = 2;
|
static const int rdxPos = 2;
|
||||||
#endif
|
#endif
|
||||||
|
static const int maxRegNum = 14; // maxRegNum = 16 - rsp - rax
|
||||||
Xbyak::CodeGenerator *code_;
|
Xbyak::CodeGenerator *code_;
|
||||||
int pNum_;
|
int pNum_;
|
||||||
int tNum_;
|
int tNum_;
|
||||||
|
@ -527,7 +601,7 @@ class StackFrame {
|
||||||
int P_;
|
int P_;
|
||||||
bool makeEpilog_;
|
bool makeEpilog_;
|
||||||
Xbyak::Reg64 pTbl_[4];
|
Xbyak::Reg64 pTbl_[4];
|
||||||
Xbyak::Reg64 tTbl_[10];
|
Xbyak::Reg64 tTbl_[maxRegNum];
|
||||||
Pack p_;
|
Pack p_;
|
||||||
Pack t_;
|
Pack t_;
|
||||||
StackFrame(const StackFrame&);
|
StackFrame(const StackFrame&);
|
||||||
|
@ -539,7 +613,7 @@ public:
|
||||||
make stack frame
|
make stack frame
|
||||||
@param sf [in] this
|
@param sf [in] this
|
||||||
@param pNum [in] num of function parameter(0 <= pNum <= 4)
|
@param pNum [in] num of function parameter(0 <= pNum <= 4)
|
||||||
@param tNum [in] num of temporary register(0 <= tNum <= 10, with UseRCX, UseRDX)
|
@param tNum [in] num of temporary register(0 <= tNum, with UseRCX, UseRDX) #{pNum + tNum [+rcx] + [rdx]} <= 14
|
||||||
@param stackSizeByte [in] local stack size
|
@param stackSizeByte [in] local stack size
|
||||||
@param makeEpilog [in] automatically call close() if true
|
@param makeEpilog [in] automatically call close() if true
|
||||||
|
|
||||||
|
@ -566,27 +640,17 @@ public:
|
||||||
using namespace Xbyak;
|
using namespace Xbyak;
|
||||||
if (pNum < 0 || pNum > 4) throw Error(ERR_BAD_PNUM);
|
if (pNum < 0 || pNum > 4) throw Error(ERR_BAD_PNUM);
|
||||||
const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0);
|
const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0);
|
||||||
if (allRegNum < pNum || allRegNum > 14) throw Error(ERR_BAD_TNUM);
|
if (tNum_ < 0 || allRegNum > maxRegNum) throw Error(ERR_BAD_TNUM);
|
||||||
const Reg64& _rsp = code->rsp;
|
const Reg64& _rsp = code->rsp;
|
||||||
const AddressFrame& _ptr = code->ptr;
|
|
||||||
saveNum_ = (std::max)(0, allRegNum - noSaveNum);
|
saveNum_ = (std::max)(0, allRegNum - noSaveNum);
|
||||||
const int *tbl = getOrderTbl() + noSaveNum;
|
const int *tbl = getOrderTbl() + noSaveNum;
|
||||||
P_ = saveNum_ + (stackSizeByte + 7) / 8;
|
for (int i = 0; i < saveNum_; i++) {
|
||||||
if (P_ > 0 && (P_ & 1) == 0) P_++; // here (rsp % 16) == 8, then increment P_ for 16 byte alignment
|
code->push(Reg64(tbl[i]));
|
||||||
|
}
|
||||||
|
P_ = (stackSizeByte + 7) / 8;
|
||||||
|
if (P_ > 0 && (P_ & 1) == (saveNum_ & 1)) P_++; // (rsp % 16) == 8, then increment P_ for 16 byte alignment
|
||||||
P_ *= 8;
|
P_ *= 8;
|
||||||
if (P_ > 0) code->sub(_rsp, P_);
|
if (P_ > 0) code->sub(_rsp, P_);
|
||||||
#ifdef XBYAK64_WIN
|
|
||||||
for (int i = 0; i < (std::min)(saveNum_, 4); i++) {
|
|
||||||
code->mov(_ptr [_rsp + P_ + (i + 1) * 8], Reg64(tbl[i]));
|
|
||||||
}
|
|
||||||
for (int i = 4; i < saveNum_; i++) {
|
|
||||||
code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i]));
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
for (int i = 0; i < saveNum_; i++) {
|
|
||||||
code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i]));
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
int pos = 0;
|
int pos = 0;
|
||||||
for (int i = 0; i < pNum; i++) {
|
for (int i = 0; i < pNum; i++) {
|
||||||
pTbl_[i] = Xbyak::Reg64(getRegIdx(pos));
|
pTbl_[i] = Xbyak::Reg64(getRegIdx(pos));
|
||||||
|
@ -607,21 +671,11 @@ public:
|
||||||
{
|
{
|
||||||
using namespace Xbyak;
|
using namespace Xbyak;
|
||||||
const Reg64& _rsp = code_->rsp;
|
const Reg64& _rsp = code_->rsp;
|
||||||
const AddressFrame& _ptr = code_->ptr;
|
|
||||||
const int *tbl = getOrderTbl() + noSaveNum;
|
const int *tbl = getOrderTbl() + noSaveNum;
|
||||||
#ifdef XBYAK64_WIN
|
|
||||||
for (int i = 0; i < (std::min)(saveNum_, 4); i++) {
|
|
||||||
code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ + (i + 1) * 8]);
|
|
||||||
}
|
|
||||||
for (int i = 4; i < saveNum_; i++) {
|
|
||||||
code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
for (int i = 0; i < saveNum_; i++) {
|
|
||||||
code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
if (P_ > 0) code_->add(_rsp, P_);
|
if (P_ > 0) code_->add(_rsp, P_);
|
||||||
|
for (int i = 0; i < saveNum_; i++) {
|
||||||
|
code_->pop(Reg64(tbl[saveNum_ - 1 - i]));
|
||||||
|
}
|
||||||
|
|
||||||
if (callRet) code_->ret();
|
if (callRet) code_->ret();
|
||||||
}
|
}
|
||||||
|
@ -633,9 +687,6 @@ public:
|
||||||
} catch (std::exception& e) {
|
} catch (std::exception& e) {
|
||||||
printf("ERR:StackFrame %s\n", e.what());
|
printf("ERR:StackFrame %s\n", e.what());
|
||||||
exit(1);
|
exit(1);
|
||||||
} catch (...) {
|
|
||||||
printf("ERR:StackFrame otherwise\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
|
@ -654,7 +705,7 @@ private:
|
||||||
}
|
}
|
||||||
int getRegIdx(int& pos) const
|
int getRegIdx(int& pos) const
|
||||||
{
|
{
|
||||||
assert(pos < 14);
|
assert(pos < maxRegNum);
|
||||||
using namespace Xbyak;
|
using namespace Xbyak;
|
||||||
const int *tbl = getOrderTbl();
|
const int *tbl = getOrderTbl();
|
||||||
int r = tbl[pos++];
|
int r = tbl[pos++];
|
||||||
|
|
Loading…
Reference in a new issue