080b4b3aff
4a6fac8a update version to 5.77 801cf3fd cosmetic change of getNumCores d397e824 fix number of cores that share LLC cache a669e092 support non-intel-cpu visual studio af5f422e Merge branch 'fenghaitao-guard_x86' into develop 9b98dc17 Guard x86 specific codes with "#if defined(__i386__) || defined(__x86_64__)" dd4173e1 move some member variables input private f72646a7 update version 4612528f format change 4b95e862 Merge branch 'shelleygoel-master' 4c262fa6 add functionality to get num of cores using x2APIC ID bc70e7e1 recover Xbyak::CastTo d09a230f unlink Label when LabelManager is destroyed 973e8597 update version afdb9fe9 Xbyak::CastTo is removed b011aca4 add RegRip +/- int acae93cd increase max temp regs for StackFrame ea4e3562 util::StackFrame uses push/pop instead of mov 42462ef9 use evex encoding for vpslld/vpslldq/vpsraw/...(reg, mem, imm); da9117a9 update version of readme.md d35f4fb7 fix the encoding of vinsertps for disp8N 1de435ed bf uses Label class 613922bd add Label L() for convenience 43e15583 fix typo 93579ee6 add protect-re.cpp 60004b5c fix url of protect-re.cpp 348b2709 fix typo of doc f34f6ed5 update manual 232110be update test 82b78bf0 add setProtectMode dd8b290f put warning message if pageSize != 4096 64775ca2 a little refactoring 7c3e7b85 fix wrong VSIB encoding with idx >= 16 git-subtree-dir: externals/xbyak git-subtree-split: 4a6fac8ade404f667b94170f713367fe7da2a852
416 lines
8.2 KiB
C++
416 lines
8.2 KiB
C++
// Must be defined before including xbyak; the generators below use xor_()
// (presumably the plain and/or/xor/not mnemonic names are disabled by it).
#define XBYAK_NO_OP_NAMES
#include <xbyak/xbyak_util.h>

// This test only targets 64-bit mode.
#ifdef XBYAK32
    #error "this sample is for only 64-bit mode"
#endif

using namespace Xbyak::util;
struct Code : public Xbyak::CodeGenerator {
|
|
void gen1()
|
|
{
|
|
StackFrame sf(this, 1);
|
|
mov(rax, sf.p[0]);
|
|
}
|
|
void gen2()
|
|
{
|
|
StackFrame sf(this, 2);
|
|
lea(rax, ptr [sf.p[0] + sf.p[1]]);
|
|
}
|
|
void gen3()
|
|
{
|
|
StackFrame sf(this, 3);
|
|
mov(rax, sf.p[0]);
|
|
add(rax, sf.p[1]);
|
|
add(rax, sf.p[2]);
|
|
}
|
|
void gen4()
|
|
{
|
|
StackFrame sf(this, 4);
|
|
mov(rax, sf.p[0]);
|
|
add(rax, sf.p[1]);
|
|
add(rax, sf.p[2]);
|
|
add(rax, sf.p[3]);
|
|
}
|
|
|
|
void gen5()
|
|
{
|
|
StackFrame sf(this, 4, UseRCX);
|
|
xor_(rcx, rcx);
|
|
mov(rax, sf.p[0]);
|
|
add(rax, sf.p[1]);
|
|
add(rax, sf.p[2]);
|
|
add(rax, sf.p[3]);
|
|
}
|
|
|
|
void gen6()
|
|
{
|
|
StackFrame sf(this, 4, UseRCX | UseRDX);
|
|
xor_(rcx, rcx);
|
|
xor_(rdx, rdx);
|
|
mov(rax, sf.p[0]);
|
|
add(rax, sf.p[1]);
|
|
add(rax, sf.p[2]);
|
|
add(rax, sf.p[3]);
|
|
}
|
|
|
|
void gen7()
|
|
{
|
|
StackFrame sf(this, 3, UseRCX | UseRDX);
|
|
xor_(rcx, rcx);
|
|
xor_(rdx, rdx);
|
|
mov(rax, sf.p[0]);
|
|
add(rax, sf.p[1]);
|
|
add(rax, sf.p[2]);
|
|
}
|
|
|
|
void gen8()
|
|
{
|
|
StackFrame sf(this, 3, 3 | UseRCX | UseRDX);
|
|
xor_(rcx, rcx);
|
|
xor_(rdx, rdx);
|
|
mov(sf.t[0], 1);
|
|
mov(sf.t[1], 2);
|
|
mov(sf.t[2], 3);
|
|
mov(rax, sf.p[0]);
|
|
add(rax, sf.p[1]);
|
|
add(rax, sf.p[2]);
|
|
}
|
|
|
|
void gen9()
|
|
{
|
|
StackFrame sf(this, 3, 3 | UseRCX | UseRDX, 32);
|
|
xor_(rcx, rcx);
|
|
xor_(rdx, rdx);
|
|
mov(sf.t[0], 1);
|
|
mov(sf.t[1], 2);
|
|
mov(sf.t[2], 3);
|
|
mov(rax, sf.p[0]);
|
|
add(rax, sf.p[1]);
|
|
add(rax, sf.p[2]);
|
|
mov(ptr [rsp + 8 * 0], rax);
|
|
mov(ptr [rsp + 8 * 1], rax);
|
|
mov(ptr [rsp + 8 * 2], rax);
|
|
mov(ptr [rsp + 8 * 3], rax);
|
|
}
|
|
|
|
void gen10()
|
|
{
|
|
StackFrame sf(this, 4, 8 | UseRCX | UseRDX, 32);
|
|
xor_(rcx, rcx);
|
|
xor_(rdx, rdx);
|
|
for (int i = 0; i < 8; i++) {
|
|
mov(sf.t[i], i);
|
|
}
|
|
mov(rax, sf.p[0]);
|
|
add(rax, sf.p[1]);
|
|
add(rax, sf.p[2]);
|
|
add(rax, sf.p[3]);
|
|
mov(ptr [rsp + 8 * 0], rax);
|
|
mov(ptr [rsp + 8 * 1], rax);
|
|
mov(ptr [rsp + 8 * 2], rax);
|
|
mov(ptr [rsp + 8 * 3], rax);
|
|
}
|
|
|
|
void gen11()
|
|
{
|
|
StackFrame sf(this, 0, UseRCX);
|
|
xor_(rcx, rcx);
|
|
mov(rax, 3);
|
|
}
|
|
|
|
void gen12()
|
|
{
|
|
StackFrame sf(this, 4, UseRDX);
|
|
xor_(rdx, rdx);
|
|
mov(rax, sf.p[0]);
|
|
add(rax, sf.p[1]);
|
|
add(rax, sf.p[2]);
|
|
add(rax, sf.p[3]);
|
|
}
|
|
|
|
/*
|
|
int64_t f(const int64_t a[13]) { return sum-of-a[]; }
|
|
*/
|
|
void gen13()
|
|
{
|
|
StackFrame sf(this, 1, 13);
|
|
for (int i = 0; i < 13; i++) {
|
|
mov(sf.t[i], ptr[sf.p[0] + i * 8]);
|
|
}
|
|
mov(rax, sf.t[0]);
|
|
for (int i = 1; i < 13; i++) {
|
|
add(rax, sf.t[i]);
|
|
}
|
|
}
|
|
/*
|
|
same as gen13
|
|
*/
|
|
void gen14()
|
|
{
|
|
StackFrame sf(this, 1, 11 | UseRCX | UseRDX);
|
|
Pack t = sf.t;
|
|
t.append(rcx);
|
|
t.append(rdx);
|
|
for (int i = 0; i < 13; i++) {
|
|
mov(t[i], ptr[sf.p[0] + i * 8]);
|
|
}
|
|
mov(rax, t[0]);
|
|
for (int i = 1; i < 13; i++) {
|
|
add(rax, t[i]);
|
|
}
|
|
}
|
|
/*
|
|
return (1 << 15) - 1;
|
|
*/
|
|
void gen15()
|
|
{
|
|
StackFrame sf(this, 0, 14, 8);
|
|
Pack t = sf.t;
|
|
t.append(rax);
|
|
for (int i = 0; i < 15; i++) {
|
|
mov(t[i], 1 << i);
|
|
}
|
|
mov(qword[rsp], 0);
|
|
for (int i = 0; i < 15; i++) {
|
|
add(ptr[rsp], t[i]);
|
|
}
|
|
mov(rax, ptr[rsp]);
|
|
}
|
|
};
|
|
|
|
struct Code2 : Xbyak::CodeGenerator {
|
|
Code2()
|
|
: Xbyak::CodeGenerator(4096 * 32)
|
|
{
|
|
}
|
|
void gen(int pNum, int tNum, int stackSizeByte)
|
|
{
|
|
StackFrame sf(this, pNum, tNum, stackSizeByte);
|
|
if (tNum & UseRCX) xor_(rcx, rcx);
|
|
if (tNum & UseRDX) xor_(rdx, rdx);
|
|
for (int i = 0, n = tNum & ~(UseRCX | UseRDX); i < n; i++) {
|
|
mov(sf.t[i], 5);
|
|
}
|
|
for (int i = 0; i < stackSizeByte; i++) {
|
|
mov(byte [rsp + i], 0);
|
|
}
|
|
mov(rax, 1);
|
|
for (int i = 0; i < pNum; i++) {
|
|
add(rax, sf.p[i]);
|
|
}
|
|
}
|
|
void gen2(int pNum, int tNum, int stackSizeByte)
|
|
{
|
|
StackFrame sf(this, pNum, tNum, stackSizeByte);
|
|
mov(rax, rsp);
|
|
}
|
|
};
|
|
|
|
|
|
// number of failed check() calls so far
static int errNum = 0;

/*
    Compare two values; on mismatch print both and count one error.
    @param x first value
    @param y second value
*/
void check(int x, int y)
{
    if (x != y) {
        printf("err x=%d, y=%d\n", x, y);
        errNum++;
    }
}
/*
    Call the generated function at f with pNum int arguments taken from
    10, 100, 1000, 10000 and check that it returns 1 + their sum.
    @param f start address of the generated code
    @param pNum number of arguments the generated function takes (0..4)
    An unsupported pNum prints a message and terminates with exit(1).
*/
void verify(const Xbyak::uint8 *f, int pNum)
{
    switch (pNum) {
    case 0:
        check(1, reinterpret_cast<int (*)()>(f)());
        return;
    case 1:
        check(11, reinterpret_cast<int (*)(int)>(f)(10));
        return;
    case 2:
        check(111, reinterpret_cast<int (*)(int, int)>(f)(10, 100));
        return;
    case 3:
        check(1111, reinterpret_cast<int (*)(int, int, int)>(f)(10, 100, 1000));
        return;
    case 4:
        check(11111, reinterpret_cast<int (*)(int, int, int, int)>(f)(10, 100, 1000, 10000));
        return;
    default:
        printf("ERR pNum=%d\n", pNum);
        exit(1);
    }
}
void testAll()
|
|
{
|
|
Code2 code;
|
|
for (int stackSize = 0; stackSize < 32; stackSize += 7) {
|
|
for (int pNum = 0; pNum < 4; pNum++) {
|
|
for (int mode = 0; mode < 4; mode++) {
|
|
int maxNum = 0;
|
|
int opt = 0;
|
|
if (mode == 0) {
|
|
maxNum = 10;
|
|
} else if (mode == 1) {
|
|
maxNum = 9;
|
|
opt = UseRCX;
|
|
} else if (mode == 2) {
|
|
maxNum = 9;
|
|
opt = UseRDX;
|
|
} else {
|
|
maxNum = 8;
|
|
opt = UseRCX | UseRDX;
|
|
}
|
|
for (int tNum = 0; tNum < maxNum; tNum++) {
|
|
// printf("pNum=%d, tNum=%d, stackSize=%d\n", pNum, tNum | opt, stackSize);
|
|
const Xbyak::uint8 *f = code.getCurr();
|
|
code.gen(pNum, tNum | opt, stackSize);
|
|
verify(f, pNum);
|
|
/*
|
|
check rsp is 16-byte aligned if stackSize > 0
|
|
*/
|
|
if (stackSize > 0) {
|
|
Code2 c2;
|
|
c2.gen2(pNum, tNum | opt, stackSize);
|
|
uint64_t addr = c2.getCode<uint64_t (*)()>()();
|
|
check(addr % 16, 0);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void testPartial()
|
|
{
|
|
Code code;
|
|
int (*f1)(int) = code.getCurr<int (*)(int)>();
|
|
code.gen1();
|
|
check(5, f1(5));
|
|
|
|
int (*f2)(int, int) = code.getCurr<int (*)(int, int)>();
|
|
code.gen2();
|
|
check(9, f2(3, 6));
|
|
|
|
int (*f3)(int, int, int) = code.getCurr<int (*)(int, int, int)>();
|
|
code.gen3();
|
|
check(14, f3(1, 4, 9));
|
|
|
|
int (*f4)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
|
|
code.gen4();
|
|
check(30, f4(1, 4, 9, 16));
|
|
|
|
int (*f5)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
|
|
code.gen5();
|
|
check(23, f5(2, 5, 7, 9));
|
|
|
|
int (*f6)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
|
|
code.gen6();
|
|
check(18, f6(3, 4, 5, 6));
|
|
|
|
int (*f7)(int, int, int) = code.getCurr<int (*)(int, int, int)>();
|
|
code.gen7();
|
|
check(12, f7(3, 4, 5));
|
|
|
|
int (*f8)(int, int, int) = code.getCurr<int (*)(int, int, int)>();
|
|
code.gen8();
|
|
check(23, f8(5, 8, 10));
|
|
|
|
int (*f9)(int, int, int) = code.getCurr<int (*)(int, int, int)>();
|
|
code.gen9();
|
|
check(60, f9(10, 20, 30));
|
|
|
|
int (*f10)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
|
|
code.gen10();
|
|
check(100, f10(10, 20, 30, 40));
|
|
|
|
int (*f11)() = code.getCurr<int (*)()>();
|
|
code.gen11();
|
|
check(3, f11());
|
|
|
|
int (*f12)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
|
|
code.gen12();
|
|
check(24, f12(3, 5, 7, 9));
|
|
|
|
{
|
|
int64_t tbl[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 };
|
|
int64_t (*f13)(const int64_t*) = code.getCurr<int64_t (*)(const int64_t*)>();
|
|
code.gen13();
|
|
check(91, f13(tbl));
|
|
|
|
int64_t (*f14)(const int64_t*) = code.getCurr<int64_t (*)(const int64_t*)>();
|
|
code.gen14();
|
|
check(91, f14(tbl));
|
|
}
|
|
int (*f15)() = code.getCurr<int (*)()>();
|
|
code.gen15();
|
|
check((1 << 15) - 1, f15());
|
|
}
|
|
|
|
/*
    Print toString() of every element of p on one line, each followed by a
    space, then a newline.
    @param p pack to print
*/
void put(const Xbyak::util::Pack& p)
{
    for (size_t i = 0, n = p.size(); i < n; i++) {
        printf("%s ", p[i].toString());
    }
    printf("\n");
}
/*
    Check that the first tblNum registers of p have the indices in tbl.
    @param p pack under test
    @param tbl expected register indices
    @param tblNum number of entries of tbl to compare
*/
void verifyPack(const Xbyak::util::Pack& p, const int *tbl, size_t tblNum)
{
    for (size_t i = 0; i < tblNum; i++) {
        check(p[i].getIdx(), tbl[i]);
    }
}
void testPack()
|
|
{
|
|
const int N = 10;
|
|
Xbyak::Reg64 regTbl[N];
|
|
for (int i = 0; i < N; i++) {
|
|
regTbl[i] = Xbyak::Reg64(i);
|
|
}
|
|
Xbyak::util::Pack p(regTbl, N);
|
|
const struct {
|
|
int pos;
|
|
int num;
|
|
int tbl[10];
|
|
} tbl[] = {
|
|
{ 0, 10, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 } },
|
|
{ 1, 9, { 1, 2, 3, 4, 5, 6, 7, 8, 9 } },
|
|
{ 2, 8, { 2, 3, 4, 5, 6, 7, 8, 9 } },
|
|
{ 3, 7, { 3, 4, 5, 6, 7, 8, 9 } },
|
|
{ 4, 6, { 4, 5, 6, 7, 8, 9 } },
|
|
{ 5, 5, { 5, 6, 7, 8, 9 } },
|
|
{ 6, 4, { 6, 7, 8, 9 } },
|
|
{ 7, 3, { 7, 8, 9 } },
|
|
{ 8, 2, { 8, 9 } },
|
|
{ 9, 1, { 9 } },
|
|
{ 3, 5, { 3, 4, 5, 6, 7 } },
|
|
};
|
|
for (size_t i = 0; i < sizeof(tbl) / sizeof(*tbl); i++) {
|
|
const int pos = tbl[i].pos;
|
|
const int num = tbl[i].num;
|
|
verifyPack(p.sub(pos, num), tbl[i].tbl, num);
|
|
if (pos + num == N) {
|
|
verifyPack(p.sub(pos), tbl[i].tbl, num);
|
|
}
|
|
}
|
|
}
|
|
|
|
int main()
|
|
try
|
|
{
|
|
testAll();
|
|
|
|
testPartial();
|
|
testPack();
|
|
printf("errNum=%d\n", errNum);
|
|
} catch (std::exception& e) {
|
|
printf("err %s\n", e.what());
|
|
return 1;
|
|
} catch (...) {
|
|
puts("ERR");
|
|
return 1;
|
|
}
|
|
|