Squashed 'externals/xbyak/' content from commit d512551e

git-subtree-dir: externals/xbyak
git-subtree-split: d512551e914737300ba35f3c049d1b40effbe76d
This commit is contained in:
MerryMage 2020-04-22 20:25:57 +01:00
commit 4ed09fda06
79 changed files with 22734 additions and 0 deletions

6
CMakeLists.txt Normal file
View file

@ -0,0 +1,6 @@
# Install-only project: no targets are built, the xbyak headers are only installed.
cmake_minimum_required(VERSION 2.6)
project(xbyak)
# Collect every header that currently exists directly under xbyak/.
file(GLOB headers xbyak/*.h)
# Install the collected headers into the relative destination include/xbyak.
install(FILES ${headers} DESTINATION include/xbyak)

47
COPYRIGHT Normal file
View file

@ -0,0 +1,47 @@
Copyright (c) 2007 MITSUNARI Shigeo
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
Neither the name of the copyright owner nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
ソースコード形式かバイナリ形式か、変更するかしないかを問わず、以下の条件を満た
す場合に限り、再頒布および使用が許可されます。
ソースコードを再頒布する場合、上記の著作権表示、本条件一覧、および下記免責条項
を含めること。
バイナリ形式で再頒布する場合、頒布物に付属のドキュメント等の資料に、上記の著作
権表示、本条件一覧、および下記免責条項を含めること。
書面による特別の許可なしに、本ソフトウェアから派生した製品の宣伝または販売促進
に、著作権者の名前またはコントリビューターの名前を使用してはならない。
本ソフトウェアは、著作権者およびコントリビューターによって「現状のまま」提供さ
れており、明示黙示を問わず、商業的な使用可能性、および特定の目的に対する適合性
に関する暗黙の保証も含め、またそれに限定されない、いかなる保証もありません。
著作権者もコントリビューターも、事由のいかんを問わず、 損害発生の原因いかんを
問わず、かつ責任の根拠が契約であるか厳格責任であるか(過失その他の)不法行為で
あるかを問わず、仮にそのような損害が発生する可能性を知らされていたとしても、
本ソフトウェアの使用によって発生した(代替品または代用サービスの調達、使用の
喪失、データの喪失、利益の喪失、業務の中断も含め、またそれに限定されない)直接
損害、間接損害、偶発的な損害、特別損害、懲罰的損害、または結果損害について、
一切責任を負わないものとします。

24
Makefile Normal file
View file

@ -0,0 +1,24 @@
# Top-level Makefile: delegates to the sample/, gen/ and test/ sub-makefiles
# and installs/uninstalls the headers.
# PREFIX can be overridden on the command line to change the install root.
PREFIX=/usr/local
INSTALL_DIR=$(PREFIX)/include/xbyak
# Default target: build everything under sample/.
all:
$(MAKE) -C sample
# Remove what the sample/ makefile built.
clean:
$(MAKE) -C sample clean
# Copy the headers in xbyak/ to $(INSTALL_DIR), creating the directory if needed.
install:
mkdir -p $(INSTALL_DIR)
cp -pR xbyak/*.h $(INSTALL_DIR)
# Remove the installed headers (rm -i asks before each file), then the directory.
uninstall:
rm -i $(INSTALL_DIR)/*.h
rmdir $(INSTALL_DIR)
# Run the makefile in gen/.
update:
$(MAKE) -C gen
# Run the "test" target of the makefile in test/.
test:
$(MAKE) -C test test
# NOTE(review): only test and update are declared phony; all, clean, install and
# uninstall are not.
.PHONY: test update

26
gen/Makefile Normal file
View file

@ -0,0 +1,26 @@
# Builds the generator programs and uses them to write $(TARGET).
TARGET=../xbyak/xbyak_mnemonic.h
BIN=sortline gen_code gen_avx512
# NOTE: CFLAGS is passed to $(CXX) in the rules below.
CFLAGS=-I../ -O2 -DXBYAK_NO_OP_NAMES -Wall -Wextra -Wno-missing-field-initializers
all: $(TARGET)
# Each generator is compiled from its first prerequisite only ($<); the
# remaining prerequisites just trigger a rebuild when they change.
sortline: sortline.cpp
$(CXX) $(CFLAGS) $< -o $@
gen_code: gen_code.cpp ../xbyak/xbyak.h avx_type.hpp
$(CXX) $(CFLAGS) $< -o $@
gen_avx512: gen_avx512.cpp ../xbyak/xbyak.h avx_type.hpp
$(CXX) $(CFLAGS) $< -o $@
# $(TARGET) is assembled by concatenating generator output, with preprocessor
# guards echoed in between:
#   gen_code | sortline                  (the first command truncates the file)
#   #ifdef XBYAK_ENABLE_OMITTED_OPERAND ... gen_code omit | sortline ... #endif
#   gen_code fixed                       (not piped through sortline)
#   #ifndef XBYAK_DISABLE_AVX512 ... gen_avx512 | sortline
#     #ifdef XBYAK64 ... gen_avx512 64 | sortline ... #endif
#   #endif
$(TARGET): $(BIN)
./gen_code | ./sortline > $@
echo "#ifdef XBYAK_ENABLE_OMITTED_OPERAND" >> $@
./gen_code omit | ./sortline >> $@
echo "#endif" >>$@
./gen_code fixed >> $@
echo "#ifndef XBYAK_DISABLE_AVX512" >> $@
./gen_avx512 | ./sortline >> $@
echo "#ifdef XBYAK64" >> $@
./gen_avx512 64 | ./sortline >> $@
echo "#endif" >> $@
echo "#endif" >> $@
# Removes the generator binaries and the generated header.
clean:
$(RM) $(BIN) $(TARGET)

160
gen/avx_type.hpp Normal file
View file

@ -0,0 +1,160 @@
#include <assert.h>
// copy CodeGenerator::AVXtype
/*
	Encoding attributes of an instruction.
	The low three bits hold at most one T_N* value (see T_NX_MASK);
	every enumerator from T_N_VL to T_M_K is a distinct single-bit flag.
*/
enum AVXtype {
	// low 3 bit
	T_N1 = 1,
	T_N2 = 2,
	T_N4 = 3,
	T_N8 = 4,
	T_N16 = 5,
	T_N32 = 6,
	T_NX_MASK = 7,
	//
	T_N_VL = 1 << 3, // N * (1, 2, 4) for VL
	T_DUP = 1 << 4, // N = (8, 32, 64)
	T_66 = 1 << 5,
	T_F3 = 1 << 6,
	T_F2 = 1 << 7,
	T_0F = 1 << 8,
	T_0F38 = 1 << 9,
	T_0F3A = 1 << 10,
	T_L0 = 1 << 11,
	T_L1 = 1 << 12,
	T_W0 = 1 << 13,
	T_W1 = 1 << 14,
	T_EW0 = 1 << 15,
	T_EW1 = 1 << 16,
	T_YMM = 1 << 17, // support YMM, ZMM
	T_EVEX = 1 << 18,
	T_ER_X = 1 << 19, // xmm{er}
	T_ER_Y = 1 << 20, // ymm{er}
	T_ER_Z = 1 << 21, // zmm{er}
	T_SAE_X = 1 << 22, // xmm{sae}
	T_SAE_Y = 1 << 23, // ymm{sae}
	T_SAE_Z = 1 << 24, // zmm{sae}
	T_MUST_EVEX = 1 << 25, // contains T_EVEX
	T_B32 = 1 << 26, // m32bcst
	T_B64 = 1 << 27, // m64bcst
	T_M_K = 1 << 28, // mem{k}
	T_XXX
};

const int NONE = 256; // same as Xbyak::CodeGenerator::NONE

/*
	Spell a combination of AVXtype values as source text.
	@param type  bitwise OR of at most one T_N* value and any number of flags
	@return the enumerator names joined by " | " (e.g. "T_N8 | T_66 | T_0F38"),
	        the T_N* name first and then the flags in ascending bit order;
	        an empty string if type is 0
	Every emitted name is exactly the name of the enumerator it stands for.
	A low-bits value of 7 (T_NX_MASK) names no T_N* constant; it trips the
	assert, and when asserts are disabled no T_N* name is emitted for it.
*/
std::string type2String(int type)
{
	static const char *const sizeNames[] = {
		"T_N1", "T_N2", "T_N4", "T_N8", "T_N16", "T_N32"
	};
	static const struct {
		int flag;
		const char *name;
	} flagNames[] = {
		{ T_N_VL, "T_N_VL" },
		{ T_DUP, "T_DUP" },
		{ T_66, "T_66" },
		{ T_F3, "T_F3" },
		{ T_F2, "T_F2" },
		{ T_0F, "T_0F" },
		{ T_0F38, "T_0F38" },
		{ T_0F3A, "T_0F3A" },
		{ T_L0, "T_L0" },
		{ T_L1, "T_L1" },
		{ T_W0, "T_W0" },
		{ T_W1, "T_W1" },
		{ T_EW0, "T_EW0" },
		{ T_EW1, "T_EW1" },
		{ T_YMM, "T_YMM" },
		{ T_EVEX, "T_EVEX" },
		{ T_ER_X, "T_ER_X" },
		{ T_ER_Y, "T_ER_Y" },
		{ T_ER_Z, "T_ER_Z" },
		{ T_SAE_X, "T_SAE_X" },
		{ T_SAE_Y, "T_SAE_Y" },
		{ T_SAE_Z, "T_SAE_Z" },
		{ T_MUST_EVEX, "T_MUST_EVEX" },
		{ T_B32, "T_B32" },
		{ T_B64, "T_B64" },
		{ T_M_K, "T_M_K" },
	};
	const int sizeNameNum = int(sizeof(sizeNames) / sizeof(sizeNames[0]));
	const int flagNameNum = int(sizeof(flagNames) / sizeof(flagNames[0]));

	std::string str;
	const int low = type & T_NX_MASK;
	// T_N1..T_N32 map to sizeNames[0..5]; anything above has no name
	assert(low <= sizeNameNum);
	if (0 < low && low <= sizeNameNum) {
		str = sizeNames[low - 1];
	}
	for (int i = 0; i < flagNameNum; i++) {
		if ((type & flagNames[i].flag) == 0) continue;
		if (!str.empty()) str += " | ";
		str += flagNames[i].name;
	}
	return str;
}

17
gen/b2hex.cpp Normal file
View file

@ -0,0 +1,17 @@
#include <stdio.h>
/*
	Print an anonymous enum with 256 enumerators to stdout, one per line:
	'B' followed by the 8 binary digits of the value (most significant bit
	first), then "= <decimal value>". Every line but the last ends with ','.
*/
int main()
{
	puts("enum {");
	for (int value = 0; value < 256; value++) {
		// binary spelling of value, MSB first
		char bits[9];
		for (int pos = 0; pos < 8; pos++) {
			const int mask = 1 << (7 - pos);
			bits[pos] = (value & mask) ? '1' : '0';
		}
		bits[8] = '\0';
		printf(" B");
		fputs(bits, stdout);
		printf("= %d", value);
		if (value != 255) {
			putchar(',');
		}
		putchar('\n');
	}
	puts("};");
	return 0;
}

697
gen/gen_avx512.cpp Normal file
View file

@ -0,0 +1,697 @@
#define XBYAK_DONT_READ_LIST
#include <stdio.h>
#include <string.h>
#include "../xbyak/xbyak.h"
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
using namespace Xbyak;
#ifdef _MSC_VER
#pragma warning(disable : 4996) // scanf
#define snprintf _snprintf_s
#endif
#include "avx_type.hpp"
void putOpmask(bool only64bit)
{
if (only64bit) {
puts("void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); }");
puts("void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); }");
return;
}
{
const struct Tbl {
const char *name;
uint8 code;
} tbl[] = {
{ "kadd", 0x4A },
{ "kand", 0x41 },
{ "kandn", 0x42 },
{ "kor", 0x45 },
{ "kxnor", 0x46 },
{ "kxor", 0x47 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
printf("void %sw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x%02X); }\n", p.name, p.code);
printf("void %sq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x%02X); }\n", p.name, p.code);
printf("void %sb(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W0, 0x%02X); }\n", p.name, p.code);
printf("void %sd(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W1, 0x%02X); }\n", p.name, p.code);
}
printf("void kunpckbw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W0, 0x4B); }\n");
printf("void kunpckwd(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x4B); }\n");
printf("void kunpckdq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x4B); }\n");
}
{
const struct Tbl {
const char *name;
uint8 code;
} tbl[] = {
{ "knot", 0x44 },
{ "kortest", 0x98 },
{ "ktest", 0x99 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
printf("void %sw(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W0, 0x%02X); }\n", p.name, p.code);
printf("void %sq(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W1, 0x%02X); }\n", p.name, p.code);
printf("void %sb(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W0, 0x%02X); }\n", p.name, p.code);
printf("void %sd(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W1, 0x%02X); }\n", p.name, p.code);
}
}
{
const struct Tbl {
const char *name;
uint8 code;
} tbl[] = {
{ "kshiftl", 0x32 },
{ "kshiftr", 0x30 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
printf("void %sw(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x%02X, imm); }\n", p.name, p.code);
printf("void %sq(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x%02X, imm); }\n", p.name, p.code + 1);
printf("void %sb(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code);
printf("void %sd(const Opmask& r1, const Opmask& r2, uint8 imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code + 1);
}
}
puts("void kmovw(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); }");
puts("void kmovq(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); }");
puts("void kmovb(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); }");
puts("void kmovd(const Opmask& k, const Operand& op) { opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); }");
puts("void kmovw(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W0, 0x91); }");
puts("void kmovq(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W1, 0x91); }");
puts("void kmovb(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W0, 0x91); }");
puts("void kmovd(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W1, 0x91); }");
puts("void kmovw(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_W0, 0x92); }");
puts("void kmovw(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_W0, 0x93); }");
puts("void kmovb(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_66 | T_W0, 0x92); }");
puts("void kmovb(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_66 | T_W0, 0x93); }");
puts("void kmovd(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W0, 0x92); }");
puts("void kmovd(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W0, 0x93); }");
}
// vcmppd(k, x, op)
void putVcmp()
{
const struct Tbl {
uint8 code;
const char *name;
int type;
bool hasIMM;
} tbl[] = {
{ 0xC2, "vcmppd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_YMM | T_66, true },
{ 0xC2, "vcmpps", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_YMM, true },
{ 0xC2, "vcmpsd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_F2 | T_N8, true },
{ 0xC2, "vcmpss", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_F3 | T_N4, true },
{ 0x74, "vpcmpeqb", T_66 | T_0F | T_MUST_EVEX | T_YMM, false },
{ 0x75, "vpcmpeqw", T_66 | T_0F | T_MUST_EVEX | T_YMM, false },
{ 0x76, "vpcmpeqd", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_B32, false },
{ 0x29, "vpcmpeqq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
{ 0x64, "vpcmpgtb", T_66 | T_0F | T_MUST_EVEX | T_YMM, false },
{ 0x65, "vpcmpgtw", T_66 | T_0F | T_MUST_EVEX | T_YMM, false },
{ 0x66, "vpcmpgtd", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false },
{ 0x37, "vpcmpgtq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
{ 0x3F, "vpcmpb", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0, true },
{ 0x3E, "vpcmpub", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0, true },
{ 0x3F, "vpcmpw", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1, true },
{ 0x3E, "vpcmpuw", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1, true },
{ 0x1F, "vpcmpd", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, true },
{ 0x1E, "vpcmpud", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, true },
{ 0x1F, "vpcmpq", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, true },
{ 0x1E, "vpcmpuq", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, true },
{ 0x26, "vptestmb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false },
{ 0x26, "vptestmw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
{ 0x27, "vptestmd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false },
{ 0x27, "vptestmq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
{ 0x26, "vptestnmb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false },
{ 0x26, "vptestnmw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
{ 0x27, "vptestnmd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false },
{ 0x27, "vptestnmq", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
std::string type = type2String(p->type);
printf("void %s(const Opmask& k, const Xmm& x, const Operand& op%s) { opAVX_K_X_XM(k, x, op, %s, 0x%02X%s); }\n"
, p->name, p->hasIMM ? ", uint8 imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
}
}
// X_XM: void op(const Xmm& x, const Operand& op)
void putX_XM()
{
const struct Tbl {
uint8 code;
const char *name;
int type;
} tbl[] = {
{ 0x6F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x6F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x6F, "vmovdqu8", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x6F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x6F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x6F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x7B, "vcvtpd2qq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z },
// putCvt
{ 0x79, "vcvtpd2uqq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z },
{ 0x79, "vcvtps2udq", T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_ER_Z },
{ 0xE6, "vcvtqq2pd", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z },
{ 0x7A, "vcvttpd2qq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z },
{ 0x78, "vcvttpd2uqq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z },
{ 0x78, "vcvttps2udq", T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z },
{ 0x7A, "vcvtudq2ps", T_F2 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z },
{ 0x7A, "vcvtuqq2pd", T_F3 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z },
{ 0x88, "vexpandpd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },
{ 0x88, "vexpandps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 },
{ 0x89, "vpexpandd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 },
{ 0x89, "vpexpandq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },
{ 0x42, "vgetexppd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z },
{ 0x42, "vgetexpps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
std::string type = type2String(p->type);
printf("void %s(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code);
}
puts("void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); }");
puts("void vexp2pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xC8); }");
puts("void vexp2ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xC8); }");
puts("void vrcp28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xCA); }");
puts("void vrcp28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCA); }");
puts("void vrsqrt28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xCC); }");
puts("void vrsqrt28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCC); }");
}
void putM_X()
{
const struct Tbl {
uint8 code;
const char *name;
int type;
} tbl[] = {
{ 0x7F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x7F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x7F, "vmovdqu8", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x7F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x7F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z },
{ 0x7F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
std::string type = type2String(p->type);
printf("void %s(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, %s, 0x%02X); }\n", p->name, type.c_str(), p->code);
}
}
void putXM_X()
{
const struct Tbl {
uint8 code;
const char *name;
int type;
} tbl[] = {
{ 0x8A, "vcompresspd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },
{ 0x8A, "vcompressps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 },
{ 0x8B, "vpcompressd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 },
{ 0x8B, "vpcompressq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
std::string type = type2String(p->type);
printf("void %s(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code);
}
}
void putX_X_XM_IMM()
{
const struct Tbl {
uint8 code;
const char *name;
int type;
bool hasIMM;
} tbl[] = {
{ 0x03, "valignd", T_MUST_EVEX | T_66 | T_0F3A | T_EW0 | T_YMM, true },
{ 0x03, "valignq", T_MUST_EVEX | T_66 | T_0F3A | T_EW1 | T_YMM, true },
{ 0xDB, "vpandd", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW0 | T_B32, false },
{ 0xDB, "vpandq", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW1 | T_B64, false },
{ 0xDF, "vpandnd", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW0 | T_B32, false },
{ 0xDF, "vpandnq", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW1 | T_B64, false },
{ 0x3D, "vpmaxsq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
{ 0x3F, "vpmaxuq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
{ 0x39, "vpminsq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
{ 0x3B, "vpminuq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
{ 0xE2, "vpsraq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_N16, false },
{ 0x46, "vpsravq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
{ 0x11, "vpsravw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
{ 0x12, "vpsllvw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
{ 0x10, "vpsrlvw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
{ 0xEB, "vpord", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false },
{ 0xEB, "vporq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
{ 0xEF, "vpxord", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false },
{ 0xEF, "vpxorq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
{ 0x40, "vpmullq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false },
{ 0x8D, "vpermb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false },
{ 0x8D, "vpermw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false },
{ 0x65, "vblendmpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0x65, "vblendmps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
{ 0x66, "vpblendmb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, false },
{ 0x66, "vpblendmw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false },
{ 0x64, "vpblendmd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
{ 0x64, "vpblendmq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0x7D, "vpermt2b", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, false },
{ 0x7D, "vpermt2w", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false },
{ 0x7E, "vpermt2d", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
{ 0x7E, "vpermt2q", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0x7F, "vpermt2ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
{ 0x7F, "vpermt2pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0x75, "vpermi2b", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, false },
{ 0x75, "vpermi2w", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false },
{ 0x76, "vpermi2d", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
{ 0x76, "vpermi2q", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0x77, "vpermi2ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
{ 0x77, "vpermi2pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0x25, "vpternlogd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, true },
{ 0x25, "vpternlogq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, true },
{ 0x43, "vgetexpsd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, false },
{ 0x43, "vgetexpss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, false },
{ 0x27, "vgetmantsd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true },
{ 0x27, "vgetmantss", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true },
{ 0x54, "vfixupimmpd", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, true },
{ 0x54, "vfixupimmps", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, true },
{ 0x55, "vfixupimmsd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_N8, true },
{ 0x55, "vfixupimmss", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_N4, true },
{ 0x4D, "vrcp14sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8, false },
{ 0x4D, "vrcp14ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4, false },
{ 0x4F, "vrsqrt14sd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, false },
{ 0x4F, "vrsqrt14ss", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, false },
{ 0x0B, "vrndscalesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, true },
{ 0x0A, "vrndscaless", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, true },
{ 0x2C, "vscalefpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, false },
{ 0x2C, "vscalefps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z, false },
{ 0x2D, "vscalefsd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_ER_X | T_N8, false },
{ 0x2D, "vscalefss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N4, false },
{ 0x42, "vdbpsadbw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0, true },
{ 0x83, "vpmultishiftqb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0x15, "vprolvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
{ 0x15, "vprolvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0x14, "vprorvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
{ 0x14, "vprorvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0xCB, "vrcp28sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8 | T_SAE_X, false },
{ 0xCB, "vrcp28ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4 | T_SAE_X, false },
{ 0xCD, "vrsqrt28sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8 | T_SAE_X, false },
{ 0xCD, "vrsqrt28ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4 | T_SAE_X, false },
{ 0x50, "vrangepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
{ 0x50, "vrangeps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true },
{ 0x51, "vrangesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true },
{ 0x51, "vrangess", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true },
{ 0x57, "vreducesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true },
{ 0x57, "vreducess", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true },
{ 0xB4, "vpmadd52luq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
{ 0xB5, "vpmadd52huq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl *p = &tbl[i];
std::string type = type2String(p->type);
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X%s); }\n"
, p->name, p->hasIMM ? ", uint8 imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
}
}
void putShift()
{
const struct Tbl {
const char *name;
uint8 code;
int idx;
int type;
} tbl[] = {
{ "vpsraq", 0x72, 4, T_0F | T_66 | T_YMM | T_MUST_EVEX |T_EW1 | T_B64 },
{ "vprold", 0x72, 1, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 },
{ "vprolq", 0x72, 1, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 },
{ "vprord", 0x72, 0, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 },
{ "vprorq", 0x72, 0, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
std::string type = type2String(p.type);
printf("void %s(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, type.c_str(), p.code);
}
}
void putExtractInsert()
{
{
const struct Tbl {
const char *name;
uint8 code;
int type;
bool isZMM;
} tbl[] = {
{ "vextractf32x4", 0x19, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false },
{ "vextractf64x2", 0x19, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false },
{ "vextractf32x8", 0x1B, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true },
{ "vextractf64x4", 0x1B, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true },
{ "vextracti32x4", 0x39, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false },
{ "vextracti64x2", 0x39, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false },
{ "vextracti32x8", 0x3B, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true },
{ "vextracti64x4", 0x3B, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
std::string type = type2String(p.type);
const char *kind = p.isZMM ? "Operand::MEM | Operand::YMM" : "Operand::MEM | Operand::XMM";
printf("void %s(const Operand& op, const %s& r, uint8 imm) { if (!op.is(%s)) throw Error(ERR_BAD_COMBINATION); opVex(r, 0, op, %s, 0x%2X, imm); }\n", p.name, p.isZMM ? "Zmm" : "Ymm", kind, type.c_str(), p.code);
}
}
{
const struct Tbl {
const char *name;
uint8 code;
int type;
bool isZMM;
} tbl[] = {
{ "vinsertf32x4", 0x18, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false },
{ "vinsertf64x2", 0x18, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false },
{ "vinsertf32x8", 0x1A, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true },
{ "vinsertf64x4", 0x1A, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true },
{ "vinserti32x4", 0x38, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false },
{ "vinserti64x2", 0x38, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false },
{ "vinserti32x8", 0x3A, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true },
{ "vinserti64x4", 0x3A, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true },
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
std::string type = type2String(p.type);
const char *x = p.isZMM ? "Zmm" : "Ymm";
const char *cond = p.isZMM ? "op.is(Operand::MEM | Operand::YMM)" : "(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))";
printf("void %s(const %s& r1, const %s& r2, const Operand& op, uint8 imm) {"
"if (!%s) throw Error(ERR_BAD_COMBINATION); "
"opVex(r1, &r2, op, %s, 0x%2X, imm); }\n", p.name, x, x, cond, type.c_str(), p.code);
}
}
}
void putBroadcast(bool only64bit)
{
{
const struct Tbl {
uint8 code;
const char *name;
int type;
int reg;
} tbl[] = {
{ 0x7A, "vpbroadcastb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 8 },
{ 0x7B, "vpbroadcastw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 16 },
{ 0x7C, "vpbroadcastd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 32 },
{ 0x7C, "vpbroadcastq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 64},
};
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
const Tbl& p = tbl[i];
std::string type = type2String(p.type);
if ((only64bit && p.reg == 64) || (!only64bit && p.reg != 64)) {
printf("void %s(const Xmm& x, const Reg%d& r) { opVex(x, 0, r, %s, 0x%02X); }\n", p.name, p.reg, type.c_str(), p.code);
}
}
}
if (only64bit) return;
puts("void vbroadcastf32x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N8, 0x19); }");
puts("void vbroadcastf32x4(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N16, 0x1A); }");
puts("void vbroadcastf64x2(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x1A); }");
puts("void vbroadcastf64x4(const Zmm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x1B); }");
puts("void vbroadcastf32x8(const Zmm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32, 0x1B); }");
puts("void vbroadcasti32x2(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N8, 0x59); }");
puts("void vbroadcasti32x4(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N16, 0x5A); }");
puts("void vbroadcasti64x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x5A); }");
puts("void vbroadcasti32x8(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32, 0x5B); }");
puts("void vbroadcasti64x4(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x5B); }");
}
// Prints the member-function definitions for the AVX-512 conversion
// mnemonics (vcvt*). Every table entry is one complete line of generated
// source and is written to stdout verbatim, followed by a newline.
void putCvt()
{
    static const char *const lines[] = {
        "void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x79); }",
        "void vcvtps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 0x7B); }",
        "void vcvtps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 0x79); }",
        "void vcvtqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5B); }",
        "void vcvttpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, 0x78); }",
        "void vcvttps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x7A); }",
        "void vcvttps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x78); }",
        "void vcvtudq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_F3 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0x7A); }",
        "void vcvtsd2usi(const Reg32e& r, const Operand& op) { int type = (T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_ER_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x79); }",
        "void vcvtss2usi(const Reg32e& r, const Operand& op) { int type = (T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_ER_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x79); }",
        "void vcvttsd2usi(const Reg32e& r, const Operand& op) { int type = (T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_SAE_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x78); }",
        "void vcvttss2usi(const Reg32e& r, const Operand& op) { int type = (T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_SAE_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x78); }",
        "void vcvtuqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F2 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x7A); }",
        "void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }",
        "void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }",
    };
    for (size_t i = 0; i < sizeof(lines) / sizeof(lines[0]); i++) {
        puts(lines[i]);
    }
}
// Mode selectors printed as the last argument of the generated opGather2
// calls. The numeric values have to stay the same as in xbyak.h.
// NOTE(review): the names presumably describe which register sizes may be
// paired in the two operands -- confirm against xbyak.h.
enum {
    xx_yy_zz = 0,
    xx_yx_zy, // 1
    xx_xy_yz  // 2
};
// Prints one member-function definition per AVX-512 gather mnemonic.
// Each generated function forwards to opGather2 with the flags, opcode byte
// and mode value taken from the table row.
void putGather()
{
    // name, encoding flags (rendered through type2String), opcode, opGather2 mode
    const struct Tbl {
        const char *name;
        int type;
        uint8 code;
        int mode;
    } tbl[] = {
        { "vpgatherdd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x90, xx_yy_zz },
        { "vpgatherdq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x90, xx_yx_zy },
        { "vpgatherqd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x91, xx_xy_yz },
        { "vpgatherqq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x91, xx_yy_zz },
        { "vgatherdps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x92, xx_yy_zz },
        { "vgatherdpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x92, xx_yx_zy },
        { "vgatherqps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x93, xx_xy_yz },
        { "vgatherqpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x93, xx_yy_zz },
    };
    const Tbl *const last = tbl + NUM_OF_ARRAY(tbl);
    for (const Tbl *row = tbl; row != last; ++row) {
        const std::string flags = type2String(row->type);
        printf("void %s(const Xmm& x, const Address& addr) { opGather2(x, addr, %s, 0x%02X, %d); }\n", row->name, flags.c_str(), row->code, row->mode);
    }
}
// Prints one member-function definition per AVX-512 scatter mnemonic.
// The generated functions take (addr, x) but forward to opGather2 as
// (x, addr, ...), i.e. with the operands in gather order.
void putScatter()
{
    // name, encoding flags (rendered through type2String), opcode, opGather2 mode
    const struct Tbl {
        const char *name;
        int type;
        uint8 code;
        int mode; // reverse of gather
    } tbl[] = {
        { "vpscatterdd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA0, xx_yy_zz },
        { "vpscatterdq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA0, xx_yx_zy },
        { "vpscatterqd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA1, xx_xy_yz },
        { "vpscatterqq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA1, xx_yy_zz },
        { "vscatterdps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA2, xx_yy_zz },
        { "vscatterdpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA2, xx_yx_zy },
        { "vscatterqps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA3, xx_xy_yz },
        { "vscatterqpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA3, xx_yy_zz },
    };
    const Tbl *const last = tbl + NUM_OF_ARRAY(tbl);
    for (const Tbl *row = tbl; row != last; ++row) {
        const std::string flags = type2String(row->type);
        printf("void %s(const Address& addr, const Xmm& x) { opGather2(x, addr, %s, 0x%02X, %d); }\n", row->name, flags.c_str(), row->code, row->mode);
    }
}
// Prints the member-function definitions for the vshuf{f,i}{32x4,64x2}
// mnemonics; each table entry is one complete generated line.
void putShuff()
{
    static const char *const lines[] = {
        "void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); }",
        "void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }",
        "void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }",
        "void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }",
    };
    for (size_t i = 0; i < sizeof(lines) / sizeof(lines[0]); i++) {
        puts(lines[i]);
    }
}
// Prints the member-function definitions for the mask <-> vector moves
// (vpmovm2*, vpmov*2m) followed by the vpmov* family that forwards to opVmov.
void putMov()
{
    // Fixed lines: opmask-to-vector and vector-to-opmask moves.
    static const char *const maskMoves[] = {
        "void vpmovm2b(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x28); }",
        "void vpmovm2w(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x28); }",
        "void vpmovm2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x38); }",
        "void vpmovm2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x38); }",
        "void vpmovb2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x29); }",
        "void vpmovw2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x29); }",
        "void vpmovd2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x39); }",
        "void vpmovq2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x39); }",
    };
    for (size_t i = 0; i < sizeof(maskMoves) / sizeof(maskMoves[0]); i++) {
        puts(maskMoves[i]);
    }

    // Table-driven lines: opcode, mnemonic, encoding flags, and the boolean
    // printed as the last opVmov argument.
    const struct Tbl {
        uint8_t code;
        const char *name;
        int type;
        bool mode;
    } tbl[] = {
        { 0x32, "vpmovqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false },
        { 0x22, "vpmovsqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false },
        { 0x12, "vpmovusqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false },
        { 0x34, "vpmovqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
        { 0x24, "vpmovsqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
        { 0x14, "vpmovusqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
        { 0x35, "vpmovqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
        { 0x25, "vpmovsqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
        { 0x15, "vpmovusqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
        { 0x31, "vpmovdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
        { 0x21, "vpmovsdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
        { 0x11, "vpmovusdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false },
        { 0x33, "vpmovdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
        { 0x23, "vpmovsdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
        { 0x13, "vpmovusdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
        { 0x30, "vpmovwb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
        { 0x20, "vpmovswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
        { 0x10, "vpmovuswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true },
    };
    const Tbl *const last = tbl + NUM_OF_ARRAY(tbl);
    for (const Tbl *row = tbl; row != last; ++row) {
        const std::string flags = type2String(row->type);
        const char *const modeText = row->mode ? "true" : "false";
        printf("void %s(const Operand& op, const Xmm& x) { opVmov(op, x, %s, 0x%02X, %s); }\n", row->name, flags.c_str(), row->code, modeText);
    }
}
// Prints member-function definitions for two-operand (Xmm, Operand)
// mnemonics that forward to opAVX_X_XM_IMM. Rows with hasIMM set get an extra
// "uint8 imm" parameter, which is also passed on in the generated call.
void putX_XM_IMM()
{
    // opcode, mnemonic, encoding flags (rendered through type2String), takes-immediate flag
    const struct Tbl {
        uint8 code;
        const char *name;
        int type;
        bool hasIMM;
    } tbl[] = {
        { 0x26, "vgetmantpd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
        { 0x26, "vgetmantps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true },
        { 0x4C, "vrcp14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
        { 0x4C, "vrcp14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
        { 0x4E, "vrsqrt14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
        { 0x4E, "vrsqrt14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
        { 0x09, "vrndscalepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, true },
        { 0x08, "vrndscaleps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, true },
        { 0xC4, "vpconflictd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
        { 0xC4, "vpconflictq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
        { 0x44, "vplzcntd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
        { 0x44, "vplzcntq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
        { 0x56, "vreducepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
        { 0x56, "vreduceps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true },
    };
    for (size_t idx = 0; idx < NUM_OF_ARRAY(tbl); idx++) {
        const Tbl& row = tbl[idx];
        const std::string flags = type2String(row.type);
        // Both fragments are empty for mnemonics without an immediate.
        const char *immParam = "";
        const char *immArg = "";
        if (row.hasIMM) {
            immParam = ", uint8 imm";
            immArg = ", imm";
        }
        printf("void %s(const Xmm& x, const Operand& op%s) { opAVX_X_XM_IMM(x, op, %s, 0x%02X%s); }\n"
            , row.name, immParam, flags.c_str(), row.code, immArg);
    }
}
// Prints member-function definitions for assorted AVX-512 mnemonics:
// vpbroadcastm*, the gather/scatter prefetches (opGatherFetch) and vfpclass*.
// Each table entry is one complete generated line.
void putMisc()
{
    static const char *const lines[] = {
        "void vpbroadcastmb2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x2A); }",
        "void vpbroadcastmw2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x3A); }",
        "void vgatherpf0dps(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }",
        "void vgatherpf0qps(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }",
        "void vgatherpf0dpd(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }",
        "void vgatherpf0qpd(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }",
        "void vgatherpf1dps(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }",
        "void vgatherpf1qps(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }",
        "void vgatherpf1dpd(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }",
        "void vgatherpf1qpd(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }",
        "void vscatterpf0dps(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }",
        "void vscatterpf0qps(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }",
        "void vscatterpf0dpd(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }",
        "void vscatterpf0qpd(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }",
        "void vscatterpf1dps(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }",
        "void vscatterpf1qps(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }",
        "void vscatterpf1dpd(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }",
        "void vscatterpf1qpd(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }",
        "void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); Reg x = k; x.setBit(op.getBit()); opVex(x, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }",
        "void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); Reg x = k; x.setBit(op.getBit()); opVex(x, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }",
        "void vfpclasssd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }",
        "void vfpclassss(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }",
    };
    for (size_t i = 0; i < sizeof(lines) / sizeof(lines[0]); i++) {
        puts(lines[i]);
    }
}
// Prints the member-function definitions for the v4f(n)madd{ps,ss} and
// vp4dpwssd(s) mnemonics; each table entry is one complete generated line.
void putV4FMA()
{
    static const char *const lines[] = {
        "void v4fmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x9A); }",
        "void v4fnmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0xAA); }",
        "void v4fmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0x9B); }",
        "void v4fnmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0xAB); }",
        "void vp4dpwssd(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x52); }",
        "void vp4dpwssds(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x53); }",
    };
    for (size_t i = 0; i < sizeof(lines) / sizeof(lines[0]); i++) {
        puts(lines[i]);
    }
}
// Entry point of the AVX-512 mnemonic generator.
// With exactly one command-line argument (its value is not inspected) only
// the 64-bit-only parts of putOpmask and putBroadcast are emitted; with any
// other argument count those two emit their common parts and every other
// generator runs as well.
int main(int argc, char *[])
{
    const bool only64bit = (argc == 2);
    putOpmask(only64bit);
    putBroadcast(only64bit);
    if (!only64bit) {
        putVcmp();
        putX_XM();
        putM_X();
        putXM_X();
        putX_X_XM_IMM();
        putShift();
        putExtractInsert();
        putCvt();
        putGather();
        putShuff();
        putMov();
        putX_XM_IMM();
        putMisc();
        putScatter();
        putV4FMA();
    }
    return 0;
}

1782
gen/gen_code.cpp Normal file

File diff suppressed because it is too large Load diff

23
gen/sortline.cpp Normal file
View file

@ -0,0 +1,23 @@
#include <iostream>
#include <fstream>
#include <string>
#include <set>
typedef std::set<std::string> StrSet;
// Filter: reads lines from stdin, drops empty ones, and writes each distinct
// remaining line once to stdout in the ordering of std::set<std::string>.
int main()
{
    StrSet ss;
    std::string line;
    while (std::getline(std::cin, line)) {
        // std::getline has already removed the '\n' delimiter, so the only
        // terminator that can still be attached is the '\r' of a CRLF pair.
        // Strip it so CRLF and LF input compare equal.
        if (!line.empty() && line[line.size() - 1] == '\r') {
            line.resize(line.size() - 1);
        }
        if (!line.empty()) {
            ss.insert(line);
        }
    }
    for (StrSet::const_iterator i = ss.begin(), ie = ss.end(); i != ie; ++i) {
        // '\n' rather than std::endl: avoids flushing stdout after every line.
        std::cout << *i << '\n';
    }
}

17
gen/update.bat Normal file
View file

@ -0,0 +1,17 @@
@echo off
rem Regenerates the xbyak_mnemonic.h header from the generator programs in this directory.
rem NOTE(review): sortline is invoked but never compiled by this script, so a prebuilt sortline executable seems to be expected; confirm.
rem Compiler options for cl, location of the generated header, and the name of the line-sorting filter.
set OPT=/EHsc -I../ /W4 -D_CRT_SECURE_NO_WARNINGS
set TARGET=..\\xbyak\\xbyak_mnemonic.h
set SORT=sortline
rem Build gen_code. Its default output is passed through the sort filter and starts the header afresh.
cl gen_code.cpp %OPT%
gen_code | %SORT% > %TARGET%
rem The output of the omit mode is filtered and appended inside an XBYAK_ENABLE_OMITTED_OPERAND guard.
echo #ifdef XBYAK_ENABLE_OMITTED_OPERAND>> %TARGET%
gen_code omit | %SORT% >> %TARGET%
echo #endif>>%TARGET%
rem The output of the fixed mode is appended as is, without going through the sort filter.
gen_code fixed >> %TARGET%
rem Build gen_avx512. Everything it emits is wrapped in an ifndef XBYAK_DISABLE_AVX512 guard.
cl gen_avx512.cpp %OPT%
echo #ifndef XBYAK_DISABLE_AVX512>> %TARGET%
gen_avx512 | %SORT% >> %TARGET%
rem The output produced when one argument is given is additionally wrapped in an ifdef XBYAK64 guard.
echo #ifdef XBYAK64>> %TARGET%
gen_avx512 64 | %SORT% >> %TARGET%
echo #endif>> %TARGET%
echo #endif>> %TARGET%

453
readme.md Normal file
View file

@ -0,0 +1,453 @@
Xbyak 5.52 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
=============
Abstract
-------------
This is a header file which enables you to dynamically assemble x86(IA32), x64(AMD64, x86-64) mnemonics.
Feature
-------------
header file only
you can use Xbyak's functions at once if xbyak.h is included.
### Supported Instructions Sets
MMX/MMX2/SSE/SSE2/SSE3/SSSE3/SSE4/FPU(*partial*)/AVX/AVX2/FMA/VEX-encoded GPR/AVX-512
### Supported OS
* Windows Xp, Vista, Windows 7(32bit, 64bit)
* Linux(32bit, 64bit)
* Intel Mac OSX
### Supported Compilers
* Visual Studio C++ VC2012 or later
* gcc 4.7 or later
* clang 3.3
* cygwin gcc 4.5.3
* icc 7.2
>Note: Xbyak uses and(), or(), xor(), not() functions, so "-fno-operator-names" option is required on gcc.
Or define XBYAK_NO_OP_NAMES and use and_(), or_(), xor_(), not_() instead of them.
and_(), or_(), xor_(), not_() are always available.
Install
-------------
The following files are necessary. Please add their location to your compiler's include path.
* xbyak.h
* xbyak_mnemonic.h
Linux:
make install
These files are copied into /usr/local/include/xbyak
New Feature
-------------
Add support for AVX-512 instruction set.
Syntax
-------------
Create a class derived from Xbyak::CodeGenerator, emit code from its methods, and get the function
pointer by calling getCode() and casting the return value.
NASM Xbyak
mov eax, ebx --> mov(eax, ebx);
inc ecx --> inc(ecx);
ret --> ret();
### Addressing
(ptr|dword|word|byte) [base + index * (1|2|4|8) + displacement]
[rip + 32bit disp] ; x64 only
NASM Xbyak
mov eax, [ebx+ecx] --> mov (eax, ptr[ebx+ecx]);
test byte [esp], 4 --> test (byte [esp], 4);
How to use Selector(Segment Register)
>Note: Segment class is not derived from Operand.
```
mov eax, [fs:eax] --> putSeg(fs); mov(eax, ptr [eax]);
mov ax, cs --> mov(ax, cs);
```
>You can use ptr for almost all memory accesses unless you need to specify the size of the memory operand.
>dword, word and byte are member variables, so do not use them as type names (for example, dword as unsigned int).
### AVX
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
vaddps(xmm2, xmm3, ptr [rax]); // use ptr to access memory
vgatherdpd(xmm1, ptr [ebp+123+xmm2*4], xmm3);
*Remark*
The omitted destination syntax shown below is disabled.
```
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
```
define `XBYAK_ENABLE_OMITTED_OPERAND` if you use it for backward compatibility.
But the newer version will not support it.
### AVX-512
```
vaddpd zmm2, zmm5, zmm30 --> vaddpd(zmm2, zmm5, zmm30);
vaddpd xmm30, xmm20, [rax] --> vaddpd(xmm30, xmm20, ptr [rax]);
vaddps xmm30, xmm20, [rax] --> vaddps(xmm30, xmm20, ptr [rax]);
vaddpd zmm2{k5}, zmm4, zmm2 --> vaddpd(zmm2 | k5, zmm4, zmm2);
vaddpd zmm2{k5}{z}, zmm4, zmm2 --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2);
vaddpd zmm2{k5}{z}, zmm4, zmm2,{rd-sae} --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2 | T_rd_sae);
vaddpd(zmm2 | k5 | T_z | T_rd_sae, zmm4, zmm2); // the position of `|` is arbitrary.
vcmppd k4{k3}, zmm1, zmm2, {sae}, 5 --> vcmppd(k4 | k3, zmm1, zmm2 | T_sae, 5);
vaddpd xmm1, xmm2, [rax+256] --> vaddpd(xmm1, xmm2, ptr [rax+256]);
vaddpd xmm1, xmm2, [rax+256]{1to2} --> vaddpd(xmm1, xmm2, ptr_b [rax+256]);
vaddpd ymm1, ymm2, [rax+256]{1to4} --> vaddpd(ymm1, ymm2, ptr_b [rax+256]);
vaddpd zmm1, zmm2, [rax+256]{1to8} --> vaddpd(zmm1, zmm2, ptr_b [rax+256]);
vaddps zmm1, zmm2, [rax+rcx*8+8]{1to16} --> vaddps(zmm1, zmm2, ptr_b [rax+rcx*8+8]);
vmovsd [rax]{k1}, xmm4 --> vmovsd(ptr [rax] | k1, xmm4);
vcvtpd2dq xmm16, oword [eax+33] --> vcvtpd2dq(xmm16, xword [eax+33]); // use xword for m128 instead of oword
vcvtpd2dq(xmm16, ptr [eax+33]); // default xword
vcvtpd2dq xmm21, [eax+32]{1to2} --> vcvtpd2dq(xmm21, ptr_b [eax+32]);
vcvtpd2dq xmm0, yword [eax+33] --> vcvtpd2dq(xmm0, yword [eax+33]); // use yword for m256
vcvtpd2dq xmm19, [eax+32]{1to4} --> vcvtpd2dq(xmm19, yword_b [eax+32]); // use yword_b to broadcast
vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5); // specify m512
vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, xword_b [rax+64], 5); // broadcast 32-bit to 128-bit
```
Remark
* k1, ..., k7 are new opmask registers.
* use `| T_z`, `| T_sae`, `| T_rn_sae`, `| T_rd_sae`, `| T_ru_sae`, `| T_rz_sae` instead of `,{z}`, `,{sae}`, `,{rn-sae}`, `,{rd-sae}`, `,{ru-sae}`, `,{rz-sae}` respectively.
* `k4 | k3` is different from `k3 | k4`.
* use `ptr_b` for broadcast `{1toX}`. X is automatically determined.
* specify xword/yword/zword(_b) for m128/m256/m512 if necessary.
### Label
L("L1");
jmp ("L1");
jmp ("L2");
...
a few mnemonics(8-bit displacement jmp)
...
L("L2");
jmp ("L3", T_NEAR);
...
a lot of mnemonics(32-bit displacement jmp)
...
L("L3");
>Call hasUndefinedLabel() to verify your code has no undefined label.
> You can use a label as the immediate value of mov, e.g. mov (eax, "L2");
#### 1. support @@, @f, @b like MASM
L("@@"); // <A>
jmp("@b"); // jmp to <A>
jmp("@f"); // jmp to <B>
L("@@"); // <B>
jmp("@b"); // jmp to <B>
mov(eax, "@b");
jmp(eax); // jmp to <B>
#### 2. localization of label by calling inLocalLabel(), outLocalLabel().
Labels beginning with a period between inLocalLabel() and outLocalLabel()
are treated as local labels.
inLocalLabel() and outLocalLabel() can be nested.
void func1()
{
inLocalLabel();
L(".lp"); // <A> ; local label
...
jmp(".lp"); // jmp to <A>
L("aaa"); // global label
outLocalLabel();
}
void func2()
{
inLocalLabel();
L(".lp"); // <B> ; local label
func1();
jmp(".lp"); // jmp to <B>
outLocalLabel();
}
### Label class
L() and jxx() functions support a new Label class.
Label label1, label2;
L(label1);
...
jmp(label1);
...
jmp(label2);
...
L(label2);
Moreover, assignL(dstLabel, srcLabel) method binds dstLabel with srcLabel.
Label label1, label2;
L(label1);
...
jmp(label2);
...
assignL(label2, label1); // label2 <= label1
The jmp above jumps to label1.
* Restriction:
* srcLabel must be used in L().
* dstLabel must not be used in L().
Label::getAddress() returns the address specified by the label instance and 0 if not specified.
```
// not AutoGrow mode
Label label;
assert(label.getAddress() == 0);
L(label);
assert(label.getAddress() == getCurr());
```
### Rip
```
Label label;
mov(eax, ptr [rip + label]); // eax = 4
...
L(label);
dd(4);
```
```
int x;
...
mov(eax, ptr[rip + &x]); // throw exception if the difference between &x and current position is larger than 2GiB
```
### Code size
The default max code size is 4096 bytes. Please set it in constructor of CodeGenerator() if you want to use large size.
class Quantize : public Xbyak::CodeGenerator {
public:
Quantize()
: CodeGenerator(8192)
{
}
...
};
### use user allocated memory
You can generate JIT code in memory that you have prepared yourself.
class Sample : public Xbyak::CodeGenerator {
public:
Sample(void *userPtr, size_t size)
: Xbyak::CodeGenerator(size, userPtr)
{
...
}
};
const size_t codeSize = 1024;
uint8 buf[codeSize + 16];
// get 16-byte aligned address
uint8 *p = Xbyak::CodeArray::getAlignedAddress(buf);
// append executable attribute to the memory
Xbyak::CodeArray::protect(p, codeSize, true);
// construct your jit code on the memory
Sample s(p, codeSize);
>See *sample/test0.cpp*
AutoGrow
-------------
Under `AutoGrow` mode, Xbyak extends memory automatically if necessary.
Call ready() before calling getCode() to resolve the addresses of jmp instructions.
```
struct Code : Xbyak::CodeGenerator {
Code()
: Xbyak::CodeGenerator(<default memory size>, Xbyak::AutoGrow)
{
...
}
};
Code c;
c.ready(); // Don't forget to call this function
```
>Don't use the address returned by getCurr() before calling ready().
>It may be invalid address.
>RESTRICTION : rip addressing is not supported in AutoGrow
Macro
-------------
* **XBYAK32** is defined on 32bit.
* **XBYAK64** is defined on 64bit.
* **XBYAK64_WIN** is defined on 64bit Windows(VC)
* **XBYAK64_GCC** is defined on 64bit gcc, cygwin
* define **XBYAK_NO_OP_NAMES** on gcc without `-fno-operator-names`
* define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(deprecated; newer versions will not support it)
* define **XBYAK_UNDEF_JNL** if Bessel function jnl is defined as macro
Sample
-------------
* test0.cpp ; tiny sample of Xbyak(x86, x64)
* quantize.cpp ; JIT optimized quantization by fast division(x86 only)
* calc.cpp ; assemble and estimate a given polynomial(x86, x64)
* bf.cpp ; JIT brainfuck(x86, x64)
License
-------------
modified new BSD License
http://opensource.org/licenses/BSD-3-Clause
The files under test/cybozu/ are copied from cybozulib(https://github.com/herumi/cybozulib/),
which is licensed by BSD-3-Clause and are used for only tests.
The header files under xbyak/ are independent of cybozulib.
History
-------------
* 2017/Aug/18 ver 5.52 fix align (thanks to MerryMage)
* 2017/Aug/17 ver 5.51 add multi-byte nop and align() uses it(thanks to inolen)
* 2017/Aug/08 ver 5.50 add mpx(thanks to magurosan)
* 2017/Aug/08 ver 5.45 add sha(thanks to magurosan)
* 2017/Aug/08 ver 5.44 add prefetchw(thanks to rsdubtso)
* 2017/Jul/12 ver 5.432 reduce warnings of PVS studio
* 2017/Jul/09 ver 5.431 fix hasRex() (no affect) (thanks to drillsar)
* 2017/May/14 ver 5.43 fix CodeGenerator::resetSize() (thanks to gibbed)
* 2017/May/13 ver 5.42 add movs{b,w,d,q}
* 2017/Jan/26 ver 5.41 add prefetchwt1 and support for scale == 0(thanks to rsdubtso)
* 2016/Dec/14 ver 5.40 add Label::getAddress() method to get the pointer specified by the label
* 2016/Dec/09 ver 5.34 fix handling of negative offsets when encoding disp8N(thanks to rsdubtso)
* 2016/Dec/08 ver 5.33 fix encoding of vpbroadcast{b,w,d,q}, vpinsr{b,w}, vpextr{b,w} for disp8N
* 2016/Dec/01 ver 5.32 rename __xgetbv() to _xgetbv() to support clang for Visual Studio(thanks to freiro)
* 2016/Nov/27 ver 5.31 rename AVX512_4VNNI to AVX512_4VNNIW
* 2016/Nov/27 ver 5.30 add AVX512_4VNNI, AVX512_4FMAPS instructions(thanks to rsdubtso)
* 2016/Nov/26 ver 5.20 add detection of AVX512_4VNNI and AVX512_4FMAPS(thanks to rsdubtso)
* 2016/Nov/20 ver 5.11 lost vptest for ymm(thanks to gregory38)
* 2016/Nov/20 ver 5.10 add addressing [rip+&var]
* 2016/Sep/29 ver 5.03 fix detection ERR_INVALID_OPMASK_WITH_MEMORY(thanks to PVS-Studio)
* 2016/Aug/15 ver 5.02 xbyak does not include xbyak_bin2hex.h
* 2016/Aug/15 ver 5.011 fix detection of version of gcc 5.4
* 2016/Aug/03 ver 5.01 disable omitted operand
* 2016/Jun/24 ver 5.00 support avx-512 instruction set
* 2016/Jun/13 avx-512 add mask instructions
* 2016/May/05 ver 4.91 add detection of AVX-512 to Xbyak::util::Cpu
* 2016/Mar/14 ver 4.901 comment to ready() function(thanks to skmp)
* 2016/Feb/04 ver 4.90 add jcc(const void *addr);
* 2016/Jan/30 ver 4.89 vpblendvb supports ymm reg(thanks to John Funnell)
* 2016/Jan/24 ver 4.88 lea, cmov supports 16-bit register(thanks to whyisthisfieldhere)
* 2015/Oct/05 ver 4.87 support segment selectors
* 2015/Aug/18 ver 4.86 fix [rip + label] addressing with immediate value(thanks to whyisthisfieldhere)
* 2015/Aug/10 ver 4.85 Address::operator==() is not correct(thanks to inolen)
* 2015/Jun/22 ver 4.84 call() support variadic template if available(thanks to randomstuff)
* 2015/Jun/16 ver 4.83 support movbe(thanks to benvanik)
* 2015/May/24 ver 4.82 support detection of F16C
* 2015/Apr/25 ver 4.81 fix the condition to throw exception for setSize(thanks to whyisthisfieldhere)
* 2015/Apr/22 ver 4.80 rip supports label(thanks to whyisthisfieldhere)
* 2015/Jan/28 ver 4.71 support adcx, adox, cmpxchg, rdseed, stac
* 2014/Oct/14 ver 4.70 support MmapAllocator
* 2014/Jun/13 ver 4.62 disable warning of VC2014
* 2014/May/30 ver 4.61 support bt, bts, btr, btc
* 2014/May/28 ver 4.60 support vcvtph2ps, vcvtps2ph
* 2014/Apr/11 ver 4.52 add detection of rdrand
* 2014/Mar/25 ver 4.51 remove state information of unreferenced labels
* 2014/Mar/16 ver 4.50 support new Label
* 2014/Mar/05 ver 4.40 fix wrong detection of BMI/enhanced rep on VirtualBox
* 2013/Dec/03 ver 4.30 support Reg::cvt8(), cvt16(), cvt32(), cvt64()
* 2013/Oct/16 ver 4.21 label support std::string
* 2013/Jul/30 ver 4.20 [break backward compatibility] split Reg32e class into RegExp(base+index*scale+disp) and Reg32e(means Reg32 or Reg64)
* 2013/Jul/04 ver 4.10 [break backward compatibility] change the type of Xbyak::Error from enum to a class
* 2013/Jun/21 ver 4.02 add putL(LABEL) function to put the address of the label
* 2013/Jun/21 ver 4.01 vpsllw, vpslld, vpsllq, vpsraw, vpsrad, vpsrlw, vpsrld, vpsrlq support (ymm, ymm, xmm).
support vpbroadcastb, vpbroadcastw, vpbroadcastd, vpbroadcastq(thanks to Gabest).
* 2013/May/30 ver 4.00 support AVX2, VEX-encoded GPR-instructions
* 2013/Mar/27 ver 3.80 support mov(reg, "label");
* 2013/Mar/13 ver 3.76 add cqo(), jcxz(), jecxz(), jrcxz()
* 2013/Jan/15 ver 3.75 add setSize() to modify generated code
* 2013/Jan/12 ver 3.74 add CodeGenerator::reset() ; add Allocator::useProtect()
* 2013/Jan/06 ver 3.73 use unordered_map if possible
* 2012/Dec/04 ver 3.72 eax, ebx, ... are member variables of CodeGenerator(revert), Xbyak::util::eax, ... are static const.
* 2012/Nov/17 ver 3.71 and_(), or_(), xor_(), not_() are available if XBYAK_NO_OP_NAMES is not defined.
* 2012/Nov/17 change eax, ebx, ptr and so on in CodeGenerator as static member and alias of them are defined in Xbyak::util.
* 2012/Nov/09 ver 3.70 XBYAK_NO_OP_NAMES macro is added to use and_() instead of and() (thanks to Mattias)
* 2012/Nov/01 ver 3.62 add fwait/fnwait/finit/fninit
* 2012/Nov/01 ver 3.61 add fldcw/fstcw
* 2012/May/03 ver 3.60 change interface of Allocator
* 2012/Mar/23 ver 3.51 fix userPtr mode
* 2012/Mar/19 ver 3.50 support AutoGrow mode
* 2011/Nov/09 ver 3.05 fix bit property of rip addressing / support movsxd
* 2011/Aug/15 ver 3.04 fix dealing with imm8 such as add(dword [ebp-8], 0xda); (thanks to lolcat)
* 2011/Jun/16 ver 3.03 fix __GNUC_PREREQ macro for Mac gcc(thanks to t_teruya)
* 2011/Apr/28 ver 3.02 do not use xgetbv on Mac gcc
* 2011/May/24 ver 3.01 fix typo of OSXSAVE
* 2011/May/23 ver 3.00 add vcmpeqps and so on
* 2011/Feb/16 ver 2.994 beta add vmovq for 32-bit mode(I forgot it)
* 2011/Feb/16 ver 2.993 beta remove cvtReg to avoid thread unsafe
* 2011/Feb/10 ver 2.992 beta support one argument syntax for fadd like nasm
* 2011/Feb/07 ver 2.991 beta fix pextrw reg, xmm, imm(Thanks to Gabest)
* 2011/Feb/04 ver 2.99 beta support AVX
* 2010/Dec/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp
* 2010/Oct/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist
* 2010/Jun/07 ver 2.29 fix call(<label>)
* 2010/Jun/17 ver 2.28 move some member functions to public
* 2010/Jun/01 ver 2.27 support encoding of mov(reg64, imm) like yasm(not nasm)
* 2010/May/24 ver 2.26 fix sub(rsp, 1000)
* 2010/Apr/26 ver 2.25 add jc/jnc(I forgot to implement them...)
* 2010/Apr/16 ver 2.24 change the prototype of rewrite() method
* 2010/Apr/15 ver 2.23 fix align() and xbyak_util.h for Mac
* 2010/Feb/16 ver 2.22 fix inLocalLabel()/outLocalLabel()
* 2009/Dec/09 ver 2.21 support cygwin(gcc 4.3.2)
* 2009/Nov/28 support a part of FPU
* 2009/Jun/25 fix mov(qword[rax], imm); (thanks to Martin)
* 2009/Mar/10 fix redundant REX.W prefix on jmp/call reg64
* 2009/Feb/24 add movq reg64, mmx/xmm; movq mmx/xmm, reg64
* 2009/Feb/13 movd(xmm7, dword[eax]) drops 0x66 prefix (thanks to Gabest)
* 2008/Dec/30 fix call in short relative address(thanks to kato san)
* 2008/Sep/18 support @@, @f, @b and localization of label(thanks to nobu-q san)
* 2008/Sep/18 support (ptr[rip + 32bit offset]) (thanks to Dango-Chu san)
* 2008/Jun/03 fix align(). mov(ptr[eax],1) throws ERR_MEM_SIZE_IS_NOT_SPECIFIED.
* 2008/Jun/02 support memory interface allocated by user
* 2008/May/26 fix protect() to avoid invalid setting(thanks to shinichiro_h san)
* 2008/Apr/30 add cmpxchg16b, cdqe
* 2008/Apr/29 support x64
* 2008/Apr/14 code refactoring
* 2008/Mar/12 add bsr/bsf
* 2008/Feb/14 fix output of sub eax, 1234 (thanks to Robert)
* 2007/Nov/5 support lock, xadd, xchg
* 2007/Nov/2 support SSSE3/SSE4 (thanks to Dango-Chu san)
* 2007/Feb/4 fix the bug that exception doesn't occur under the condition which the offset of jmp mnemonic without T_NEAR is over 127.
* 2007/Jan/21 fix the bug to create address like [disp] select smaller representation for mov (eax|ax|al, [disp])
* 2007/Jan/4 first version
Author
-------------
MITSUNARI Shigeo(herumi@nifty.com)

473
readme.txt Normal file
View file

@ -0,0 +1,473 @@
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.52
-----------------------------------------------------------------------------
◎概要
これはx86, x64(AMD64, x86-64)のマシン語命令を生成するC++のクラスライブラリです。
プログラム実行時に動的にアセンブルすることが可能です。
-----------------------------------------------------------------------------
◎特徴
・ヘッダファイルオンリー
xbyak.hをインクルードするだけですぐ利用することができます。
C++の枠組み内で閉じているため、外部アセンブラは不要です。
32bit/64bit両対応です。
対応ニーモニック:特権命令除くx86, MMX/MMX2/SSE/SSE2/SSE3/SSSE3/SSE4/FPU(一部)/AVX/AVX2/FMA/VEX-encoded GPR
・Windows Xp(32bit, 64bit), Windows 7/Linux(32bit, 64bit)/Intel Mac対応
Windows Xp, Windows 7上ではVC2008, VC2010, VC2012
Linux (kernel 3.8)上ではgcc 4.7.3, clang 3.3
Intel Mac
などで動作確認をしています。
※ Xbyakはデフォルトでand(), or(), xor(), not()関数を使います。
gccではそれらを演算子として解釈してしまうため、-fno-operator-namesオプションを追加してコンパイルしてください。
あるいはXBYAK_NO_OP_NAMESを定義してand_(), or_(), xor_(), not_()を使ってください。
and_(), or_(), xor_(), not_()はXBYAK_NO_OP_NAMESされていないときでも使えます。
-----------------------------------------------------------------------------
◎準備
xbyak.h
xbyak_bin2hex.h
xbyak_mnemonic.h
これらを同一のパスに入れてインクルードパスに追加してください。
Linuxではmake installで/usr/local/include/xbyakにコピーされます。
-----------------------------------------------------------------------------
◎下位互換性の破れ
* Xbyak::Errorの型をenumからclassに変更
** 従来のenumの値をとるにはintにキャストしてください。
* (古い)Reg32eクラスを(新しい)Reg32eとRegExpに分ける。
** (新しい)Reg32eはReg32かReg64
** (新しい)RegExpは'Reg32e + (Reg32e|Xmm|Ymm) * scale + disp'の型
-----------------------------------------------------------------------------
◎新機能
MmapAllocator追加
これはUnix系OSでのみの仕様です。XBYAK_USE_MMAP_ALLOCATORを使うと利用できます。
デフォルトのAllocatorはメモリ確保時にposix_memalignを使います。
この領域に対するmprotectはmap countを減らします。
map countの最大値は/proc/sys/vm/max_map_countに書かれています。
デフォルトでは3万個ほどのXbyak::CodeGeneratorインスタンスを生成するとエラーになります。
test/mprotect_test.cppで確認できます。
これを避けるためにはmmapを使うMmapAllocatorを使ってください。
将来この挙動がデフォルトになるかもしれません。
AutoGrowモード追加
これはメモリ伸長を動的に行うモードです。
今まではXbyak::CodeGenerator()に渡したメモリサイズを超えると例外が発生して
いましたが、このモードでは内部でメモリを再確保して伸長します。
ただし、getCode()を呼び出す前にジャンプ命令のアドレス解決をするためにready()
関数を呼ぶ必要があります。
次のように使います。
struct Code : Xbyak::CodeGenerator {
Code()
: Xbyak::CodeGenerator(<default memory size>, Xbyak::AutoGrow)
{
...
}
};
Code c;
c.ready(); // この呼び出しを忘れてはいけない
注意1. ready()を呼んで確定するまではgetCurr()で得たポインタは無効化されている
可能性があります。getSize()でoffsetを保持しておきready()のあとにgetCode()を
呼び出してからgetCode() + offsetで新しいポインタを取得してください。
注意2. AutoGrowモードでは64bitモードの相対アドレッシング[rip]は非サポートです。
-----------------------------------------------------------------------------
◎文法
Xbyak::CodeGeneratorクラスを継承し、そのクラスメソッド内でx86, x64アセンブラを
記述します。そのメソッドを呼び出した後、getCode()メソッドを呼び出し、その戻
り値を自分が使いたい関数ポインタに変換して利用します。アセンブルエラーは例外
により通知されます(cf. main.cpp)。
・基本的にnasmの命令で括弧をつければよいです。
mov eax, ebx --> mov(eax, ebx);
inc ecx --> inc(ecx);
ret --> ret();
・アドレッシング
(ptr|dword|word|byte) [base + index * (1|2|4|8) + displacement]
[rip + 32bit disp] ; x64 only
という形で指定します。サイズを指定する必要がない限りptrを使えばよいです。
セレクター(セグメントレジスタ)をサポートしました。
(注意)セグメントレジスタはOperandを継承していません。
mov eax, [fs:eax] --> putSeg(fs); mov(eax, ptr [eax]);
mov ax, cs --> mov(ax, cs);
mov eax, [ebx+ecx] --> mov (eax, ptr[ebx+ecx]);
test byte [esp], 4 --> test (byte [esp], 4);
(注意) dword, word, byteはメンバ変数です。従ってたとえばunsigned intの
つもりでdwordをtypedefしないでください。
・AVX
FMAについては簡略表記を導入するか検討中です(アイデア募集中)。
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
vaddps(xmm2, xmm3, ptr [rax]); // メモリアクセスはptrで
vfmadd231pd(xmm1, xmm2, xmm3); // xmm1 <- (xmm2 * xmm3) + xmm1
*注意*
デスティネーションの省略形はサポートされなくなりました。
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
XBYAK_ENABLE_OMITTED_OPERANDを定義すると使えますが、将来はそれも非サポートになるでしょう。
・AVX-512
vaddpd zmm2, zmm5, zmm30 --> vaddpd(zmm2, zmm5, zmm30);
vaddpd xmm30, xmm20, [rax] --> vaddpd(xmm30, xmm20, ptr [rax]);
vaddps xmm30, xmm20, [rax] --> vaddps(xmm30, xmm20, ptr [rax]);
vaddpd zmm2{k5}, zmm4, zmm2 --> vaddpd(zmm2 | k5, zmm4, zmm2);
vaddpd zmm2{k5}{z}, zmm4, zmm2 --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2);
vaddpd zmm2{k5}{z}, zmm4, zmm2,{rd-sae} --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2 | T_rd_sae);
vaddpd(zmm2 | k5 | T_z | T_rd_sae, zmm4, zmm2); // the position of `|` is arbitrary.
vcmppd k4{k3}, zmm1, zmm2, {sae}, 5 --> vcmppd(k4 | k3, zmm1, zmm2 | T_sae, 5);
vaddpd xmm1, xmm2, [rax+256]{1to2} --> vaddpd(xmm1, xmm2, ptr_b [rax+256]);
vaddpd ymm1, ymm2, [rax+256]{1to4} --> vaddpd(ymm1, ymm2, ptr_b [rax+256]);
vaddpd zmm1, zmm2, [rax+256]{1to8} --> vaddpd(zmm1, zmm2, ptr_b [rax+256]);
vaddps zmm1, zmm2, [rax+rcx*8+8]{1to16} --> vaddps(zmm1, zmm2, ptr_b [rax+rcx*8+8]);
vmovsd [rax]{k1}, xmm4 --> vmovsd(ptr [rax] | k1, xmm4);
vcvtpd2dq xmm16, oword [eax+33] --> vcvtpd2dq(xmm16, xword [eax+33]); // use xword for m128 instead of oword
vcvtpd2dq(xmm16, ptr [eax+33]); // default xword
vcvtpd2dq xmm21, [eax+32]{1to2} --> vcvtpd2dq(xmm21, ptr_b [eax+32]);
vcvtpd2dq xmm0, yword [eax+33] --> vcvtpd2dq(xmm0, yword [eax+33]); // use yword for m256
vcvtpd2dq xmm19, [eax+32]{1to4} --> vcvtpd2dq(xmm19, yword_b [eax+32]); // use yword_b to broadcast
vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5); // specify m512
vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, xword_b [rax+64], 5); // broadcast 32-bit to 128-bit
注意
* k1, ..., k7 は新しいopmaskレジスタです。
* z, sae, rn-sae, rd-sae, ru-sae, rz-saeの代わりにT_z, T_sae, T_rn_sae, T_rd_sae, T_ru_sae, T_rz_saeを使ってください。
* `k4 | k3`と`k3 | k4`は意味が異なります。
* {1toX}の代わりにptr_bを使ってください。Xは自動的に決まります。
* 一部の命令はメモリサイズを指定するためにxword/yword/zword(_b)を使ってください。
・ラベル
L(文字列);
で定義します。ジャンプするときはその文字列を指定します。後方参照も可能ですが、
相対アドレスが8ビットに収まらない場合はT_NEARをつけないと実行時に例外が発生
します。
mov(eax, "L2");の様にラベルが表すアドレスをmovの即値として使えます。
・hasUndefinedLabel()を呼び出して真ならジャンプ先が存在しないことを示します。
コードを見直してください。
L("L1");
jmp ("L1");
jmp ("L2");
...
少しの命令の場合。
...
L("L2");
jmp ("L3", T_NEAR);
...
沢山の命令がある場合
...
L("L3");
<応用編>
1. MASMライクな@@, @f, @bをサポート
L("@@"); // <A>
jmp("@b"); // jmp to <A>
jmp("@f"); // jmp to <B>
L("@@"); // <B>
jmp("@b"); // jmp to <B>
mov(eax, "@b");
jmp(eax); // jmp to <B>
2. ラベルの局所化
ピリオドで始まるラベルをinLocalLabel(), outLocalLabel()で挟むことで局所化できます。
inLocalLabel(), outLocalLabel()は入れ子にすることができます。
void func1()
{
inLocalLabel();
L(".lp"); // <A> ; ローカルラベル
...
jmp(".lp"); // jmp to <A>
L("aaa"); // グローバルラベル
outLocalLabel();
}
void func2()
{
inLocalLabel();
L(".lp"); // <B> ; ローカルラベル
func1();
jmp(".lp"); // jmp to <B>
outLocalLabel();
}
上記サンプルではinLocalLabel(), outLocalLabel()が無いと、
".lp"ラベルの二重定義エラーになります。
3. 新しいLabelクラスによるジャンプ命令
ジャンプ先を文字列による指定だけでなくラベルクラスを使えるようになりました。
Label label1, label2;
L(label1);
...
jmp(label1);
...
jmp(label2);
...
L(label2);
更にラベルの割り当てを行うassignL(dstLabel, srcLabel)という命令も追加されました。
Label label1, label2;
L(label1);
...
jmp(label2);
...
assignL(label2, label1);
上記jmp命令はlabel1にジャンプします。
制限
* srcLabelはL()により飛び先が確定していないといけません。
* dstLabelはL()により飛び先が確定していてはいけません。
ラベルは`getAddress()`によりそのアドレスを取得できます。
未定義のときは0が返ります。
```
// not AutoGrow mode
Label label;
assert(label.getAddress() == 0);
L(label);
assert(label.getAddress() == getCurr());
```
・Xbyak::CodeGenerator()コンストラクタインタフェース
@param maxSize [in] コード生成最大サイズ(デフォルト4096byte)
@param userPtr [in] ユーザ指定メモリ
CodeGenerator(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0);
デフォルトコードサイズは4096(=DEFAULT_MAX_CODE_SIZE)バイトです。
それより大きなコードを生成する場合はCodeGenerator()のコンストラクタに指定してください。
class Quantize : public Xbyak::CodeGenerator {
public:
Quantize()
: CodeGenerator(8192)
{
}
...
};
またユーザ指定メモリをコード生成最大サイズと共に指定すると、CodeGeneratorは
指定されたメモリ上にバイト列を生成します。
補助関数として指定されたアドレスの実行属性を変更するCodeArray::protect()と
与えられたポインタからアライメントされたポインタを取得するCodeArray::getAlignedAddress()
も用意しました。詳細はsample/test0.cppのuse memory allocated by userを参考に
してください。
/**
change exec permission of memory
@param addr [in] buffer address
@param size [in] buffer size
@param canExec [in] true(enable to exec), false(disable to exec)
@return true(success), false(failure)
*/
bool CodeArray::protect(const void *addr, size_t size, bool canExec);
/**
get aligned memory pointer
*/
uint8 *CodeArray::getAlignedAddress(uint8 *addr, size_t alignedSize = ALIGN_SIZE);
その他詳細は各種サンプルを参照してください。
-----------------------------------------------------------------------------
◎マクロ
32bit環境上でコンパイルするとXBYAK32が、64bit環境上でコンパイルするとXBYAK64が
定義されます。さらに64bit環境上ではWindows(VC)ならXBYAK64_WIN、cygwin, gcc上では
XBYAK64_GCCが定義されます。
-----------------------------------------------------------------------------
◎使用例
test0.cpp ; 簡単な例(x86, x64)
quantize.cpp ; 割り算のJITアセンブルによる量子化の高速化(x86)
calc.cpp ; 与えられた多項式をアセンブルして実行(x86, x64)
boost(http://www.boost.org/)が必要
bf.cpp ; JIT Brainfuck(x86, x64)
-----------------------------------------------------------------------------
◎ライセンス
修正された新しいBSDライセンスに従います。
http://opensource.org/licenses/BSD-3-Clause
sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
いただきました。
test/cybozu/以下のファイルはcybozulib(https://github.com/herumi/cybozulib/)
の一部を使っています。cybozulibはBSD-3-Clauseライセンスです。
cybozulibは単体テストでのみ利用されていて、xbyak/ディレクトリ以下のヘッダ
ファイルはcybozulibとは独立に利用できます。
-----------------------------------------------------------------------------
◎履歴
2017/08/18 ver 5.52 align修正(thanks to MerryMage)
2017/08/17 ver 5.51 multi-byte nop追加 align()はそれを使用する(thanks to inolen)
2017/08/08 ver 5.50 mpx追加(thanks to magurosan)
2017/08/08 ver 5.45 sha追加(thanks to magurosan)
2017/08/08 ver 5.44 prefetchw追加(thanks to rsdubtso)
2017/07/12 ver 5.432 PVS-studioの警告を減らす
2017/07/09 ver 5.431 hasRex()修正 (影響なし) (thanks to drillsar)
2017/05/14 ver 5.43 CodeGenerator::resetSize()修正(thanks to gibbed)
2017/05/13 ver 5.42 movs{b,w,d,q}追加
2017/01/26 ver 5.41 prefetchwt1追加とscale == 0対応(thanks to rsdubtso)
2016/12/14 ver 5.40 Labelが示すアドレスを取得するLabel::getAddress()追加
2016/12/07 ver 5.34 disp8N時の負のオフセット処理の修正(thanks to rsdubtso)
2016/12/06 ver 5.33 disp8N時のvpbroadcast{b,w,d,q}, vpinsr{b,w}, vpextr{b,w}のバグ修正
2016/12/01 ver 5.32 clang for Visual Studioサポートのために__xgetbv()を_xgetbv()に変更(thanks to freiro)
2016/11/27 ver 5.31 AVX512_4VNNIをAVX512_4VNNIWに変更
2016/11/27 ver 5.30 AVX512_4VNNI, AVX512_4FMAPS命令の追加(thanks to rsdubtso)
2016/11/26 ver 5.20 AVX512_4VNNIとAVX512_4FMAPSの判定追加(thanks to rsdubtso)
2016/11/20 ver 5.11 何故か消えていたvptest for ymm追加(thanks to gregory38)
2016/11/20 ver 5.10 [rip+&var]の形のアドレッシング追加
2016/09/29 ver 5.03 ERR_INVALID_OPMASK_WITH_MEMORYの判定ミス修正(thanks to PVS-Studio)
2016/08/15 ver 5.02 xbyak_bin2hex.hをincludeしない
2016/08/15 ver 5.011 gcc 5.4のバージョン取得ミスの修正
2016/08/03 ver 5.01 AVXの省略表記非サポート
2016/07/24 ver 5.00 avx-512フルサポート
2016/06/13 avx-512 opmask命令サポート
2016/05/05 ver 4.91 AVX-512命令の検出サポート
2016/03/14 ver 4.901 ready()関数にコメント加筆(thanks to skmp)
2016/02/04 ver 4.90 条件分岐命令にjcc(const void *addr);のタイプを追加
2016/01/30 ver 4.89 vpblendvbがymmレジスタをサポートしていなかった(thanks to John Funnell)
2016/01/24 ver 4.88 lea, cmovの16bitレジスタ対応(thanks to whyisthisfieldhere)
2015/08/16 ver 4.87 セグメントセレクタに対応
2015/08/16 ver 4.86 [rip + label]アドレッシングで即値を使うと壊れる(thanks to whyisthisfieldhere)
2015/08/10 ver 4.85 Address::operator==()が間違っている(thanks to inolen)
2015/07/22 ver 4.84 call()がvariadic template対応
2015/05/24 ver 4.83 movbeサポート(thanks to benvanik)
2015/05/24 ver 4.82 F16Cが使えるかどうかの判定追加
2015/04/25 ver 4.81 setSizeが例外を投げる条件を修正(thanks to whyisthisfieldhere)
2015/04/22 ver 4.80 rip相対でLabelのサポート(thanks to whyisthisfieldhere)
2015/01/28 ver 4.71 adcx, adox, cmpxchg, rdseed, stacのサポート
2014/10/14 ver 4.70 MmapAllocatorのサポート
2014/06/13 ver 4.62 VC2014で警告抑制
2014/05/30 ver 4.61 bt, bts, btr, btcのサポート
2014/05/28 ver 4.60 vcvtph2ps, vcvtps2phのサポート
2014/04/11 ver 4.52 rdrandの判定追加
2014/03/25 ver 4.51 参照されなくなったラベルの状態を削除する
2014/03/16 ver 4.50 新しいラベルクラスのサポート
2014/03/05 ver 4.40 VirtualBox上でBMI/enhanced repのサポート判定を間違うことがあるのを修正
2013/12/03 ver 4.30 Reg::cvt8(), cvt16(), cvt32()のサポート
2013/10/16 ver 4.21 ラベルでstd::stringを受け付ける。
2013/07/30 ver 4.20 [break backward compatibility] 従来のReg32eクラスをアドレッシング用のRegExpとReg32, Reg64を表すReg32eに分離
2013/07/04 ver 4.10 [break backward compatibility] Xbyak::Errorの型をenumからclassに変更
2013/06/21 ver 4.02 LABELの指すアドレスを書き込むputL(LABEL)関数の追加。
2013/06/21 ver 4.01 vpsllw, vpslld, vpsllq, vpsraw, vpsrad, vpsrlw, vpsrld, vpsrlq support (ymm, ymm, xmm)
support vpbroadcastb, vpbroadcastw, vpbroadcastd, vpbroadcastq(thanks to Gabest)
2013/05/30 ver 4.00 AVX2, VEX-encoded GPR-instructionをサポート
2013/03/27 ver 3.80 mov(reg, "label");をサポート
2013/03/13 ver 3.76 cqo, jcxz, jecxz, jrcxz追加
2013/01/15 ver 3.75 生成されたコードを修正するためにsetSize()を追加
2013/01/12 ver 3.74 CodeGenerator::reset()とAllocator::useProtect()を追加
2013/01/06 ver 3.73 可能ならunordered_mapを使う
2012/12/04 ver 3.72 eaxなどをCodeGeneratorのメンバ変数に戻す. Xbyak::util::eaxはstatic const変数
2012/11/17 ver 3.71 and_(), or_(), xor_(), not_()をXBYAK_NO_OP_NAMESが定義されていないときでも使えるようにした
2012/11/17 CodeGeneratorのeax, ecx, ptrなどのメンバ変数をstaticにし、const参照をXbyak::utilにも定義
2012/11/09 ver 3.70 and()をand_()にするためのマクロXBYAK_NO_OP_NAMESを追加(thanks to Mattias)
2012/11/01 ver 3.62 add fwait/fnwait/finit/fninit
2012/11/01 ver 3.61 add fldcw/fstcw
2012/05/03 ver 3.60 Allocatorクラスのインタフェースを変更
2012/03/23 ver 3.51 userPtrモードがバグったのを修正
2012/03/19 ver 3.50 AutoGrowモードサポート
2011/11/09 ver 3.05 rip相対の64bitサイズ以外の扱いのバグ修正 / movsxdサポート
2011/08/15 ver 3.04 add(dword [ebp-8], 0xda);などにおけるimm8の扱いのバグ修正(thanks to lolcat)
2011/06/16 ver 3.03 Macのgcc上での__GNUC_PREREQがミスってたのを修正(thanks to t_teruya)
2011/04/28 ver 3.02 Macのgcc上ではxgetbvをdisable
2011/03/24 ver 3.01 fix typo of OSXSAVE
2011/03/23 ver 3.00 vcmpeqpsなどを追加
2011/02/16 ver 2.994 beta add vmovq for 32-bit mode(I forgot it)
2011/02/16 ver 2.993 beta remove cvtReg to avoid thread unsafe
2011/02/10 ver 2.992 beta support one argument syntax for fadd like nasm
2011/02/07 ver 2.991 beta fix pextrw reg, xmm, imm(Thanks to Gabest)
2011/02/04 ver 2.99 beta support AVX
2010/12/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp
2010/10/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist
2010/07/07 ver 2.29 fix call(<label>)
2010/06/17 ver 2.28 move some member functions to public
2010/06/01 ver 2.27 support encoding of mov(reg64, imm) like yasm(not nasm)
2010/05/24 ver 2.26 fix sub(rsp, 1000)
2010/04/26 ver 2.25 add jc/jnc(I forgot to implement them...)
2010/04/16 ver 2.24 change the prototype of rewrite() method
2010/04/15 ver 2.23 fix align() and xbyak_util.h for Mac
2010/02/16 ver 2.22 fix inLocalLabel()/outLocalLabel()
2009/12/09 ver 2.21 support cygwin(gcc 4.3.2)
2009/11/28 ver 2.20 FPUの一部命令サポート
2009/06/25 ver 2.11 64bitモードでの mov(qword[rax], imm); 修正(thanks to Martinさん)
2009/03/10 ver 2.10 jmp/call reg64の冗長なREX.W削除
2009/02/24 ver 2.09 movq reg64, mmx/xmm; movq mmx/xmm, reg64追加
2009/02/13 ver 2.08 movd(xmm7, dword[eax])が0x66を落とすバグ修正(thanks to Gabestさん)
2008/12/30 ver 2.07 call()の相対アドレスが8bit以下のときのバグ修正(thanks to katoさん)
2008/09/18 ver 2.06 @@, @f, @bとラベルの局所化機能追加(thanks to nobu-qさん)
2008/09/18 ver 2.05 ptr [rip + 32bit offset]サポート(thanks to 団子厨(Dango-Chu)さん)
2008/06/03 ver 2.04 align()のポカミス修正。mov(ptr[eax],1);などをエラーに
2008/06/02 ver 2.03 ユーザ定義メモリインタフェースサポート
2008/05/26 ver 2.02 protect()(on Linux)で不正な設定になることがあるのを修正(thanks to shinichiro_hさん)
2008/04/30 ver 2.01 cmpxchg16b, cdqe追加
2008/04/29 ver 2.00 x86/x64-64版公開
2008/04/25 ver 1.90 x64版β公開
2008/04/18 ver 1.12 コード整理
2008/04/14 ver 1.11 コード整理
2008/03/12 ver 1.10 bsf/bsr追加(忘れていた)
2008/02/14 ver 1.09 sub eax, 1234が16bitモードで出力されていたのを修正(thanks to Robertさん)
2007/11/05 ver 1.08 lock, xadd, xchg追加
2007/11/02 ver 1.07 SSSE3/SSE4対応(thanks to 団子厨(Dango-Chu)さん)
2007/09/25 ver 1.06 call((int)関数ポインタ); jmp((int)関数ポインタ);のサポート
2007/08/04 ver 1.05 細かい修正
2007/02/04 後方へのジャンプでT_NEARをつけないときに8bit相対アドレスに入らない
場合に例外が発生しないバグの修正
2007/01/21 [disp]の形のアドレス生成のバグ修正
mov (eax|ax|al, [disp]); mov([disp], eax|ax|al);の短い表現選択
2007/01/17 webページ作成
2007/01/04 公開開始
-----------------------------------------------------------------------------
◎著作権者
光成滋生(MITSUNARI Shigeo, herumi@nifty.com)
---
$Revision: 1.56 $
$Date: 2010/04/16 11:58:22 $

109
sample/Makefile Normal file
View file

@ -0,0 +1,109 @@
# Samples that are always built; their rules compile with -m32.
TARGET = test quantize bf toyvm test_util memfunc static_buf jmp_table
# Header every sample depends on.
XBYAK_INC=../xbyak/xbyak.h
# BOOST_EXIST becomes 1 when gcc -E can resolve <boost/spirit/core.hpp>; it is empty otherwise.
BOOST_EXIST=$(shell echo "\#include <boost/spirit/core.hpp>" | (gcc -E - 2>/dev/null) | grep "boost/spirit/core.hpp" >/dev/null && echo "1")
UNAME_M=$(shell uname -m)
# Select BIT (32 or 64) from the OS name and the machine name.
ifeq ($(shell uname -s),Darwin)
ifeq ($(UNAME_M),x86_64)
BIT=64
endif
ifeq ($(UNAME_M),i386)
BIT=32
endif
# Product version starting with 10.5: BIT=64 when sysctl reports a 64-bit capable CPU.
ifeq ($(shell sw_vers -productVersion | cut -c1-4 | sed 's/\.//'),105)
ifeq ($(shell sysctl -n hw.cpu64bit_capable),1)
BIT=64
endif
endif
else
# Other systems default to 32 and switch to 64 for x86_64 / amd64.
BIT=32
ifeq ($(UNAME_M),x86_64)
BIT=64
endif
ifeq ($(UNAME_M),amd64)
BIT=64
endif
endif
# 64-bit hosts additionally build the -m64 variants.
ifeq ($(BIT),64)
TARGET += test64 bf64 memfunc64 test_util64 static_buf64 jmp_table64
ifeq ($(BOOST_EXIST),1)
TARGET += calc64 #calc2_64
endif
endif
# The calculator sample is only built when boost was detected.
ifeq ($(BOOST_EXIST),1)
TARGET += calc #calc2
endif
all: $(TARGET)
CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith -pedantic
# -I../ lets the samples include "xbyak/xbyak.h".
CFLAGS=-g -O2 -fomit-frame-pointer -Wall -I../ $(CFLAGS_WARN)
# One rule per sample binary: each is built from a single source file, and the
# -m32 / -m64 flag selects the width of the produced executable.
# NOTE(review): make requires every command line below to start with a TAB;
# confirm the leading tabs are present in the real file.
test:
$(CXX) $(CFLAGS) test0.cpp -o $@ -m32
quantize:
$(CXX) $(CFLAGS) quantize.cpp -o $@ -m32
calc:
$(CXX) $(CFLAGS) calc.cpp -o $@ -m32
calc64:
$(CXX) $(CFLAGS) calc.cpp -o $@ -m64
calc2:
$(CXX) $(CFLAGS) calc2.cpp -o $@ -m32
calc2_64:
$(CXX) $(CFLAGS) calc2.cpp -o $@ -m64
bf:
$(CXX) $(CFLAGS) bf.cpp -o $@ -m32
bf64:
$(CXX) $(CFLAGS) bf.cpp -o $@ -m64
memfunc:
$(CXX) $(CFLAGS) memfunc.cpp -o $@ -m32
memfunc64:
$(CXX) $(CFLAGS) memfunc.cpp -o $@ -m64
toyvm:
$(CXX) $(CFLAGS) toyvm.cpp -o $@ -m32
test64:
$(CXX) $(CFLAGS) test0.cpp -o $@ -m64
test_util:
$(CXX) $(CFLAGS) test_util.cpp -o $@ -m32
test_util64:
$(CXX) $(CFLAGS) test_util.cpp -o $@ -m64
static_buf:
$(CXX) $(CFLAGS) static_buf.cpp -o $@ -m32
static_buf64:
$(CXX) $(CFLAGS) static_buf.cpp -o $@ -m64
jmp_table:
$(CXX) $(CFLAGS) jmp_table.cpp -o $@ -m32
jmp_table64:
$(CXX) $(CFLAGS) jmp_table.cpp -o $@ -m64
# Remove objects, every binary listed in TARGET, and Windows executables.
clean:
rm -rf *.o $(TARGET) *.exe
# Prerequisites of each sample binary: its source file and the xbyak header(s),
# so that a binary is rebuilt when either changes.
test : test0.cpp $(XBYAK_INC)
test64: test0.cpp $(XBYAK_INC)
quantize : quantize.cpp $(XBYAK_INC)
calc : calc.cpp $(XBYAK_INC)
calc64 : calc.cpp $(XBYAK_INC)
calc2 : calc2.cpp $(XBYAK_INC)
calc2_64 : calc2.cpp $(XBYAK_INC)
bf : bf.cpp $(XBYAK_INC)
bf64 : bf.cpp $(XBYAK_INC)
memfunc : memfunc.cpp $(XBYAK_INC)
memfunc64 : memfunc.cpp $(XBYAK_INC)
toyvm : toyvm.cpp $(XBYAK_INC)
static_buf: static_buf.cpp $(XBYAK_INC)
static_buf64: static_buf.cpp $(XBYAK_INC)
test_util : test_util.cpp $(XBYAK_INC) ../xbyak/xbyak_util.h
# the 64-bit binary is the target test_util64 (there is no test_util2 target)
test_util64 : test_util.cpp $(XBYAK_INC) ../xbyak/xbyak_util.h
jmp_table: jmp_table.cpp $(XBYAK_INC)
jmp_table64: jmp_table.cpp $(XBYAK_INC)

213
sample/bf.cpp Normal file
View file

@ -0,0 +1,213 @@
#define XBYAK_NO_OP_NAMES
#include "xbyak/xbyak.h"
#include <stdio.h>
#include <stdlib.h>
#include <stack>
#include <fstream>
#ifdef _MSC_VER
#pragma warning(disable : 4996) // scanf
#define snprintf _snprintf_s
#endif
/*
	JIT compiler for Brainfuck.
	The constructor reads a program from a stream and emits native code for a
	function of type
		void (*)(void* putchar, void* getchar, int *stack)
	where stack is the array of int cells the program works on.
*/
class Brainfuck : public Xbyak::CodeGenerator {
private:
	enum Direction { B, F };
	/*
		make the name of the label placed at the begin (B) or after the end (F)
		of the loop numbered labelNo
	*/
	std::string toStr(int labelNo, Direction dir)
	{
		return Xbyak::Label::toStr(labelNo) + (dir == B ? 'B' : 'F');
	}
public:
	/*
		count how often c repeats in a row
		@param is [in] stream positioned just after the first c
		@param c [in] character already read by the caller
		@return length of the run including that first c
		@note the first character which differs from c is put back to the stream
	*/
	int getContinuousChar(std::istream& is, char c)
	{
		int count = 1;
		char p;
		while (is >> p) {
			if (p != c) break;
			count++;
		}
		is.unget();
		return count;
	}
	/*
		compile the program read from is
		@param is [in] Brainfuck source
		@note throws Xbyak::Error(ERR_LABEL_IS_NOT_FOUND) if a ']' has no matching '['
	*/
	Brainfuck(std::istream& is) : CodeGenerator(100000)
	{
		// void (*)(void* putchar, void* getchar, int *stack)
		using namespace Xbyak;
		// prologue : save the registers used below and load the three arguments
#ifdef XBYAK32
		const Reg32& pPutchar(esi);
		const Reg32& pGetchar(edi);
		const Reg32& stack(ebp);
		const Address cur = dword [stack];
		push(ebp); // stack
		push(esi);
		push(edi);
		const int P_ = 4 * 3; // size of the three registers pushed above
		mov(pPutchar, ptr[esp + P_ + 4]); // putchar
		mov(pGetchar, ptr[esp + P_ + 8]); // getchar
		mov(stack, ptr[esp + P_ + 12]); // stack
#elif defined(XBYAK64_WIN)
		const Reg64& pPutchar(rsi);
		const Reg64& pGetchar(rdi);
		const Reg64& stack(rbp); // stack
		const Address cur = dword [stack];
		push(rsi);
		push(rdi);
		push(rbp);
		mov(pPutchar, rcx); // putchar
		mov(pGetchar, rdx); // getchar
		mov(stack, r8); // stack
#else
		const Reg64& pPutchar(rbx);
		const Reg64& pGetchar(rbp);
		const Reg64& stack(r12); // stack
		const Address cur = dword [stack];
		push(rbx);
		push(rbp);
		push(r12);
		mov(pPutchar, rdi); // putchar
		mov(pGetchar, rsi); // getchar
		mov(stack, rdx); // stack
#endif
		int labelNo = 0; // number given to the next '['
		std::stack<int> keepLabelNo; // numbers of the loops which are still open
		char c;
		while (is >> c) {
			switch (c) {
			case '+':
			case '-':
				{
					// a run of the same command is folded into one instruction
					int count = getContinuousChar(is, c);
					if (count == 1) {
						c == '+' ? inc(cur) : dec(cur);
					} else {
						add(cur, (c == '+' ? count : -count));
					}
				}
				break;
			case '>':
			case '<':
				{
					int count = getContinuousChar(is, c);
					// a cell is an int, so the pointer moves by 4 bytes per step
					add(stack, 4 * (c == '>' ? count : -count));
				}
				break;
			case '.':
				// call putchar with the current cell
#ifdef XBYAK32
				push(cur);
				call(pPutchar);
				pop(eax);
#elif defined(XBYAK64_WIN)
				mov(ecx, cur);
				sub(rsp, 32);
				call(pPutchar);
				add(rsp, 32);
#else
				mov(edi, cur);
				call(pPutchar);
#endif
				break;
			case ',':
				// call getchar and store the result into the current cell
#if defined(XBYAK32) || defined(XBYAK64_GCC)
				call(pGetchar);
#elif defined(XBYAK64_WIN)
				sub(rsp, 32);
				call(pGetchar);
				add(rsp, 32);
#endif
				mov(cur, eax);
				break;
			case '[':
				// loop head : leave the loop if the current cell is zero
				L(toStr(labelNo, B));
				mov(eax, cur);
				test(eax, eax);
				jz(toStr(labelNo, F), T_NEAR);
				keepLabelNo.push(labelNo++);
				break;
			case ']':
				{
					// a ']' without an open '[' has no loop to close;
					// top() on an empty stack would be undefined behavior
					if (keepLabelNo.empty()) throw Xbyak::Error(Xbyak::ERR_LABEL_IS_NOT_FOUND);
					int no = keepLabelNo.top(); keepLabelNo.pop();
					jmp(toStr(no, B));
					L(toStr(no, F));
				}
				break;
			default:
				// every other character is ignored
				break;
			}
		}
		// epilogue : restore the saved registers in reverse order
#ifdef XBYAK32
		pop(edi);
		pop(esi);
		pop(ebp);
#elif defined(XBYAK64_WIN)
		pop(rbp);
		pop(rdi);
		pop(rsi);
#else
		pop(r12);
		pop(rbp);
		pop(rbx);
#endif
		ret();
	}
};
/*
	write a standalone C program to stdout which embeds the generated code
	as a page-aligned byte array, makes it executable and calls it with
	putchar, getchar and a static int array
	@param code [in] generated machine code
	@param size [in] size of code in bytes
*/
void dump(const Xbyak::uint8 *code, size_t size)
{
	puts("#include <stdio.h>\nstatic int stack[128 * 1024];");
#ifdef _MSC_VER
	printf("static __declspec(align(4096)) ");
#else
	printf("static __attribute__((aligned(4096)))");
#endif
	puts("const unsigned char code[] = {");
	// 16 bytes per output line
	size_t written = 0;
	while (written < size) {
		printf("0x%02x,", code[written]);
		++written;
		if (written % 16 == 0) {
			putchar('\n');
		}
	}
	puts("\n};");
	// the emitted main() first makes the array executable in the way of each platform
#ifdef _MSC_VER
	puts("#include <windows.h>");
	puts("int main()\n{");
	puts("\tDWORD oldProtect;");
	puts("\tVirtualProtect((void*)code, sizeof(code), PAGE_EXECUTE_READWRITE, &oldProtect);");
#else
	puts("#include <unistd.h>");
	puts("#include <sys/mman.h>");
	puts("int main()\n{");
	puts("\tlong pageSize = sysconf(_SC_PAGESIZE) - 1;");
	puts("\tmprotect((void*)code, (sizeof(code) + pageSize) & ~pageSize, PROT_READ | PROT_EXEC);");
#endif
	puts(
		"\t((void (*)(void*, void*, int *))code)((void*)putchar, (void*)getchar, stack);\n"
		"}"
	);
}
/*
	usage : bf filename.bf [0|1]
	mode 0 (default) : compile the program and run it
	otherwise : print a standalone C program which embeds the generated code
	@return 0 on success, 1 if the usage is wrong, the file can't be opened
	or the compilation fails
*/
int main(int argc, char *argv[])
{
#ifdef XBYAK32
	fprintf(stderr, "32bit mode\n");
#else
	fprintf(stderr, "64bit mode\n");
#endif
	if (argc == 1) {
		fprintf(stderr, "bf filename.bf [0|1]\n");
		return 1;
	}
	std::ifstream ifs(argv[1]);
	// without this check an unreadable file is silently treated as an empty program
	if (!ifs) {
		fprintf(stderr, "can't open %s\n", argv[1]);
		return 1;
	}
	int mode = argc == 3 ? atoi(argv[2]) : 0;
	try {
		Brainfuck bf(ifs);
		if (mode == 0) {
			// cells of the running program
			static int stack[128 * 1024];
			bf.getCode<void (*)(void*, void*, int *)>()(Xbyak::CastTo<void*>(putchar), Xbyak::CastTo<void*>(getchar), stack);
		} else {
			dump(bf.getCode(), bf.getSize());
		}
		return 0;
	} catch (const std::exception& e) {
		printf("ERR:%s\n", e.what());
	} catch (...) {
		printf("unknown error\n");
	}
	// reached only from the handlers above
	return 1;
}

427
sample/bf.vcproj Normal file
View file

@ -0,0 +1,427 @@
<?xml version="1.0" encoding="shift_jis"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="9.00"
Name="bf"
ProjectGUID="{654BD79B-59D3-4B10-BBAA-158BAB272828}"
TargetFrameworkVersion="0"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Release|Win32"
OutputDirectory=".\Release"
IntermediateDirectory=".\Release"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\Release/bf.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
PrecompiledHeaderFile=".\Release/bf.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="4"
SuppressStartupBanner="true"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Release/bf.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
ProgramDatabaseFile=".\Release/bf.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Release/bf.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|Win32"
OutputDirectory=".\Debug"
IntermediateDirectory=".\Debug"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\Debug/bf.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
PrecompiledHeaderFile=".\Debug/bf.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="4"
SuppressStartupBanner="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Debug/bf.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Debug/bf.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Debug/bf.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
TypeLibraryName=".\Release/bf.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
PrecompiledHeaderFile=".\Release/bf.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="4"
SuppressStartupBanner="true"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Release/bf.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
ProgramDatabaseFile=".\Release/bf.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Release/bf.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
TypeLibraryName=".\Debug/bf.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
PrecompiledHeaderFile=".\Debug/bf.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="4"
SuppressStartupBanner="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Debug/bf.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Debug/bf.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Debug/bf.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<File
RelativePath="bf.cpp"
>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
</File>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

229
sample/calc.cpp Normal file
View file

@ -0,0 +1,229 @@
/*
@author herumi
tiny calculator
This program generates a function to calc the value of
polynomial given by user in run-time.
use boost::spirit::classic
see calc2.cpp for new version of boost::spirit
*/
#include <stdio.h>
#include <sstream>
#include <map>
#include <stdexcept>
#define XBYAK_NO_OP_NAMES
#include "xbyak/xbyak.h"
#ifdef _MSC_VER
#pragma warning(disable : 4127) // for boost(constant condition)
#pragma warning(disable : 4512) // for boost
#endif
#include <boost/spirit/include/classic_file_iterator.hpp>
#include <boost/spirit/include/classic_core.hpp>
#include <boost/bind.hpp>
/* error code thrown when an expression uses a name which is not a known variable */
enum Error { UNDEFINED_VARIABLE = 1 };
/*
JIT assemble of given polynomial for VC or gcc
*/
class FuncGen : public Xbyak::CodeGenerator {
public:
typedef std::map<std::string, int> Map;
private:
enum {
MAX_CONST_NUM = 32
};
double constTbl_[MAX_CONST_NUM];
size_t constTblPos_;
int regIdx_;
Map varMap_; // map var name to index
#ifdef XBYAK32
const Xbyak::Reg32& valTbl_;
const Xbyak::Reg32& tbl_;
#else
const Xbyak::Reg64& valTbl_;
const Xbyak::Reg64& tbl_;
#endif
public:
/*
@param y [out] the value of f(var)
@param var [in] table of input variables
func(double *y, const double var[]);
@note func does not return double to avoid difference of compiler
*/
FuncGen(const std::vector<std::string>& varTbl)
: constTblPos_(0)
, regIdx_(-1)
#ifdef XBYAK32
, valTbl_(eax)
, tbl_(edx)
#elif defined(XBYAK64_WIN)
, valTbl_(rcx)
, tbl_(rdx)
#else
, valTbl_(rdi)
, tbl_(rsi)
#endif
{
#ifdef XBYAK32
mov(valTbl_, ptr[esp+8]); // eax == varTbl
mov(tbl_, (size_t)constTbl_);
#else
#ifdef XBYAK64_WIN
movaps(ptr [rsp + 8], xm6); // save xm6, xm7
movaps(ptr [rsp + 8 + 16], xm7);
#endif
mov(tbl_, (size_t)constTbl_);
#endif
for (int i = 0, n = static_cast<int>(varTbl.size()); i < n; i++) {
varMap_[varTbl[i]] = i;
}
}
// use edx
void genPush(double n)
{
if (constTblPos_ >= MAX_CONST_NUM) throw;
constTbl_[constTblPos_] = n;
if (regIdx_ == 7) throw;
movsd(Xbyak::Xmm(++regIdx_), ptr[tbl_ + (int)(constTblPos_ * sizeof(double))]);
constTblPos_++;
}
// use eax
void genVal(const char *begin, const char *end)
{
std::string var(begin, end);
if (varMap_.find(var) == varMap_.end()) throw UNDEFINED_VARIABLE;
if (regIdx_ == 7) throw;
movsd(Xbyak::Xmm(++regIdx_), ptr[valTbl_ + varMap_[var] * sizeof(double)]);
}
void genAdd(const char*, const char*)
{
addsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
}
void genSub(const char*, const char*)
{
subsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
}
void genMul(const char*, const char*)
{
mulsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
}
void genDiv(const char*, const char*)
{
divsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
}
void complete()
{
#ifdef XBYAK32
mov(eax, ptr [esp + 4]); // eax = valTbl
movsd(ptr [eax], xm0);
#else
#ifdef XBYAK64_WIN
movaps(xm6, ptr [rsp + 8]);
movaps(xm7, ptr [rsp + 8 + 16]);
#endif
#endif
ret();
}
};
/*
Boost.Spirit Classic grammar of the formula.
The semantic actions call the FuncGen passed to the constructor, so code is
emitted while the input is being parsed.
*/
struct Grammar : public boost::spirit::classic::grammar<Grammar> {
FuncGen& f_; // code generator driven by the semantic actions
Grammar(FuncGen& f) : f_(f) { }
template<typename ScannerT>
struct definition {
boost::spirit::classic::rule<ScannerT> poly0, poly1, poly2, var;
definition(const Grammar& self)
{
using namespace boost;
using namespace boost::spirit::classic;
// poly0 : poly1 followed by any number of "+ poly1" / "- poly1";
// the action is attached to the whole "op operand" sequence
poly0 = poly1 >> *(('+' >> poly1)[bind(&FuncGen::genAdd, ref(self.f_), _1, _2)]
| ('-' >> poly1)[bind(&FuncGen::genSub, ref(self.f_), _1, _2)]);
// poly1 : poly2 followed by any number of "* poly2" / "/ poly2"
poly1 = poly2 >> *(('*' >> poly2)[bind(&FuncGen::genMul, ref(self.f_), _1, _2)]
| ('/' >> poly2)[bind(&FuncGen::genDiv, ref(self.f_), _1, _2)]);
// var : one or more alphabetic characters, passed to genVal as [begin, end)
var = (+alpha_p)[bind(&FuncGen::genVal, ref(self.f_), _1, _2)];
// poly2 : a real literal, a variable or a parenthesized poly0
poly2 = real_p[bind(&FuncGen::genPush, ref(self.f_), _1)]
| var
| '(' >> poly0 >> ')';
}
// poly0 is the start rule
const boost::spirit::classic::rule<ScannerT>& start() const { return poly0; }
};
};
/*
	print the elements of x with "%f", separated by ", "
	@note prints nothing for an empty vector (x[0] must not be read then)
*/
void put(const std::vector<double>& x)
{
	for (size_t i = 0, n = x.size(); i < n; i++) {
		if (i > 0) printf(", ");
		printf("%f", x[i]);
	}
}
/*
	calc "var1 var2 ..." "function of var"
	Compiles the formula and evaluates it for 10 random inputs.
	@return 0 on success, 1 on a usage, parse or code generation error
*/
int main(int argc, char *argv[])
{
	if (argc <= 2) {
		fprintf(stderr, "calc \"var1 var2 ...\" \"function of var\"\n");
		fprintf(stderr, "eg. calc x \"x*x\"\n");
		fprintf(stderr, "eg. calc \"x y z\" \"x*x + y - z\"\n");
		return 1;
	}
	const char *poly = argv[2];
	try {
		std::vector<std::string> varTbl;
		// get varTbl from argv[1]
		{
			std::istringstream is(argv[1]);
			std::string var;
			printf("varTbl = { ");
			while (is >> var) {
				printf("%s:%d, ", var.c_str(), static_cast<int>(varTbl.size()));
				varTbl.push_back(var);
			}
			printf("}\n");
		}
		FuncGen funcGen(varTbl);
		Grammar calc(funcGen);
		boost::spirit::classic::parse_info<> r = parse(poly, calc, boost::spirit::classic::space_p);
		if (!r.full) {
			printf("err poly=%s\n", poly);
			return 1;
		}
		funcGen.complete();
		std::vector<double> valTbl;
		valTbl.resize(varTbl.size());
		// &valTbl[0] is undefined for an empty vector; a formula without
		// variables never reads the table, so a null pointer is passed then
		const double *vals = valTbl.empty() ? 0 : &valTbl[0];
#ifdef XBYAK32
		puts("32bit mode");
		void (*func)(double *ret, const double *valTbl) = funcGen.getCode<void (*)(double *, const double*)>();
#else
		puts("64bit mode");
		double (*func)(const double *valTbl) = funcGen.getCode<double (*)(const double*)>();
#endif
		for (int i = 0; i < 10; i++) {
			for (size_t j = 0, n = valTbl.size(); j < n; j++) {
				valTbl[j] = rand() % 7;
			}
			double y;
#ifdef XBYAK32
			func(&y, vals);
#else
			y = func(vals);
#endif
			printf("f(");
			if (!valTbl.empty()) put(valTbl);
			printf(")=%f\n", y);
		}
		return 0;
	} catch (std::exception& e) {
		printf("ERR:%s\n", e.what());
	} catch (Error err) {
		printf("ERR:%d\n", err);
	} catch (...) {
		printf("unknown error\n");
	}
	// only reached from one of the handlers above
	return 1;
}

423
sample/calc.vcproj Normal file
View file

@ -0,0 +1,423 @@
<?xml version="1.0" encoding="shift_jis"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="9.00"
Name="calc"
ProjectGUID="{5FDDFAA6-B947-491D-A17E-BBD863846579}"
TargetFrameworkVersion="0"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Release|Win32"
OutputDirectory=".\Release"
IntermediateDirectory=".\Release"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\Release/calc.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
PrecompiledHeaderFile=".\Release/calc.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="4"
SuppressStartupBanner="true"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Release/calc.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
ProgramDatabaseFile=".\Release/calc.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Release/calc.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|Win32"
OutputDirectory=".\Debug"
IntermediateDirectory=".\Debug"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\Debug/calc.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
PrecompiledHeaderFile=".\Debug/calc.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="4"
SuppressStartupBanner="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Debug/calc.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Debug/calc.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Debug/calc.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
TypeLibraryName=".\Release/calc.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
PrecompiledHeaderFile=".\Release/calc.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="4"
SuppressStartupBanner="true"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Release/calc.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
ProgramDatabaseFile=".\Release/calc.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Release/calc.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
TypeLibraryName=".\Debug/calc.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
PrecompiledHeaderFile=".\Debug/calc.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="4"
SuppressStartupBanner="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Debug/calc.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Debug/calc.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Debug/calc.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<File
RelativePath="calc.cpp"
>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
PreprocessorDefinitions=""
/>
</FileConfiguration>
</File>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

302
sample/calc2.cpp Normal file
View file

@ -0,0 +1,302 @@
/*
@author herumi
tiny calculator 2
This program generates a function to calc the value of
polynomial given by user in run-time.
use boost::spirit::qi
*/
#ifdef _WIN32
#pragma warning(disable : 4127) // for boost(constant condition)
#pragma warning(disable : 4512) // for boost
#pragma warning(disable : 4819)
#endif
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
#include <boost/spirit/include/phoenix_bind.hpp>
#include <boost/timer.hpp>
#include <stdio.h>
#include <assert.h>
#include <string>
#include <vector>
#define XBYAK_NO_OP_NAMES
#include "xbyak/xbyak.h"
// opcodes of the stack machine (see Vm::operator())
enum Operand {
OpAdd, // pop two values, push their sum
OpSub, // pop two values, push their difference
OpMul, // pop two values, push their product
OpDiv, // pop two values, push their quotient
OpNeg, // negate the value on top of the stack
OpImm, // push the immediate Code::val_
OpVarX // push the argument x
};
/*
	one instruction of the stack machine
	Both constructors are intentionally implicit so that an Operand or a
	double can be appended to a CodeSet directly.
*/
struct Code {
	Operand op_; // what to do
	double val_; // immediate value, meaningful for OpImm only
	// instruction without immediate
	Code(Operand op) : op_(op), val_(0) {}
	// "push val" instruction
	Code(double val) : op_(OpImm), val_(val) {}
};
typedef std::vector<Code> CodeSet;
struct Vm {
CodeSet code_;
double operator()(double x) const
{
const size_t maxStack = 16;
double stack[maxStack];
double *p = stack;
CodeSet::const_iterator pc = code_.begin();
while (pc != code_.end()) {
switch (pc->op_) {
case OpVarX:
*p++ = x;
break;
case OpImm:
*p++ = pc->val_;
break;
case OpNeg:
p[-1] = -p[-1];
break;
case OpAdd:
--p;
p[-1] += p[0];
break;
case OpSub:
--p;
p[-1] -= p[0];
break;
case OpMul:
--p;
p[-1] *= p[0];
break;
case OpDiv:
--p;
p[-1] /= p[0];
break;
}
++pc;
assert(p < stack + maxStack);
}
return p[-1];
}
};
class Jit : public Xbyak::CodeGenerator {
private:
enum {
MAX_CONST_NUM = 32
};
MIE_ALIGN(16) double constTbl_[MAX_CONST_NUM];
Xbyak::uint64 negConst_;
size_t constTblPos_;
#ifdef XBYAK32
const Xbyak::Reg32& varTbl_;
const Xbyak::Reg32& tbl_;
#else
const Xbyak::Reg64& tbl_;
#endif
int regIdx_;
public:
/*
double jit(double x);
@note 32bit: x : [esp+4], return fp0
64bit: x [rcx](win), xmm0(gcc), return xmm0
*/
Jit()
: negConst_(Xbyak::uint64(1) << 63)
, constTblPos_(0)
#ifdef XBYAK32
, varTbl_(eax)
, tbl_(edx)
#elif defined(XBYAK64_WIN)
, tbl_(rcx)
#else
, tbl_(rdi)
#endif
, regIdx_(-1)
{
#ifdef XBYAK32
lea(varTbl_, ptr [esp+4]);
#else
#ifdef XBYAK64_WIN
movaps(ptr [rsp + 8], xm6); // save xm6, xm7
movaps(ptr [rsp + 8 + 16], xm7);
#endif
movaps(xm7, xm0); // save xm0
#endif
mov(tbl_, (size_t)constTbl_);
}
void genPush(double n)
{
if (constTblPos_ >= MAX_CONST_NUM) throw;
constTbl_[constTblPos_] = n;
if (regIdx_ == 7) throw;
movsd(Xbyak::Xmm(++regIdx_), ptr[tbl_ + constTblPos_ * sizeof(double)]);
constTblPos_++;
}
void genVarX()
{
#ifdef XBYAK32
if (regIdx_ == 7) throw;
movsd(Xbyak::Xmm(++regIdx_), ptr[varTbl_]);
#else
if (regIdx_ == 6) throw;
movsd(Xbyak::Xmm(++regIdx_), xm7);
#endif
}
void genAdd()
{
addsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
}
void genSub()
{
subsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
}
void genMul()
{
mulsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
}
void genDiv()
{
divsd(Xbyak::Xmm(regIdx_ - 1), Xbyak::Xmm(regIdx_)); regIdx_--;
}
void genNeg()
{
xorpd(Xbyak::Xmm(regIdx_), ptr [tbl_ + MAX_CONST_NUM * sizeof(double)]);
}
void complete()
{
#ifdef XBYAK32
sub(esp, 8);
movsd(ptr [esp], xm0);
fld(qword [esp]);
add(esp, 8);
#else
#ifdef XBYAK64_WIN
movaps(xm6, ptr [rsp + 8]);
movaps(xm7, ptr [rsp + 8 + 16]);
#endif
#endif
ret();
}
};
/*
Boost.Spirit Qi grammar of the formula for the interpreter.
The semantic actions append instructions to the CodeSet passed to the
constructor; an operator is appended after its right operand has matched.
*/
template<typename Iterator>
struct Parser : boost::spirit::qi::grammar<Iterator, boost::spirit::ascii::space_type> {
boost::spirit::qi::rule<Iterator, boost::spirit::ascii::space_type> expression, term, factor;
CodeSet& code_; // receives the generated instructions
Parser(CodeSet& code)
: Parser::base_type(expression)
, code_(code)
{
namespace qi = boost::spirit::qi;
using namespace qi::labels;
using boost::phoenix::ref;
using boost::phoenix::push_back;
// expression : term followed by any number of "+ term" / "- term"
expression = term >> *(('+' > term[push_back(ref(code_), OpAdd)])
| ('-' > term[push_back(ref(code_), OpSub)]));
// term : factor followed by any number of "* factor" / "/ factor"
term = factor >> *(('*' > factor[push_back(ref(code_), OpMul)])
| ('/' > factor[push_back(ref(code_), OpDiv)]));
// factor : literal, the variable x, parenthesized expression or signed factor
factor = qi::double_[push_back(ref(code_), _1)]
| qi::lit('x')[push_back(ref(code_), OpVarX)]
| ('(' > expression > ')')
| ('-' > factor[push_back(ref(code_), OpNeg)])
| ('+' > factor);
}
};
/*
Boost.Spirit Qi grammar of the formula for the JIT.
Same rules as Parser, but the semantic actions call the member code_ (Jit)
so machine code is emitted while the input is being parsed.
*/
template<typename Iterator>
struct ParserJit : boost::spirit::qi::grammar<Iterator, boost::spirit::ascii::space_type> {
boost::spirit::qi::rule<Iterator, boost::spirit::ascii::space_type> expression, term, factor;
Jit code_; // code generator driven by the semantic actions
ParserJit()
: ParserJit::base_type(expression)
{
namespace qi = boost::spirit::qi;
using namespace qi::labels;
using boost::phoenix::ref;
using boost::phoenix::push_back;
using boost::phoenix::bind;
// expression : term followed by any number of "+ term" / "- term"
expression = term >> *(('+' > term[bind(&Jit::genAdd, ref(code_))])
| ('-' > term[bind(&Jit::genSub, ref(code_))]));
// term : factor followed by any number of "* factor" / "/ factor"
term = factor >> *(('*' > factor[bind(&Jit::genMul, ref(code_))])
| ('/' > factor[bind(&Jit::genDiv, ref(code_))]));
// factor : literal, the variable x, parenthesized expression or signed factor
factor = qi::double_[bind(&Jit::genPush, ref(code_), _1)]
| qi::lit('x')[bind(&Jit::genVarX, ref(code_))]
| ('(' > expression > ')')
| ('-' > factor[bind(&Jit::genNeg, ref(code_))])
| ('+' > factor);
}
};
template<class Func>
void Test(const char *msg, const Func& f)
{
printf("%s:", msg);
boost::timer t;
double sum = 0;
for (double x = 0; x < 1000; x += 0.0001) {
sum += f(x);
}
printf("sum=%f, %fsec\n", sum, t.elapsed());
}
/*
	calc2 "formula of x"
	Parses the formula once for the interpreter and once for the JIT, then
	benchmarks both.
*/
int main(int argc, char *argv[])
{
	if (argc < 2) {
		fprintf(stderr, "input formula\n");
		return 1;
	}
	const std::string str(argv[1]);
	try {
		typedef std::string::const_iterator Iter;
		Vm vm;
		Parser<Iter> parser(vm.code_);
		ParserJit<Iter> parserJit;
		const Iter end = str.end();

		// 1st pass : build the byte code
		Iter i = str.begin();
		const bool vmParsed = phrase_parse(i, end, parser, boost::spirit::ascii::space);
		if (!vmParsed || i != end) {
			puts("err 1");
			return 1;
		}
		printf("ret=%f\n", vm(2.3));

		// 2nd pass : generate the machine code
		i = str.begin();
		const bool jitParsed = phrase_parse(i, end, parserJit, boost::spirit::ascii::space);
		if (!jitParsed || i != end) {
			puts("err 2");
			return 1;
		}
		parserJit.code_.complete();
		double (*jit)(double) = parserJit.code_.getCode<double (*)(double)>();

		Test("VM ", vm);
		Test("JIT", jit);
	} catch (...) {
		fprintf(stderr, "err\n");
	}
}

5
sample/echo.bf Normal file
View file

@ -0,0 +1,5 @@
>>++++++++[->++++++++<]>>>>+++++++++[->++++++++++<]>[<<,[->+<<+<<+>>>]<<<[
->>>+<<<]>>>>>[->+>>+<<<]>[<<[->+>>+<<<]>>>[-<<<+>>>]<<[[-]<->]>-]>>[-<<<+
>>>]<<<<<<<[-<+<<+>>>]<[>>[-<+<<+>>>]<<<[->>>+<<<]>>[[-]>-<]<-]<<[->>>+<<<
]>>>>><[[-]>++++++++++++++++++++++++++++++++>[[-]<------------------------
-------->]<<]>>[-]<.>>]

19
sample/fizzbuzz.bf Normal file
View file

@ -0,0 +1,19 @@
++++++[->++++>>+>+>-<<<<<]>
[<++++>>+++>++++>>+++>+++++>+++++>>>>>>++>>++<<<<<<<<<<<<<<-]
<++++>+++>-->+++>->>--->++>>>+++++[->++>++<<]<<<<<<<<<<
[->
-[>>>>>>>]>[<+++>.>.>>>>..>>>+<]<<<<<
-[>>>>]>[<+++++>.>.>..>>>+<]>>>>
+<-[<<<]<[
[-<<+>>]>>>+>+<<<<<<[->>+>+>-<<<<]<
]>>
[[-]<]>[
>>>[>.<<.<<<]<[.<<<<]>
]
>.<<<<<<<<<<<
]

3
sample/hello.bf Normal file
View file

@ -0,0 +1,3 @@
>+++++++++[<++++++++>-]<.>+++++++[<++++>-]<+.+++++++..+++.[-]>++++++++[<++
++>-]<.>+++++++++++[<+++++>-]<.>++++++++[<+++>-]<.+++.------.--------.[-]>
++++++++[<++++>-]<+.[-]++++++++++.

128
sample/jmp_table.cpp Normal file
View file

@ -0,0 +1,128 @@
/*
sample of move(reg, LABEL);, L(LABEL), putL(LABEL);
*/
#include <stdio.h>
#define XBYAK_NO_OP_NAMES
#include <xbyak/xbyak.h>
// values the generated function must return for the inputs 0, 1 and 2
const int expectTbl[] = {
5, 9, 12
};
/*
	int f(int i); returns expectTbl[i] for i = 0, 1, 2 through a jump table
	mode 0 : jump into a table of 8-byte aligned code stubs
	mode 1 : table of addresses (putL), labels already defined when putL is called
	mode 2 : table of addresses (putL), labels defined after putL is called
*/
struct Code : Xbyak::CodeGenerator {
	/*
		@param mode [in] 0, 1 or 2, see above
		@param size [in] size of the code buffer
		@param p [in] user buffer, 0 or Xbyak::AutoGrow
	*/
	explicit Code(int mode, size_t size, void *p)
		: Xbyak::CodeGenerator(size, p)
	{
		inLocalLabel();
#ifdef XBYAK64
		const Xbyak::Reg64& a = rax;
		const Xbyak::Reg64& c = rcx;
		// The argument is a 32-bit int, so the upper half of the 64-bit
		// argument register is not guaranteed to be zero. A 32-bit mov
		// zero-extends into rax, which is then safe to use as an index.
#ifdef XBYAK64_WIN
		mov(eax, ecx);
#else
		mov(eax, edi);
#endif
#else
		const Xbyak::Reg32& a = eax;
		const Xbyak::Reg32& c = ecx;
		mov(a, ptr [esp + 4]);
#endif
		switch (mode) {
		case 0:
			// every stub is padded to 8 bytes, so stub i starts at .jmp_table + i * 8
			mov(c, ".jmp_table");
			lea(c, ptr [c + a * 8]);
			jmp(c);
			align(8);
			L(".jmp_table");
			mov(a, expectTbl[0]);
			ret();
			align(8);
			mov(a, expectTbl[1]);
			ret();
			align(8);
			mov(a, expectTbl[2]);
			ret();
			break;
		case 1:
			/*
				the label for putL is defined when called
			*/
			mov(c, ".jmp_table");
			jmp(ptr [c + a * (int)sizeof(size_t)]);
			L(".label1");
			mov(a, expectTbl[0]);
			jmp(".end");
			L(".label2");
			mov(a, expectTbl[1]);
			jmp(".end");
			L(".label3");
			mov(a, expectTbl[2]);
			jmp(".end");
			L(".end");
			ret();
			ud2();
			align(8);
			L(".jmp_table");
			putL(".label1");
			putL(".label2");
			putL(".label3");
			break;
		case 2:
			/*
				the label for putL is not defined when called
			*/
			jmp(".in"); // skip the table placed in front of the code
			ud2();
			align(8);
			L(".jmp_table");
			putL(".label1");
			putL(".label2");
			putL(".label3");
			L(".in");
			mov(c, ".jmp_table");
			jmp(ptr [c + a * (int)sizeof(size_t)]);
			L(".label1");
			mov(a, expectTbl[0]);
			jmp(".end");
			L(".label2");
			mov(a, expectTbl[1]);
			jmp(".end");
			L(".label3");
			mov(a, expectTbl[2]);
			jmp(".end");
			L(".end");
			ret();
			break;
		}
		outLocalLabel();
	}
};
/*
	run every mode with a fixed buffer and with AutoGrow and compare the
	results with expectTbl
	@return 0 if all results match, 1 on a mismatch or an exception
*/
int main()
	try
{
	for (int mode = 0; mode < 3; mode++) {
		printf("mode=%d\n", mode);
		for (int grow = 0; grow < 2; grow++) {
			printf("auto grow=%s\n", grow ? "on" : "off");
			Code c(mode, grow ? 30 : 4096, grow ? Xbyak::AutoGrow : 0);
			int (*f)(int) = c.getCode<int (*)(int)>();
			c.ready();
			for (int i = 0; i < 3; i++) {
				const int a = expectTbl[i];
				const int b = f(i);
				if (a != b) {
					printf("ERR i=%d, a=%d, b=%d\n", i, a, b);
					return 1;
				}
			}
		}
	}
	puts("ok");
	return 0;
} catch (std::exception& e) {
	printf("ERR %s\n", e.what());
	// falling off the end of the handler would return 0
	return 1;
}

111
sample/memfunc.cpp Normal file
View file

@ -0,0 +1,111 @@
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#define XBYAK_NO_OP_NAMES
#include <xbyak/xbyak.h>
/*
	class whose member function is re-implemented by the generated code
*/
struct A {
	int x_;
	int y_;
	A() : x_(3), y_(5) {}
	/*
		@return x_ + y_ + a + b + c + d + e
		@note The sum is computed with unsigned arithmetic so that it wraps
		around like the `add` instructions of the generated code instead of
		being undefined on signed overflow (main passes rand() values).
	*/
	int func(int a, int b, int c, int d, int e) const
	{
		unsigned int sum = static_cast<unsigned int>(x_);
		sum += static_cast<unsigned int>(y_);
		sum += static_cast<unsigned int>(a);
		sum += static_cast<unsigned int>(b);
		sum += static_cast<unsigned int>(c);
		sum += static_cast<unsigned int>(d);
		sum += static_cast<unsigned int>(e);
		return static_cast<int>(sum);
	}
};
#ifdef _MSC_VER
#pragma warning(disable : 4510 4512 4610)
#endif
/*
generated replacement of A::func : returns x_ + y_ + a + b + c + d + e
where x_ and y_ are read from the object at offsets 0 and 4
*/
struct Code : public Xbyak::CodeGenerator {
Code()
{
using namespace Xbyak;
// number of bytes removed from the stack by the final ret
int RET_ADJ = 0;
#ifdef XBYAK32
#ifdef _WIN32
// NOTE(review): presumably the MSVC member function convention (this in ecx,
// callee removes the 5 int arguments) - confirm
const int PARA_ADJ = 0;
RET_ADJ = 5 * 4;
#else
// the object pointer is the first stack argument; load it into ecx and
// shift the offsets of the remaining arguments by 4
const int PARA_ADJ = 4;
mov(ecx, ptr [esp + 4]);
#endif
#endif
// where the object pointer and the five arguments are found on each platform
const struct {
#ifdef XBYAK32
const Reg32& self;
#else
const Reg64& self;
#endif
const Operand& a;
const Operand& b;
const Operand& c;
const Operand& d;
const Operand& e;
} para = {
#if defined(XBYAK64_WIN)
rcx,
edx,
r8d,
r9d,
ptr [rsp + 8 * 5],
ptr [rsp + 8 * 6],
#elif defined(XBYAK64_GCC)
rdi,
esi,
edx,
ecx,
r8d,
r9d,
#else
ecx,
ptr [esp + 4 + PARA_ADJ],
ptr [esp + 8 + PARA_ADJ],
ptr [esp + 12 + PARA_ADJ],
ptr [esp + 16 + PARA_ADJ],
ptr [esp + 20 + PARA_ADJ],
#endif
};
mov(eax, ptr [para.self]); // x_
add(eax, ptr [para.self + 4]); // y_
add(eax, para.a);
add(eax, para.b);
add(eax, para.c);
add(eax, para.d);
add(eax, para.e);
ret(RET_ADJ);
}
};
int main()
{
#ifdef XBYAK64
printf("64bit");
#else
printf("32bit");
#endif
#ifdef _WIN32
puts(" win");
#else
puts(" linux");
#endif
try {
Code code;
int (A::*p)(int, int, int, int, int) const = 0;
const void *addr = code.getCode<void*>();
memcpy(&p, &addr, sizeof(void*));
for (int i = 0; i < 10; i++) {
A a;
int t1, t2, t3, t4, t5, x, y;
a.x_ = rand(); a.y_ = rand();
t1 = rand(); t2 = rand(); t3 = rand();
t4 = rand(); t5 = rand();
x = a.func(t1, t2, t3, t4, t5);
y = (a.*p)(t1, t2, t3, t4, t5);
printf("%c %d, %d\n", x == y ? 'o' : 'x', x, y);
}
} catch (std::exception& e) {
printf("err=%s\n", e.what());
return 1;
}
}

229
sample/quantize.cpp Normal file
View file

@ -0,0 +1,229 @@
/*
@author herumi
JPEG quantize sample
This program generates a quantization routine by using fast division algorithm in run-time.
time(sec)
quality 1(low) 10 50 100(high)
VC2005 8.0 8.0 8.0 8.0
Xbyak 1.6 0.8 0.5 0.5
; generated code at q = 1
push esi
push edi
mov edi,dword ptr [esp+0Ch]
mov esi,dword ptr [esp+10h]
mov eax,dword ptr [esi]
shr eax,4
mov dword ptr [edi],eax
mov eax,dword ptr [esi+4]
mov edx,0BA2E8BA3h
mul eax,edx
shr edx,3
...
; generated code at q = 100
push esi
push edi
mov edi,dword ptr [esp+0Ch]
mov esi,dword ptr [esp+10h]
mov eax,dword ptr [esi]
mov dword ptr [edi],eax
mov eax,dword ptr [esi+4]
mov dword ptr [edi+4],eax
mov eax,dword ptr [esi+8]
mov dword ptr [edi+8],eax
mov eax,dword ptr [esi+0Ch]
...
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define XBYAK_NO_OP_NAMES
#include "xbyak/xbyak.h"
#ifdef _MSC_VER
#pragma warning(disable : 4996) // scanf
#endif
typedef Xbyak::uint64 uint64;
typedef Xbyak::uint32 uint32;
const int N = 64;
/*
	generator of a quantization routine specialized for one table:
	every division by a table entry is replaced by a shift or by a
	multiplication with a precomputed constant followed by a shift
*/
class Quantize : public Xbyak::CodeGenerator {
	// largest shift such that (1 << shift) <= x
	static int ilog2(int x)
	{
		int shift = 0;
		while ((1 << shift) <= x) shift++;
		return shift - 1;
	}
public:
	/*
		input : esi
		output : eax = [esi+offset] / dividend
		destroy : edx
		@param dividend [in] the constant to divide by (it is the divisor of
		       the generated division); must not be 0
		@param offset [in] byte offset of the source element from esi
		@throw Xbyak::Error if dividend is 0 (the factorization loop below
		       would never terminate)
	*/
	void udiv(uint32 dividend, int offset)
	{
		if (dividend == 0) throw Xbyak::Error(Xbyak::ERR_BAD_PARAMETER);
		mov(eax, ptr[esi + offset]);
		/* dividend = odd x 2^exponent */
		int exponent = 0, odd = dividend;
		while ((odd & 1) == 0) {
			odd >>= 1; exponent++;
		}
		if (odd == 1) { // trivial case
			if (exponent) {
				shr(eax, exponent);
			}
			return;
		}
		// candidates for the multiplier of the reciprocal of odd
		uint64 mLow, mHigh;
		int len = ilog2(odd) + 1;
		{
			uint64 roundUp = uint64(1) << (32 + len);
			uint64 k = roundUp / (0xFFFFFFFFL - (0xFFFFFFFFL % odd));
			mLow = roundUp / odd;
			mHigh = (roundUp + k) / odd;
		}
		// drop low bits as long as the two candidates stay distinct
		while (((mLow >> 1) < (mHigh >> 1)) && (len > 0)) {
			mLow >>= 1; mHigh >>= 1; len--;
		}
		// m : multiplier, a : 1 if m must be added to the product (see below)
		uint64 m; int a;
		if ((mHigh >> 32) == 0) {
			m = mHigh; a = 0;
		} else {
			// the multiplier does not fit in 32 bits; use the rounded-down form
			len = ilog2(odd);
			uint64 roundDown = uint64(1) << (32 + len);
			mLow = roundDown / odd;
			int r = (int)(roundDown % odd);
			m = (r <= (odd >> 1)) ? mLow : mLow + 1;
			a = 1;
		}
		while ((m & 1) == 0) {
			m >>= 1; len--;
		}
		len += exponent;
		mov(edx, int(m));
		mul(edx); // edx:eax = eax * m
		if (a) {
			add(eax, int(m));
			adc(edx, 0);
		}
		if (len) {
			shr(edx, len);
		}
		mov(eax, edx);
	}
	/*
		quantize(uint32 dest[64], const uint32 src[64]);
		@param qTbl [in] divisors; dest[i] = src[i] / qTbl[i], no entry may be 0
	*/
	Quantize(const uint32 qTbl[64])
	{
		push(esi);
		push(edi);
		const int P_ = 4 * 2; // bytes occupied by the two pushes above
		mov(edi, ptr [esp+P_+4]); // dest
		mov(esi, ptr [esp+P_+8]); // src
		for (int i = 0; i < N; i++) {
			udiv(qTbl[i], i * 4);
			mov(ptr[edi+i*4], eax);
		}
		pop(edi);
		pop(esi);
		ret();
	}
};
/*
	reference implementation : dest[i] = src[i] / qTbl[i] for all N elements
*/
void quantize(uint32 dest[64], const uint32 src[64], const uint32 qTbl[64])
{
	int idx = 0;
	while (idx < N) {
		const uint32 quotient = src[idx] / qTbl[idx];
		dest[idx] = quotient;
		++idx;
	}
}
#ifdef XBYAK64
// the generator uses 32-bit registers and the 32-bit calling convention only
int main()
{
	puts("not implemented for 64bit");
	return 1;
}
#else
/*
	quantize [q]
	Scales the quantization table by q, checks the generated routine against
	the C version and measures both.
	@note q must be positive because the table is divided by it
*/
int main(int argc, char *argv[])
{
	int q;
	if (argc > 1) {
		q = atoi(argv[1]);
	} else {
		printf("input quantize=");
		if (scanf("%d", &q) != 1) {
			fprintf(stderr, "bad number\n");
			return 1;
		}
	}
	printf("q=%d\n", q);
	// q == 0 (also what atoi returns for non-numeric input) would divide by zero below
	if (q <= 0) {
		fprintf(stderr, "bad number\n");
		return 1;
	}
	uint32 qTbl[] = {
		16, 11, 10, 16, 24, 40, 51, 61,
		12, 12, 14, 19, 26, 58, 60, 55,
		14, 13, 16, 24, 40, 57, 69, 56,
		14, 17, 22, 29, 51, 87, 80, 62,
		18, 22, 37, 56, 68, 109, 103, 77,
		24, 35, 55, 64, 81, 104, 113, 92,
		49, 64, 78, 87, 103, 121, 120, 101,
		72, 92, 95, 98, 112, 100, 103, 99
	};
	for (int i = 0; i < N; i++) {
		qTbl[i] /= q;
		if (qTbl[i] == 0) qTbl[i] = 1; // a divisor of 0 is not allowed
	}
	try {
		uint32 src[N];
		uint32 dest[N];
		uint32 dest2[N];
		for (int i = 0; i < N; i++) {
			src[i] = rand() % 2048;
		}
		Quantize jit(qTbl);
		//printf("jit size=%d, ptr=%p\n", jit.getSize(), jit.getCode());
		void (*quantize2)(uint32*, const uint32*, const uint32 *) = jit.getCode<void (*)(uint32*, const uint32*, const uint32 *)>();
		// compare the generated routine with the reference
		quantize(dest, src, qTbl);
		quantize2(dest2, src, qTbl);
		for (int i = 0; i < N; i++) {
			if (dest[i] != dest2[i]) {
				printf("err[%d] %d %d\n", i, dest[i], dest2[i]);
			}
		}
		const int count = 10000000;
		int begin;
		begin = clock();
		for (int i = 0; i < count; i++) {
			quantize(dest, src, qTbl);
		}
		printf("time=%.1fsec\n", (clock() - begin) / double(CLOCKS_PER_SEC));
		begin = clock();
		for (int i = 0; i < count; i++) {
			quantize2(dest, src, qTbl);
		}
		printf("time=%.1fsec\n", (clock() - begin) / double(CLOCKS_PER_SEC));
	} catch (std::exception& e) {
		printf("ERR:%s\n", e.what());
	} catch (...) {
		printf("unknown error\n");
	}
}
#endif

427
sample/quantize.vcproj Normal file
View file

@ -0,0 +1,427 @@
<?xml version="1.0" encoding="shift_jis"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="9.00"
Name="quantize"
ProjectGUID="{D06753BF-E1F3-4578-9B18-08673327F77C}"
TargetFrameworkVersion="0"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory=".\Debug"
IntermediateDirectory=".\Debug"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\Debug/quantize.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
PrecompiledHeaderFile=".\Debug/quantize.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="4"
SuppressStartupBanner="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Debug/quantize.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Debug/quantize.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Debug/quantize.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory=".\Release"
IntermediateDirectory=".\Release"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\Release/quantize.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
PrecompiledHeaderFile=".\Release/quantize.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="4"
SuppressStartupBanner="true"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Release/quantize.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
ProgramDatabaseFile=".\Release/quantize.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Release/quantize.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
TypeLibraryName=".\Debug/quantize.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
PrecompiledHeaderFile=".\Debug/quantize.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="4"
SuppressStartupBanner="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Debug/quantize.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Debug/quantize.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Debug/quantize.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
TypeLibraryName=".\Release/quantize.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
PrecompiledHeaderFile=".\Release/quantize.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="4"
SuppressStartupBanner="true"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Release/quantize.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
ProgramDatabaseFile=".\Release/quantize.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Release/quantize.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<File
RelativePath="quantize.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
</File>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

29
sample/stackframe.cpp Normal file
View file

@ -0,0 +1,29 @@
#define XBYAK_NO_OP_NAMES
#include <xbyak/xbyak_util.h>
#ifdef XBYAK32
#error "this sample is for only 64-bit mode"
#endif
/*
	Generates a function that returns the sum of its three arguments.
	The arguments are reached through the StackFrame helper as sf.p[0..2]
	and the result is accumulated in rax.
	NOTE(review): no explicit ret() is emitted here; presumably StackFrame
	emits the epilogue when sf goes out of scope - confirm in xbyak_util.h.
*/
struct Code : public Xbyak::CodeGenerator {
	Code()
	{
		// see xbyak/sample/sf_test.cpp for how to use other parameter
		const int argNum = 3;
		Xbyak::util::StackFrame sf(this, argNum);
		// rax = p[0], then add every remaining argument
		mov(rax, sf.p[0]);
		for (int i = 1; i < argNum; i++) {
			add(rax, sf.p[i]);
		}
	}
};
int main()
{
Code c;
int (*f)(int, int, int) = c.getCode<int(*) (int, int, int)>();
int ret = f(3, 5, 2);
if (ret == 3 + 5 + 2) {
puts("ok");
} else {
puts("ng");
}
}

41
sample/static_buf.cpp Normal file
View file

@ -0,0 +1,41 @@
/*
sample to use static memory
*/
#include <stdio.h>
#define XBYAK_NO_OP_NAMES
#include "xbyak/xbyak.h"
// statically allocated, 4096-byte aligned buffer that receives the generated code
MIE_ALIGN(4096) char buf[4096];
/*
	Code generator that writes into the static buffer buf instead of
	memory allocated by Xbyak. The generated function takes two ints
	and returns their sum. The single global instance s_code runs the
	constructor during static initialization, before main().
*/
struct Code : Xbyak::CodeGenerator {
	Code()
		: Xbyak::CodeGenerator(sizeof(buf), buf)
	{
		puts("generate");
		// both pointers are printed so that they can be compared by eye
		printf("ptr=%p, %p\n", getCode(), buf);
		// NOTE(review): presumably this makes the buffer executable - confirm in xbyak.h
		Xbyak::CodeArray::protect(buf, sizeof(buf), true);
#if defined(XBYAK32)
		// arguments are on the stack
		mov(eax, ptr [esp + 4]);
		add(eax, ptr [esp + 8]);
#elif defined(XBYAK64_WIN)
		// arguments are in rcx, rdx
		lea(rax, ptr [rcx + rdx]);
#else
		// arguments are in rdi, rsi
		lea(rax, ptr [rdi + rsi]);
#endif
		ret();
	}
} s_code;
inline int add(int a, int b)
{
return Xbyak::CastTo<int (*)(int,int)>(buf)(a, b);
}
/*
	Accumulates add(i, 5) for i = 0..9 and prints the total.
*/
int main()
{
	const int count = 10;
	int sum = 0;
	int i = 0;
	while (i < count) {
		sum += add(i, 5);
		++i;
	}
	printf("sum=%d\n", sum);
}

186
sample/test0.cpp Normal file
View file

@ -0,0 +1,186 @@
#if defined(_MSC_VER) && (_MSC_VER <= 1200)
#pragma warning(disable:4514)
#pragma warning(disable:4786)
#endif
#if defined(_MSC_VER) && (_MSC_VER >= 1900)
#pragma warning(disable:4456)
#endif
#include <stdio.h>
#include <stdlib.h>
#define XBYAK_NO_OP_NAMES
#include "xbyak/xbyak.h"
/*
	Generates int f(int n) which returns 0 + 1 + ... + n.
	The code can be placed in memory supplied by the caller (userPtr, size);
	with the default arguments the values are passed straight through to
	Xbyak::CodeGenerator.
*/
class Sample : public Xbyak::CodeGenerator {
	void operator=(const Sample&);
public:
	Sample(void *userPtr = 0, size_t size = Xbyak::DEFAULT_MAX_CODE_SIZE) : Xbyak::CodeGenerator(size, userPtr)
	{
		const Xbyak::Reg32& n = ecx;
		const Xbyak::Reg32& sum = eax;
		const Xbyak::Reg32& i = edx;
		inLocalLabel(); // use local label for multiple instance
		// bring the argument into n; in the remaining case it is already in ecx
#if defined(XBYAK32)
		mov(n, ptr [esp + 4]);
#elif defined(XBYAK64_GCC)
		mov(n, edi);
#endif
		xor_(sum, sum);
		test(n, n);
		jz(".exit"); // n == 0 : return 0
		xor_(i, i);
	L(".lp");
		add(sum, i);
		inc(i);
		cmp(i, n);
		jbe(".lp"); // repeat while i <= n (unsigned compare)
	L(".exit");
		ret();
		outLocalLabel(); // end of local label
	}
};
class AddFunc : public Xbyak::CodeGenerator {
void operator=(const AddFunc&);
public:
AddFunc(int y)
{
#ifdef XBYAK32
mov(eax, ptr [esp + 4]);
add(eax, y);
#elif defined(XBYAK64_WIN)
lea(rax, ptr [rcx + y]);
#else
lea(eax, ptr [edi + y]);
#endif
ret();
}
int (*get() const)(int) { return getCode<int(*)(int)>(); }
};
/*
	Generates int f(const char *s) which calls atoi(s) and returns its result.

	64-bit: the argument register is left untouched, so it is passed on to
	atoi as is. On entry to the generated function rsp is 8 (mod 16) because
	of the pushed return address, and both 64-bit calling conventions require
	rsp to be 16-byte aligned at the call instruction. The stack is therefore
	adjusted before the call:
	- Windows : 32 bytes of shadow space that the callee may overwrite
	            (without it the callee can destroy our return address)
	            + 8 bytes for the alignment
	- others  : 8 bytes for the alignment
	32-bit: the argument is copied from the stack, pushed for atoi and
	removed again after the call.
*/
class CallAtoi : public Xbyak::CodeGenerator {
	void operator=(const CallAtoi&);
public:
	CallAtoi()
	{
#ifdef XBYAK64
	#ifdef XBYAK64_WIN
		const int frameSize = 32 + 8; // shadow space + alignment
	#else
		const int frameSize = 8; // alignment only
	#endif
		sub(rsp, frameSize);
		mov(rax, (size_t)atoi);
		call(rax);
		add(rsp, frameSize);
#else
		mov(eax, ptr [esp + 4]);
		push(eax);
	#ifdef XBYAK_VARIADIC_TEMPLATE
		call(atoi);
	#else
		call(Xbyak::CastTo<void*>(atoi));
	#endif
		add(esp, 4); // remove the pushed argument
#endif
		ret();
	}
	// returns the generated code as a function pointer
	int (*get() const)(const char *) { return getCode<int (*)(const char *)>(); }
};
class JmpAtoi : public Xbyak::CodeGenerator {
void operator=(const JmpAtoi&);
public:
JmpAtoi()
{
/* already pushed "456" */
#ifdef XBYAK64
mov(rax, (size_t)atoi);
jmp(rax);
#else
jmp(Xbyak::CastTo<void*>(atoi));
#endif
}
int (*get() const)(const char *) { return getCode<int (*)(const char *)>(); }
};
/*
	Generator whose code is emitted by init(n) rather than by the constructor,
	so that it can be emitted again after reset() (see testReset).
	The generated code ignores its argument:
	  eax = 0, ecx = n
	  if (ecx == 0) return eax
	  do { eax += ecx, repeated (10 - n) times; } while (--ecx != 0)
	  return eax
*/
struct Reset : public Xbyak::CodeGenerator {
	void init(int n)
	{
		const Xbyak::Reg32& acc = eax;
		const Xbyak::Reg32& counter = ecx;
		xor_(acc, acc);
		mov(counter, n);
		test(counter, counter);
		jnz("@f");
		ret();
	L("@@");
		// the adds are unrolled when the code is generated
		int addCount = 10 - n;
		while (addCount-- > 0) {
			add(acc, counter);
		}
		sub(counter, 1);
		jnz("@b");
		ret();
	}
};
void testReset()
{
puts("testReset");
Reset code;
int (*f)(int) = code.getCode<int(*)(int)>();
for (int i = 0; i < 10; i++) {
code.init(i);
int v = f(i);
printf("%d %d\n", i, v);
code.reset();
}
}
int main()
{
try {
Sample s;
printf("Xbyak version=%s\n", s.getVersionString());
#ifdef XBYAK64_GCC
puts("64bit mode(gcc)");
#elif defined(XBYAK64_WIN)
puts("64bit mode(win)");
#else
puts("32bit");
#endif
int (*func)(int) = s.getCode<int (*)(int)>();
for (int i = 0; i <= 10; i++) {
printf("0 + ... + %d = %d\n", i, func(i));
}
for (int i = 0; i < 10; i++) {
AddFunc a(i);
int (*add)(int) = a.get();
int y = add(i);
printf("%d + %d = %d\n", i, i, y);
}
CallAtoi c;
printf("call atoi(\"123\") = %d\n", c.get()("123"));
JmpAtoi j;
printf("jmp atoi(\"456\") = %d\n", j.get()("456"));
{
// use memory allocated by user
using namespace Xbyak;
const size_t codeSize = 1024;
uint8 buf[codeSize + 16];
uint8 *p = CodeArray::getAlignedAddress(buf);
CodeArray::protect(p, codeSize, true);
Sample s(p, codeSize);
int (*func)(int) = s.getCode<int (*)(int)>();
if (Xbyak::CastTo<uint8*>(func) != p) {
fprintf(stderr, "internal error %p %p\n", p, Xbyak::CastTo<uint8*>(func));
return 1;
}
printf("0 + ... + %d = %d\n", 100, func(100));
CodeArray::protect(p, codeSize, false);
}
puts("OK");
testReset();
} catch (std::exception& e) {
printf("ERR:%s\n", e.what());
} catch (...) {
printf("unknown error\n");
}
}

427
sample/test0.vcproj Normal file
View file

@ -0,0 +1,427 @@
<?xml version="1.0" encoding="shift_jis"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="9.00"
Name="test0"
ProjectGUID="{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}"
TargetFrameworkVersion="0"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory=".\Debug"
IntermediateDirectory=".\Debug"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\Debug/test0.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
PrecompiledHeaderFile=".\Debug/test0.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="4"
SuppressStartupBanner="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Debug/test0.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Debug/test0.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Debug/test0.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory=".\Release"
IntermediateDirectory=".\Release"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\Release/test0.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
PrecompiledHeaderFile=".\Release/test0.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="4"
SuppressStartupBanner="true"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Release/test0.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
ProgramDatabaseFile=".\Release/test0.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Release/test0.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
TypeLibraryName=".\Debug/test0.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
PrecompiledHeaderFile=".\Debug/test0.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="4"
SuppressStartupBanner="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Debug/test0.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Debug/test0.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Debug/test0.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
TypeLibraryName=".\Release/test0.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
PrecompiledHeaderFile=".\Release/test0.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="4"
SuppressStartupBanner="true"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Release/test0.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
ProgramDatabaseFile=".\Release/test0.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Release/test0.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<File
RelativePath="test0.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
</File>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

109
sample/test_util.cpp Normal file
View file

@ -0,0 +1,109 @@
#include <stdio.h>
#define XBYAK_NO_OP_NAMES
#include "xbyak/xbyak_util.h"
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
/*
	Generates int f() which returns the number of set bits of the constant n,
	computed by the popcnt instruction when the generated code runs.
*/
struct PopCountTest : public Xbyak::CodeGenerator {
	PopCountTest(int n)
	{
		const Xbyak::Reg32& result = eax;
		mov(result, n);
		popcnt(result, result);
		ret();
	}
};
/*
	Prints what Xbyak::util::Cpu reports about the CPU:
	- the vendor line ("intel" if tINTEL is set, "amd" in every other case)
	- one name for every feature of the table below that cpu.has() reports
	- the result of a popcnt self test, only if popcnt is reported
	- the family information via cpu.putFamily()
*/
void putCPUinfo()
{
	using namespace Xbyak::util;
	Cpu cpu;
	printf("vendor %s\n", cpu.has(Cpu::tINTEL) ? "intel" : "amd");
	// feature flag -> name shown to the user
	static const struct {
		Cpu::Type type;
		const char *str;
	} tbl[] = {
		{ Cpu::tMMX, "mmx" },
		{ Cpu::tMMX2, "mmx2" },
		{ Cpu::tCMOV, "cmov" },
		{ Cpu::tSSE, "sse" },
		{ Cpu::tSSE2, "sse2" },
		{ Cpu::tSSE3, "sse3" },
		{ Cpu::tSSSE3, "ssse3" },
		{ Cpu::tSSE41, "sse41" },
		{ Cpu::tSSE42, "sse42" },
		{ Cpu::tPOPCNT, "popcnt" },
		{ Cpu::t3DN, "3dn" },
		{ Cpu::tE3DN, "e3dn" },
		{ Cpu::tSSE4a, "sse4a" },
		{ Cpu::tSSE5, "sse5" },
		{ Cpu::tAESNI, "aesni" },
		{ Cpu::tRDTSCP, "rdtscp" },
		{ Cpu::tOSXSAVE, "osxsave(xgetbv)" }, // the instruction is spelled XGETBV
		{ Cpu::tPCLMULQDQ, "pclmulqdq" },
		{ Cpu::tAVX, "avx" },
		{ Cpu::tFMA, "fma" },
		{ Cpu::tAVX2, "avx2" },
		{ Cpu::tBMI1, "bmi1" },
		{ Cpu::tBMI2, "bmi2" },
		{ Cpu::tLZCNT, "lzcnt" },
		{ Cpu::tPREFETCHW, "prefetchw" },
		{ Cpu::tENHANCED_REP, "enh_rep" },
		{ Cpu::tRDRAND, "rdrand" },
		{ Cpu::tADX, "adx" },
		{ Cpu::tRDSEED, "rdseed" },
		{ Cpu::tSMAP, "smap" },
		{ Cpu::tHLE, "hle" },
		{ Cpu::tRTM, "rtm" },
		{ Cpu::tMPX, "mpx" },
		{ Cpu::tSHA, "sha" },
		{ Cpu::tPREFETCHWT1, "prefetchwt1" },
		{ Cpu::tF16C, "f16c" },
		{ Cpu::tMOVBE, "movbe" },
		{ Cpu::tAVX512F, "avx512f" },
		{ Cpu::tAVX512DQ, "avx512dq" },
		{ Cpu::tAVX512IFMA, "avx512ifma" },
		{ Cpu::tAVX512PF, "avx512pf" },
		{ Cpu::tAVX512ER, "avx512er" },
		{ Cpu::tAVX512CD, "avx512cd" },
		{ Cpu::tAVX512BW, "avx512bw" },
		{ Cpu::tAVX512VL, "avx512vl" },
		{ Cpu::tAVX512VBMI, "avx512vbmi" },
		{ Cpu::tAVX512_4VNNIW, "avx512_4vnniw" },
		{ Cpu::tAVX512_4FMAPS, "avx512_4fmaps" },
	};
	for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
		if (cpu.has(tbl[i].type)) printf(" %s", tbl[i].str);
	}
	printf("\n");
	// run the generated popcnt code only on a CPU that reports the instruction
	if (cpu.has(Cpu::tPOPCNT)) {
		const int n = 0x12345678; // bitcount = 13
		const int ok = 13;
		int r = PopCountTest(n).getCode<int (*)()>()();
		if (r == ok) {
			puts("popcnt ok");
		} else {
			printf("popcnt ng %d %d\n", r, ok);
		}
	}
	/*
		                displayFamily displayModel
		Opteron 2376        10            4
		Core2 Duo T7100      6            F
		Core i3-2120T        6           2A
		Core i7-2600         6           2A
		Xeon X5650           6           2C
		Core i7-3517         6           3A
		Core i7-3930K        6           2D
	*/
	cpu.putFamily();
}
/*
	Prints the bit width this sample was built for, then the CPU information.
*/
int main()
{
#ifdef XBYAK32
	const char *mode = "32bit";
#else
	const char *mode = "64bit";
#endif
	puts(mode);
	putCPUinfo();
}

427
sample/test_util.vcproj Normal file
View file

@ -0,0 +1,427 @@
<?xml version="1.0" encoding="shift_jis"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="9.00"
Name="test_util"
ProjectGUID="{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}"
TargetFrameworkVersion="0"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory=".\Debug"
IntermediateDirectory=".\Debug"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\Debug/test_util.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
PrecompiledHeaderFile=".\Debug/test_util.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="4"
SuppressStartupBanner="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Debug/test_util.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Debug/test_util.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Debug/test_util.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
TypeLibraryName=".\Debug/test_util.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
PrecompiledHeaderFile=".\Debug/test_util.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="4"
SuppressStartupBanner="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Debug/test_util.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Debug/test_util.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Debug/test_util.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory=".\Release"
IntermediateDirectory=".\Release"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\Release/test_util.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
PrecompiledHeaderFile=".\Release/test_util.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="4"
SuppressStartupBanner="true"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Release/test_util.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
ProgramDatabaseFile=".\Release/test_util.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Release/test_util.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
TypeLibraryName=".\Release/test_util.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
PrecompiledHeaderFile=".\Release/test_util.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="4"
SuppressStartupBanner="true"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Release/test_util.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
ProgramDatabaseFile=".\Release/test_util.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Release/test_util.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<File
RelativePath="test_util.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
</File>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

380
sample/toyvm.cpp Normal file
View file

@ -0,0 +1,380 @@
/*
toy vm
register A, B : 32bit
PC : program counter
mem_ 4byte x 65536
every instruction has a fixed size of 4 bytes
every immediate value is 16 bits
R = A or B
vldi R, imm ; R = imm
vld R, idx ; R = mem_[idx]
vst R, idx ; mem_[idx] = R
vaddi R, imm ; R += imm
vsubi R, imm ; R -= imm
vadd R, idx ; R += mem_[idx]
vsub R, idx ; R -= mem_[idx]
vput R ; print R
vjnz R, offset ; if (R != 0) then jmp(PC += offset(signed))
*/
#if defined(_MSC_VER) && (_MSC_VER <= 1200)
#pragma warning(disable:4514)
#pragma warning(disable:4786)
#endif
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <vector>
#define XBYAK_NO_OP_NAMES
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
#ifdef XBYAK64
#error "only 32bit"
#endif
using namespace Xbyak;
class ToyVm : public Xbyak::CodeGenerator {
typedef std::vector<uint32> Buffer;
public:
enum Reg {
A, B
};
enum Code {
LD, LDI, ST, ADD, ADDI, SUB, SUBI, PUT, JNZ,
END_OF_CODE
};
ToyVm()
: mark_(0)
{
::memset(mem_, 0, sizeof(mem_));
}
void vldi(Reg r, uint16 imm) { encode(LDI, r, imm); }
void vld(Reg r, uint16 idx) { encode(LD, r, idx); }
void vst(Reg r, uint16 idx) { encode(ST, r, idx); }
void vadd(Reg r, uint16 idx) { encode(ADD, r, idx); }
void vaddi(Reg r, uint16 imm) { encode(ADDI, r, imm); }
void vsub(Reg r, uint16 idx) { encode(SUB, r, idx); }
void vsubi(Reg r, uint16 imm) { encode(SUBI, r, imm); }
void vjnz(Reg r, int offset) { encode(JNZ, r, static_cast<uint16>(offset)); }
void vput(Reg r) { encode(PUT, r); }
void setMark()
{
mark_ = (int)code_.size();
}
int getMarkOffset()
{
return mark_ - (int)code_.size() - 1;
}
void run()
{
bool debug = false;//true;
uint32 reg[2] = { 0, 0 };
const size_t end = code_.size();
uint32 pc = 0;
for (;;) {
uint32 x = code_[pc];
uint32 code, r, imm;
decode(code, r, imm, x);
if (debug) {
printf("---\n");
printf("A %08x B %08x\n", reg[0], reg[1]);
printf("mem_[] = %08x %08x %08x\n", mem_[0], mem_[1], mem_[2]);
printf("pc=%4d, code=%02x, r=%d, imm=%04x\n", pc, code, r, imm);
}
switch (code) {
case LDI:
reg[r] = imm;
break;
case LD:
reg[r] = mem_[imm];
break;
case ST:
mem_[imm] = reg[r];
break;
case ADD:
reg[r] += mem_[imm];
break;
case ADDI:
reg[r] += imm;
break;
case SUB:
reg[r] -= mem_[imm];
break;
case SUBI:
reg[r] -= imm;
break;
case PUT:
printf("%c %8d(0x%08x)\n", 'A' + r, reg[r], reg[r]);
break;
case JNZ:
if (reg[r] != 0) pc += static_cast<signed short>(imm);
break;
default:
assert(0);
break;
}
pc++;
if (pc >= end) break;
} // for (;;)
}
void recompile()
{
using namespace Xbyak;
/*
esi : A
edi : B
ebx : mem_
for speed up
mem_[0] : eax
mem_[1] : ecx
mem_[2] : edx
*/
push(ebx);
push(esi);
push(edi);
const Reg32 reg[2] = { esi, edi };
const Reg32 mem(ebx);
const Reg32 memTbl[] = { eax, ecx, edx };
const size_t memTblNum = NUM_OF_ARRAY(memTbl);
for (size_t i = 0; i < memTblNum; i++) xor_(memTbl[i], memTbl[i]);
xor_(esi, esi);
xor_(edi, edi);
mov(mem, (size_t)mem_);
const size_t end = code_.size();
uint32 pc = 0;
uint32 labelNum = 0;
for (;;) {
uint32 x = code_[pc];
uint32 code, r, imm;
decode(code, r, imm, x);
L(Label::toStr(labelNum++));
switch (code) {
case LDI:
mov(reg[r], imm);
break;
case LD:
if (imm < memTblNum) {
mov(reg[r], memTbl[imm]);
} else {
mov(reg[r], ptr[mem + imm * 4]);
}
break;
case ST:
if (imm < memTblNum) {
mov(memTbl[imm], reg[r]);
} else {
mov(ptr [mem + imm * 4], reg[r]);
}
break;
case ADD:
if (imm < memTblNum) {
add(reg[r], memTbl[imm]);
} else {
add(reg[r], ptr [mem + imm * 4]);
}
break;
case ADDI:
add(reg[r], imm);
break;
case SUB:
if (imm < memTblNum) {
sub(reg[r], memTbl[imm]);
} else {
sub(reg[r], ptr [mem + imm * 4]);
}
break;
case SUBI:
sub(reg[r], imm);
break;
case PUT:
{
static const char *str = "%c %8d(0x%08x)\n";
push(eax);
push(edx);
push(ecx);
push(reg[r]);
push(reg[r]);
push('A' + r);
push((int)str);
call(Xbyak::CastTo<void*>(printf));
add(esp, 4 * 4);
pop(ecx);
pop(edx);
pop(eax);
}
break;
case JNZ:
test(reg[r], reg[r]);
jnz(Label::toStr(labelNum + static_cast<signed short>(imm)));
break;
default:
assert(0);
break;
}
pc++;
if (pc >= end) break;
} // for (;;)
pop(edi);
pop(esi);
pop(ebx);
ret();
}
private:
uint32 mem_[65536];
Buffer code_;
int mark_;
void decode(uint32& code, uint32& r, uint32& imm, uint32 x)
{
code = x >> 24;
r = (x >> 16) & 0xff;
imm = x & 0xffff;
}
void encode(Code code, Reg r, uint16 imm = 0)
{
uint32 x = (code << 24) | (r << 16) | imm;
code_.push_back(x);
}
};
/*
	ToyVm program which runs n steps of the Fibonacci recurrence and prints
	the result (register A) with vput.
	The program is only built by the constructor; execute it with run()
	(interpreter) or with recompile() followed by runByJIT().
*/
class Fib : public ToyVm {
public:
	/*
		n : number of iterations; it is stored as a 16-bit immediate, so a
		    value of 65536 or more is rejected with a message on stderr and
		    the program is left empty
	*/
	Fib(int n)
	{
		if (n >= 65536) {
			fprintf(stderr, "current version support only imm16\n");
			return;
		}
		/*
			A : c
			B : temporary
			mem_[0] : p
			mem_[1] : t
			mem_[2] : n
		*/
		vldi(A, 1); // c
		vst(A, 0); // p(1)
		vldi(B, static_cast<uint16>(n));
		vst(B, 2); // n
		// lp
		setMark();
		vst(A, 1); // t = c
		vadd(A, 0); // c += p
		vld(B, 1);
		vst(B, 0); // p = t
		// vput(A);
		vld(B, 2);
		vsubi(B, 1);
		vst(B, 2); // n--
		vjnz(B, getMarkOffset());
		vput(A);
	}
	/*
		execute the machine code generated by recompile()
		getCode() only returns the entry point, so the returned pointer has to
		be called as well
	*/
	void runByJIT()
	{
		getCode<void (*)()>()();
	}
};
/*
	The same computation as the Fib vm program, written directly in C++
	for the timing comparison in main(): n steps of the recurrence, then
	the result is printed.
	The body is executed before n is tested, exactly like the vm program.
*/
void fibC(uint32 n)
{
	uint32 prev = 1;
	uint32 cur = 1;
	do {
		const uint32 tmp = cur;
		cur += prev;
		prev = tmp;
		n--;
	} while (n != 0);
	printf("c=%d(0x%08x)\n", cur, cur);
}
int main()
{
try {
const int n = 10000;
Fib fib(n);
fib.recompile();
{
Xbyak::util::Clock clk;
clk.begin();
fib.run();
clk.end();
printf("vm %.2fKclk\n", clk.getClock() * 1e-3);
}
{
Xbyak::util::Clock clk;
clk.begin();
fib.runByJIT();
clk.end();
printf("jit %.2fKclk\n", clk.getClock() * 1e-3);
}
{
Xbyak::util::Clock clk;
clk.begin();
fibC(n);
clk.end();
printf("native C %.2fKclk\n", clk.getClock() * 1e-3);
}
} catch (std::exception& e) {
printf("ERR:%s\n", e.what());
} catch (...) {
printf("unknown error\n");
}
return 0;
}
/*
the code generated by Xbyak
push ebx
push esi
push edi
xor eax,eax
xor ecx,ecx
xor edx,edx
xor esi,esi
xor edi,edi
mov ebx,0EFF58h
mov esi,1
mov eax,esi
mov edi,2710h
mov edx,edi
.lp:
mov ecx,esi
add esi,eax
mov edi,ecx
mov eax,edi
mov edi,edx
sub edi,1
mov edx,edi
test edi,edi
jne .lp
push eax
push edx
push ecx
push esi
push esi
push 41h
push 42C434h
call printf (409342h)
add esp,10h
pop ecx
pop edx
pop eax
pop edi
pop esi
pop ebx
ret
*/

427
sample/toyvm.vcproj Normal file
View file

@ -0,0 +1,427 @@
<?xml version="1.0" encoding="shift_jis"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="9.00"
Name="toyvm"
ProjectGUID="{2E41C7AF-39FF-454C-B081-37445378DCB3}"
TargetFrameworkVersion="0"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory=".\Debug"
IntermediateDirectory=".\Debug"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\Debug/toyvm.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
PrecompiledHeaderFile=".\Debug/toyvm.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="4"
SuppressStartupBanner="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Debug/toyvm.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Debug/toyvm.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Debug/toyvm.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory=".\Release"
IntermediateDirectory=".\Release"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\Release/toyvm.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
PrecompiledHeaderFile=".\Release/toyvm.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="4"
SuppressStartupBanner="true"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Release/toyvm.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
ProgramDatabaseFile=".\Release/toyvm.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Release/toyvm.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
TypeLibraryName=".\Debug/toyvm.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
PrecompiledHeaderFile=".\Debug/toyvm.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="4"
SuppressStartupBanner="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Debug/toyvm.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Debug/toyvm.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Debug/toyvm.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
TypeLibraryName=".\Release/toyvm.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories="../"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
PrecompiledHeaderFile=".\Release/toyvm.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="4"
SuppressStartupBanner="true"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1041"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\Release/toyvm.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
ProgramDatabaseFile=".\Release/toyvm.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile=".\Release/toyvm.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<File
RelativePath="toyvm.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories=""
PreprocessorDefinitions=""
/>
</FileConfiguration>
</File>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

8
test/6.bat Normal file
View file

@ -0,0 +1,8 @@
@echo off
rem Assemble test.asm for win64 with nasm and with yasm and print both listings.
rem -rf keeps rm silent when the listings do not exist yet (a.bat does the same).
rm -rf a.lst b.lst
echo nasm
nasm -l a.lst -f win64 test.asm
cat a.lst
echo yasm
yasm -l b.lst -f win64 test.asm
cat b.lst

74
test/Makefile Normal file
View file

@ -0,0 +1,74 @@
# Programs built by the default target 'all'.
TARGET = make_nm normalize_prefix jmp address nm_frame bad_address misc
XBYAK_INC=../xbyak/xbyak.h
# BIT is 64 when 'uname -m' reports x86_64, otherwise 32.
BIT=32
ifeq ($(shell uname -m),x86_64)
BIT=64
endif
# NOTE(review): MODE_BIT is not assigned anywhere in this file; presumably the
# caller passes it (make MODE_BIT=64) - confirm it is not meant to be BIT.
ifeq ($(MODE_BIT),64)
TARGET += jmp64 address64
endif
all: $(TARGET)
CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith
CFLAGS=-O2 -fomit-frame-pointer -Wall -fno-operator-names -I../ -I./ $(CFLAGS_WARN) #-std=c++0x
# Recipe for make_nm; its prerequisites are added by the last rule of this file.
make_nm:
$(CXX) $(CFLAGS) make_nm.cpp -o $@
normalize_prefix: normalize_prefix.cpp ../xbyak/xbyak.h
$(CXX) $(CFLAGS) normalize_prefix.cpp -o $@
test_mmx: test_mmx.cpp ../xbyak/xbyak.h
$(CXX) $(CFLAGS) test_mmx.cpp -o $@ -lpthread
# jmp and address are built twice from the same source: -m32 and (…64) -m64.
jmp: jmp.cpp ../xbyak/xbyak.h
$(CXX) $(CFLAGS) jmp.cpp -o $@ -m32
jmp64: jmp.cpp ../xbyak/xbyak.h
$(CXX) $(CFLAGS) jmp.cpp -o $@ -m64
address: address.cpp ../xbyak/xbyak.h
$(CXX) $(CFLAGS) address.cpp -o $@ -m32
address64: address.cpp ../xbyak/xbyak.h
$(CXX) $(CFLAGS) address.cpp -o $@ -m64
nm_frame: nm_frame.cpp ../xbyak/xbyak.h
$(CXX) $(CFLAGS) nm_frame.cpp -o $@ -m32
bad_address: bad_address.cpp ../xbyak/xbyak.h
$(CXX) $(CFLAGS) bad_address.cpp -o $@
misc: misc.cpp ../xbyak/xbyak.h
$(CXX) $(CFLAGS) misc.cpp -o $@
# Main test run; the 64-bit script variants and ./jmp64 run only when BIT is 64.
# NOTE(review): the recipe runs ./misc (and ./jmp64 when BIT is 64) although
# neither is a prerequisite of this target, so they must already have been built.
test: normalize_prefix jmp bad_address
$(MAKE) -C ../gen
./test_nm.sh
./test_nm.sh Y
./test_address.sh
./jmp
./bad_address
./misc
ifeq ($(BIT),64)
./test_address.sh 64
./test_nm.sh 64
./test_nm.sh Y64
./jmp64
endif
# AVX test scripts; the 64-bit branch also runs test_address.sh 64.
test_avx: normalize_prefix
./test_avx.sh
./test_avx.sh Y
ifeq ($(BIT),64)
./test_address.sh 64
./test_avx.sh 64
./test_avx.sh Y64
endif
# AVX-512 test script, repeated with the 64 argument when BIT is 64.
test_avx512: normalize_prefix
./test_avx512.sh
ifeq ($(BIT),64)
./test_avx512.sh 64
endif
clean:
rm -rf *.o $(TARGET) lib_run
lib_run: lib_test.cpp lib_run.cpp lib.h
$(CXX) $(CFLAGS) lib_run.cpp lib_test.cpp -o lib_run
# Prerequisites for the make_nm recipe defined near the top.
make_nm: make_nm.cpp $(XBYAK_INC)

14
test/Makefile.win Normal file
View file

@ -0,0 +1,14 @@
# Options passed to cl when building the generator programs.
OPT=/EHsc -I../xbyak /W4 -D_CRT_SECURE_NO_WARNINGS
# Regenerate the mnemonic header: gen_code output first, gen_avx512 output appended.
# This is the first rule in the file, ahead of 'all'.
../xbyak/xbyak_mnemonic.h: ../gen/gen_code.exe ../gen/gen_avx512.exe
../gen/gen_code.exe > $@
../gen/gen_avx512.exe >> $@
# The xbyak.h prerequisite of both generators is commented out.
../gen/gen_code.exe: ../gen/gen_code.cpp #../xbyak/xbyak.h
cl ../gen/gen_code.cpp $(OPT) /Fe:../gen/gen_code.exe
../gen/gen_avx512.exe: ../gen/gen_avx512.cpp #../xbyak/xbyak.h
cl ../gen/gen_avx512.cpp $(OPT) /Fe:../gen/gen_avx512.exe
SUB_HEADER=../xbyak/xbyak_mnemonic.h
all: $(SUB_HEADER)

9
test/a.bat Normal file
View file

@ -0,0 +1,9 @@
@echo off
rem Assemble test.asm for win32 (with WIN32 defined) using nasm and yasm
rem and print the listing produced by each assembler.
echo 32bit
rem remove listings left over from a previous run
rm -rf a.lst b.lst
echo nasm
nasm -l a.lst -f win32 -DWIN32 test.asm
cat a.lst
echo yasm
yasm -l b.lst -f win32 -DWIN32 test.asm
cat b.lst

155
test/address.cpp Normal file
View file

@ -0,0 +1,155 @@
#include <stdio.h>
#include <string.h>
// Number of elements of array x. x is expanded without parentheses, so pass a plain array name.
#define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
/*
	Print one line per address expression of tbl for the instruction 'name',
	always with ymm7 as first and ymm4 as last operand.
	@param isJIT [in] true: Xbyak call syntax followed by dump(); false: assembler syntax
	@param name [in] mnemonic
	@param tbl [in] address expressions
	@param tblSize [in] number of entries of tbl
*/
void genVsibSub(bool isJIT, const char *name, const char *tbl[], size_t tblSize)
{
	for (size_t idx = 0; idx != tblSize; ++idx) {
		const char *addr = tbl[idx];
		if (isJIT) {
			printf("%s (ymm7, ptr[", name);
			printf("%s", addr);
			printf("], ymm4); dump();\n");
		} else {
			printf("%s ymm7, [", name);
			printf("%s", addr);
			printf("], ymm4\n");
		}
	}
}
/*
	Print the VSIB (vector index) addressing cases through genVsibSub():
	vgatherdpd with the xmm-index table, vgatherqpd with the ymm-index table and,
	when XBYAK64 is defined, vgatherdpd with the table that uses 64-bit base registers.
	@param isJIT [in] true: wrap the lines in "void genVsib() {" ... "}"
*/
void genVsib(bool isJIT)
{
if (isJIT) puts("void genVsib() {");
// address expressions with an xmm index register
const char *vm32xTbl[] = {
"xmm0",
"xmm0 * 1",
"xmm0 + 4",
"xmm0 + eax",
"xmm0 * 4 + ecx",
"xmm3 * 8 + edi + 123",
"xmm2 * 2 + 5",
"eax + xmm0",
"esp + xmm4",
};
// the same expressions with a ymm index register
const char *vm32yTbl[] = {
"ymm0",
"ymm0 * 1",
"ymm0 + 4",
"ymm0 + eax",
"ymm0 * 4 + ecx",
"ymm3 * 8 + edi + 123",
"ymm2 * 2 + 5",
"eax + ymm0",
"esp + ymm4",
};
genVsibSub(isJIT, "vgatherdpd", vm32xTbl, NUM_OF_ARRAY(vm32xTbl));
genVsibSub(isJIT, "vgatherqpd", vm32yTbl, NUM_OF_ARRAY(vm32yTbl));
#ifdef XBYAK64
// extra cases with 64-bit base registers and xmm15
const char *vm32x64Tbl[] = {
"xmm0 + r11",
"r13 + xmm15",
"123 + rsi + xmm2 * 4",
};
genVsibSub(isJIT, "vgatherdpd", vm32x64Tbl, NUM_OF_ARRAY(vm32x64Tbl));
#endif
if (isJIT) puts("}");
}
/*
	Print "mov ecx, [addr]" for every combination of
	base (each register or none) x index (each register except entry 4, or none)
	x scale {0,1,2,4,8} x displacement {0,1,1000,-1,-1000}, followed by the
	output of genVsib().
	@param isJIT [in] true: emit Xbyak calls grouped into functions gen0, gen1, ...
	(a new function is started after every 100 lines) plus a final gen() that
	calls all of them and genVsib(); false: emit plain assembler lines
	@param regTbl [in] register names
	@param regTblNum [in] number of entries of regTbl
*/
void genAddress(bool isJIT, const char regTbl[][5], size_t regTblNum)
{
int count = 0;
int funcNum = 1;
if (isJIT) {
puts("void gen0(){");
}
// i == regTblNum stands for "no base register"; base is not printed in that case
for (size_t i = 0; i < regTblNum + 1; i++) {
const char *base = regTbl[i];
// j == regTblNum stands for "no index register"; index is not printed in that case
for (size_t j = 0; j < regTblNum + 1; j++) {
if (j == 4) continue; /* esp is not index register */
const char *index = regTbl[j];
// scale 0 means the "*scale" suffix is omitted
static const int scaleTbl[] = { 0, 1, 2, 4, 8 };
for (size_t k = 0; k < NUM_OF_ARRAY(scaleTbl); k++) {
int scale = scaleTbl[k];
static const int dispTbl[] = { 0, 1, 1000, -1, -1000 };
for (size_t m = 0; m < NUM_OF_ARRAY(dispTbl); m++) {
int disp = dispTbl[m];
// stays true until a base or an index register has been printed
bool isFirst = true;
if (isJIT) {
printf("mov (ecx, ptr[");
} else {
printf("mov ecx, [");
}
if (i < regTblNum) {
printf("%s", base);
isFirst = false;
}
if (j < regTblNum) {
if (!isFirst) putchar('+');
printf("%s", index);
if (scale) printf("*%d", scale);
isFirst = false;
}
if (isFirst) {
// no register at all: the displacement is printed as an absolute hex address
if (isJIT) printf("(void*)");
printf("0x%08X", disp);
} else {
// signed displacement; '+' is added by hand for non-negative values
if (disp >= 0) {
putchar('+');
}
printf("%d", disp);
isFirst = false;
}
if (isJIT) {
printf("]); dump();\n");
} else {
printf("]\n");
}
if (isJIT) {
// close the current genN() and open the next one every 100 lines
count++;
if ((count % 100) == 0) {
printf("}\n void gen%d(){\n", funcNum++);
}
}
}
}
}
}
if (isJIT) puts("}");
genVsib(isJIT);
if (isJIT) {
// driver that calls every generated function in order
printf("void gen(){\n");
for (int i = 0; i < funcNum; i++) {
printf(" gen%d();\n", i);
}
puts("genVsib();");
printf("}\n");
}
}
/*
	usage: address [phase [any]]
	phase "1" generates the 32-bit register cases, anything else the 64-bit
	register cases (only when XBYAK64 is defined).
	A second argument, whatever its value, selects JIT (Xbyak) output instead of assembler output.
*/
int main(int argc, char *argv[])
{
	// skip the program name
	const int numArgs = argc - 1;
	char **args = argv + 1;
	const bool phase = numArgs > 0 && strcmp(args[0], "1") == 0;
	const bool isJIT = numArgs > 1;
	fprintf(stderr, "phase:%c %s\n", phase ? '1' : '2', isJIT ? "jit" : "asm");
	if (!phase) {
#ifdef XBYAK64
		fprintf(stderr, "64bit reg\n");
		static const char reg64Tbl[][5] = {
			"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
		};
		genAddress(isJIT, reg64Tbl, NUM_OF_ARRAY(reg64Tbl));
#endif
		return 0;
	}
	fprintf(stderr, "32bit reg\n");
	static const char reg32Tbl[][5] = {
		"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
#ifdef XBYAK64
		"r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d",
#endif
	};
	genAddress(isJIT, reg32Tbl, NUM_OF_ARRAY(reg32Tbl));
	return 0;
}

45
test/bad_address.cpp Normal file
View file

@ -0,0 +1,45 @@
#include <xbyak/xbyak.h>
/*
	Run 'state' and expect it to throw (any exception type is accepted).
	Increments the variable num of the enclosing scope for every use and the
	variable err when nothing was thrown, after printing the offending statement.
*/
#define TEST_EXCEPTION(state) \
{ \
num++; \
bool exception = false; \
try { \
state; \
} catch (...) { \
exception = true; \
} \
if (!exception) { \
printf("exception should arise for %s\n", #state); \
err++; \
} \
}
/*
	Code generator whose constructor tries a list of address expressions
	and expects every one of them to throw.
*/
struct Code : Xbyak::CodeGenerator {
Code()
{
// counters updated by TEST_EXCEPTION
int err = 0;
int num = 0;
TEST_EXCEPTION(mov(eax, ptr [esp + esp]));
TEST_EXCEPTION(mov(eax, ptr [ax])); // not support
TEST_EXCEPTION(mov(eax, ptr [esp * 4]));
TEST_EXCEPTION(mov(eax, ptr [eax * 16]));
TEST_EXCEPTION(mov(eax, ptr [eax + eax + eax]));
// NOTE(review): the next two checks are identical - confirm whether a different case was intended
TEST_EXCEPTION(mov(eax, ptr [eax * 2 + ecx * 4]));
TEST_EXCEPTION(mov(eax, ptr [eax * 2 + ecx * 4]));
TEST_EXCEPTION(vgatherdpd(xmm0, ptr [eax * 2], ymm3));
TEST_EXCEPTION(vgatherdpd(xmm0, ptr [xmm0 + xmm1], ymm3));
#ifdef XBYAK64
TEST_EXCEPTION(mov(eax, ptr [rax + eax]));
TEST_EXCEPTION(mov(eax, ptr [xmm0 + ymm0]));
#endif
// the summary line is printed only when every check threw
if (!err) {
printf("bad_address test %d ok\n", num);
}
}
};
// All checks run inside the constructor of Code; creating the object is the whole test.
int main()
{
Code c;
}

125
test/cvt_test.cpp Normal file
View file

@ -0,0 +1,125 @@
#include <xbyak/xbyak.h>
using namespace Xbyak;
using namespace Xbyak::util;
/*
	Table of registers that name the same register at different widths.
	Each row is one register; main() checks the cvt8/16/32(/64) conversions between its columns.
	reg8 is a pointer so that rows without an 8-bit form can hold 0.
*/
#ifdef XBYAK64
const struct Ptn {
const Reg8 *reg8;
Reg16 reg16;
Reg32 reg32;
Reg64 reg64;
} tbl[] = {
{ &al, ax, eax, rax },
{ &bl, bx, ebx, rbx },
{ &cl, cx, ecx, rcx },
{ &dl, dx, edx, rdx },
{ &sil, si, esi, rsi },
{ &dil, di, edi, rdi },
{ &bpl, bp, ebp, rbp },
{ &spl, sp, esp, rsp },
{ &r8b, r8w, r8d, r8 },
{ &r9b, r9w, r9d, r9 },
{ &r10b, r10w, r10d, r10 },
{ &r11b, r11w, r11d, r11 },
{ &r12b, r12w, r12d, r12 },
{ &r13b, r13w, r13d, r13 },
{ &r14b, r14w, r14d, r14 },
{ &r15b, r15w, r15d, r15 },
};
#else
// 32-bit build: no 64-bit column, and si/di/bp/sp rows have no 8-bit entry
const struct Ptn {
const Reg8 *reg8;
Reg16 reg16;
Reg32 reg32;
} tbl[] = {
{ &al, ax, eax },
{ &bl, bx, ebx },
{ &cl, cx, ecx },
{ &dl, dx, edx },
{ 0, si, esi },
{ 0, di, edi },
{ 0, bp, ebp },
{ 0, sp, esp },
};
#endif
// number of failed checks / number of checks executed so far
int errNum = 0;
int testNum = 0;

/*
	Count one check and report it when x and y differ.
	T must provide operator!= and a toString() usable with "%s".
*/
template<class T>
void verify(const T& x, const T& y)
{
	const bool differs = (x != y);
	if (differs) {
		printf("ERR %s %s\n", x.toString(), y.toString());
		++errNum;
	}
	++testNum;
}
/*
	Run 'state' and expect an Xbyak::Error whose value is ERR_CANT_CONVERT.
	Anything else (no exception, or an Xbyak::Error with another value) prints
	the statement and increments errNum; testNum is incremented in every case.
	Exceptions of other types are not caught here.
*/
#define verifyExp(state) \
{ \
bool isOK = false; \
try { \
state; \
} catch (const Xbyak::Error& e) { \
if ((int)e == ERR_CANT_CONVERT) { \
isOK = true; \
} \
} \
if (!isOK) { \
printf("ERR " #state "\n"); \
errNum++; \
} \
testNum++; \
}
/*
	Check the cvtN() conversions for every row of tbl, then check that the
	conversions listed in the error tables throw ERR_CANT_CONVERT, and print
	the number of checks and failures.
	The whole body is a function-try-block: a std::exception escaping it is printed as "ERR ...".
*/
int main()
try
{
for (size_t i = 0; i < sizeof(tbl) / sizeof(tbl[0]); i++) {
// conversions that involve the 8-bit register, only for rows that have one
if (tbl[i].reg8) {
verify(tbl[i].reg8->cvt8(), *tbl[i].reg8);
verify(tbl[i].reg8->cvt16(), tbl[i].reg16);
verify(tbl[i].reg8->cvt32(), tbl[i].reg32);
verify(tbl[i].reg16.cvt8(), *tbl[i].reg8);
verify(tbl[i].reg32.cvt8(), *tbl[i].reg8);
}
verify(tbl[i].reg16.cvt16(), tbl[i].reg16);
verify(tbl[i].reg16.cvt32(), tbl[i].reg32);
verify(tbl[i].reg32.cvt16(), tbl[i].reg16);
verify(tbl[i].reg32.cvt32(), tbl[i].reg32);
#ifdef XBYAK64
// conversions from and to the 64-bit register
if (tbl[i].reg8) {
verify(tbl[i].reg64.cvt8(), *tbl[i].reg8);
verify(tbl[i].reg8->cvt64(), tbl[i].reg64);
}
verify(tbl[i].reg64.cvt16(), tbl[i].reg16);
verify(tbl[i].reg64.cvt32(), tbl[i].reg32);
verify(tbl[i].reg64.cvt64(), tbl[i].reg64);
verify(tbl[i].reg16.cvt64(), tbl[i].reg64);
verify(tbl[i].reg32.cvt64(), tbl[i].reg64);
#endif
}
{
// ah/bh/ch/dh: cvt16() is expected to throw
const Reg8 errTbl[] = {
ah, bh, ch, dh
};
for (size_t i = 0; i < sizeof(errTbl) / sizeof(errTbl[0]); i++) {
verifyExp(errTbl[i].cvt16());
}
}
#ifdef XBYAK32
{
// 32-bit build: cvt8() of si/di/bp/sp is expected to throw
const Reg16 errTbl[] = {
si, di, bp, sp
};
for (size_t i = 0; i < sizeof(errTbl) / sizeof(errTbl[0]); i++) {
verifyExp(errTbl[i].cvt8());
}
}
#endif
printf("test=%d(err=%d)\n", testNum, errNum);
} catch (std::exception& e) {
printf("ERR %s\n", e.what());
}

27
test/cybozu/COPYRIGHT Normal file
View file

@ -0,0 +1,27 @@
Copyright (c) 2007-2012 Cybozu Labs, Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
Neither the name of the Cybozu Labs, Inc. nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.

121
test/cybozu/inttype.hpp Normal file
View file

@ -0,0 +1,121 @@
#pragma once
/**
@file
@brief int type definition and macros
Copyright (C) 2008 Cybozu Labs, Inc., all rights reserved.
*/
// Fixed-width integer types.
// Compilers up to _MSC_VER 1500 get hand-written typedefs; everything else uses <stdint.h>.
// The version test must read _MSC_VER: the misspelled MSC_VER is never defined,
// evaluates to 0 in #if and made the condition true for every Visual C++ version.
#if defined(_MSC_VER) && (_MSC_VER <= 1500)
	typedef __int64 int64_t;
	typedef unsigned __int64 uint64_t;
	typedef unsigned int uint32_t;
	typedef int int32_t;
	typedef unsigned short uint16_t;
	typedef short int16_t;
	typedef unsigned char uint8_t;
	typedef signed char int8_t;
#else
	#include <stdint.h>
#endif
// ssize_t: declared by hand for Visual C++ (64 bits wide when _WIN64 is defined,
// 32 bits otherwise), taken from <unistd.h> everywhere else.
// CYBOZU_DEFINED_SSIZE_T guards against a second definition.
#ifdef _MSC_VER
#ifndef CYBOZU_DEFINED_SSIZE_T
#define CYBOZU_DEFINED_SSIZE_T
#ifdef _WIN64
typedef int64_t ssize_t;
#else
typedef int32_t ssize_t;
#endif
#endif
#else
#include <unistd.h> // for ssize_t
#endif
// CYBOZU_ALIGN(x): alignment specifier in the compiler's own syntax; can be predefined by the user.
#ifndef CYBOZU_ALIGN
#ifdef _MSC_VER
#define CYBOZU_ALIGN(x) __declspec(align(x))
#else
#define CYBOZU_ALIGN(x) __attribute__((aligned(x)))
#endif
#endif
// CYBOZU_ALLOCA(x): allocate x bytes of temporary storage; can be predefined by the user.
// The non-MSVC branch used to define only the misspelled CYBOZU_ALLOCA_, which left
// CYBOZU_ALLOCA undefined there; the old spelling is kept as an alias.
// NOTE(review): _malloca is documented to require _freea, unlike __builtin_alloca - confirm callers cope with that.
#ifndef CYBOZU_ALLOCA
	#ifdef _MSC_VER
		#include <malloc.h>
		#define CYBOZU_ALLOCA(x) _malloca(x)
	#else
		#define CYBOZU_ALLOCA(x) __builtin_alloca(x)
		#define CYBOZU_ALLOCA_(x) CYBOZU_ALLOCA(x)
	#endif
#endif
// CYBOZU_FOREACH(type_x, xs): range loop in the compiler's own syntax.
// It stays undefined when the compiler is neither Visual C++ (_MSC_VER >= 1400) nor GNU-compatible.
#ifndef CYBOZU_FOREACH
// std::vector<int> v; CYBOZU_FOREACH(auto x, v) {...}
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
#define CYBOZU_FOREACH(type_x, xs) for each (type_x in xs)
#elif defined(__GNUC__)
#define CYBOZU_FOREACH(type_x, xs) for (type_x : xs)
#endif
#endif
// number of elements of array x
#ifndef CYBOZU_NUM_OF_ARRAY
#define CYBOZU_NUM_OF_ARRAY(x) (sizeof(x) / sizeof(*x))
#endif
// snprintf wrapper that discards the return value; len is the size of buffer x
#ifndef CYBOZU_SNPRINTF
#ifdef _MSC_VER
#define CYBOZU_SNPRINTF(x, len, ...) (void)_snprintf_s(x, len, len - 1, __VA_ARGS__)
#else
#define CYBOZU_SNPRINTF(x, len, ...) (void)snprintf(x, len, __VA_ARGS__)
#endif
#endif
// CYBOZU_CPP_VERSION: one of the three values below, chosen from the compiler macros
#define CYBOZU_CPP_VERSION_CPP03 0
#define CYBOZU_CPP_VERSION_TR1 1
#define CYBOZU_CPP_VERSION_CPP11 2
#if (__cplusplus >= 201103) || (_MSC_VER >= 1500) || defined(__GXX_EXPERIMENTAL_CXX0X__)
#if defined(_MSC_VER) && (_MSC_VER <= 1600)
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_TR1
#else
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP11
#endif
// NOTE(review): the GNU test requires __GNUC_MINOR__ >= 5 for every major version,
// so e.g. 5.0 to 5.4 do not match it - confirm this is intended.
#elif (__GNUC__ >= 4 && __GNUC_MINOR__ >= 5) || (__clang_major__ >= 3)
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_TR1
#else
#define CYBOZU_CPP_VERSION CYBOZU_CPP_VERSION_CPP03
#endif
// namespace helpers: std::tr1 for the TR1 level, plain std otherwise
#if (CYBOZU_CPP_VERSION == CYBOZU_CPP_VERSION_TR1)
#define CYBOZU_NAMESPACE_STD std::tr1
#define CYBOZU_NAMESPACE_TR1_BEGIN namespace tr1 {
#define CYBOZU_NAMESPACE_TR1_END }
#else
#define CYBOZU_NAMESPACE_STD std
#define CYBOZU_NAMESPACE_TR1_BEGIN
#define CYBOZU_NAMESPACE_TR1_END
#endif
// CYBOZU_OS_BIT: 64 when _WIN64 or __x86_64__ is defined, otherwise 32
#ifndef CYBOZU_OS_BIT
#if defined(_WIN64) || defined(__x86_64__)
#define CYBOZU_OS_BIT 64
#else
#define CYBOZU_OS_BIT 32
#endif
#endif
// CYBOZU_ENDIAN: LITTLE for the x86 / x86-64 macros listed below, UNKNOWN otherwise
// (CYBOZU_ENDIAN_BIG is defined but never selected here)
#ifndef CYBOZU_ENDIAN
#define CYBOZU_ENDIAN_UNKNOWN 0
#define CYBOZU_ENDIAN_LITTLE 1
#define CYBOZU_ENDIAN_BIG 2
#if defined(_M_IX86) || defined(_M_AMD64) || defined(__x86_64__) || defined(__i386__)
#define CYBOZU_ENDIAN CYBOZU_ENDIAN_LITTLE
#else
#define CYBOZU_ENDIAN CYBOZU_ENDIAN_UNKNOWN
#endif
#endif
namespace cybozu {

/**
	take any object by reference and do nothing with it
	(used to mark a variable as used)
*/
template<class T>
void disable_warning_unused_variable(const T&)
{
}

/**
	convert a pointer to const S into T by way of const void*
*/
template<class T, class S>
T cast(const S* ptr)
{
	const void *raw = ptr;
	return static_cast<T>(raw);
}

/**
	convert a pointer to S into T by way of void*
*/
template<class T, class S>
T cast(S* ptr)
{
	void *raw = ptr;
	return static_cast<T>(raw);
}

} // cybozu

345
test/cybozu/test.hpp Normal file
View file

@ -0,0 +1,345 @@
#pragma once
/**
@file
@brief unit test class
Copyright (C) 2008 Cybozu Labs, Inc., all rights reserved.
*/
#include <stdio.h>
#include <string.h>
#include <string>
#include <list>
#include <iostream>
#include <utility>
// uint32_t / uint64_t: from cybozu/inttype.hpp for compilers up to _MSC_VER 1500, from <stdint.h> otherwise.
// The test must read _MSC_VER; the misspelled MSC_VER is never defined and evaluates to 0,
// which made the condition true for every Visual C++ version.
#if defined(_MSC_VER) && (_MSC_VER <= 1500)
	#include <cybozu/inttype.hpp>
#else
	#include <stdint.h>
#endif
namespace cybozu { namespace test {
/**
	registry and runner of the unit tests of one executable

	Tests are appended as (name, function) pairs and executed in that order by run().
	set() is called once per check to count successes and failures.
*/
class AutoRun {
	typedef void (*Func)();
	typedef std::list<std::pair<const char*, Func> > UnitTestList;
public:
	AutoRun()
		: init_(0)
		, term_(0)
		, okCount_(0)
		, ngCount_(0)
		, exceptionCount_(0)
	{
	}
	/**
		register functions called once before the first and once after the last test
		@param init [in] may be 0
		@param term [in] may be 0
	*/
	void setup(Func init, Func term)
	{
		init_ = init;
		term_ = term;
	}
	/**
		register a test
		@param name [in] module name; only the pointer is stored
		@param func [in] test body
	*/
	void append(const char *name, Func func)
	{
		list_.push_back(std::make_pair(name, func));
	}
	/**
		count the result of one check
		@param isOK [in] true for success
	*/
	void set(bool isOK)
	{
		if (isOK) {
			okCount_++;
		} else {
			ngCount_++;
		}
	}
	/**
		strip the directory part (up to the last path separator) and
		everything from the first '.' of what remains
	*/
	std::string getBaseName(const std::string& name) const
	{
#ifdef _WIN32
		const char sep = '\\';
#else
		const char sep = '/';
#endif
		// npos + 1 wraps to 0, so a name without separator is kept as a whole
		size_t pos = name.find_last_of(sep);
		std::string ret = name.substr(pos + 1);
		pos = ret.find('.');
		return ret.substr(0, pos);
	}
	/**
		run all registered tests and print the summary line
		@param argv [in] argv[0] is used for the name shown in the summary
		@return 0 if every check passed and no test threw, otherwise 1
		@note an exception thrown by a test stops that test only and is counted;
		an exception from init/term aborts the run and is reported instead of the summary
	*/
	int run(int, char *argv[])
	{
		std::string msg;
		try {
			if (init_) init_();
			for (UnitTestList::const_iterator i = list_.begin(), ie = list_.end(); i != ie; ++i) {
				std::cout << "ctest:module=" << i->first << std::endl;
				try {
					(i->second)();
				} catch (std::exception& e) {
					exceptionCount_++;
					std::cout << "ctest: " << i->first << " is stopped by exception " << e.what() << std::endl;
				} catch (...) {
					exceptionCount_++;
					std::cout << "ctest: " << i->first << " is stopped by unknown exception" << std::endl;
				}
			}
			if (term_) term_();
		} catch (std::exception& e) {
			msg = std::string("ctest:err:") + e.what();
		} catch (...) {
			msg = "ctest:err: catch unknown exception";
		}
		fflush(stdout);
		if (!msg.empty()) {
			std::cout << msg << std::endl;
			return 1;
		}
		const int failed = ngCount_ + exceptionCount_;
		std::cout << "ctest:name=" << getBaseName(*argv)
			<< ", module=" << list_.size()
			<< ", total=" << (okCount_ + failed)
			<< ", ok=" << okCount_
			<< ", ng=" << ngCount_
			<< ", exception=" << exceptionCount_ << std::endl;
		// report failures through the exit code so that scripts can detect them
		return failed > 0 ? 1 : 0;
	}
	/** the instance shared by the test macros */
	static inline AutoRun& getInstance()
	{
		static AutoRun instance;
		return instance;
	}
private:
	Func init_;
	Func term_;
	int okCount_;
	int ngCount_;
	int exceptionCount_;
	UnitTestList list_;
};
// the runner used by all test macros
static AutoRun& autoRun = AutoRun::getInstance();

/**
	record the result of one check and print its location when it failed
	@param ret [in] result of the check
	@param msg [in] name of the macro
	@param param [in] text of the macro arguments
	@param file [in] source file of the check
	@param line [in] source line of the check
*/
inline void test(bool ret, const std::string& msg, const std::string& param, const char *file, int line)
{
	autoRun.set(ret);
	if (ret) return;
	printf("%s(%d):ctest:%s(%s);\n", file, line, msg.c_str(), param.c_str());
}
/**
	generic comparison by operator==
*/
template<typename T, typename U>
bool isEqual(const T& lhs, const U& rhs)
{
	return lhs == rhs;
}

// C strings are compared by content, not by address;
// one overload per const/non-const combination so that none falls back to the template
inline bool isEqual(const char *lhs, const char *rhs)
{
	return strcmp(lhs, rhs) == 0;
}
inline bool isEqual(char *lhs, const char *rhs)
{
	return strcmp(lhs, rhs) == 0;
}
inline bool isEqual(const char *lhs, char *rhs)
{
	return strcmp(lhs, rhs) == 0;
}
inline bool isEqual(char *lhs, char *rhs)
{
	return strcmp(lhs, rhs) == 0;
}
// floating point values are compared by their object representation instead of operator==
// (so 0.0 and -0.0 differ); memcmp is used because reading the inactive member
// of a union is not defined behavior in C++
inline bool isEqual(float lhs, float rhs)
{
	return memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
}
inline bool isEqual(double lhs, double rhs)
{
	return memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
}
} } // cybozu::test
// Default entry point: runs every registered test and returns the result of run().
// Define CYBOZU_TEST_DISABLE_AUTO_RUN before including this header to provide your own main().
#ifndef CYBOZU_TEST_DISABLE_AUTO_RUN
int main(int argc, char *argv[])
{
return cybozu::test::autoRun.run(argc, argv);
}
#endif
/**
alert if !x
@param x [in] expression converted to bool
*/
#define CYBOZU_TEST_ASSERT(x) cybozu::test::test(!!(x), "CYBOZU_TEST_ASSERT", #x, __FILE__, __LINE__)
/**
alert if x != y
@param x [in]
@param y [in]
@note compared with cybozu::test::isEqual; on failure x and y are evaluated
a second time to print them, so they should be free of side effects
*/
#define CYBOZU_TEST_EQUAL(x, y) { \
bool eq = cybozu::test::isEqual(x, y); \
cybozu::test::test(eq, "CYBOZU_TEST_EQUAL", #x ", " #y, __FILE__, __LINE__); \
if (!eq) { \
std::cout << "ctest: lhs=" << (x) << std::endl; \
std::cout << "ctest: rhs=" << (y) << std::endl; \
} \
}
/**
	alert if fabs(x - y) >= eps
	@param x [in]
	@param y [in]
	@param eps [in] tolerance; parenthesized in the expansion, so any expression is accepted
	@note fabs has to be declared where the macro is used; this header does not include <math.h>
	@note on failure x and y are evaluated a second time to print them
*/
#define CYBOZU_TEST_NEAR(x, y, eps) { \
	bool isNear = fabs((x) - (y)) < (eps); \
	cybozu::test::test(isNear, "CYBOZU_TEST_NEAR", #x ", " #y, __FILE__, __LINE__); \
	if (!isNear) { \
		std::cout << "ctest: lhs=" << (x) << std::endl; \
		std::cout << "ctest: rhs=" << (y) << std::endl; \
	} \
}
/**
	alert if the pointers x and y differ
	@param x [in]
	@param y [in]
	@note the addresses are compared (and printed as const void*), not the pointed-to values;
	both arguments are parenthesized in the expansion, so any expression is accepted
*/
#define CYBOZU_TEST_EQUAL_POINTER(x, y) { \
	bool eq = (x) == (y); \
	cybozu::test::test(eq, "CYBOZU_TEST_EQUAL_POINTER", #x ", " #y, __FILE__, __LINE__); \
	if (!eq) { \
		std::cout << "ctest: lhs=" << static_cast<const void*>(x) << std::endl; \
		std::cout << "ctest: rhs=" << static_cast<const void*>(y) << std::endl; \
	} \
}
/**
always alert
@param msg [in]
*/
#define CYBOZU_TEST_FAIL(msg) cybozu::test::test(false, "CYBOZU_TEST_FAIL", msg, __FILE__, __LINE__)
/**
verify message in exception
@param statement [in] code expected to throw
@param Exception [in] expected exception type (caught by const reference)
@param msg [in] text that must occur somewhere in what()
@note fails when nothing is thrown, when what() does not contain msg,
or when an exception of another type is thrown
*/
#define CYBOZU_TEST_EXCEPTION_MESSAGE(statement, Exception, msg) \
{ \
int ret = 0; \
std::string errMsg; \
try { \
statement; \
ret = 1; \
} catch (const Exception& e) { \
errMsg = e.what(); \
if (errMsg.find(msg) == std::string::npos) { \
ret = 2; \
} \
} catch (...) { \
ret = 3; \
} \
if (ret) { \
cybozu::test::test(false, "CYBOZU_TEST_EXCEPTION_MESSAGE", #statement ", " #Exception ", " #msg, __FILE__, __LINE__); \
if (ret == 1) { \
std::cout << "ctest: no exception" << std::endl; \
} else if (ret == 2) { \
std::cout << "ctest: bad exception msg:" << errMsg << std::endl; \
} else { \
std::cout << "ctest: unexpected exception" << std::endl; \
} \
} else { \
cybozu::test::autoRun.set(true); \
} \
}
/**
verify statement throws Exception
@param statement [in] code expected to throw
@param Exception [in] expected exception type (caught by const reference)
@note fails when nothing is thrown or when an exception of another type is thrown
*/
#define CYBOZU_TEST_EXCEPTION(statement, Exception) \
{ \
int ret = 0; \
try { \
statement; \
ret = 1; \
} catch (const Exception&) { \
} catch (...) { \
ret = 2; \
} \
if (ret) { \
cybozu::test::test(false, "CYBOZU_TEST_EXCEPTION", #statement ", " #Exception, __FILE__, __LINE__); \
if (ret == 1) { \
std::cout << "ctest: no exception" << std::endl; \
} else { \
std::cout << "ctest: unexpected exception" << std::endl; \
} \
} else { \
cybozu::test::autoRun.set(true); \
} \
}
/**
verify statement does not throw
@param statement [in] code expected to complete normally
@note any exception is caught and counted as a failure
@note the expansion is a bare try/catch without enclosing braces
*/
#define CYBOZU_TEST_NO_EXCEPTION(statement) \
try { \
statement; \
cybozu::test::autoRun.set(true); \
} catch (...) { \
cybozu::test::test(false, "CYBOZU_TEST_NO_EXCEPTION", #statement, __FILE__, __LINE__); \
}
/**
append auto unit test
@param name [in] module name
@note declares the test function, registers it from the constructor of a
global helper object, and ends with the function header, so the test body
follows the macro directly: CYBOZU_TEST_AUTO(name) { ... }
*/
#define CYBOZU_TEST_AUTO(name) \
void cybozu_test_ ## name(); \
struct cybozu_test_local_ ## name { \
cybozu_test_local_ ## name() \
{ \
cybozu::test::autoRun.append(#name, cybozu_test_ ## name); \
} \
} cybozu_test_local_instance_ ## name; \
void cybozu_test_ ## name()
/**
append auto unit test with fixture
@param name [in] module name
@param Fixture [in] class name of fixture; one object is created for the
duration of each call of the test body
*/
#define CYBOZU_TEST_AUTO_WITH_FIXTURE(name, Fixture) \
void cybozu_test_ ## name(); \
void cybozu_test_real_ ## name() \
{ \
Fixture f; \
cybozu_test_ ## name(); \
} \
struct cybozu_test_local_ ## name { \
cybozu_test_local_ ## name() \
{ \
cybozu::test::autoRun.append(#name, cybozu_test_real_ ## name); \
} \
} cybozu_test_local_instance_ ## name; \
void cybozu_test_ ## name()
/**
setup fixture
@param Fixture [in] class name of fixture
@note the constructor of Fixture is called before the tests and the destructor after them
(registered through autoRun.setup as the init and term functions)
*/
#define CYBOZU_TEST_SETUP_FIXTURE(Fixture) \
Fixture *cybozu_test_local_fixture; \
void cybozu_test_local_init() \
{ \
cybozu_test_local_fixture = new Fixture(); \
} \
void cybozu_test_local_term() \
{ \
delete cybozu_test_local_fixture; \
} \
struct cybozu_test_local_fixture_setup_ { \
cybozu_test_local_fixture_setup_() \
{ \
cybozu::test::autoRun.setup(cybozu_test_local_init, cybozu_test_local_term); \
} \
} cybozu_test_local_fixture_setup_instance_;

1151
test/jmp.cpp Normal file

File diff suppressed because it is too large Load diff

20
test/jmp.sln Normal file
View file

@ -0,0 +1,20 @@
þ½Ž¿
Microsoft Visual Studio Solution File, Format Version 10.00
# Visual C++ Express 2008
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jmp", "jmp.vcproj", "{AC0B3317-E988-44F8-954A-BCBE4B3BB2BF}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Release|Win32 = Release|Win32
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{AC0B3317-E988-44F8-954A-BCBE4B3BB2BF}.Debug|Win32.ActiveCfg = Debug|Win32
{AC0B3317-E988-44F8-954A-BCBE4B3BB2BF}.Debug|Win32.Build.0 = Debug|Win32
{AC0B3317-E988-44F8-954A-BCBE4B3BB2BF}.Release|Win32.ActiveCfg = Release|Win32
{AC0B3317-E988-44F8-954A-BCBE4B3BB2BF}.Release|Win32.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

195
test/jmp.vcproj Normal file
View file

@ -0,0 +1,195 @@
<?xml version="1.0" encoding="shift_jis"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="9.00"
Name="jmp"
ProjectGUID="{AC0B3317-E988-44F8-954A-BCBE4B3BB2BF}"
RootNamespace="jmp"
Keyword="Win32Proj"
TargetFrameworkVersion="196613"
>
<Platforms>
<Platform
Name="Win32"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(SolutionDir)$(ConfigurationName)"
IntermediateDirectory="$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(SolutionDir)/../"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="3"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(SolutionDir)$(ConfigurationName)"
IntermediateDirectory="$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="1"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
AdditionalIncludeDirectories="$(SolutionDir)/../"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="2"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="3"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="ソース ファイル"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath=".\jmp.cpp"
>
</File>
</Filter>
<Filter
Name="ヘッダー ファイル"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
</Filter>
<Filter
Name="リソース ファイル"
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

63
test/lib.h Normal file
View file

@ -0,0 +1,63 @@
#pragma once
#include <stdio.h>
/*
	Minimal register-like value: wraps one integer.
*/
struct Reg {
	int r_;
	Reg(int idx)
		: r_(idx)
	{
	}
};

/*
	Accessors for three shared Reg objects.
	Each object is a function-local static, so it is constructed on the first call.
*/
inline const Reg& getReg0()
{
	static const Reg reg0(0);
	return reg0;
}
inline const Reg& getReg1()
{
	static const Reg reg1(1);
	return reg1;
}
inline const Reg& getReg2()
{
	static const Reg reg2(2);
	return reg2;
}

/* references (one set per translation unit) bound to the objects above */
static const Reg& r0 = getReg0();
static const Reg& r1 = getReg1();
static const Reg& r2 = getReg2();
/*
	Print the address and the r_ value of r0, r1 and r2.
	Each line is labelled with the register it describes and shows the address
	of that register. The pointers are converted to const void* because that
	is what the %p conversion expects.
*/
inline void putReg()
{
	puts("putReg");
	printf("r0=%p, %d\n", static_cast<const void*>(&r0), r0.r_);
	printf("r1=%p, %d\n", static_cast<const void*>(&r1), r1.r_);
	printf("r2=%p, %d\n", static_cast<const void*>(&r2), r2.r_);
}
/*
	Object that reports its construction and destruction on stdout.
	Used to observe when the static instance XT<>::a is initialized.
*/
struct A {
	A()
		: a(5)
	{
		puts("A cstr");
	}
	~A()
	{
		puts("A dstr");
	}
	/* print the current value of a */
	void put() const
	{
		printf("a=%d\n", this->a);
	}
	int a;
};
/*
	Class template whose only purpose is to hold a static A.
	Defining the static member of a template in a header allows the
	definition to appear in several translation units.
*/
template<int unused = 0>
struct XT {
	static A a;
};
template<int unused>
A XT<unused>::a;

/* X is the instantiation with the default argument (0) */
typedef XT<> X;
void init();
struct Init {
Init()
{
puts("Init");
init();
putReg();
}
};
static Init s_init;

51
test/lib_min.cpp Normal file
View file

@ -0,0 +1,51 @@
#include <stdio.h>
/* first static object of the file; its constructor prints "XXX" */
struct XXX {
	XXX()
	{
		puts("XXX");
	}
};
static const XXX s_sss;
/*
	Object that reports its construction and destruction on stdout
	and holds the value 123 after construction.
*/
struct A {
	A()
		: aaa(123)
	{
		puts("A cstr");
	}
	~A()
	{
		puts("A dstr");
	}
	/* print the current value of aaa */
	void put() const
	{
		printf("aaa=%d\n", this->aaa);
	}
	int aaa;
};
/* class template holding a static A; X names the default instantiation */
template<int unused = 0>
struct XT {
	static A sss;
};
template<int unused>
A XT<unused>::sss;

typedef XT<> X;
/*
	Static object whose constructor prints "Init" and then the value of X::sss.
	It shows what X::sss contains at the time this static object is initialized.
*/
struct Init {
	Init()
	{
		puts("Init");
		X::sss.put();
	}
};
static Init s_init;
/* prints "f" and returns 4; used as a dynamic initializer below */
int f()
{
	puts("f");
	return 4;
}

/* initialized by calling f() during static initialization */
static const int r = f();
/* prints "main", the value of r and the value held by X::sss */
int main()
{
	puts("main");
	printf("r=%d\n", r);
	X::sss.put();
	return 0;
}

9
test/lib_run.cpp Normal file
View file

@ -0,0 +1,9 @@
#include "lib.h"
/* prints "main", the value held by X::a and the register table */
int main()
{
	puts("main");
	X::a.put();
	putReg();
	return 0;
}

13
test/lib_test.cpp Normal file
View file

@ -0,0 +1,13 @@
#include "lib.h"
/*
	Called from the constructor of Init (see lib.h).
	Prints the state of the first-call flag on every call.
	Only the first call prints X::a and the register table.
*/
void init()
{
	static bool firstCall = true;
	printf("in lib_test %d\n", firstCall);
	if (firstCall) {
		firstCall = false;
		X::a.put();
		putReg();
	}
}

2190
test/make_512.cpp Normal file

File diff suppressed because it is too large Load diff

3309
test/make_nm.cpp Normal file

File diff suppressed because it is too large Load diff

105
test/misc.cpp Normal file
View file

@ -0,0 +1,105 @@
#include <stdio.h>
#include <string.h>
#include <string>
#include <xbyak/xbyak.h>
#include <cybozu/inttype.hpp>
#include <cybozu/test.hpp>
using namespace Xbyak;
// Checks setSize()/getSize() at the very end of a generator constructed with 4096
// (presumably the buffer size in bytes; confirm against Xbyak::CodeGenerator).
CYBOZU_TEST_AUTO(setSize)
{
struct Code : Xbyak::CodeGenerator {
Code() : Xbyak::CodeGenerator(4096)
{
// set the size to 4095 and emit one byte: the size is expected to become 4096
setSize(4095);
db(1);
size_t size = getSize();
CYBOZU_TEST_EQUAL(size, 4096u);
// setting the size to the current value must not throw
CYBOZU_TEST_NO_EXCEPTION(setSize(size));
// emitting one more byte is expected to throw Xbyak::Error
CYBOZU_TEST_EXCEPTION(db(1), Xbyak::Error);
}
} code;
}
// Checks operator== / operator!= on registers and on address expressions.
CYBOZU_TEST_AUTO(compOperand)
{
using namespace Xbyak::util;
// same register compares equal; registers of different kinds compare unequal
CYBOZU_TEST_ASSERT(eax == eax);
CYBOZU_TEST_ASSERT(ecx != xmm0);
// address expressions: same frame and same expression compare equal
CYBOZU_TEST_ASSERT(ptr[eax] == ptr[eax]);
// a different address frame (dword vs ptr) or a different displacement compares unequal
CYBOZU_TEST_ASSERT(dword[eax] != ptr[eax]);
CYBOZU_TEST_ASSERT(ptr[eax] != ptr[eax+3]);
}
// Checks which immediate values mov(<size>[eax], imm) accepts for each operand size.
CYBOZU_TEST_AUTO(mov_const)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
// v     : immediate passed to mov
// bit   : operand size selecting byte/word/dword/qword
// error : true if the mov is expected to throw Xbyak::Error
const struct {
uint64_t v;
int bit;
bool error;
} tbl[] = {
{ uint64_t(-1), 8, false },
{ 0x12, 8, false },
{ 0x80, 8, false },
{ 0xff, 8, false },
{ 0x100, 8, true },
{ 1, 16, false },
{ uint64_t(-1), 16, false },
{ 0x7fff, 16, false },
{ 0xffff, 16, false },
{ 0x10000, 16, true },
{ uint64_t(-1), 32, false },
{ 0x7fffffff, 32, false },
{ uint64_t(-0x7fffffff), 32, false },
{ 0xffffffff, 32, false },
{ 0x100000000ull, 32, true },
// the qword rows are compiled only when XBYAK64 is defined
#ifdef XBYAK64
{ uint64_t(-1), 64, false },
{ 0x7fffffff, 64, false },
{ 0xffffffffffffffffull, 64, false },
{ 0x80000000, 64, true },
{ 0xffffffff, 64, true },
#endif
};
for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
const int bit = tbl[i].bit;
const uint64_t v = tbl[i].v;
// pick the address frame matching the operand size; any bit value other than 8/16/32 selects qword
const Xbyak::AddressFrame& af = bit == 8 ? byte : bit == 16 ? word : bit == 32 ? dword : qword;
if (tbl[i].error) {
CYBOZU_TEST_EXCEPTION(mov(af[eax], v), Xbyak::Error);
} else {
CYBOZU_TEST_NO_EXCEPTION(mov(af[eax], v));
}
}
}
} code;
}
// Checks that align(16) leaves the current position on a 16-byte boundary.
CYBOZU_TEST_AUTO(align)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
const size_t alignSize = 16;
// emit 0..19 filler bytes, align, and verify the resulting address each time
for (int padding = 0; padding < 20; padding++) {
for (int i = 0; i < padding; i++) {
db(1);
}
align(alignSize);
CYBOZU_TEST_EQUAL(size_t(getCurr()) % alignSize, 0u);
}
align(alignSize);
const uint8 *p = getCurr();
// do nothing if aligned
align(alignSize);
CYBOZU_TEST_EQUAL(p, getCurr());
}
} c;
}

37
test/mprotect_test.cpp Normal file
View file

@ -0,0 +1,37 @@
#define XBYAK_NO_OP_NAMES
#include "xbyak/xbyak.h"
#include <string.h>
#include <vector>
// Generates a function that puts the constructor argument x into eax and returns.
// main() calls it through an int (*)() pointer, so the result is x.
struct Code : Xbyak::CodeGenerator {
Code(int x)
{
mov(eax, x);
ret();
}
};
/*
	Creates N generators, runs the function generated by each of them,
	sums the results and prints the sum.
	A std::exception raised anywhere in the body is reported as "ERR ...".
*/
int main()
	try
{
#ifdef XBYAK_USE_MMAP_ALLOCATOR
	puts("use Allocator with mmap");
#else
	puts("use Allocator with posix_memalign");
#endif
	const int N = 70000;
	std::vector<Code*> codes(N);
	for (int i = 0; i < N; i++) {
		codes[i] = new Code(i);
	}
	long long total = 0;
	for (int i = 0; i < N; i++) {
		int (*func)() = codes[i]->getCode<int (*)()>();
		total += func();
	}
	for (int i = 0; i < N; i++) {
		delete codes[i];
	}
	printf("sum=%lld\n", total);
} catch (std::exception& e) {
	printf("ERR %s\n", e.what());
}

51
test/nm_frame.cpp Normal file
View file

@ -0,0 +1,51 @@
#include <stdio.h>
#define XBYAK_ENABLE_OMITTED_OPERAND
#include "xbyak/xbyak.h"
using namespace Xbyak;
#ifdef _MSC_VER
#pragma warning(disable : 4245)
#endif
// Code generator whose public members come from the included file nm.cpp.
// The test scripts create nm.cpp by redirecting the output of "make_nm jit"
// (or "address ... jit" / "make_512 jit") before this file is compiled.
// main() calls gen() on an instance, so nm.cpp is expected to provide gen().
class Sample : public CodeGenerator {
// declared but not defined: assignment is disabled
void operator=(const Sample&);
public:
#include "nm.cpp"
};
/* turns the argument into a string literal */
#define _STR(x) #x
/*
	Run `syntax` and report it on stdout if it does NOT throw.
	Requires a `bool err` variable in the enclosing scope.
	- Xbyak::Error is caught by const reference (no copy of the exception object)
	- any other exception is also treated as the expected failure
	- wrapped in do { } while (0) so that TEST(...); behaves as a single statement
*/
#define TEST(syntax) \
	do { \
		err = true; \
		try { syntax; err = false; } catch (const Xbyak::Error&) { } catch (...) { } \
		if (!err) printf("should be err:%s;\n", _STR(syntax)); \
	} while (0)
// Negative tests: every statement passed to TEST() is expected to throw.
// TEST() prints "should be err:..." for a statement that does not throw.
class ErrorSample : public CodeGenerator {
// declared but not defined: assignment is disabled
void operator=(const ErrorSample&);
public:
void gen()
{
// set and read by the TEST() macro
bool err;
// presumably rejected because ptr[] gives no operand size for the immediate - confirm in xbyak.h
TEST(mov(ptr[eax],1));
TEST(test(ptr[eax],1));
TEST(adc(ptr[eax],1));
// setz with a 32-bit register operand is expected to be rejected
TEST(setz(eax));
}
};
int main()
try
{
size_t size = sizeof(Xbyak::Operand);
if (size != 4) {
printf("sizeof Operand %d\n", (int)size);
}
try {
Sample s;
s.gen();
} catch (std::exception& e) {
printf("ERR:%s\n", e.what());
} catch (...) {
printf("unknown error\n");
}
ErrorSample es;
es.gen();
} catch (std::exception& e) {
printf("err %s\n", e.what());
return 1;
}

45
test/normalize_prefix.cpp Normal file
View file

@ -0,0 +1,45 @@
/*
normalize prefix
*/
#include <string>
#include <set>
#include <iostream>
#include <memory.h>
typedef unsigned char uint8;
/*
	Normalize the leading prefix bytes of a line of hexadecimal text.

	The line is scanned two characters at a time from the beginning. While the
	two characters are one of "66", "67", "F2", "F3" (upper case only) they are
	collected as prefixes. The result is the set of collected prefixes in
	ascending order, each one once, followed by the unmodified rest of the line.

	@param line [in] text to normalize
	@return normalized text

	The comparison and the copy of the remainder use std::string operations
	with explicit lengths, so they never depend on a terminating NUL and a
	line containing '\0' is not truncated.
*/
std::string normalize(const std::string& line)
{
	static const char tbl[][3] = { "66", "67", "F2", "F3" };
	const size_t tblNum = sizeof(tbl) / sizeof(tbl[0]);
	typedef std::set<std::string> StringSet;
	StringSet prefixes;
	size_t pos = 0;
	for (; pos < line.size(); pos += 2) {
		bool found = false;
		for (size_t i = 0; i < tblNum; i++) {
			// compares at most the characters that remain in line
			if (line.compare(pos, 2, tbl[i]) == 0) {
				found = true;
				prefixes.insert(tbl[i]);
				break;
			}
		}
		if (!found) break;
	}
	std::string ret;
	// std::set iterates in sorted order, which also removes duplicates
	for (StringSet::const_iterator i = prefixes.begin(), e = prefixes.end(); i != e; ++i) {
		ret += *i;
	}
	// pos never exceeds line.size(): the loop advances only after two characters matched
	ret.append(line, pos, std::string::npos);
	return ret;
}
int main()
{
std::string line;
while (std::getline(std::cin, line)) {
std::string normalizedLine = normalize(line);
std::cout << normalizedLine << '\n';//std::endl;
}
}

6
test/readme.txt Normal file
View file

@ -0,0 +1,6 @@
Test scripts for Windows.
These tests require nasm.exe, yasm.exe, cl.exe, awk and diff.
test_all ; runs all tests

88
test/rip-label-imm.cpp Normal file
View file

@ -0,0 +1,88 @@
#define XBYAK_NO_OP_NAMES
#include <xbyak/xbyak.h>
/*
dump of vc
00000000003A0000 F3 0F C2 05 F1 00 00 00 00 cmpeqss xmm0,dword ptr [3A00FAh]
00000000003A0009 F7 05 E7 00 00 00 21 00 00 00 test dword ptr [3A00FAh],21h
00000000003A0013 0F BA 25 DF 00 00 00 03 bt dword ptr [3A00FAh],3
00000000003A001B C4 E3 79 0D 05 D5 00 00 00 03 vblendpd xmm0,xmm0,xmmword ptr [3A00FAh],3
00000000003A0025 C4 E3 79 0F 05 CB 00 00 00 04 vpalignr xmm0,xmm0,xmmword ptr [3A00FAh],4
00000000003A002F C4 E3 7D 19 1D C1 00 00 00 0C vextractf128 xmmword ptr [3A00FAh],ymm3,0Ch
00000000003A0039 C4 E3 75 46 05 B7 00 00 00 0D vperm2i128 ymm0,ymm1,ymmword ptr [3A00FAh],0Dh
00000000003A0043 C4 E3 79 1D 15 AD 00 00 00 2C vcvtps2ph mmword ptr [3A00FAh],xmm2,2Ch
00000000003A004D C7 05 A3 00 00 00 34 12 00 00 mov dword ptr [3A00FAh],1234h
00000000003A0057 C1 25 9C 00 00 00 03 shl dword ptr [3A00FAh],3
00000000003A005E D1 2D 96 00 00 00 shr dword ptr [3A00FAh],1
00000000003A0064 48 0F A4 05 8D 00 00 00 03 shld qword ptr [3A00FAh],rax,3
00000000003A006D 48 6B 05 85 00 00 00 15 imul rax,qword ptr [3A00FAh],15h
00000000003A0075 C4 E3 FB F0 05 7B 00 00 00 15 rorx rax,qword ptr [3A00FAh],15h
00000000003A007F F7 05 71 00 00 00 05 00 00 00 test dword ptr [3A00FAh],5
00000000003A0089 66 48 0F 3A 16 05 66 00 00 00 03 pextrq qword ptr [3A00FAh],xmm0,3
00000000003A0094 66 48 0F 3A 22 15 5B 00 00 00 05 pinsrq xmm2,qword ptr [3A00FAh],5
00000000003A009F 66 0F 3A 15 0D 51 00 00 00 04 pextrw word ptr [3A00FAh],xmm1,4
00000000003A00A9 81 15 47 00 00 00 45 23 01 00 adc dword ptr [3A00FAh],12345h
00000000003A00B3 0F BA 25 3F 00 00 00 34 bt dword ptr [3A00FAh],34h
00000000003A00BB 66 0F BA 3D 36 00 00 00 34 btc word ptr [3A00FAh],34h
00000000003A00C4 0F BA 35 2E 00 00 00 34 btr dword ptr [3A00FAh],34h
00000000003A00CC C1 15 27 00 00 00 04 rcl dword ptr [3A00FAh],4
00000000003A00D3 48 0F A4 05 1E 00 00 00 04 shld qword ptr [3A00FAh],rax,4
00000000003A00DC 0F 3A 0F 05 15 00 00 00 04 palignr mm0,mmword ptr [3A00FAh],4
00000000003A00E5 66 0F 3A DF 1D 0B 00 00 00 04 aeskeygenassist xmm3,xmmword ptr [3A00FAh],4
00000000003A00EF C4 E3 79 60 15 01 00 00 00 07 vpcmpestrm xmm2,xmmword ptr [3A00FAh],7
00000000003A00F9 C3 ret
00000000003A00FA F0 DE BC 9A 78 56 34 12
*/
// Emits a sequence of instructions that combine a rip + label memory operand
// with an immediate operand, followed by ret and 8 bytes of data at the label.
// The comment block above this struct shows a reference dump of the expected bytes.
struct Code : Xbyak::CodeGenerator {
Code()
{
// all memory operands below refer to this label, which is defined after ret()
Xbyak::Label label;
cmpss(xmm0, ptr[rip + label], 0);
test(dword[rip + label], 33);
bt(dword[rip + label ], 3);
vblendpd(xmm0, dword[rip + label], 3);
vpalignr(xmm0, qword[rip + label], 4);
vextractf128(dword[rip + label], ymm3, 12);
vperm2i128(ymm0, ymm1, qword[rip + label], 13);
vcvtps2ph(ptr[rip + label], xmm2, 44);
mov(dword[rip + label], 0x1234);
shl(dword[rip + label], 3);
shr(dword[rip + label], 1);
shld(qword[rip + label], rax, 3);
imul(rax, qword[rip + label], 21);
rorx(rax, qword[rip + label], 21);
test(dword[rip + label], 5);
pextrq(ptr[rip + label], xmm0, 3);
pinsrq(xmm2, ptr[rip + label], 5);
pextrw(ptr[rip + label], xmm1, 4);
adc(dword[rip + label], 0x12345);
bt(byte[rip + label], 0x34);
btc(word[rip + label], 0x34);
btr(dword[rip + label], 0x34);
rcl(dword[rip + label], 4);
shld(qword[rip + label], rax, 4);
palignr(mm0, ptr[rip + label], 4);
aeskeygenassist(xmm3, ptr[rip + label], 4);
vpcmpestrm(xmm2, ptr[rip + label], 7);
ret();
// the data referenced by every instruction above
L(label);
dq(0x123456789abcdef0ull);
// NOTE(review): the ';' after the constructor body is redundant
};
};
/*
	Print n bytes starting at p as two-digit hexadecimal numbers, 16 per line.
	@param p [in] first byte to print
	@param n [in] number of bytes
	The index has the same type as n, so there is no signed/unsigned
	comparison and sizes above INT_MAX are handled.
*/
void dump(const unsigned char *p, size_t n)
{
	for (size_t i = 0; i < n; i++) {
		printf("%02x ", p[i]);
		if ((i % 16) == 15) putchar('\n');
	}
	putchar('\n');
}
/* generates the code, prints its bytes and then executes it */
int main()
{
	Code code;
	void (*entry)() = code.getCode<void (*)()>();
	dump(code.getCode(), code.getSize());
	entry();
	return 0;
}

338
test/sf_test.cpp Normal file
View file

@ -0,0 +1,338 @@
#define XBYAK_NO_OP_NAMES
#include <xbyak/xbyak_util.h>
#ifdef XBYAK32
#error "this sample is for only 64-bit mode"
#endif
using namespace Xbyak::util;
// Hand-written StackFrame samples. Each genN() appends one function to the
// code buffer; testPartial() takes the entry address with getCurr() before
// the call and checks the returned value.
// StackFrame arguments, as named in Code2::gen: (generator, pNum, tNum, stackSizeByte).
// sf.p[i] / sf.t[i] are presumably the parameter / temporary registers - confirm in xbyak_util.h.
struct Code : public Xbyak::CodeGenerator {
// returns p0
void gen1()
{
StackFrame sf(this, 1);
mov(rax, sf.p[0]);
}
// returns p0 + p1 (computed with lea)
void gen2()
{
StackFrame sf(this, 2);
lea(rax, ptr [sf.p[0] + sf.p[1]]);
}
// returns p0 + p1 + p2
void gen3()
{
StackFrame sf(this, 3);
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
}
// returns p0 + p1 + p2 + p3
void gen4()
{
StackFrame sf(this, 4);
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
add(rax, sf.p[3]);
}
// same sum as gen4; UseRCX is requested and rcx is cleared before the parameters are read
void gen5()
{
StackFrame sf(this, 4, UseRCX);
xor_(rcx, rcx);
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
add(rax, sf.p[3]);
}
// same sum as gen4; rcx and rdx are both requested and cleared first
void gen6()
{
StackFrame sf(this, 4, UseRCX | UseRDX);
xor_(rcx, rcx);
xor_(rdx, rdx);
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
add(rax, sf.p[3]);
}
// three parameters; rcx and rdx are cleared first
void gen7()
{
StackFrame sf(this, 3, UseRCX | UseRDX);
xor_(rcx, rcx);
xor_(rdx, rdx);
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
}
// three parameters and three temporaries; the temporaries are overwritten before the parameters are read
void gen8()
{
StackFrame sf(this, 3, 3 | UseRCX | UseRDX);
xor_(rcx, rcx);
xor_(rdx, rdx);
mov(sf.t[0], 1);
mov(sf.t[1], 2);
mov(sf.t[2], 3);
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
}
// like gen8 with stackSizeByte = 32; the result is also stored to [rsp] .. [rsp + 24]
void gen9()
{
StackFrame sf(this, 3, 3 | UseRCX | UseRDX, 32);
xor_(rcx, rcx);
xor_(rdx, rdx);
mov(sf.t[0], 1);
mov(sf.t[1], 2);
mov(sf.t[2], 3);
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
mov(ptr [rsp + 8 * 0], rax);
mov(ptr [rsp + 8 * 1], rax);
mov(ptr [rsp + 8 * 2], rax);
mov(ptr [rsp + 8 * 3], rax);
}
// four parameters, eight temporaries, rcx, rdx and stackSizeByte = 32
void gen10()
{
StackFrame sf(this, 4, 8 | UseRCX | UseRDX, 32);
xor_(rcx, rcx);
xor_(rdx, rdx);
for (int i = 0; i < 8; i++) {
mov(sf.t[i], i);
}
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
add(rax, sf.p[3]);
mov(ptr [rsp + 8 * 0], rax);
mov(ptr [rsp + 8 * 1], rax);
mov(ptr [rsp + 8 * 2], rax);
mov(ptr [rsp + 8 * 3], rax);
}
// no parameter; returns the constant 3
void gen11()
{
StackFrame sf(this, 0, UseRCX);
xor_(rcx, rcx);
mov(rax, 3);
}
// same sum as gen4; only rdx is requested and cleared
void gen12()
{
StackFrame sf(this, 4, UseRDX);
xor_(rdx, rdx);
mov(rax, sf.p[0]);
add(rax, sf.p[1]);
add(rax, sf.p[2]);
add(rax, sf.p[3]);
}
};
// Parameterized StackFrame sample used by testAll().
// The generator is constructed with 4096 * 32 because testAll() appends
// many functions to the same object.
struct Code2 : Xbyak::CodeGenerator {
Code2()
: Xbyak::CodeGenerator(4096 * 32)
{
}
/*
append a function that returns 1 + (sum of its pNum parameters)
@param pNum [in] number of parameters
@param tNum [in] number of temporaries, optionally OR-ed with UseRCX / UseRDX
@param stackSizeByte [in] number of bytes at [rsp] that the function clears
*/
void gen(int pNum, int tNum, int stackSizeByte)
{
StackFrame sf(this, pNum, tNum, stackSizeByte);
// overwrite every requested register and stack byte before the parameters are read
if (tNum & UseRCX) xor_(rcx, rcx);
if (tNum & UseRDX) xor_(rdx, rdx);
// n is tNum without the UseRCX / UseRDX flag bits
for (int i = 0, n = tNum & ~(UseRCX | UseRDX); i < n; i++) {
mov(sf.t[i], 5);
}
for (int i = 0; i < stackSizeByte; i++) {
mov(byte [rsp + i], 0);
}
mov(rax, 1);
for (int i = 0; i < pNum; i++) {
add(rax, sf.p[i]);
}
}
};
/* number of mismatches reported by check() */
static int errNum = 0;

/*
	Compare two values. A mismatch is printed and counted in errNum.
	@param x [in] first value
	@param y [in] second value
*/
void check(int x, int y)
{
	if (x == y) return;
	printf("err x=%d, y=%d\n", x, y);
	++errNum;
}
/*
	Call the generated function at f with pNum arguments (10, 100, 1000, 10000)
	and compare the result with 1 + (sum of the arguments) through check().
	@param f [in] entry address of a function generated by Code2::gen
	@param pNum [in] number of parameters of that function (0 to 4)
	An unsupported pNum is reported and terminates the program with exit(1).
*/
void verify(const Xbyak::uint8 *f, int pNum)
{
	if (pNum == 0) {
		check(1, Xbyak::CastTo<int (*)()>(f)());
		return;
	}
	if (pNum == 1) {
		check(11, Xbyak::CastTo<int (*)(int)>(f)(10));
		return;
	}
	if (pNum == 2) {
		check(111, Xbyak::CastTo<int (*)(int, int)>(f)(10, 100));
		return;
	}
	if (pNum == 3) {
		check(1111, Xbyak::CastTo<int (*)(int, int, int)>(f)(10, 100, 1000));
		return;
	}
	if (pNum == 4) {
		check(11111, Xbyak::CastTo<int (*)(int, int, int, int)>(f)(10, 100, 1000, 10000));
		return;
	}
	printf("ERR pNum=%d\n", pNum);
	exit(1);
}
void testAll()
{
Code2 code;
for (int stackSize = 0; stackSize < 32; stackSize += 7) {
for (int pNum = 0; pNum < 4; pNum++) {
for (int mode = 0; mode < 4; mode++) {
int maxNum = 0;
int opt = 0;
if (mode == 0) {
maxNum = 10;
} else if (mode == 1) {
maxNum = 9;
opt = UseRCX;
} else if (mode == 2) {
maxNum = 9;
opt = UseRDX;
} else {
maxNum = 8;
opt = UseRCX | UseRDX;
}
for (int tNum = 0; tNum < maxNum; tNum++) {
// printf("pNum=%d, tNum=%d, stackSize=%d\n", pNum, tNum | opt, stackSize);
const Xbyak::uint8 *f = code.getCurr();
code.gen(pNum, tNum | opt, stackSize);
verify(f, pNum);
}
}
}
}
}
// Runs the hand-written samples of Code one after another.
// For each sample: take the current position as the entry address, append the
// function with genN(), then call it and compare the result with the expected value.
void testPartial()
{
Code code;
int (*f1)(int) = code.getCurr<int (*)(int)>();
code.gen1();
check(5, f1(5));
int (*f2)(int, int) = code.getCurr<int (*)(int, int)>();
code.gen2();
check(9, f2(3, 6));
int (*f3)(int, int, int) = code.getCurr<int (*)(int, int, int)>();
code.gen3();
check(14, f3(1, 4, 9));
int (*f4)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
code.gen4();
check(30, f4(1, 4, 9, 16));
int (*f5)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
code.gen5();
check(23, f5(2, 5, 7, 9));
int (*f6)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
code.gen6();
check(18, f6(3, 4, 5, 6));
int (*f7)(int, int, int) = code.getCurr<int (*)(int, int, int)>();
code.gen7();
check(12, f7(3, 4, 5));
int (*f8)(int, int, int) = code.getCurr<int (*)(int, int, int)>();
code.gen8();
check(23, f8(5, 8, 10));
int (*f9)(int, int, int) = code.getCurr<int (*)(int, int, int)>();
code.gen9();
check(60, f9(10, 20, 30));
int (*f10)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
code.gen10();
check(100, f10(10, 20, 30, 40));
// gen11 takes no parameter and returns the constant 3
int (*f11)() = code.getCurr<int (*)()>();
code.gen11();
check(3, f11());
int (*f12)(int, int, int, int) = code.getCurr<int (*)(int, int, int, int)>();
code.gen12();
check(24, f12(3, 5, 7, 9));
}
/* print the name of every register of p on one line, separated by spaces */
void put(const Xbyak::util::Pack& p)
{
	const size_t regNum = p.size();
	for (size_t idx = 0; idx != regNum; ++idx) {
		printf("%s ", p[idx].toString());
	}
	printf("\n");
}
/*
	Compare the register indices of the first tblNum elements of p with tbl.
	@param p [in] pack to verify
	@param tbl [in] expected register indices
	@param tblNum [in] number of elements to compare
	Mismatches are reported through check().
*/
void verifyPack(const Xbyak::util::Pack& p, const int *tbl, size_t tblNum)
{
	size_t idx = 0;
	while (idx < tblNum) {
		check(p[idx].getIdx(), tbl[idx]);
		++idx;
	}
}
/*
	Build a Pack of the registers with index 0..9 and verify Pack::sub().
	sub(pos, num) is checked for every row of the table.
	sub(pos) is checked as well when the row reaches the end of the pack.
*/
void testPack()
{
	const int N = 10;
	Xbyak::Reg64 regTbl[N];
	for (int idx = 0; idx < N; idx++) {
		regTbl[idx] = Xbyak::Reg64(idx);
	}
	Xbyak::util::Pack p(regTbl, N);
	// pos, num : arguments of sub() ; tbl : expected register indices
	const struct {
		int pos;
		int num;
		int tbl[10];
	} tbl[] = {
		{ 0, 10, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 } },
		{ 1, 9, { 1, 2, 3, 4, 5, 6, 7, 8, 9 } },
		{ 2, 8, { 2, 3, 4, 5, 6, 7, 8, 9 } },
		{ 3, 7, { 3, 4, 5, 6, 7, 8, 9 } },
		{ 4, 6, { 4, 5, 6, 7, 8, 9 } },
		{ 5, 5, { 5, 6, 7, 8, 9 } },
		{ 6, 4, { 6, 7, 8, 9 } },
		{ 7, 3, { 7, 8, 9 } },
		{ 8, 2, { 8, 9 } },
		{ 9, 1, { 9 } },
		{ 3, 5, { 3, 4, 5, 6, 7 } },
	};
	const size_t rowNum = sizeof(tbl) / sizeof(tbl[0]);
	for (size_t row = 0; row < rowNum; row++) {
		const int pos = tbl[row].pos;
		const int num = tbl[row].num;
		verifyPack(p.sub(pos, num), tbl[row].tbl, num);
		if (pos + num != N) continue;
		// the sub-range reaches the end: the one-argument form must agree
		verifyPack(p.sub(pos), tbl[row].tbl, num);
	}
}
/*
	Run all tests and print the number of mismatches.
	Any exception makes the program print a message and return 1.
*/
int main()
	try
{
	testAll();
	testPartial();
	testPack();
	printf("errNum=%d\n", errNum);
	return 0;
} catch (std::exception& e) {
	printf("err %s\n", e.what());
	return 1;
} catch (...) {
	puts("ERR");
	return 1;
}

BIN
test/state.pptx Normal file

Binary file not shown.

37
test/test_address.bat Normal file
View file

@ -0,0 +1,37 @@
@echo off
rem Compare the bytes produced by Xbyak with the nasm listing for the addressing test.
rem usage: test_address [64]
rem   64        : -DXBYAK64 / win64
rem   otherwise : -DXBYAK32 / win32
set FILTER=cat
if /i "%1"=="64" (
set OPT2=-DXBYAK64
set OPT3=win64
) else (
set OPT2=-DXBYAK32
set OPT3=win32
)
rem set_opt is presumably the script that defines OPT (not visible here)
call set_opt
bmake -f Makefile.win all
rem the 64-bit mode runs the comparison twice (arguments 1 and 2), the 32-bit mode once
if /i "%1"=="64" (
call :sub 1
call :sub 2
) else (
call :sub 1
)
goto end
rem sub-routine: argument %1 is passed to the address program
:sub
echo cl address.cpp %OPT% %OPT2%
cl address.cpp %OPT% %OPT2%
address %1% > a.asm
echo nasm -f %OPT3% -l a.lst a.asm
nasm -f %OPT3% -l a.lst a.asm
rem a 3rd listing field containing "-" is kept and prepended to the 3rd field of the next line
awk "{if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
echo address %1% jit > nm.cpp
address %1% jit > nm.cpp
echo cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
nm_frame > x.lst
rem x.lst : output of Xbyak, ok.lst : output of nasm
diff x.lst ok.lst
wc x.lst
:end

42
test/test_address.sh Executable file
View file

@ -0,0 +1,42 @@
#!/bin/sh
# Compare the bytes produced by Xbyak with the nasm listing for the addressing test.
# usage: test_address.sh [64]
#   64        : -DXBYAK64 / win64, runs sub 1 and sub 2
#   otherwise : -DXBYAK32 / win32, runs sub 1
FILTER="grep -v warning"
# sub <n> : <n> is passed to ./address; EXE, OPT2 and OPT3 must be set by the caller
sub()
{
CFLAGS="-Wall -fno-operator-names -I../ $OPT2"
echo "compile address.cpp"
g++ $CFLAGS address.cpp -o address
./address $1 > a.asm
echo "asm"
$EXE -f$OPT3 a.asm -l a.lst
# a 3rd listing field containing "-" is kept and prepended to the 3rd field of the next line
awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER > ok.lst
echo "xbyak"
./address $1 jit > nm.cpp
echo "compile nm_frame.cpp"
g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
./nm_frame > x.lst
# ok.lst : output of the assembler, x.lst : output of Xbyak
diff ok.lst x.lst && echo "ok"
wc x.lst
}
if [ "$1" = "64" ]; then
echo "nasm(64bit)"
EXE=nasm
OPT2=-DXBYAK64
OPT3=win64
sub 1
sub 2
else
echo "nasm(32bit)"
EXE=nasm
OPT2=-DXBYAK32
OPT3=win32
sub 1
fi

8
test/test_all.bat Normal file
View file

@ -0,0 +1,8 @@
@echo off
rem Run the mnemonic tests, the addressing tests (32-bit and 64-bit) and the jmp test.
call test_nm_all
echo *** test addressing ***
call test_address
call test_address 64
echo *** test jmp address ***
call test_jmp
echo *** all test end ***

42
test/test_avx.bat Normal file
View file

@ -0,0 +1,42 @@
@echo off
rem Compare the bytes produced by Xbyak with the assembler listing (make_nm built with /DUSE_AVX).
rem usage: test_avx [Y|64|Y64]
rem   (none) : nasm, 32-bit
rem   Y      : yasm, 32-bit
rem   64     : nasm, 64-bit
rem   Y64    : yasm, 64-bit
rem The 64-bit modes filter both listings through normalize_prefix.
set FILTER=cat
set Y=0
if /i "%1"=="Y" (
set Y=1
set EXE=yasm.exe
set OPT2=-DUSE_YASM -DXBYAK32
set OPT3=win32
) else if /i "%1"=="64" (
set EXE=nasm.exe
set OPT2=-DXBYAK64
set OPT3=win64
set FILTER=normalize_prefix
) else if /i "%1"=="Y64" (
set Y=1
set EXE=yasm.exe
set OPT2=-DUSE_YASM -DXBYAK64
set OPT3=win64
set FILTER=normalize_prefix
) else (
set EXE=nasm.exe
set OPT2=-DXBYAK32
set OPT3=win32
)
rem set_opt is presumably the script that defines OPT (not visible here)
call set_opt
bmake -f Makefile.win all
echo cl -I../ make_nm.cpp %OPT% %OPT2% /EHs /DUSE_AVX
cl -I../ make_nm.cpp %OPT% %OPT2% /EHs /DUSE_AVX
make_nm > a.asm
%EXE% -f %OPT3% -l a.lst a.asm
rem a 3rd listing field containing "-" is kept and prepended to the 3rd field of the next line
rem (the yasm variant additionally skips the first line of the listing)
if /i "%Y%"=="1" (
awk "NR > 1 {if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
) else (
awk "{if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
)
make_nm jit > nm.cpp
echo cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
nm_frame |%FILTER% > x.lst
rem x.lst : output of Xbyak, ok.lst : output of the assembler
diff x.lst ok.lst
wc x.lst

44
test/test_avx.sh Executable file
View file

@ -0,0 +1,44 @@
#!/bin/tcsh
# Compare the bytes produced by Xbyak with the assembler listing (make_nm built with -DUSE_AVX).
# usage: test_avx.sh [Y|64|Y64]
#   (none) : nasm, 32-bit
#   Y      : yasm, 32-bit
#   64     : nasm, 64-bit
#   Y64    : yasm, 64-bit
# The 64-bit modes filter both listings through ./normalize_prefix.
set FILTER="grep -v warning"
if ($1 == "Y") then
echo "yasm(32bit)"
set EXE=yasm
set OPT2="-DUSE_YASM -DXBYAK32"
set OPT3=win32
else if ($1 == "64") then
echo "nasm(64bit)"
set EXE=nasm
set OPT2=-DXBYAK64
set OPT3=win64
set FILTER=./normalize_prefix
else if ($1 == "Y64") then
echo "yasm(64bit)"
set EXE=yasm
set OPT2="-DUSE_YASM -DXBYAK64"
set OPT3=win64
set FILTER=./normalize_prefix
else
echo "nasm(32bit)"
set EXE=nasm
set OPT2=-DXBYAK32
set OPT3=win32
endif
set CFLAGS="-Wall -fno-operator-names -I../ $OPT2 -DUSE_AVX"
echo "compile make_nm.cpp"
g++ $CFLAGS make_nm.cpp -o make_nm
./make_nm > a.asm
echo "asm"
$EXE -f$OPT3 a.asm -l a.lst
# a 3rd listing field containing "-" is kept and prepended to the 3rd field of the next line
awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER > ok.lst
echo "xbyak"
./make_nm jit > nm.cpp
echo "compile nm_frame.cpp"
g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
./nm_frame | $FILTER > x.lst
# ok.lst : output of the assembler, x.lst : output of Xbyak
diff ok.lst x.lst && echo "ok"
exit 0

31
test/test_avx512.bat Normal file
View file

@ -0,0 +1,31 @@
@echo off
rem Compare the bytes produced by Xbyak with the nasm listing (make_512 built with /DUSE_AVX512).
rem usage: test_avx512 [min|64]
rem   (none) : 32-bit
rem   64     : 64-bit
rem   min    : 64-bit with -DMIN_TEST
rem The 64-bit modes filter both listings through normalize_prefix.
set FILTER=cat
set Y=0
if /i "%1"=="min" (
set EXE=nasm.exe
set OPT2=-DXBYAK64 -DMIN_TEST
set OPT3=win64
set FILTER=normalize_prefix
) else if /i "%1"=="64" (
set EXE=nasm.exe
set OPT2=-DXBYAK64
set OPT3=win64
set FILTER=normalize_prefix
) else (
set EXE=nasm.exe
set OPT2=-DXBYAK32
set OPT3=win32
)
rem set_opt is presumably the script that defines OPT (not visible here)
call set_opt
bmake -f Makefile.win all
echo cl -I../ make_512.cpp %OPT% %OPT2% /EHs /DUSE_AVX512
cl -I../ make_512.cpp %OPT% %OPT2% /EHs /DUSE_AVX512
make_512 > a.asm
%EXE% -f %OPT3% -l a.lst a.asm
rem a 3rd listing field containing "-" is kept and prepended to the 3rd field of the next line
awk "{if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
make_512 jit > nm.cpp
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2% /DXBYAK_AVX512
nm_frame |%FILTER% > x.lst
rem x.lst : output of Xbyak, ok.lst : output of nasm
diff x.lst ok.lst
wc x.lst

33
test/test_avx512.sh Executable file
View file

@ -0,0 +1,33 @@
#!/bin/tcsh
# Compare the bytes produced by Xbyak with the nasm listing (make_512 built with -DUSE_AVX512).
# usage: test_avx512.sh [64]
#   (none) : 32-bit
#   64     : 64-bit; both listings are filtered through ./normalize_prefix
set FILTER="grep -v warning"
if ($1 == "64") then
echo "nasm(64bit)"
set EXE=nasm
set OPT2=-DXBYAK64
set OPT3=win64
set FILTER=./normalize_prefix
else
echo "nasm(32bit)"
set EXE=nasm
set OPT2=-DXBYAK32
set OPT3=win32
endif
set CFLAGS="-Wall -fno-operator-names -I../ $OPT2 -DUSE_AVX512"
echo "compile make_512.cpp"
g++ $CFLAGS make_512.cpp -o make_512
./make_512 > a.asm
echo "asm"
$EXE -f$OPT3 a.asm -l a.lst
# a 3rd listing field containing "-" is kept and prepended to the 3rd field of the next line
awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER > ok.lst
echo "xbyak"
./make_512 jit > nm.cpp
echo "compile nm_frame.cpp"
g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame -DXBYAK_AVX512
./nm_frame | $FILTER > x.lst
# ok.lst : output of nasm, x.lst : output of Xbyak
diff ok.lst x.lst && echo "ok"
exit 0

9
test/test_avx_all.bat Normal file
View file

@ -0,0 +1,9 @@
@echo off
rem Run test_avx in all four modes: nasm/yasm, 32-bit/64-bit.
echo ** nasm-avx(32bit) ***
call test_avx
echo ** nasm-avx(64bit) ***
call test_avx 64
echo ** yasm-avx(32bit) ***
call test_avx Y
echo ** yasm-avx(64bit) ***
call test_avx Y64

4
test/test_jmp.bat Normal file
View file

@ -0,0 +1,4 @@
@rem Build jmp.cpp with cl (no optimization, with debug information) and run it.
@rem set_opt is presumably the script that defines OPT (not visible here).
call set_opt
bmake -f Makefile.win all
cl -I../ -I./ -DXBYAK_TEST jmp.cpp %OPT% /Od /Zi
jmp

4
test/test_misc.bat Normal file
View file

@ -0,0 +1,4 @@
@rem Build misc.cpp with cl (no optimization, with debug information) and run it.
@rem set_opt is presumably the script that defines OPT (not visible here).
call set_opt
bmake -f Makefile.win all
cl -I../ -I./ -DXBYAK_TEST misc.cpp %OPT% /Od /Zi
misc

78
test/test_mmx.cpp Normal file
View file

@ -0,0 +1,78 @@
#if defined(_MSC_VER) && (_MSC_VER <= 1200)
#pragma warning(disable:4514)
#pragma warning(disable:4786)
#endif
#include <stdio.h>
#include <stdlib.h>
#include "../../include.mie/mie_thread.h"
#include "xbyak/xbyak.h"
// Generates a function void f(int x) that copies x into the MMX register mm0.
class WriteMMX : public Xbyak::CodeGenerator {
public:
WriteMMX()
{
// 32-bit build: load the argument from the stack into ecx.
// Otherwise the argument is read from ecx directly
// (presumably the first-argument register of the target calling convention - confirm).
#ifdef XBYAK32
mov(ecx, ptr [esp + 4]);
#endif
movd(mm0, ecx);
ret();
}
// returns the generated code as a pointer to function void (int)
void (*set() const)(int x) { return (void (*)(int x))getCode(); }
};
// Generates a function int f() that returns the low 32 bits of the MMX register mm0.
class ReadMMX : public Xbyak::CodeGenerator {
public:
ReadMMX()
{
movd(eax, mm0);
ret();
}
// returns the generated code as a pointer to function int ()
int (*get() const)() { return (int (*)())getCode(); }
};
/*
	Thread that writes n into mm0 once and then reads mm0 back forever,
	reporting every read and every value that differs from n.
*/
class Test : public MIE::ThreadBase<Test> {
public:
	Test(int n)
		: n_(n)
	{
	}
	void threadEntry()
	{
		printf("n=%d\n", n_);
		WriteMMX writer;
		writer.set()(n_);
		ReadMMX reader;
		while (true) {
			const int cur = reader.get()();
			printf("b=%d\n", cur);
			if (cur != n_) {
				printf("mm0 has changed!\n");
			}
			MIE::MIE_Sleep(1000);
		}
	}
	void stopThread() { }
private:
	int n_;
};
/*
	Start two threads that keep the values n and n + 1 in mm0.
	n is taken from the first command line argument (1223 if there is none).
*/
int main(int argc, char *argv[])
{
#ifdef XBYAK32
	puts("32bit");
#else
	puts("64bit");
#endif
	try {
		const char *arg = (argc == 1) ? "1223" : argv[1];
		const int n = atoi(arg);
		Test test0(n), test1(n + 1);
		test0.beginThread();
		test1.beginThread();
		test0.joinThread();
		test1.joinThread();
	} catch (std::exception& e) {
		printf("ERR:%s\n", e.what());
	} catch (...) {
		printf("unknown error\n");
	}
}

43
test/test_nm.bat Normal file
View file

@ -0,0 +1,43 @@
@echo off
rem Compare the bytes produced by Xbyak with the assembler listing for the mnemonic test.
rem usage: test_nm [Y|64|Y64]
rem   (none) : nasm, 32-bit
rem   Y      : yasm, 32-bit
rem   64     : nasm, 64-bit
rem   Y64    : yasm, 64-bit
rem The 64-bit modes filter both listings through normalize_prefix.
set FILTER=cat
set Y=0
if /i "%1"=="Y" (
set Y=1
set EXE=yasm.exe
set OPT2=-DUSE_YASM -DXBYAK32
set OPT3=win32
) else if /i "%1"=="64" (
set EXE=nasm.exe
set OPT2=-DXBYAK64
set OPT3=win64
set FILTER=normalize_prefix
) else if /i "%1"=="Y64" (
set Y=1
set EXE=yasm.exe
set OPT2=-DUSE_YASM -DXBYAK64
set OPT3=win64
set FILTER=normalize_prefix
) else (
set EXE=nasm.exe
set OPT2=-DXBYAK32
set OPT3=win32
)
rem set_opt is presumably the script that defines OPT (not visible here)
call set_opt
bmake -f Makefile.win all
echo cl -I../ make_nm.cpp %OPT% %OPT2% /EHs
cl -I../ make_nm.cpp %OPT% %OPT2% /EHs
make_nm > a.asm
rem remove the listing of the previous run
rm a.lst
echo %EXE% -f %OPT3% -l a.lst a.asm
%EXE% -f %OPT3% -l a.lst a.asm
rem a 3rd listing field containing "-" is kept and prepended to the 3rd field of the next line
rem (the yasm variant additionally skips the first line of the listing)
if /i "%Y%"=="1" (
awk "NR > 1 {if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
) else (
awk "{if (index($3, ""-"")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = """" }} " < a.lst |%FILTER% > ok.lst
)
make_nm jit > nm.cpp
cl -I../ -DXBYAK_TEST nm_frame.cpp %OPT% %OPT2%
nm_frame |%FILTER% > x.lst
rem x.lst : output of Xbyak, ok.lst : output of the assembler
diff x.lst ok.lst
wc x.lst

44
test/test_nm.sh Executable file
View file

@ -0,0 +1,44 @@
#!/bin/tcsh
# Compare the bytes produced by Xbyak with the assembler listing for the mnemonic test.
# usage: test_nm.sh [Y|64|Y64]
#   (none) : nasm, 32-bit
#   Y      : yasm, 32-bit
#   64     : nasm, 64-bit
#   Y64    : yasm, 64-bit
# The 64-bit modes filter both listings through ./normalize_prefix.
set FILTER=cat
if ($1 == "Y") then
echo "yasm(32bit)"
set EXE=yasm
set OPT2="-DUSE_YASM -DXBYAK32"
set OPT3=win32
else if ($1 == "64") then
echo "nasm(64bit)"
set EXE=nasm
set OPT2=-DXBYAK64
set OPT3=win64
set FILTER=./normalize_prefix
else if ($1 == "Y64") then
echo "yasm(64bit)"
set EXE=yasm
set OPT2="-DUSE_YASM -DXBYAK64"
set OPT3=win64
set FILTER=./normalize_prefix
else
echo "nasm(32bit)"
set EXE=nasm
set OPT2=-DXBYAK32
set OPT3=win32
endif
set CFLAGS="-Wall -fno-operator-names -I../ $OPT2"
echo "compile make_nm.cpp"
g++ $CFLAGS make_nm.cpp -o make_nm
./make_nm > a.asm
echo "asm"
$EXE -f$OPT3 a.asm -l a.lst
# a 3rd listing field containing "-" is kept and prepended to the 3rd field of the next line
awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER > ok.lst
echo "xbyak"
./make_nm jit > nm.cpp
echo "compile nm_frame.cpp"
g++ $CFLAGS -DXBYAK_TEST nm_frame.cpp -o nm_frame
./nm_frame | $FILTER > x.lst
# ok.lst : output of the assembler, x.lst : output of Xbyak
diff ok.lst x.lst && echo "ok"
exit 0

11
test/test_nm_all.bat Normal file
View file

@ -0,0 +1,11 @@
@echo off
rem Run test_nm in all four modes (nasm/yasm, 32-bit/64-bit), then all AVX tests.
echo *** nasm(32bit) ***
call test_nm
echo *** yasm(32bit) ***
call test_nm Y
echo *** nasm(64bit) ***
call test_nm 64
echo *** yasm(64bit) ***
call test_nm Y64
call test_avx_all

86
xbyak.sln Normal file
View file

@ -0,0 +1,86 @@
þ½Ž¿
Microsoft Visual Studio Solution File, Format Version 10.00
# Visual Studio 2008
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bf", "sample\bf.vcproj", "{654BD79B-59D3-4B10-BBAA-158BAB272828}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "calc", "sample\calc.vcproj", "{5FDDFAA6-B947-491D-A17E-BBD863846579}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "quantize", "sample\quantize.vcproj", "{D06753BF-E1F3-4578-9B18-08673327F77C}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test0", "sample\test0.vcproj", "{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "toyvm", "sample\toyvm.vcproj", "{2E41C7AF-39FF-454C-B081-37445378DCB3}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_util", "sample\test_util.vcproj", "{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "calc2", "sample\calc2.vcproj", "{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Debug|x64 = Debug|x64
Release|Win32 = Release|Win32
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Debug|Win32.ActiveCfg = Debug|Win32
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Debug|Win32.Build.0 = Debug|Win32
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Debug|x64.ActiveCfg = Debug|x64
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Debug|x64.Build.0 = Debug|x64
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Release|Win32.ActiveCfg = Release|Win32
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Release|Win32.Build.0 = Release|Win32
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Release|x64.ActiveCfg = Release|x64
{654BD79B-59D3-4B10-BBAA-158BAB272828}.Release|x64.Build.0 = Release|x64
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Debug|Win32.ActiveCfg = Debug|Win32
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Debug|Win32.Build.0 = Debug|Win32
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Debug|x64.ActiveCfg = Debug|x64
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Debug|x64.Build.0 = Debug|x64
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Release|Win32.ActiveCfg = Release|Win32
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Release|Win32.Build.0 = Release|Win32
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Release|x64.ActiveCfg = Release|x64
{5FDDFAA6-B947-491D-A17E-BBD863846579}.Release|x64.Build.0 = Release|x64
{D06753BF-E1F3-4578-9B18-08673327F77C}.Debug|Win32.ActiveCfg = Debug|Win32
{D06753BF-E1F3-4578-9B18-08673327F77C}.Debug|Win32.Build.0 = Debug|Win32
{D06753BF-E1F3-4578-9B18-08673327F77C}.Debug|x64.ActiveCfg = Debug|x64
{D06753BF-E1F3-4578-9B18-08673327F77C}.Debug|x64.Build.0 = Debug|x64
{D06753BF-E1F3-4578-9B18-08673327F77C}.Release|Win32.ActiveCfg = Release|Win32
{D06753BF-E1F3-4578-9B18-08673327F77C}.Release|Win32.Build.0 = Release|Win32
{D06753BF-E1F3-4578-9B18-08673327F77C}.Release|x64.ActiveCfg = Release|x64
{D06753BF-E1F3-4578-9B18-08673327F77C}.Release|x64.Build.0 = Release|x64
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Debug|Win32.ActiveCfg = Debug|Win32
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Debug|Win32.Build.0 = Debug|Win32
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Debug|x64.ActiveCfg = Debug|x64
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Debug|x64.Build.0 = Debug|x64
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Release|Win32.ActiveCfg = Release|Win32
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Release|Win32.Build.0 = Release|Win32
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Release|x64.ActiveCfg = Release|x64
{1CDE4D2A-BE3A-4B9B-B28F-524A23084A8E}.Release|x64.Build.0 = Release|x64
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Debug|Win32.ActiveCfg = Debug|Win32
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Debug|Win32.Build.0 = Debug|Win32
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Debug|x64.ActiveCfg = Debug|x64
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Debug|x64.Build.0 = Debug|x64
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Release|Win32.ActiveCfg = Release|Win32
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Release|Win32.Build.0 = Release|Win32
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Release|x64.ActiveCfg = Release|x64
{2E41C7AF-39FF-454C-B081-37445378DCB3}.Release|x64.Build.0 = Release|x64
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Debug|Win32.ActiveCfg = Debug|Win32
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Debug|Win32.Build.0 = Debug|Win32
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Debug|x64.ActiveCfg = Debug|x64
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Debug|x64.Build.0 = Debug|x64
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Release|Win32.ActiveCfg = Release|Win32
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Release|Win32.Build.0 = Release|Win32
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Release|x64.ActiveCfg = Release|x64
{CFC9B272-FDA1-4C87-B4EF-CDCA9B57F4DD}.Release|x64.Build.0 = Release|x64
{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}.Debug|Win32.ActiveCfg = Debug|Win32
{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}.Debug|Win32.Build.0 = Debug|Win32
{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}.Debug|x64.ActiveCfg = Debug|x64
{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}.Debug|x64.Build.0 = Debug|x64
{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}.Release|Win32.ActiveCfg = Release|Win32
{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}.Release|Win32.Build.0 = Release|Win32
{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}.Release|x64.ActiveCfg = Release|x64
{8EC11C7F-1B5C-4787-8940-B9B3AAF1D204}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

2490
xbyak/xbyak.h Normal file

File diff suppressed because it is too large Load diff

258
xbyak/xbyak_bin2hex.h Normal file
View file

@ -0,0 +1,258 @@
/*
	8-bit binary constants.
	Each enumerator is named "B" followed by the eight binary digits of its
	value (B00000000 = 0 ... B11111111 = 255), so a bit pattern can be spelled
	out literally in source code (presumably as a substitute for binary literals).
*/
enum {
B00000000= 0,
B00000001= 1,
B00000010= 2,
B00000011= 3,
B00000100= 4,
B00000101= 5,
B00000110= 6,
B00000111= 7,
B00001000= 8,
B00001001= 9,
B00001010= 10,
B00001011= 11,
B00001100= 12,
B00001101= 13,
B00001110= 14,
B00001111= 15,
B00010000= 16,
B00010001= 17,
B00010010= 18,
B00010011= 19,
B00010100= 20,
B00010101= 21,
B00010110= 22,
B00010111= 23,
B00011000= 24,
B00011001= 25,
B00011010= 26,
B00011011= 27,
B00011100= 28,
B00011101= 29,
B00011110= 30,
B00011111= 31,
B00100000= 32,
B00100001= 33,
B00100010= 34,
B00100011= 35,
B00100100= 36,
B00100101= 37,
B00100110= 38,
B00100111= 39,
B00101000= 40,
B00101001= 41,
B00101010= 42,
B00101011= 43,
B00101100= 44,
B00101101= 45,
B00101110= 46,
B00101111= 47,
B00110000= 48,
B00110001= 49,
B00110010= 50,
B00110011= 51,
B00110100= 52,
B00110101= 53,
B00110110= 54,
B00110111= 55,
B00111000= 56,
B00111001= 57,
B00111010= 58,
B00111011= 59,
B00111100= 60,
B00111101= 61,
B00111110= 62,
B00111111= 63,
B01000000= 64,
B01000001= 65,
B01000010= 66,
B01000011= 67,
B01000100= 68,
B01000101= 69,
B01000110= 70,
B01000111= 71,
B01001000= 72,
B01001001= 73,
B01001010= 74,
B01001011= 75,
B01001100= 76,
B01001101= 77,
B01001110= 78,
B01001111= 79,
B01010000= 80,
B01010001= 81,
B01010010= 82,
B01010011= 83,
B01010100= 84,
B01010101= 85,
B01010110= 86,
B01010111= 87,
B01011000= 88,
B01011001= 89,
B01011010= 90,
B01011011= 91,
B01011100= 92,
B01011101= 93,
B01011110= 94,
B01011111= 95,
B01100000= 96,
B01100001= 97,
B01100010= 98,
B01100011= 99,
B01100100= 100,
B01100101= 101,
B01100110= 102,
B01100111= 103,
B01101000= 104,
B01101001= 105,
B01101010= 106,
B01101011= 107,
B01101100= 108,
B01101101= 109,
B01101110= 110,
B01101111= 111,
B01110000= 112,
B01110001= 113,
B01110010= 114,
B01110011= 115,
B01110100= 116,
B01110101= 117,
B01110110= 118,
B01110111= 119,
B01111000= 120,
B01111001= 121,
B01111010= 122,
B01111011= 123,
B01111100= 124,
B01111101= 125,
B01111110= 126,
B01111111= 127,
B10000000= 128,
B10000001= 129,
B10000010= 130,
B10000011= 131,
B10000100= 132,
B10000101= 133,
B10000110= 134,
B10000111= 135,
B10001000= 136,
B10001001= 137,
B10001010= 138,
B10001011= 139,
B10001100= 140,
B10001101= 141,
B10001110= 142,
B10001111= 143,
B10010000= 144,
B10010001= 145,
B10010010= 146,
B10010011= 147,
B10010100= 148,
B10010101= 149,
B10010110= 150,
B10010111= 151,
B10011000= 152,
B10011001= 153,
B10011010= 154,
B10011011= 155,
B10011100= 156,
B10011101= 157,
B10011110= 158,
B10011111= 159,
B10100000= 160,
B10100001= 161,
B10100010= 162,
B10100011= 163,
B10100100= 164,
B10100101= 165,
B10100110= 166,
B10100111= 167,
B10101000= 168,
B10101001= 169,
B10101010= 170,
B10101011= 171,
B10101100= 172,
B10101101= 173,
B10101110= 174,
B10101111= 175,
B10110000= 176,
B10110001= 177,
B10110010= 178,
B10110011= 179,
B10110100= 180,
B10110101= 181,
B10110110= 182,
B10110111= 183,
B10111000= 184,
B10111001= 185,
B10111010= 186,
B10111011= 187,
B10111100= 188,
B10111101= 189,
B10111110= 190,
B10111111= 191,
B11000000= 192,
B11000001= 193,
B11000010= 194,
B11000011= 195,
B11000100= 196,
B11000101= 197,
B11000110= 198,
B11000111= 199,
B11001000= 200,
B11001001= 201,
B11001010= 202,
B11001011= 203,
B11001100= 204,
B11001101= 205,
B11001110= 206,
B11001111= 207,
B11010000= 208,
B11010001= 209,
B11010010= 210,
B11010011= 211,
B11010100= 212,
B11010101= 213,
B11010110= 214,
B11010111= 215,
B11011000= 216,
B11011001= 217,
B11011010= 218,
B11011011= 219,
B11011100= 220,
B11011101= 221,
B11011110= 222,
B11011111= 223,
B11100000= 224,
B11100001= 225,
B11100010= 226,
B11100011= 227,
B11100100= 228,
B11100101= 229,
B11100110= 230,
B11100111= 231,
B11101000= 232,
B11101001= 233,
B11101010= 234,
B11101011= 235,
B11101100= 236,
B11101101= 237,
B11101110= 238,
B11101111= 239,
B11110000= 240,
B11110001= 241,
B11110010= 242,
B11110011= 243,
B11110100= 244,
B11110101= 245,
B11110110= 246,
B11110111= 247,
B11111000= 248,
B11111001= 249,
B11111010= 250,
B11111011= 251,
B11111100= 252,
B11111101= 253,
B11111110= 254,
B11111111= 255
};

1928
xbyak/xbyak_mnemonic.h Normal file

File diff suppressed because it is too large Load diff

572
xbyak/xbyak_util.h Normal file
View file

@ -0,0 +1,572 @@
#ifndef XBYAK_XBYAK_UTIL_H_
#define XBYAK_XBYAK_UTIL_H_
/**
utility class and functions for Xbyak
Xbyak::util::Clock ; rdtsc timer
Xbyak::util::Cpu ; detect CPU
@note this header is UNDER CONSTRUCTION!
*/
#include "xbyak.h"
#ifdef _MSC_VER
#if (_MSC_VER < 1400) && defined(XBYAK32)
/*
	Fallback __cpuid(int data[4], int eaxIn) for 32-bit MSVC older than 14.00,
	used instead of the <intrin.h> intrinsic of the same name.
	Executes CPUID with eax = eaxIn and stores eax, ebx, ecx, edx into data[0..3].
	The function is naked, so the parameters are unnamed and are read directly
	from the stack; the offsets below account for the two registers pushed first.
*/
static inline __declspec(naked) void __cpuid(int[4], int)
{
__asm {
// ebx and esi are saved here and restored before returning
push ebx
push esi
// 4 * 2 skips the two saved registers; +8 is the second argument, +4 the first
mov eax, dword ptr [esp + 4 * 2 + 8] // eaxIn
cpuid
mov esi, dword ptr [esp + 4 * 2 + 4] // data
// store the four result registers in the order eax, ebx, ecx, edx
mov dword ptr [esi], eax
mov dword ptr [esi + 4], ebx
mov dword ptr [esi + 8], ecx
mov dword ptr [esi + 12], edx
pop esi
pop ebx
ret
}
}
#else
#include <intrin.h> // for __cpuid
#endif
#else
#ifndef __GNUC_PREREQ
#define __GNUC_PREREQ(major, minor) ((((__GNUC__) << 16) + (__GNUC_MINOR__)) >= (((major) << 16) + (minor)))
#endif
#if __GNUC_PREREQ(4, 3) && !defined(__APPLE__)
#include <cpuid.h>
#else
#if defined(__APPLE__) && defined(XBYAK32) // avoid err : can't find a register in class `BREG' while reloading `asm'
	/*
		ebx cannot be requested as an output operand here (see the error quoted above),
		so it is saved on the stack, cpuid is executed, the ebx result is copied
		to esi (returned through the "=S" operand) and the saved ebx is restored.
		The copy must read ebx: it holds the second CPUID result register.
	*/
	#define __cpuid(eaxIn, a, b, c, d) __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebx, %%esi\npopl %%ebx" : "=a"(a), "=S"(b), "=c"(c), "=d"(d) : "0"(eaxIn))
	#define __cpuid_count(eaxIn, ecxIn, a, b, c, d) __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebx, %%esi\npopl %%ebx" : "=a"(a), "=S"(b), "=c"(c), "=d"(d) : "0"(eaxIn), "2"(ecxIn))
#else
	/*
		generic fallback: a, b, c, d receive eax, ebx, ecx, edx directly;
		__cpuid_count additionally passes ecxIn in ecx (sub-leaf selector)
	*/
	#define __cpuid(eaxIn, a, b, c, d) __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn))
	#define __cpuid_count(eaxIn, ecxIn, a, b, c, d) __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn), "2"(ecxIn))
#endif
#endif
#endif
namespace Xbyak { namespace util {
/**
CPU detection class
*/
class Cpu {
uint64 type_;
unsigned int get32bitAsBE(const char *x) const
{
return x[0] | (x[1] << 8) | (x[2] << 16) | (x[3] << 24);
}
unsigned int mask(int n) const
{
return (1U << n) - 1;
}
void setFamily()
{
unsigned int data[4];
getCpuid(1, data);
stepping = data[0] & mask(4);
model = (data[0] >> 4) & mask(4);
family = (data[0] >> 8) & mask(4);
// type = (data[0] >> 12) & mask(2);
extModel = (data[0] >> 16) & mask(4);
extFamily = (data[0] >> 20) & mask(8);
if (family == 0x0f) {
displayFamily = family + extFamily;
} else {
displayFamily = family;
}
if (family == 6 || family == 0x0f) {
displayModel = (extModel << 4) + model;
} else {
displayModel = model;
}
}
public:
int model;
int family;
int stepping;
int extModel;
int extFamily;
int displayFamily; // family + extFamily
int displayModel; // model + extModel
/*
data[] = { eax, ebx, ecx, edx }
*/
static inline void getCpuid(unsigned int eaxIn, unsigned int data[4])
{
#ifdef _MSC_VER
__cpuid(reinterpret_cast<int*>(data), eaxIn);
#else
__cpuid(eaxIn, data[0], data[1], data[2], data[3]);
#endif
}
static inline void getCpuidEx(unsigned int eaxIn, unsigned int ecxIn, unsigned int data[4])
{
#ifdef _MSC_VER
__cpuidex(reinterpret_cast<int*>(data), eaxIn, ecxIn);
#else
__cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]);
#endif
}
static inline uint64 getXfeature()
{
#ifdef _MSC_VER
return _xgetbv(0);
#else
unsigned int eax, edx;
// xgetvb is not support on gcc 4.2
// __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
__asm__ volatile(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0));
return ((uint64)edx << 32) | eax;
#endif
}
typedef uint64 Type;
static const Type NONE = 0;
static const Type tMMX = 1 << 0;
static const Type tMMX2 = 1 << 1;
static const Type tCMOV = 1 << 2;
static const Type tSSE = 1 << 3;
static const Type tSSE2 = 1 << 4;
static const Type tSSE3 = 1 << 5;
static const Type tSSSE3 = 1 << 6;
static const Type tSSE41 = 1 << 7;
static const Type tSSE42 = 1 << 8;
static const Type tPOPCNT = 1 << 9;
static const Type tAESNI = 1 << 10;
static const Type tSSE5 = 1 << 11;
static const Type tOSXSAVE = 1 << 12;
static const Type tPCLMULQDQ = 1 << 13;
static const Type tAVX = 1 << 14;
static const Type tFMA = 1 << 15;
static const Type t3DN = 1 << 16;
static const Type tE3DN = 1 << 17;
static const Type tSSE4a = 1 << 18;
static const Type tRDTSCP = 1 << 19;
static const Type tAVX2 = 1 << 20;
static const Type tBMI1 = 1 << 21; // andn, bextr, blsi, blsmsk, blsr, tzcnt
static const Type tBMI2 = 1 << 22; // bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx
static const Type tLZCNT = 1 << 23;
static const Type tINTEL = 1 << 24;
static const Type tAMD = 1 << 25;
static const Type tENHANCED_REP = 1 << 26; // enhanced rep movsb/stosb
static const Type tRDRAND = 1 << 27;
static const Type tADX = 1 << 28; // adcx, adox
static const Type tRDSEED = 1 << 29; // rdseed
static const Type tSMAP = 1 << 30; // stac
static const Type tHLE = uint64(1) << 31; // xacquire, xrelease, xtest
static const Type tRTM = uint64(1) << 32; // xbegin, xend, xabort
static const Type tF16C = uint64(1) << 33; // vcvtph2ps, vcvtps2ph
static const Type tMOVBE = uint64(1) << 34; // mobve
static const Type tAVX512F = uint64(1) << 35;
static const Type tAVX512DQ = uint64(1) << 36;
static const Type tAVX512IFMA = uint64(1) << 37;
static const Type tAVX512PF = uint64(1) << 38;
static const Type tAVX512ER = uint64(1) << 39;
static const Type tAVX512CD = uint64(1) << 40;
static const Type tAVX512BW = uint64(1) << 41;
static const Type tAVX512VL = uint64(1) << 42;
static const Type tAVX512VBMI = uint64(1) << 43;
static const Type tAVX512_4VNNIW = uint64(1) << 44;
static const Type tAVX512_4FMAPS = uint64(1) << 45;
static const Type tPREFETCHWT1 = uint64(1) << 46;
static const Type tPREFETCHW = uint64(1) << 47;
static const Type tSHA = uint64(1) << 48;
static const Type tMPX = uint64(1) << 49;
Cpu()
: type_(NONE)
{
unsigned int data[4];
getCpuid(0, data);
const unsigned int maxNum = data[0];
static const char intel[] = "ntel";
static const char amd[] = "cAMD";
if (data[2] == get32bitAsBE(amd)) {
type_ |= tAMD;
getCpuid(0x80000001, data);
if (data[3] & (1U << 31)) type_ |= t3DN;
if (data[3] & (1U << 15)) type_ |= tCMOV;
if (data[3] & (1U << 30)) type_ |= tE3DN;
if (data[3] & (1U << 22)) type_ |= tMMX2;
if (data[3] & (1U << 27)) type_ |= tRDTSCP;
}
if (data[2] == get32bitAsBE(intel)) {
type_ |= tINTEL;
getCpuid(0x80000001, data);
if (data[3] & (1U << 27)) type_ |= tRDTSCP;
if (data[2] & (1U << 5)) type_ |= tLZCNT;
if (data[2] & (1U << 8)) type_ |= tPREFETCHW;
}
getCpuid(1, data);
if (data[2] & (1U << 0)) type_ |= tSSE3;
if (data[2] & (1U << 9)) type_ |= tSSSE3;
if (data[2] & (1U << 19)) type_ |= tSSE41;
if (data[2] & (1U << 20)) type_ |= tSSE42;
if (data[2] & (1U << 22)) type_ |= tMOVBE;
if (data[2] & (1U << 23)) type_ |= tPOPCNT;
if (data[2] & (1U << 25)) type_ |= tAESNI;
if (data[2] & (1U << 1)) type_ |= tPCLMULQDQ;
if (data[2] & (1U << 27)) type_ |= tOSXSAVE;
if (data[2] & (1U << 30)) type_ |= tRDRAND;
if (data[2] & (1U << 29)) type_ |= tF16C;
if (data[3] & (1U << 15)) type_ |= tCMOV;
if (data[3] & (1U << 23)) type_ |= tMMX;
if (data[3] & (1U << 25)) type_ |= tMMX2 | tSSE;
if (data[3] & (1U << 26)) type_ |= tSSE2;
if (type_ & tOSXSAVE) {
// check XFEATURE_ENABLED_MASK[2:1] = '11b'
uint64 bv = getXfeature();
if ((bv & 6) == 6) {
if (data[2] & (1U << 28)) type_ |= tAVX;
if (data[2] & (1U << 12)) type_ |= tFMA;
if (((bv >> 5) & 7) == 7) {
getCpuidEx(7, 0, data);
if (data[1] & (1U << 16)) type_ |= tAVX512F;
if (type_ & tAVX512F) {
if (data[1] & (1U << 17)) type_ |= tAVX512DQ;
if (data[1] & (1U << 21)) type_ |= tAVX512IFMA;
if (data[1] & (1U << 26)) type_ |= tAVX512PF;
if (data[1] & (1U << 27)) type_ |= tAVX512ER;
if (data[1] & (1U << 28)) type_ |= tAVX512CD;
if (data[1] & (1U << 30)) type_ |= tAVX512BW;
if (data[1] & (1U << 31)) type_ |= tAVX512VL;
if (data[2] & (1U << 1)) type_ |= tAVX512VBMI;
if (data[3] & (1U << 2)) type_ |= tAVX512_4VNNIW;
if (data[3] & (1U << 3)) type_ |= tAVX512_4FMAPS;
}
}
}
}
if (maxNum >= 7) {
getCpuidEx(7, 0, data);
if (type_ & tAVX && data[1] & 0x20) type_ |= tAVX2;
if (data[1] & (1U << 3)) type_ |= tBMI1;
if (data[1] & (1U << 8)) type_ |= tBMI2;
if (data[1] & (1U << 9)) type_ |= tENHANCED_REP;
if (data[1] & (1U << 18)) type_ |= tRDSEED;
if (data[1] & (1U << 19)) type_ |= tADX;
if (data[1] & (1U << 20)) type_ |= tSMAP;
if (data[1] & (1U << 4)) type_ |= tHLE;
if (data[1] & (1U << 11)) type_ |= tRTM;
if (data[1] & (1U << 14)) type_ |= tMPX;
if (data[1] & (1U << 29)) type_ |= tSHA;
if (data[2] & (1U << 0)) type_ |= tPREFETCHWT1;
}
setFamily();
}
void putFamily() const
{
printf("family=%d, model=%X, stepping=%d, extFamily=%d, extModel=%X\n",
family, model, stepping, extFamily, extModel);
printf("display:family=%X, model=%X\n", displayFamily, displayModel);
}
bool has(Type type) const
{
return (type & type_) != 0;
}
};
/*
	Accumulating time-stamp-counter timer.
	Every begin()/end() pair adds the elapsed counter ticks to a running total
	and increases the number of measured intervals by one.
*/
class Clock {
	uint64 clock_; // sum of the ticks of all measured intervals
	int count_; // number of end() calls since construction or clear()
public:
	Clock() : clock_(0), count_(0) {}
	/*
		read the time-stamp counter as one 64-bit value
	*/
	static inline uint64 getRdtsc()
	{
#ifdef _MSC_VER
		return __rdtsc();
#else
		unsigned int lo, hi;
		// rdtsc returns the low half in eax and the high half in edx
		__asm__ volatile("rdtsc" : "=a"(lo), "=d"(hi));
		const uint64 upper = (uint64)hi << 32;
		return upper | lo;
#endif
	}
	/*
		start an interval: the start value is subtracted now and the matching
		end() adds the end value, leaving the difference in the total
	*/
	void begin() { clock_ -= getRdtsc(); }
	/*
		finish the interval started by begin()
	*/
	void end()
	{
		clock_ += getRdtsc();
		++count_;
	}
	int getCount() const { return count_; }
	uint64 getClock() const { return clock_; }
	/*
		reset both the accumulated ticks and the interval count
	*/
	void clear()
	{
		clock_ = 0;
		count_ = 0;
	}
};
#ifdef XBYAK64
/*
	flags that can be OR-ed into the tNum argument of StackFrame's constructor
	to request rcx / rdx in addition to the temporary registers;
	StackFrame masks them out of tNum before using it as a count
*/
const int UseRCX = 1 << 6;
const int UseRDX = 1 << 7;
/*
	Fixed-capacity list (at most maxTblNum entries) of pointers to Reg64 objects.
	Only pointers are stored, so every register passed in must outlive the Pack.
	Out-of-range arguments are reported on stderr and raise Error(ERR_BAD_PARAMETER).
*/
class Pack {
	static const size_t maxTblNum = 10;
	const Xbyak::Reg64 *tbl_[maxTblNum];
	size_t n_; // number of valid entries in tbl_
public:
	Pack() : tbl_(), n_(0) {}
	/*
		make a pack that refers to tbl[0], ..., tbl[n - 1]
	*/
	Pack(const Xbyak::Reg64 *tbl, size_t n) { init(tbl, n); }
	Pack(const Pack& rhs)
		: n_(rhs.n_)
	{
		for (size_t i = 0; i < n_; i++) tbl_[i] = rhs.tbl_[i];
	}
	Pack& operator=(const Pack& rhs)
	{
		n_ = rhs.n_;
		for (size_t i = 0; i < n_; i++) tbl_[i] = rhs.tbl_[i];
		return *this;
	}
	/*
		make a pack from 1 to 10 registers
		NOTE: the arguments are given in reverse order; the LAST argument (t0)
		becomes element 0 and the first argument becomes the last element
	*/
	Pack(const Xbyak::Reg64& t0)
	{ n_ = 1; tbl_[0] = &t0; }
	Pack(const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
	{ n_ = 2; tbl_[0] = &t0; tbl_[1] = &t1; }
	Pack(const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
	{ n_ = 3; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; }
	Pack(const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
	{ n_ = 4; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; }
	Pack(const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
	{ n_ = 5; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; }
	Pack(const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
	{ n_ = 6; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; }
	Pack(const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
	{ n_ = 7; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; }
	Pack(const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
	{ n_ = 8; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; }
	Pack(const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
	{ n_ = 9; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; tbl_[8] = &t8; }
	Pack(const Xbyak::Reg64& t9, const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
	{ n_ = 10; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; tbl_[8] = &t8; tbl_[9] = &t9; }
	/*
		add t as the new last element
		@throw Error(ERR_BAD_PARAMETER) if the pack is already full
	*/
	Pack& append(const Xbyak::Reg64& t)
	{
		if (n_ == maxTblNum) {
			fprintf(stderr, "ERR Pack::can't append\n");
			throw Error(ERR_BAD_PARAMETER);
		}
		tbl_[n_++] = &t;
		return *this;
	}
	/*
		replace the contents with pointers to tbl[0], ..., tbl[n - 1]
		@throw Error(ERR_BAD_PARAMETER) if n > maxTblNum
	*/
	void init(const Xbyak::Reg64 *tbl, size_t n)
	{
		if (n > maxTblNum) {
			fprintf(stderr, "ERR Pack::init bad n=%d\n", (int)n);
			throw Error(ERR_BAD_PARAMETER);
		}
		n_ = n;
		for (size_t i = 0; i < n; i++) {
			tbl_[i] = &tbl[i];
		}
	}
	/*
		return the n-th register
		@throw Error(ERR_BAD_PARAMETER) if n >= size()
	*/
	const Xbyak::Reg64& operator[](size_t n) const
	{
		if (n >= n_) {
			fprintf(stderr, "ERR Pack bad n=%d\n", (int)n);
			throw Error(ERR_BAD_PARAMETER);
		}
		return *tbl_[n];
	}
	size_t size() const { return n_; }
	/*
		get tbl[pos, pos + num)
		@param pos [in] index of the first element to copy
		@param num [in] number of elements; size_t(-1) means "up to the end"
		@throw Error(ERR_BAD_PARAMETER) if the range is not inside [0, size())
	*/
	Pack sub(size_t pos, size_t num = size_t(-1)) const
	{
		// "n_ - pos" is computed only for a valid pos, because it wraps around otherwise
		if (pos <= n_ && num == size_t(-1)) num = n_ - pos;
		// compare num with the remaining length instead of testing pos + num,
		// which can overflow and let an out-of-range request pass
		if (pos > n_ || num > n_ - pos) {
			fprintf(stderr, "ERR Pack::sub bad pos=%d, num=%d\n", (int)pos, (int)num);
			throw Error(ERR_BAD_PARAMETER);
		}
		Pack pack;
		pack.n_ = num;
		for (size_t i = 0; i < num; i++) {
			pack.tbl_[i] = tbl_[pos + i];
		}
		return pack;
	}
	/*
		print the names of all registers on one line of stdout
	*/
	void put() const
	{
		for (size_t i = 0; i < n_; i++) {
			printf("%s ", tbl_[i]->toString());
		}
		printf("\n");
	}
};
/*
	Emits the prolog and epilog of a generated function and assigns registers
	to its parameters (p) and temporaries (t).
	The prolog is emitted by the constructor; the epilog by close(), which the
	destructor calls automatically unless makeEpilog was false.
*/
class StackFrame {
	// noSaveNum : number of leading entries of the order table that are used without being saved
	// rcxPos, rdxPos : index of rcx / rdx in the order table
#ifdef XBYAK64_WIN
	static const int noSaveNum = 6;
	static const int rcxPos = 0;
	static const int rdxPos = 1;
#else
	static const int noSaveNum = 8;
	static const int rcxPos = 3;
	static const int rdxPos = 2;
#endif
	static const int maxPNum = 4; // capacity of pTbl_
	static const int maxTNum = 10; // capacity of tTbl_
	Xbyak::CodeGenerator *code_;
	int pNum_;
	int tNum_; // number of temporaries without the UseRCX / UseRDX flags
	bool useRcx_;
	bool useRdx_;
	int saveNum_; // number of registers saved by the prolog
	int P_; // number of bytes subtracted from rsp by the prolog
	bool makeEpilog_;
	Xbyak::Reg64 pTbl_[maxPNum];
	Xbyak::Reg64 tTbl_[maxTNum];
	Pack p_;
	Pack t_;
	// not copyable
	StackFrame(const StackFrame&);
	void operator=(const StackFrame&);
public:
	const Pack& p; // parameter registers
	const Pack& t; // temporary registers
	/*
		make stack frame
		@param code [in] code generator that receives the prolog (and later the epilog)
		@param pNum [in] num of function parameter(0 <= pNum <= 4)
		@param tNum [in] num of temporary register(0 <= tNum <= 10, with UseRCX, UseRDX)
		@param stackSizeByte [in] local stack size
		@param makeEpilog [in] automatically call close() if true
		@throw Error(ERR_BAD_PNUM) if pNum is out of range
		@throw Error(ERR_BAD_TNUM) if tNum is out of range or more than 14 registers are needed in total
		you can use
		rax
		gp0, ..., gp(pNum - 1)
		gt0, ..., gt(tNum-1)
		rcx if tNum & UseRCX
		rdx if tNum & UseRDX
		rsp[0..stackSizeByte - 1]
	*/
	StackFrame(Xbyak::CodeGenerator *code, int pNum, int tNum = 0, int stackSizeByte = 0, bool makeEpilog = true)
		: code_(code)
		, pNum_(pNum)
		, tNum_(tNum & ~(UseRCX | UseRDX))
		, useRcx_((tNum & UseRCX) != 0)
		, useRdx_((tNum & UseRDX) != 0)
		, saveNum_(0)
		, P_(0)
		, makeEpilog_(makeEpilog)
		, p(p_)
		, t(t_)
	{
		using namespace Xbyak;
		if (pNum < 0 || pNum > maxPNum) throw Error(ERR_BAD_PNUM);
		// tTbl_ has only maxTNum entries: reject a bad count here, before any
		// instruction is emitted and before the table is filled
		if (tNum_ < 0 || tNum_ > maxTNum) throw Error(ERR_BAD_TNUM);
		const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0);
		if (allRegNum < pNum || allRegNum > 14) throw Error(ERR_BAD_TNUM);
		const Reg64& _rsp = code->rsp;
		const AddressFrame& _ptr = code->ptr;
		// only the registers beyond the first noSaveNum table entries are saved
		saveNum_ = (std::max)(0, allRegNum - noSaveNum);
		const int *tbl = getOrderTbl() + noSaveNum;
		// P_ is counted in 8-byte slots first: saved registers plus the local area rounded up
		P_ = saveNum_ + (stackSizeByte + 7) / 8;
		if (P_ > 0 && (P_ & 1) == 0) P_++; // here (rsp % 16) == 8, then increment P_ for 16 byte alignment
		P_ *= 8;
		if (P_ > 0) code->sub(_rsp, P_);
#ifdef XBYAK64_WIN
		// the first four saved registers go to [rsp + P_ + 8 ...], i.e. above the
		// return address (presumably the caller-provided shadow space)
		for (int i = 0; i < (std::min)(saveNum_, 4); i++) {
			code->mov(_ptr [_rsp + P_ + (i + 1) * 8], Reg64(tbl[i]));
		}
		// the remaining ones go to the top of the area reserved above
		for (int i = 4; i < saveNum_; i++) {
			code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i]));
		}
#else
		// saved registers occupy the top saveNum_ slots of the area reserved above
		for (int i = 0; i < saveNum_; i++) {
			code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i]));
		}
#endif
		// hand out registers in table order: parameters first, then temporaries
		int pos = 0;
		for (int i = 0; i < pNum; i++) {
			pTbl_[i] = Xbyak::Reg64(getRegIdx(pos));
		}
		for (int i = 0; i < tNum_; i++) {
			tTbl_[i] = Xbyak::Reg64(getRegIdx(pos));
		}
		// a parameter that arrives in rcx / rdx is moved to r10 / r11 when that
		// register was requested for explicit use
		if (useRcx_ && rcxPos < pNum) code_->mov(code_->r10, code_->rcx);
		if (useRdx_ && rdxPos < pNum) code_->mov(code_->r11, code_->rdx);
		p_.init(pTbl_, pNum);
		t_.init(tTbl_, tNum_);
	}
	/*
		make epilog manually
		restores the registers saved by the prolog and releases the stack area
		@param callRet [in] call ret() if true
	*/
	void close(bool callRet = true)
	{
		using namespace Xbyak;
		const Reg64& _rsp = code_->rsp;
		const AddressFrame& _ptr = code_->ptr;
		const int *tbl = getOrderTbl() + noSaveNum;
		// the loads mirror the stores emitted by the constructor
#ifdef XBYAK64_WIN
		for (int i = 0; i < (std::min)(saveNum_, 4); i++) {
			code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ + (i + 1) * 8]);
		}
		for (int i = 4; i < saveNum_; i++) {
			code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]);
		}
#else
		for (int i = 0; i < saveNum_; i++) {
			code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]);
		}
#endif
		if (P_ > 0) code_->add(_rsp, P_);
		if (callRet) code_->ret();
	}
	/*
		emit the epilog if makeEpilog was true
		an exception must not leave the destructor, so any failure is printed
		and the process is terminated with exit(1)
	*/
	~StackFrame()
	{
		if (!makeEpilog_) return;
		try {
			close();
		} catch (const std::exception& e) {
			printf("ERR:StackFrame %s\n", e.what());
			exit(1);
		} catch (...) {
			printf("ERR:StackFrame otherwise\n");
			exit(1);
		}
	}
private:
	/*
		return the table of 14 register indices in the order they are handed out;
		the first noSaveNum entries are the ones that are not saved by the prolog
	*/
	const int *getOrderTbl() const
	{
		using namespace Xbyak;
		static const int tbl[] = {
#ifdef XBYAK64_WIN
			Operand::RCX, Operand::RDX, Operand::R8, Operand::R9, Operand::R10, Operand::R11, Operand::RDI, Operand::RSI,
#else
			Operand::RDI, Operand::RSI, Operand::RDX, Operand::RCX, Operand::R8, Operand::R9, Operand::R10, Operand::R11,
#endif
			Operand::RBX, Operand::RBP, Operand::R12, Operand::R13, Operand::R14, Operand::R15
		};
		return &tbl[0];
	}
	/*
		return the next register index of the order table and advance pos
		when rcx (rdx) is requested for explicit use, r10 (r11) is handed out in
		its place and the table entry for r10 (r11) itself is skipped
	*/
	int getRegIdx(int& pos) const
	{
		assert(pos < 14);
		using namespace Xbyak;
		const int *tbl = getOrderTbl();
		int r = tbl[pos++];
		if (useRcx_) {
			if (r == Operand::RCX) { return Operand::R10; }
			// no early return here: the next entry may be r11 and is checked below
			if (r == Operand::R10) { r = tbl[pos++]; }
		}
		if (useRdx_) {
			if (r == Operand::RDX) { return Operand::R11; }
			if (r == Operand::R11) { return tbl[pos++]; }
		}
		return r;
	}
};
#endif
} } // end of util
#endif