Port x64 backend to xbyak

This commit is contained in:
MerryMage 2016-08-24 20:07:08 +01:00
parent 611cffb612
commit e32812cd00
25 changed files with 1638 additions and 5323 deletions

View file

@ -29,6 +29,7 @@ else()
-Wfatal-errors
-Wno-unused-parameter
-Wno-missing-braces)
add_compile_options(-fno-operator-names)
if (ARCHITECTURE_x86_64)
add_compile_options(-msse4.1)
@ -67,6 +68,9 @@ include_directories(${Boost_INCLUDE_DIRS})
include_directories(externals/catch)
enable_testing(true) # Enables unit-testing.
# Include Xbyak
include_directories(externals/xbyak/xbyak)
# Include LLVM
if (DYNARMIC_USE_LLVM)
find_package(LLVM REQUIRED CONFIG)

View file

@ -2,17 +2,15 @@ include_directories(.)
include(CreateDirectoryGroups)
set(SRCS
backend_x64/abi.cpp
backend_x64/block_of_code.cpp
backend_x64/emit_x64.cpp
backend_x64/hostloc.cpp
backend_x64/interface_x64.cpp
backend_x64/jitstate.cpp
backend_x64/reg_alloc.cpp
common/memory_pool.cpp
common/memory_util.cpp
common/string_util.cpp
common/x64/abi.cpp
common/x64/cpu_detect.cpp
common/x64/emitter.cpp
frontend/arm_types.cpp
frontend/disassembler/disassembler_arm.cpp
frontend/disassembler/disassembler_thumb.cpp
@ -41,24 +39,21 @@ set(SRCS
)
set(HEADERS
backend_x64/abi.h
backend_x64/block_of_code.h
backend_x64/emit_x64.h
backend_x64/hostloc.h
backend_x64/jitstate.h
backend_x64/reg_alloc.h
common/assert.h
common/bit_set.h
common/bit_util.h
common/code_block.h
common/common_types.h
common/intrusive_list.h
common/iterator_util.h
common/memory_pool.h
common/memory_util.h
common/mp.h
common/scope_exit.h
common/string_util.h
common/x64/abi.h
common/x64/cpu_detect.h
common/x64/emitter.h
frontend/arm_types.h
frontend/arm/FPSCR.h
frontend/decoder/arm.h

113
src/backend_x64/abi.cpp Normal file
View file

@ -0,0 +1,113 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// 24th August 2016: This code was modified for Dynarmic.
#include <xbyak.h>
#include "backend_x64/abi.h"
#include "common/common_types.h"
#include "common/iterator_util.h"
namespace Dynarmic {
namespace BackendX64 {
constexpr size_t GPR_SIZE = 8;
constexpr size_t XMM_SIZE = 16;
struct FrameInfo {
size_t stack_subtraction = 0;
size_t xmm_offset = 0;
};
static FrameInfo CalculateFrameInfo(size_t num_gprs, size_t num_xmms, size_t frame_size) {
FrameInfo frame_info = {};
size_t rsp_alignment = 8; // We are always 8-byte aligned initially
rsp_alignment -= num_gprs * GPR_SIZE;
if (num_xmms > 0) {
frame_info.stack_subtraction = rsp_alignment & 0xF;
frame_info.stack_subtraction += num_xmms * XMM_SIZE;
}
size_t xmm_base = frame_info.stack_subtraction;
frame_info.stack_subtraction += frame_size;
frame_info.stack_subtraction += ABI_SHADOW_SPACE;
rsp_alignment -= frame_info.stack_subtraction;
frame_info.stack_subtraction += rsp_alignment & 0xF;
frame_info.xmm_offset = frame_info.stack_subtraction - xmm_base;
return frame_info;
}
void ABI_PushCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size) {
using namespace Xbyak::util;
const size_t num_gprs = std::count_if(ABI_ALL_CALLEE_SAVE.begin(), ABI_ALL_CALLEE_SAVE.end(), HostLocIsGPR);
const size_t num_xmms = std::count_if(ABI_ALL_CALLEE_SAVE.begin(), ABI_ALL_CALLEE_SAVE.end(), HostLocIsXMM);
FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size);
for (HostLoc gpr : ABI_ALL_CALLEE_SAVE) {
if (HostLocIsGPR(gpr)) {
code->push(HostLocToReg64(gpr));
}
}
if (frame_info.stack_subtraction != 0) {
code->sub(rsp, u32(frame_info.stack_subtraction));
}
size_t xmm_offset = frame_info.xmm_offset;
for (HostLoc xmm : ABI_ALL_CALLEE_SAVE) {
if (HostLocIsXMM(xmm)) {
code->movaps(code->xword[rsp + xmm_offset], HostLocToXmm(xmm));
xmm_offset += XMM_SIZE;
}
}
}
void ABI_PopCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size) {
using namespace Xbyak::util;
const size_t num_gprs = std::count_if(ABI_ALL_CALLEE_SAVE.begin(), ABI_ALL_CALLEE_SAVE.end(), HostLocIsGPR);
const size_t num_xmms = std::count_if(ABI_ALL_CALLEE_SAVE.begin(), ABI_ALL_CALLEE_SAVE.end(), HostLocIsXMM);
FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size);
size_t xmm_offset = frame_info.xmm_offset;
for (HostLoc xmm : Common::Reverse(ABI_ALL_CALLEE_SAVE)) {
if (HostLocIsXMM(xmm)) {
code->movaps(HostLocToXmm(xmm), code->xword[rsp + xmm_offset]);
xmm_offset += XMM_SIZE;
}
}
if (frame_info.stack_subtraction != 0) {
code->add(rsp, u32(frame_info.stack_subtraction));
}
for (HostLoc gpr : Common::Reverse(ABI_ALL_CALLEE_SAVE)) {
if (HostLocIsGPR(gpr)) {
code->pop(HostLocToReg64(gpr));
}
}
}
} // namespace BackendX64
} // namespace Dynarmic

119
src/backend_x64/abi.h Normal file
View file

@ -0,0 +1,119 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <array>
#include "backend_x64/hostloc.h"
namespace Dynarmic {
namespace BackendX64 {
#ifdef _WIN32
constexpr HostLoc ABI_RETURN = HostLoc::RAX;
constexpr HostLoc ABI_PARAM1 = HostLoc::RCX;
constexpr HostLoc ABI_PARAM2 = HostLoc::RDX;
constexpr HostLoc ABI_PARAM3 = HostLoc::R8;
constexpr HostLoc ABI_PARAM4 = HostLoc::R9;
constexpr std::array<HostLoc, 13> ABI_ALL_CALLER_SAVE = {
HostLoc::RAX,
HostLoc::RCX,
HostLoc::RDX,
HostLoc::R8,
HostLoc::R9,
HostLoc::R10,
HostLoc::R11,
HostLoc::XMM0,
HostLoc::XMM1,
HostLoc::XMM2,
HostLoc::XMM3,
HostLoc::XMM4,
HostLoc::XMM5,
};
constexpr std::array<HostLoc, 18> ABI_ALL_CALLEE_SAVE = {
HostLoc::RBX,
HostLoc::RSI,
HostLoc::RDI,
HostLoc::RBP,
HostLoc::R12,
HostLoc::R13,
HostLoc::R14,
HostLoc::R15,
HostLoc::XMM6,
HostLoc::XMM7,
HostLoc::XMM8,
HostLoc::XMM9,
HostLoc::XMM10,
HostLoc::XMM11,
HostLoc::XMM12,
HostLoc::XMM13,
HostLoc::XMM14,
HostLoc::XMM15,
};
constexpr size_t ABI_SHADOW_SPACE = 32; // bytes
#else
constexpr HostLoc ABI_RETURN = HostLoc::RAX;
constexpr HostLoc ABI_PARAM1 = HostLoc::RDI;
constexpr HostLoc ABI_PARAM2 = HostLoc::RSI;
constexpr HostLoc ABI_PARAM3 = HostLoc::RDX;
constexpr HostLoc ABI_PARAM4 = HostLoc::RCX;
constexpr std::array<HostLoc, 25> ABI_ALL_CALLER_SAVE = {
HostLoc::RAX,
HostLoc::RCX,
HostLoc::RDX,
HostLoc::RDI,
HostLoc::RSI,
HostLoc::R8,
HostLoc::R9,
HostLoc::R10,
HostLoc::R11,
HostLoc::XMM0,
HostLoc::XMM1,
HostLoc::XMM2,
HostLoc::XMM3,
HostLoc::XMM4,
HostLoc::XMM5,
HostLoc::XMM6,
HostLoc::XMM7,
HostLoc::XMM8,
HostLoc::XMM9,
HostLoc::XMM10,
HostLoc::XMM11,
HostLoc::XMM12,
HostLoc::XMM13,
HostLoc::XMM14,
HostLoc::XMM15,
};
constexpr std::array<HostLoc, 6> ABI_ALL_CALLEE_SAVE = {
HostLoc::RBX,
HostLoc::RBP,
HostLoc::R12,
HostLoc::R13,
HostLoc::R14,
HostLoc::R15,
};
constexpr size_t ABI_SHADOW_SPACE = 0; // bytes
#endif
static_assert(ABI_ALL_CALLER_SAVE.size() + ABI_ALL_CALLEE_SAVE.size() == 31, "Invalid total number of registers");
void ABI_PushCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size = 0);
void ABI_PopCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size = 0);
} // namespace BackendX64
} // namespace Dynarmic

View file

@ -6,27 +6,22 @@
#include <limits>
#include <xbyak.h>
#include "backend_x64/abi.h"
#include "backend_x64/block_of_code.h"
#include "backend_x64/jitstate.h"
#include "common/x64/abi.h"
using namespace Gen;
#include "common/assert.h"
namespace Dynarmic {
namespace BackendX64 {
BlockOfCode::BlockOfCode() : Gen::XCodeBlock() {
AllocCodeSpace(128 * 1024 * 1024);
BlockOfCode::BlockOfCode() : Xbyak::CodeGenerator(128 * 1024 * 1024) {
ClearCache(false);
}
void BlockOfCode::ClearCache(bool poison_memory) {
if (poison_memory) {
ClearCodeSpace();
} else {
ResetCodePtr();
}
reset();
GenConstants();
GenRunCode();
GenReturnFromRunCode();
@ -42,68 +37,116 @@ size_t BlockOfCode::RunCode(JitState* jit_state, CodePtr basic_block, size_t cyc
}
void BlockOfCode::ReturnFromRunCode(bool MXCSR_switch) {
JMP(MXCSR_switch ? return_from_run_code : return_from_run_code_without_mxcsr_switch, true);
jmp(MXCSR_switch ? return_from_run_code : return_from_run_code_without_mxcsr_switch);
}
void BlockOfCode::GenConstants() {
const_FloatNegativeZero32 = AlignCode16();
Write32(0x80000000u);
const_FloatNaN32 = AlignCode16();
Write32(0x7fc00000u);
const_FloatNonSignMask32 = AlignCode16();
Write64(0x7fffffffu);
const_FloatNegativeZero64 = AlignCode16();
Write64(0x8000000000000000u);
const_FloatNaN64 = AlignCode16();
Write64(0x7ff8000000000000u);
const_FloatNonSignMask64 = AlignCode16();
Write64(0x7fffffffffffffffu);
const_FloatPenultimatePositiveDenormal64 = AlignCode16();
Write64(0x000ffffffffffffeu);
const_FloatMinS32 = AlignCode16();
Write64(0xc1e0000000000000u); // -2147483648 as a double
const_FloatMaxS32 = AlignCode16();
Write64(0x41dfffffffc00000u); // 2147483647 as a double
const_FloatPositiveZero32 = const_FloatPositiveZero64 = const_FloatMinU32 = AlignCode16();
Write64(0x0000000000000000u); // 0 as a double
const_FloatMaxU32 = AlignCode16();
Write64(0x41efffffffe00000u); // 4294967295 as a double
AlignCode16();
align();
L(const_FloatNegativeZero32);
dd(0x80000000u);
align();
L(const_FloatNaN32);
dd(0x7fc00000u);
align();
L(const_FloatNonSignMask32);
dq(0x7fffffffu);
align();
L(const_FloatNegativeZero64);
dq(0x8000000000000000u);
align();
L(const_FloatNaN64);
dq(0x7ff8000000000000u);
align();
L(const_FloatNonSignMask64);
dq(0x7fffffffffffffffu);
align();
L(const_FloatPenultimatePositiveDenormal64);
dq(0x000ffffffffffffeu);
align();
L(const_FloatMinS32);
dq(0xc1e0000000000000u); // -2147483648 as a double
align();
L(const_FloatMaxS32);
dq(0x41dfffffffc00000u); // 2147483647 as a double
align();
L(const_FloatPositiveZero32);
L(const_FloatPositiveZero64);
L(const_FloatMinU32);
dq(0x0000000000000000u); // 0 as a double
align();
L(const_FloatMaxU32);
dq(0x41efffffffe00000u); // 4294967295 as a double
align();
}
void BlockOfCode::GenRunCode() {
run_code = reinterpret_cast<RunCodeFuncType>(const_cast<u8*>(GetCodePtr()));
align();
run_code = getCurr<RunCodeFuncType>();
// This serves two purposes:
// 1. It saves all the registers we as a callee need to save.
// 2. It aligns the stack so that the code the JIT emits can assume
// that the stack is appropriately aligned for CALLs.
ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
ABI_PushCalleeSaveRegistersAndAdjustStack(this);
MOV(64, R(R15), R(ABI_PARAM1));
mov(r15, ABI_PARAM1);
SwitchMxcsrOnEntry();
JMPptr(R(ABI_PARAM2));
jmp(ABI_PARAM2);
}
void BlockOfCode::GenReturnFromRunCode() {
return_from_run_code = GetCodePtr();
return_from_run_code = getCurr<const void*>();
SwitchMxcsrOnExit();
return_from_run_code_without_mxcsr_switch = GetCodePtr();
return_from_run_code_without_mxcsr_switch = getCurr<const void*>();
ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
RET();
ABI_PopCalleeSaveRegistersAndAdjustStack(this);
ret();
}
void BlockOfCode::SwitchMxcsrOnEntry() {
STMXCSR(MDisp(R15, offsetof(JitState, save_host_MXCSR)));
LDMXCSR(MDisp(R15, offsetof(JitState, guest_MXCSR)));
stmxcsr(dword[r15 + offsetof(JitState, save_host_MXCSR)]);
ldmxcsr(dword[r15 + offsetof(JitState, guest_MXCSR)]);
}
void BlockOfCode::SwitchMxcsrOnExit() {
STMXCSR(MDisp(R15, offsetof(JitState, guest_MXCSR)));
LDMXCSR(MDisp(R15, offsetof(JitState, save_host_MXCSR)));
stmxcsr(dword[r15 + offsetof(JitState, guest_MXCSR)]);
ldmxcsr(dword[r15 + offsetof(JitState, save_host_MXCSR)]);
}
void BlockOfCode::CallFunction(const void* fn) {
u64 distance = u64(fn) - (getCurr<u64>() + 5);
if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
// Far call
mov(rax, u64(fn));
call(rax);
} else {
call(fn);
}
}
void BlockOfCode::SetCodePtr(CodePtr ptr) {
// The "size" defines where top_, the insertion point, is.
size_t required_size = reinterpret_cast<const u8*>(ptr) - getCode();
setSize(required_size);
}
void BlockOfCode::EnsurePatchLocationSize(CodePtr begin, size_t size) {
size_t current_size = getCurr<const u8*>() - reinterpret_cast<const u8*>(begin);
ASSERT(current_size <= size);
nop(size - current_size);
}
} // namespace BackendX64

View file

@ -7,15 +7,17 @@
#pragma once
#include <array>
#include <vector>
#include <xbyak.h>
#include "backend_x64/jitstate.h"
#include "common/common_types.h"
#include "common/x64/emitter.h"
namespace Dynarmic {
namespace BackendX64 {
class BlockOfCode final : public Gen::XCodeBlock {
class BlockOfCode final : public Xbyak::CodeGenerator {
public:
BlockOfCode();
@ -30,73 +32,99 @@ public:
void SwitchMxcsrOnEntry();
/// Code emitter: Makes saved host MXCSR the current MXCSR
void SwitchMxcsrOnExit();
/// Code emitter: Calls the function
void CallFunction(const void* fn);
Gen::OpArg MFloatPositiveZero32() const {
return Gen::M(const_FloatPositiveZero32);
Xbyak::Address MFloatPositiveZero32() {
return xword[rip + const_FloatPositiveZero32];
}
Gen::OpArg MFloatNegativeZero32() const {
return Gen::M(const_FloatNegativeZero32);
Xbyak::Address MFloatNegativeZero32() {
return xword[rip + const_FloatNegativeZero32];
}
Gen::OpArg MFloatNaN32() const {
return Gen::M(const_FloatNaN32);
Xbyak::Address MFloatNaN32() {
return xword[rip + const_FloatNaN32];
}
Gen::OpArg MFloatNonSignMask32() const {
return Gen::M(const_FloatNonSignMask32);
Xbyak::Address MFloatNonSignMask32() {
return xword[rip + const_FloatNonSignMask32];
}
Gen::OpArg MFloatPositiveZero64() const {
return Gen::M(const_FloatPositiveZero64);
Xbyak::Address MFloatPositiveZero64() {
return xword[rip + const_FloatPositiveZero64];
}
Gen::OpArg MFloatNegativeZero64() const {
return Gen::M(const_FloatNegativeZero64);
Xbyak::Address MFloatNegativeZero64() {
return xword[rip + const_FloatNegativeZero64];
}
Gen::OpArg MFloatNaN64() const {
return Gen::M(const_FloatNaN64);
Xbyak::Address MFloatNaN64() {
return xword[rip + const_FloatNaN64];
}
Gen::OpArg MFloatNonSignMask64() const {
return Gen::M(const_FloatNonSignMask64);
Xbyak::Address MFloatNonSignMask64() {
return xword[rip + const_FloatNonSignMask64];
}
Gen::OpArg MFloatPenultimatePositiveDenormal64() const {
return Gen::M(const_FloatPenultimatePositiveDenormal64);
Xbyak::Address MFloatPenultimatePositiveDenormal64() {
return xword[rip + const_FloatPenultimatePositiveDenormal64];
}
Gen::OpArg MFloatMinS32() const {
return Gen::M(const_FloatMinS32);
Xbyak::Address MFloatMinS32() {
return xword[rip + const_FloatMinS32];
}
Gen::OpArg MFloatMaxS32() const {
return Gen::M(const_FloatMaxS32);
Xbyak::Address MFloatMaxS32() {
return xword[rip + const_FloatMaxS32];
}
Gen::OpArg MFloatMinU32() const {
return Gen::M(const_FloatMinU32);
Xbyak::Address MFloatMinU32() {
return xword[rip + const_FloatMinU32];
}
Gen::OpArg MFloatMaxU32() const {
return Gen::M(const_FloatMaxU32);
Xbyak::Address MFloatMaxU32() {
return xword[rip + const_FloatMaxU32];
}
CodePtr GetReturnFromRunCodeAddress() const {
const void* GetReturnFromRunCodeAddress() const {
return return_from_run_code;
}
void int3() { db(0xCC); }
void nop(size_t size = 0) {
for (size_t i = 0; i < size; i++) {
db(0x90);
}
}
void SetCodePtr(CodePtr ptr);
void EnsurePatchLocationSize(CodePtr begin, size_t size);
#ifdef _WIN32
Xbyak::Reg64 ABI_RETURN = rax;
Xbyak::Reg64 ABI_PARAM1 = rcx;
Xbyak::Reg64 ABI_PARAM2 = rdx;
Xbyak::Reg64 ABI_PARAM3 = r8;
Xbyak::Reg64 ABI_PARAM4 = r9;
#else
Xbyak::Reg64 ABI_RETURN = rax;
Xbyak::Reg64 ABI_PARAM1 = rdi;
Xbyak::Reg64 ABI_PARAM2 = rsi;
Xbyak::Reg64 ABI_PARAM3 = rdx;
Xbyak::Reg64 ABI_PARAM4 = rcx;
#endif
private:
const u8* const_FloatPositiveZero32 = nullptr;
const u8* const_FloatNegativeZero32 = nullptr;
const u8* const_FloatNaN32 = nullptr;
const u8* const_FloatNonSignMask32 = nullptr;
const u8* const_FloatPositiveZero64 = nullptr;
const u8* const_FloatNegativeZero64 = nullptr;
const u8* const_FloatNaN64 = nullptr;
const u8* const_FloatNonSignMask64 = nullptr;
const u8* const_FloatPenultimatePositiveDenormal64 = nullptr;
const u8* const_FloatMinS32 = nullptr;
const u8* const_FloatMaxS32 = nullptr;
const u8* const_FloatMinU32 = nullptr;
const u8* const_FloatMaxU32 = nullptr;
Xbyak::Label const_FloatPositiveZero32;
Xbyak::Label const_FloatNegativeZero32;
Xbyak::Label const_FloatNaN32;
Xbyak::Label const_FloatNonSignMask32;
Xbyak::Label const_FloatPositiveZero64;
Xbyak::Label const_FloatNegativeZero64;
Xbyak::Label const_FloatNaN64;
Xbyak::Label const_FloatNonSignMask64;
Xbyak::Label const_FloatPenultimatePositiveDenormal64;
Xbyak::Label const_FloatMinS32;
Xbyak::Label const_FloatMaxS32;
Xbyak::Label const_FloatMinU32;
Xbyak::Label const_FloatMaxU32;
void GenConstants();
using RunCodeFuncType = void(*)(JitState*, CodePtr);
RunCodeFuncType run_code = nullptr;
void GenRunCode();
CodePtr return_from_run_code = nullptr;
CodePtr return_from_run_code_without_mxcsr_switch = nullptr;
const void* return_from_run_code = nullptr;
const void* return_from_run_code_without_mxcsr_switch = nullptr;
void GenReturnFromRunCode();
};

File diff suppressed because it is too large Load diff

View file

@ -11,10 +11,10 @@
#include <vector>
#include <boost/optional.hpp>
#include <xbyak.h>
#include "backend_x64/block_of_code.h"
#include "backend_x64/reg_alloc.h"
#include "common/x64/emitter.h"
#include "frontend/arm_types.h"
#include "frontend/ir/basic_block.h"
#include "frontend/ir/microinstruction.h"

View file

@ -0,0 +1,35 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "backend_x64/hostloc.h"
namespace Dynarmic {
namespace BackendX64 {
Xbyak::Reg64 HostLocToReg64(HostLoc loc) {
DEBUG_ASSERT(HostLocIsGPR(loc));
DEBUG_ASSERT(loc != HostLoc::RSP);
DEBUG_ASSERT(loc != HostLoc::R15);
return Xbyak::Reg64(static_cast<int>(loc));
}
Xbyak::Xmm HostLocToXmm(HostLoc loc) {
DEBUG_ASSERT(HostLocIsXMM(loc));
return Xbyak::Xmm(static_cast<int>(loc) - static_cast<int>(HostLoc::XMM0));
}
Xbyak::Address SpillToOpArg(HostLoc loc) {
using namespace Xbyak::util;
static_assert(std::is_same<decltype(JitState{nullptr}.Spill[0]), u64&>::value, "Spill must be u64");
DEBUG_ASSERT(HostLocIsSpill(loc));
size_t i = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);
return qword[r15 + offsetof(JitState, Spill) + i * sizeof(u64)];
}
} // namespace BackendX64
} // namespace Dynarmic

98
src/backend_x64/hostloc.h Normal file
View file

@ -0,0 +1,98 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <xbyak.h>
#include "backend_x64/jitstate.h"
#include "common/assert.h"
#include "common/common_types.h"
namespace Dynarmic {
namespace BackendX64 {
enum class HostLoc {
// Ordering of the registers is intentional. See also: HostLocToX64.
RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
CF, PF, AF, ZF, SF, OF,
FirstSpill,
};
constexpr size_t HostLocCount = static_cast<size_t>(HostLoc::FirstSpill) + SpillCount;
inline bool HostLocIsGPR(HostLoc reg) {
return reg >= HostLoc::RAX && reg <= HostLoc::R14;
}
inline bool HostLocIsXMM(HostLoc reg) {
return reg >= HostLoc::XMM0 && reg <= HostLoc::XMM15;
}
inline bool HostLocIsRegister(HostLoc reg) {
return HostLocIsGPR(reg) || HostLocIsXMM(reg);
}
inline bool HostLocIsFlag(HostLoc reg) {
return reg >= HostLoc::CF && reg <= HostLoc::OF;
}
inline HostLoc HostLocSpill(size_t i) {
ASSERT_MSG(i < SpillCount, "Invalid spill");
return static_cast<HostLoc>(static_cast<int>(HostLoc::FirstSpill) + i);
}
inline bool HostLocIsSpill(HostLoc reg) {
return reg >= HostLoc::FirstSpill && reg <= HostLocSpill(SpillCount - 1);
}
using HostLocList = std::initializer_list<HostLoc>;
// RSP is preserved for function calls
// R15 contains the JitState pointer
const HostLocList any_gpr = {
HostLoc::RAX,
HostLoc::RBX,
HostLoc::RCX,
HostLoc::RDX,
HostLoc::RSI,
HostLoc::RDI,
HostLoc::RBP,
HostLoc::R8,
HostLoc::R9,
HostLoc::R10,
HostLoc::R11,
HostLoc::R12,
HostLoc::R13,
HostLoc::R14,
};
const HostLocList any_xmm = {
HostLoc::XMM0,
HostLoc::XMM1,
HostLoc::XMM2,
HostLoc::XMM3,
HostLoc::XMM4,
HostLoc::XMM5,
HostLoc::XMM6,
HostLoc::XMM7,
HostLoc::XMM8,
HostLoc::XMM9,
HostLoc::XMM10,
HostLoc::XMM11,
HostLoc::XMM12,
HostLoc::XMM13,
HostLoc::XMM14,
HostLoc::XMM15,
};
Xbyak::Reg64 HostLocToReg64(HostLoc loc);
Xbyak::Xmm HostLocToXmm(HostLoc loc);
Xbyak::Address SpillToOpArg(HostLoc loc);
} // namespace BackendX64
} // namespace Dynarmic

View file

@ -15,7 +15,7 @@ namespace BackendX64 {
class BlockOfCode;
constexpr size_t SpillCount = 32;
constexpr size_t SpillCount = 64;
struct JitState {
explicit JitState(const BlockOfCode* code) { ResetRSB(code); }
@ -54,7 +54,7 @@ struct JitState {
void SetFpscr(u32 FPSCR);
};
using CodePtr = const u8*;
using CodePtr = const void*;
} // namespace BackendX64
} // namespace Dynarmic

View file

@ -7,52 +7,42 @@
#include <algorithm>
#include <map>
#include <xbyak.h>
#include "backend_x64/jitstate.h"
#include "backend_x64/reg_alloc.h"
#include "common/assert.h"
#include "common/x64/emitter.h"
namespace Dynarmic {
namespace BackendX64 {
static Gen::OpArg ImmediateToOpArg(const IR::Value& imm) {
static u32 ImmediateToU32(const IR::Value& imm) {
switch (imm.GetType()) {
case IR::Type::U1:
return Gen::Imm32(imm.GetU1());
return u32(imm.GetU1());
break;
case IR::Type::U8:
return Gen::Imm32(imm.GetU8());
return u32(imm.GetU8());
break;
case IR::Type::U32:
return Gen::Imm32(imm.GetU32());
return u32(imm.GetU32());
break;
default:
ASSERT_MSG(false, "This should never happen.");
}
}
static Gen::X64Reg HostLocToX64(HostLoc loc) {
DEBUG_ASSERT(HostLocIsRegister(loc));
DEBUG_ASSERT(loc != HostLoc::RSP);
// HostLoc is ordered such that the numbers line up.
if (HostLocIsGPR(loc)) {
return static_cast<Gen::X64Reg>(loc);
static Xbyak::Reg HostLocToX64(HostLoc hostloc) {
if (HostLocIsGPR(hostloc)) {
return HostLocToReg64(hostloc);
}
if (HostLocIsXMM(loc)) {
return static_cast<Gen::X64Reg>(size_t(loc) - size_t(HostLoc::XMM0));
if (HostLocIsXMM(hostloc)) {
return HostLocToXmm(hostloc);
}
ASSERT_MSG(false, "This should never happen.");
return Gen::INVALID_REG;
}
static Gen::OpArg SpillToOpArg(HostLoc loc) {
static_assert(std::is_same<decltype(JitState{nullptr}.Spill[0]), u64&>::value, "Spill must be u64");
DEBUG_ASSERT(HostLocIsSpill(loc));
size_t i = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);
return Gen::MDisp(Gen::R15, static_cast<int>(offsetof(JitState, Spill) + i * sizeof(u64)));
}
Gen::X64Reg RegAlloc::DefRegister(IR::Inst* def_inst, HostLocList desired_locations) {
HostLoc RegAlloc::DefHostLocReg(IR::Inst* def_inst, HostLocList desired_locations) {
DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister));
DEBUG_ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
@ -66,14 +56,14 @@ Gen::X64Reg RegAlloc::DefRegister(IR::Inst* def_inst, HostLocList desired_locati
LocInfo(location).def = def_inst;
DEBUG_ASSERT(LocInfo(location).IsDef());
return HostLocToX64(location);
return location;
}
void RegAlloc::RegisterAddDef(IR::Inst* def_inst, const IR::Value& use_inst) {
DEBUG_ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
if (use_inst.IsImmediate()) {
LoadImmediateIntoRegister(use_inst, DefRegister(def_inst, any_gpr));
LoadImmediateIntoHostLocReg(use_inst, DefHostLocReg(def_inst, any_gpr));
return;
}
@ -84,15 +74,15 @@ void RegAlloc::RegisterAddDef(IR::Inst* def_inst, const IR::Value& use_inst) {
DEBUG_ASSERT(LocInfo(location).IsIdle());
}
Gen::X64Reg RegAlloc::UseDefRegister(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations) {
HostLoc RegAlloc::UseDefHostLocReg(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations) {
if (!use_value.IsImmediate()) {
return UseDefRegister(use_value.GetInst(), def_inst, desired_locations);
return UseDefHostLocReg(use_value.GetInst(), def_inst, desired_locations);
}
return LoadImmediateIntoRegister(use_value, DefRegister(def_inst, desired_locations));
return LoadImmediateIntoHostLocReg(use_value, DefHostLocReg(def_inst, desired_locations));
}
Gen::X64Reg RegAlloc::UseDefRegister(IR::Inst* use_inst, IR::Inst* def_inst, HostLocList desired_locations) {
HostLoc RegAlloc::UseDefHostLocReg(IR::Inst* use_inst, IR::Inst* def_inst, HostLocList desired_locations) {
DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister));
DEBUG_ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
DEBUG_ASSERT_MSG(ValueLocation(use_inst), "use_inst has not been defined");
@ -112,9 +102,9 @@ Gen::X64Reg RegAlloc::UseDefRegister(IR::Inst* use_inst, IR::Inst* def_inst, Hos
EmitMove(new_location, current_location);
LocInfo(new_location) = LocInfo(current_location);
LocInfo(current_location) = {};
return HostLocToX64(new_location);
return new_location;
} else {
return HostLocToX64(current_location);
return current_location;
}
}
}
@ -122,17 +112,17 @@ Gen::X64Reg RegAlloc::UseDefRegister(IR::Inst* use_inst, IR::Inst* def_inst, Hos
bool is_floating_point = HostLocIsXMM(*desired_locations.begin());
if (is_floating_point)
DEBUG_ASSERT(use_inst->GetType() == IR::Type::F32 || use_inst->GetType() == IR::Type::F64);
Gen::X64Reg use_reg = UseRegister(use_inst, is_floating_point ? any_xmm : any_gpr);
Gen::X64Reg def_reg = DefRegister(def_inst, desired_locations);
HostLoc use_reg = UseHostLocReg(use_inst, is_floating_point ? any_xmm : any_gpr);
HostLoc def_reg = DefHostLocReg(def_inst, desired_locations);
if (is_floating_point) {
code->MOVAPD(def_reg, Gen::R(use_reg));
code->movapd(HostLocToXmm(def_reg), HostLocToXmm(use_reg));
} else {
code->MOV(64, Gen::R(def_reg), Gen::R(use_reg));
code->mov(HostLocToReg64(def_reg), HostLocToReg64(use_reg));
}
return def_reg;
}
std::tuple<Gen::OpArg, Gen::X64Reg> RegAlloc::UseDefOpArg(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations) {
std::tuple<OpArg, HostLoc> RegAlloc::UseDefOpArgHostLocReg(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations) {
DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister));
DEBUG_ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
DEBUG_ASSERT_MSG(use_value.IsImmediate() || ValueLocation(use_value.GetInst()), "use_inst has not been defined");
@ -147,37 +137,37 @@ std::tuple<Gen::OpArg, Gen::X64Reg> RegAlloc::UseDefOpArg(IR::Value use_value, I
if (HostLocIsSpill(current_location)) {
loc_info.is_being_used = true;
DEBUG_ASSERT(loc_info.IsUse());
return std::make_tuple(SpillToOpArg(current_location), DefRegister(def_inst, desired_locations));
return std::make_tuple(SpillToOpArg(current_location), DefHostLocReg(def_inst, desired_locations));
} else {
loc_info.is_being_used = true;
loc_info.def = def_inst;
DEBUG_ASSERT(loc_info.IsUseDef());
return std::make_tuple(Gen::R(HostLocToX64(current_location)), HostLocToX64(current_location));
return std::make_tuple(HostLocToX64(current_location), current_location);
}
}
}
}
Gen::OpArg use_oparg = UseOpArg(use_value, any_gpr);
Gen::X64Reg def_reg = DefRegister(def_inst, desired_locations);
OpArg use_oparg = UseOpArg(use_value, any_gpr);
HostLoc def_reg = DefHostLocReg(def_inst, desired_locations);
return std::make_tuple(use_oparg, def_reg);
}
Gen::X64Reg RegAlloc::UseRegister(IR::Value use_value, HostLocList desired_locations) {
HostLoc RegAlloc::UseHostLocReg(IR::Value use_value, HostLocList desired_locations) {
if (!use_value.IsImmediate()) {
return UseRegister(use_value.GetInst(), desired_locations);
return UseHostLocReg(use_value.GetInst(), desired_locations);
}
return LoadImmediateIntoRegister(use_value, ScratchRegister(desired_locations));
return LoadImmediateIntoHostLocReg(use_value, ScratchHostLocReg(desired_locations));
}
Gen::X64Reg RegAlloc::UseRegister(IR::Inst* use_inst, HostLocList desired_locations) {
HostLoc RegAlloc::UseHostLocReg(IR::Inst* use_inst, HostLocList desired_locations) {
HostLoc current_location;
bool was_being_used;
std::tie(current_location, was_being_used) = UseHostLoc(use_inst, desired_locations);
if (HostLocIsRegister(current_location)) {
return HostLocToX64(current_location);
return current_location;
} else if (HostLocIsSpill(current_location)) {
HostLoc new_location = SelectARegister(desired_locations);
if (IsRegisterOccupied(new_location)) {
@ -192,16 +182,15 @@ Gen::X64Reg RegAlloc::UseRegister(IR::Inst* use_inst, HostLocList desired_locati
LocInfo(new_location).is_being_used = true;
DEBUG_ASSERT(LocInfo(new_location).IsScratch());
}
return HostLocToX64(new_location);
return new_location;
}
ASSERT_MSG(false, "Unknown current_location type");
return Gen::INVALID_REG;
}
Gen::OpArg RegAlloc::UseOpArg(IR::Value use_value, HostLocList desired_locations) {
OpArg RegAlloc::UseOpArg(IR::Value use_value, HostLocList desired_locations) {
if (use_value.IsImmediate()) {
return ImmediateToOpArg(use_value);
return Xbyak::Operand(); // return a None
}
IR::Inst* use_inst = use_value.GetInst();
@ -211,24 +200,23 @@ Gen::OpArg RegAlloc::UseOpArg(IR::Value use_value, HostLocList desired_locations
std::tie(current_location, was_being_used) = UseHostLoc(use_inst, desired_locations);
if (HostLocIsRegister(current_location)) {
return Gen::R(HostLocToX64(current_location));
return HostLocToX64(current_location);
} else if (HostLocIsSpill(current_location)) {
return SpillToOpArg(current_location);
}
ASSERT_MSG(false, "Unknown current_location type");
return Gen::R(Gen::INVALID_REG);
}
Gen::X64Reg RegAlloc::UseScratchRegister(IR::Value use_value, HostLocList desired_locations) {
HostLoc RegAlloc::UseScratchHostLocReg(IR::Value use_value, HostLocList desired_locations) {
if (!use_value.IsImmediate()) {
return UseScratchRegister(use_value.GetInst(), desired_locations);
return UseScratchHostLocReg(use_value.GetInst(), desired_locations);
}
return LoadImmediateIntoRegister(use_value, ScratchRegister(desired_locations));
return LoadImmediateIntoHostLocReg(use_value, ScratchHostLocReg(desired_locations));
}
Gen::X64Reg RegAlloc::UseScratchRegister(IR::Inst* use_inst, HostLocList desired_locations) {
HostLoc RegAlloc::UseScratchHostLocReg(IR::Inst* use_inst, HostLocList desired_locations) {
DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister));
DEBUG_ASSERT_MSG(ValueLocation(use_inst), "use_inst has not been defined");
ASSERT_MSG(use_inst->HasUses(), "use_inst ran out of uses. (Use-d an IR::Inst* too many times)");
@ -244,7 +232,7 @@ Gen::X64Reg RegAlloc::UseScratchRegister(IR::Inst* use_inst, HostLocList desired
LocInfo(new_location).is_being_used = true;
DecrementRemainingUses(use_inst);
DEBUG_ASSERT(LocInfo(new_location).IsScratch());
return HostLocToX64(new_location);
return new_location;
} else if (HostLocIsRegister(current_location)) {
ASSERT(LocInfo(current_location).IsIdle()
|| LocInfo(current_location).IsUse()
@ -260,14 +248,13 @@ Gen::X64Reg RegAlloc::UseScratchRegister(IR::Inst* use_inst, HostLocList desired
LocInfo(new_location).values.clear();
DecrementRemainingUses(use_inst);
DEBUG_ASSERT(LocInfo(new_location).IsScratch());
return HostLocToX64(new_location);
return new_location;
}
ASSERT_MSG(0, "Invalid current_location");
return Gen::INVALID_REG;
ASSERT_MSG(false, "Invalid current_location");
}
Gen::X64Reg RegAlloc::ScratchRegister(HostLocList desired_locations) {
HostLoc RegAlloc::ScratchHostLocReg(HostLocList desired_locations) {
DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister));
HostLoc location = SelectARegister(desired_locations);
@ -280,7 +267,7 @@ Gen::X64Reg RegAlloc::ScratchRegister(HostLocList desired_locations) {
LocInfo(location).is_being_used = true;
DEBUG_ASSERT(LocInfo(location).IsScratch());
return HostLocToX64(location);
return location;
}
void RegAlloc::HostCall(IR::Inst* result_def, IR::Value arg0_use, IR::Value arg1_use, IR::Value arg2_use, IR::Value arg3_use) {
@ -300,26 +287,26 @@ void RegAlloc::HostCall(IR::Inst* result_def, IR::Value arg0_use, IR::Value arg1
// TODO: This works but almost certainly leads to suboptimal generated code.
for (HostLoc caller_save : OtherCallerSave) {
ScratchRegister({caller_save});
ScratchHostLocReg({caller_save});
}
if (result_def) {
DefRegister(result_def, {AbiReturn});
DefHostLocReg(result_def, {AbiReturn});
} else {
ScratchRegister({AbiReturn});
ScratchHostLocReg({AbiReturn});
}
for (size_t i = 0; i < AbiArgs.size(); i++) {
if (!args[i]->IsEmpty()) {
UseScratchRegister(*args[i], {AbiArgs[i]});
UseScratchHostLocReg(*args[i], {AbiArgs[i]});
} else {
ScratchRegister({AbiArgs[i]});
ScratchHostLocReg({AbiArgs[i]});
}
}
// Flush all xmm registers
for (auto xmm : any_xmm) {
ScratchRegister({xmm});
ScratchHostLocReg({xmm});
}
}
@ -420,17 +407,17 @@ void RegAlloc::Reset() {
void RegAlloc::EmitMove(HostLoc to, HostLoc from) {
if (HostLocIsXMM(to) && HostLocIsSpill(from)) {
code->MOVSD(HostLocToX64(to), SpillToOpArg(from));
code->movsd(HostLocToXmm(to), SpillToOpArg(from));
} else if (HostLocIsSpill(to) && HostLocIsXMM(from)) {
code->MOVSD(SpillToOpArg(to), HostLocToX64(from));
code->movsd(SpillToOpArg(to), HostLocToXmm(from));
} else if (HostLocIsXMM(to) && HostLocIsXMM(from)) {
code->MOVAPS(HostLocToX64(to), Gen::R(HostLocToX64(from)));
code->movaps(HostLocToXmm(to), HostLocToXmm(from));
} else if (HostLocIsGPR(to) && HostLocIsSpill(from)) {
code->MOV(64, Gen::R(HostLocToX64(to)), SpillToOpArg(from));
code->mov(HostLocToReg64(to), SpillToOpArg(from));
} else if (HostLocIsSpill(to) && HostLocIsGPR(from)) {
code->MOV(64, SpillToOpArg(to), Gen::R(HostLocToX64(from)));
code->mov(SpillToOpArg(to), HostLocToReg64(from));
} else if (HostLocIsGPR(to) && HostLocIsGPR(from)){
code->MOV(64, Gen::R(HostLocToX64(to)), Gen::R(HostLocToX64(from)));
code->mov(HostLocToReg64(to), HostLocToReg64(from));
} else {
ASSERT_MSG(false, "Invalid RegAlloc::EmitMove");
}
@ -438,7 +425,7 @@ void RegAlloc::EmitMove(HostLoc to, HostLoc from) {
void RegAlloc::EmitExchange(HostLoc a, HostLoc b) {
if (HostLocIsGPR(a) && HostLocIsGPR(b)) {
code->XCHG(64, Gen::R(HostLocToX64(a)), Gen::R(HostLocToX64(b)));
code->xchg(HostLocToReg64(a), HostLocToReg64(b));
} else if (HostLocIsXMM(a) && HostLocIsXMM(b)) {
ASSERT_MSG(false, "Exchange is unnecessary for XMM registers");
} else {
@ -495,14 +482,17 @@ std::tuple<HostLoc, bool> RegAlloc::UseHostLoc(IR::Inst* use_inst, HostLocList d
return std::make_tuple(static_cast<HostLoc>(-1), false);
}
Gen::X64Reg RegAlloc::LoadImmediateIntoRegister(IR::Value imm, Gen::X64Reg reg) {
HostLoc RegAlloc::LoadImmediateIntoHostLocReg(IR::Value imm, HostLoc host_loc) {
ASSERT_MSG(imm.IsImmediate(), "imm is not an immediate");
Gen::OpArg op_arg = ImmediateToOpArg(imm);
if (op_arg.GetImmValue() == 0)
code->XOR(32, Gen::R(reg), Gen::R(reg));
Xbyak::Reg64 reg = HostLocToReg64(host_loc);
u32 imm_value = ImmediateToU32(imm);
if (imm_value == 0)
code->xor_(reg, reg);
else
code->MOV(32, Gen::R(reg), op_arg);
return reg;
code->mov(reg.cvt32(), imm_value);
return host_loc;
}
} // namespace BackendX64

View file

@ -11,93 +11,55 @@
#include <vector>
#include <boost/optional.hpp>
#include <xbyak.h>
#include "backend_x64/block_of_code.h"
#include "backend_x64/hostloc.h"
#include "backend_x64/jitstate.h"
#include "common/common_types.h"
#include "common/x64/emitter.h"
#include "frontend/ir/microinstruction.h"
#include "frontend/ir/value.h"
namespace Dynarmic {
namespace BackendX64 {
enum class HostLoc {
// Ordering of the registers is intentional. See also: HostLocToX64.
RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
CF, PF, AF, ZF, SF, OF,
FirstSpill,
};
struct OpArg {
OpArg() : type(OPERAND), inner_operand() {}
OpArg(const Xbyak::Address& address) : type(ADDRESS), inner_address(address) {}
OpArg(const Xbyak::Operand& operand) : type(OPERAND), inner_operand(operand) {}
constexpr size_t HostLocCount = static_cast<size_t>(HostLoc::FirstSpill) + SpillCount;
Xbyak::Operand& operator*() {
switch (type) {
case ADDRESS:
return inner_address;
case OPERAND:
return inner_operand;
}
ASSERT_MSG(false, "Unreachable");
}
enum class HostLocState {
Idle, Def, Use, Scratch
};
void setBit(int bits) {
switch (type) {
case ADDRESS:
inner_address.setBit(bits);
return;
case OPERAND:
inner_operand.setBit(bits);
return;
}
ASSERT_MSG(false, "Unreachable");
}
inline bool HostLocIsGPR(HostLoc reg) {
return reg >= HostLoc::RAX && reg <= HostLoc::R14;
}
private:
enum {
OPERAND,
ADDRESS,
} type;
inline bool HostLocIsXMM(HostLoc reg) {
return reg >= HostLoc::XMM0 && reg <= HostLoc::XMM15;
}
inline bool HostLocIsRegister(HostLoc reg) {
return HostLocIsGPR(reg) || HostLocIsXMM(reg);
}
inline bool HostLocIsFlag(HostLoc reg) {
return reg >= HostLoc::CF && reg <= HostLoc::OF;
}
inline HostLoc HostLocSpill(size_t i) {
ASSERT_MSG(i < SpillCount, "Invalid spill");
return static_cast<HostLoc>(static_cast<int>(HostLoc::FirstSpill) + i);
}
inline bool HostLocIsSpill(HostLoc reg) {
return reg >= HostLoc::FirstSpill && reg <= HostLocSpill(SpillCount - 1);
}
using HostLocList = std::initializer_list<HostLoc>;
const HostLocList any_gpr = {
HostLoc::RAX,
HostLoc::RBX,
HostLoc::RCX,
HostLoc::RDX,
HostLoc::RSI,
HostLoc::RDI,
HostLoc::RBP,
HostLoc::R8,
HostLoc::R9,
HostLoc::R10,
HostLoc::R11,
HostLoc::R12,
HostLoc::R13,
HostLoc::R14,
};
const HostLocList any_xmm = {
HostLoc::XMM0,
HostLoc::XMM1,
HostLoc::XMM2,
HostLoc::XMM3,
HostLoc::XMM4,
HostLoc::XMM5,
HostLoc::XMM6,
HostLoc::XMM7,
HostLoc::XMM8,
HostLoc::XMM9,
HostLoc::XMM10,
HostLoc::XMM11,
HostLoc::XMM12,
HostLoc::XMM13,
HostLoc::XMM14,
HostLoc::XMM15,
union {
Xbyak::Operand inner_operand;
Xbyak::Address inner_address;
};
};
class RegAlloc final {
@ -105,21 +67,54 @@ public:
RegAlloc(BlockOfCode* code) : code(code) {}
/// Late-def
Gen::X64Reg DefRegister(IR::Inst* def_inst, HostLocList desired_locations);
Xbyak::Reg64 DefGpr(IR::Inst* def_inst, HostLocList desired_locations = any_gpr) {
return HostLocToReg64(DefHostLocReg(def_inst, desired_locations));
}
Xbyak::Xmm DefXmm(IR::Inst* def_inst, HostLocList desired_locations = any_xmm) {
return HostLocToXmm(DefHostLocReg(def_inst, desired_locations));
}
void RegisterAddDef(IR::Inst* def_inst, const IR::Value& use_inst);
/// Early-use, Late-def
Gen::X64Reg UseDefRegister(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations);
Gen::X64Reg UseDefRegister(IR::Inst* use_inst, IR::Inst* def_inst, HostLocList desired_locations);
std::tuple<Gen::OpArg, Gen::X64Reg> UseDefOpArg(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations);
Xbyak::Reg64 UseDefGpr(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations = any_gpr) {
return HostLocToReg64(UseDefHostLocReg(use_value, def_inst, desired_locations));
}
Xbyak::Xmm UseDefXmm(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations = any_xmm) {
return HostLocToXmm(UseDefHostLocReg(use_value, def_inst, desired_locations));
}
std::tuple<OpArg, Xbyak::Reg64> UseDefOpArgGpr(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations = any_gpr) {
OpArg op;
HostLoc host_loc;
std::tie(op, host_loc) = UseDefOpArgHostLocReg(use_value, def_inst, desired_locations);
return std::make_tuple(op, HostLocToReg64(host_loc));
}
std::tuple<OpArg, Xbyak::Xmm> UseDefOpArgXmm(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations = any_gpr) {
OpArg op;
HostLoc host_loc;
std::tie(op, host_loc) = UseDefOpArgHostLocReg(use_value, def_inst, desired_locations);
return std::make_tuple(op, HostLocToXmm(host_loc));
}
/// Early-use
Gen::X64Reg UseRegister(IR::Value use_value, HostLocList desired_locations);
Gen::X64Reg UseRegister(IR::Inst* use_inst, HostLocList desired_locations);
Gen::OpArg UseOpArg(IR::Value use_value, HostLocList desired_locations);
Xbyak::Reg64 UseGpr(IR::Value use_value, HostLocList desired_locations = any_gpr) {
return HostLocToReg64(UseHostLocReg(use_value, desired_locations));
}
Xbyak::Xmm UseXmm(IR::Value use_value, HostLocList desired_locations = any_xmm) {
return HostLocToXmm(UseHostLocReg(use_value, desired_locations));
}
OpArg UseOpArg(IR::Value use_value, HostLocList desired_locations);
/// Early-use, Destroyed
Gen::X64Reg UseScratchRegister(IR::Value use_value, HostLocList desired_locations);
Gen::X64Reg UseScratchRegister(IR::Inst* use_inst, HostLocList desired_locations);
Xbyak::Reg64 UseScratchGpr(IR::Value use_value, HostLocList desired_locations = any_gpr) {
return HostLocToReg64(UseScratchHostLocReg(use_value, desired_locations));
}
Xbyak::Xmm UseScratchXmm(IR::Value use_value, HostLocList desired_locations = any_xmm) {
return HostLocToXmm(UseScratchHostLocReg(use_value, desired_locations));
}
/// Early-def, Late-use, single-use
Gen::X64Reg ScratchRegister(HostLocList desired_locations);
Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr) {
return HostLocToReg64(ScratchHostLocReg(desired_locations));
}
Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm) {
return HostLocToXmm(ScratchHostLocReg(desired_locations));
}
/// Late-def for result register, Early-use for all arguments, Each value is placed into registers according to host ABI.
void HostCall(IR::Inst* result_def = nullptr, IR::Value arg0_use = {}, IR::Value arg1_use = {}, IR::Value arg2_use = {}, IR::Value arg3_use = {});
@ -141,11 +136,20 @@ private:
bool IsRegisterAllocated(HostLoc loc) const;
bool IsLastUse(IR::Inst* inst) const;
HostLoc DefHostLocReg(IR::Inst* def_inst, HostLocList desired_locations);
HostLoc UseDefHostLocReg(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations);
HostLoc UseDefHostLocReg(IR::Inst* use_inst, IR::Inst* def_inst, HostLocList desired_locations);
std::tuple<OpArg, HostLoc> UseDefOpArgHostLocReg(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations);
HostLoc UseHostLocReg(IR::Value use_value, HostLocList desired_locations);
HostLoc UseHostLocReg(IR::Inst* use_inst, HostLocList desired_locations);
std::tuple<HostLoc, bool> UseHostLoc(IR::Inst* use_inst, HostLocList desired_locations);
HostLoc UseScratchHostLocReg(IR::Value use_value, HostLocList desired_locations);
HostLoc UseScratchHostLocReg(IR::Inst* use_inst, HostLocList desired_locations);
HostLoc ScratchHostLocReg(HostLocList desired_locations);
void EmitMove(HostLoc to, HostLoc from);
void EmitExchange(HostLoc a, HostLoc b);
Gen::X64Reg LoadImmediateIntoRegister(IR::Value imm, Gen::X64Reg reg);
HostLoc LoadImmediateIntoHostLocReg(IR::Value imm, HostLoc reg);
void SpillRegister(HostLoc loc);
HostLoc FindFreeSpill() const;

View file

@ -1,190 +0,0 @@
// This file is under the public domain.
#pragma once
#include <cstddef>
#ifdef _WIN32
#include <intrin.h>
#endif
#include <initializer_list>
#include <new>
#include <type_traits>
#include "common/common_types.h"
// namespace avoids conflict with OS X Carbon; don't use BitSet<T> directly
namespace Common {
// Helper functions:
#ifdef _WIN32
template <typename T>
static inline int CountSetBits(T v)
{
// from https://graphics.stanford.edu/~seander/bithacks.html
// GCC has this built in, but MSVC's intrinsic will only emit the actual
// POPCNT instruction, which we're not depending on
v = v - ((v >> 1) & (T)~(T)0/3);
v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3);
v = (v + (v >> 4)) & (T)~(T)0/255*15;
return (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * 8;
}
static inline int LeastSignificantSetBit(uint8_t val)
{
unsigned long index;
_BitScanForward(&index, val);
return (int)index;
}
static inline int LeastSignificantSetBit(uint16_t val)
{
unsigned long index;
_BitScanForward(&index, val);
return (int)index;
}
static inline int LeastSignificantSetBit(uint32_t val)
{
unsigned long index;
_BitScanForward(&index, val);
return (int)index;
}
static inline int LeastSignificantSetBit(uint64_t val)
{
unsigned long index;
_BitScanForward64(&index, val);
return (int)index;
}
#else
static inline int CountSetBits(uint8_t val) { return __builtin_popcount(val); }
static inline int CountSetBits(uint16_t val) { return __builtin_popcount(val); }
static inline int CountSetBits(uint32_t val) { return __builtin_popcount(val); }
static inline int CountSetBits(uint64_t val) { return __builtin_popcountll(val); }
static inline int LeastSignificantSetBit(uint8_t val) { return __builtin_ctz(val); }
static inline int LeastSignificantSetBit(uint16_t val) { return __builtin_ctz(val); }
static inline int LeastSignificantSetBit(uint32_t val) { return __builtin_ctz(val); }
static inline int LeastSignificantSetBit(uint64_t val) { return __builtin_ctzll(val); }
#endif
// Similar to std::bitset, this is a class which encapsulates a bitset, i.e.
// using the set bits of an integer to represent a set of integers. Like that
// class, it acts like an array of bools:
// BitSet32 bs;
// bs[1] = true;
// but also like the underlying integer ([0] = least significant bit):
// BitSet32 bs2 = ...;
// bs = (bs ^ bs2) & BitSet32(0xffff);
// The following additional functionality is provided:
// - Construction using an initializer list.
// BitSet bs { 1, 2, 4, 8 };
// - Efficiently iterating through the set bits:
// for (int i : bs)
// [i is the *index* of a set bit]
// (This uses the appropriate CPU instruction to find the next set bit in one
// operation.)
// - Counting set bits using .Count() - see comment on that method.
// TODO: use constexpr when MSVC gets out of the Dark Ages
template <typename IntTy>
class BitSet
{
static_assert(!std::is_signed<IntTy>::value, "BitSet should not be used with signed types");
public:
// A reference to a particular bit, returned from operator[].
class Ref
{
public:
Ref(Ref&& other) : m_bs(other.m_bs), m_mask(other.m_mask) {}
Ref(BitSet* bs, IntTy mask) : m_bs(bs), m_mask(mask) {}
operator bool() const { return (m_bs->m_val & m_mask) != 0; }
bool operator=(bool set)
{
m_bs->m_val = (m_bs->m_val & ~m_mask) | (set ? m_mask : 0);
return set;
}
private:
BitSet* m_bs;
IntTy m_mask;
};
// A STL-like iterator is required to be able to use range-based for loops.
class Iterator
{
public:
Iterator(const Iterator& other) : m_val(other.m_val), m_bit(other.m_bit) {}
Iterator(IntTy val, int bit) : m_val(val), m_bit(bit) {}
Iterator& operator=(Iterator other) { new (this) Iterator(other); return *this; }
int operator*() { return m_bit; }
Iterator& operator++()
{
if (m_val == 0)
{
m_bit = -1;
}
else
{
int bit = LeastSignificantSetBit(m_val);
m_val &= ~(1 << bit);
m_bit = bit;
}
return *this;
}
Iterator operator++(int _)
{
Iterator other(*this);
++*this;
return other;
}
bool operator==(Iterator other) const { return m_bit == other.m_bit; }
bool operator!=(Iterator other) const { return m_bit != other.m_bit; }
private:
IntTy m_val;
int m_bit;
};
BitSet() : m_val(0) {}
explicit BitSet(IntTy val) : m_val(val) {}
BitSet(std::initializer_list<int> init)
{
m_val = 0;
for (int bit : init)
m_val |= (IntTy)1 << bit;
}
static BitSet AllTrue(size_t count)
{
return BitSet(count == sizeof(IntTy)*8 ? ~(IntTy)0 : (((IntTy)1 << count) - 1));
}
Ref operator[](size_t bit) { return Ref(this, (IntTy)1 << bit); }
const Ref operator[](size_t bit) const { return (*const_cast<BitSet*>(this))[bit]; }
bool operator==(BitSet other) const { return m_val == other.m_val; }
bool operator!=(BitSet other) const { return m_val != other.m_val; }
bool operator<(BitSet other) const { return m_val < other.m_val; }
bool operator>(BitSet other) const { return m_val > other.m_val; }
BitSet operator|(BitSet other) const { return BitSet(m_val | other.m_val); }
BitSet operator&(BitSet other) const { return BitSet(m_val & other.m_val); }
BitSet operator^(BitSet other) const { return BitSet(m_val ^ other.m_val); }
BitSet operator~() const { return BitSet(~m_val); }
BitSet& operator|=(BitSet other) { return *this = *this | other; }
BitSet& operator&=(BitSet other) { return *this = *this & other; }
BitSet& operator^=(BitSet other) { return *this = *this ^ other; }
operator uint32_t() = delete;
operator bool() { return m_val != 0; }
// Warning: Even though on modern CPUs this is a single fast instruction,
// Dolphin's official builds do not currently assume POPCNT support on x86,
// so slower explicit bit twiddling is generated. Still should generally
// be faster than a loop.
unsigned int Count() const { return CountSetBits(m_val); }
Iterator begin() const { Iterator it(m_val, 0); return ++it; }
Iterator end() const { return Iterator(m_val, -1); }
IntTy m_val;
};
} // Common
typedef Common::BitSet<uint8_t> BitSet8;
typedef Common::BitSet<uint16_t> BitSet16;
typedef Common::BitSet<uint32_t> BitSet32;
typedef Common::BitSet<uint64_t> BitSet64;

View file

@ -1,92 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include "common/common_types.h"
#include "common/memory_util.h"
// Everything that needs to generate code should inherit from this.
// You get memory management for free, plus, you can use all emitter functions without
// having to prefix them with gen-> or something similar.
// Example implementation:
// class JIT : public CodeBlock<ARMXEmitter> {}
template<class T> class CodeBlock : public T
{
private:
// A privately used function to set the executable RAM space to something invalid.
// For debugging usefulness it should be used to set the RAM to a host specific breakpoint instruction
virtual void PoisonMemory() = 0;
protected:
u8 *region;
size_t region_size;
public:
CodeBlock() : region(nullptr), region_size(0) {}
virtual ~CodeBlock() { if (region) FreeCodeSpace(); }
CodeBlock(const CodeBlock&) = delete;
CodeBlock& operator=(const CodeBlock&) = delete;
// Call this before you generate any code.
void AllocCodeSpace(int size)
{
region_size = size;
region = (u8*)AllocateExecutableMemory(region_size);
T::SetCodePtr(region);
}
// Always clear code space with breakpoints, so that if someone accidentally executes
// uninitialized, it just breaks into the debugger.
void ClearCodeSpace()
{
PoisonMemory();
ResetCodePtr();
}
// Call this when shutting down. Don't rely on the destructor, even though it'll do the job.
void FreeCodeSpace()
{
#ifdef __SYMBIAN32__
ResetExecutableMemory(region);
#else
FreeMemoryPages(region, region_size);
#endif
region = nullptr;
region_size = 0;
}
bool IsInSpace(const u8 *ptr)
{
return (ptr >= region) && (ptr < (region + region_size));
}
// Cannot currently be undone. Will write protect the entire code region.
// Start over if you need to change the code (call FreeCodeSpace(), AllocCodeSpace()).
void WriteProtect()
{
WriteProtectMemory(region, region_size, true);
}
void ResetCodePtr()
{
T::SetCodePtr(region);
}
size_t GetSpaceLeft() const
{
return region_size - (T::GetCodePtr() - region);
}
u8 *GetBasePtr() {
return region;
}
size_t GetOffset(const u8 *ptr) const {
return ptr - region;
}
};

View file

@ -0,0 +1,39 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <iterator>
namespace Dynarmic {
namespace Common {
namespace detail {
template<typename T>
struct ReverseAdapter {
T& iterable;
auto begin() {
using namespace std;
return rbegin(iterable);
}
auto end() {
using namespace std;
return rend(iterable);
}
};
} // namespace detail
template<typename T>
detail::ReverseAdapter<T> Reverse(T&& iterable) {
return detail::ReverseAdapter<T>{iterable};
}
} // namespace Common
} // namespace Dynarmic

View file

@ -1,192 +0,0 @@
// Copyright 2013 Dolphin Emulator Project / 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/memory_util.h"
#ifdef _WIN32
#include <windows.h>
#include <psapi.h>
#include "common/string_util.h"
#else
#include <cstdlib>
#include <cstring>
#include <sys/mman.h>
#endif
#if !defined(_WIN32) && defined(ARCHITECTURE_X64) && !defined(MAP_32BIT)
#include <unistd.h>
#define PAGE_MASK (getpagesize() - 1)
#define round_page(x) ((((unsigned long)(x)) + PAGE_MASK) & ~(PAGE_MASK))
#endif
// Generic function to get last error message.
// Call directly after the command or use the error num.
// This function might change the error code.
const char* GetLastErrorMsg()
{
static const size_t buff_size = 255;
#ifdef _WIN32
static thread_local char err_str[buff_size] = {};
FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, nullptr, GetLastError(),
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
err_str, buff_size, nullptr);
#else
static __thread char err_str[buff_size] = {};
// Thread safe (XSI-compliant)
strerror_r(errno, err_str, buff_size);
#endif
return err_str;
}
// This is purposely not a full wrapper for virtualalloc/mmap, but it
// provides exactly the primitive operations that Dolphin needs.
void* AllocateExecutableMemory(size_t size, bool low)
{
#if defined(_WIN32)
void* ptr = VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
#else
static char* map_hint = nullptr;
#if defined(ARCHITECTURE_X64) && !defined(MAP_32BIT)
// This OS has no flag to enforce allocation below the 4 GB boundary,
// but if we hint that we want a low address it is very likely we will
// get one.
// An older version of this code used MAP_FIXED, but that has the side
// effect of discarding already mapped pages that happen to be in the
// requested virtual memory range (such as the emulated RAM, sometimes).
if (low && (!map_hint))
map_hint = (char*)round_page(512*1024*1024); /* 0.5 GB rounded up to the next page */
#endif
void* ptr = mmap(map_hint, size, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_ANON | MAP_PRIVATE
#if defined(ARCHITECTURE_X64) && defined(MAP_32BIT)
| (low ? MAP_32BIT : 0)
#endif
, -1, 0);
#endif /* defined(_WIN32) */
#ifdef _WIN32
if (ptr == nullptr)
{
#else
if (ptr == MAP_FAILED)
{
ptr = nullptr;
#endif
ASSERT_MSG(false, "Failed to allocate executable memory");
}
#if !defined(_WIN32) && defined(ARCHITECTURE_X64) && !defined(MAP_32BIT)
else
{
if (low)
{
map_hint += size;
map_hint = (char*)round_page(map_hint); /* round up to the next page */
}
}
#endif
#if EMU_ARCH_BITS == 64
if ((u64)ptr >= 0x80000000 && low == true)
ASSERT_MSG(false, "Executable memory ended up above 2GB!");
#endif
return ptr;
}
void* AllocateMemoryPages(size_t size)
{
#ifdef _WIN32
void* ptr = VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_READWRITE);
#else
void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE,
MAP_ANON | MAP_PRIVATE, -1, 0);
if (ptr == MAP_FAILED)
ptr = nullptr;
#endif
if (ptr == nullptr)
ASSERT_MSG(false, "Failed to allocate raw memory");
return ptr;
}
void* AllocateAlignedMemory(size_t size,size_t alignment)
{
#ifdef _WIN32
void* ptr = _aligned_malloc(size,alignment);
#else
void* ptr = nullptr;
#ifdef ANDROID
ptr = memalign(alignment, size);
#else
if (posix_memalign(&ptr, alignment, size) != 0)
ASSERT_MSG(false, "Failed to allocate aligned memory");
#endif
#endif
if (ptr == nullptr)
ASSERT_MSG(false, "Failed to allocate aligned memory");
return ptr;
}
void FreeMemoryPages(void* ptr, size_t size)
{
if (ptr)
{
#ifdef _WIN32
if (!VirtualFree(ptr, 0, MEM_RELEASE))
ASSERT_MSG(false, "FreeMemoryPages failed!\n%s", GetLastErrorMsg());
#else
munmap(ptr, size);
#endif
}
}
void FreeAlignedMemory(void* ptr)
{
if (ptr)
{
#ifdef _WIN32
_aligned_free(ptr);
#else
free(ptr);
#endif
}
}
void WriteProtectMemory(void* ptr, size_t size, bool allowExecute)
{
#ifdef _WIN32
DWORD oldValue;
if (!VirtualProtect(ptr, size, allowExecute ? PAGE_EXECUTE_READ : PAGE_READONLY, &oldValue))
ASSERT_MSG(false, "WriteProtectMemory failed!\n%s", GetLastErrorMsg());
#else
mprotect(ptr, size, allowExecute ? (PROT_READ | PROT_EXEC) : PROT_READ);
#endif
}
void UnWriteProtectMemory(void* ptr, size_t size, bool allowExecute)
{
#ifdef _WIN32
DWORD oldValue;
if (!VirtualProtect(ptr, size, allowExecute ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE, &oldValue))
ASSERT_MSG(false, "UnWriteProtectMemory failed!\n%s", GetLastErrorMsg());
#else
mprotect(ptr, size, allowExecute ? (PROT_READ | PROT_WRITE | PROT_EXEC) : PROT_WRITE | PROT_READ);
#endif
}
std::string MemUsage()
{
return "";
}

View file

@ -1,19 +0,0 @@
// Copyright 2013 Dolphin Emulator Project / 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include <string>
void* AllocateExecutableMemory(size_t size, bool low = true);
void* AllocateMemoryPages(size_t size);
void FreeMemoryPages(void* ptr, size_t size);
void* AllocateAlignedMemory(size_t size,size_t alignment);
void FreeAlignedMemory(void* ptr);
void WriteProtectMemory(void* ptr, size_t size, bool executable = false);
void UnWriteProtectMemory(void* ptr, size_t size, bool allowExecute = false);
std::string MemUsage();
inline int GetPageSize() { return 4096; }

View file

@ -1,363 +0,0 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include "abi.h"
#include "emitter.h"
using namespace Gen;
// Shared code between Win64 and Unix64
void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp) {
size_t shadow = 0;
#if defined(_WIN32)
shadow = 0x20;
#endif
int count = (mask & ABI_ALL_GPRS).Count();
rsp_alignment -= count * 8;
size_t subtraction = 0;
int fpr_count = (mask & ABI_ALL_FPRS).Count();
if (fpr_count) {
// If we have any XMMs to save, we must align the stack here.
subtraction = rsp_alignment & 0xf;
}
subtraction += 16 * fpr_count;
size_t xmm_base_subtraction = subtraction;
subtraction += needed_frame_size;
subtraction += shadow;
// Final alignment.
rsp_alignment -= subtraction;
subtraction += rsp_alignment & 0xf;
*shadowp = shadow;
*subtractionp = subtraction;
*xmm_offsetp = subtraction - xmm_base_subtraction;
}
size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size) {
size_t shadow, subtraction, xmm_offset;
ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset);
for (int r : mask & ABI_ALL_GPRS)
PUSH((X64Reg)r);
if (subtraction)
SUB(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction));
for (int x : mask & ABI_ALL_FPRS) {
MOVAPD(MDisp(RSP, (int)xmm_offset), (X64Reg)(x - 16));
xmm_offset += 16;
}
return shadow;
}
void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size) {
size_t shadow, subtraction, xmm_offset;
ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset);
for (int x : mask & ABI_ALL_FPRS) {
MOVAPD((X64Reg) (x - 16), MDisp(RSP, (int)xmm_offset));
xmm_offset += 16;
}
if (subtraction)
ADD(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction));
for (int r = 15; r >= 0; r--) {
if (mask[r])
POP((X64Reg)r);
}
}
// Common functions
void XEmitter::ABI_CallFunction(const void *func) {
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionC16(const void *func, u16 param1) {
MOV(32, R(ABI_PARAM1), Imm32((u32)param1));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2) {
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(32, R(ABI_PARAM2), Imm32((u32)param2));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionC(const void *func, u32 param1) {
MOV(32, R(ABI_PARAM1), Imm32(param1));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionCC(const void *func, u32 param1, u32 param2) {
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(32, R(ABI_PARAM2), Imm32(param2));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3) {
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(32, R(ABI_PARAM2), Imm32(param2));
MOV(32, R(ABI_PARAM3), Imm32(param3));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3) {
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(32, R(ABI_PARAM2), Imm32(param2));
MOV(64, R(ABI_PARAM3), ImmPtr(param3));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u32 param3, void *param4) {
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(32, R(ABI_PARAM2), Imm32(param2));
MOV(32, R(ABI_PARAM3), Imm32(param3));
MOV(64, R(ABI_PARAM4), ImmPtr(param4));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionP(const void *func, void *param1) {
MOV(64, R(ABI_PARAM1), ImmPtr(param1));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2) {
MOV(64, R(ABI_PARAM1), ImmPtr(param1));
if (!arg2.IsSimpleReg(ABI_PARAM2))
MOV(32, R(ABI_PARAM2), arg2);
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3) {
MOV(64, R(ABI_PARAM1), ImmPtr(param1));
if (!arg2.IsSimpleReg(ABI_PARAM2))
MOV(32, R(ABI_PARAM2), arg2);
if (!arg3.IsSimpleReg(ABI_PARAM3))
MOV(32, R(ABI_PARAM3), arg3);
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3) {
MOV(64, R(ABI_PARAM1), ImmPtr(param1));
MOV(64, R(ABI_PARAM2), ImmPtr(param2));
MOV(32, R(ABI_PARAM3), Imm32(param3));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
// Pass a register as a parameter.
void XEmitter::ABI_CallFunctionR(const void *func, X64Reg reg1) {
if (reg1 != ABI_PARAM1)
MOV(32, R(ABI_PARAM1), R(reg1));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
// Pass two registers as parameters.
void XEmitter::ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2) {
if (reg2 != ABI_PARAM1) {
if (reg1 != ABI_PARAM1)
MOV(64, R(ABI_PARAM1), R(reg1));
if (reg2 != ABI_PARAM2)
MOV(64, R(ABI_PARAM2), R(reg2));
} else {
if (reg2 != ABI_PARAM2)
MOV(64, R(ABI_PARAM2), R(reg2));
if (reg1 != ABI_PARAM1)
MOV(64, R(ABI_PARAM1), R(reg1));
}
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2)
{
if (!arg1.IsSimpleReg(ABI_PARAM1))
MOV(32, R(ABI_PARAM1), arg1);
MOV(32, R(ABI_PARAM2), Imm32(param2));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3)
{
if (!arg1.IsSimpleReg(ABI_PARAM1))
MOV(32, R(ABI_PARAM1), arg1);
MOV(32, R(ABI_PARAM2), Imm32(param2));
MOV(64, R(ABI_PARAM3), Imm64(param3));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1)
{
if (!arg1.IsSimpleReg(ABI_PARAM1))
MOV(32, R(ABI_PARAM1), arg1);
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2)
{
if (!arg1.IsSimpleReg(ABI_PARAM1))
MOV(32, R(ABI_PARAM1), arg1);
if (!arg2.IsSimpleReg(ABI_PARAM2))
MOV(32, R(ABI_PARAM2), arg2);
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}

View file

@ -1,59 +0,0 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "common/bit_set.h"
#include "emitter.h"
// x64 ABI:s, and helpers to help follow them when JIT-ing code.
// All convensions return values in EAX (+ possibly EDX).
// Windows 64-bit
// * 4-reg "fastcall" variant, very new-skool stack handling
// * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself calls_
// * Parameters passed in RCX, RDX, ... further parameters are MOVed into the allocated stack space.
// Scratch: RAX RCX RDX R8 R9 R10 R11
// Callee-save: RBX RSI RDI RBP R12 R13 R14 R15
// Parameters: RCX RDX R8 R9, further MOV-ed
// Linux 64-bit
// * 6-reg "fastcall" variant, old skool stack handling (parameters are pushed)
// Scratch: RAX RCX RDX RSI RDI R8 R9 R10 R11
// Callee-save: RBX RBP R12 R13 R14 R15
// Parameters: RDI RSI RDX RCX R8 R9
#define ABI_ALL_FPRS BitSet32(0xffff0000)
#define ABI_ALL_GPRS BitSet32(0x0000ffff)
#ifdef _WIN32 // 64-bit Windows - the really exotic calling convention
#define ABI_PARAM1 RCX
#define ABI_PARAM2 RDX
#define ABI_PARAM3 R8
#define ABI_PARAM4 R9
// xmm0-xmm15 use the upper 16 bits in the functions that push/pop registers.
#define ABI_ALL_CALLER_SAVED \
(BitSet32 { RAX, RCX, RDX, R8, R9, R10, R11, \
XMM0+16, XMM1+16, XMM2+16, XMM3+16, XMM4+16, XMM5+16 })
#else //64-bit Unix / OS X
#define ABI_PARAM1 RDI
#define ABI_PARAM2 RSI
#define ABI_PARAM3 RDX
#define ABI_PARAM4 RCX
#define ABI_PARAM5 R8
#define ABI_PARAM6 R9
// TODO: Avoid pushing all 16 XMM registers when possible. Most functions we call probably
// don't actually clobber them.
#define ABI_ALL_CALLER_SAVED \
(BitSet32 { RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11 } | \
ABI_ALL_FPRS)
#endif // WIN32
#define ABI_ALL_CALLEE_SAVED (~ABI_ALL_CALLER_SAVED)
#define ABI_RETURN RAX

View file

@ -1,197 +0,0 @@
// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include <string>
#include <thread>
#include "common/common_types.h"
#include "cpu_detect.h"
namespace Common {
#ifndef _MSC_VER
#ifdef __FreeBSD__
#include <sys/types.h>
#include <machine/cpufunc.h>
#endif
static inline void __cpuidex(int info[4], int function_id, int subfunction_id) {
#ifdef __FreeBSD__
// Despite the name, this is just do_cpuid() with ECX as second input.
cpuid_count((u_int)function_id, (u_int)subfunction_id, (u_int*)info);
#else
info[0] = function_id; // eax
info[2] = subfunction_id; // ecx
__asm__(
"cpuid"
: "=a" (info[0]),
"=b" (info[1]),
"=c" (info[2]),
"=d" (info[3])
: "a" (function_id),
"c" (subfunction_id)
);
#endif
}
static inline void __cpuid(int info[4], int function_id) {
return __cpuidex(info, function_id, 0);
}
#define _XCR_XFEATURE_ENABLED_MASK 0
static u64 _xgetbv(u32 index) {
u32 eax, edx;
__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
return ((u64)edx << 32) | eax;
}
#endif // ifndef _MSC_VER
// Detects the various CPU features
static CPUCaps Detect() {
CPUCaps caps = {};
caps.num_cores = std::thread::hardware_concurrency();
// Assumes the CPU supports the CPUID instruction. Those that don't would likely not support
// Citra at all anyway
int cpu_id[4];
memset(caps.brand_string, 0, sizeof(caps.brand_string));
// Detect CPU's CPUID capabilities and grab CPU string
__cpuid(cpu_id, 0x00000000);
u32 max_std_fn = cpu_id[0]; // EAX
std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int));
std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int));
std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int));
__cpuid(cpu_id, 0x80000000);
u32 max_ex_fn = cpu_id[0];
if (!strcmp(caps.brand_string, "GenuineIntel"))
caps.vendor = CPUVendor::INTEL;
else if (!strcmp(caps.brand_string, "AuthenticAMD"))
caps.vendor = CPUVendor::AMD;
else
caps.vendor = CPUVendor::OTHER;
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4996)
#endif
// Set reasonable default brand string even if brand string not available
strncpy(caps.cpu_string, caps.brand_string, sizeof(caps.cpu_string));
caps.cpu_string[sizeof(caps.cpu_string) - 1] = '\0';
#ifdef _MSC_VER
#pragma warning(pop)
#endif
// Detect family and other miscellaneous features
if (max_std_fn >= 1) {
__cpuid(cpu_id, 0x00000001);
if ((cpu_id[3] >> 25) & 1) caps.sse = true;
if ((cpu_id[3] >> 26) & 1) caps.sse2 = true;
if ((cpu_id[2]) & 1) caps.sse3 = true;
if ((cpu_id[2] >> 9) & 1) caps.ssse3 = true;
if ((cpu_id[2] >> 19) & 1) caps.sse4_1 = true;
if ((cpu_id[2] >> 20) & 1) caps.sse4_2 = true;
if ((cpu_id[2] >> 22) & 1) caps.movbe = true;
if ((cpu_id[2] >> 25) & 1) caps.aes = true;
if ((cpu_id[3] >> 24) & 1) {
caps.fxsave_fxrstor = true;
}
// AVX support requires 3 separate checks:
// - Is the AVX bit set in CPUID?
// - Is the XSAVE bit set in CPUID?
// - XGETBV result has the XCR bit set.
if (((cpu_id[2] >> 28) & 1) && ((cpu_id[2] >> 27) & 1)) {
if ((_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6) {
caps.avx = true;
if ((cpu_id[2] >> 12) & 1)
caps.fma = true;
}
}
if (max_std_fn >= 7) {
__cpuidex(cpu_id, 0x00000007, 0x00000000);
// Can't enable AVX2 unless the XSAVE/XGETBV checks above passed
if ((cpu_id[1] >> 5) & 1)
caps.avx2 = caps.avx;
if ((cpu_id[1] >> 3) & 1)
caps.bmi1 = true;
if ((cpu_id[1] >> 8) & 1)
caps.bmi2 = true;
}
}
caps.flush_to_zero = caps.sse;
if (max_ex_fn >= 0x80000004) {
// Extract CPU model string
__cpuid(cpu_id, 0x80000002);
std::memcpy(caps.cpu_string, cpu_id, sizeof(cpu_id));
__cpuid(cpu_id, 0x80000003);
std::memcpy(caps.cpu_string + 16, cpu_id, sizeof(cpu_id));
__cpuid(cpu_id, 0x80000004);
std::memcpy(caps.cpu_string + 32, cpu_id, sizeof(cpu_id));
}
if (max_ex_fn >= 0x80000001) {
// Check for more features
__cpuid(cpu_id, 0x80000001);
if (cpu_id[2] & 1) caps.lahf_sahf_64 = true;
if ((cpu_id[2] >> 5) & 1) caps.lzcnt = true;
if ((cpu_id[2] >> 16) & 1) caps.fma4 = true;
if ((cpu_id[3] >> 29) & 1) caps.long_mode = true;
}
return caps;
}
const CPUCaps& GetCPUCaps() {
static CPUCaps caps = Detect();
return caps;
}
std::string GetCPUCapsString() {
auto caps = GetCPUCaps();
std::string sum(caps.cpu_string);
sum += " (";
sum += caps.brand_string;
sum += ")";
if (caps.sse) sum += ", SSE";
if (caps.sse2) {
sum += ", SSE2";
if (!caps.flush_to_zero) sum += " (without DAZ)";
}
if (caps.sse3) sum += ", SSE3";
if (caps.ssse3) sum += ", SSSE3";
if (caps.sse4_1) sum += ", SSE4.1";
if (caps.sse4_2) sum += ", SSE4.2";
if (caps.avx) sum += ", AVX";
if (caps.avx2) sum += ", AVX2";
if (caps.bmi1) sum += ", BMI1";
if (caps.bmi2) sum += ", BMI2";
if (caps.fma) sum += ", FMA";
if (caps.aes) sum += ", AES";
if (caps.movbe) sum += ", MOVBE";
if (caps.long_mode) sum += ", 64-bit support";
return sum;
}
} // namespace Common

View file

@ -1,66 +0,0 @@
// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <string>
namespace Common {
/// x86/x64 CPU vendors that may be detected by this module
enum class CPUVendor {
INTEL,
AMD,
OTHER,
};
/// x86/x64 CPU capabilities that may be detected by this module
struct CPUCaps {
CPUVendor vendor;
char cpu_string[0x21];
char brand_string[0x41];
int num_cores;
bool sse;
bool sse2;
bool sse3;
bool ssse3;
bool sse4_1;
bool sse4_2;
bool lzcnt;
bool avx;
bool avx2;
bool bmi1;
bool bmi2;
bool fma;
bool fma4;
bool aes;
// Support for the FXSAVE and FXRSTOR instructions
bool fxsave_fxrstor;
bool movbe;
// This flag indicates that the hardware supports some mode in which denormal inputs and outputs
// are automatically set to (signed) zero.
bool flush_to_zero;
// Support for LAHF and SAHF instructions in 64-bit mode
bool lahf_sahf_64;
bool long_mode;
};
/**
* Gets the supported capabilities of the host CPU
* @return Reference to a CPUCaps struct with the detected host CPU capabilities
*/
const CPUCaps& GetCPUCaps();
/**
* Gets a string summary of the name and supported capabilities of the host CPU
* @return String summary
*/
std::string GetCPUCapsString();
} // namespace Common

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -22,11 +22,11 @@ public:
}
std::string thumb16_LSR_imm(Imm5 imm5, Reg m, Reg d) {
return Common::StringFromFormat("lsrs %s, %s, #%u", RegToString(d), RegToString(m), imm5);
return Common::StringFromFormat("lsrs %s, %s, #%u", RegToString(d), RegToString(m), imm5 != 0 ? imm5 : 32);
}
std::string thumb16_ASR_imm(Imm5 imm5, Reg m, Reg d) {
return Common::StringFromFormat("asrs %s, %s, #%u", RegToString(d), RegToString(m), imm5);
return Common::StringFromFormat("asrs %s, %s, #%u", RegToString(d), RegToString(m), imm5 != 0 ? imm5 : 32);
}
std::string thumb16_ADD_reg_t1(Reg m, Reg n, Reg d) {