Merge branch 'xbyak'
This commit is contained in:
commit
922d1fd198
27 changed files with 1648 additions and 5323 deletions
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
[submodule "externals/xbyak"]
|
||||
path = externals/xbyak
|
||||
url = https://github.com/herumi/xbyak
|
|
@ -29,6 +29,7 @@ else()
|
|||
-Wfatal-errors
|
||||
-Wno-unused-parameter
|
||||
-Wno-missing-braces)
|
||||
add_compile_options(-fno-operator-names)
|
||||
|
||||
if (ARCHITECTURE_x86_64)
|
||||
add_compile_options(-msse4.1)
|
||||
|
@ -67,6 +68,9 @@ include_directories(${Boost_INCLUDE_DIRS})
|
|||
include_directories(externals/catch)
|
||||
enable_testing(true) # Enables unit-testing.
|
||||
|
||||
# Include Xbyak
|
||||
include_directories(externals/xbyak/xbyak)
|
||||
|
||||
# Include LLVM
|
||||
if (DYNARMIC_USE_LLVM)
|
||||
find_package(LLVM REQUIRED CONFIG)
|
||||
|
|
1
externals/xbyak
vendored
Submodule
1
externals/xbyak
vendored
Submodule
|
@ -0,0 +1 @@
|
|||
Subproject commit 4cc35dbec0e2eb4d66205f12ea3fab9d8622f99f
|
|
@ -2,17 +2,15 @@ include_directories(.)
|
|||
include(CreateDirectoryGroups)
|
||||
|
||||
set(SRCS
|
||||
backend_x64/abi.cpp
|
||||
backend_x64/block_of_code.cpp
|
||||
backend_x64/emit_x64.cpp
|
||||
backend_x64/hostloc.cpp
|
||||
backend_x64/interface_x64.cpp
|
||||
backend_x64/jitstate.cpp
|
||||
backend_x64/reg_alloc.cpp
|
||||
common/memory_pool.cpp
|
||||
common/memory_util.cpp
|
||||
common/string_util.cpp
|
||||
common/x64/abi.cpp
|
||||
common/x64/cpu_detect.cpp
|
||||
common/x64/emitter.cpp
|
||||
frontend/arm_types.cpp
|
||||
frontend/disassembler/disassembler_arm.cpp
|
||||
frontend/disassembler/disassembler_thumb.cpp
|
||||
|
@ -41,24 +39,21 @@ set(SRCS
|
|||
)
|
||||
|
||||
set(HEADERS
|
||||
backend_x64/abi.h
|
||||
backend_x64/block_of_code.h
|
||||
backend_x64/emit_x64.h
|
||||
backend_x64/hostloc.h
|
||||
backend_x64/jitstate.h
|
||||
backend_x64/reg_alloc.h
|
||||
common/assert.h
|
||||
common/bit_set.h
|
||||
common/bit_util.h
|
||||
common/code_block.h
|
||||
common/common_types.h
|
||||
common/intrusive_list.h
|
||||
common/iterator_util.h
|
||||
common/memory_pool.h
|
||||
common/memory_util.h
|
||||
common/mp.h
|
||||
common/scope_exit.h
|
||||
common/string_util.h
|
||||
common/x64/abi.h
|
||||
common/x64/cpu_detect.h
|
||||
common/x64/emitter.h
|
||||
frontend/arm_types.h
|
||||
frontend/arm/FPSCR.h
|
||||
frontend/decoder/arm.h
|
||||
|
|
113
src/backend_x64/abi.cpp
Normal file
113
src/backend_x64/abi.cpp
Normal file
|
@ -0,0 +1,113 @@
|
|||
// Copyright (C) 2003 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// 24th August 2016: This code was modified for Dynarmic.
|
||||
|
||||
#include <xbyak.h>
|
||||
|
||||
#include "backend_x64/abi.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/iterator_util.h"
|
||||
|
||||
namespace Dynarmic {
|
||||
namespace BackendX64 {
|
||||
|
||||
/// Bytes of stack accounted for per saved general-purpose register.
constexpr size_t GPR_SIZE{8};
/// Bytes of stack reserved per saved XMM register (also the stride between XMM save slots).
constexpr size_t XMM_SIZE{16};
|
||||
|
||||
/// Stack-frame layout computed by CalculateFrameInfo.
struct FrameInfo {
    /// Number of bytes subtracted from rsp after the GPR pushes.
    size_t stack_subtraction{0};
    /// Offset from the adjusted rsp of the first XMM save slot.
    size_t xmm_offset{0};
};
|
||||
|
||||
/// Computes how far rsp must be lowered after the GPR pushes, and where the
/// XMM save area sits relative to the lowered rsp.
///
/// @param num_gprs   Number of general-purpose registers pushed beforehand.
/// @param num_xmms   Number of XMM registers that need a save slot.
/// @param frame_size Additional bytes the caller wants reserved.
/// @return Total stack subtraction and the offset of the first XMM slot.
static FrameInfo CalculateFrameInfo(size_t num_gprs, size_t num_xmms, size_t frame_size) {
    // Tracks rsp modulo 16; only the low four bits are ever inspected, so
    // unsigned wrap-around of the subtraction is harmless.
    // We are always 8-byte aligned initially.
    size_t alignment = 8 - num_gprs * GPR_SIZE;

    // Padding that brings rsp to a 16-byte boundary, followed by the XMM slots.
    // Nothing is reserved when there are no XMM registers to save.
    const size_t xmm_area = num_xmms > 0 ? (alignment & 0xF) + num_xmms * XMM_SIZE : 0;

    // Caller-requested frame and the ABI shadow space go below the XMM area.
    size_t total = xmm_area + frame_size + ABI_SHADOW_SPACE;

    // Pad once more so the final rsp lands on a 16-byte boundary.
    alignment -= total;
    total += alignment & 0xF;

    FrameInfo layout = {};
    layout.stack_subtraction = total;
    layout.xmm_offset = total - xmm_area;
    return layout;
}
|
||||
|
||||
/// Emits a prologue: pushes the general-purpose registers listed in
/// ABI_ALL_CALLEE_SAVE, lowers rsp by the amount CalculateFrameInfo reports,
/// and stores the listed XMM registers into the reserved area.
///
/// @param code       Code generator that receives the emitted instructions.
/// @param frame_size Extra bytes of stack to reserve on top of the register saves.
void ABI_PushCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size) {
    using namespace Xbyak::util;

    const auto first = ABI_ALL_CALLEE_SAVE.begin();
    const auto last = ABI_ALL_CALLEE_SAVE.end();

    const size_t gpr_count = std::count_if(first, last, HostLocIsGPR);
    const size_t xmm_count = std::count_if(first, last, HostLocIsXMM);
    const FrameInfo layout = CalculateFrameInfo(gpr_count, xmm_count, frame_size);

    // GPRs go onto the stack in list order.
    for (auto it = first; it != last; ++it) {
        if (!HostLocIsGPR(*it)) {
            continue;
        }
        code->push(HostLocToReg64(*it));
    }

    if (layout.stack_subtraction != 0) {
        code->sub(rsp, u32(layout.stack_subtraction));
    }

    // XMM registers are stored at ascending offsets, again in list order.
    size_t slot = layout.xmm_offset;
    for (auto it = first; it != last; ++it) {
        if (!HostLocIsXMM(*it)) {
            continue;
        }
        code->movaps(code->xword[rsp + slot], HostLocToXmm(*it));
        slot += XMM_SIZE;
    }
}
|
||||
|
||||
/// Emits the epilogue that undoes ABI_PushCalleeSaveRegistersAndAdjustStack:
/// reloads the saved XMM registers, raises rsp by the frame's stack
/// subtraction, and pops the saved general-purpose registers.
///
/// @param code       Code generator that receives the emitted instructions.
/// @param frame_size Must be the same value that was passed to the matching push.
void ABI_PopCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size) {
    using namespace Xbyak::util;

    const size_t num_gprs = std::count_if(ABI_ALL_CALLEE_SAVE.begin(), ABI_ALL_CALLEE_SAVE.end(), HostLocIsGPR);
    const size_t num_xmms = std::count_if(ABI_ALL_CALLEE_SAVE.begin(), ABI_ALL_CALLEE_SAVE.end(), HostLocIsXMM);

    const FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size);

    // The push routine stores XMM registers at ascending offsets while walking
    // ABI_ALL_CALLEE_SAVE front to back. These are plain loads rather than stack
    // pops, so they must walk the list in the SAME direction; iterating in
    // reverse with ascending offsets would reload every register from a
    // different register's slot.
    size_t xmm_offset = frame_info.xmm_offset;
    for (HostLoc xmm : ABI_ALL_CALLEE_SAVE) {
        if (HostLocIsXMM(xmm)) {
            code->movaps(HostLocToXmm(xmm), code->xword[rsp + xmm_offset]);
            xmm_offset += XMM_SIZE;
        }
    }

    if (frame_info.stack_subtraction != 0) {
        code->add(rsp, u32(frame_info.stack_subtraction));
    }

    // GPRs were pushed in list order, so they come off the stack in reverse.
    for (HostLoc gpr : Common::Reverse(ABI_ALL_CALLEE_SAVE)) {
        if (HostLocIsGPR(gpr)) {
            code->pop(HostLocToReg64(gpr));
        }
    }
}
|
||||
|
||||
} // namespace BackendX64
|
||||
} // namespace Dynarmic
|
119
src/backend_x64/abi.h
Normal file
119
src/backend_x64/abi.h
Normal file
|
@ -0,0 +1,119 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "backend_x64/hostloc.h"
|
||||
|
||||
namespace Dynarmic {
|
||||
namespace BackendX64 {
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
constexpr HostLoc ABI_RETURN = HostLoc::RAX;
|
||||
|
||||
constexpr HostLoc ABI_PARAM1 = HostLoc::RCX;
|
||||
constexpr HostLoc ABI_PARAM2 = HostLoc::RDX;
|
||||
constexpr HostLoc ABI_PARAM3 = HostLoc::R8;
|
||||
constexpr HostLoc ABI_PARAM4 = HostLoc::R9;
|
||||
|
||||
constexpr std::array<HostLoc, 13> ABI_ALL_CALLER_SAVE = {
|
||||
HostLoc::RAX,
|
||||
HostLoc::RCX,
|
||||
HostLoc::RDX,
|
||||
HostLoc::R8,
|
||||
HostLoc::R9,
|
||||
HostLoc::R10,
|
||||
HostLoc::R11,
|
||||
HostLoc::XMM0,
|
||||
HostLoc::XMM1,
|
||||
HostLoc::XMM2,
|
||||
HostLoc::XMM3,
|
||||
HostLoc::XMM4,
|
||||
HostLoc::XMM5,
|
||||
};
|
||||
|
||||
constexpr std::array<HostLoc, 18> ABI_ALL_CALLEE_SAVE = {
|
||||
HostLoc::RBX,
|
||||
HostLoc::RSI,
|
||||
HostLoc::RDI,
|
||||
HostLoc::RBP,
|
||||
HostLoc::R12,
|
||||
HostLoc::R13,
|
||||
HostLoc::R14,
|
||||
HostLoc::R15,
|
||||
HostLoc::XMM6,
|
||||
HostLoc::XMM7,
|
||||
HostLoc::XMM8,
|
||||
HostLoc::XMM9,
|
||||
HostLoc::XMM10,
|
||||
HostLoc::XMM11,
|
||||
HostLoc::XMM12,
|
||||
HostLoc::XMM13,
|
||||
HostLoc::XMM14,
|
||||
HostLoc::XMM15,
|
||||
};
|
||||
|
||||
constexpr size_t ABI_SHADOW_SPACE = 32; // bytes
|
||||
|
||||
#else
|
||||
|
||||
constexpr HostLoc ABI_RETURN = HostLoc::RAX;
|
||||
|
||||
constexpr HostLoc ABI_PARAM1 = HostLoc::RDI;
|
||||
constexpr HostLoc ABI_PARAM2 = HostLoc::RSI;
|
||||
constexpr HostLoc ABI_PARAM3 = HostLoc::RDX;
|
||||
constexpr HostLoc ABI_PARAM4 = HostLoc::RCX;
|
||||
|
||||
constexpr std::array<HostLoc, 25> ABI_ALL_CALLER_SAVE = {
|
||||
HostLoc::RAX,
|
||||
HostLoc::RCX,
|
||||
HostLoc::RDX,
|
||||
HostLoc::RDI,
|
||||
HostLoc::RSI,
|
||||
HostLoc::R8,
|
||||
HostLoc::R9,
|
||||
HostLoc::R10,
|
||||
HostLoc::R11,
|
||||
HostLoc::XMM0,
|
||||
HostLoc::XMM1,
|
||||
HostLoc::XMM2,
|
||||
HostLoc::XMM3,
|
||||
HostLoc::XMM4,
|
||||
HostLoc::XMM5,
|
||||
HostLoc::XMM6,
|
||||
HostLoc::XMM7,
|
||||
HostLoc::XMM8,
|
||||
HostLoc::XMM9,
|
||||
HostLoc::XMM10,
|
||||
HostLoc::XMM11,
|
||||
HostLoc::XMM12,
|
||||
HostLoc::XMM13,
|
||||
HostLoc::XMM14,
|
||||
HostLoc::XMM15,
|
||||
};
|
||||
|
||||
constexpr std::array<HostLoc, 6> ABI_ALL_CALLEE_SAVE = {
|
||||
HostLoc::RBX,
|
||||
HostLoc::RBP,
|
||||
HostLoc::R12,
|
||||
HostLoc::R13,
|
||||
HostLoc::R14,
|
||||
HostLoc::R15,
|
||||
};
|
||||
|
||||
constexpr size_t ABI_SHADOW_SPACE = 0; // bytes
|
||||
|
||||
#endif
|
||||
|
||||
// Sanity check on the two lists above: together they must hold 31 entries,
// which is the 16 GPR + 16 XMM HostLoc values minus RSP (RSP appears in
// neither list). Only the count is checked, not the contents.
static_assert(ABI_ALL_CALLER_SAVE.size() + ABI_ALL_CALLEE_SAVE.size() == 31, "Invalid total number of registers");
|
||||
|
||||
/// Emits code that saves the registers in ABI_ALL_CALLEE_SAVE and lowers rsp,
/// reserving frame_size additional bytes (plus ABI_SHADOW_SPACE).
void ABI_PushCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size = 0);
|
||||
/// Emits code that reverses ABI_PushCalleeSaveRegistersAndAdjustStack; pass
/// the same frame_size that was given to the push.
void ABI_PopCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator* code, size_t frame_size = 0);
|
||||
|
||||
} // namespace BackendX64
|
||||
} // namespace Dynarmic
|
|
@ -6,27 +6,24 @@
|
|||
|
||||
#include <limits>
|
||||
|
||||
#include <xbyak.h>
|
||||
|
||||
#include "backend_x64/abi.h"
|
||||
#include "backend_x64/block_of_code.h"
|
||||
#include "backend_x64/jitstate.h"
|
||||
#include "common/x64/abi.h"
|
||||
|
||||
using namespace Gen;
|
||||
#include "common/assert.h"
|
||||
|
||||
namespace Dynarmic {
|
||||
namespace BackendX64 {
|
||||
|
||||
BlockOfCode::BlockOfCode() : Gen::XCodeBlock() {
|
||||
AllocCodeSpace(128 * 1024 * 1024);
|
||||
BlockOfCode::BlockOfCode() : Xbyak::CodeGenerator(128 * 1024 * 1024) {
|
||||
ClearCache(false);
|
||||
}
|
||||
|
||||
void BlockOfCode::ClearCache(bool poison_memory) {
|
||||
if (poison_memory) {
|
||||
ClearCodeSpace();
|
||||
} else {
|
||||
ResetCodePtr();
|
||||
}
|
||||
|
||||
consts.~Consts();
|
||||
new (&consts) Consts();
|
||||
reset();
|
||||
GenConstants();
|
||||
GenRunCode();
|
||||
GenReturnFromRunCode();
|
||||
|
@ -42,68 +39,116 @@ size_t BlockOfCode::RunCode(JitState* jit_state, CodePtr basic_block, size_t cyc
|
|||
}
|
||||
|
||||
void BlockOfCode::ReturnFromRunCode(bool MXCSR_switch) {
|
||||
JMP(MXCSR_switch ? return_from_run_code : return_from_run_code_without_mxcsr_switch, true);
|
||||
jmp(MXCSR_switch ? return_from_run_code : return_from_run_code_without_mxcsr_switch);
|
||||
}
|
||||
|
||||
void BlockOfCode::GenConstants() {
|
||||
const_FloatNegativeZero32 = AlignCode16();
|
||||
Write32(0x80000000u);
|
||||
const_FloatNaN32 = AlignCode16();
|
||||
Write32(0x7fc00000u);
|
||||
const_FloatNonSignMask32 = AlignCode16();
|
||||
Write64(0x7fffffffu);
|
||||
const_FloatNegativeZero64 = AlignCode16();
|
||||
Write64(0x8000000000000000u);
|
||||
const_FloatNaN64 = AlignCode16();
|
||||
Write64(0x7ff8000000000000u);
|
||||
const_FloatNonSignMask64 = AlignCode16();
|
||||
Write64(0x7fffffffffffffffu);
|
||||
const_FloatPenultimatePositiveDenormal64 = AlignCode16();
|
||||
Write64(0x000ffffffffffffeu);
|
||||
const_FloatMinS32 = AlignCode16();
|
||||
Write64(0xc1e0000000000000u); // -2147483648 as a double
|
||||
const_FloatMaxS32 = AlignCode16();
|
||||
Write64(0x41dfffffffc00000u); // 2147483647 as a double
|
||||
const_FloatPositiveZero32 = const_FloatPositiveZero64 = const_FloatMinU32 = AlignCode16();
|
||||
Write64(0x0000000000000000u); // 0 as a double
|
||||
const_FloatMaxU32 = AlignCode16();
|
||||
Write64(0x41efffffffe00000u); // 4294967295 as a double
|
||||
AlignCode16();
|
||||
align();
|
||||
L(consts.FloatNegativeZero32);
|
||||
dd(0x80000000u);
|
||||
|
||||
align();
|
||||
L(consts.FloatNaN32);
|
||||
dd(0x7fc00000u);
|
||||
|
||||
align();
|
||||
L(consts.FloatNonSignMask32);
|
||||
dq(0x7fffffffu);
|
||||
|
||||
align();
|
||||
L(consts.FloatNegativeZero64);
|
||||
dq(0x8000000000000000u);
|
||||
|
||||
align();
|
||||
L(consts.FloatNaN64);
|
||||
dq(0x7ff8000000000000u);
|
||||
|
||||
align();
|
||||
L(consts.FloatNonSignMask64);
|
||||
dq(0x7fffffffffffffffu);
|
||||
|
||||
align();
|
||||
L(consts.FloatPenultimatePositiveDenormal64);
|
||||
dq(0x000ffffffffffffeu);
|
||||
|
||||
align();
|
||||
L(consts.FloatMinS32);
|
||||
dq(0xc1e0000000000000u); // -2147483648 as a double
|
||||
|
||||
align();
|
||||
L(consts.FloatMaxS32);
|
||||
dq(0x41dfffffffc00000u); // 2147483647 as a double
|
||||
|
||||
align();
|
||||
L(consts.FloatPositiveZero32);
|
||||
L(consts.FloatPositiveZero64);
|
||||
L(consts.FloatMinU32);
|
||||
dq(0x0000000000000000u); // 0 as a double
|
||||
|
||||
align();
|
||||
L(consts.FloatMaxU32);
|
||||
dq(0x41efffffffe00000u); // 4294967295 as a double
|
||||
|
||||
align();
|
||||
}
|
||||
|
||||
void BlockOfCode::GenRunCode() {
|
||||
run_code = reinterpret_cast<RunCodeFuncType>(const_cast<u8*>(GetCodePtr()));
|
||||
align();
|
||||
run_code = getCurr<RunCodeFuncType>();
|
||||
|
||||
// This serves two purposes:
|
||||
// 1. It saves all the registers we as a callee need to save.
|
||||
// 2. It aligns the stack so that the code the JIT emits can assume
|
||||
// that the stack is appropriately aligned for CALLs.
|
||||
ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
|
||||
ABI_PushCalleeSaveRegistersAndAdjustStack(this);
|
||||
|
||||
MOV(64, R(R15), R(ABI_PARAM1));
|
||||
mov(r15, ABI_PARAM1);
|
||||
SwitchMxcsrOnEntry();
|
||||
JMPptr(R(ABI_PARAM2));
|
||||
jmp(ABI_PARAM2);
|
||||
}
|
||||
|
||||
void BlockOfCode::GenReturnFromRunCode() {
|
||||
return_from_run_code = GetCodePtr();
|
||||
return_from_run_code = getCurr<const void*>();
|
||||
|
||||
SwitchMxcsrOnExit();
|
||||
|
||||
return_from_run_code_without_mxcsr_switch = GetCodePtr();
|
||||
return_from_run_code_without_mxcsr_switch = getCurr<const void*>();
|
||||
|
||||
ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
|
||||
RET();
|
||||
ABI_PopCalleeSaveRegistersAndAdjustStack(this);
|
||||
ret();
|
||||
}
|
||||
|
||||
void BlockOfCode::SwitchMxcsrOnEntry() {
|
||||
STMXCSR(MDisp(R15, offsetof(JitState, save_host_MXCSR)));
|
||||
LDMXCSR(MDisp(R15, offsetof(JitState, guest_MXCSR)));
|
||||
stmxcsr(dword[r15 + offsetof(JitState, save_host_MXCSR)]);
|
||||
ldmxcsr(dword[r15 + offsetof(JitState, guest_MXCSR)]);
|
||||
}
|
||||
|
||||
void BlockOfCode::SwitchMxcsrOnExit() {
|
||||
STMXCSR(MDisp(R15, offsetof(JitState, guest_MXCSR)));
|
||||
LDMXCSR(MDisp(R15, offsetof(JitState, save_host_MXCSR)));
|
||||
stmxcsr(dword[r15 + offsetof(JitState, guest_MXCSR)]);
|
||||
ldmxcsr(dword[r15 + offsetof(JitState, save_host_MXCSR)]);
|
||||
}
|
||||
|
||||
void BlockOfCode::CallFunction(const void* fn) {
|
||||
u64 distance = u64(fn) - (getCurr<u64>() + 5);
|
||||
if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
|
||||
// Far call
|
||||
mov(rax, u64(fn));
|
||||
call(rax);
|
||||
} else {
|
||||
call(fn);
|
||||
}
|
||||
}
|
||||
|
||||
/// Moves the emit position so that the next emitted byte lands at ptr.
void BlockOfCode::SetCodePtr(CodePtr ptr) {
    const u8* const buffer_start = getCode();
    const u8* const wanted = reinterpret_cast<const u8*>(ptr);

    // The "size" defines where top_, the insertion point, is.
    const size_t new_size = wanted - buffer_start;
    setSize(new_size);
}
|
||||
|
||||
/// Pads the code emitted since begin with nops until it is exactly size bytes
/// long; asserts that no more than size bytes have been emitted already.
void BlockOfCode::EnsurePatchLocationSize(CodePtr begin, size_t size) {
    const u8* const patch_start = reinterpret_cast<const u8*>(begin);
    const u8* const emit_position = getCurr<const u8*>();

    size_t current_size = emit_position - patch_start;
    ASSERT(current_size <= size);

    const size_t padding = size - current_size;
    nop(padding);
}
|
||||
|
||||
} // namespace BackendX64
|
||||
|
|
|
@ -7,15 +7,17 @@
|
|||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#include <xbyak.h>
|
||||
|
||||
#include "backend_x64/jitstate.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/x64/emitter.h"
|
||||
|
||||
namespace Dynarmic {
|
||||
namespace BackendX64 {
|
||||
|
||||
class BlockOfCode final : public Gen::XCodeBlock {
|
||||
class BlockOfCode final : public Xbyak::CodeGenerator {
|
||||
public:
|
||||
BlockOfCode();
|
||||
|
||||
|
@ -30,73 +32,101 @@ public:
|
|||
void SwitchMxcsrOnEntry();
|
||||
/// Code emitter: Makes saved host MXCSR the current MXCSR
|
||||
void SwitchMxcsrOnExit();
|
||||
/// Code emitter: Calls the function
|
||||
void CallFunction(const void* fn);
|
||||
|
||||
Gen::OpArg MFloatPositiveZero32() const {
|
||||
return Gen::M(const_FloatPositiveZero32);
|
||||
Xbyak::Address MFloatPositiveZero32() {
|
||||
return xword[rip + consts.FloatPositiveZero32];
|
||||
}
|
||||
Gen::OpArg MFloatNegativeZero32() const {
|
||||
return Gen::M(const_FloatNegativeZero32);
|
||||
Xbyak::Address MFloatNegativeZero32() {
|
||||
return xword[rip + consts.FloatNegativeZero32];
|
||||
}
|
||||
Gen::OpArg MFloatNaN32() const {
|
||||
return Gen::M(const_FloatNaN32);
|
||||
Xbyak::Address MFloatNaN32() {
|
||||
return xword[rip + consts.FloatNaN32];
|
||||
}
|
||||
Gen::OpArg MFloatNonSignMask32() const {
|
||||
return Gen::M(const_FloatNonSignMask32);
|
||||
Xbyak::Address MFloatNonSignMask32() {
|
||||
return xword[rip + consts.FloatNonSignMask32];
|
||||
}
|
||||
Gen::OpArg MFloatPositiveZero64() const {
|
||||
return Gen::M(const_FloatPositiveZero64);
|
||||
Xbyak::Address MFloatPositiveZero64() {
|
||||
return xword[rip + consts.FloatPositiveZero64];
|
||||
}
|
||||
Gen::OpArg MFloatNegativeZero64() const {
|
||||
return Gen::M(const_FloatNegativeZero64);
|
||||
Xbyak::Address MFloatNegativeZero64() {
|
||||
return xword[rip + consts.FloatNegativeZero64];
|
||||
}
|
||||
Gen::OpArg MFloatNaN64() const {
|
||||
return Gen::M(const_FloatNaN64);
|
||||
Xbyak::Address MFloatNaN64() {
|
||||
return xword[rip + consts.FloatNaN64];
|
||||
}
|
||||
Gen::OpArg MFloatNonSignMask64() const {
|
||||
return Gen::M(const_FloatNonSignMask64);
|
||||
Xbyak::Address MFloatNonSignMask64() {
|
||||
return xword[rip + consts.FloatNonSignMask64];
|
||||
}
|
||||
Gen::OpArg MFloatPenultimatePositiveDenormal64() const {
|
||||
return Gen::M(const_FloatPenultimatePositiveDenormal64);
|
||||
Xbyak::Address MFloatPenultimatePositiveDenormal64() {
|
||||
return xword[rip + consts.FloatPenultimatePositiveDenormal64];
|
||||
}
|
||||
Gen::OpArg MFloatMinS32() const {
|
||||
return Gen::M(const_FloatMinS32);
|
||||
Xbyak::Address MFloatMinS32() {
|
||||
return xword[rip + consts.FloatMinS32];
|
||||
}
|
||||
Gen::OpArg MFloatMaxS32() const {
|
||||
return Gen::M(const_FloatMaxS32);
|
||||
Xbyak::Address MFloatMaxS32() {
|
||||
return xword[rip + consts.FloatMaxS32];
|
||||
}
|
||||
Gen::OpArg MFloatMinU32() const {
|
||||
return Gen::M(const_FloatMinU32);
|
||||
Xbyak::Address MFloatMinU32() {
|
||||
return xword[rip + consts.FloatMinU32];
|
||||
}
|
||||
Gen::OpArg MFloatMaxU32() const {
|
||||
return Gen::M(const_FloatMaxU32);
|
||||
Xbyak::Address MFloatMaxU32() {
|
||||
return xword[rip + consts.FloatMaxU32];
|
||||
}
|
||||
|
||||
CodePtr GetReturnFromRunCodeAddress() const {
|
||||
const void* GetReturnFromRunCodeAddress() const {
|
||||
return return_from_run_code;
|
||||
}
|
||||
|
||||
void int3() { db(0xCC); }
|
||||
void nop(size_t size = 0) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
db(0x90);
|
||||
}
|
||||
}
|
||||
|
||||
void SetCodePtr(CodePtr ptr);
|
||||
void EnsurePatchLocationSize(CodePtr begin, size_t size);
|
||||
|
||||
#ifdef _WIN32
|
||||
Xbyak::Reg64 ABI_RETURN = rax;
|
||||
Xbyak::Reg64 ABI_PARAM1 = rcx;
|
||||
Xbyak::Reg64 ABI_PARAM2 = rdx;
|
||||
Xbyak::Reg64 ABI_PARAM3 = r8;
|
||||
Xbyak::Reg64 ABI_PARAM4 = r9;
|
||||
#else
|
||||
Xbyak::Reg64 ABI_RETURN = rax;
|
||||
Xbyak::Reg64 ABI_PARAM1 = rdi;
|
||||
Xbyak::Reg64 ABI_PARAM2 = rsi;
|
||||
Xbyak::Reg64 ABI_PARAM3 = rdx;
|
||||
Xbyak::Reg64 ABI_PARAM4 = rcx;
|
||||
#endif
|
||||
|
||||
private:
|
||||
const u8* const_FloatPositiveZero32 = nullptr;
|
||||
const u8* const_FloatNegativeZero32 = nullptr;
|
||||
const u8* const_FloatNaN32 = nullptr;
|
||||
const u8* const_FloatNonSignMask32 = nullptr;
|
||||
const u8* const_FloatPositiveZero64 = nullptr;
|
||||
const u8* const_FloatNegativeZero64 = nullptr;
|
||||
const u8* const_FloatNaN64 = nullptr;
|
||||
const u8* const_FloatNonSignMask64 = nullptr;
|
||||
const u8* const_FloatPenultimatePositiveDenormal64 = nullptr;
|
||||
const u8* const_FloatMinS32 = nullptr;
|
||||
const u8* const_FloatMaxS32 = nullptr;
|
||||
const u8* const_FloatMinU32 = nullptr;
|
||||
const u8* const_FloatMaxU32 = nullptr;
|
||||
struct Consts {
|
||||
Xbyak::Label FloatPositiveZero32;
|
||||
Xbyak::Label FloatNegativeZero32;
|
||||
Xbyak::Label FloatNaN32;
|
||||
Xbyak::Label FloatNonSignMask32;
|
||||
Xbyak::Label FloatPositiveZero64;
|
||||
Xbyak::Label FloatNegativeZero64;
|
||||
Xbyak::Label FloatNaN64;
|
||||
Xbyak::Label FloatNonSignMask64;
|
||||
Xbyak::Label FloatPenultimatePositiveDenormal64;
|
||||
Xbyak::Label FloatMinS32;
|
||||
Xbyak::Label FloatMaxS32;
|
||||
Xbyak::Label FloatMinU32;
|
||||
Xbyak::Label FloatMaxU32;
|
||||
} consts;
|
||||
void GenConstants();
|
||||
|
||||
using RunCodeFuncType = void(*)(JitState*, CodePtr);
|
||||
RunCodeFuncType run_code = nullptr;
|
||||
void GenRunCode();
|
||||
|
||||
CodePtr return_from_run_code = nullptr;
|
||||
CodePtr return_from_run_code_without_mxcsr_switch = nullptr;
|
||||
const void* return_from_run_code = nullptr;
|
||||
const void* return_from_run_code_without_mxcsr_switch = nullptr;
|
||||
void GenReturnFromRunCode();
|
||||
};
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -11,10 +11,10 @@
|
|||
#include <vector>
|
||||
|
||||
#include <boost/optional.hpp>
|
||||
#include <xbyak.h>
|
||||
|
||||
#include "backend_x64/block_of_code.h"
|
||||
#include "backend_x64/reg_alloc.h"
|
||||
#include "common/x64/emitter.h"
|
||||
#include "frontend/arm_types.h"
|
||||
#include "frontend/ir/basic_block.h"
|
||||
#include "frontend/ir/microinstruction.h"
|
||||
|
|
35
src/backend_x64/hostloc.cpp
Normal file
35
src/backend_x64/hostloc.cpp
Normal file
|
@ -0,0 +1,35 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include "backend_x64/hostloc.h"
|
||||
|
||||
namespace Dynarmic {
|
||||
namespace BackendX64 {
|
||||
|
||||
/// Converts a general-purpose HostLoc into the Xbyak 64-bit register with the
/// same index. RSP and R15 are rejected by debug assertions.
Xbyak::Reg64 HostLocToReg64(HostLoc loc) {
    DEBUG_ASSERT(HostLocIsGPR(loc));
    DEBUG_ASSERT(loc != HostLoc::RSP);
    DEBUG_ASSERT(loc != HostLoc::R15);

    // The enumerator value is used directly as the register index.
    const int reg_index = static_cast<int>(loc);
    return Xbyak::Reg64(reg_index);
}
|
||||
|
||||
/// Converts an XMM HostLoc into the Xbyak XMM register with the matching index.
Xbyak::Xmm HostLocToXmm(HostLoc loc) {
    DEBUG_ASSERT(HostLocIsXMM(loc));

    // XMM enumerators are consecutive, so the index is the distance from XMM0.
    const int first_xmm = static_cast<int>(HostLoc::XMM0);
    const int xmm_index = static_cast<int>(loc) - first_xmm;
    return Xbyak::Xmm(xmm_index);
}
|
||||
|
||||
/// Returns the qword memory operand, addressed relative to r15, for the
/// JitState::Spill entry that corresponds to the spill location loc.
Xbyak::Address SpillToOpArg(HostLoc loc) {
    using namespace Xbyak::util;

    static_assert(std::is_same<decltype(JitState{nullptr}.Spill[0]), u64&>::value, "Spill must be u64");
    DEBUG_ASSERT(HostLocIsSpill(loc));

    // Spill locations are numbered consecutively starting at FirstSpill.
    const size_t slot = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);
    const size_t displacement = offsetof(JitState, Spill) + slot * sizeof(u64);
    return qword[r15 + displacement];
}
|
||||
|
||||
} // namespace BackendX64
|
||||
} // namespace Dynarmic
|
98
src/backend_x64/hostloc.h
Normal file
98
src/backend_x64/hostloc.h
Normal file
|
@ -0,0 +1,98 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <xbyak.h>
|
||||
|
||||
#include "backend_x64/jitstate.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Dynarmic {
|
||||
namespace BackendX64 {
|
||||
|
||||
/// Places a value can live while emitting host code: general-purpose
/// registers, XMM registers, individual flags, and spill slots.
enum class HostLoc {
|
||||
// Ordering of the registers is intentional. See also: HostLocToX64.
// HostLocToReg64 uses a GPR enumerator's value directly as the register
// index, and HostLocToXmm uses the distance from XMM0.
|
||||
RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15,
|
||||
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
|
||||
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
|
||||
CF, PF, AF, ZF, SF, OF,
|
||||
// First spill slot; further spill slots are the values that follow it
// (see HostLocSpill).
FirstSpill,
|
||||
};
|
||||
|
||||
// Total number of distinct HostLoc values: everything before FirstSpill plus
// SpillCount spill slots.
constexpr size_t HostLocCount = static_cast<size_t>(HostLoc::FirstSpill) + SpillCount;
|
||||
|
||||
/// True for the enumerators RAX through R14.
// NOTE(review): R15 is not accepted here even though ABI_ALL_CALLEE_SAVE lists
// it. Code that filters that list with this predicate (the callee-save
// push/pop emitters) therefore skips R15 -- confirm this is intended.
inline bool HostLocIsGPR(HostLoc reg) {
    return HostLoc::RAX <= reg && reg <= HostLoc::R14;
}
|
||||
|
||||
/// True for the enumerators XMM0 through XMM15.
inline bool HostLocIsXMM(HostLoc reg) {
    return HostLoc::XMM0 <= reg && reg <= HostLoc::XMM15;
}
|
||||
|
||||
/// True when reg satisfies HostLocIsGPR or HostLocIsXMM.
inline bool HostLocIsRegister(HostLoc reg) {
    if (HostLocIsGPR(reg)) {
        return true;
    }
    return HostLocIsXMM(reg);
}
|
||||
|
||||
/// True for the flag enumerators CF through OF.
inline bool HostLocIsFlag(HostLoc reg) {
    return HostLoc::CF <= reg && reg <= HostLoc::OF;
}
|
||||
|
||||
/// Returns the HostLoc for spill slot i; asserts that i is below SpillCount.
inline HostLoc HostLocSpill(size_t i) {
    ASSERT_MSG(i < SpillCount, "Invalid spill");

    // Spill slots are numbered consecutively starting at FirstSpill.
    const int first_spill = static_cast<int>(HostLoc::FirstSpill);
    return static_cast<HostLoc>(first_spill + i);
}
|
||||
|
||||
/// True when reg lies between FirstSpill and the last spill slot, inclusive.
inline bool HostLocIsSpill(HostLoc reg) {
    if (reg < HostLoc::FirstSpill) {
        return false;
    }
    return reg <= HostLocSpill(SpillCount - 1);
}
|
||||
|
||||
using HostLocList = std::initializer_list<HostLoc>;
|
||||
|
||||
// RSP is preserved for function calls
|
||||
// R15 contains the JitState pointer
|
||||
const HostLocList any_gpr = {
|
||||
HostLoc::RAX,
|
||||
HostLoc::RBX,
|
||||
HostLoc::RCX,
|
||||
HostLoc::RDX,
|
||||
HostLoc::RSI,
|
||||
HostLoc::RDI,
|
||||
HostLoc::RBP,
|
||||
HostLoc::R8,
|
||||
HostLoc::R9,
|
||||
HostLoc::R10,
|
||||
HostLoc::R11,
|
||||
HostLoc::R12,
|
||||
HostLoc::R13,
|
||||
HostLoc::R14,
|
||||
};
|
||||
|
||||
/// All sixteen XMM registers, in ascending order.
const HostLocList any_xmm = {
    HostLoc::XMM0, HostLoc::XMM1, HostLoc::XMM2, HostLoc::XMM3,
    HostLoc::XMM4, HostLoc::XMM5, HostLoc::XMM6, HostLoc::XMM7,
    HostLoc::XMM8, HostLoc::XMM9, HostLoc::XMM10, HostLoc::XMM11,
    HostLoc::XMM12, HostLoc::XMM13, HostLoc::XMM14, HostLoc::XMM15,
};
|
||||
|
||||
/// Maps a general-purpose HostLoc to the corresponding Xbyak 64-bit register.
Xbyak::Reg64 HostLocToReg64(HostLoc loc);
|
||||
/// Maps an XMM HostLoc to the corresponding Xbyak XMM register.
Xbyak::Xmm HostLocToXmm(HostLoc loc);
|
||||
/// Maps a spill HostLoc to the memory operand of its slot in JitState::Spill.
Xbyak::Address SpillToOpArg(HostLoc loc);
|
||||
|
||||
} // namespace BackendX64
|
||||
} // namespace Dynarmic
|
|
@ -15,7 +15,7 @@ namespace BackendX64 {
|
|||
|
||||
class BlockOfCode;
|
||||
|
||||
constexpr size_t SpillCount = 32;
|
||||
constexpr size_t SpillCount = 64;
|
||||
|
||||
struct JitState {
|
||||
explicit JitState(const BlockOfCode* code) { ResetRSB(code); }
|
||||
|
@ -54,7 +54,7 @@ struct JitState {
|
|||
void SetFpscr(u32 FPSCR);
|
||||
};
|
||||
|
||||
using CodePtr = const u8*;
|
||||
using CodePtr = const void*;
|
||||
|
||||
} // namespace BackendX64
|
||||
} // namespace Dynarmic
|
||||
|
|
|
@ -7,52 +7,42 @@
|
|||
#include <algorithm>
|
||||
#include <map>
|
||||
|
||||
#include <xbyak.h>
|
||||
|
||||
#include "backend_x64/jitstate.h"
|
||||
#include "backend_x64/reg_alloc.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/x64/emitter.h"
|
||||
|
||||
namespace Dynarmic {
|
||||
namespace BackendX64 {
|
||||
|
||||
static Gen::OpArg ImmediateToOpArg(const IR::Value& imm) {
|
||||
static u32 ImmediateToU32(const IR::Value& imm) {
|
||||
switch (imm.GetType()) {
|
||||
case IR::Type::U1:
|
||||
return Gen::Imm32(imm.GetU1());
|
||||
return u32(imm.GetU1());
|
||||
break;
|
||||
case IR::Type::U8:
|
||||
return Gen::Imm32(imm.GetU8());
|
||||
return u32(imm.GetU8());
|
||||
break;
|
||||
case IR::Type::U32:
|
||||
return Gen::Imm32(imm.GetU32());
|
||||
return u32(imm.GetU32());
|
||||
break;
|
||||
default:
|
||||
ASSERT_MSG(false, "This should never happen.");
|
||||
}
|
||||
}
|
||||
|
||||
static Gen::X64Reg HostLocToX64(HostLoc loc) {
|
||||
DEBUG_ASSERT(HostLocIsRegister(loc));
|
||||
DEBUG_ASSERT(loc != HostLoc::RSP);
|
||||
// HostLoc is ordered such that the numbers line up.
|
||||
if (HostLocIsGPR(loc)) {
|
||||
return static_cast<Gen::X64Reg>(loc);
|
||||
static Xbyak::Reg HostLocToX64(HostLoc hostloc) {
|
||||
if (HostLocIsGPR(hostloc)) {
|
||||
return HostLocToReg64(hostloc);
|
||||
}
|
||||
if (HostLocIsXMM(loc)) {
|
||||
return static_cast<Gen::X64Reg>(size_t(loc) - size_t(HostLoc::XMM0));
|
||||
if (HostLocIsXMM(hostloc)) {
|
||||
return HostLocToXmm(hostloc);
|
||||
}
|
||||
ASSERT_MSG(false, "This should never happen.");
|
||||
return Gen::INVALID_REG;
|
||||
}
|
||||
|
||||
static Gen::OpArg SpillToOpArg(HostLoc loc) {
|
||||
static_assert(std::is_same<decltype(JitState{nullptr}.Spill[0]), u64&>::value, "Spill must be u64");
|
||||
DEBUG_ASSERT(HostLocIsSpill(loc));
|
||||
|
||||
size_t i = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);
|
||||
return Gen::MDisp(Gen::R15, static_cast<int>(offsetof(JitState, Spill) + i * sizeof(u64)));
|
||||
}
|
||||
|
||||
Gen::X64Reg RegAlloc::DefRegister(IR::Inst* def_inst, HostLocList desired_locations) {
|
||||
HostLoc RegAlloc::DefHostLocReg(IR::Inst* def_inst, HostLocList desired_locations) {
|
||||
DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister));
|
||||
DEBUG_ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
|
||||
|
||||
|
@ -66,14 +56,14 @@ Gen::X64Reg RegAlloc::DefRegister(IR::Inst* def_inst, HostLocList desired_locati
|
|||
LocInfo(location).def = def_inst;
|
||||
|
||||
DEBUG_ASSERT(LocInfo(location).IsDef());
|
||||
return HostLocToX64(location);
|
||||
return location;
|
||||
}
|
||||
|
||||
void RegAlloc::RegisterAddDef(IR::Inst* def_inst, const IR::Value& use_inst) {
|
||||
DEBUG_ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
|
||||
|
||||
if (use_inst.IsImmediate()) {
|
||||
LoadImmediateIntoRegister(use_inst, DefRegister(def_inst, any_gpr));
|
||||
LoadImmediateIntoHostLocReg(use_inst, DefHostLocReg(def_inst, any_gpr));
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -84,15 +74,15 @@ void RegAlloc::RegisterAddDef(IR::Inst* def_inst, const IR::Value& use_inst) {
|
|||
DEBUG_ASSERT(LocInfo(location).IsIdle());
|
||||
}
|
||||
|
||||
Gen::X64Reg RegAlloc::UseDefRegister(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations) {
|
||||
HostLoc RegAlloc::UseDefHostLocReg(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations) {
|
||||
if (!use_value.IsImmediate()) {
|
||||
return UseDefRegister(use_value.GetInst(), def_inst, desired_locations);
|
||||
return UseDefHostLocReg(use_value.GetInst(), def_inst, desired_locations);
|
||||
}
|
||||
|
||||
return LoadImmediateIntoRegister(use_value, DefRegister(def_inst, desired_locations));
|
||||
return LoadImmediateIntoHostLocReg(use_value, DefHostLocReg(def_inst, desired_locations));
|
||||
}
|
||||
|
||||
Gen::X64Reg RegAlloc::UseDefRegister(IR::Inst* use_inst, IR::Inst* def_inst, HostLocList desired_locations) {
|
||||
HostLoc RegAlloc::UseDefHostLocReg(IR::Inst* use_inst, IR::Inst* def_inst, HostLocList desired_locations) {
|
||||
DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister));
|
||||
DEBUG_ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
|
||||
DEBUG_ASSERT_MSG(ValueLocation(use_inst), "use_inst has not been defined");
|
||||
|
@ -112,9 +102,9 @@ Gen::X64Reg RegAlloc::UseDefRegister(IR::Inst* use_inst, IR::Inst* def_inst, Hos
|
|||
EmitMove(new_location, current_location);
|
||||
LocInfo(new_location) = LocInfo(current_location);
|
||||
LocInfo(current_location) = {};
|
||||
return HostLocToX64(new_location);
|
||||
return new_location;
|
||||
} else {
|
||||
return HostLocToX64(current_location);
|
||||
return current_location;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -123,17 +113,17 @@ Gen::X64Reg RegAlloc::UseDefRegister(IR::Inst* use_inst, IR::Inst* def_inst, Hos
|
|||
if (is_floating_point) {
|
||||
DEBUG_ASSERT(use_inst->GetType() == IR::Type::F32 || use_inst->GetType() == IR::Type::F64);
|
||||
}
|
||||
Gen::X64Reg use_reg = UseRegister(use_inst, is_floating_point ? any_xmm : any_gpr);
|
||||
Gen::X64Reg def_reg = DefRegister(def_inst, desired_locations);
|
||||
HostLoc use_reg = UseHostLocReg(use_inst, is_floating_point ? any_xmm : any_gpr);
|
||||
HostLoc def_reg = DefHostLocReg(def_inst, desired_locations);
|
||||
if (is_floating_point) {
|
||||
code->MOVAPD(def_reg, Gen::R(use_reg));
|
||||
code->movapd(HostLocToXmm(def_reg), HostLocToXmm(use_reg));
|
||||
} else {
|
||||
code->MOV(64, Gen::R(def_reg), Gen::R(use_reg));
|
||||
code->mov(HostLocToReg64(def_reg), HostLocToReg64(use_reg));
|
||||
}
|
||||
return def_reg;
|
||||
}
|
||||
|
||||
std::tuple<Gen::OpArg, Gen::X64Reg> RegAlloc::UseDefOpArg(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations) {
|
||||
std::tuple<OpArg, HostLoc> RegAlloc::UseDefOpArgHostLocReg(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations) {
|
||||
DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister));
|
||||
DEBUG_ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
|
||||
DEBUG_ASSERT_MSG(use_value.IsImmediate() || ValueLocation(use_value.GetInst()), "use_inst has not been defined");
|
||||
|
@ -148,37 +138,37 @@ std::tuple<Gen::OpArg, Gen::X64Reg> RegAlloc::UseDefOpArg(IR::Value use_value, I
|
|||
if (HostLocIsSpill(current_location)) {
|
||||
loc_info.is_being_used = true;
|
||||
DEBUG_ASSERT(loc_info.IsUse());
|
||||
return std::make_tuple(SpillToOpArg(current_location), DefRegister(def_inst, desired_locations));
|
||||
return std::make_tuple(SpillToOpArg(current_location), DefHostLocReg(def_inst, desired_locations));
|
||||
} else {
|
||||
loc_info.is_being_used = true;
|
||||
loc_info.def = def_inst;
|
||||
DEBUG_ASSERT(loc_info.IsUseDef());
|
||||
return std::make_tuple(Gen::R(HostLocToX64(current_location)), HostLocToX64(current_location));
|
||||
return std::make_tuple(HostLocToX64(current_location), current_location);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Gen::OpArg use_oparg = UseOpArg(use_value, any_gpr);
|
||||
Gen::X64Reg def_reg = DefRegister(def_inst, desired_locations);
|
||||
OpArg use_oparg = UseOpArg(use_value, any_gpr);
|
||||
HostLoc def_reg = DefHostLocReg(def_inst, desired_locations);
|
||||
return std::make_tuple(use_oparg, def_reg);
|
||||
}
|
||||
|
||||
Gen::X64Reg RegAlloc::UseRegister(IR::Value use_value, HostLocList desired_locations) {
|
||||
HostLoc RegAlloc::UseHostLocReg(IR::Value use_value, HostLocList desired_locations) {
|
||||
if (!use_value.IsImmediate()) {
|
||||
return UseRegister(use_value.GetInst(), desired_locations);
|
||||
return UseHostLocReg(use_value.GetInst(), desired_locations);
|
||||
}
|
||||
|
||||
return LoadImmediateIntoRegister(use_value, ScratchRegister(desired_locations));
|
||||
return LoadImmediateIntoHostLocReg(use_value, ScratchHostLocReg(desired_locations));
|
||||
}
|
||||
|
||||
Gen::X64Reg RegAlloc::UseRegister(IR::Inst* use_inst, HostLocList desired_locations) {
|
||||
HostLoc RegAlloc::UseHostLocReg(IR::Inst* use_inst, HostLocList desired_locations) {
|
||||
HostLoc current_location;
|
||||
bool was_being_used;
|
||||
std::tie(current_location, was_being_used) = UseHostLoc(use_inst, desired_locations);
|
||||
|
||||
if (HostLocIsRegister(current_location)) {
|
||||
return HostLocToX64(current_location);
|
||||
return current_location;
|
||||
} else if (HostLocIsSpill(current_location)) {
|
||||
HostLoc new_location = SelectARegister(desired_locations);
|
||||
if (IsRegisterOccupied(new_location)) {
|
||||
|
@ -193,16 +183,15 @@ Gen::X64Reg RegAlloc::UseRegister(IR::Inst* use_inst, HostLocList desired_locati
|
|||
LocInfo(new_location).is_being_used = true;
|
||||
DEBUG_ASSERT(LocInfo(new_location).IsScratch());
|
||||
}
|
||||
return HostLocToX64(new_location);
|
||||
return new_location;
|
||||
}
|
||||
|
||||
ASSERT_MSG(false, "Unknown current_location type");
|
||||
return Gen::INVALID_REG;
|
||||
}
|
||||
|
||||
Gen::OpArg RegAlloc::UseOpArg(IR::Value use_value, HostLocList desired_locations) {
|
||||
OpArg RegAlloc::UseOpArg(IR::Value use_value, HostLocList desired_locations) {
|
||||
if (use_value.IsImmediate()) {
|
||||
return ImmediateToOpArg(use_value);
|
||||
return Xbyak::Operand(); // return a None
|
||||
}
|
||||
|
||||
IR::Inst* use_inst = use_value.GetInst();
|
||||
|
@ -212,24 +201,23 @@ Gen::OpArg RegAlloc::UseOpArg(IR::Value use_value, HostLocList desired_locations
|
|||
std::tie(current_location, was_being_used) = UseHostLoc(use_inst, desired_locations);
|
||||
|
||||
if (HostLocIsRegister(current_location)) {
|
||||
return Gen::R(HostLocToX64(current_location));
|
||||
return HostLocToX64(current_location);
|
||||
} else if (HostLocIsSpill(current_location)) {
|
||||
return SpillToOpArg(current_location);
|
||||
}
|
||||
|
||||
ASSERT_MSG(false, "Unknown current_location type");
|
||||
return Gen::R(Gen::INVALID_REG);
|
||||
}
|
||||
|
||||
Gen::X64Reg RegAlloc::UseScratchRegister(IR::Value use_value, HostLocList desired_locations) {
|
||||
HostLoc RegAlloc::UseScratchHostLocReg(IR::Value use_value, HostLocList desired_locations) {
|
||||
if (!use_value.IsImmediate()) {
|
||||
return UseScratchRegister(use_value.GetInst(), desired_locations);
|
||||
return UseScratchHostLocReg(use_value.GetInst(), desired_locations);
|
||||
}
|
||||
|
||||
return LoadImmediateIntoRegister(use_value, ScratchRegister(desired_locations));
|
||||
return LoadImmediateIntoHostLocReg(use_value, ScratchHostLocReg(desired_locations));
|
||||
}
|
||||
|
||||
Gen::X64Reg RegAlloc::UseScratchRegister(IR::Inst* use_inst, HostLocList desired_locations) {
|
||||
HostLoc RegAlloc::UseScratchHostLocReg(IR::Inst* use_inst, HostLocList desired_locations) {
|
||||
DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister));
|
||||
DEBUG_ASSERT_MSG(ValueLocation(use_inst), "use_inst has not been defined");
|
||||
ASSERT_MSG(use_inst->HasUses(), "use_inst ran out of uses. (Use-d an IR::Inst* too many times)");
|
||||
|
@ -245,7 +233,7 @@ Gen::X64Reg RegAlloc::UseScratchRegister(IR::Inst* use_inst, HostLocList desired
|
|||
LocInfo(new_location).is_being_used = true;
|
||||
DecrementRemainingUses(use_inst);
|
||||
DEBUG_ASSERT(LocInfo(new_location).IsScratch());
|
||||
return HostLocToX64(new_location);
|
||||
return new_location;
|
||||
} else if (HostLocIsRegister(current_location)) {
|
||||
ASSERT(LocInfo(current_location).IsIdle()
|
||||
|| LocInfo(current_location).IsUse()
|
||||
|
@ -261,14 +249,13 @@ Gen::X64Reg RegAlloc::UseScratchRegister(IR::Inst* use_inst, HostLocList desired
|
|||
LocInfo(new_location).values.clear();
|
||||
DecrementRemainingUses(use_inst);
|
||||
DEBUG_ASSERT(LocInfo(new_location).IsScratch());
|
||||
return HostLocToX64(new_location);
|
||||
return new_location;
|
||||
}
|
||||
|
||||
ASSERT_MSG(0, "Invalid current_location");
|
||||
return Gen::INVALID_REG;
|
||||
ASSERT_MSG(false, "Invalid current_location");
|
||||
}
|
||||
|
||||
Gen::X64Reg RegAlloc::ScratchRegister(HostLocList desired_locations) {
|
||||
HostLoc RegAlloc::ScratchHostLocReg(HostLocList desired_locations) {
|
||||
DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister));
|
||||
|
||||
HostLoc location = SelectARegister(desired_locations);
|
||||
|
@ -281,7 +268,7 @@ Gen::X64Reg RegAlloc::ScratchRegister(HostLocList desired_locations) {
|
|||
LocInfo(location).is_being_used = true;
|
||||
|
||||
DEBUG_ASSERT(LocInfo(location).IsScratch());
|
||||
return HostLocToX64(location);
|
||||
return location;
|
||||
}
|
||||
|
||||
void RegAlloc::HostCall(IR::Inst* result_def, IR::Value arg0_use, IR::Value arg1_use, IR::Value arg2_use, IR::Value arg3_use) {
|
||||
|
@ -301,26 +288,26 @@ void RegAlloc::HostCall(IR::Inst* result_def, IR::Value arg0_use, IR::Value arg1
|
|||
// TODO: This works but almost certainly leads to suboptimal generated code.
|
||||
|
||||
for (HostLoc caller_save : OtherCallerSave) {
|
||||
ScratchRegister({caller_save});
|
||||
ScratchHostLocReg({caller_save});
|
||||
}
|
||||
|
||||
if (result_def) {
|
||||
DefRegister(result_def, {AbiReturn});
|
||||
DefHostLocReg(result_def, {AbiReturn});
|
||||
} else {
|
||||
ScratchRegister({AbiReturn});
|
||||
ScratchHostLocReg({AbiReturn});
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < AbiArgs.size(); i++) {
|
||||
if (!args[i]->IsEmpty()) {
|
||||
UseScratchRegister(*args[i], {AbiArgs[i]});
|
||||
UseScratchHostLocReg(*args[i], {AbiArgs[i]});
|
||||
} else {
|
||||
ScratchRegister({AbiArgs[i]});
|
||||
ScratchHostLocReg({AbiArgs[i]});
|
||||
}
|
||||
}
|
||||
|
||||
// Flush all xmm registers
|
||||
for (auto xmm : any_xmm) {
|
||||
ScratchRegister({xmm});
|
||||
ScratchHostLocReg({xmm});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -421,17 +408,17 @@ void RegAlloc::Reset() {
|
|||
|
||||
void RegAlloc::EmitMove(HostLoc to, HostLoc from) {
|
||||
if (HostLocIsXMM(to) && HostLocIsSpill(from)) {
|
||||
code->MOVSD(HostLocToX64(to), SpillToOpArg(from));
|
||||
code->movsd(HostLocToXmm(to), SpillToOpArg(from));
|
||||
} else if (HostLocIsSpill(to) && HostLocIsXMM(from)) {
|
||||
code->MOVSD(SpillToOpArg(to), HostLocToX64(from));
|
||||
code->movsd(SpillToOpArg(to), HostLocToXmm(from));
|
||||
} else if (HostLocIsXMM(to) && HostLocIsXMM(from)) {
|
||||
code->MOVAPS(HostLocToX64(to), Gen::R(HostLocToX64(from)));
|
||||
code->movaps(HostLocToXmm(to), HostLocToXmm(from));
|
||||
} else if (HostLocIsGPR(to) && HostLocIsSpill(from)) {
|
||||
code->MOV(64, Gen::R(HostLocToX64(to)), SpillToOpArg(from));
|
||||
code->mov(HostLocToReg64(to), SpillToOpArg(from));
|
||||
} else if (HostLocIsSpill(to) && HostLocIsGPR(from)) {
|
||||
code->MOV(64, SpillToOpArg(to), Gen::R(HostLocToX64(from)));
|
||||
code->mov(SpillToOpArg(to), HostLocToReg64(from));
|
||||
} else if (HostLocIsGPR(to) && HostLocIsGPR(from)){
|
||||
code->MOV(64, Gen::R(HostLocToX64(to)), Gen::R(HostLocToX64(from)));
|
||||
code->mov(HostLocToReg64(to), HostLocToReg64(from));
|
||||
} else {
|
||||
ASSERT_MSG(false, "Invalid RegAlloc::EmitMove");
|
||||
}
|
||||
|
@ -439,7 +426,7 @@ void RegAlloc::EmitMove(HostLoc to, HostLoc from) {
|
|||
|
||||
void RegAlloc::EmitExchange(HostLoc a, HostLoc b) {
|
||||
if (HostLocIsGPR(a) && HostLocIsGPR(b)) {
|
||||
code->XCHG(64, Gen::R(HostLocToX64(a)), Gen::R(HostLocToX64(b)));
|
||||
code->xchg(HostLocToReg64(a), HostLocToReg64(b));
|
||||
} else if (HostLocIsXMM(a) && HostLocIsXMM(b)) {
|
||||
ASSERT_MSG(false, "Exchange is unnecessary for XMM registers");
|
||||
} else {
|
||||
|
@ -496,14 +483,17 @@ std::tuple<HostLoc, bool> RegAlloc::UseHostLoc(IR::Inst* use_inst, HostLocList d
|
|||
return std::make_tuple(static_cast<HostLoc>(-1), false);
|
||||
}
|
||||
|
||||
Gen::X64Reg RegAlloc::LoadImmediateIntoRegister(IR::Value imm, Gen::X64Reg reg) {
|
||||
HostLoc RegAlloc::LoadImmediateIntoHostLocReg(IR::Value imm, HostLoc host_loc) {
|
||||
ASSERT_MSG(imm.IsImmediate(), "imm is not an immediate");
|
||||
Gen::OpArg op_arg = ImmediateToOpArg(imm);
|
||||
if (op_arg.GetImmValue() == 0)
|
||||
code->XOR(32, Gen::R(reg), Gen::R(reg));
|
||||
|
||||
Xbyak::Reg64 reg = HostLocToReg64(host_loc);
|
||||
|
||||
u32 imm_value = ImmediateToU32(imm);
|
||||
if (imm_value == 0)
|
||||
code->xor_(reg, reg);
|
||||
else
|
||||
code->MOV(32, Gen::R(reg), op_arg);
|
||||
return reg;
|
||||
code->mov(reg.cvt32(), imm_value);
|
||||
return host_loc;
|
||||
}
|
||||
|
||||
} // namespace BackendX64
|
||||
|
|
|
@ -11,93 +11,55 @@
|
|||
#include <vector>
|
||||
|
||||
#include <boost/optional.hpp>
|
||||
#include <xbyak.h>
|
||||
|
||||
#include "backend_x64/block_of_code.h"
|
||||
#include "backend_x64/hostloc.h"
|
||||
#include "backend_x64/jitstate.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/x64/emitter.h"
|
||||
#include "frontend/ir/microinstruction.h"
|
||||
#include "frontend/ir/value.h"
|
||||
|
||||
namespace Dynarmic {
|
||||
namespace BackendX64 {
|
||||
|
||||
enum class HostLoc {
|
||||
// Ordering of the registers is intentional. See also: HostLocToX64.
|
||||
RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14,
|
||||
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
|
||||
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
|
||||
CF, PF, AF, ZF, SF, OF,
|
||||
FirstSpill,
|
||||
struct OpArg {
|
||||
OpArg() : type(OPERAND), inner_operand() {}
|
||||
OpArg(const Xbyak::Address& address) : type(ADDRESS), inner_address(address) {}
|
||||
OpArg(const Xbyak::Operand& operand) : type(OPERAND), inner_operand(operand) {}
|
||||
|
||||
Xbyak::Operand& operator*() {
|
||||
switch (type) {
|
||||
case ADDRESS:
|
||||
return inner_address;
|
||||
case OPERAND:
|
||||
return inner_operand;
|
||||
}
|
||||
ASSERT_MSG(false, "Unreachable");
|
||||
}
|
||||
|
||||
void setBit(int bits) {
|
||||
switch (type) {
|
||||
case ADDRESS:
|
||||
inner_address.setBit(bits);
|
||||
return;
|
||||
case OPERAND:
|
||||
inner_operand.setBit(bits);
|
||||
return;
|
||||
}
|
||||
ASSERT_MSG(false, "Unreachable");
|
||||
}
|
||||
|
||||
private:
|
||||
enum {
|
||||
OPERAND,
|
||||
ADDRESS,
|
||||
} type;
|
||||
|
||||
union {
|
||||
Xbyak::Operand inner_operand;
|
||||
Xbyak::Address inner_address;
|
||||
};
|
||||
|
||||
constexpr size_t HostLocCount = static_cast<size_t>(HostLoc::FirstSpill) + SpillCount;
|
||||
|
||||
enum class HostLocState {
|
||||
Idle, Def, Use, Scratch
|
||||
};
|
||||
|
||||
/// True when `reg` falls inside the RAX..R14 range of HostLoc.
inline bool HostLocIsGPR(HostLoc reg) {
    const bool below_range = reg < HostLoc::RAX;
    const bool above_range = reg > HostLoc::R14;
    return !(below_range || above_range);
}
|
||||
|
||||
/// True when `reg` falls inside the XMM0..XMM15 range of HostLoc.
inline bool HostLocIsXMM(HostLoc reg) {
    const bool below_range = reg < HostLoc::XMM0;
    const bool above_range = reg > HostLoc::XMM15;
    return !(below_range || above_range);
}
|
||||
|
||||
/// True when `reg` is either a general-purpose register or an XMM register.
inline bool HostLocIsRegister(HostLoc reg) {
    if (HostLocIsGPR(reg)) {
        return true;
    }
    return HostLocIsXMM(reg);
}
|
||||
|
||||
/// True when `reg` falls inside the CF..OF range of HostLoc.
inline bool HostLocIsFlag(HostLoc reg) {
    const bool below_range = reg < HostLoc::CF;
    const bool above_range = reg > HostLoc::OF;
    return !(below_range || above_range);
}
|
||||
|
||||
/// Maps spill slot index `i` to its HostLoc, counting up from HostLoc::FirstSpill.
/// Asserts that `i` is below SpillCount.
inline HostLoc HostLocSpill(size_t i) {
    ASSERT_MSG(i < SpillCount, "Invalid spill");
    const auto first_spill = static_cast<int>(HostLoc::FirstSpill);
    return static_cast<HostLoc>(first_spill + i);
}
|
||||
|
||||
/// True when `reg` lies between HostLoc::FirstSpill and the last spill slot, inclusive.
inline bool HostLocIsSpill(HostLoc reg) {
    if (reg < HostLoc::FirstSpill) {
        return false;
    }
    const HostLoc last_spill = HostLocSpill(SpillCount - 1);
    return reg <= last_spill;
}
|
||||
|
||||
using HostLocList = std::initializer_list<HostLoc>;
|
||||
|
||||
// General-purpose HostLoc values, in the order given here. RSP does not appear in the list.
const HostLocList any_gpr = {
    HostLoc::RAX, HostLoc::RBX, HostLoc::RCX, HostLoc::RDX,
    HostLoc::RSI, HostLoc::RDI, HostLoc::RBP,
    HostLoc::R8,  HostLoc::R9,  HostLoc::R10, HostLoc::R11,
    HostLoc::R12, HostLoc::R13, HostLoc::R14,
};
|
||||
|
||||
// All sixteen XMM HostLoc values, XMM0 through XMM15, in ascending order.
const HostLocList any_xmm = {
    HostLoc::XMM0,  HostLoc::XMM1,  HostLoc::XMM2,  HostLoc::XMM3,
    HostLoc::XMM4,  HostLoc::XMM5,  HostLoc::XMM6,  HostLoc::XMM7,
    HostLoc::XMM8,  HostLoc::XMM9,  HostLoc::XMM10, HostLoc::XMM11,
    HostLoc::XMM12, HostLoc::XMM13, HostLoc::XMM14, HostLoc::XMM15,
};
|
||||
|
||||
class RegAlloc final {
|
||||
|
@ -105,21 +67,54 @@ public:
|
|||
RegAlloc(BlockOfCode* code) : code(code) {}
|
||||
|
||||
/// Late-def
|
||||
Gen::X64Reg DefRegister(IR::Inst* def_inst, HostLocList desired_locations);
|
||||
/// Defines `def_inst` in a location chosen by DefHostLocReg from `desired_locations`
/// and returns that location converted with HostLocToReg64.
Xbyak::Reg64 DefGpr(IR::Inst* def_inst, HostLocList desired_locations = any_gpr) {
    const HostLoc chosen = DefHostLocReg(def_inst, desired_locations);
    return HostLocToReg64(chosen);
}
|
||||
/// Defines `def_inst` in a location chosen by DefHostLocReg from `desired_locations`
/// and returns that location converted with HostLocToXmm.
Xbyak::Xmm DefXmm(IR::Inst* def_inst, HostLocList desired_locations = any_xmm) {
    const HostLoc chosen = DefHostLocReg(def_inst, desired_locations);
    return HostLocToXmm(chosen);
}
|
||||
void RegisterAddDef(IR::Inst* def_inst, const IR::Value& use_inst);
|
||||
/// Early-use, Late-def
|
||||
Gen::X64Reg UseDefRegister(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations);
|
||||
Gen::X64Reg UseDefRegister(IR::Inst* use_inst, IR::Inst* def_inst, HostLocList desired_locations);
|
||||
std::tuple<Gen::OpArg, Gen::X64Reg> UseDefOpArg(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations);
|
||||
/// Forwards to UseDefHostLocReg for `use_value` / `def_inst` and returns the resulting
/// location converted with HostLocToReg64.
Xbyak::Reg64 UseDefGpr(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations = any_gpr) {
    const HostLoc chosen = UseDefHostLocReg(use_value, def_inst, desired_locations);
    return HostLocToReg64(chosen);
}
|
||||
/// Forwards to UseDefHostLocReg for `use_value` / `def_inst` and returns the resulting
/// location converted with HostLocToXmm.
Xbyak::Xmm UseDefXmm(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations = any_xmm) {
    const HostLoc chosen = UseDefHostLocReg(use_value, def_inst, desired_locations);
    return HostLocToXmm(chosen);
}
|
||||
/// Forwards to UseDefOpArgHostLocReg and returns its OpArg unchanged, paired with the
/// def location converted with HostLocToReg64.
std::tuple<OpArg, Xbyak::Reg64> UseDefOpArgGpr(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations = any_gpr) {
    const std::tuple<OpArg, HostLoc> allocated = UseDefOpArgHostLocReg(use_value, def_inst, desired_locations);
    return std::make_tuple(std::get<0>(allocated), HostLocToReg64(std::get<1>(allocated)));
}
|
||||
/// Forwards to UseDefOpArgHostLocReg and returns its OpArg unchanged, paired with the
/// def location converted with HostLocToXmm.
///
/// `desired_locations` defaults to any_xmm, like DefXmm, UseDefXmm, UseXmm, UseScratchXmm
/// and ScratchXmm. The earlier any_gpr default made a default call pick the def location
/// from the general-purpose list and then pass it to HostLocToXmm.
/// NOTE(review): HostLocToXmm is defined elsewhere; presumably it requires an XMM HostLoc - confirm.
std::tuple<OpArg, Xbyak::Xmm> UseDefOpArgXmm(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations = any_xmm) {
    OpArg op;
    HostLoc host_loc;
    std::tie(op, host_loc) = UseDefOpArgHostLocReg(use_value, def_inst, desired_locations);
    return std::make_tuple(op, HostLocToXmm(host_loc));
}
|
||||
/// Early-use
|
||||
Gen::X64Reg UseRegister(IR::Value use_value, HostLocList desired_locations);
|
||||
Gen::X64Reg UseRegister(IR::Inst* use_inst, HostLocList desired_locations);
|
||||
Gen::OpArg UseOpArg(IR::Value use_value, HostLocList desired_locations);
|
||||
/// Forwards to UseHostLocReg for `use_value` and returns the resulting location
/// converted with HostLocToReg64.
Xbyak::Reg64 UseGpr(IR::Value use_value, HostLocList desired_locations = any_gpr) {
    const HostLoc chosen = UseHostLocReg(use_value, desired_locations);
    return HostLocToReg64(chosen);
}
|
||||
/// Forwards to UseHostLocReg for `use_value` and returns the resulting location
/// converted with HostLocToXmm.
Xbyak::Xmm UseXmm(IR::Value use_value, HostLocList desired_locations = any_xmm) {
    const HostLoc chosen = UseHostLocReg(use_value, desired_locations);
    return HostLocToXmm(chosen);
}
|
||||
OpArg UseOpArg(IR::Value use_value, HostLocList desired_locations);
|
||||
/// Early-use, Destroyed
|
||||
Gen::X64Reg UseScratchRegister(IR::Value use_value, HostLocList desired_locations);
|
||||
Gen::X64Reg UseScratchRegister(IR::Inst* use_inst, HostLocList desired_locations);
|
||||
/// Forwards to UseScratchHostLocReg for `use_value` and returns the resulting location
/// converted with HostLocToReg64.
Xbyak::Reg64 UseScratchGpr(IR::Value use_value, HostLocList desired_locations = any_gpr) {
    const HostLoc chosen = UseScratchHostLocReg(use_value, desired_locations);
    return HostLocToReg64(chosen);
}
|
||||
/// Forwards to UseScratchHostLocReg for `use_value` and returns the resulting location
/// converted with HostLocToXmm.
Xbyak::Xmm UseScratchXmm(IR::Value use_value, HostLocList desired_locations = any_xmm) {
    const HostLoc chosen = UseScratchHostLocReg(use_value, desired_locations);
    return HostLocToXmm(chosen);
}
|
||||
/// Early-def, Late-use, single-use
|
||||
Gen::X64Reg ScratchRegister(HostLocList desired_locations);
|
||||
/// Obtains a scratch location from ScratchHostLocReg and returns it converted
/// with HostLocToReg64.
Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr) {
    const HostLoc chosen = ScratchHostLocReg(desired_locations);
    return HostLocToReg64(chosen);
}
|
||||
/// Obtains a scratch location from ScratchHostLocReg and returns it converted
/// with HostLocToXmm.
Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm) {
    const HostLoc chosen = ScratchHostLocReg(desired_locations);
    return HostLocToXmm(chosen);
}
|
||||
|
||||
/// Late-def for result register, Early-use for all arguments, Each value is placed into registers according to host ABI.
|
||||
void HostCall(IR::Inst* result_def = nullptr, IR::Value arg0_use = {}, IR::Value arg1_use = {}, IR::Value arg2_use = {}, IR::Value arg3_use = {});
|
||||
|
@ -141,11 +136,20 @@ private:
|
|||
bool IsRegisterAllocated(HostLoc loc) const;
|
||||
bool IsLastUse(IR::Inst* inst) const;
|
||||
|
||||
HostLoc DefHostLocReg(IR::Inst* def_inst, HostLocList desired_locations);
|
||||
HostLoc UseDefHostLocReg(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations);
|
||||
HostLoc UseDefHostLocReg(IR::Inst* use_inst, IR::Inst* def_inst, HostLocList desired_locations);
|
||||
std::tuple<OpArg, HostLoc> UseDefOpArgHostLocReg(IR::Value use_value, IR::Inst* def_inst, HostLocList desired_locations);
|
||||
HostLoc UseHostLocReg(IR::Value use_value, HostLocList desired_locations);
|
||||
HostLoc UseHostLocReg(IR::Inst* use_inst, HostLocList desired_locations);
|
||||
std::tuple<HostLoc, bool> UseHostLoc(IR::Inst* use_inst, HostLocList desired_locations);
|
||||
HostLoc UseScratchHostLocReg(IR::Value use_value, HostLocList desired_locations);
|
||||
HostLoc UseScratchHostLocReg(IR::Inst* use_inst, HostLocList desired_locations);
|
||||
HostLoc ScratchHostLocReg(HostLocList desired_locations);
|
||||
|
||||
void EmitMove(HostLoc to, HostLoc from);
|
||||
void EmitExchange(HostLoc a, HostLoc b);
|
||||
Gen::X64Reg LoadImmediateIntoRegister(IR::Value imm, Gen::X64Reg reg);
|
||||
HostLoc LoadImmediateIntoHostLocReg(IR::Value imm, HostLoc reg);
|
||||
|
||||
void SpillRegister(HostLoc loc);
|
||||
HostLoc FindFreeSpill() const;
|
||||
|
|
|
@ -1,190 +0,0 @@
|
|||
// This file is under the public domain.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#ifdef _WIN32
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
#include <initializer_list>
|
||||
#include <new>
|
||||
#include <type_traits>
|
||||
#include "common/common_types.h"
|
||||
|
||||
// namespace avoids conflict with OS X Carbon; don't use BitSet<T> directly
|
||||
namespace Common {

// Helper functions:

#ifdef _WIN32
template <typename T>
static inline int CountSetBits(T v)
{
    // from https://graphics.stanford.edu/~seander/bithacks.html
    // GCC has this built in, but MSVC's intrinsic will only emit the actual
    // POPCNT instruction, which we're not depending on
    v = v - ((v >> 1) & (T)~(T)0/3);
    v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3);
    v = (v + (v >> 4)) & (T)~(T)0/255*15;
    return (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * 8;
}
static inline int LeastSignificantSetBit(uint8_t val)
{
    unsigned long index;
    _BitScanForward(&index, val);
    return (int)index;
}
static inline int LeastSignificantSetBit(uint16_t val)
{
    unsigned long index;
    _BitScanForward(&index, val);
    return (int)index;
}
static inline int LeastSignificantSetBit(uint32_t val)
{
    unsigned long index;
    _BitScanForward(&index, val);
    return (int)index;
}
static inline int LeastSignificantSetBit(uint64_t val)
{
    unsigned long index;
    _BitScanForward64(&index, val);
    return (int)index;
}
#else
static inline int CountSetBits(uint8_t val) { return __builtin_popcount(val); }
static inline int CountSetBits(uint16_t val) { return __builtin_popcount(val); }
static inline int CountSetBits(uint32_t val) { return __builtin_popcount(val); }
static inline int CountSetBits(uint64_t val) { return __builtin_popcountll(val); }
static inline int LeastSignificantSetBit(uint8_t val) { return __builtin_ctz(val); }
static inline int LeastSignificantSetBit(uint16_t val) { return __builtin_ctz(val); }
static inline int LeastSignificantSetBit(uint32_t val) { return __builtin_ctz(val); }
static inline int LeastSignificantSetBit(uint64_t val) { return __builtin_ctzll(val); }
#endif

// Similar to std::bitset, this is a class which encapsulates a bitset, i.e.
// using the set bits of an integer to represent a set of integers.  Like that
// class, it acts like an array of bools:
//     BitSet32 bs;
//     bs[1] = true;
// but also like the underlying integer ([0] = least significant bit):
//     BitSet32 bs2 = ...;
//     bs = (bs ^ bs2) & BitSet32(0xffff);
// The following additional functionality is provided:
// - Construction using an initializer list.
//     BitSet bs { 1, 2, 4, 8 };
// - Efficiently iterating through the set bits:
//     for (int i : bs)
//         [i is the *index* of a set bit]
//   (This uses the appropriate CPU instruction to find the next set bit in one
//   operation.)
// - Counting set bits using .Count() - see comment on that method.

// TODO: use constexpr when MSVC gets out of the Dark Ages

template <typename IntTy>
class BitSet
{
    static_assert(!std::is_signed<IntTy>::value, "BitSet should not be used with signed types");
public:
    // A reference to a particular bit, returned from operator[].
    class Ref
    {
    public:
        Ref(Ref&& other) : m_bs(other.m_bs), m_mask(other.m_mask) {}
        Ref(BitSet* bs, IntTy mask) : m_bs(bs), m_mask(mask) {}
        operator bool() const { return (m_bs->m_val & m_mask) != 0; }
        // Sets or clears the referenced bit in the owning BitSet and returns the value written.
        bool operator=(bool set)
        {
            m_bs->m_val = (m_bs->m_val & ~m_mask) | (set ? m_mask : 0);
            return set;
        }
    private:
        BitSet* m_bs;
        IntTy m_mask;
    };

    // A STL-like iterator is required to be able to use range-based for loops.
    // m_val holds the bits not yet visited and m_bit the index currently pointed at;
    // m_bit == -1 marks the end. Equality compares m_bit only.
    class Iterator
    {
    public:
        Iterator(const Iterator& other) : m_val(other.m_val), m_bit(other.m_bit) {}
        Iterator(IntTy val, int bit) : m_val(val), m_bit(bit) {}
        Iterator& operator=(Iterator other) { new (this) Iterator(other); return *this; }
        int operator*() { return m_bit; }
        // Moves to the lowest remaining set bit, or to the end state when none is left.
        Iterator& operator++()
        {
            if (m_val == 0)
            {
                m_bit = -1;
            }
            else
            {
                int bit = LeastSignificantSetBit(m_val);
                // Build the mask in IntTy. A plain `1 << bit` is an int shift, which is
                // undefined once bit reaches the width of int, so a bit found in the
                // upper half of a 64-bit set was not reliably cleared.
                m_val &= ~((IntTy)1 << bit);
                m_bit = bit;
            }
            return *this;
        }
        Iterator operator++(int _)
        {
            Iterator other(*this);
            ++*this;
            return other;
        }
        bool operator==(Iterator other) const { return m_bit == other.m_bit; }
        bool operator!=(Iterator other) const { return m_bit != other.m_bit; }
    private:
        IntTy m_val;
        int m_bit;
    };

    BitSet() : m_val(0) {}
    explicit BitSet(IntTy val) : m_val(val) {}
    // Builds the set from a list of bit indices.
    BitSet(std::initializer_list<int> init)
    {
        m_val = 0;
        for (int bit : init)
            m_val |= (IntTy)1 << bit;
    }

    // Returns a set with the lowest `count` bits set; the full-width case is handled
    // separately because shifting by the full width of IntTy is not valid.
    static BitSet AllTrue(size_t count)
    {
        return BitSet(count == sizeof(IntTy)*8 ? ~(IntTy)0 : (((IntTy)1 << count) - 1));
    }

    Ref operator[](size_t bit) { return Ref(this, (IntTy)1 << bit); }
    const Ref operator[](size_t bit) const { return (*const_cast<BitSet*>(this))[bit]; }
    bool operator==(BitSet other) const { return m_val == other.m_val; }
    bool operator!=(BitSet other) const { return m_val != other.m_val; }
    bool operator<(BitSet other) const { return m_val < other.m_val; }
    bool operator>(BitSet other) const { return m_val > other.m_val; }
    BitSet operator|(BitSet other) const { return BitSet(m_val | other.m_val); }
    BitSet operator&(BitSet other) const { return BitSet(m_val & other.m_val); }
    BitSet operator^(BitSet other) const { return BitSet(m_val ^ other.m_val); }
    BitSet operator~() const { return BitSet(~m_val); }
    BitSet& operator|=(BitSet other) { return *this = *this | other; }
    BitSet& operator&=(BitSet other) { return *this = *this & other; }
    BitSet& operator^=(BitSet other) { return *this = *this ^ other; }
    operator uint32_t() = delete;
    operator bool() { return m_val != 0; }

    // Warning: Even though on modern CPUs this is a single fast instruction,
    // Dolphin's official builds do not currently assume POPCNT support on x86,
    // so slower explicit bit twiddling is generated.  Still should generally
    // be faster than a loop.
    unsigned int Count() const { return CountSetBits(m_val); }

    // begin() starts from a placeholder position and advances once, so it lands on the
    // first set bit, or directly on the end state for an empty set.
    Iterator begin() const { Iterator it(m_val, 0); return ++it; }
    Iterator end() const { return Iterator(m_val, -1); }

    IntTy m_val;
};

} // Common
|
||||
|
||||
// Fixed-width aliases for Common::BitSet, named after the bit width of the underlying integer.
using BitSet8 = Common::BitSet<uint8_t>;
using BitSet16 = Common::BitSet<uint16_t>;
using BitSet32 = Common::BitSet<uint32_t>;
using BitSet64 = Common::BitSet<uint64_t>;
|
|
@ -1,92 +0,0 @@
|
|||
// Copyright 2013 Dolphin Emulator Project
|
||||
// Licensed under GPLv2
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/memory_util.h"
|
||||
|
||||
// Everything that needs to generate code should inherit from this.
|
||||
// You get memory management for free, plus, you can use all emitter functions without
|
||||
// having to prefix them with gen-> or something similar.
|
||||
// Example implementation:
|
||||
// class JIT : public CodeBlock<ARMXEmitter> {}
|
||||
template<class T> class CodeBlock : public T
|
||||
{
|
||||
private:
|
||||
// A privately used function to set the executable RAM space to something invalid.
|
||||
// For debugging usefulness it should be used to set the RAM to a host specific breakpoint instruction
|
||||
virtual void PoisonMemory() = 0;
|
||||
|
||||
protected:
|
||||
u8 *region;
|
||||
size_t region_size;
|
||||
|
||||
public:
|
||||
CodeBlock() : region(nullptr), region_size(0) {}
|
||||
virtual ~CodeBlock() { if (region) FreeCodeSpace(); }
|
||||
|
||||
CodeBlock(const CodeBlock&) = delete;
|
||||
CodeBlock& operator=(const CodeBlock&) = delete;
|
||||
|
||||
// Call this before you generate any code.
|
||||
void AllocCodeSpace(int size)
|
||||
{
|
||||
region_size = size;
|
||||
region = (u8*)AllocateExecutableMemory(region_size);
|
||||
T::SetCodePtr(region);
|
||||
}
|
||||
|
||||
// Always clear code space with breakpoints, so that if someone accidentally executes
|
||||
// uninitialized, it just breaks into the debugger.
|
||||
void ClearCodeSpace()
|
||||
{
|
||||
PoisonMemory();
|
||||
ResetCodePtr();
|
||||
}
|
||||
|
||||
// Call this when shutting down. Don't rely on the destructor, even though it'll do the job.
|
||||
void FreeCodeSpace()
|
||||
{
|
||||
#ifdef __SYMBIAN32__
|
||||
ResetExecutableMemory(region);
|
||||
#else
|
||||
FreeMemoryPages(region, region_size);
|
||||
#endif
|
||||
region = nullptr;
|
||||
region_size = 0;
|
||||
}
|
||||
|
||||
bool IsInSpace(const u8 *ptr)
|
||||
{
|
||||
return (ptr >= region) && (ptr < (region + region_size));
|
||||
}
|
||||
|
||||
// Cannot currently be undone. Will write protect the entire code region.
|
||||
// Start over if you need to change the code (call FreeCodeSpace(), AllocCodeSpace()).
|
||||
void WriteProtect()
|
||||
{
|
||||
WriteProtectMemory(region, region_size, true);
|
||||
}
|
||||
|
||||
void ResetCodePtr()
|
||||
{
|
||||
T::SetCodePtr(region);
|
||||
}
|
||||
|
||||
size_t GetSpaceLeft() const
|
||||
{
|
||||
return region_size - (T::GetCodePtr() - region);
|
||||
}
|
||||
|
||||
u8 *GetBasePtr() {
|
||||
return region;
|
||||
}
|
||||
|
||||
size_t GetOffset(const u8 *ptr) const {
|
||||
return ptr - region;
|
||||
}
|
||||
};
|
39
src/common/iterator_util.h
Normal file
39
src/common/iterator_util.h
Normal file
|
@ -0,0 +1,39 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iterator>
|
||||
|
||||
namespace Dynarmic {
|
||||
namespace Common {
|
||||
|
||||
namespace detail {

/// Range adapter whose begin()/end() walk the wrapped range back to front.
///
/// T is the type deduced by Reverse(): `U&` when Reverse() was given an lvalue
/// (the adapter then only refers to the caller's range), or plain `U` when it was
/// given an rvalue (the adapter then owns the moved-in range). Storing `T` rather
/// than `T&` is what keeps a temporary alive for the duration of a range-for.
template<typename T>
struct ReverseAdapter {
    T iterable;

    auto begin() {
        using namespace std;
        return rbegin(iterable);
    }

    auto end() {
        using namespace std;
        return rend(iterable);
    }
};

} // namespace detail

/// Wraps `iterable` so that a range-based for loop visits it in reverse order.
///
/// Lvalues are referenced, so elements can be modified in place; rvalues are moved
/// into the returned adapter so it never refers to a destroyed temporary.
template<typename T>
detail::ReverseAdapter<T> Reverse(T&& iterable) {
    // static_cast<T&&> is what std::forward<T> does; spelled out to avoid needing <utility>.
    return detail::ReverseAdapter<T>{static_cast<T&&>(iterable)};
}
|
||||
|
||||
} // namespace Common
|
||||
} // namespace Dynarmic
|
|
@ -1,192 +0,0 @@
|
|||
// Copyright 2013 Dolphin Emulator Project / 2014 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/memory_util.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#include <psapi.h>
|
||||
#include "common/string_util.h"
|
||||
#else
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_WIN32) && defined(ARCHITECTURE_X64) && !defined(MAP_32BIT)
|
||||
#include <unistd.h>
|
||||
#define PAGE_MASK (getpagesize() - 1)
|
||||
#define round_page(x) ((((unsigned long)(x)) + PAGE_MASK) & ~(PAGE_MASK))
|
||||
#endif
|
||||
|
||||
#ifndef _WIN32
// strerror_r() exists with two incompatible signatures. The XSI variant returns an
// int status and writes the message into the caller's buffer; the GNU variant
// returns a char* that may point at a static string and leave the buffer untouched.
// Overload resolution on the return type picks the pointer that actually holds the
// message, whichever variant the C library exposes.
static inline const char* StrerrorResult(int /*status*/, const char* buffer) { return buffer; }
static inline const char* StrerrorResult(const char* message, const char* /*buffer*/) { return message; }
#endif

// Generic function to get last error message.
// Call directly after the command or use the error num.
// This function might change the error code.
//
// Returns a pointer to a per-thread buffer holding the text for GetLastError()
// (Windows) or errno (elsewhere); the text is overwritten by the next call made
// on the same thread.
const char* GetLastErrorMsg()
{
    static const size_t buff_size = 255;

#ifdef _WIN32
    static thread_local char err_str[buff_size] = {};

    FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, nullptr, GetLastError(),
        MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
        err_str, buff_size, nullptr);
#else
    static __thread char err_str[buff_size] = {};

    // Thread safe with either the XSI or the GNU flavour of strerror_r.
    const char* message = StrerrorResult(strerror_r(errno, err_str, buff_size), err_str);
    if (message != err_str) {
        // GNU flavour handed back its own string: copy it into our buffer.
        strncpy(err_str, message, buff_size - 1);
        err_str[buff_size - 1] = '\0';
    }
#endif

    return err_str;
}
|
||||
|
||||
|
||||
// This is purposely not a full wrapper for virtualalloc/mmap, but it
// provides exactly the primitive operations that Dolphin needs.

// Allocates `size` bytes of memory that is readable, writable and executable.
//
// size: number of bytes requested from VirtualAlloc (Windows) or mmap (elsewhere).
// low:  ask for a low address. With MAP_32BIT available the flag is passed to mmap;
//       on x64 systems without it a hint starting at 0.5 GB is used instead. The
//       parameter has no effect on the Windows path.
//
// Returns the mapping, or nullptr after ASSERT_MSG has reported the failure
// (assuming ASSERT_MSG returns — TODO confirm against common/assert.h).
void* AllocateExecutableMemory(size_t size, bool low)
{
#if defined(_WIN32)
    void* ptr = VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
#else
    // Shared across calls (function-local static): next address to suggest to mmap.
    static char* map_hint = nullptr;
#if defined(ARCHITECTURE_X64) && !defined(MAP_32BIT)
    // This OS has no flag to enforce allocation below the 4 GB boundary,
    // but if we hint that we want a low address it is very likely we will
    // get one.
    // An older version of this code used MAP_FIXED, but that has the side
    // effect of discarding already mapped pages that happen to be in the
    // requested virtual memory range (such as the emulated RAM, sometimes).
    if (low && (!map_hint))
        map_hint = (char*)round_page(512*1024*1024); /* 0.5 GB rounded up to the next page */
#endif
    void* ptr = mmap(map_hint, size, PROT_READ | PROT_WRITE | PROT_EXEC,
        MAP_ANON | MAP_PRIVATE
#if defined(ARCHITECTURE_X64) && defined(MAP_32BIT)
        | (low ? MAP_32BIT : 0)
#endif
        , -1, 0);
#endif /* defined(_WIN32) */

    // The failure sentinel differs per API (nullptr vs MAP_FAILED); both branches
    // open the same block, which is closed once after the shared ASSERT_MSG below.
#ifdef _WIN32
    if (ptr == nullptr)
    {
#else
    if (ptr == MAP_FAILED)
    {
        ptr = nullptr;
#endif
        ASSERT_MSG(false, "Failed to allocate executable memory");
    }
#if !defined(_WIN32) && defined(ARCHITECTURE_X64) && !defined(MAP_32BIT)
    else
    {
        // Success on the hint-based path: advance the hint past this allocation so
        // the next low request is suggested just after it.
        if (low)
        {
            map_hint += size;
            map_hint = (char*)round_page(map_hint); /* round up to the next page */
        }
    }
#endif

    // Only compiled when EMU_ARCH_BITS is defined as 64; 0x80000000 is the 2 GB mark.
#if EMU_ARCH_BITS == 64
    if ((u64)ptr >= 0x80000000 && low == true)
        ASSERT_MSG(false, "Executable memory ended up above 2GB!");
#endif

    return ptr;
}
|
||||
|
||||
void* AllocateMemoryPages(size_t size)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
void* ptr = VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_READWRITE);
|
||||
#else
|
||||
void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE,
|
||||
MAP_ANON | MAP_PRIVATE, -1, 0);
|
||||
|
||||
if (ptr == MAP_FAILED)
|
||||
ptr = nullptr;
|
||||
#endif
|
||||
|
||||
if (ptr == nullptr)
|
||||
ASSERT_MSG(false, "Failed to allocate raw memory");
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void* AllocateAlignedMemory(size_t size,size_t alignment)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
void* ptr = _aligned_malloc(size,alignment);
|
||||
#else
|
||||
void* ptr = nullptr;
|
||||
#ifdef ANDROID
|
||||
ptr = memalign(alignment, size);
|
||||
#else
|
||||
if (posix_memalign(&ptr, alignment, size) != 0)
|
||||
ASSERT_MSG(false, "Failed to allocate aligned memory");
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (ptr == nullptr)
|
||||
ASSERT_MSG(false, "Failed to allocate aligned memory");
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Returns pages obtained from AllocateMemoryPages()/AllocateExecutableMemory() to
// the OS. A null `ptr` is ignored. `size` is only used by the munmap path.
void FreeMemoryPages(void* ptr, size_t size)
{
    if (!ptr)
        return;

#ifndef _WIN32
    munmap(ptr, size);
#else
    if (!VirtualFree(ptr, 0, MEM_RELEASE))
        ASSERT_MSG(false, "FreeMemoryPages failed!\n%s", GetLastErrorMsg());
#endif
}
|
||||
|
||||
// Releases a block obtained from AllocateAlignedMemory(). A null `ptr` is ignored.
void FreeAlignedMemory(void* ptr)
{
    if (!ptr)
        return;

#ifndef _WIN32
    free(ptr);
#else
    _aligned_free(ptr);
#endif
}
|
||||
|
||||
// Removes write permission from [ptr, ptr + size). The range stays readable and,
// when `allowExecute` is set, executable. The Windows path asserts on failure;
// the mprotect result is not checked.
void WriteProtectMemory(void* ptr, size_t size, bool allowExecute)
{
#ifndef _WIN32
    int protection = PROT_READ;
    if (allowExecute)
        protection = (PROT_READ | PROT_EXEC);
    mprotect(ptr, size, protection);
#else
    DWORD oldValue;
    const DWORD protection = allowExecute ? PAGE_EXECUTE_READ : PAGE_READONLY;
    if (!VirtualProtect(ptr, size, protection, &oldValue))
        ASSERT_MSG(false, "WriteProtectMemory failed!\n%s", GetLastErrorMsg());
#endif
}
|
||||
|
||||
// Makes [ptr, ptr + size) readable and writable again, and executable as well when
// `allowExecute` is set. The Windows path asserts on failure; the mprotect result
// is not checked.
void UnWriteProtectMemory(void* ptr, size_t size, bool allowExecute)
{
#ifndef _WIN32
    int protection = PROT_WRITE | PROT_READ;
    if (allowExecute)
        protection = (PROT_READ | PROT_WRITE | PROT_EXEC);
    mprotect(ptr, size, protection);
#else
    DWORD oldValue;
    const DWORD protection = allowExecute ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE;
    if (!VirtualProtect(ptr, size, protection, &oldValue))
        ASSERT_MSG(false, "UnWriteProtectMemory failed!\n%s", GetLastErrorMsg());
#endif
}
|
||||
|
||||
// Placeholder: no memory-usage reporting is implemented, so this always yields
// an empty string.
std::string MemUsage()
{
    return std::string();
}
|
|
@ -1,19 +0,0 @@
|
|||
// Copyright 2013 Dolphin Emulator Project / 2014 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <string>
|
||||
|
||||
void* AllocateExecutableMemory(size_t size, bool low = true);
|
||||
void* AllocateMemoryPages(size_t size);
|
||||
void FreeMemoryPages(void* ptr, size_t size);
|
||||
void* AllocateAlignedMemory(size_t size,size_t alignment);
|
||||
void FreeAlignedMemory(void* ptr);
|
||||
void WriteProtectMemory(void* ptr, size_t size, bool executable = false);
|
||||
void UnWriteProtectMemory(void* ptr, size_t size, bool allowExecute = false);
|
||||
std::string MemUsage();
|
||||
|
||||
// Page size assumed by callers of this header: a fixed 4096 bytes (the OS is not queried).
inline int GetPageSize() {
    const int assumed_page_bytes = 4096;
    return assumed_page_bytes;
}
|
|
@ -1,363 +0,0 @@
|
|||
// Copyright (C) 2003 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#include "abi.h"
|
||||
#include "emitter.h"
|
||||
|
||||
using namespace Gen;
|
||||
|
||||
// Shared code between Win64 and Unix64
|
||||
|
||||
// Works out the stack adjustment needed after pushing the GPRs selected by `mask`.
//
// mask:              registers to save; bits under ABI_ALL_GPRS are pushed (8 bytes
//                    each), bits under ABI_ALL_FPRS get a 16-byte slot in the frame.
// rsp_alignment:     caller-supplied RSP offset that the 16-byte alignment fix-ups
//                    are computed from.
// needed_frame_size: extra bytes the caller wants reserved.
// shadowp:           out - shadow space in bytes (0x20 on Windows, otherwise 0).
// subtractionp:      out - total number of bytes to subtract from RSP.
// xmm_offsetp:       out - offset from the adjusted RSP of the first XMM slot.
void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp) {
#if defined(_WIN32)
    const size_t shadow_bytes = 0x20;
#else
    const size_t shadow_bytes = 0;
#endif

    const int gpr_total = (mask & ABI_ALL_GPRS).Count();
    const int xmm_total = (mask & ABI_ALL_FPRS).Count();

    // Account for the pushes that happen before the stack adjustment.
    rsp_alignment -= gpr_total * 8;

    // If we have any XMMs to save, we must align the stack here.
    size_t frame_bytes = 0;
    if (xmm_total)
        frame_bytes = rsp_alignment & 0xf;
    frame_bytes += 16 * xmm_total;

    const size_t xmm_area_bytes = frame_bytes;
    frame_bytes += needed_frame_size;
    frame_bytes += shadow_bytes;

    // Final alignment.
    rsp_alignment -= frame_bytes;
    frame_bytes += rsp_alignment & 0xf;

    *shadowp = shadow_bytes;
    *subtractionp = frame_bytes;
    *xmm_offsetp = frame_bytes - xmm_area_bytes;
}
|
||||
|
||||
// Emits code that pushes the GPRs in `mask`, lowers RSP by the frame size computed
// by ABI_CalculateFrameSize() and stores the XMM registers in `mask` into the frame.
// Returns the shadow-space size reported by ABI_CalculateFrameSize().
size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size) {
    size_t shadow, subtraction, xmm_offset;
    ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset);

    for (int gpr : mask & ABI_ALL_GPRS)
        PUSH((X64Reg)gpr);

    if (subtraction) {
        // Values of 0x80 and above are emitted as a 32-bit immediate, smaller ones as 8-bit.
        if (subtraction >= 0x80)
            SUB(64, R(RSP), Imm32((u32)subtraction));
        else
            SUB(64, R(RSP), Imm8((u8)subtraction));
    }

    // XMM registers occupy mask bits 16..31, hence the -16 to get the register index.
    for (int xmm_bit : mask & ABI_ALL_FPRS) {
        MOVAPD(MDisp(RSP, (int)xmm_offset), (X64Reg)(xmm_bit - 16));
        xmm_offset += 16;
    }

    return shadow;
}
|
||||
|
||||
// Emits the inverse of ABI_PushRegistersAndAdjustStack() for the same arguments:
// reloads the XMM registers from the frame, raises RSP again and pops the GPRs in
// descending register order.
void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size) {
    size_t shadow, subtraction, xmm_offset;
    ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset);

    // XMM registers occupy mask bits 16..31, hence the -16 to get the register index.
    for (int xmm_bit : mask & ABI_ALL_FPRS) {
        MOVAPD((X64Reg) (xmm_bit - 16), MDisp(RSP, (int)xmm_offset));
        xmm_offset += 16;
    }

    if (subtraction) {
        if (subtraction >= 0x80)
            ADD(64, R(RSP), Imm32((u32)subtraction));
        else
            ADD(64, R(RSP), Imm8((u8)subtraction));
    }

    // Walk GPR indices 15 down to 0 so pops mirror the push order.
    int gpr = 15;
    while (gpr >= 0) {
        if (mask[gpr])
            POP((X64Reg)gpr);
        --gpr;
    }
}
|
||||
|
||||
// Common functions
|
||||
void XEmitter::ABI_CallFunction(const void *func) {
|
||||
u64 distance = u64(func) - (u64(code) + 5);
|
||||
if (distance >= 0x0000000080000000ULL
|
||||
&& distance < 0xFFFFFFFF80000000ULL) {
|
||||
// Far call
|
||||
MOV(64, R(RAX), ImmPtr(func));
|
||||
CALLptr(R(RAX));
|
||||
} else {
|
||||
CALL(func);
|
||||
}
|
||||
}
|
||||
|
||||
void XEmitter::ABI_CallFunctionC16(const void *func, u16 param1) {
|
||||
MOV(32, R(ABI_PARAM1), Imm32((u32)param1));
|
||||
u64 distance = u64(func) - (u64(code) + 5);
|
||||
if (distance >= 0x0000000080000000ULL
|
||||
&& distance < 0xFFFFFFFF80000000ULL) {
|
||||
// Far call
|
||||
MOV(64, R(RAX), ImmPtr(func));
|
||||
CALLptr(R(RAX));
|
||||
} else {
|
||||
CALL(func);
|
||||
}
|
||||
}
|
||||
|
||||
void XEmitter::ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2) {
|
||||
MOV(32, R(ABI_PARAM1), Imm32(param1));
|
||||
MOV(32, R(ABI_PARAM2), Imm32((u32)param2));
|
||||
u64 distance = u64(func) - (u64(code) + 5);
|
||||
if (distance >= 0x0000000080000000ULL
|
||||
&& distance < 0xFFFFFFFF80000000ULL) {
|
||||
// Far call
|
||||
MOV(64, R(RAX), ImmPtr(func));
|
||||
CALLptr(R(RAX));
|
||||
} else {
|
||||
CALL(func);
|
||||
}
|
||||
}
|
||||
|
||||
void XEmitter::ABI_CallFunctionC(const void *func, u32 param1) {
|
||||
MOV(32, R(ABI_PARAM1), Imm32(param1));
|
||||
u64 distance = u64(func) - (u64(code) + 5);
|
||||
if (distance >= 0x0000000080000000ULL
|
||||
&& distance < 0xFFFFFFFF80000000ULL) {
|
||||
// Far call
|
||||
MOV(64, R(RAX), ImmPtr(func));
|
||||
CALLptr(R(RAX));
|
||||
} else {
|
||||
CALL(func);
|
||||
}
|
||||
}
|
||||
|
||||
void XEmitter::ABI_CallFunctionCC(const void *func, u32 param1, u32 param2) {
|
||||
MOV(32, R(ABI_PARAM1), Imm32(param1));
|
||||
MOV(32, R(ABI_PARAM2), Imm32(param2));
|
||||
u64 distance = u64(func) - (u64(code) + 5);
|
||||
if (distance >= 0x0000000080000000ULL
|
||||
&& distance < 0xFFFFFFFF80000000ULL) {
|
||||
// Far call
|
||||
MOV(64, R(RAX), ImmPtr(func));
|
||||
CALLptr(R(RAX));
|
||||
} else {
|
||||
CALL(func);
|
||||
}
|
||||
}
|
||||
|
||||
void XEmitter::ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3) {
|
||||
MOV(32, R(ABI_PARAM1), Imm32(param1));
|
||||
MOV(32, R(ABI_PARAM2), Imm32(param2));
|
||||
MOV(32, R(ABI_PARAM3), Imm32(param3));
|
||||
u64 distance = u64(func) - (u64(code) + 5);
|
||||
if (distance >= 0x0000000080000000ULL
|
||||
&& distance < 0xFFFFFFFF80000000ULL) {
|
||||
// Far call
|
||||
MOV(64, R(RAX), ImmPtr(func));
|
||||
CALLptr(R(RAX));
|
||||
} else {
|
||||
CALL(func);
|
||||
}
|
||||
}
|
||||
|
||||
void XEmitter::ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3) {
|
||||
MOV(32, R(ABI_PARAM1), Imm32(param1));
|
||||
MOV(32, R(ABI_PARAM2), Imm32(param2));
|
||||
MOV(64, R(ABI_PARAM3), ImmPtr(param3));
|
||||
u64 distance = u64(func) - (u64(code) + 5);
|
||||
if (distance >= 0x0000000080000000ULL
|
||||
&& distance < 0xFFFFFFFF80000000ULL) {
|
||||
// Far call
|
||||
MOV(64, R(RAX), ImmPtr(func));
|
||||
CALLptr(R(RAX));
|
||||
} else {
|
||||
CALL(func);
|
||||
}
|
||||
}
|
||||
|
||||
void XEmitter::ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u32 param3, void *param4) {
|
||||
MOV(32, R(ABI_PARAM1), Imm32(param1));
|
||||
MOV(32, R(ABI_PARAM2), Imm32(param2));
|
||||
MOV(32, R(ABI_PARAM3), Imm32(param3));
|
||||
MOV(64, R(ABI_PARAM4), ImmPtr(param4));
|
||||
u64 distance = u64(func) - (u64(code) + 5);
|
||||
if (distance >= 0x0000000080000000ULL
|
||||
&& distance < 0xFFFFFFFF80000000ULL) {
|
||||
// Far call
|
||||
MOV(64, R(RAX), ImmPtr(func));
|
||||
CALLptr(R(RAX));
|
||||
} else {
|
||||
CALL(func);
|
||||
}
|
||||
}
|
||||
|
||||
void XEmitter::ABI_CallFunctionP(const void *func, void *param1) {
|
||||
MOV(64, R(ABI_PARAM1), ImmPtr(param1));
|
||||
u64 distance = u64(func) - (u64(code) + 5);
|
||||
if (distance >= 0x0000000080000000ULL
|
||||
&& distance < 0xFFFFFFFF80000000ULL) {
|
||||
// Far call
|
||||
MOV(64, R(RAX), ImmPtr(func));
|
||||
CALLptr(R(RAX));
|
||||
} else {
|
||||
CALL(func);
|
||||
}
|
||||
}
|
||||
|
||||
// Emits a call to `func` with a pointer constant in ABI_PARAM1 and the 32-bit
// operand `arg2` in ABI_PARAM2 (the move is skipped when arg2 already is that
// register). The near/far call selection is delegated to ABI_CallFunction().
//
// NOTE(review): ABI_PARAM1 is overwritten before arg2 is read, so an arg2 that
// refers to ABI_PARAM1 would be lost - confirm no caller passes one.
void XEmitter::ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2) {
    MOV(64, R(ABI_PARAM1), ImmPtr(param1));
    if (!arg2.IsSimpleReg(ABI_PARAM2))
        MOV(32, R(ABI_PARAM2), arg2);
    ABI_CallFunction(func);
}
|
||||
|
||||
// Emits a call to `func` with a pointer constant in ABI_PARAM1 and the 32-bit
// operands `arg2`/`arg3` in ABI_PARAM2/ABI_PARAM3 (each move is skipped when the
// operand already is its target register). The near/far call selection is
// delegated to ABI_CallFunction().
//
// NOTE(review): registers are written in parameter order, so an operand that refers
// to an earlier parameter register is read after it was overwritten - confirm no
// caller relies on that.
void XEmitter::ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3) {
    MOV(64, R(ABI_PARAM1), ImmPtr(param1));
    if (!arg2.IsSimpleReg(ABI_PARAM2))
        MOV(32, R(ABI_PARAM2), arg2);
    if (!arg3.IsSimpleReg(ABI_PARAM3))
        MOV(32, R(ABI_PARAM3), arg3);
    ABI_CallFunction(func);
}
|
||||
|
||||
void XEmitter::ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3) {
|
||||
MOV(64, R(ABI_PARAM1), ImmPtr(param1));
|
||||
MOV(64, R(ABI_PARAM2), ImmPtr(param2));
|
||||
MOV(32, R(ABI_PARAM3), Imm32(param3));
|
||||
u64 distance = u64(func) - (u64(code) + 5);
|
||||
if (distance >= 0x0000000080000000ULL
|
||||
&& distance < 0xFFFFFFFF80000000ULL) {
|
||||
// Far call
|
||||
MOV(64, R(RAX), ImmPtr(func));
|
||||
CALLptr(R(RAX));
|
||||
} else {
|
||||
CALL(func);
|
||||
}
|
||||
}
|
||||
|
||||
// Pass a register as a parameter.
|
||||
void XEmitter::ABI_CallFunctionR(const void *func, X64Reg reg1) {
|
||||
if (reg1 != ABI_PARAM1)
|
||||
MOV(32, R(ABI_PARAM1), R(reg1));
|
||||
u64 distance = u64(func) - (u64(code) + 5);
|
||||
if (distance >= 0x0000000080000000ULL
|
||||
&& distance < 0xFFFFFFFF80000000ULL) {
|
||||
// Far call
|
||||
MOV(64, R(RAX), ImmPtr(func));
|
||||
CALLptr(R(RAX));
|
||||
} else {
|
||||
CALL(func);
|
||||
}
|
||||
}
|
||||
|
||||
// Pass two registers as parameters.
|
||||
void XEmitter::ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2) {
|
||||
if (reg2 != ABI_PARAM1) {
|
||||
if (reg1 != ABI_PARAM1)
|
||||
MOV(64, R(ABI_PARAM1), R(reg1));
|
||||
if (reg2 != ABI_PARAM2)
|
||||
MOV(64, R(ABI_PARAM2), R(reg2));
|
||||
} else {
|
||||
if (reg2 != ABI_PARAM2)
|
||||
MOV(64, R(ABI_PARAM2), R(reg2));
|
||||
if (reg1 != ABI_PARAM1)
|
||||
MOV(64, R(ABI_PARAM1), R(reg1));
|
||||
}
|
||||
u64 distance = u64(func) - (u64(code) + 5);
|
||||
if (distance >= 0x0000000080000000ULL
|
||||
&& distance < 0xFFFFFFFF80000000ULL) {
|
||||
// Far call
|
||||
MOV(64, R(RAX), ImmPtr(func));
|
||||
CALLptr(R(RAX));
|
||||
} else {
|
||||
CALL(func);
|
||||
}
|
||||
}
|
||||
|
||||
// Emits a call to `func` with the 32-bit operand `arg1` in ABI_PARAM1 (move skipped
// when it already is that register) and a 32-bit constant in ABI_PARAM2. The
// near/far call selection is delegated to ABI_CallFunction().
void XEmitter::ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2)
{
    if (!arg1.IsSimpleReg(ABI_PARAM1))
        MOV(32, R(ABI_PARAM1), arg1);
    MOV(32, R(ABI_PARAM2), Imm32(param2));
    ABI_CallFunction(func);
}
|
||||
|
||||
// Emits a call to `func` with the 32-bit operand `arg1` in ABI_PARAM1 (move skipped
// when it already is that register) and 32-bit constants in ABI_PARAM2/ABI_PARAM3.
// The near/far call selection is delegated to ABI_CallFunction().
void XEmitter::ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3)
{
    if (!arg1.IsSimpleReg(ABI_PARAM1))
        MOV(32, R(ABI_PARAM1), arg1);
    MOV(32, R(ABI_PARAM2), Imm32(param2));
    // param3 is a u32, so load it with a 32-bit move like every sibling helper does.
    // A 32-bit register write zero-extends into the upper half, so the register ends
    // up with the same value the previous 64-bit immediate move produced.
    MOV(32, R(ABI_PARAM3), Imm32(param3));
    ABI_CallFunction(func);
}
|
||||
|
||||
// Emits a call to `func` with the 32-bit operand `arg1` in ABI_PARAM1 (move skipped
// when it already is that register). The near/far call selection is delegated to
// ABI_CallFunction().
void XEmitter::ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1)
{
    if (!arg1.IsSimpleReg(ABI_PARAM1))
        MOV(32, R(ABI_PARAM1), arg1);
    ABI_CallFunction(func);
}
|
||||
|
||||
// Emits a call to `func` with the 32-bit operands `arg1`/`arg2` in
// ABI_PARAM1/ABI_PARAM2 (each move is skipped when the operand already is its
// target register). The near/far call selection is delegated to ABI_CallFunction().
//
// NOTE(review): ABI_PARAM1 is written before arg2 is read, so an arg2 that refers
// to ABI_PARAM1 would see arg1's value - confirm no caller passes one.
void XEmitter::ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2)
{
    if (!arg1.IsSimpleReg(ABI_PARAM1))
        MOV(32, R(ABI_PARAM1), arg1);
    if (!arg2.IsSimpleReg(ABI_PARAM2))
        MOV(32, R(ABI_PARAM2), arg2);
    ABI_CallFunction(func);
}
|
|
@ -1,59 +0,0 @@
|
|||
// Copyright 2008 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/bit_set.h"
|
||||
#include "emitter.h"
|
||||
|
||||
// x64 ABI:s, and helpers to help follow them when JIT-ing code.
|
||||
// All conventions return values in EAX (+ possibly EDX).
|
||||
|
||||
// Windows 64-bit
|
||||
// * 4-reg "fastcall" variant, very new-skool stack handling
|
||||
// * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself calls_
|
||||
// * Parameters passed in RCX, RDX, ... further parameters are MOVed into the allocated stack space.
|
||||
// Scratch: RAX RCX RDX R8 R9 R10 R11
|
||||
// Callee-save: RBX RSI RDI RBP R12 R13 R14 R15
|
||||
// Parameters: RCX RDX R8 R9, further MOV-ed
|
||||
|
||||
// Linux 64-bit
|
||||
// * 6-reg "fastcall" variant, old skool stack handling (parameters are pushed)
|
||||
// Scratch: RAX RCX RDX RSI RDI R8 R9 R10 R11
|
||||
// Callee-save: RBX RBP R12 R13 R14 R15
|
||||
// Parameters: RDI RSI RDX RCX R8 R9
|
||||
|
||||
#define ABI_ALL_FPRS BitSet32(0xffff0000)
|
||||
#define ABI_ALL_GPRS BitSet32(0x0000ffff)
|
||||
|
||||
#ifdef _WIN32 // 64-bit Windows - the really exotic calling convention
|
||||
|
||||
#define ABI_PARAM1 RCX
|
||||
#define ABI_PARAM2 RDX
|
||||
#define ABI_PARAM3 R8
|
||||
#define ABI_PARAM4 R9
|
||||
|
||||
// xmm0-xmm15 use the upper 16 bits in the functions that push/pop registers.
|
||||
#define ABI_ALL_CALLER_SAVED \
|
||||
(BitSet32 { RAX, RCX, RDX, R8, R9, R10, R11, \
|
||||
XMM0+16, XMM1+16, XMM2+16, XMM3+16, XMM4+16, XMM5+16 })
|
||||
#else //64-bit Unix / OS X
|
||||
|
||||
#define ABI_PARAM1 RDI
|
||||
#define ABI_PARAM2 RSI
|
||||
#define ABI_PARAM3 RDX
|
||||
#define ABI_PARAM4 RCX
|
||||
#define ABI_PARAM5 R8
|
||||
#define ABI_PARAM6 R9
|
||||
|
||||
// TODO: Avoid pushing all 16 XMM registers when possible. Most functions we call probably
|
||||
// don't actually clobber them.
|
||||
#define ABI_ALL_CALLER_SAVED \
|
||||
(BitSet32 { RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11 } | \
|
||||
ABI_ALL_FPRS)
|
||||
#endif // WIN32
|
||||
|
||||
#define ABI_ALL_CALLEE_SAVED (~ABI_ALL_CALLER_SAVED)
|
||||
|
||||
#define ABI_RETURN RAX
|
|
@ -1,197 +0,0 @@
|
|||
// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
#include "cpu_detect.h"
|
||||
|
||||
namespace Common {
|
||||
|
||||
#ifndef _MSC_VER
|
||||
|
||||
#ifdef __FreeBSD__
|
||||
#include <sys/types.h>
|
||||
#include <machine/cpufunc.h>
|
||||
#endif
|
||||
|
||||
// Non-MSVC stand-in for the __cpuidex intrinsic: executes CPUID with EAX set to
// `function_id` and ECX set to `subfunction_id`, and stores the resulting
// EAX, EBX, ECX, EDX in info[0..3].
static inline void __cpuidex(int info[4], int function_id, int subfunction_id) {
#ifndef __FreeBSD__
    info[0] = function_id;    // eax
    info[2] = subfunction_id; // ecx
    __asm__("cpuid"
            : "=a" (info[0]), "=b" (info[1]), "=c" (info[2]), "=d" (info[3])
            : "a" (function_id), "c" (subfunction_id));
#else
    // Despite the name, this is just do_cpuid() with ECX as second input.
    cpuid_count((u_int)function_id, (u_int)subfunction_id, (u_int*)info);
#endif
}
|
||||
|
||||
static inline void __cpuid(int info[4], int function_id) {
|
||||
return __cpuidex(info, function_id, 0);
|
||||
}
|
||||
|
||||
#define _XCR_XFEATURE_ENABLED_MASK 0
|
||||
static u64 _xgetbv(u32 index) {
|
||||
u32 eax, edx;
|
||||
__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
|
||||
return ((u64)edx << 32) | eax;
|
||||
}
|
||||
|
||||
#endif // ifndef _MSC_VER
|
||||
|
||||
// Detects the various CPU features
|
||||
static CPUCaps Detect() {
|
||||
CPUCaps caps = {};
|
||||
|
||||
caps.num_cores = std::thread::hardware_concurrency();
|
||||
|
||||
// Assumes the CPU supports the CPUID instruction. Those that don't would likely not support
|
||||
// Citra at all anyway
|
||||
|
||||
int cpu_id[4];
|
||||
memset(caps.brand_string, 0, sizeof(caps.brand_string));
|
||||
|
||||
// Detect CPU's CPUID capabilities and grab CPU string
|
||||
__cpuid(cpu_id, 0x00000000);
|
||||
u32 max_std_fn = cpu_id[0]; // EAX
|
||||
|
||||
std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int));
|
||||
std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int));
|
||||
std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int));
|
||||
|
||||
__cpuid(cpu_id, 0x80000000);
|
||||
|
||||
u32 max_ex_fn = cpu_id[0];
|
||||
if (!strcmp(caps.brand_string, "GenuineIntel"))
|
||||
caps.vendor = CPUVendor::INTEL;
|
||||
else if (!strcmp(caps.brand_string, "AuthenticAMD"))
|
||||
caps.vendor = CPUVendor::AMD;
|
||||
else
|
||||
caps.vendor = CPUVendor::OTHER;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4996)
|
||||
#endif
|
||||
|
||||
// Set reasonable default brand string even if brand string not available
|
||||
strncpy(caps.cpu_string, caps.brand_string, sizeof(caps.cpu_string));
|
||||
caps.cpu_string[sizeof(caps.cpu_string) - 1] = '\0';
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
|
||||
// Detect family and other miscellaneous features
|
||||
if (max_std_fn >= 1) {
|
||||
__cpuid(cpu_id, 0x00000001);
|
||||
|
||||
if ((cpu_id[3] >> 25) & 1) caps.sse = true;
|
||||
if ((cpu_id[3] >> 26) & 1) caps.sse2 = true;
|
||||
if ((cpu_id[2]) & 1) caps.sse3 = true;
|
||||
if ((cpu_id[2] >> 9) & 1) caps.ssse3 = true;
|
||||
if ((cpu_id[2] >> 19) & 1) caps.sse4_1 = true;
|
||||
if ((cpu_id[2] >> 20) & 1) caps.sse4_2 = true;
|
||||
if ((cpu_id[2] >> 22) & 1) caps.movbe = true;
|
||||
if ((cpu_id[2] >> 25) & 1) caps.aes = true;
|
||||
|
||||
if ((cpu_id[3] >> 24) & 1) {
|
||||
caps.fxsave_fxrstor = true;
|
||||
}
|
||||
|
||||
// AVX support requires 3 separate checks:
|
||||
// - Is the AVX bit set in CPUID?
|
||||
// - Is the XSAVE bit set in CPUID?
|
||||
// - XGETBV result has the XCR bit set.
|
||||
if (((cpu_id[2] >> 28) & 1) && ((cpu_id[2] >> 27) & 1)) {
|
||||
if ((_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6) {
|
||||
caps.avx = true;
|
||||
if ((cpu_id[2] >> 12) & 1)
|
||||
caps.fma = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (max_std_fn >= 7) {
|
||||
__cpuidex(cpu_id, 0x00000007, 0x00000000);
|
||||
// Can't enable AVX2 unless the XSAVE/XGETBV checks above passed
|
||||
if ((cpu_id[1] >> 5) & 1)
|
||||
caps.avx2 = caps.avx;
|
||||
if ((cpu_id[1] >> 3) & 1)
|
||||
caps.bmi1 = true;
|
||||
if ((cpu_id[1] >> 8) & 1)
|
||||
caps.bmi2 = true;
|
||||
}
|
||||
}
|
||||
|
||||
caps.flush_to_zero = caps.sse;
|
||||
|
||||
if (max_ex_fn >= 0x80000004) {
|
||||
// Extract CPU model string
|
||||
__cpuid(cpu_id, 0x80000002);
|
||||
std::memcpy(caps.cpu_string, cpu_id, sizeof(cpu_id));
|
||||
__cpuid(cpu_id, 0x80000003);
|
||||
std::memcpy(caps.cpu_string + 16, cpu_id, sizeof(cpu_id));
|
||||
__cpuid(cpu_id, 0x80000004);
|
||||
std::memcpy(caps.cpu_string + 32, cpu_id, sizeof(cpu_id));
|
||||
}
|
||||
|
||||
if (max_ex_fn >= 0x80000001) {
|
||||
// Check for more features
|
||||
__cpuid(cpu_id, 0x80000001);
|
||||
if (cpu_id[2] & 1) caps.lahf_sahf_64 = true;
|
||||
if ((cpu_id[2] >> 5) & 1) caps.lzcnt = true;
|
||||
if ((cpu_id[2] >> 16) & 1) caps.fma4 = true;
|
||||
if ((cpu_id[3] >> 29) & 1) caps.long_mode = true;
|
||||
}
|
||||
|
||||
return caps;
|
||||
}
|
||||
|
||||
// Returns the host CPU capabilities. Detect() runs once, on the first call; the
// result is kept in a function-local static and handed out by reference afterwards.
const CPUCaps& GetCPUCaps() {
    static CPUCaps detected_once = Detect();
    const CPUCaps& cached = detected_once;
    return cached;
}
|
||||
|
||||
std::string GetCPUCapsString() {
|
||||
auto caps = GetCPUCaps();
|
||||
|
||||
std::string sum(caps.cpu_string);
|
||||
sum += " (";
|
||||
sum += caps.brand_string;
|
||||
sum += ")";
|
||||
|
||||
if (caps.sse) sum += ", SSE";
|
||||
if (caps.sse2) {
|
||||
sum += ", SSE2";
|
||||
if (!caps.flush_to_zero) sum += " (without DAZ)";
|
||||
}
|
||||
|
||||
if (caps.sse3) sum += ", SSE3";
|
||||
if (caps.ssse3) sum += ", SSSE3";
|
||||
if (caps.sse4_1) sum += ", SSE4.1";
|
||||
if (caps.sse4_2) sum += ", SSE4.2";
|
||||
if (caps.avx) sum += ", AVX";
|
||||
if (caps.avx2) sum += ", AVX2";
|
||||
if (caps.bmi1) sum += ", BMI1";
|
||||
if (caps.bmi2) sum += ", BMI2";
|
||||
if (caps.fma) sum += ", FMA";
|
||||
if (caps.aes) sum += ", AES";
|
||||
if (caps.movbe) sum += ", MOVBE";
|
||||
if (caps.long_mode) sum += ", 64-bit support";
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
} // namespace Common
|
|
@ -1,66 +0,0 @@
|
|||
// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace Common {
|
||||
|
||||
/// x86/x64 CPU vendors that may be detected by this module
enum class CPUVendor {
    INTEL,
    AMD,
    OTHER,
};

/// x86/x64 CPU capabilities that may be detected by this module
struct CPUCaps {
    CPUVendor vendor;
    // Receives the model string read from CPUID leaves 0x80000002..0x80000004:
    // 3 x 16 = 48 bytes plus a terminating NUL, hence 0x31 bytes.
    char cpu_string[0x31];
    // Receives the 12-character CPUID vendor ID (e.g. "GenuineIntel").
    char brand_string[0x41];
    int num_cores;
    bool sse;
    bool sse2;
    bool sse3;
    bool ssse3;
    bool sse4_1;
    bool sse4_2;
    bool lzcnt;
    bool avx;
    bool avx2;
    bool bmi1;
    bool bmi2;
    bool fma;
    bool fma4;
    bool aes;

    // Support for the FXSAVE and FXRSTOR instructions
    bool fxsave_fxrstor;

    bool movbe;

    // This flag indicates that the hardware supports some mode in which denormal inputs and outputs
    // are automatically set to (signed) zero.
    bool flush_to_zero;

    // Support for LAHF and SAHF instructions in 64-bit mode
    bool lahf_sahf_64;

    bool long_mode;
};
|
||||
|
||||
/**
|
||||
* Gets the supported capabilities of the host CPU
|
||||
* @return Reference to a CPUCaps struct with the detected host CPU capabilities
|
||||
*/
|
||||
const CPUCaps& GetCPUCaps();
|
||||
|
||||
/**
|
||||
* Gets a string summary of the name and supported capabilities of the host CPU
|
||||
* @return String summary
|
||||
*/
|
||||
std::string GetCPUCapsString();
|
||||
|
||||
} // namespace Common
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -22,11 +22,11 @@ public:
|
|||
}
|
||||
|
||||
std::string thumb16_LSR_imm(Imm5 imm5, Reg m, Reg d) {
    // For LSR (immediate) an imm5 field of 0 encodes a shift amount of 32, so it is
    // printed as #32 rather than #0.
    return Common::StringFromFormat("lsrs %s, %s, #%u", RegToString(d), RegToString(m), imm5 != 0 ? imm5 : 32);
}
|
||||
|
||||
std::string thumb16_ASR_imm(Imm5 imm5, Reg m, Reg d) {
    // For ASR (immediate) an imm5 field of 0 encodes a shift amount of 32, so it is
    // printed as #32 rather than #0.
    return Common::StringFromFormat("asrs %s, %s, #%u", RegToString(d), RegToString(m), imm5 != 0 ? imm5 : 32);
}
|
||||
|
||||
std::string thumb16_ADD_reg_t1(Reg m, Reg n, Reg d) {
|
||||
|
|
Loading…
Reference in a new issue