Optimization: Read page-table directly for memory access
This commit is contained in:
parent
57169ec093
commit
3b5c43b427
5 changed files with 219 additions and 27 deletions
|
@ -6,6 +6,8 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <cstddef>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
|
||||||
namespace Dynarmic {
|
namespace Dynarmic {
|
||||||
|
@ -30,6 +32,11 @@ struct UserCallbacks {
|
||||||
void (*InterpreterFallback)(std::uint32_t pc, Jit* jit);
|
void (*InterpreterFallback)(std::uint32_t pc, Jit* jit);
|
||||||
|
|
||||||
bool (*CallSVC)(std::uint32_t swi);
|
bool (*CallSVC)(std::uint32_t swi);
|
||||||
|
|
||||||
|
// Page Table
|
||||||
|
static constexpr std::size_t PAGE_BITS = 12;
|
||||||
|
static constexpr std::size_t NUM_PAGE_TABLE_ENTRIES = 1 << (32 - PAGE_BITS);
|
||||||
|
std::array<std::uint8_t*, NUM_PAGE_TABLE_ENTRIES>* page_table = nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Dynarmic
|
} // namespace Dynarmic
|
||||||
|
|
|
@ -12,11 +12,12 @@
|
||||||
#include "backend_x64/block_of_code.h"
|
#include "backend_x64/block_of_code.h"
|
||||||
#include "backend_x64/jitstate.h"
|
#include "backend_x64/jitstate.h"
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
|
#include "dynarmic/callbacks.h"
|
||||||
|
|
||||||
namespace Dynarmic {
|
namespace Dynarmic {
|
||||||
namespace BackendX64 {
|
namespace BackendX64 {
|
||||||
|
|
||||||
BlockOfCode::BlockOfCode() : Xbyak::CodeGenerator(128 * 1024 * 1024) {
|
BlockOfCode::BlockOfCode(UserCallbacks cb) : Xbyak::CodeGenerator(128 * 1024 * 1024), cb(cb) {
|
||||||
ClearCache(false);
|
ClearCache(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -27,6 +28,7 @@ void BlockOfCode::ClearCache(bool poison_memory) {
|
||||||
GenConstants();
|
GenConstants();
|
||||||
GenRunCode();
|
GenRunCode();
|
||||||
GenReturnFromRunCode();
|
GenReturnFromRunCode();
|
||||||
|
GenMemoryAccessors();
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t BlockOfCode::RunCode(JitState* jit_state, CodePtr basic_block, size_t cycles_to_run) const {
|
size_t BlockOfCode::RunCode(JitState* jit_state, CodePtr basic_block, size_t cycles_to_run) const {
|
||||||
|
@ -118,6 +120,64 @@ void BlockOfCode::GenReturnFromRunCode() {
|
||||||
ret();
|
ret();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void BlockOfCode::GenMemoryAccessors() {
|
||||||
|
align();
|
||||||
|
read_memory_8 = getCurr<const void*>();
|
||||||
|
ABI_PushCallerSaveRegistersAndAdjustStack(this);
|
||||||
|
CallFunction(cb.MemoryRead8);
|
||||||
|
ABI_PopCallerSaveRegistersAndAdjustStack(this);
|
||||||
|
ret();
|
||||||
|
|
||||||
|
align();
|
||||||
|
read_memory_16 = getCurr<const void*>();
|
||||||
|
ABI_PushCallerSaveRegistersAndAdjustStack(this);
|
||||||
|
CallFunction(cb.MemoryRead16);
|
||||||
|
ABI_PopCallerSaveRegistersAndAdjustStack(this);
|
||||||
|
ret();
|
||||||
|
|
||||||
|
align();
|
||||||
|
read_memory_32 = getCurr<const void*>();
|
||||||
|
ABI_PushCallerSaveRegistersAndAdjustStack(this);
|
||||||
|
CallFunction(cb.MemoryRead32);
|
||||||
|
ABI_PopCallerSaveRegistersAndAdjustStack(this);
|
||||||
|
ret();
|
||||||
|
|
||||||
|
align();
|
||||||
|
read_memory_64 = getCurr<const void*>();
|
||||||
|
ABI_PushCallerSaveRegistersAndAdjustStack(this);
|
||||||
|
CallFunction(cb.MemoryRead64);
|
||||||
|
ABI_PopCallerSaveRegistersAndAdjustStack(this);
|
||||||
|
ret();
|
||||||
|
|
||||||
|
align();
|
||||||
|
write_memory_8 = getCurr<const void*>();
|
||||||
|
ABI_PushCallerSaveRegistersAndAdjustStack(this);
|
||||||
|
CallFunction(cb.MemoryWrite8);
|
||||||
|
ABI_PopCallerSaveRegistersAndAdjustStack(this);
|
||||||
|
ret();
|
||||||
|
|
||||||
|
align();
|
||||||
|
write_memory_16 = getCurr<const void*>();
|
||||||
|
ABI_PushCallerSaveRegistersAndAdjustStack(this);
|
||||||
|
CallFunction(cb.MemoryWrite16);
|
||||||
|
ABI_PopCallerSaveRegistersAndAdjustStack(this);
|
||||||
|
ret();
|
||||||
|
|
||||||
|
align();
|
||||||
|
write_memory_32 = getCurr<const void*>();
|
||||||
|
ABI_PushCallerSaveRegistersAndAdjustStack(this);
|
||||||
|
CallFunction(cb.MemoryWrite32);
|
||||||
|
ABI_PopCallerSaveRegistersAndAdjustStack(this);
|
||||||
|
ret();
|
||||||
|
|
||||||
|
align();
|
||||||
|
write_memory_64 = getCurr<const void*>();
|
||||||
|
ABI_PushCallerSaveRegistersAndAdjustStack(this);
|
||||||
|
CallFunction(cb.MemoryWrite64);
|
||||||
|
ABI_PopCallerSaveRegistersAndAdjustStack(this);
|
||||||
|
ret();
|
||||||
|
}
|
||||||
|
|
||||||
void BlockOfCode::SwitchMxcsrOnEntry() {
|
void BlockOfCode::SwitchMxcsrOnEntry() {
|
||||||
stmxcsr(dword[r15 + offsetof(JitState, save_host_MXCSR)]);
|
stmxcsr(dword[r15 + offsetof(JitState, save_host_MXCSR)]);
|
||||||
ldmxcsr(dword[r15 + offsetof(JitState, guest_MXCSR)]);
|
ldmxcsr(dword[r15 + offsetof(JitState, guest_MXCSR)]);
|
||||||
|
|
|
@ -11,13 +11,14 @@
|
||||||
|
|
||||||
#include "backend_x64/jitstate.h"
|
#include "backend_x64/jitstate.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
#include "dynarmic/callbacks.h"
|
||||||
|
|
||||||
namespace Dynarmic {
|
namespace Dynarmic {
|
||||||
namespace BackendX64 {
|
namespace BackendX64 {
|
||||||
|
|
||||||
class BlockOfCode final : public Xbyak::CodeGenerator {
|
class BlockOfCode final : public Xbyak::CodeGenerator {
|
||||||
public:
|
public:
|
||||||
BlockOfCode();
|
explicit BlockOfCode(UserCallbacks cb);
|
||||||
|
|
||||||
/// Clears this block of code and resets code pointer to beginning.
|
/// Clears this block of code and resets code pointer to beginning.
|
||||||
void ClearCache(bool poison_memory);
|
void ClearCache(bool poison_memory);
|
||||||
|
@ -93,6 +94,36 @@ public:
|
||||||
return return_from_run_code;
|
return return_from_run_code;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const void* GetMemoryReadCallback(size_t bit_size) const {
|
||||||
|
switch (bit_size) {
|
||||||
|
case 8:
|
||||||
|
return read_memory_8;
|
||||||
|
case 16:
|
||||||
|
return read_memory_16;
|
||||||
|
case 32:
|
||||||
|
return read_memory_32;
|
||||||
|
case 64:
|
||||||
|
return read_memory_64;
|
||||||
|
default:
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const void* GetMemoryWriteCallback(size_t bit_size) const {
|
||||||
|
switch (bit_size) {
|
||||||
|
case 8:
|
||||||
|
return write_memory_8;
|
||||||
|
case 16:
|
||||||
|
return write_memory_16;
|
||||||
|
case 32:
|
||||||
|
return write_memory_32;
|
||||||
|
case 64:
|
||||||
|
return write_memory_64;
|
||||||
|
default:
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void int3() { db(0xCC); }
|
void int3() { db(0xCC); }
|
||||||
void nop(size_t size = 1);
|
void nop(size_t size = 1);
|
||||||
|
|
||||||
|
@ -114,6 +145,8 @@ public:
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
UserCallbacks cb;
|
||||||
|
|
||||||
struct Consts {
|
struct Consts {
|
||||||
Xbyak::Label FloatPositiveZero32;
|
Xbyak::Label FloatPositiveZero32;
|
||||||
Xbyak::Label FloatNegativeZero32;
|
Xbyak::Label FloatNegativeZero32;
|
||||||
|
@ -138,6 +171,16 @@ private:
|
||||||
const void* return_from_run_code = nullptr;
|
const void* return_from_run_code = nullptr;
|
||||||
const void* return_from_run_code_without_mxcsr_switch = nullptr;
|
const void* return_from_run_code_without_mxcsr_switch = nullptr;
|
||||||
void GenReturnFromRunCode();
|
void GenReturnFromRunCode();
|
||||||
|
|
||||||
|
const void* read_memory_8 = nullptr;
|
||||||
|
const void* read_memory_16 = nullptr;
|
||||||
|
const void* read_memory_32 = nullptr;
|
||||||
|
const void* read_memory_64 = nullptr;
|
||||||
|
const void* write_memory_8 = nullptr;
|
||||||
|
const void* write_memory_16 = nullptr;
|
||||||
|
const void* write_memory_32 = nullptr;
|
||||||
|
const void* write_memory_64 = nullptr;
|
||||||
|
void GenMemoryAccessors();
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace BackendX64
|
} // namespace BackendX64
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <common/bit_util.h>
|
#include <common/bit_util.h>
|
||||||
|
|
||||||
|
#include "backend_x64/abi.h"
|
||||||
#include "backend_x64/emit_x64.h"
|
#include "backend_x64/emit_x64.h"
|
||||||
#include "backend_x64/jitstate.h"
|
#include "backend_x64/jitstate.h"
|
||||||
#include "frontend/arm_types.h"
|
#include "frontend/arm_types.h"
|
||||||
|
@ -1829,52 +1830,133 @@ void EmitX64::EmitSetExclusive(IR::Block&, IR::Inst* inst) {
|
||||||
code->mov(dword[r15 + offsetof(JitState, exclusive_address)], address);
|
code->mov(dword[r15 + offsetof(JitState, exclusive_address)], address);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitReadMemory8(IR::Block&, IR::Inst* inst) {
|
template <typename FunctionPointer>
|
||||||
|
static void ReadMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, UserCallbacks& cb, size_t bit_size, FunctionPointer fn) {
|
||||||
|
if (!cb.page_table) {
|
||||||
reg_alloc.HostCall(inst, inst->GetArg(0));
|
reg_alloc.HostCall(inst, inst->GetArg(0));
|
||||||
|
code->CallFunction(fn);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
code->CallFunction(cb.MemoryRead8);
|
using namespace Xbyak::util;
|
||||||
|
|
||||||
|
Xbyak::Reg64 result = reg_alloc.DefGpr(inst, { ABI_RETURN });
|
||||||
|
Xbyak::Reg32 vaddr = reg_alloc.UseScratchGpr(inst->GetArg(0), { ABI_PARAM1 }).cvt32();
|
||||||
|
Xbyak::Reg64 page_index = reg_alloc.ScratchGpr();
|
||||||
|
Xbyak::Reg64 page_offset = reg_alloc.ScratchGpr();
|
||||||
|
|
||||||
|
Xbyak::Label abort, end;
|
||||||
|
|
||||||
|
code->mov(rax, u64(cb.page_table));
|
||||||
|
code->mov(page_index.cvt32(), vaddr);
|
||||||
|
code->shr(page_index.cvt32(), 12);
|
||||||
|
code->mov(rax, qword[rax + page_index * 8]);
|
||||||
|
code->test(rax, rax);
|
||||||
|
code->jz(abort);
|
||||||
|
code->mov(page_offset.cvt32(), vaddr);
|
||||||
|
code->and_(page_offset.cvt32(), 4095);
|
||||||
|
switch (bit_size) {
|
||||||
|
case 8:
|
||||||
|
code->movzx(result, code->byte[rax + page_offset]);
|
||||||
|
break;
|
||||||
|
case 16:
|
||||||
|
code->movzx(result, word[rax + page_offset]);
|
||||||
|
break;
|
||||||
|
case 32:
|
||||||
|
code->mov(result.cvt32(), dword[rax + page_offset]);
|
||||||
|
break;
|
||||||
|
case 64:
|
||||||
|
code->mov(result.cvt64(), qword[rax + page_offset]);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
ASSERT_MSG(false, "Invalid bit_size");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
code->jmp(end);
|
||||||
|
code->L(abort);
|
||||||
|
code->call(code->GetMemoryReadCallback(bit_size));
|
||||||
|
code->L(end);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename FunctionPointer>
|
||||||
|
static void WriteMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, UserCallbacks& cb, size_t bit_size, FunctionPointer fn) {
|
||||||
|
if (!cb.page_table) {
|
||||||
|
reg_alloc.HostCall(inst, inst->GetArg(0), inst->GetArg(1));
|
||||||
|
code->CallFunction(fn);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
using namespace Xbyak::util;
|
||||||
|
|
||||||
|
reg_alloc.ScratchGpr({ HostLoc::RAX });
|
||||||
|
Xbyak::Reg32 vaddr = reg_alloc.UseScratchGpr(inst->GetArg(0), { ABI_PARAM1 }).cvt32();
|
||||||
|
Xbyak::Reg64 value = reg_alloc.UseScratchGpr(inst->GetArg(1), { ABI_PARAM2 });
|
||||||
|
Xbyak::Reg64 page_index = reg_alloc.ScratchGpr();
|
||||||
|
Xbyak::Reg64 page_offset = reg_alloc.ScratchGpr();
|
||||||
|
|
||||||
|
Xbyak::Label abort, end;
|
||||||
|
|
||||||
|
code->mov(rax, u64(cb.page_table));
|
||||||
|
code->mov(page_index.cvt32(), vaddr);
|
||||||
|
code->shr(page_index.cvt32(), 12);
|
||||||
|
code->mov(rax, qword[rax + page_index * 8]);
|
||||||
|
code->test(rax, rax);
|
||||||
|
code->jz(abort);
|
||||||
|
code->mov(page_offset.cvt32(), vaddr);
|
||||||
|
code->and_(page_offset.cvt32(), 4095);
|
||||||
|
switch (bit_size) {
|
||||||
|
case 8:
|
||||||
|
code->mov(code->byte[rax + page_offset], value.cvt8());
|
||||||
|
break;
|
||||||
|
case 16:
|
||||||
|
code->mov(word[rax + page_offset], value.cvt16());
|
||||||
|
break;
|
||||||
|
case 32:
|
||||||
|
code->mov(dword[rax + page_offset], value.cvt32());
|
||||||
|
break;
|
||||||
|
case 64:
|
||||||
|
code->mov(qword[rax + page_offset], value.cvt64());
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
ASSERT_MSG(false, "Invalid bit_size");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
code->jmp(end);
|
||||||
|
code->L(abort);
|
||||||
|
code->call(code->GetMemoryWriteCallback(bit_size));
|
||||||
|
code->L(end);
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitX64::EmitReadMemory8(IR::Block&, IR::Inst* inst) {
|
||||||
|
ReadMemory(code, reg_alloc, inst, cb, 8, cb.MemoryRead8);
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitReadMemory16(IR::Block&, IR::Inst* inst) {
|
void EmitX64::EmitReadMemory16(IR::Block&, IR::Inst* inst) {
|
||||||
reg_alloc.HostCall(inst, inst->GetArg(0));
|
ReadMemory(code, reg_alloc, inst, cb, 16, cb.MemoryRead16);
|
||||||
|
|
||||||
code->CallFunction(cb.MemoryRead16);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitReadMemory32(IR::Block&, IR::Inst* inst) {
|
void EmitX64::EmitReadMemory32(IR::Block&, IR::Inst* inst) {
|
||||||
reg_alloc.HostCall(inst, inst->GetArg(0));
|
ReadMemory(code, reg_alloc, inst, cb, 32, cb.MemoryRead32);
|
||||||
|
|
||||||
code->CallFunction(cb.MemoryRead32);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitReadMemory64(IR::Block&, IR::Inst* inst) {
|
void EmitX64::EmitReadMemory64(IR::Block&, IR::Inst* inst) {
|
||||||
reg_alloc.HostCall(inst, inst->GetArg(0));
|
ReadMemory(code, reg_alloc, inst, cb, 64, cb.MemoryRead64);
|
||||||
|
|
||||||
code->CallFunction(cb.MemoryRead64);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitWriteMemory8(IR::Block&, IR::Inst* inst) {
|
void EmitX64::EmitWriteMemory8(IR::Block&, IR::Inst* inst) {
|
||||||
reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1));
|
WriteMemory(code, reg_alloc, inst, cb, 8, cb.MemoryWrite8);
|
||||||
|
|
||||||
code->CallFunction(cb.MemoryWrite8);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitWriteMemory16(IR::Block&, IR::Inst* inst) {
|
void EmitX64::EmitWriteMemory16(IR::Block&, IR::Inst* inst) {
|
||||||
reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1));
|
WriteMemory(code, reg_alloc, inst, cb, 16, cb.MemoryWrite16);
|
||||||
|
|
||||||
code->CallFunction(cb.MemoryWrite16);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitWriteMemory32(IR::Block&, IR::Inst* inst) {
|
void EmitX64::EmitWriteMemory32(IR::Block&, IR::Inst* inst) {
|
||||||
reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1));
|
WriteMemory(code, reg_alloc, inst, cb, 32, cb.MemoryWrite32);
|
||||||
|
|
||||||
code->CallFunction(cb.MemoryWrite32);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitX64::EmitWriteMemory64(IR::Block&, IR::Inst* inst) {
|
void EmitX64::EmitWriteMemory64(IR::Block&, IR::Inst* inst) {
|
||||||
reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1));
|
WriteMemory(code, reg_alloc, inst, cb, 64, cb.MemoryWrite64);
|
||||||
|
|
||||||
code->CallFunction(cb.MemoryWrite64);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename FunctionPointer>
|
template <typename FunctionPointer>
|
||||||
|
|
|
@ -31,7 +31,7 @@ using namespace BackendX64;
|
||||||
|
|
||||||
struct Jit::Impl {
|
struct Jit::Impl {
|
||||||
Impl(Jit* jit, UserCallbacks callbacks)
|
Impl(Jit* jit, UserCallbacks callbacks)
|
||||||
: block_of_code()
|
: block_of_code(callbacks)
|
||||||
, jit_state()
|
, jit_state()
|
||||||
, emitter(&block_of_code, callbacks, jit)
|
, emitter(&block_of_code, callbacks, jit)
|
||||||
, callbacks(callbacks)
|
, callbacks(callbacks)
|
||||||
|
|
Loading…
Reference in a new issue