From 3b5c43b427ffeb87286c497849c85f3af06faf0b Mon Sep 17 00:00:00 2001 From: MerryMage Date: Thu, 1 Sep 2016 00:06:40 +0100 Subject: [PATCH] Optimization: Read page-table directly for memory access --- include/dynarmic/callbacks.h | 7 ++ src/backend_x64/block_of_code.cpp | 62 +++++++++++++- src/backend_x64/block_of_code.h | 45 ++++++++++- src/backend_x64/emit_x64.cpp | 130 ++++++++++++++++++++++++------ src/backend_x64/interface_x64.cpp | 2 +- 5 files changed, 219 insertions(+), 27 deletions(-) diff --git a/include/dynarmic/callbacks.h b/include/dynarmic/callbacks.h index 7bb1d926..09f7d40f 100644 --- a/include/dynarmic/callbacks.h +++ b/include/dynarmic/callbacks.h @@ -6,6 +6,8 @@ #pragma once +#include <array> +#include <cstddef> #include <cstdint> namespace Dynarmic { @@ -30,6 +32,11 @@ struct UserCallbacks { void (*InterpreterFallback)(std::uint32_t pc, Jit* jit); bool (*CallSVC)(std::uint32_t swi); + + // Page Table + static constexpr std::size_t PAGE_BITS = 12; + static constexpr std::size_t NUM_PAGE_TABLE_ENTRIES = 1 << (32 - PAGE_BITS); + std::array<std::uint8_t*, NUM_PAGE_TABLE_ENTRIES>* page_table = nullptr; }; } // namespace Dynarmic diff --git a/src/backend_x64/block_of_code.cpp b/src/backend_x64/block_of_code.cpp index 0f1fcf50..75ba0922 100644 --- a/src/backend_x64/block_of_code.cpp +++ b/src/backend_x64/block_of_code.cpp @@ -12,11 +12,12 @@ #include "backend_x64/block_of_code.h" #include "backend_x64/jitstate.h" #include "common/assert.h" +#include "dynarmic/callbacks.h" namespace Dynarmic { namespace BackendX64 { -BlockOfCode::BlockOfCode() : Xbyak::CodeGenerator(128 * 1024 * 1024) { +BlockOfCode::BlockOfCode(UserCallbacks cb) : Xbyak::CodeGenerator(128 * 1024 * 1024), cb(cb) { ClearCache(false); } @@ -27,6 +28,7 @@ void BlockOfCode::ClearCache(bool poison_memory) { GenConstants(); GenRunCode(); GenReturnFromRunCode(); + GenMemoryAccessors(); } size_t BlockOfCode::RunCode(JitState* jit_state, CodePtr basic_block, size_t cycles_to_run) const { @@ -118,6 +120,64 @@ void BlockOfCode::GenReturnFromRunCode() { ret(); }
+void BlockOfCode::GenMemoryAccessors() { + align(); + read_memory_8 = getCurr(); + ABI_PushCallerSaveRegistersAndAdjustStack(this); + CallFunction(cb.MemoryRead8); + ABI_PopCallerSaveRegistersAndAdjustStack(this); + ret(); + + align(); + read_memory_16 = getCurr(); + ABI_PushCallerSaveRegistersAndAdjustStack(this); + CallFunction(cb.MemoryRead16); + ABI_PopCallerSaveRegistersAndAdjustStack(this); + ret(); + + align(); + read_memory_32 = getCurr(); + ABI_PushCallerSaveRegistersAndAdjustStack(this); + CallFunction(cb.MemoryRead32); + ABI_PopCallerSaveRegistersAndAdjustStack(this); + ret(); + + align(); + read_memory_64 = getCurr(); + ABI_PushCallerSaveRegistersAndAdjustStack(this); + CallFunction(cb.MemoryRead64); + ABI_PopCallerSaveRegistersAndAdjustStack(this); + ret(); + + align(); + write_memory_8 = getCurr(); + ABI_PushCallerSaveRegistersAndAdjustStack(this); + CallFunction(cb.MemoryWrite8); + ABI_PopCallerSaveRegistersAndAdjustStack(this); + ret(); + + align(); + write_memory_16 = getCurr(); + ABI_PushCallerSaveRegistersAndAdjustStack(this); + CallFunction(cb.MemoryWrite16); + ABI_PopCallerSaveRegistersAndAdjustStack(this); + ret(); + + align(); + write_memory_32 = getCurr(); + ABI_PushCallerSaveRegistersAndAdjustStack(this); + CallFunction(cb.MemoryWrite32); + ABI_PopCallerSaveRegistersAndAdjustStack(this); + ret(); + + align(); + write_memory_64 = getCurr(); + ABI_PushCallerSaveRegistersAndAdjustStack(this); + CallFunction(cb.MemoryWrite64); + ABI_PopCallerSaveRegistersAndAdjustStack(this); + ret(); +} + void BlockOfCode::SwitchMxcsrOnEntry() { stmxcsr(dword[r15 + offsetof(JitState, save_host_MXCSR)]); ldmxcsr(dword[r15 + offsetof(JitState, guest_MXCSR)]); diff --git a/src/backend_x64/block_of_code.h b/src/backend_x64/block_of_code.h index 4f63ad54..9dda3c4a 100644 --- a/src/backend_x64/block_of_code.h +++ b/src/backend_x64/block_of_code.h @@ -11,13 +11,14 @@ #include "backend_x64/jitstate.h" #include "common/common_types.h" +#include "dynarmic/callbacks.h" 
namespace Dynarmic { namespace BackendX64 { class BlockOfCode final : public Xbyak::CodeGenerator { public: - BlockOfCode(); + explicit BlockOfCode(UserCallbacks cb); /// Clears this block of code and resets code pointer to beginning. void ClearCache(bool poison_memory); @@ -93,6 +94,36 @@ public: return return_from_run_code; } + const void* GetMemoryReadCallback(size_t bit_size) const { + switch (bit_size) { + case 8: + return read_memory_8; + case 16: + return read_memory_16; + case 32: + return read_memory_32; + case 64: + return read_memory_64; + default: + return nullptr; + } + } + + const void* GetMemoryWriteCallback(size_t bit_size) const { + switch (bit_size) { + case 8: + return write_memory_8; + case 16: + return write_memory_16; + case 32: + return write_memory_32; + case 64: + return write_memory_64; + default: + return nullptr; + } + } + void int3() { db(0xCC); } void nop(size_t size = 1); @@ -114,6 +145,8 @@ public: #endif private: + UserCallbacks cb; + struct Consts { Xbyak::Label FloatPositiveZero32; Xbyak::Label FloatNegativeZero32; @@ -138,6 +171,16 @@ private: const void* return_from_run_code = nullptr; const void* return_from_run_code_without_mxcsr_switch = nullptr; void GenReturnFromRunCode(); + + const void* read_memory_8 = nullptr; + const void* read_memory_16 = nullptr; + const void* read_memory_32 = nullptr; + const void* read_memory_64 = nullptr; + const void* write_memory_8 = nullptr; + const void* write_memory_16 = nullptr; + const void* write_memory_32 = nullptr; + const void* write_memory_64 = nullptr; + void GenMemoryAccessors(); }; } // namespace BackendX64 diff --git a/src/backend_x64/emit_x64.cpp b/src/backend_x64/emit_x64.cpp index 4f53ad07..e79b0a7a 100644 --- a/src/backend_x64/emit_x64.cpp +++ b/src/backend_x64/emit_x64.cpp @@ -8,6 +8,7 @@ #include #include +#include "backend_x64/abi.h" #include "backend_x64/emit_x64.h" #include "backend_x64/jitstate.h" #include "frontend/arm_types.h" @@ -1829,52 +1830,133 @@ void 
EmitX64::EmitSetExclusive(IR::Block&, IR::Inst* inst) { code->mov(dword[r15 + offsetof(JitState, exclusive_address)], address); } -void EmitX64::EmitReadMemory8(IR::Block&, IR::Inst* inst) { - reg_alloc.HostCall(inst, inst->GetArg(0)); +template <typename FunctionPointer> +static void ReadMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, UserCallbacks& cb, size_t bit_size, FunctionPointer fn) { + if (!cb.page_table) { + reg_alloc.HostCall(inst, inst->GetArg(0)); + code->CallFunction(fn); + return; + } - code->CallFunction(cb.MemoryRead8); + using namespace Xbyak::util; + + Xbyak::Reg64 result = reg_alloc.DefGpr(inst, { ABI_RETURN }); + Xbyak::Reg32 vaddr = reg_alloc.UseScratchGpr(inst->GetArg(0), { ABI_PARAM1 }).cvt32(); + Xbyak::Reg64 page_index = reg_alloc.ScratchGpr(); + Xbyak::Reg64 page_offset = reg_alloc.ScratchGpr(); + + Xbyak::Label abort, end; + + code->mov(rax, u64(cb.page_table)); + code->mov(page_index.cvt32(), vaddr); + code->shr(page_index.cvt32(), 12); + code->mov(rax, qword[rax + page_index * 8]); + code->test(rax, rax); + code->jz(abort); + code->mov(page_offset.cvt32(), vaddr); + code->and_(page_offset.cvt32(), 4095); + switch (bit_size) { + case 8: + code->movzx(result, code->byte[rax + page_offset]); + break; + case 16: + code->movzx(result, word[rax + page_offset]); + break; + case 32: + code->mov(result.cvt32(), dword[rax + page_offset]); + break; + case 64: + code->mov(result.cvt64(), qword[rax + page_offset]); + break; + default: + ASSERT_MSG(false, "Invalid bit_size"); + break; + } + code->jmp(end); + code->L(abort); + code->call(code->GetMemoryReadCallback(bit_size)); + code->L(end); +} + +template <typename FunctionPointer> +static void WriteMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, UserCallbacks& cb, size_t bit_size, FunctionPointer fn) { + if (!cb.page_table) { + reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1)); + code->CallFunction(fn); + return; + } + + using namespace Xbyak::util; + + reg_alloc.ScratchGpr({ HostLoc::RAX }); + Xbyak::Reg32
vaddr = reg_alloc.UseScratchGpr(inst->GetArg(0), { ABI_PARAM1 }).cvt32(); + Xbyak::Reg64 value = reg_alloc.UseScratchGpr(inst->GetArg(1), { ABI_PARAM2 }); + Xbyak::Reg64 page_index = reg_alloc.ScratchGpr(); + Xbyak::Reg64 page_offset = reg_alloc.ScratchGpr(); + + Xbyak::Label abort, end; + + code->mov(rax, u64(cb.page_table)); + code->mov(page_index.cvt32(), vaddr); + code->shr(page_index.cvt32(), 12); + code->mov(rax, qword[rax + page_index * 8]); + code->test(rax, rax); + code->jz(abort); + code->mov(page_offset.cvt32(), vaddr); + code->and_(page_offset.cvt32(), 4095); + switch (bit_size) { + case 8: + code->mov(code->byte[rax + page_offset], value.cvt8()); + break; + case 16: + code->mov(word[rax + page_offset], value.cvt16()); + break; + case 32: + code->mov(dword[rax + page_offset], value.cvt32()); + break; + case 64: + code->mov(qword[rax + page_offset], value.cvt64()); + break; + default: + ASSERT_MSG(false, "Invalid bit_size"); + break; + } + code->jmp(end); + code->L(abort); + code->call(code->GetMemoryWriteCallback(bit_size)); + code->L(end); +} + +void EmitX64::EmitReadMemory8(IR::Block&, IR::Inst* inst) { + ReadMemory(code, reg_alloc, inst, cb, 8, cb.MemoryRead8); } void EmitX64::EmitReadMemory16(IR::Block&, IR::Inst* inst) { - reg_alloc.HostCall(inst, inst->GetArg(0)); - - code->CallFunction(cb.MemoryRead16); + ReadMemory(code, reg_alloc, inst, cb, 16, cb.MemoryRead16); } void EmitX64::EmitReadMemory32(IR::Block&, IR::Inst* inst) { - reg_alloc.HostCall(inst, inst->GetArg(0)); - - code->CallFunction(cb.MemoryRead32); + ReadMemory(code, reg_alloc, inst, cb, 32, cb.MemoryRead32); } void EmitX64::EmitReadMemory64(IR::Block&, IR::Inst* inst) { - reg_alloc.HostCall(inst, inst->GetArg(0)); - - code->CallFunction(cb.MemoryRead64); + ReadMemory(code, reg_alloc, inst, cb, 64, cb.MemoryRead64); } void EmitX64::EmitWriteMemory8(IR::Block&, IR::Inst* inst) { - reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1)); - - code->CallFunction(cb.MemoryWrite8); + 
WriteMemory(code, reg_alloc, inst, cb, 8, cb.MemoryWrite8); } void EmitX64::EmitWriteMemory16(IR::Block&, IR::Inst* inst) { - reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1)); - - code->CallFunction(cb.MemoryWrite16); + WriteMemory(code, reg_alloc, inst, cb, 16, cb.MemoryWrite16); } void EmitX64::EmitWriteMemory32(IR::Block&, IR::Inst* inst) { - reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1)); - - code->CallFunction(cb.MemoryWrite32); + WriteMemory(code, reg_alloc, inst, cb, 32, cb.MemoryWrite32); } void EmitX64::EmitWriteMemory64(IR::Block&, IR::Inst* inst) { - reg_alloc.HostCall(nullptr, inst->GetArg(0), inst->GetArg(1)); - - code->CallFunction(cb.MemoryWrite64); + WriteMemory(code, reg_alloc, inst, cb, 64, cb.MemoryWrite64); } template diff --git a/src/backend_x64/interface_x64.cpp b/src/backend_x64/interface_x64.cpp index c770a601..56c1c176 100644 --- a/src/backend_x64/interface_x64.cpp +++ b/src/backend_x64/interface_x64.cpp @@ -31,7 +31,7 @@ using namespace BackendX64; struct Jit::Impl { Impl(Jit* jit, UserCallbacks callbacks) - : block_of_code() + : block_of_code(callbacks) , jit_state() , emitter(&block_of_code, callbacks, jit) , callbacks(callbacks)