From 4636055646ca99dd774ad94188e7b3c7eef0acb2 Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Wed, 8 Apr 2020 16:56:57 +0100
Subject: [PATCH] a32_emit_x64: Implement fastmem

---
 include/dynarmic/A32/config.h                 |   9 ++
 src/CMakeLists.txt                            |   1 +
 src/backend/x64/a32_emit_x64.cpp              | 114 +++++++++++++++++-
 src/backend/x64/a32_emit_x64.h                |  17 ++-
 src/backend/x64/a32_interface.cpp             |  14 +--
 src/backend/x64/emit_x64.cpp                  |   5 +
 src/backend/x64/emit_x64.h                    |   1 +
 src/backend/x64/exception_handler.h           |  12 ++
 src/backend/x64/exception_handler_generic.cpp |   8 ++
 src/backend/x64/exception_handler_windows.cpp |   8 ++
 src/frontend/ir/location_descriptor.h         |   6 +-
 11 files changed, 180 insertions(+), 15 deletions(-)

diff --git a/include/dynarmic/A32/config.h b/include/dynarmic/A32/config.h
index ee96c197..aa5f5d02 100644
--- a/include/dynarmic/A32/config.h
+++ b/include/dynarmic/A32/config.h
@@ -101,6 +101,15 @@ struct UserConfig {
     /// This can be avoided by carefully allocating the memory region.
     bool absolute_offset_page_table = false;
 
+    /// Fastmem pointer.
+    /// This should point to the beginning of a 4GB address space that is arranged exactly
+    /// like the emulated address space. If the host page faults on an address, the JIT
+    /// will fall back to calling the MemoryRead*/MemoryWrite* callbacks.
+    void* fastmem_pointer = nullptr;
+    /// Determines whether instructions that page fault should cause recompilation of
+    /// that block with fastmem disabled.
+    bool recompile_on_fastmem_failure = true;
+
     // Coprocessors
     std::array<std::shared_ptr<Coprocessor>, 16> coprocessors{};
 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 22f14652..7c4e3979 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -248,6 +248,7 @@ if (ARCHITECTURE_x86_64)
         backend/x64/emit_x64_sm4.cpp
         backend/x64/emit_x64_vector.cpp
         backend/x64/emit_x64_vector_floating_point.cpp
+        backend/x64/exception_handler.h
         backend/x64/hostloc.cpp
         backend/x64/hostloc.h
         backend/x64/jitstate_info.h
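Note on the new config fields: arranging the 4GB region that fastmem_pointer describes is the embedder's job; the patch only consumes the pointer. A minimal sketch of one way to reserve such an arena, assuming POSIX mmap (the constants and the function name are hypothetical, not part of dynarmic):

    #include <cstddef>
    #include <cstdint>
    #include <sys/mman.h>

    constexpr std::uint32_t kGuestRamBase = 0x10000000;        // hypothetical guest layout
    constexpr std::size_t kGuestRamSize = 64 * 1024 * 1024;

    void* SetUpFastmemArena() {
        // Reserve the full 32-bit guest address space. Pages stay PROT_NONE,
        // so any access outside mapped guest memory faults and the JIT falls
        // back to the MemoryRead*/MemoryWrite* callbacks.
        const std::size_t arena_size = std::size_t(1) << 32;
        void* arena = mmap(nullptr, arena_size, PROT_NONE,
                           MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
        if (arena == MAP_FAILED) {
            return nullptr;
        }

        // Back the guest-RAM range with real memory at its guest offset.
        void* ram = mmap(static_cast<char*>(arena) + kGuestRamBase, kGuestRamSize,
                         PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
        if (ram == MAP_FAILED) {
            munmap(arena, arena_size);
            return nullptr;
        }

        return arena;  // becomes UserConfig::fastmem_pointer
    }

An embedder that needs the same physical memory visible at several guest addresses would instead map a shared fd (shm_open or memfd_create) at each alias with MAP_FIXED.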
diff --git a/src/backend/x64/a32_emit_x64.cpp b/src/backend/x64/a32_emit_x64.cpp
index 3895f31c..1aed8acf 100644
--- a/src/backend/x64/a32_emit_x64.cpp
+++ b/src/backend/x64/a32_emit_x64.cpp
@@ -72,6 +72,10 @@ A32EmitX64::A32EmitX64(BlockOfCode& code, A32::UserConfig config, A32::Jit* jit_
     GenTerminalHandlers();
     code.PreludeComplete();
     ClearFastDispatchTable();
+
+    exception_handler.SetFastmemCallback([this](u64 rip_){
+        return FastmemCallback(rip_);
+    });
 }
 
 A32EmitX64::~A32EmitX64() = default;
@@ -91,6 +95,9 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
         if (config.page_table) {
             gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14));
         }
+        if (config.fastmem_pointer) {
+            gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R13));
+        }
         return gprs;
     }();
 
@@ -146,6 +153,7 @@ void A32EmitX64::ClearCache() {
     EmitX64::ClearCache();
     block_ranges.ClearCache();
     ClearFastDispatchTable();
+    fastmem_patch_info.clear();
 }
 
 void A32EmitX64::InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges) {
@@ -777,6 +785,32 @@ void A32EmitX64::EmitA32SetExclusive(A32EmitContext& ctx, IR::Inst* inst) {
     code.mov(dword[r15 + offsetof(A32JitState, exclusive_address)], address);
 }
 
+std::optional<A32EmitX64::DoNotFastmemMarker> A32EmitX64::ShouldFastmem(A32EmitContext& ctx, IR::Inst* inst) const {
+    if (!config.fastmem_pointer || !exception_handler.SupportsFastmem()) {
+        return std::nullopt;
+    }
+
+    const auto marker = std::make_tuple(ctx.Location(), ctx.GetInstOffset(inst));
+    if (do_not_fastmem.count(marker) > 0) {
+        return std::nullopt;
+    }
+    return marker;
+}
+
+FakeCall A32EmitX64::FastmemCallback(u64 rip_) {
+    const auto iter = fastmem_patch_info.find(rip_);
+    ASSERT(iter != fastmem_patch_info.end());
+    if (config.recompile_on_fastmem_failure) {
+        const auto marker = iter->second.marker;
+        do_not_fastmem.emplace(marker);
+        InvalidateBasicBlocks({std::get<0>(marker)});
+    }
+    FakeCall ret;
+    ret.call_rip = iter->second.callback;
+    ret.ret_rip = iter->second.resume_rip;
+    return ret;
+}
+
 static Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, RegAlloc& reg_alloc,
                                      const A32::UserConfig& config, Xbyak::Label& abort,
                                      Xbyak::Reg64 vaddr,
@@ -823,13 +857,48 @@ void A32EmitX64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst) {
         return;
     }
 
-    Xbyak::Label abort, end;
-
     const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
     const Xbyak::Reg64 value = ctx.reg_alloc.ScratchGpr();
 
     const auto wrapped_fn = read_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())];
 
+    if (const auto marker = ShouldFastmem(ctx, inst)) {
+        const auto location = code.getCurr();
+
+        switch (bitsize) {
+        case 8:
+            code.movzx(value.cvt32(), code.byte[r13 + vaddr]);
+            break;
+        case 16:
+            code.movzx(value.cvt32(), word[r13 + vaddr]);
+            break;
+        case 32:
+            code.mov(value.cvt32(), dword[r13 + vaddr]);
+            break;
+        case 64:
+            code.mov(value, qword[r13 + vaddr]);
+            break;
+        default:
+            ASSERT_MSG(false, "Invalid bitsize");
+            break;
+        }
+
+        ctx.reg_alloc.DefineValue(inst, value);
+
+        fastmem_patch_info.emplace(
+            Common::BitCast<u64>(location),
+            FastmemPatchInfo{
+                Common::BitCast<u64>(code.getCurr()),
+                Common::BitCast<u64>(wrapped_fn),
+                *marker,
+            }
+        );
+
+        return;
+    }
+
+    Xbyak::Label abort, end;
+
     const auto src_ptr = EmitVAddrLookup(code, ctx.reg_alloc, config, abort, vaddr, value);
     switch (bitsize) {
     case 8:
@@ -845,7 +914,7 @@ void A32EmitX64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst) {
         code.mov(value, qword[src_ptr]);
         break;
     default:
-        ASSERT_MSG(false, "Invalid bit_size");
+        ASSERT_MSG(false, "Invalid bitsize");
         break;
     }
     code.jmp(end);
@@ -881,13 +950,46 @@ void A32EmitX64::WriteMemory(A32EmitContext& ctx, IR::Inst* inst) {
         return;
     }
 
-    Xbyak::Label abort, end;
-
     const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
     const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
 
     const auto wrapped_fn = write_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())];
 
+    if (const auto marker = ShouldFastmem(ctx, inst)) {
+        const auto location = code.getCurr();
+
+        switch (bitsize) {
+        case 8:
+            code.mov(code.byte[r13 + vaddr], value.cvt8());
+            break;
+        case 16:
+            code.mov(word[r13 + vaddr], value.cvt16());
+            break;
+        case 32:
+            code.mov(dword[r13 + vaddr], value.cvt32());
+            break;
+        case 64:
+            code.mov(qword[r13 + vaddr], value);
+            break;
+        default:
+            ASSERT_MSG(false, "Invalid bitsize");
+            break;
+        }
+
+        fastmem_patch_info.emplace(
+            Common::BitCast<u64>(location),
+            FastmemPatchInfo{
+                Common::BitCast<u64>(code.getCurr()),
+                Common::BitCast<u64>(wrapped_fn),
+                *marker,
+            }
+        );
+
+        return;
+    }
+
+    Xbyak::Label abort, end;
+
     const auto dest_ptr = EmitVAddrLookup(code, ctx.reg_alloc, config, abort, vaddr);
     switch (bitsize) {
     case 8:
@@ -903,7 +1005,7 @@ void A32EmitX64::WriteMemory(A32EmitContext& ctx, IR::Inst* inst) {
         code.mov(qword[dest_ptr], value);
         break;
     default:
-        ASSERT_MSG(false, "Invalid bit_size");
+        ASSERT_MSG(false, "Invalid bitsize");
        break;
     }
     code.jmp(end);
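How the recorded patch info is meant to be consumed: when the host faults inside one of the patched [r13 + vaddr] accesses, the platform exception handler is expected to pass the faulting RIP to FastmemCallback and rewrite the thread context so the fallback thunk runs as if it had been called directly. Neither handler in this patch implements that yet (both report SupportsFastmem() == false). A hypothetical Linux SIGSEGV handler might look like the following sketch (ucontext field names are glibc/x86-64 specific; HandleFastmemFault is not a real function in the codebase):

    #include <cstdint>
    #include <functional>
    #include <ucontext.h>

    using u64 = std::uint64_t;
    struct FakeCall { u64 call_rip; u64 ret_rip; };  // mirrors exception_handler.h

    void HandleFastmemFault(mcontext_t& mctx, const std::function<FakeCall(u64)>& cb) {
        // Look up the faulting instruction in fastmem_patch_info.
        const FakeCall fc = cb(static_cast<u64>(mctx.gregs[REG_RIP]));

        // Emulate a CALL to the fallback thunk: push the resume address, then
        // redirect RIP. When the thunk RETs, execution resumes just past the
        // patched load/store, with any loaded value in the expected register.
        mctx.gregs[REG_RSP] -= sizeof(u64);
        *reinterpret_cast<u64*>(mctx.gregs[REG_RSP]) = fc.ret_rip;
        mctx.gregs[REG_RIP] = static_cast<greg_t>(fc.call_rip);
    }

This is why FastmemPatchInfo records both the fallback thunk (callback) and the address just past the patched instruction (resume_rip): together they describe a synthetic call frame.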
diff --git a/src/backend/x64/a32_emit_x64.h b/src/backend/x64/a32_emit_x64.h
index c6a5b912..9a77c80a 100644
--- a/src/backend/x64/a32_emit_x64.h
+++ b/src/backend/x64/a32_emit_x64.h
@@ -7,6 +7,10 @@
 #pragma once
 
 #include <array>
+#include <optional>
+#include <set>
+#include <tuple>
+#include <unordered_map>
 
 #include <dynarmic/A32/a32.h>
 #include <dynarmic/A32/config.h>
@@ -46,7 +50,6 @@ protected:
     const A32::UserConfig config;
     A32::Jit* jit_interface;
     BlockRangeInformation block_ranges;
-    ExceptionHandler exception_handler;
 
     struct FastDispatchEntry {
         u64 location_descriptor;
@@ -78,6 +81,18 @@ protected:
     // Helpers
     std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const override;
 
+    // Fastmem information
+    using DoNotFastmemMarker = std::tuple<IR::LocationDescriptor, size_t>;
+    struct FastmemPatchInfo {
+        u64 resume_rip;
+        u64 callback;
+        DoNotFastmemMarker marker;
+    };
+    std::unordered_map<u64, FastmemPatchInfo> fastmem_patch_info;
+    std::set<DoNotFastmemMarker> do_not_fastmem;
+    std::optional<DoNotFastmemMarker> ShouldFastmem(A32EmitContext& ctx, IR::Inst* inst) const;
+    FakeCall FastmemCallback(u64 rip);
+
     // Memory access helpers
     template <size_t bitsize>
     void ReadMemory(A32EmitContext& ctx, IR::Inst* inst);
diff --git a/src/backend/x64/a32_interface.cpp b/src/backend/x64/a32_interface.cpp
index cb1fe53c..6fba66d9 100644
--- a/src/backend/x64/a32_interface.cpp
+++ b/src/backend/x64/a32_interface.cpp
@@ -42,13 +42,13 @@ static RunCodeCallbacks GenRunCodeCallbacks(A32::UserCallbacks* cb, CodePtr (*Lo
 }
 
 static std::function<void(BlockOfCode&)> GenRCP(const A32::UserConfig& config) {
-    if (!config.page_table) {
-        return [](BlockOfCode&){};
-    }
-
-    const u64 r14_value = Common::BitCast<u64>(config.page_table);
-    return [r14_value](BlockOfCode& code) {
-        code.mov(code.r14, r14_value);
+    return [config](BlockOfCode& code) {
+        if (config.page_table) {
+            code.mov(code.r14, Common::BitCast<u64>(config.page_table));
+        }
+        if (config.fastmem_pointer) {
+            code.mov(code.r13, Common::BitCast<u64>(config.fastmem_pointer));
+        }
     };
 }
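GenRCP above now always emits the register prologue: r14 holds page_table and r13 holds fastmem_pointer, matching the registers removed from the allocatable GPR set in A32EmitX64::Emit. The patched [r13 + vaddr] access therefore computes the host address below (a restatement for clarity; HostAddress is not a real function in the codebase):

    #include <cstdint>

    // vaddr lives zero-extended in a 64-bit GPR, so the addition stays inside
    // the reserved 4GB arena and cannot wrap around the host address space.
    std::uint8_t* HostAddress(void* fastmem_pointer, std::uint32_t vaddr) {
        return static_cast<std::uint8_t*>(fastmem_pointer) + vaddr;
    }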
diff --git a/src/backend/x64/emit_x64.cpp b/src/backend/x64/emit_x64.cpp
index 82ae3672..1049ac21 100644
--- a/src/backend/x64/emit_x64.cpp
+++ b/src/backend/x64/emit_x64.cpp
@@ -4,6 +4,7 @@
  * General Public License version 2 or any later version.
  */
 
+#include <iterator>
 #include <unordered_map>
 
 #include "backend/x64/block_of_code.h"
@@ -28,6 +29,10 @@ using namespace Xbyak::util;
 EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block)
     : reg_alloc(reg_alloc), block(block) {}
 
+size_t EmitContext::GetInstOffset(IR::Inst* inst) const {
+    return static_cast<size_t>(std::distance(block.begin(), IR::Block::iterator(inst)));
+}
+
 void EmitContext::EraseInstruction(IR::Inst* inst) {
     block.Instructions().erase(inst);
     inst->ClearArgs();
diff --git a/src/backend/x64/emit_x64.h b/src/backend/x64/emit_x64.h
index dddf0ef4..ed13f156 100644
--- a/src/backend/x64/emit_x64.h
+++ b/src/backend/x64/emit_x64.h
@@ -43,6 +43,7 @@ using VectorArray = std::array<T, 16 / sizeof(T)>;
 
 struct EmitContext {
     EmitContext(RegAlloc& reg_alloc, IR::Block& block);
+    size_t GetInstOffset(IR::Inst* inst) const;
     void EraseInstruction(IR::Inst* inst);
 
     virtual FP::FPCR FPCR() const = 0;
diff --git a/src/backend/x64/exception_handler.h b/src/backend/x64/exception_handler.h
index c205949b..dab18ab4 100644
--- a/src/backend/x64/exception_handler.h
+++ b/src/backend/x64/exception_handler.h
@@ -6,18 +6,30 @@
 
 #pragma once
 
+#include <functional>
 #include <memory>
 
+#include "common/common_types.h"
+
 namespace Dynarmic::Backend::X64 {
 
 class BlockOfCode;
 
+struct FakeCall {
+    u64 call_rip;
+    u64 ret_rip;
+};
+
 class ExceptionHandler final {
 public:
     ExceptionHandler();
     ~ExceptionHandler();
 
     void Register(BlockOfCode& code);
+
+    bool SupportsFastmem() const noexcept;
+    void SetFastmemCallback(std::function<FakeCall(u64)> cb);
+
 private:
     struct Impl;
     std::unique_ptr<Impl> impl;
diff --git a/src/backend/x64/exception_handler_generic.cpp b/src/backend/x64/exception_handler_generic.cpp
index ae9986be..75a787fd 100644
--- a/src/backend/x64/exception_handler_generic.cpp
+++ b/src/backend/x64/exception_handler_generic.cpp
@@ -18,4 +18,12 @@ void ExceptionHandler::Register(BlockOfCode&) {
     // Do nothing
 }
 
+bool ExceptionHandler::SupportsFastmem() const noexcept {
+    return false;
+}
+
+void ExceptionHandler::SetFastmemCallback(std::function<FakeCall(u64)>) {
+    // Do nothing
+}
+
 } // namespace Dynarmic::Backend::X64
diff --git a/src/backend/x64/exception_handler_windows.cpp b/src/backend/x64/exception_handler_windows.cpp
index 1fed9f47..537c54db 100644
--- a/src/backend/x64/exception_handler_windows.cpp
+++ b/src/backend/x64/exception_handler_windows.cpp
@@ -198,4 +198,12 @@ void ExceptionHandler::Register(BlockOfCode& code) {
     impl = std::make_unique<Impl>(rfuncs, code.getCode());
 }
 
+bool ExceptionHandler::SupportsFastmem() const noexcept {
+    return false;
+}
+
+void ExceptionHandler::SetFastmemCallback(std::function<FakeCall(u64)>) {
+    // Do nothing
+}
+
 } // namespace Dynarmic::Backend::X64
diff --git a/src/frontend/ir/location_descriptor.h b/src/frontend/ir/location_descriptor.h
index 89d06db3..fd16c506 100644
--- a/src/frontend/ir/location_descriptor.h
+++ b/src/frontend/ir/location_descriptor.h
@@ -33,13 +33,17 @@ private:
 
 std::ostream& operator<<(std::ostream& o, const LocationDescriptor& descriptor);
 
+inline bool operator<(const LocationDescriptor& x, const LocationDescriptor& y) noexcept {
+    return x.Value() < y.Value();
+}
+
 } // namespace Dynarmic::IR
 
 namespace std {
 template <>
 struct less<Dynarmic::IR::LocationDescriptor> {
     bool operator()(const Dynarmic::IR::LocationDescriptor& x, const Dynarmic::IR::LocationDescriptor& y) const noexcept {
-        return x.Value() < y.Value();
+        return x < y;
     }
 };
 template <>
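Taken together, opting in from the embedder side is a one-field change. A minimal sketch (assumes `callbacks` implements A32::UserCallbacks and `arena` comes from a reservation like the one sketched earlier; note that with this patch alone fastmem stays dormant, since both exception handlers still report SupportsFastmem() == false):

    #include <dynarmic/A32/a32.h>
    #include <dynarmic/A32/config.h>

    void RunWithFastmem(Dynarmic::A32::UserCallbacks& callbacks, void* arena) {
        Dynarmic::A32::UserConfig config;
        config.callbacks = &callbacks;
        config.fastmem_pointer = arena;              // base of the 4GB arena
        config.recompile_on_fastmem_failure = true;  // re-emit faulting blocks without fastmem

        Dynarmic::A32::Jit jit{config};
        jit.Regs()[15] = 0;  // hypothetical entry point (PC)
        jit.Run();
    }

The operator< added to IR::LocationDescriptor at the end of the diff exists to support exactly this machinery: DoNotFastmemMarker is a std::tuple containing a LocationDescriptor, and std::set<DoNotFastmemMarker> needs an ordering to track which blocks have already faulted.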