From cd1560c66477dbdbe30ed24c55ed8bd610a8c81a Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Wed, 15 Apr 2020 21:26:48 +0100
Subject: [PATCH] emit_x64: Do not clear fast_dispatch_table unnecessarily

Reduces invalidation overhead
---
Notes (not part of the commit message):

* InvalidateCacheRanges() no longer calls ClearFastDispatchTable(). Instead,
  the new A32/A64 Unpatch() overrides reset only the table entry returned by
  the emitted fast_dispatch_table_lookup helper for the given location.
* FastDispatchEntry gains default member initializers equal to the old fill
  value, so fill({}) writes the same "empty" entry as before.
* NOTE(review): in the A64 GenTerminalHandlers hunk the hash is now
  accumulated into rbx (crc32(rbx, r12d)) while the following and_/lea still
  index with ebp/rbp, whereas the new lookup helper hashes and masks the same
  register (ABI_PARAM1). The code that loads rbx/rbp is outside this patch;
  please confirm the handler and the helper select the same table slot.

 src/backend/x64/a32_emit_x64.cpp | 20 ++++++++++++++++++--
 src/backend/x64/a32_emit_x64.h   |  6 ++++--
 src/backend/x64/a64_emit_x64.cpp | 22 +++++++++++++++++++---
 src/backend/x64/a64_emit_x64.h   |  6 ++++--
 src/backend/x64/emit_x64.h       |  2 +-
 5 files changed, 46 insertions(+), 10 deletions(-)

diff --git a/src/backend/x64/a32_emit_x64.cpp b/src/backend/x64/a32_emit_x64.cpp
index 1aed8acf..172fab84 100644
--- a/src/backend/x64/a32_emit_x64.cpp
+++ b/src/backend/x64/a32_emit_x64.cpp
@@ -158,12 +158,11 @@ void A32EmitX64::ClearCache() {
 
 void A32EmitX64::InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges) {
     InvalidateBasicBlocks(block_ranges.InvalidateRanges(ranges));
-    ClearFastDispatchTable();
 }
 
 void A32EmitX64::ClearFastDispatchTable() {
     if (config.enable_fast_dispatch) {
-        fast_dispatch_table.fill({0xFFFFFFFFFFFFFFFFull, nullptr});
+        fast_dispatch_table.fill({});
     }
 }
 
@@ -278,6 +277,16 @@ void A32EmitX64::GenTerminalHandlers() {
         code.mov(ptr[rbp + offsetof(FastDispatchEntry, code_ptr)], rax);
         code.jmp(rax);
         PerfMapRegister(terminal_handler_fast_dispatch_hint, code.getCurr(), "a32_terminal_handler_fast_dispatch_hint");
+
+        code.align();
+        fast_dispatch_table_lookup = code.getCurr<FastDispatchEntry&(*)(u64)>();
+        code.mov(code.ABI_PARAM2, reinterpret_cast<u64>(fast_dispatch_table.data()));
+        if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE42)) {
+            code.crc32(code.ABI_PARAM1.cvt32(), code.ABI_PARAM2.cvt32());
+        }
+        code.and_(code.ABI_PARAM1.cvt32(), fast_dispatch_table_mask);
+        code.lea(code.ABI_RETURN, code.ptr[code.ABI_PARAM1 + code.ABI_PARAM2]);
+        code.ret();
     }
 }
 
@@ -1511,4 +1520,11 @@ void A32EmitX64::EmitPatchMovRcx(CodePtr target_code_ptr) {
     code.EnsurePatchLocationSize(patch_location, 10);
 }
 
+void A32EmitX64::Unpatch(const IR::LocationDescriptor& location) {
+    EmitX64::Unpatch(location);
+    if (config.enable_fast_dispatch) {
+        (*fast_dispatch_table_lookup)(location.Value()) = {};
+    }
+}
+
 } // namespace Dynarmic::Backend::X64
diff --git a/src/backend/x64/a32_emit_x64.h b/src/backend/x64/a32_emit_x64.h
index 9a77c80a..7821f162 100644
--- a/src/backend/x64/a32_emit_x64.h
+++ b/src/backend/x64/a32_emit_x64.h
@@ -52,8 +52,8 @@ protected:
     BlockRangeInformation<u32> block_ranges;
 
     struct FastDispatchEntry {
-        u64 location_descriptor;
-        const void* code_ptr;
+        u64 location_descriptor = 0xFFFF'FFFF'FFFF'FFFFull;
+        const void* code_ptr = nullptr;
     };
     static_assert(sizeof(FastDispatchEntry) == 0x10);
     static constexpr u64 fast_dispatch_table_mask = 0xFFFF0;
@@ -67,6 +67,7 @@ protected:
 
     const void* terminal_handler_pop_rsb_hint;
     const void* terminal_handler_fast_dispatch_hint = nullptr;
+    FastDispatchEntry& (*fast_dispatch_table_lookup)(u64) = nullptr;
     void GenTerminalHandlers();
 
     // Microinstruction emitters
@@ -112,6 +113,7 @@ protected:
     void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location) override;
 
     // Patching
+    void Unpatch(const IR::LocationDescriptor& target_desc) override;
     void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
     void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
     void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override;
diff --git a/src/backend/x64/a64_emit_x64.cpp b/src/backend/x64/a64_emit_x64.cpp
index 1741df20..1266ed31 100644
--- a/src/backend/x64/a64_emit_x64.cpp
+++ b/src/backend/x64/a64_emit_x64.cpp
@@ -126,12 +126,11 @@ void A64EmitX64::ClearCache() {
 
 void A64EmitX64::InvalidateCacheRanges(const boost::icl::interval_set<u64>& ranges) {
     InvalidateBasicBlocks(block_ranges.InvalidateRanges(ranges));
-    ClearFastDispatchTable();
 }
 
 void A64EmitX64::ClearFastDispatchTable() {
     if (conf.enable_fast_dispatch) {
-        fast_dispatch_table.fill({0xFFFFFFFFFFFFFFFFull, nullptr});
+        fast_dispatch_table.fill({});
     }
 }
 
@@ -325,7 +324,7 @@ void A64EmitX64::GenTerminalHandlers() {
         code.L(rsb_cache_miss);
         code.mov(r12, reinterpret_cast<u64>(fast_dispatch_table.data()));
         if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE42)) {
-            code.crc32(rbp, r12d);
+            code.crc32(rbx, r12d);
         }
         code.and_(ebp, fast_dispatch_table_mask);
         code.lea(rbp, ptr[r12 + rbp]);
@@ -338,6 +337,16 @@ void A64EmitX64::GenTerminalHandlers() {
         code.mov(ptr[rbp + offsetof(FastDispatchEntry, code_ptr)], rax);
         code.jmp(rax);
         PerfMapRegister(terminal_handler_fast_dispatch_hint, code.getCurr(), "a64_terminal_handler_fast_dispatch_hint");
+
+        code.align();
+        fast_dispatch_table_lookup = code.getCurr<FastDispatchEntry&(*)(u64)>();
+        code.mov(code.ABI_PARAM2, reinterpret_cast<u64>(fast_dispatch_table.data()));
+        if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE42)) {
+            code.crc32(code.ABI_PARAM1, code.ABI_PARAM2);
+        }
+        code.and_(code.ABI_PARAM1.cvt32(), fast_dispatch_table_mask);
+        code.lea(code.ABI_RETURN, code.ptr[code.ABI_PARAM1 + code.ABI_PARAM2]);
+        code.ret();
     }
 }
 
@@ -1244,4 +1253,11 @@ void A64EmitX64::EmitPatchMovRcx(CodePtr target_code_ptr) {
     code.EnsurePatchLocationSize(patch_location, 10);
 }
 
+void A64EmitX64::Unpatch(const IR::LocationDescriptor& location) {
+    EmitX64::Unpatch(location);
+    if (conf.enable_fast_dispatch) {
+        (*fast_dispatch_table_lookup)(location.Value()) = {};
+    }
+}
+
 } // namespace Dynarmic::Backend::X64
diff --git a/src/backend/x64/a64_emit_x64.h b/src/backend/x64/a64_emit_x64.h
index cb5a526d..5c837aa2 100644
--- a/src/backend/x64/a64_emit_x64.h
+++ b/src/backend/x64/a64_emit_x64.h
@@ -52,8 +52,8 @@ protected:
     BlockRangeInformation<u64> block_ranges;
 
     struct FastDispatchEntry {
-        u64 location_descriptor;
-        const void* code_ptr;
+        u64 location_descriptor = 0xFFFF'FFFF'FFFF'FFFFull;
+        const void* code_ptr = nullptr;
     };
     static_assert(sizeof(FastDispatchEntry) == 0x10);
     static constexpr u64 fast_dispatch_table_mask = 0xFFFFF0;
@@ -71,6 +71,7 @@ protected:
 
     const void* terminal_handler_pop_rsb_hint;
     const void* terminal_handler_fast_dispatch_hint = nullptr;
+    FastDispatchEntry& (*fast_dispatch_table_lookup)(u64) = nullptr;
     void GenTerminalHandlers();
 
     void EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize);
@@ -101,6 +102,7 @@ protected:
     void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location) override;
 
     // Patching
+    void Unpatch(const IR::LocationDescriptor& target_desc) override;
     void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
     void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
     void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override;
diff --git a/src/backend/x64/emit_x64.h b/src/backend/x64/emit_x64.h
index ed13f156..6bab043a 100644
--- a/src/backend/x64/emit_x64.h
+++ b/src/backend/x64/emit_x64.h
@@ -109,7 +109,7 @@ protected:
         std::vector<CodePtr> mov_rcx;
     };
     void Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr);
-    void Unpatch(const IR::LocationDescriptor& target_desc);
+    virtual void Unpatch(const IR::LocationDescriptor& target_desc);
     virtual void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
     virtual void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
     virtual void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) = 0;