emit_x64: Do not clear fast_dispatch_table unnecessarily

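Reduces invalidation overhead: InvalidateCacheRanges no longer clears the whole fast dispatch table; instead, Unpatch resets only the table entry for the location being unpatched.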
This commit is contained in:
MerryMage 2020-04-15 21:26:48 +01:00
parent fb2dc2f55c
commit cd1560c664
5 changed files with 46 additions and 10 deletions

View file

@ -158,12 +158,11 @@ void A32EmitX64::ClearCache() {
void A32EmitX64::InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges) { void A32EmitX64::InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges) {
InvalidateBasicBlocks(block_ranges.InvalidateRanges(ranges)); InvalidateBasicBlocks(block_ranges.InvalidateRanges(ranges));
ClearFastDispatchTable();
} }
void A32EmitX64::ClearFastDispatchTable() { void A32EmitX64::ClearFastDispatchTable() {
if (config.enable_fast_dispatch) { if (config.enable_fast_dispatch) {
fast_dispatch_table.fill({0xFFFFFFFFFFFFFFFFull, nullptr}); fast_dispatch_table.fill({});
} }
} }
@ -278,6 +277,16 @@ void A32EmitX64::GenTerminalHandlers() {
code.mov(ptr[rbp + offsetof(FastDispatchEntry, code_ptr)], rax); code.mov(ptr[rbp + offsetof(FastDispatchEntry, code_ptr)], rax);
code.jmp(rax); code.jmp(rax);
PerfMapRegister(terminal_handler_fast_dispatch_hint, code.getCurr(), "a32_terminal_handler_fast_dispatch_hint"); PerfMapRegister(terminal_handler_fast_dispatch_hint, code.getCurr(), "a32_terminal_handler_fast_dispatch_hint");
code.align();
fast_dispatch_table_lookup = code.getCurr<FastDispatchEntry&(*)(u64)>();
code.mov(code.ABI_PARAM2, reinterpret_cast<u64>(fast_dispatch_table.data()));
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE42)) {
code.crc32(code.ABI_PARAM1.cvt32(), code.ABI_PARAM2.cvt32());
}
code.and_(code.ABI_PARAM1.cvt32(), fast_dispatch_table_mask);
code.lea(code.ABI_RETURN, code.ptr[code.ABI_PARAM1 + code.ABI_PARAM2]);
code.ret();
} }
} }
@ -1511,4 +1520,11 @@ void A32EmitX64::EmitPatchMovRcx(CodePtr target_code_ptr) {
code.EnsurePatchLocationSize(patch_location, 10); code.EnsurePatchLocationSize(patch_location, 10);
} }
// Unpatches `location` via the base-class EmitX64::Unpatch and then, when fast
// dispatch is enabled, resets the fast dispatch table entry for that location.
//
// The entry is found by calling fast_dispatch_table_lookup, a pointer to the
// emitted helper that maps a location descriptor value to its
// FastDispatchEntry. Assigning `{}` restores the member defaults
// (location_descriptor = 0xFFFF'FFFF'FFFF'FFFF, code_ptr = nullptr).
//
// NOTE(review): fast_dispatch_table_lookup defaults to nullptr; presumably it
// is always assigned in GenTerminalHandlers when enable_fast_dispatch is set
// -- confirm before relying on this call.
void A32EmitX64::Unpatch(const IR::LocationDescriptor& location) {
EmitX64::Unpatch(location);
if (config.enable_fast_dispatch) {
// Reset only the slot this location maps to, not the whole table.
(*fast_dispatch_table_lookup)(location.Value()) = {};
}
}
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -52,8 +52,8 @@ protected:
BlockRangeInformation<u32> block_ranges; BlockRangeInformation<u32> block_ranges;
struct FastDispatchEntry { struct FastDispatchEntry {
u64 location_descriptor; u64 location_descriptor = 0xFFFF'FFFF'FFFF'FFFFull;
const void* code_ptr; const void* code_ptr = nullptr;
}; };
static_assert(sizeof(FastDispatchEntry) == 0x10); static_assert(sizeof(FastDispatchEntry) == 0x10);
static constexpr u64 fast_dispatch_table_mask = 0xFFFF0; static constexpr u64 fast_dispatch_table_mask = 0xFFFF0;
@ -67,6 +67,7 @@ protected:
const void* terminal_handler_pop_rsb_hint; const void* terminal_handler_pop_rsb_hint;
const void* terminal_handler_fast_dispatch_hint = nullptr; const void* terminal_handler_fast_dispatch_hint = nullptr;
FastDispatchEntry& (*fast_dispatch_table_lookup)(u64) = nullptr;
void GenTerminalHandlers(); void GenTerminalHandlers();
// Microinstruction emitters // Microinstruction emitters
@ -112,6 +113,7 @@ protected:
void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location) override; void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location) override;
// Patching // Patching
void Unpatch(const IR::LocationDescriptor& target_desc) override;
void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override; void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override; void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override; void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override;

View file

@ -126,12 +126,11 @@ void A64EmitX64::ClearCache() {
void A64EmitX64::InvalidateCacheRanges(const boost::icl::interval_set<u64>& ranges) { void A64EmitX64::InvalidateCacheRanges(const boost::icl::interval_set<u64>& ranges) {
InvalidateBasicBlocks(block_ranges.InvalidateRanges(ranges)); InvalidateBasicBlocks(block_ranges.InvalidateRanges(ranges));
ClearFastDispatchTable();
} }
void A64EmitX64::ClearFastDispatchTable() { void A64EmitX64::ClearFastDispatchTable() {
if (conf.enable_fast_dispatch) { if (conf.enable_fast_dispatch) {
fast_dispatch_table.fill({0xFFFFFFFFFFFFFFFFull, nullptr}); fast_dispatch_table.fill({});
} }
} }
@ -325,7 +324,7 @@ void A64EmitX64::GenTerminalHandlers() {
code.L(rsb_cache_miss); code.L(rsb_cache_miss);
code.mov(r12, reinterpret_cast<u64>(fast_dispatch_table.data())); code.mov(r12, reinterpret_cast<u64>(fast_dispatch_table.data()));
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE42)) { if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE42)) {
code.crc32(rbp, r12d); code.crc32(rbx, r12d);
} }
code.and_(ebp, fast_dispatch_table_mask); code.and_(ebp, fast_dispatch_table_mask);
code.lea(rbp, ptr[r12 + rbp]); code.lea(rbp, ptr[r12 + rbp]);
@ -338,6 +337,16 @@ void A64EmitX64::GenTerminalHandlers() {
code.mov(ptr[rbp + offsetof(FastDispatchEntry, code_ptr)], rax); code.mov(ptr[rbp + offsetof(FastDispatchEntry, code_ptr)], rax);
code.jmp(rax); code.jmp(rax);
PerfMapRegister(terminal_handler_fast_dispatch_hint, code.getCurr(), "a64_terminal_handler_fast_dispatch_hint"); PerfMapRegister(terminal_handler_fast_dispatch_hint, code.getCurr(), "a64_terminal_handler_fast_dispatch_hint");
code.align();
fast_dispatch_table_lookup = code.getCurr<FastDispatchEntry&(*)(u64)>();
code.mov(code.ABI_PARAM2, reinterpret_cast<u64>(fast_dispatch_table.data()));
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE42)) {
code.crc32(code.ABI_PARAM1, code.ABI_PARAM2);
}
code.and_(code.ABI_PARAM1.cvt32(), fast_dispatch_table_mask);
code.lea(code.ABI_RETURN, code.ptr[code.ABI_PARAM1 + code.ABI_PARAM2]);
code.ret();
} }
} }
@ -1244,4 +1253,11 @@ void A64EmitX64::EmitPatchMovRcx(CodePtr target_code_ptr) {
code.EnsurePatchLocationSize(patch_location, 10); code.EnsurePatchLocationSize(patch_location, 10);
} }
// Unpatches `location` via the base-class EmitX64::Unpatch and then, when fast
// dispatch is enabled, resets the fast dispatch table entry for that location.
//
// The entry is found by calling fast_dispatch_table_lookup, a pointer to the
// emitted helper that maps a location descriptor value to its
// FastDispatchEntry. Assigning `{}` restores the member defaults
// (location_descriptor = 0xFFFF'FFFF'FFFF'FFFF, code_ptr = nullptr).
//
// NOTE(review): this only invalidates the right slot if the lookup helper
// hashes exactly like the fast dispatch terminal handler. The handler's crc32
// uses (rbx, r12d) and then masks ebp, while the helper uses the full-width
// (ABI_PARAM1, ABI_PARAM2) -- verify both index the same entry.
// NOTE(review): fast_dispatch_table_lookup defaults to nullptr; presumably it
// is always assigned in GenTerminalHandlers when enable_fast_dispatch is set
// -- confirm before relying on this call.
void A64EmitX64::Unpatch(const IR::LocationDescriptor& location) {
EmitX64::Unpatch(location);
if (conf.enable_fast_dispatch) {
// Reset only the slot this location maps to, not the whole table.
(*fast_dispatch_table_lookup)(location.Value()) = {};
}
}
} // namespace Dynarmic::Backend::X64 } // namespace Dynarmic::Backend::X64

View file

@ -52,8 +52,8 @@ protected:
BlockRangeInformation<u64> block_ranges; BlockRangeInformation<u64> block_ranges;
struct FastDispatchEntry { struct FastDispatchEntry {
u64 location_descriptor; u64 location_descriptor = 0xFFFF'FFFF'FFFF'FFFFull;
const void* code_ptr; const void* code_ptr = nullptr;
}; };
static_assert(sizeof(FastDispatchEntry) == 0x10); static_assert(sizeof(FastDispatchEntry) == 0x10);
static constexpr u64 fast_dispatch_table_mask = 0xFFFFF0; static constexpr u64 fast_dispatch_table_mask = 0xFFFFF0;
@ -71,6 +71,7 @@ protected:
const void* terminal_handler_pop_rsb_hint; const void* terminal_handler_pop_rsb_hint;
const void* terminal_handler_fast_dispatch_hint = nullptr; const void* terminal_handler_fast_dispatch_hint = nullptr;
FastDispatchEntry& (*fast_dispatch_table_lookup)(u64) = nullptr;
void GenTerminalHandlers(); void GenTerminalHandlers();
void EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize); void EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize);
@ -101,6 +102,7 @@ protected:
void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location) override; void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location) override;
// Patching // Patching
void Unpatch(const IR::LocationDescriptor& target_desc) override;
void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override; void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override; void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override; void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override;

View file

@ -109,7 +109,7 @@ protected:
std::vector<CodePtr> mov_rcx; std::vector<CodePtr> mov_rcx;
}; };
void Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr); void Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr);
void Unpatch(const IR::LocationDescriptor& target_desc); virtual void Unpatch(const IR::LocationDescriptor& target_desc);
virtual void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0; virtual void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
virtual void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0; virtual void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
virtual void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) = 0; virtual void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) = 0;