Implement perfmap
This commit is contained in:
parent
f73104633b
commit
de4494ffa5
10 changed files with 184 additions and 12 deletions
|
@ -244,6 +244,8 @@ if (ARCHITECTURE_x86_64)
|
|||
backend/x64/hostloc.h
|
||||
backend/x64/jitstate_info.h
|
||||
backend/x64/oparg.h
|
||||
backend/x64/perf_map.cpp
|
||||
backend/x64/perf_map.h
|
||||
backend/x64/reg_alloc.cpp
|
||||
backend/x64/reg_alloc.h
|
||||
)
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include <unordered_set>
|
||||
#include <utility>
|
||||
|
||||
#include <fmt/format.h>
|
||||
#include <fmt/ostream.h>
|
||||
|
||||
#include <dynarmic/A32/coprocessor.h>
|
||||
|
@ -18,6 +19,7 @@
|
|||
#include "backend/x64/block_of_code.h"
|
||||
#include "backend/x64/devirtualize.h"
|
||||
#include "backend/x64/emit_x64.h"
|
||||
#include "backend/x64/perf_map.h"
|
||||
#include "common/address_range.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_util.h"
|
||||
|
@ -132,17 +134,15 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
|
|||
EmitX64::EmitTerminal(block.GetTerminal(), block.Location());
|
||||
code.int3();
|
||||
|
||||
const A32::LocationDescriptor descriptor{block.Location()};
|
||||
Patch(descriptor, entrypoint);
|
||||
|
||||
const size_t size = static_cast<size_t>(code.getCurr() - entrypoint);
|
||||
|
||||
const A32::LocationDescriptor descriptor{block.Location()};
|
||||
const A32::LocationDescriptor end_location{block.EndLocation()};
|
||||
|
||||
const auto range = boost::icl::discrete_interval<u32>::closed(descriptor.PC(), end_location.PC() - 1);
|
||||
A32EmitX64::BlockDescriptor block_desc{entrypoint, size};
|
||||
block_descriptors.emplace(descriptor.UniqueHash(), block_desc);
|
||||
block_ranges.AddRange(range, descriptor);
|
||||
|
||||
return block_desc;
|
||||
return RegisterBlock(descriptor, entrypoint, size);
|
||||
}
|
||||
|
||||
void A32EmitX64::ClearCache() {
|
||||
|
@ -161,6 +161,7 @@ void A32EmitX64::GenMemoryAccessors() {
|
|||
Devirtualize<&A32::UserCallbacks::MemoryRead8>(config.callbacks).EmitCall(code);
|
||||
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN);
|
||||
code.ret();
|
||||
PerfMapRegister(read_memory_8, code.getCurr(), "a32_read_memory_8");
|
||||
|
||||
code.align();
|
||||
read_memory_16 = code.getCurr<const void*>();
|
||||
|
@ -168,6 +169,7 @@ void A32EmitX64::GenMemoryAccessors() {
|
|||
Devirtualize<&A32::UserCallbacks::MemoryRead16>(config.callbacks).EmitCall(code);
|
||||
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN);
|
||||
code.ret();
|
||||
PerfMapRegister(read_memory_16, code.getCurr(), "a32_read_memory_16");
|
||||
|
||||
code.align();
|
||||
read_memory_32 = code.getCurr<const void*>();
|
||||
|
@ -175,6 +177,7 @@ void A32EmitX64::GenMemoryAccessors() {
|
|||
Devirtualize<&A32::UserCallbacks::MemoryRead32>(config.callbacks).EmitCall(code);
|
||||
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN);
|
||||
code.ret();
|
||||
PerfMapRegister(read_memory_32, code.getCurr(), "a32_read_memory_32");
|
||||
|
||||
code.align();
|
||||
read_memory_64 = code.getCurr<const void*>();
|
||||
|
@ -182,6 +185,7 @@ void A32EmitX64::GenMemoryAccessors() {
|
|||
Devirtualize<&A32::UserCallbacks::MemoryRead64>(config.callbacks).EmitCall(code);
|
||||
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN);
|
||||
code.ret();
|
||||
PerfMapRegister(read_memory_64, code.getCurr(), "a32_read_memory_64");
|
||||
|
||||
code.align();
|
||||
write_memory_8 = code.getCurr<const void*>();
|
||||
|
@ -189,6 +193,7 @@ void A32EmitX64::GenMemoryAccessors() {
|
|||
Devirtualize<&A32::UserCallbacks::MemoryWrite8>(config.callbacks).EmitCall(code);
|
||||
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN);
|
||||
code.ret();
|
||||
PerfMapRegister(write_memory_8, code.getCurr(), "a32_write_memory_8");
|
||||
|
||||
code.align();
|
||||
write_memory_16 = code.getCurr<const void*>();
|
||||
|
@ -196,6 +201,7 @@ void A32EmitX64::GenMemoryAccessors() {
|
|||
Devirtualize<&A32::UserCallbacks::MemoryWrite16>(config.callbacks).EmitCall(code);
|
||||
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN);
|
||||
code.ret();
|
||||
PerfMapRegister(write_memory_16, code.getCurr(), "a32_write_memory_16");
|
||||
|
||||
code.align();
|
||||
write_memory_32 = code.getCurr<const void*>();
|
||||
|
@ -203,6 +209,7 @@ void A32EmitX64::GenMemoryAccessors() {
|
|||
Devirtualize<&A32::UserCallbacks::MemoryWrite32>(config.callbacks).EmitCall(code);
|
||||
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN);
|
||||
code.ret();
|
||||
PerfMapRegister(write_memory_32, code.getCurr(), "a32_write_memory_32");
|
||||
|
||||
code.align();
|
||||
write_memory_64 = code.getCurr<const void*>();
|
||||
|
@ -210,6 +217,7 @@ void A32EmitX64::GenMemoryAccessors() {
|
|||
Devirtualize<&A32::UserCallbacks::MemoryWrite64>(config.callbacks).EmitCall(code);
|
||||
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN);
|
||||
code.ret();
|
||||
PerfMapRegister(write_memory_64, code.getCurr(), "a32_write_memory_64");
|
||||
}
|
||||
|
||||
void A32EmitX64::EmitA32GetRegister(A32EmitContext& ctx, IR::Inst* inst) {
|
||||
|
@ -1141,6 +1149,15 @@ void A32EmitX64::EmitA32CoprocStoreWords(A32EmitContext& ctx, IR::Inst* inst) {
|
|||
CallCoprocCallback(code, ctx.reg_alloc, jit_interface, *action, nullptr, args[1]);
|
||||
}
|
||||
|
||||
/// Builds the symbol name under which an A32 block is registered in the perf map.
///
/// The name has the form `a32_<t|a><PC>_<be|le>_fpcr<FPSCR>`: the letter is "t"
/// when the descriptor's TFlag is set and "a" otherwise, the endianness tag is
/// "be" when EFlag is set and "le" otherwise, and PC / FPSCR are printed as
/// eight upper-case hex digits.
std::string A32EmitX64::LocationDescriptorToFriendlyName(const IR::LocationDescriptor& ir_descriptor) const {
    const A32::LocationDescriptor a32_location{ir_descriptor};

    const char* const instruction_set_tag = a32_location.TFlag() ? "t" : "a";
    const char* const endianness_tag = a32_location.EFlag() ? "be" : "le";

    return fmt::format("a32_{}{:08X}_{}_fpcr{:08X}",
                       instruction_set_tag,
                       a32_location.PC(),
                       endianness_tag,
                       a32_location.FPSCR().Value());
}
|
||||
|
||||
void A32EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) {
|
||||
ASSERT_MSG(A32::LocationDescriptor{terminal.next}.TFlag() == A32::LocationDescriptor{initial_location}.TFlag(), "Unimplemented");
|
||||
ASSERT_MSG(A32::LocationDescriptor{terminal.next}.EFlag() == A32::LocationDescriptor{initial_location}.EFlag(), "Unimplemented");
|
||||
|
|
|
@ -68,6 +68,9 @@ protected:
|
|||
#undef A32OPC
|
||||
#undef A64OPC
|
||||
|
||||
// Helpers
|
||||
std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const override;
|
||||
|
||||
// Terminal instruction emitters
|
||||
void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) override;
|
||||
void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location) override;
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include <initializer_list>
|
||||
|
||||
#include <dynarmic/A64/exclusive_monitor.h>
|
||||
#include <fmt/format.h>
|
||||
#include <fmt/ostream.h>
|
||||
|
||||
#include "backend/x64/a64_emit_x64.h"
|
||||
|
@ -15,6 +16,7 @@
|
|||
#include "backend/x64/block_of_code.h"
|
||||
#include "backend/x64/devirtualize.h"
|
||||
#include "backend/x64/emit_x64.h"
|
||||
#include "backend/x64/perf_map.h"
|
||||
#include "common/address_range.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_util.h"
|
||||
|
@ -118,17 +120,15 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
|
|||
EmitX64::EmitTerminal(block.GetTerminal(), block.Location());
|
||||
code.int3();
|
||||
|
||||
const A64::LocationDescriptor descriptor{block.Location()};
|
||||
Patch(descriptor, entrypoint);
|
||||
|
||||
const size_t size = static_cast<size_t>(code.getCurr() - entrypoint);
|
||||
|
||||
const A64::LocationDescriptor descriptor{block.Location()};
|
||||
const A64::LocationDescriptor end_location{block.EndLocation()};
|
||||
|
||||
const auto range = boost::icl::discrete_interval<u64>::closed(descriptor.PC(), end_location.PC() - 1);
|
||||
A64EmitX64::BlockDescriptor block_desc{entrypoint, size};
|
||||
block_descriptors.emplace(descriptor.UniqueHash(), block_desc);
|
||||
block_ranges.AddRange(range, descriptor);
|
||||
|
||||
return block_desc;
|
||||
return RegisterBlock(descriptor, entrypoint, size);
|
||||
}
|
||||
|
||||
void A64EmitX64::ClearCache() {
|
||||
|
@ -166,6 +166,7 @@ void A64EmitX64::GenMemory128Accessors() {
|
|||
code.add(rsp, 8);
|
||||
#endif
|
||||
code.ret();
|
||||
PerfMapRegister(memory_read_128, code.getCurr(), "a64_memory_read_128");
|
||||
|
||||
code.align();
|
||||
memory_write_128 = code.getCurr<void(*)()>();
|
||||
|
@ -189,6 +190,7 @@ void A64EmitX64::GenMemory128Accessors() {
|
|||
code.add(rsp, 8);
|
||||
#endif
|
||||
code.ret();
|
||||
// The write accessor starts at memory_write_128; using memory_read_128 here
// would make this entry span both accessors and overlap the read entry.
PerfMapRegister(memory_write_128, code.getCurr(), "a64_memory_write_128");
|
||||
}
|
||||
|
||||
void A64EmitX64::GenFastmemFallbacks() {
|
||||
|
@ -224,6 +226,7 @@ void A64EmitX64::GenFastmemFallbacks() {
|
|||
}
|
||||
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(value_idx));
|
||||
code.ret();
|
||||
PerfMapRegister(read_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)], code.getCurr(), "a64_read_fallback_128");
|
||||
|
||||
code.align();
|
||||
write_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)] = code.getCurr<void(*)()>();
|
||||
|
@ -237,6 +240,7 @@ void A64EmitX64::GenFastmemFallbacks() {
|
|||
code.call(memory_write_128);
|
||||
ABI_PopCallerSaveRegistersAndAdjustStack(code);
|
||||
code.ret();
|
||||
PerfMapRegister(write_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)], code.getCurr(), "a64_write_fallback_128");
|
||||
|
||||
if (value_idx == 4 || value_idx == 15) {
|
||||
continue;
|
||||
|
@ -255,6 +259,7 @@ void A64EmitX64::GenFastmemFallbacks() {
|
|||
}
|
||||
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx));
|
||||
code.ret();
|
||||
PerfMapRegister(read_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)], code.getCurr(), fmt::format("a64_read_fallback_{}", bitsize));
|
||||
}
|
||||
|
||||
for (auto& [bitsize, callback] : write_callbacks) {
|
||||
|
@ -279,6 +284,7 @@ void A64EmitX64::GenFastmemFallbacks() {
|
|||
callback.EmitCall(code);
|
||||
ABI_PopCallerSaveRegistersAndAdjustStack(code);
|
||||
code.ret();
|
||||
PerfMapRegister(write_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)], code.getCurr(), fmt::format("a64_write_fallback_{}", bitsize));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -999,6 +1005,13 @@ void A64EmitX64::EmitA64ExclusiveWriteMemory128(A64EmitContext& ctx, IR::Inst* i
|
|||
EmitExclusiveWrite(ctx, inst, 128);
|
||||
}
|
||||
|
||||
/// Builds the symbol name under which an A64 block is registered in the perf map.
///
/// The name has the form `a64_<PC>_fpcr<FPCR>`, with PC printed as sixteen and
/// FPCR as eight upper-case hex digits.
std::string A64EmitX64::LocationDescriptorToFriendlyName(const IR::LocationDescriptor& ir_descriptor) const {
    const A64::LocationDescriptor a64_location{ir_descriptor};

    const auto program_counter = a64_location.PC();
    const auto fpcr_value = a64_location.FPCR().Value();

    return fmt::format("a64_{:016X}_fpcr{:08X}", program_counter, fpcr_value);
}
|
||||
|
||||
void A64EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor) {
|
||||
code.SwitchMxcsrOnExit();
|
||||
Devirtualize<&A64::UserCallbacks::InterpreterFallback>(conf.callbacks).EmitCall(code,
|
||||
|
|
|
@ -74,6 +74,9 @@ protected:
|
|||
#undef A32OPC
|
||||
#undef A64OPC
|
||||
|
||||
// Helpers
|
||||
std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const override;
|
||||
|
||||
// Terminal instruction emitters
|
||||
void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) override;
|
||||
void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location) override;
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#include "backend/x64/a32_jitstate.h"
|
||||
#include "backend/x64/abi.h"
|
||||
#include "backend/x64/block_of_code.h"
|
||||
#include "backend/x64/perf_map.h"
|
||||
#include "common/assert.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
|
@ -223,6 +224,8 @@ void BlockOfCode::GenRunCode() {
|
|||
align();
|
||||
return_from_run_code[MXCSR_ALREADY_EXITED | FORCE_RETURN] = getCurr<const void*>();
|
||||
emit_return_from_run_code(true, true);
|
||||
|
||||
PerfMapRegister(run_code_from, getCurr(), "dynarmic_dispatcher");
|
||||
}
|
||||
|
||||
void BlockOfCode::SwitchMxcsrOnEntry() {
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
|
||||
#include "backend/x64/block_of_code.h"
|
||||
#include "backend/x64/emit_x64.h"
|
||||
#include "backend/x64/perf_map.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_util.h"
|
||||
#include "common/common_types.h"
|
||||
|
@ -285,6 +286,15 @@ void EmitX64::EmitCondPrelude(const IR::Block& block) {
|
|||
code.L(pass);
|
||||
}
|
||||
|
||||
/// Records a freshly emitted block.
///
/// In order: registers the range [entrypoint, current code pointer) in the perf
/// map under the block's friendly name, patches references to `descriptor` to
/// point at `entrypoint`, and stores the block descriptor keyed by the
/// descriptor's value.
///
/// @param descriptor  Location the block was emitted for.
/// @param entrypoint  First byte of the emitted host code.
/// @param size        Size of the emitted host code in bytes.
/// @return The descriptor {entrypoint, size} that was stored.
EmitX64::BlockDescriptor EmitX64::RegisterBlock(const IR::LocationDescriptor& descriptor, CodePtr entrypoint, size_t size) {
    const std::string friendly_name = LocationDescriptorToFriendlyName(descriptor);
    PerfMapRegister(entrypoint, code.getCurr(), friendly_name);

    Patch(descriptor, entrypoint);

    const BlockDescriptor registered_block{entrypoint, size};
    block_descriptors.emplace(descriptor.Value(), registered_block);
    return registered_block;
}
|
||||
|
||||
void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location) {
|
||||
Common::VisitVariant<void>(terminal, [this, &initial_location](auto x) {
|
||||
using T = std::decay_t<decltype(x)>;
|
||||
|
@ -325,6 +335,8 @@ void EmitX64::Unpatch(const IR::LocationDescriptor& desc) {
|
|||
void EmitX64::ClearCache() {
|
||||
block_descriptors.clear();
|
||||
patch_information.clear();
|
||||
|
||||
PerfMapClear();
|
||||
}
|
||||
|
||||
void EmitX64::InvalidateBasicBlocks(const std::unordered_set<IR::LocationDescriptor>& locations) {
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
|
@ -85,9 +86,11 @@ protected:
|
|||
#undef A64OPC
|
||||
|
||||
// Helpers
|
||||
virtual std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const = 0;
|
||||
void EmitAddCycles(size_t cycles);
|
||||
Xbyak::Label EmitCond(IR::Cond cond);
|
||||
void EmitCondPrelude(const IR::Block& block);
|
||||
BlockDescriptor RegisterBlock(const IR::LocationDescriptor& location_descriptor, CodePtr entrypoint, size_t size);
|
||||
void PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, IR::LocationDescriptor target);
|
||||
|
||||
// Terminal instruction emitters
|
||||
|
|
89
src/backend/x64/perf_map.cpp
Normal file
89
src/backend/x64/perf_map.cpp
Normal file
|
@ -0,0 +1,89 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2018 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include <cstddef>
|
||||
#include <string>
|
||||
|
||||
#ifdef __linux__
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <mutex>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
namespace {
|
||||
std::mutex mutex;
|
||||
std::FILE* file = nullptr;
|
||||
|
||||
void OpenFile() {
|
||||
const char* perf_dir = std::getenv("PERF_BUILDID_DIR");
|
||||
if (!perf_dir) {
|
||||
file = nullptr;
|
||||
return;
|
||||
}
|
||||
|
||||
const pid_t pid = getpid();
|
||||
const std::string filename = fmt::format("{:s}/perf-{:d}.map", perf_dir, pid);
|
||||
|
||||
file = std::fopen(filename.c_str(), "w");
|
||||
if (!file) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::setvbuf(file, nullptr, _IONBF, 0);
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
namespace detail {
|
||||
void PerfMapRegister(const void* start, const void* end, const std::string& friendly_name) {
|
||||
std::lock_guard guard{mutex};
|
||||
|
||||
if (!file) {
|
||||
OpenFile();
|
||||
if (!file) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
const std::string line = fmt::format("{:016x} {:016x} {:s}\n", reinterpret_cast<u64>(start), reinterpret_cast<u64>(end) - reinterpret_cast<u64>(start), friendly_name);
|
||||
std::fwrite(line.data(), sizeof *line.data(), line.size(), file);
|
||||
}
|
||||
} // namespace detail
|
||||
|
||||
void PerfMapClear() {
|
||||
std::lock_guard guard{mutex};
|
||||
|
||||
if (!file) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::fclose(file);
|
||||
file = nullptr;
|
||||
OpenFile();
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
|
||||
#else
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
namespace detail {
|
||||
// Stub used when __linux__ is not defined: ignores its arguments and does nothing.
void PerfMapRegister(const void*, const void*, const std::string&) {}
|
||||
} // namespace detail
|
||||
|
||||
// Stub used when __linux__ is not defined: does nothing.
void PerfMapClear() {}
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
|
||||
#endif
|
27
src/backend/x64/perf_map.h
Normal file
27
src/backend/x64/perf_map.h
Normal file
|
@ -0,0 +1,27 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2018 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <string>
|
||||
|
||||
#include "common/cast_util.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
namespace detail {
|
||||
// Untyped backend of PerfMapRegister: takes the start of the range as a plain
// const void*. Defined in perf_map.cpp.
void PerfMapRegister(const void* start, const void* end, const std::string& friendly_name);
|
||||
} // namespace detail
|
||||
|
||||
template<typename T>
|
||||
void PerfMapRegister(T start, const void* end, const std::string& friendly_name) {
|
||||
detail::PerfMapRegister(Common::BitCast<const void*>(start), end, friendly_name);
|
||||
}
|
||||
|
||||
// Drops all entries registered so far. Defined in perf_map.cpp; a no-op when
// __linux__ is not defined.
void PerfMapClear();
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
Loading…
Reference in a new issue