Implement perfmap

This commit is contained in:
MerryMage 2018-07-27 12:42:10 +01:00
parent f73104633b
commit de4494ffa5
10 changed files with 184 additions and 12 deletions

View file

@ -244,6 +244,8 @@ if (ARCHITECTURE_x86_64)
backend/x64/hostloc.h
backend/x64/jitstate_info.h
backend/x64/oparg.h
backend/x64/perf_map.cpp
backend/x64/perf_map.h
backend/x64/reg_alloc.cpp
backend/x64/reg_alloc.h
)

View file

@ -8,6 +8,7 @@
#include <unordered_set>
#include <utility>
#include <fmt/format.h>
#include <fmt/ostream.h>
#include <dynarmic/A32/coprocessor.h>
@ -18,6 +19,7 @@
#include "backend/x64/block_of_code.h"
#include "backend/x64/devirtualize.h"
#include "backend/x64/emit_x64.h"
#include "backend/x64/perf_map.h"
#include "common/address_range.h"
#include "common/assert.h"
#include "common/bit_util.h"
@ -132,17 +134,15 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
EmitX64::EmitTerminal(block.GetTerminal(), block.Location());
code.int3();
const A32::LocationDescriptor descriptor{block.Location()};
Patch(descriptor, entrypoint);
const size_t size = static_cast<size_t>(code.getCurr() - entrypoint);
const A32::LocationDescriptor descriptor{block.Location()};
const A32::LocationDescriptor end_location{block.EndLocation()};
const auto range = boost::icl::discrete_interval<u32>::closed(descriptor.PC(), end_location.PC() - 1);
A32EmitX64::BlockDescriptor block_desc{entrypoint, size};
block_descriptors.emplace(descriptor.UniqueHash(), block_desc);
block_ranges.AddRange(range, descriptor);
return block_desc;
return RegisterBlock(descriptor, entrypoint, size);
}
void A32EmitX64::ClearCache() {
@ -161,6 +161,7 @@ void A32EmitX64::GenMemoryAccessors() {
Devirtualize<&A32::UserCallbacks::MemoryRead8>(config.callbacks).EmitCall(code);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN);
code.ret();
PerfMapRegister(read_memory_8, code.getCurr(), "a32_read_memory_8");
code.align();
read_memory_16 = code.getCurr<const void*>();
@ -168,6 +169,7 @@ void A32EmitX64::GenMemoryAccessors() {
Devirtualize<&A32::UserCallbacks::MemoryRead16>(config.callbacks).EmitCall(code);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN);
code.ret();
PerfMapRegister(read_memory_16, code.getCurr(), "a32_read_memory_16");
code.align();
read_memory_32 = code.getCurr<const void*>();
@ -175,6 +177,7 @@ void A32EmitX64::GenMemoryAccessors() {
Devirtualize<&A32::UserCallbacks::MemoryRead32>(config.callbacks).EmitCall(code);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN);
code.ret();
PerfMapRegister(read_memory_32, code.getCurr(), "a32_read_memory_32");
code.align();
read_memory_64 = code.getCurr<const void*>();
@ -182,6 +185,7 @@ void A32EmitX64::GenMemoryAccessors() {
Devirtualize<&A32::UserCallbacks::MemoryRead64>(config.callbacks).EmitCall(code);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN);
code.ret();
PerfMapRegister(read_memory_64, code.getCurr(), "a32_read_memory_64");
code.align();
write_memory_8 = code.getCurr<const void*>();
@ -189,6 +193,7 @@ void A32EmitX64::GenMemoryAccessors() {
Devirtualize<&A32::UserCallbacks::MemoryWrite8>(config.callbacks).EmitCall(code);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN);
code.ret();
PerfMapRegister(write_memory_8, code.getCurr(), "a32_write_memory_8");
code.align();
write_memory_16 = code.getCurr<const void*>();
@ -196,6 +201,7 @@ void A32EmitX64::GenMemoryAccessors() {
Devirtualize<&A32::UserCallbacks::MemoryWrite16>(config.callbacks).EmitCall(code);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN);
code.ret();
PerfMapRegister(write_memory_16, code.getCurr(), "a32_write_memory_16");
code.align();
write_memory_32 = code.getCurr<const void*>();
@ -203,6 +209,7 @@ void A32EmitX64::GenMemoryAccessors() {
Devirtualize<&A32::UserCallbacks::MemoryWrite32>(config.callbacks).EmitCall(code);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN);
code.ret();
PerfMapRegister(write_memory_32, code.getCurr(), "a32_write_memory_32");
code.align();
write_memory_64 = code.getCurr<const void*>();
@ -210,6 +217,7 @@ void A32EmitX64::GenMemoryAccessors() {
Devirtualize<&A32::UserCallbacks::MemoryWrite64>(config.callbacks).EmitCall(code);
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, ABI_RETURN);
code.ret();
PerfMapRegister(write_memory_64, code.getCurr(), "a32_write_memory_64");
}
void A32EmitX64::EmitA32GetRegister(A32EmitContext& ctx, IR::Inst* inst) {
@ -1141,6 +1149,15 @@ void A32EmitX64::EmitA32CoprocStoreWords(A32EmitContext& ctx, IR::Inst* inst) {
CallCoprocCallback(code, ctx.reg_alloc, jit_interface, *action, nullptr, args[1]);
}
// Builds the symbol name under which an emitted A32 block is published.
//
// The name has the shape "a32_<t|a><PC>_<be|le>_fpcr<FPSCR>":
//   - "t" when the descriptor's T flag is set, "a" otherwise;
//   - PC as eight upper-case hex digits;
//   - "be" when the descriptor's E flag is set, "le" otherwise;
//   - the raw FPSCR value as eight upper-case hex digits.
std::string A32EmitX64::LocationDescriptorToFriendlyName(const IR::LocationDescriptor& ir_descriptor) const {
    const A32::LocationDescriptor a32_location{ir_descriptor};

    const char* const instruction_set_tag = a32_location.TFlag() ? "t" : "a";
    const char* const endianness_tag = a32_location.EFlag() ? "be" : "le";

    return fmt::format("a32_{}{:08X}_{}_fpcr{:08X}",
                       instruction_set_tag,
                       a32_location.PC(),
                       endianness_tag,
                       a32_location.FPSCR().Value());
}
void A32EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) {
ASSERT_MSG(A32::LocationDescriptor{terminal.next}.TFlag() == A32::LocationDescriptor{initial_location}.TFlag(), "Unimplemented");
ASSERT_MSG(A32::LocationDescriptor{terminal.next}.EFlag() == A32::LocationDescriptor{initial_location}.EFlag(), "Unimplemented");

View file

@ -68,6 +68,9 @@ protected:
#undef A32OPC
#undef A64OPC
// Helpers
std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const override;
// Terminal instruction emitters
void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) override;
void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location) override;

View file

@ -7,6 +7,7 @@
#include <initializer_list>
#include <dynarmic/A64/exclusive_monitor.h>
#include <fmt/format.h>
#include <fmt/ostream.h>
#include "backend/x64/a64_emit_x64.h"
@ -15,6 +16,7 @@
#include "backend/x64/block_of_code.h"
#include "backend/x64/devirtualize.h"
#include "backend/x64/emit_x64.h"
#include "backend/x64/perf_map.h"
#include "common/address_range.h"
#include "common/assert.h"
#include "common/bit_util.h"
@ -118,17 +120,15 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
EmitX64::EmitTerminal(block.GetTerminal(), block.Location());
code.int3();
const A64::LocationDescriptor descriptor{block.Location()};
Patch(descriptor, entrypoint);
const size_t size = static_cast<size_t>(code.getCurr() - entrypoint);
const A64::LocationDescriptor descriptor{block.Location()};
const A64::LocationDescriptor end_location{block.EndLocation()};
const auto range = boost::icl::discrete_interval<u64>::closed(descriptor.PC(), end_location.PC() - 1);
A64EmitX64::BlockDescriptor block_desc{entrypoint, size};
block_descriptors.emplace(descriptor.UniqueHash(), block_desc);
block_ranges.AddRange(range, descriptor);
return block_desc;
return RegisterBlock(descriptor, entrypoint, size);
}
void A64EmitX64::ClearCache() {
@ -166,6 +166,7 @@ void A64EmitX64::GenMemory128Accessors() {
code.add(rsp, 8);
#endif
code.ret();
PerfMapRegister(memory_read_128, code.getCurr(), "a64_memory_read_128");
code.align();
memory_write_128 = code.getCurr<void(*)()>();
@ -189,6 +190,7 @@ void A64EmitX64::GenMemory128Accessors() {
code.add(rsp, 8);
#endif
code.ret();
// Register the write accessor under its own entry point. Using memory_read_128
// here would emit a "write" symbol that starts at the read accessor and spans
// both routines, overlapping the "a64_memory_read_128" entry.
PerfMapRegister(memory_write_128, code.getCurr(), "a64_memory_write_128");
}
void A64EmitX64::GenFastmemFallbacks() {
@ -224,6 +226,7 @@ void A64EmitX64::GenFastmemFallbacks() {
}
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(value_idx));
code.ret();
PerfMapRegister(read_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)], code.getCurr(), "a64_read_fallback_128");
code.align();
write_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)] = code.getCurr<void(*)()>();
@ -237,6 +240,7 @@ void A64EmitX64::GenFastmemFallbacks() {
code.call(memory_write_128);
ABI_PopCallerSaveRegistersAndAdjustStack(code);
code.ret();
PerfMapRegister(write_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)], code.getCurr(), "a64_write_fallback_128");
if (value_idx == 4 || value_idx == 15) {
continue;
@ -255,6 +259,7 @@ void A64EmitX64::GenFastmemFallbacks() {
}
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx));
code.ret();
PerfMapRegister(read_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)], code.getCurr(), fmt::format("a64_read_fallback_{}", bitsize));
}
for (auto& [bitsize, callback] : write_callbacks) {
@ -279,6 +284,7 @@ void A64EmitX64::GenFastmemFallbacks() {
callback.EmitCall(code);
ABI_PopCallerSaveRegistersAndAdjustStack(code);
code.ret();
PerfMapRegister(write_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)], code.getCurr(), fmt::format("a64_write_fallback_{}", bitsize));
}
}
}
@ -999,6 +1005,13 @@ void A64EmitX64::EmitA64ExclusiveWriteMemory128(A64EmitContext& ctx, IR::Inst* i
EmitExclusiveWrite(ctx, inst, 128);
}
// Builds the symbol name under which an emitted A64 block is published.
//
// The name has the shape "a64_<PC>_fpcr<FPCR>", with PC printed as sixteen and
// the raw FPCR value as eight upper-case hex digits.
std::string A64EmitX64::LocationDescriptorToFriendlyName(const IR::LocationDescriptor& ir_descriptor) const {
    const A64::LocationDescriptor a64_location{ir_descriptor};

    const auto program_counter = a64_location.PC();
    const auto fpcr_bits = a64_location.FPCR().Value();

    return fmt::format("a64_{:016X}_fpcr{:08X}", program_counter, fpcr_bits);
}
void A64EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor) {
code.SwitchMxcsrOnExit();
Devirtualize<&A64::UserCallbacks::InterpreterFallback>(conf.callbacks).EmitCall(code,

View file

@ -74,6 +74,9 @@ protected:
#undef A32OPC
#undef A64OPC
// Helpers
std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const override;
// Terminal instruction emitters
void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) override;
void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location) override;

View file

@ -13,6 +13,7 @@
#include "backend/x64/a32_jitstate.h"
#include "backend/x64/abi.h"
#include "backend/x64/block_of_code.h"
#include "backend/x64/perf_map.h"
#include "common/assert.h"
#ifdef _WIN32
@ -223,6 +224,8 @@ void BlockOfCode::GenRunCode() {
align();
return_from_run_code[MXCSR_ALREADY_EXITED | FORCE_RETURN] = getCurr<const void*>();
emit_return_from_run_code(true, true);
PerfMapRegister(run_code_from, getCurr(), "dynarmic_dispatcher");
}
void BlockOfCode::SwitchMxcsrOnEntry() {

View file

@ -8,6 +8,7 @@
#include "backend/x64/block_of_code.h"
#include "backend/x64/emit_x64.h"
#include "backend/x64/perf_map.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
@ -285,6 +286,15 @@ void EmitX64::EmitCondPrelude(const IR::Block& block) {
code.L(pass);
}
// Common bookkeeping for a block that has just been emitted.
//
// descriptor: location the block was compiled for.
// entrypoint: first byte of the emitted host code.
// size:       size recorded in the returned BlockDescriptor.
//
// Steps, in order:
//   1. publish [entrypoint, current code pointer) to the perf map under the
//      architecture-specific friendly name;
//   2. patch existing references to this location so they target entrypoint;
//   3. remember the block in block_descriptors, keyed by descriptor.Value().
//
// Returns the descriptor that was stored.
EmitX64::BlockDescriptor EmitX64::RegisterBlock(const IR::LocationDescriptor& descriptor, CodePtr entrypoint, size_t size) {
    const std::string friendly_name = LocationDescriptorToFriendlyName(descriptor);
    PerfMapRegister(entrypoint, code.getCurr(), friendly_name);

    Patch(descriptor, entrypoint);

    const BlockDescriptor registered{entrypoint, size};
    block_descriptors.emplace(descriptor.Value(), registered);
    return registered;
}
void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location) {
Common::VisitVariant<void>(terminal, [this, &initial_location](auto x) {
using T = std::decay_t<decltype(x)>;
@ -325,6 +335,8 @@ void EmitX64::Unpatch(const IR::LocationDescriptor& desc) {
// Drops all bookkeeping for previously emitted blocks.
void EmitX64::ClearCache() {
    // Forget every registered block and all recorded patch information.
    block_descriptors.clear();
    patch_information.clear();

    // Reset the perf map as well, so it no longer lists the discarded blocks.
    PerfMapClear();
}
void EmitX64::InvalidateBasicBlocks(const std::unordered_set<IR::LocationDescriptor>& locations) {

View file

@ -7,6 +7,7 @@
#pragma once
#include <array>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
@ -85,9 +86,11 @@ protected:
#undef A64OPC
// Helpers
virtual std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const = 0;
void EmitAddCycles(size_t cycles);
Xbyak::Label EmitCond(IR::Cond cond);
void EmitCondPrelude(const IR::Block& block);
BlockDescriptor RegisterBlock(const IR::LocationDescriptor& location_descriptor, CodePtr entrypoint, size_t size);
void PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, IR::LocationDescriptor target);
// Terminal instruction emitters

View file

@ -0,0 +1,89 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include <cstddef>
#include <string>
#ifdef __linux__
#include <cstdio>
#include <cstdlib>
#include <mutex>
#include <sys/types.h>
#include <unistd.h>
#include <fmt/format.h>
#include "common/common_types.h"
namespace Dynarmic::BackendX64 {
namespace {

// Held by PerfMapRegister and PerfMapClear for the whole time they touch `file`.
std::mutex mutex;

// The perf map currently being written, or nullptr when none is open.
std::FILE* file = nullptr;

// (Re)creates "<PERF_BUILDID_DIR>/perf-<pid>.map" and stores the handle in `file`.
//
// `file` is left as nullptr when the PERF_BUILDID_DIR environment variable is
// not set or when the file cannot be opened. The file is opened in "w" mode, so
// an existing map for this pid is truncated.
// Both callers in this file invoke this with `mutex` held.
void OpenFile() {
    const char* const map_directory = std::getenv("PERF_BUILDID_DIR");
    if (map_directory == nullptr) {
        file = nullptr;
        return;
    }

    const std::string map_path = fmt::format("{:s}/perf-{:d}.map", map_directory, getpid());
    file = std::fopen(map_path.c_str(), "w");

    if (file != nullptr) {
        // Unbuffered: each entry is handed to the OS as soon as it is written.
        std::setvbuf(file, nullptr, _IONBF, 0);
    }
}

} // anonymous namespace
namespace detail {
void PerfMapRegister(const void* start, const void* end, const std::string& friendly_name) {
std::lock_guard guard{mutex};
if (!file) {
OpenFile();
if (!file) {
return;
}
}
const std::string line = fmt::format("{:016x} {:016x} {:s}\n", reinterpret_cast<u64>(start), reinterpret_cast<u64>(end) - reinterpret_cast<u64>(start), friendly_name);
std::fwrite(line.data(), sizeof *line.data(), line.size(), file);
}
} // namespace detail
// Discards every entry written so far.
//
// If a map file is open it is closed and opened again; because OpenFile() opens
// it in "w" mode, this truncates the file. When no map file is open this is a
// no-op.
void PerfMapClear() {
    std::lock_guard guard{mutex};

    if (file) {
        std::fclose(file);
        file = nullptr;
        OpenFile();
    }
}
} // namespace Dynarmic::BackendX64
#else
namespace Dynarmic::BackendX64 {

namespace detail {

// Fallback for non-Linux builds: registration is accepted and ignored.
void PerfMapRegister(const void* /*start*/, const void* /*end*/, const std::string& /*friendly_name*/) {
}

} // namespace detail

// Fallback for non-Linux builds: there is never anything to clear.
void PerfMapClear() {
}

} // namespace Dynarmic::BackendX64
#endif

View file

@ -0,0 +1,27 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <cstddef>
#include <string>
#include "common/cast_util.h"
namespace Dynarmic::BackendX64 {

namespace detail {

// Untyped implementation behind the PerfMapRegister template below.
// Records the region [start, end) under friendly_name. Defined in perf_map.cpp,
// where it writes to a perf map file on Linux and is a no-op elsewhere.
void PerfMapRegister(const void* start, const void* end, const std::string& friendly_name);

} // namespace detail

// Registers the emitted code region [start, end) under friendly_name.
//
// T may be any type that Common::BitCast can convert to `const void*`; callers
// in this backend pass data pointers and function pointers alike.
template<typename T>
void PerfMapRegister(T start, const void* end, const std::string& friendly_name) {
    const void* const start_address = Common::BitCast<const void*>(start);
    detail::PerfMapRegister(start_address, end, friendly_name);
}

// Discards everything registered so far. Defined in perf_map.cpp.
void PerfMapClear();

} // namespace Dynarmic::BackendX64