a32_emit_x64: Implement fastmem

MerryMage 2020-04-08 16:56:57 +01:00
parent f9b9081d4c
commit 4636055646
11 changed files with 180 additions and 15 deletions

View file

@ -101,6 +101,15 @@ struct UserConfig {
/// This can be avoided by carefully allocating the memory region.
bool absolute_offset_page_table = false;
/// Fastmem Pointer
/// This should point to the beginning of a 4GB address space which is arranged just like
/// what you wish for emulated memory to be. If the host page faults on an address, the JIT
/// will fall back to calling the MemoryRead*/MemoryWrite* callbacks.
void* fastmem_pointer = nullptr;
/// Determines if instructions that page fault should cause recompilation of that block
/// with fastmem disabled.
bool recompile_on_fastmem_failure = true;
// Coprocessors
std::array<std::shared_ptr<Coprocessor>, 16> coprocessors{};
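A usage sketch of the two new options (not part of this commit): the frontend reserves a contiguous 4GB arena, maps real guest memory into it at the matching offsets, and leaves everything else inaccessible so stray accesses fault into the callback path. The MakeFastmemConfig helper and the mmap-based reservation below are illustrative assumptions for a 64-bit POSIX host; the pre-existing UserConfig::callbacks member is assumed.

// Sketch only: error handling and the actual guest page mappings are omitted.
#include <cstddef>
#include <sys/mman.h>
#include <dynarmic/A32/a32.h>
#include <dynarmic/A32/config.h>

Dynarmic::A32::UserConfig MakeFastmemConfig(Dynarmic::A32::UserCallbacks& env) {
    // Reserve a 4GB arena without committing memory. Guest RAM is later
    // mmap'd into this arena at its guest addresses; everything else stays
    // PROT_NONE, so an access to unmapped guest memory raises a host page
    // fault and falls back to the MemoryRead*/MemoryWrite* callbacks.
    void* const arena = mmap(nullptr, std::size_t{1} << 32, PROT_NONE,
                             MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);

    Dynarmic::A32::UserConfig config;
    config.callbacks = &env;
    config.fastmem_pointer = arena;
    config.recompile_on_fastmem_failure = true;  // faulting blocks get recompiled without fastmem
    return config;
}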

View file

@ -248,6 +248,7 @@ if (ARCHITECTURE_x86_64)
backend/x64/emit_x64_sm4.cpp
backend/x64/emit_x64_vector.cpp
backend/x64/emit_x64_vector_floating_point.cpp
backend/x64/exception_handler.h
backend/x64/hostloc.cpp
backend/x64/hostloc.h
backend/x64/jitstate_info.h

View file

@ -72,6 +72,10 @@ A32EmitX64::A32EmitX64(BlockOfCode& code, A32::UserConfig config, A32::Jit* jit_
GenTerminalHandlers();
code.PreludeComplete();
ClearFastDispatchTable();
exception_handler.SetFastmemCallback([this](u64 rip_){
return FastmemCallback(rip_);
});
}
A32EmitX64::~A32EmitX64() = default;
@ -91,6 +95,9 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
if (config.page_table) {
gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14));
}
if (config.fastmem_pointer) {
gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R13));
}
return gprs;
}();
@ -146,6 +153,7 @@ void A32EmitX64::ClearCache() {
EmitX64::ClearCache();
block_ranges.ClearCache();
ClearFastDispatchTable();
fastmem_patch_info.clear();
}
void A32EmitX64::InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges) {
@ -777,6 +785,32 @@ void A32EmitX64::EmitA32SetExclusive(A32EmitContext& ctx, IR::Inst* inst) {
code.mov(dword[r15 + offsetof(A32JitState, exclusive_address)], address);
}
std::optional<A32EmitX64::DoNotFastmemMarker> A32EmitX64::ShouldFastmem(A32EmitContext& ctx, IR::Inst* inst) const {
if (!config.fastmem_pointer || !exception_handler.SupportsFastmem()) {
return std::nullopt;
}
const auto marker = std::make_tuple(ctx.Location(), ctx.GetInstOffset(inst));
if (do_not_fastmem.count(marker) > 0) {
return std::nullopt;
}
return marker;
}
FakeCall A32EmitX64::FastmemCallback(u64 rip_) {
const auto iter = fastmem_patch_info.find(rip_);
ASSERT(iter != fastmem_patch_info.end());
if (config.recompile_on_fastmem_failure) {
const auto marker = iter->second.marker;
do_not_fastmem.emplace(marker);
InvalidateBasicBlocks({std::get<0>(marker)});
}
FakeCall ret;
ret.call_rip = iter->second.callback;
ret.ret_rip = iter->second.resume_rip;
return ret;
}
static Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, RegAlloc& reg_alloc,
const A32::UserConfig& config, Xbyak::Label& abort,
Xbyak::Reg64 vaddr,
@ -823,13 +857,48 @@ void A32EmitX64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst) {
return;
}
Xbyak::Label abort, end;
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
const Xbyak::Reg64 value = ctx.reg_alloc.ScratchGpr();
const auto wrapped_fn = read_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())];
if (const auto marker = ShouldFastmem(ctx, inst)) {
const auto location = code.getCurr();
switch (bitsize) {
case 8:
code.movzx(value.cvt32(), code.byte[r13 + vaddr]);
break;
case 16:
code.movzx(value.cvt32(), word[r13 + vaddr]);
break;
case 32:
code.mov(value.cvt32(), dword[r13 + vaddr]);
break;
case 64:
code.mov(value, qword[r13 + vaddr]);
break;
default:
ASSERT_MSG(false, "Invalid bitsize");
break;
}
ctx.reg_alloc.DefineValue(inst, value);
fastmem_patch_info.emplace(
Common::BitCast<u64>(location),
FastmemPatchInfo{
Common::BitCast<u64>(code.getCurr()),
Common::BitCast<u64>(wrapped_fn),
*marker,
}
);
return;
}
Xbyak::Label abort, end;
const auto src_ptr = EmitVAddrLookup(code, ctx.reg_alloc, config, abort, vaddr, value);
switch (bitsize) {
case 8:
@ -845,7 +914,7 @@ void A32EmitX64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst) {
code.mov(value, qword[src_ptr]);
break;
default:
ASSERT_MSG(false, "Invalid bitsize");
break;
}
code.jmp(end);
@ -881,13 +950,46 @@ void A32EmitX64::WriteMemory(A32EmitContext& ctx, IR::Inst* inst) {
return;
}
Xbyak::Label abort, end;
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
const auto wrapped_fn = write_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())];
if (const auto marker = ShouldFastmem(ctx, inst)) {
const auto location = code.getCurr();
switch (bitsize) {
case 8:
code.mov(code.byte[r13 + vaddr], value.cvt8());
break;
case 16:
code.mov(word[r13 + vaddr], value.cvt16());
break;
case 32:
code.mov(dword[r13 + vaddr], value.cvt32());
break;
case 64:
code.mov(qword[r13 + vaddr], value);
break;
default:
ASSERT_MSG(false, "Invalid bitsize");
break;
}
fastmem_patch_info.emplace(
Common::BitCast<u64>(location),
FastmemPatchInfo{
Common::BitCast<u64>(code.getCurr()),
Common::BitCast<u64>(wrapped_fn),
*marker,
}
);
return;
}
Xbyak::Label abort, end;
const auto dest_ptr = EmitVAddrLookup(code, ctx.reg_alloc, config, abort, vaddr);
switch (bitsize) {
case 8:
@ -903,7 +1005,7 @@ void A32EmitX64::WriteMemory(A32EmitContext& ctx, IR::Inst* inst) {
code.mov(qword[dest_ptr], value);
break;
default:
ASSERT_MSG(false, "Invalid bitsize");
break;
}
code.jmp(end);
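Conceptually, the fast path emitted above reduces each guest load/store to a single host access off the pinned fastmem base. There is no explicit bounds check: the slow path is reached only when the host access faults and the exception handler redirects execution to wrapped_fn via the FastmemCallback/FakeCall machinery. A rough, non-authoritative C++ rendering of the 32-bit read:

// Sketch only: models code.mov(value.cvt32(), dword[r13 + vaddr]) above.
// fastmem_base is the 4GB arena that stays pinned in r13.
#include <cstdint>
#include <cstring>

std::uint32_t FastmemRead32(const std::uint8_t* fastmem_base, std::uint32_t vaddr) {
    std::uint32_t value;
    std::memcpy(&value, fastmem_base + vaddr, sizeof(value));  // the single emitted mov; may fault
    return value;                                              // fault -> FastmemCallback -> MemoryRead32 fallback
}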

View file

@ -7,6 +7,10 @@
#pragma once
#include <array>
#include <optional>
#include <set>
#include <tuple>
#include <unordered_map>
#include <dynarmic/A32/a32.h>
#include <dynarmic/A32/config.h>
@ -46,7 +50,6 @@ protected:
const A32::UserConfig config;
A32::Jit* jit_interface;
BlockRangeInformation<u32> block_ranges;
ExceptionHandler exception_handler;
struct FastDispatchEntry {
u64 location_descriptor;
@ -78,6 +81,18 @@ protected:
// Helpers
std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const override;
// Fastmem information
using DoNotFastmemMarker = std::tuple<IR::LocationDescriptor, std::ptrdiff_t>;
struct FastmemPatchInfo {
u64 resume_rip;
u64 callback;
DoNotFastmemMarker marker;
};
std::unordered_map<u64, FastmemPatchInfo> fastmem_patch_info;
std::set<DoNotFastmemMarker> do_not_fastmem;
std::optional<DoNotFastmemMarker> ShouldFastmem(A32EmitContext& ctx, IR::Inst* inst) const;
FakeCall FastmemCallback(u64 rip);
// Memory access helpers
template<std::size_t bitsize>
void ReadMemory(A32EmitContext& ctx, IR::Inst* inst);

View file

@ -42,13 +42,13 @@ static RunCodeCallbacks GenRunCodeCallbacks(A32::UserCallbacks* cb, CodePtr (*Lo
}
static std::function<void(BlockOfCode&)> GenRCP(const A32::UserConfig& config) {
return [config](BlockOfCode& code) {
if (config.page_table) {
code.mov(code.r14, Common::BitCast<u64>(config.page_table));
}
if (config.fastmem_pointer) {
code.mov(code.r13, Common::BitCast<u64>(config.fastmem_pointer));
}
};
}
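Taken together with the Emit() change above that removes R13 and R14 from the allocatable GPRs, the code generated via GenRCP pins both bases: r14 holds the page table pointer and r13 the fastmem arena, each loaded only when the corresponding option is configured, so compiled blocks can rely on them without reloading.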

View file

@ -4,6 +4,7 @@
* General Public License version 2 or any later version.
*/
#include <iterator>
#include <unordered_map>
#include "backend/x64/block_of_code.h"
@ -28,6 +29,10 @@ using namespace Xbyak::util;
EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block)
: reg_alloc(reg_alloc), block(block) {}
size_t EmitContext::GetInstOffset(IR::Inst* inst) const {
return static_cast<size_t>(std::distance(block.begin(), IR::Block::iterator(inst)));
}
void EmitContext::EraseInstruction(IR::Inst* inst) {
block.Instructions().erase(inst);
inst->ClearArgs();

View file

@ -43,6 +43,7 @@ using VectorArray = std::array<T, A64FullVectorWidth::value / Common::BitSize<T>
struct EmitContext {
EmitContext(RegAlloc& reg_alloc, IR::Block& block);
size_t GetInstOffset(IR::Inst* inst) const;
void EraseInstruction(IR::Inst* inst);
virtual FP::FPCR FPCR() const = 0;

View file

@ -6,18 +6,30 @@
#pragma once
#include <functional>
#include <memory>
#include "common/common_types.h"
namespace Dynarmic::Backend::X64 {
class BlockOfCode;
struct FakeCall {
u64 call_rip;
u64 ret_rip;
};
class ExceptionHandler final {
public:
ExceptionHandler();
~ExceptionHandler();
void Register(BlockOfCode& code);
bool SupportsFastmem() const noexcept;
void SetFastmemCallback(std::function<FakeCall(u64)> cb);
private:
struct Impl;
std::unique_ptr<Impl> impl;
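Neither exception handler implementation in this commit reports SupportsFastmem() as true yet (both stubs below return false). As a hedged sketch of how a future POSIX backend might consume this interface, assuming Linux/glibc ucontext register names and a handler installed with sigaction(SA_SIGINFO):

// Sketch only, not part of this commit: turn a fault in JITted code into a
// "fake call" to the memory fallback thunk, as described by FakeCall.
#include <csignal>
#include <cstdint>
#include <functional>
#include <ucontext.h>
#include "backend/x64/exception_handler.h"

static std::function<Dynarmic::Backend::X64::FakeCall(std::uint64_t)> g_fastmem_cb;

static void SigsegvHandler(int, siginfo_t*, void* raw_context) {
    auto* ctx = static_cast<ucontext_t*>(raw_context);
    const auto fc = g_fastmem_cb(static_cast<std::uint64_t>(ctx->uc_mcontext.gregs[REG_RIP]));

    // Emulate `call wrapped_fn`: push the address to resume at, then branch to
    // the fallback thunk. When it returns, execution resumes at ret_rip, just
    // past the faulting access.
    auto rsp = static_cast<std::uint64_t>(ctx->uc_mcontext.gregs[REG_RSP]) - sizeof(std::uint64_t);
    *reinterpret_cast<std::uint64_t*>(rsp) = fc.ret_rip;
    ctx->uc_mcontext.gregs[REG_RSP] = static_cast<greg_t>(rsp);
    ctx->uc_mcontext.gregs[REG_RIP] = static_cast<greg_t>(fc.call_rip);
}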

View file

@ -18,4 +18,12 @@ void ExceptionHandler::Register(BlockOfCode&) {
// Do nothing
}
bool ExceptionHandler::SupportsFastmem() const noexcept {
return false;
}
void ExceptionHandler::SetFastmemCallback(std::function<FakeCall(u64)>) {
// Do nothing
}
} // namespace Dynarmic::Backend::X64

View file

@ -198,4 +198,12 @@ void ExceptionHandler::Register(BlockOfCode& code) {
impl = std::make_unique<Impl>(rfuncs, code.getCode());
}
bool ExceptionHandler::SupportsFastmem() const noexcept {
return false;
}
void ExceptionHandler::SetFastmemCallback(std::function<FakeCall(u64)>) {
// Do nothing
}
} // namespace Dynarmic::Backend::X64

View file

@ -33,13 +33,17 @@ private:
std::ostream& operator<<(std::ostream& o, const LocationDescriptor& descriptor);
inline bool operator<(const LocationDescriptor& x, const LocationDescriptor& y) noexcept {
return x.Value() < y.Value();
}
} // namespace Dynarmic::IR
namespace std {
template <>
struct less<Dynarmic::IR::LocationDescriptor> {
bool operator()(const Dynarmic::IR::LocationDescriptor& x, const Dynarmic::IR::LocationDescriptor& y) const noexcept {
return x < y;
}
};
template <>