a32_emit_x64: Implement fastmem
parent f9b9081d4c
commit 4636055646

11 changed files with 180 additions and 15 deletions
@@ -101,6 +101,15 @@ struct UserConfig {
     /// This can be avoided by carefully allocating the memory region.
     bool absolute_offset_page_table = false;
 
+    // Fastmem Pointer
+    // This should point to the beginning of a 4GB address space which is arranged just like
+    // what you wish for emulated memory to be. If the host page faults on an address, the JIT
+    // will fall back to calling the MemoryRead*/MemoryWrite* callbacks.
+    void* fastmem_pointer = nullptr;
+    /// Determines if instructions that pagefault should cause recompilation of that block
+    /// with fastmem disabled.
+    bool recompile_on_fastmem_failure = true;
+
     // Coprocessors
     std::array<std::shared_ptr<Coprocessor>, 16> coprocessors{};
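Illustration (not part of this commit): one way a frontend might set up the 4GB arena that fastmem_pointer describes. This sketch assumes POSIX mmap/mprotect and a made-up guest RAM layout; dynarmic only consumes the resulting pointer.

    #include <cstdint>
    #include <sys/mman.h>

    void* SetUpFastmemArena() {
        // Reserve 4GB of contiguous address space with no access rights.
        // Guest addresses without backing memory will fault, and the JIT
        // then falls back to the MemoryRead*/MemoryWrite* callbacks.
        void* const arena = mmap(nullptr, 0x1'0000'0000ULL, PROT_NONE,
                                 MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
        if (arena == MAP_FAILED)
            return nullptr;

        // Commit real pages only where emulated RAM lives, e.g. a
        // hypothetical 64MB region at guest address 0x08000000.
        mprotect(static_cast<std::uint8_t*>(arena) + 0x08000000,
                 64 * 1024 * 1024, PROT_READ | PROT_WRITE);

        return arena; // assign to A32::UserConfig::fastmem_pointer
    }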
@@ -248,6 +248,7 @@ if (ARCHITECTURE_x86_64)
         backend/x64/emit_x64_sm4.cpp
         backend/x64/emit_x64_vector.cpp
         backend/x64/emit_x64_vector_floating_point.cpp
+        backend/x64/exception_handler.h
         backend/x64/hostloc.cpp
         backend/x64/hostloc.h
         backend/x64/jitstate_info.h
@@ -72,6 +72,10 @@ A32EmitX64::A32EmitX64(BlockOfCode& code, A32::UserConfig config, A32::Jit* jit_
     GenTerminalHandlers();
     code.PreludeComplete();
     ClearFastDispatchTable();
+
+    exception_handler.SetFastmemCallback([this](u64 rip_){
+        return FastmemCallback(rip_);
+    });
 }
 
 A32EmitX64::~A32EmitX64() = default;
@@ -91,6 +95,9 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
         if (config.page_table) {
             gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14));
         }
+        if (config.fastmem_pointer) {
+            gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R13));
+        }
         return gprs;
     }();
@@ -146,6 +153,7 @@ void A32EmitX64::ClearCache() {
     EmitX64::ClearCache();
     block_ranges.ClearCache();
     ClearFastDispatchTable();
+    fastmem_patch_info.clear();
 }
 
 void A32EmitX64::InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges) {
@@ -777,6 +785,32 @@ void A32EmitX64::EmitA32SetExclusive(A32EmitContext& ctx, IR::Inst* inst) {
     code.mov(dword[r15 + offsetof(A32JitState, exclusive_address)], address);
 }
 
+std::optional<A32EmitX64::DoNotFastmemMarker> A32EmitX64::ShouldFastmem(A32EmitContext& ctx, IR::Inst* inst) const {
+    if (!config.fastmem_pointer || !exception_handler.SupportsFastmem()) {
+        return std::nullopt;
+    }
+
+    const auto marker = std::make_tuple(ctx.Location(), ctx.GetInstOffset(inst));
+    if (do_not_fastmem.count(marker) > 0) {
+        return std::nullopt;
+    }
+    return marker;
+}
+
+FakeCall A32EmitX64::FastmemCallback(u64 rip_) {
+    const auto iter = fastmem_patch_info.find(rip_);
+    ASSERT(iter != fastmem_patch_info.end());
+    if (config.recompile_on_fastmem_failure) {
+        const auto marker = iter->second.marker;
+        do_not_fastmem.emplace(marker);
+        InvalidateBasicBlocks({std::get<0>(marker)});
+    }
+    FakeCall ret;
+    ret.call_rip = iter->second.callback;
+    ret.ret_rip = iter->second.resume_rip;
+    return ret;
+}
+
 static Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, RegAlloc& reg_alloc,
                                      const A32::UserConfig& config, Xbyak::Label& abort,
                                      Xbyak::Reg64 vaddr,
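Illustration (not part of this diff): how a host-side fault handler might consume the FakeCall that FastmemCallback returns. This commit leaves SupportsFastmem() returning false, so the real handler wiring lands elsewhere; the sketch below assumes a Linux/x86_64 SIGSEGV handler, and fastmem_callback stands in for the std::function registered via SetFastmemCallback.

    #include <csignal>
    #include <cstdint>
    #include <functional>
    #include <ucontext.h>

    std::function<FakeCall(std::uint64_t)> fastmem_callback; // set via SetFastmemCallback

    void SigsegvHandler(int /*sig*/, siginfo_t* /*info*/, void* raw_context) {
        ucontext_t* const ctx = static_cast<ucontext_t*>(raw_context);
        const std::uint64_t faulting_rip = ctx->uc_mcontext.gregs[REG_RIP];

        const FakeCall fc = fastmem_callback(faulting_rip);

        // Emulate a `call wrapped_fn` at the faulting location: push the
        // resume address as a return address, then redirect execution to
        // the fallback thunk recorded in fastmem_patch_info.
        ctx->uc_mcontext.gregs[REG_RSP] -= sizeof(std::uint64_t);
        *reinterpret_cast<std::uint64_t*>(ctx->uc_mcontext.gregs[REG_RSP]) = fc.ret_rip;
        ctx->uc_mcontext.gregs[REG_RIP] = fc.call_rip;
    }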
@@ -823,13 +857,48 @@ void A32EmitX64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst) {
         return;
     }
 
-    Xbyak::Label abort, end;
-
     const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
     const Xbyak::Reg64 value = ctx.reg_alloc.ScratchGpr();
 
     const auto wrapped_fn = read_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())];
 
+    if (const auto marker = ShouldFastmem(ctx, inst)) {
+        const auto location = code.getCurr();
+
+        switch (bitsize) {
+        case 8:
+            code.movzx(value.cvt32(), code.byte[r13 + vaddr]);
+            break;
+        case 16:
+            code.movzx(value.cvt32(), word[r13 + vaddr]);
+            break;
+        case 32:
+            code.mov(value.cvt32(), dword[r13 + vaddr]);
+            break;
+        case 64:
+            code.mov(value, qword[r13 + vaddr]);
+            break;
+        default:
+            ASSERT_MSG(false, "Invalid bitsize");
+            break;
+        }
+
+        ctx.reg_alloc.DefineValue(inst, value);
+
+        fastmem_patch_info.emplace(
+            Common::BitCast<u64>(location),
+            FastmemPatchInfo{
+                Common::BitCast<u64>(code.getCurr()),
+                Common::BitCast<u64>(wrapped_fn),
+                *marker,
+            }
+        );
+
+        return;
+    }
+
+    Xbyak::Label abort, end;
+
     const auto src_ptr = EmitVAddrLookup(code, ctx.reg_alloc, config, abort, vaddr, value);
     switch (bitsize) {
     case 8:
@@ -845,7 +914,7 @@ void A32EmitX64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst) {
         code.mov(value, qword[src_ptr]);
         break;
     default:
-        ASSERT_MSG(false, "Invalid bit_size");
+        ASSERT_MSG(false, "Invalid bitsize");
        break;
    }
    code.jmp(end);
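Illustration (not code from this commit): in C++ terms, the fast paths emitted in ReadMemory above and WriteMemory below amount to a single host access at arena + vaddr, with r13 holding fastmem_pointer and no page-table walk. If the access hardware-faults, the exception handler redirects execution to the read/write fallback recorded in fastmem_patch_info.

    #include <cstdint>
    #include <cstring>

    std::uint32_t Read32FastPath(const std::uint8_t* arena /* = r13 */, std::uint32_t vaddr) {
        std::uint32_t value;
        std::memcpy(&value, arena + vaddr, sizeof(value)); // may hardware-fault
        return value;
    }

    void Write32FastPath(std::uint8_t* arena, std::uint32_t vaddr, std::uint32_t value) {
        std::memcpy(arena + vaddr, &value, sizeof(value)); // may hardware-fault
    }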
@@ -881,13 +950,46 @@ void A32EmitX64::WriteMemory(A32EmitContext& ctx, IR::Inst* inst) {
         return;
     }
 
-    Xbyak::Label abort, end;
-
     const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
     const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
 
     const auto wrapped_fn = write_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())];
 
+    if (const auto marker = ShouldFastmem(ctx, inst)) {
+        const auto location = code.getCurr();
+
+        switch (bitsize) {
+        case 8:
+            code.mov(code.byte[r13 + vaddr], value.cvt8());
+            break;
+        case 16:
+            code.mov(word[r13 + vaddr], value.cvt16());
+            break;
+        case 32:
+            code.mov(dword[r13 + vaddr], value.cvt32());
+            break;
+        case 64:
+            code.mov(qword[r13 + vaddr], value);
+            break;
+        default:
+            ASSERT_MSG(false, "Invalid bitsize");
+            break;
+        }
+
+        fastmem_patch_info.emplace(
+            Common::BitCast<u64>(location),
+            FastmemPatchInfo{
+                Common::BitCast<u64>(code.getCurr()),
+                Common::BitCast<u64>(wrapped_fn),
+                *marker,
+            }
+        );
+
+        return;
+    }
+
+    Xbyak::Label abort, end;
+
     const auto dest_ptr = EmitVAddrLookup(code, ctx.reg_alloc, config, abort, vaddr);
     switch (bitsize) {
     case 8:
@@ -903,7 +1005,7 @@ void A32EmitX64::WriteMemory(A32EmitContext& ctx, IR::Inst* inst) {
         code.mov(qword[dest_ptr], value);
         break;
     default:
-        ASSERT_MSG(false, "Invalid bit_size");
+        ASSERT_MSG(false, "Invalid bitsize");
         break;
     }
     code.jmp(end);
@@ -7,6 +7,10 @@
 #pragma once
 
 #include <array>
+#include <optional>
+#include <set>
+#include <tuple>
+#include <unordered_map>
 
 #include <dynarmic/A32/a32.h>
 #include <dynarmic/A32/config.h>
@@ -46,7 +50,6 @@ protected:
     const A32::UserConfig config;
     A32::Jit* jit_interface;
     BlockRangeInformation<u32> block_ranges;
     ExceptionHandler exception_handler;
 
     struct FastDispatchEntry {
         u64 location_descriptor;
@@ -78,6 +81,18 @@ protected:
     // Helpers
     std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const override;
 
+    // Fastmem information
+    using DoNotFastmemMarker = std::tuple<IR::LocationDescriptor, std::ptrdiff_t>;
+    struct FastmemPatchInfo {
+        u64 resume_rip;
+        u64 callback;
+        DoNotFastmemMarker marker;
+    };
+    std::unordered_map<u64, FastmemPatchInfo> fastmem_patch_info;
+    std::set<DoNotFastmemMarker> do_not_fastmem;
+    std::optional<DoNotFastmemMarker> ShouldFastmem(A32EmitContext& ctx, IR::Inst* inst) const;
+    FakeCall FastmemCallback(u64 rip);
+
     // Memory access helpers
     template<std::size_t bitsize>
     void ReadMemory(A32EmitContext& ctx, IR::Inst* inst);
@@ -42,13 +42,13 @@ static RunCodeCallbacks GenRunCodeCallbacks(A32::UserCallbacks* cb, CodePtr (*Lo
 }
 
 static std::function<void(BlockOfCode&)> GenRCP(const A32::UserConfig& config) {
-    if (!config.page_table) {
-        return [](BlockOfCode&){};
-    }
-
-    const u64 r14_value = Common::BitCast<u64>(config.page_table);
-    return [r14_value](BlockOfCode& code) {
-        code.mov(code.r14, r14_value);
+    return [config](BlockOfCode& code) {
+        if (config.page_table) {
+            code.mov(code.r14, Common::BitCast<u64>(config.page_table));
+        }
+        if (config.fastmem_pointer) {
+            code.mov(code.r13, Common::BitCast<u64>(config.fastmem_pointer));
+        }
     };
 }
@@ -4,6 +4,7 @@
  * General Public License version 2 or any later version.
  */
 
+#include <iterator>
 #include <unordered_map>
 
 #include "backend/x64/block_of_code.h"
@@ -28,6 +29,10 @@ using namespace Xbyak::util;
 EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block)
     : reg_alloc(reg_alloc), block(block) {}
 
+size_t EmitContext::GetInstOffset(IR::Inst* inst) const {
+    return static_cast<size_t>(std::distance(block.begin(), IR::Block::iterator(inst)));
+}
+
 void EmitContext::EraseInstruction(IR::Inst* inst) {
     block.Instructions().erase(inst);
     inst->ClearArgs();
@@ -43,6 +43,7 @@ using VectorArray = std::array<T, A64FullVectorWidth::value / Common::BitSize<T>
 struct EmitContext {
     EmitContext(RegAlloc& reg_alloc, IR::Block& block);
 
+    size_t GetInstOffset(IR::Inst* inst) const;
     void EraseInstruction(IR::Inst* inst);
 
     virtual FP::FPCR FPCR() const = 0;
@@ -6,18 +6,30 @@
 
 #pragma once
 
+#include <functional>
 #include <memory>
 
+#include "common/common_types.h"
+
 namespace Dynarmic::Backend::X64 {
 
 class BlockOfCode;
 
+struct FakeCall {
+    u64 call_rip;
+    u64 ret_rip;
+};
+
 class ExceptionHandler final {
 public:
     ExceptionHandler();
     ~ExceptionHandler();
 
     void Register(BlockOfCode& code);
 
+    bool SupportsFastmem() const noexcept;
+    void SetFastmemCallback(std::function<FakeCall(u64)> cb);
+
 private:
     struct Impl;
     std::unique_ptr<Impl> impl;
@@ -18,4 +18,12 @@ void ExceptionHandler::Register(BlockOfCode&) {
     // Do nothing
 }
 
+bool ExceptionHandler::SupportsFastmem() const noexcept {
+    return false;
+}
+
+void ExceptionHandler::SetFastmemCallback(std::function<FakeCall(u64)>) {
+    // Do nothing
+}
+
 } // namespace Dynarmic::Backend::X64
@@ -198,4 +198,12 @@ void ExceptionHandler::Register(BlockOfCode& code) {
     impl = std::make_unique<Impl>(rfuncs, code.getCode());
 }
 
+bool ExceptionHandler::SupportsFastmem() const noexcept {
+    return false;
+}
+
+void ExceptionHandler::SetFastmemCallback(std::function<FakeCall(u64)>) {
+    // Do nothing
+}
+
 } // namespace Dynarmic::Backend::X64
@@ -33,13 +33,17 @@ private:
 
 std::ostream& operator<<(std::ostream& o, const LocationDescriptor& descriptor);
 
+inline bool operator<(const LocationDescriptor& x, const LocationDescriptor& y) noexcept {
+    return x.Value() < y.Value();
+}
+
 } // namespace Dynarmic::IR
 
 namespace std {
 template <>
 struct less<Dynarmic::IR::LocationDescriptor> {
     bool operator()(const Dynarmic::IR::LocationDescriptor& x, const Dynarmic::IR::LocationDescriptor& y) const noexcept {
-        return x.Value() < y.Value();
+        return x < y;
     }
 };
 template <>
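Illustration (not code from this commit): the new operator< is what lets LocationDescriptor participate in ordered containers such as the do_not_fastmem set introduced above; the std::less specialization is kept for existing users and now simply forwards to it. A sketch of why, using the marker type from this commit:

    #include <cstddef>
    #include <set>
    #include <tuple>

    // std::tuple's lexicographic operator< requires operator< on every
    // element type, so this set only compiles now that
    // IR::LocationDescriptor provides one.
    using DoNotFastmemMarker = std::tuple<Dynarmic::IR::LocationDescriptor, std::ptrdiff_t>;
    std::set<DoNotFastmemMarker> do_not_fastmem;
    // do_not_fastmem.emplace(descriptor, offset);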