a32_emit_x64: Implement fastmem
parent f9b9081d4c
commit 4636055646

11 changed files with 180 additions and 15 deletions
@@ -101,6 +101,15 @@ struct UserConfig {
     /// This can be avoided by carefully allocating the memory region.
     bool absolute_offset_page_table = false;
 
+    // Fastmem Pointer
+    // This should point to the beginning of a 4GB address space which is arranged just like
+    // what you wish for emulated memory to be. If the host page faults on an address, the JIT
+    // will fall back to calling the MemoryRead*/MemoryWrite* callbacks.
+    void* fastmem_pointer = nullptr;
+
+    /// Determines if instructions that page-fault should cause recompilation of that block
+    /// with fastmem disabled.
+    bool recompile_on_fastmem_failure = true;
 
     // Coprocessors
     std::array<std::shared_ptr<Coprocessor>, 16> coprocessors{};
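For context, the contract above can be satisfied on a POSIX host by reserving a contiguous 4GB region and handing its base address to fastmem_pointer. The sketch below is illustrative only and is not part of this commit: MakeFastmemConfig is a hypothetical helper, and error handling (mmap returns MAP_FAILED on failure) is elided.

#include <cstddef>
#include <sys/mman.h>

#include <dynarmic/A32/config.h>

// Hypothetical helper: reserve the 4GB arena and wire it into UserConfig.
Dynarmic::A32::UserConfig MakeFastmemConfig(Dynarmic::A32::UserCallbacks* callbacks) {
    Dynarmic::A32::UserConfig config;
    config.callbacks = callbacks;

    // Reserve 4GB of address space with no access rights. Pages backing valid
    // guest memory would be mapped in by the host; anything else faults, and
    // the JIT falls back to the MemoryRead*/MemoryWrite* callbacks.
    void* arena = mmap(nullptr, std::size_t{1} << 32, PROT_NONE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    config.fastmem_pointer = arena;  // must stay mapped for the Jit's lifetime
    config.recompile_on_fastmem_failure = true;
    return config;
}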
@@ -248,6 +248,7 @@ if (ARCHITECTURE_x86_64)
         backend/x64/emit_x64_sm4.cpp
         backend/x64/emit_x64_vector.cpp
         backend/x64/emit_x64_vector_floating_point.cpp
+        backend/x64/exception_handler.h
         backend/x64/hostloc.cpp
         backend/x64/hostloc.h
         backend/x64/jitstate_info.h
@@ -72,6 +72,10 @@ A32EmitX64::A32EmitX64(BlockOfCode& code, A32::UserConfig config, A32::Jit* jit_
     GenTerminalHandlers();
     code.PreludeComplete();
     ClearFastDispatchTable();
+
+    exception_handler.SetFastmemCallback([this](u64 rip_){
+        return FastmemCallback(rip_);
+    });
 }
 
 A32EmitX64::~A32EmitX64() = default;
@@ -91,6 +95,9 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
         if (config.page_table) {
             gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14));
         }
+        if (config.fastmem_pointer) {
+            gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R13));
+        }
         return gprs;
     }();
 
@@ -146,6 +153,7 @@ void A32EmitX64::ClearCache() {
     EmitX64::ClearCache();
     block_ranges.ClearCache();
     ClearFastDispatchTable();
+    fastmem_patch_info.clear();
 }
 
 void A32EmitX64::InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges) {
@@ -777,6 +785,32 @@ void A32EmitX64::EmitA32SetExclusive(A32EmitContext& ctx, IR::Inst* inst) {
     code.mov(dword[r15 + offsetof(A32JitState, exclusive_address)], address);
 }
 
+std::optional<A32EmitX64::DoNotFastmemMarker> A32EmitX64::ShouldFastmem(A32EmitContext& ctx, IR::Inst* inst) const {
+    if (!config.fastmem_pointer || !exception_handler.SupportsFastmem()) {
+        return std::nullopt;
+    }
+
+    const auto marker = std::make_tuple(ctx.Location(), ctx.GetInstOffset(inst));
+    if (do_not_fastmem.count(marker) > 0) {
+        return std::nullopt;
+    }
+    return marker;
+}
+
+FakeCall A32EmitX64::FastmemCallback(u64 rip_) {
+    const auto iter = fastmem_patch_info.find(rip_);
+    ASSERT(iter != fastmem_patch_info.end());
+    if (config.recompile_on_fastmem_failure) {
+        const auto marker = iter->second.marker;
+        do_not_fastmem.emplace(marker);
+        InvalidateBasicBlocks({std::get<0>(marker)});
+    }
+    FakeCall ret;
+    ret.call_rip = iter->second.callback;
+    ret.ret_rip = iter->second.resume_rip;
+    return ret;
+}
+
 static Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, RegAlloc& reg_alloc,
                                      const A32::UserConfig& config, Xbyak::Label& abort,
                                      Xbyak::Reg64 vaddr,
@@ -823,13 +857,48 @@ void A32EmitX64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst) {
         return;
     }
 
-    Xbyak::Label abort, end;
-
     const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
     const Xbyak::Reg64 value = ctx.reg_alloc.ScratchGpr();
 
     const auto wrapped_fn = read_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())];
 
+    if (const auto marker = ShouldFastmem(ctx, inst)) {
+        const auto location = code.getCurr();
+
+        switch (bitsize) {
+        case 8:
+            code.movzx(value.cvt32(), code.byte[r13 + vaddr]);
+            break;
+        case 16:
+            code.movzx(value.cvt32(), word[r13 + vaddr]);
+            break;
+        case 32:
+            code.mov(value.cvt32(), dword[r13 + vaddr]);
+            break;
+        case 64:
+            code.mov(value, qword[r13 + vaddr]);
+            break;
+        default:
+            ASSERT_MSG(false, "Invalid bitsize");
+            break;
+        }
+
+        ctx.reg_alloc.DefineValue(inst, value);
+
+        fastmem_patch_info.emplace(
+            Common::BitCast<u64>(location),
+            FastmemPatchInfo{
+                Common::BitCast<u64>(code.getCurr()),
+                Common::BitCast<u64>(wrapped_fn),
+                *marker,
+            }
+        );
+
+        return;
+    }
+
+    Xbyak::Label abort, end;
+
     const auto src_ptr = EmitVAddrLookup(code, ctx.reg_alloc, config, abort, vaddr, value);
     switch (bitsize) {
     case 8:
@@ -845,7 +914,7 @@ void A32EmitX64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst) {
         code.mov(value, qword[src_ptr]);
         break;
     default:
-        ASSERT_MSG(false, "Invalid bit_size");
+        ASSERT_MSG(false, "Invalid bitsize");
         break;
     }
     code.jmp(end);
@@ -881,13 +950,46 @@ void A32EmitX64::WriteMemory(A32EmitContext& ctx, IR::Inst* inst) {
         return;
     }
 
-    Xbyak::Label abort, end;
-
     const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
     const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
 
     const auto wrapped_fn = write_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())];
 
+    if (const auto marker = ShouldFastmem(ctx, inst)) {
+        const auto location = code.getCurr();
+
+        switch (bitsize) {
+        case 8:
+            code.mov(code.byte[r13 + vaddr], value.cvt8());
+            break;
+        case 16:
+            code.mov(word[r13 + vaddr], value.cvt16());
+            break;
+        case 32:
+            code.mov(dword[r13 + vaddr], value.cvt32());
+            break;
+        case 64:
+            code.mov(qword[r13 + vaddr], value);
+            break;
+        default:
+            ASSERT_MSG(false, "Invalid bitsize");
+            break;
+        }
+
+        fastmem_patch_info.emplace(
+            Common::BitCast<u64>(location),
+            FastmemPatchInfo{
+                Common::BitCast<u64>(code.getCurr()),
+                Common::BitCast<u64>(wrapped_fn),
+                *marker,
+            }
+        );
+
+        return;
+    }
+
+    Xbyak::Label abort, end;
+
     const auto dest_ptr = EmitVAddrLookup(code, ctx.reg_alloc, config, abort, vaddr);
     switch (bitsize) {
     case 8:
@@ -903,7 +1005,7 @@ void A32EmitX64::WriteMemory(A32EmitContext& ctx, IR::Inst* inst) {
         code.mov(qword[dest_ptr], value);
         break;
    default:
-        ASSERT_MSG(false, "Invalid bit_size");
+        ASSERT_MSG(false, "Invalid bitsize");
         break;
     }
     code.jmp(end);
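The fastmem fast path above relies on a single invariant: R13 holds config.fastmem_pointer (installed by the GenRCP change further down), so an access like code.byte[r13 + vaddr] is plain base-plus-guest-address arithmetic. A conceptual C++ rendering of the 8-bit read, as a sketch with illustrative names rather than dynarmic API:

#include <cstdint>

// What code.movzx(value.cvt32(), code.byte[r13 + vaddr]) computes at runtime:
// the guest virtual address indexes directly into the 4GB host arena.
std::uint8_t FastmemReadByte(std::uint8_t* fastmem_pointer, std::uint32_t vaddr) {
    return fastmem_pointer[vaddr];  // may fault; the fault is resolved via FastmemCallback
}

Note also the field order in the fastmem_patch_info.emplace calls: code.getCurr(), taken after the access has been emitted, becomes resume_rip, and wrapped_fn becomes callback, matching the declaration order of FastmemPatchInfo in the header below.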
@@ -7,6 +7,10 @@
 #pragma once
 
 #include <array>
+#include <optional>
+#include <set>
+#include <tuple>
+#include <unordered_map>
 
 #include <dynarmic/A32/a32.h>
 #include <dynarmic/A32/config.h>
@@ -46,7 +50,6 @@ protected:
     const A32::UserConfig config;
     A32::Jit* jit_interface;
     BlockRangeInformation<u32> block_ranges;
-    ExceptionHandler exception_handler;
 
     struct FastDispatchEntry {
         u64 location_descriptor;
@@ -78,6 +81,18 @@ protected:
     // Helpers
     std::string LocationDescriptorToFriendlyName(const IR::LocationDescriptor&) const override;
 
+    // Fastmem information
+    using DoNotFastmemMarker = std::tuple<IR::LocationDescriptor, std::ptrdiff_t>;
+    struct FastmemPatchInfo {
+        u64 resume_rip;
+        u64 callback;
+        DoNotFastmemMarker marker;
+    };
+    std::unordered_map<u64, FastmemPatchInfo> fastmem_patch_info;
+    std::set<DoNotFastmemMarker> do_not_fastmem;
+    std::optional<DoNotFastmemMarker> ShouldFastmem(A32EmitContext& ctx, IR::Inst* inst) const;
+    FakeCall FastmemCallback(u64 rip);
+
     // Memory access helpers
     template<std::size_t bitsize>
     void ReadMemory(A32EmitContext& ctx, IR::Inst* inst);
@@ -42,13 +42,13 @@ static RunCodeCallbacks GenRunCodeCallbacks(A32::UserCallbacks* cb, CodePtr (*Lo
 }
 
 static std::function<void(BlockOfCode&)> GenRCP(const A32::UserConfig& config) {
-    if (!config.page_table) {
-        return [](BlockOfCode&){};
-    }
-
-    const u64 r14_value = Common::BitCast<u64>(config.page_table);
-
-    return [r14_value](BlockOfCode& code) {
-        code.mov(code.r14, r14_value);
+    return [config](BlockOfCode& code) {
+        if (config.page_table) {
+            code.mov(code.r14, Common::BitCast<u64>(config.page_table));
+        }
+        if (config.fastmem_pointer) {
+            code.mov(code.r13, Common::BitCast<u64>(config.fastmem_pointer));
+        }
     };
 }
@@ -4,6 +4,7 @@
  * General Public License version 2 or any later version.
  */
 
+#include <iterator>
 #include <unordered_map>
 
 #include "backend/x64/block_of_code.h"
@@ -28,6 +29,10 @@ using namespace Xbyak::util;
 EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block)
     : reg_alloc(reg_alloc), block(block) {}
 
+size_t EmitContext::GetInstOffset(IR::Inst* inst) const {
+    return static_cast<size_t>(std::distance(block.begin(), IR::Block::iterator(inst)));
+}
+
 void EmitContext::EraseInstruction(IR::Inst* inst) {
     block.Instructions().erase(inst);
     inst->ClearArgs();
@@ -43,6 +43,7 @@ using VectorArray = std::array<T, A64FullVectorWidth::value / Common::BitSize<T>
 struct EmitContext {
     EmitContext(RegAlloc& reg_alloc, IR::Block& block);
 
+    size_t GetInstOffset(IR::Inst* inst) const;
     void EraseInstruction(IR::Inst* inst);
 
     virtual FP::FPCR FPCR() const = 0;
@@ -6,18 +6,30 @@
 
 #pragma once
 
+#include <functional>
 #include <memory>
 
+#include "common/common_types.h"
+
 namespace Dynarmic::Backend::X64 {
 
 class BlockOfCode;
 
+struct FakeCall {
+    u64 call_rip;
+    u64 ret_rip;
+};
+
 class ExceptionHandler final {
 public:
     ExceptionHandler();
     ~ExceptionHandler();
 
     void Register(BlockOfCode& code);
 
+    bool SupportsFastmem() const noexcept;
+    void SetFastmemCallback(std::function<FakeCall(u64)> cb);
+
 private:
     struct Impl;
     std::unique_ptr<Impl> impl;
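Both implementations below stub SupportsFastmem() to return false, so this commit wires up the plumbing without shipping an actual faulting-access handler. As a hedged sketch of what a platform backend might do on Linux/x86-64 (none of this is in the commit; the mcontext register layout, the need for _GNU_SOURCE on glibc, and installation via sigaction with SA_SIGINFO are all assumptions): catch SIGSEGV, ask the registered callback for a FakeCall, and rewrite the thread context so execution calls the fallback thunk and then resumes at ret_rip.

#include <csignal>
#include <cstdint>
#include <functional>
#include <ucontext.h>  // REG_RIP/REG_RSP may require _GNU_SOURCE on glibc

#include "backend/x64/exception_handler.h"  // for FakeCall

// Assumed to have been stored by ExceptionHandler::SetFastmemCallback.
static std::function<Dynarmic::Backend::X64::FakeCall(std::uint64_t)> g_fastmem_cb;

static void SegvHandler(int, siginfo_t*, void* raw_ctx) {
    auto* ctx = static_cast<ucontext_t*>(raw_ctx);
    auto& rip = ctx->uc_mcontext.gregs[REG_RIP];  // Linux/x86-64 mcontext layout
    auto& rsp = ctx->uc_mcontext.gregs[REG_RSP];

    const auto fake_call = g_fastmem_cb(static_cast<std::uint64_t>(rip));

    // Simulate a CALL instruction: push the resume address, jump to the fallback.
    rsp -= sizeof(std::uint64_t);
    *reinterpret_cast<std::uint64_t*>(rsp) = fake_call.ret_rip;
    rip = static_cast<greg_t>(fake_call.call_rip);
}

Faults that do not originate from patched fastmem code, handler chaining, and thread safety are all deliberately ignored in this sketch.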
@@ -18,4 +18,12 @@ void ExceptionHandler::Register(BlockOfCode&) {
     // Do nothing
 }
 
+bool ExceptionHandler::SupportsFastmem() const noexcept {
+    return false;
+}
+
+void ExceptionHandler::SetFastmemCallback(std::function<FakeCall(u64)>) {
+    // Do nothing
+}
+
 } // namespace Dynarmic::Backend::X64
@@ -198,4 +198,12 @@ void ExceptionHandler::Register(BlockOfCode& code) {
     impl = std::make_unique<Impl>(rfuncs, code.getCode());
 }
 
+bool ExceptionHandler::SupportsFastmem() const noexcept {
+    return false;
+}
+
+void ExceptionHandler::SetFastmemCallback(std::function<FakeCall(u64)>) {
+    // Do nothing
+}
+
 } // namespace Dynarmic::Backend::X64
@@ -33,13 +33,17 @@ private:
 
 std::ostream& operator<<(std::ostream& o, const LocationDescriptor& descriptor);
 
+inline bool operator<(const LocationDescriptor& x, const LocationDescriptor& y) noexcept {
+    return x.Value() < y.Value();
+}
+
 } // namespace Dynarmic::IR
 
 namespace std {
 template <>
 struct less<Dynarmic::IR::LocationDescriptor> {
     bool operator()(const Dynarmic::IR::LocationDescriptor& x, const Dynarmic::IR::LocationDescriptor& y) const noexcept {
-        return x.Value() < y.Value();
+        return x < y;
     }
 };
 template <>
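The free operator< added above is what makes DoNotFastmemMarker usable as a std::set key: std::tuple's lexicographic operator< compares elements with their own operator<, not with std::less, so the pre-existing std::less specialization alone would not have been enough. A minimal standalone illustration (the include path reflects the source layout at the time of this commit and is an assumption):

#include <cstddef>
#include <set>
#include <tuple>

#include "frontend/ir/location_descriptor.h"

// Compiles only because IR::LocationDescriptor now has a free operator<;
// std::tuple's operator< compares the elements pairwise with operator<.
using DoNotFastmemMarker = std::tuple<Dynarmic::IR::LocationDescriptor, std::ptrdiff_t>;

std::set<DoNotFastmemMarker> do_not_fastmem;  // ordered via the tuple's operator<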