Merge branch 'feature/direct-page-table-access'

This commit is contained in:
MerryMage 2018-02-12 21:47:43 +00:00
commit a38f35eef6
8 changed files with 318 additions and 12 deletions

View file

@ -111,6 +111,21 @@ struct UserConfig {
/// emitted code.
const std::uint64_t* tpidrro_el0 = nullptr;
/// Pointer to the page table which we can use for direct page table access.
/// If an entry in page_table is null, the relevant memory callback will be called.
/// If page_table is nullptr, all memory accesses hit the memory callbacks.
void** page_table = nullptr;
/// Declares how many valid address bits there are in virtual addresses.
/// Determines the size of page_table. Valid values are between 12 and 64 inclusive.
/// This is only used if page_table is not nullptr.
size_t page_table_address_space_bits = 36;
/// Determines what happens if the guest accesses an entry that is off the end of the
/// page table. If true, Dynarmic will silently mirror page_table's address space. If
/// false, accessing memory outside of page_table bounds will result in a call to the
/// relevant memory callback.
/// This is only used if page_table is not nullptr.
bool silently_mirror_page_table = true;
// Determines whether AddTicks and GetTicksRemaining are called.
// If false, execution will continue until soon after Jit::HaltExecution is called.
// bool enable_ticks = true; // TODO

View file

@ -4,6 +4,8 @@
* General Public License version 2 or any later version.
*/
#include <initializer_list>
#include <fmt/ostream.h>
#include "backend_x64/a64_emit_x64.h"
@ -53,6 +55,7 @@ A64EmitX64::A64EmitX64(BlockOfCode& code, A64::UserConfig conf)
: EmitX64(code), conf(conf)
{
GenMemory128Accessors();
GenFastmemFallbacks();
code.PreludeComplete();
}
@ -174,6 +177,94 @@ void A64EmitX64::GenMemory128Accessors() {
code.ret();
}
/// Pre-generates the out-of-line fallback stubs that the direct page table access paths
/// call when their inline lookup jumps to its abort label.
///
/// One stub is emitted per (access size in bits, address register index, value register
/// index) combination and its entry point is stored in read_fallbacks / write_fallbacks
/// under that key. Every stub wraps the call in the ABI push/pop helpers, moves the address
/// into ABI_PARAM2 (and, for writes, the value into ABI_PARAM3 or xmm0), and performs the
/// call. Read stubs move the result into the requested value register, which is excluded
/// from the register save/restore.
void A64EmitX64::GenFastmemFallbacks() {
    const std::initializer_list<int> idxes{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
    const std::vector<std::tuple<size_t, ArgCallback>> read_callbacks {
        {8, DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead8)},
        {16, DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead16)},
        {32, DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead32)},
        {64, DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead64)},
    };
    const std::vector<std::tuple<size_t, ArgCallback>> write_callbacks {
        {8, DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite8)},
        {16, DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite16)},
        {32, DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite32)},
        {64, DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite64)},
    };

    for (int vaddr_idx : idxes) {
        // Register indices 4 and 15 never get stubs as general-purpose operands.
        // NOTE(review): presumably these are RSP and a JIT-reserved register - confirm.
        if (vaddr_idx == 4 || vaddr_idx == 15) {
            continue;
        }

        for (int value_idx : idxes) {
            // 128-bit read: the value lives in an XMM register, so value_idx is an XMM index
            // here and every index 0-15 gets a stub.
            code.align();
            read_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)] = code.getCurr<void(*)()>();
            ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(value_idx));
            if (vaddr_idx != code.ABI_PARAM2.getIdx()) {
                code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
            }
            code.call(memory_read_128);
            if (value_idx != 0) {
                code.movaps(Xbyak::Xmm{value_idx}, xmm0);
            }
            ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(value_idx));
            code.ret();

            // 128-bit write: address goes to ABI_PARAM2, value to xmm0. These live in
            // different register files, so the two moves cannot clobber each other.
            code.align();
            write_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)] = code.getCurr<void(*)()>();
            ABI_PushCallerSaveRegistersAndAdjustStack(code);
            if (vaddr_idx != code.ABI_PARAM2.getIdx()) {
                code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
            }
            if (value_idx != 0) {
                code.movaps(xmm0, Xbyak::Xmm{value_idx});
            }
            code.call(memory_write_128);
            ABI_PopCallerSaveRegistersAndAdjustStack(code);
            code.ret();

            // From here on value_idx names a general-purpose register.
            if (value_idx == 4 || value_idx == 15) {
                continue;
            }

            // The read path allocates its result as a fresh scratch register, so the result
            // register never coincides with the address register; no stub is needed for
            // that combination.
            if (vaddr_idx != value_idx) {
                for (const auto& [bitsize, callback] : read_callbacks) {
                    code.align();
                    read_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void(*)()>();
                    ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx));
                    if (vaddr_idx != code.ABI_PARAM2.getIdx()) {
                        code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
                    }
                    callback.EmitCall(code);
                    if (value_idx != code.ABI_RETURN.getIdx()) {
                        code.mov(Xbyak::Reg64{value_idx}, code.ABI_RETURN);
                    }
                    ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx));
                    code.ret();
                }
            }

            // Write stubs are also generated for vaddr_idx == value_idx: the write path
            // requests both operands with UseGpr, so a store whose address and data are the
            // same IR value may present the same host register for both.
            for (const auto& [bitsize, callback] : write_callbacks) {
                code.align();
                write_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void(*)()>();
                ABI_PushCallerSaveRegistersAndAdjustStack(code);
                // Shuffle (vaddr, value) into (ABI_PARAM2, ABI_PARAM3) without overwriting a
                // source register before it has been read.
                if (vaddr_idx == code.ABI_PARAM3.getIdx() && value_idx == code.ABI_PARAM2.getIdx()) {
                    // Operands are exactly swapped.
                    code.xchg(code.ABI_PARAM2, code.ABI_PARAM3);
                } else if (vaddr_idx == code.ABI_PARAM3.getIdx()) {
                    // The address sits in ABI_PARAM3: copy it out before ABI_PARAM3 is
                    // overwritten with the value.
                    code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
                    if (value_idx != code.ABI_PARAM3.getIdx()) {
                        code.mov(code.ABI_PARAM3, Xbyak::Reg64{value_idx});
                    }
                } else {
                    // ABI_PARAM3 holds nothing needed, so load the value first; it may be
                    // sitting in ABI_PARAM2, which the address move would otherwise destroy.
                    if (value_idx != code.ABI_PARAM3.getIdx()) {
                        code.mov(code.ABI_PARAM3, Xbyak::Reg64{value_idx});
                    }
                    if (vaddr_idx != code.ABI_PARAM2.getIdx()) {
                        code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
                    }
                }
                callback.EmitCall(code);
                ABI_PopCallerSaveRegistersAndAdjustStack(code);
                code.ret();
            }
        }
    }
}
void A64EmitX64::EmitA64SetCheckBit(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8();
@ -389,31 +480,170 @@ void A64EmitX64::EmitA64GetTPIDRRO(A64EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, result);
}
/// Emits host code that translates the guest virtual address held in `vaddr` into a host
/// address by indexing conf.page_table with the address's page number.
///
/// @param conf        Configuration supplying page_table, page_table_address_space_bits and
///                    silently_mirror_page_table.
/// @param code        Code emitter.
/// @param ctx         Emit context; its register allocator provides the scratch registers.
/// @param abort       Label jumped to when the address is outside the table (only when not
///                    mirroring) or when the page table entry is null.
/// @param vaddr       Register holding the guest virtual address; it is only read.
/// @param arg_scratch Optional register to hold the page pointer; when absent a scratch
///                    register is allocated.
/// @return An address expression (page pointer + offset within page) for the access.
static Xbyak::RegExp EmitVAddrLookup(const A64::UserConfig& conf, BlockOfCode& code, A64EmitContext& ctx, Xbyak::Label& abort, Xbyak::Reg64 vaddr, boost::optional<Xbyak::Reg64> arg_scratch = {}) {
    constexpr size_t PAGE_BITS = 12;
    constexpr size_t PAGE_SIZE = 1 << PAGE_BITS;
    const size_t valid_page_index_bits = conf.page_table_address_space_bits - PAGE_BITS;
    const size_t unused_top_bits = 64 - conf.page_table_address_space_bits;

    Xbyak::Reg64 page_table = arg_scratch.value_or_eval([&]{ return ctx.reg_alloc.ScratchGpr(); });
    Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
    code.mov(page_table, reinterpret_cast<u64>(conf.page_table));
    code.mov(tmp, vaddr);
    if (unused_top_bits == 0) {
        // The whole 64-bit address is valid: the page index is simply vaddr >> PAGE_BITS.
        code.shr(tmp, int(PAGE_BITS));
    } else if (conf.silently_mirror_page_table) {
        if (valid_page_index_bits >= 32) {
            // The mask would not fit in a 32-bit immediate, so discard the unused top bits
            // by shifting them out to the left and then back down.
            code.shl(tmp, int(unused_top_bits));
            code.shr(tmp, int(unused_top_bits + PAGE_BITS));
        } else {
            code.shr(tmp, int(PAGE_BITS));
            // Unsigned arithmetic: with 31 index bits, (1 << 31) - 1 would overflow int.
            code.and_(tmp, (u32(1) << valid_page_index_bits) - 1);
        }
    } else {
        ASSERT(valid_page_index_bits < 32);
        code.shr(tmp, int(PAGE_BITS));
        // Any bit set at or above valid_page_index_bits means the address is out of range.
        // The mask is built in unsigned arithmetic to avoid signed overflow at 31 index bits.
        code.test(tmp, ~((u32(1) << valid_page_index_bits) - 1));
        code.jnz(abort, code.T_NEAR);
    }
    // Load the page pointer; a null entry diverts to the abort path.
    code.mov(page_table, qword[page_table + tmp * sizeof(void*)]);
    code.test(page_table, page_table);
    code.jz(abort, code.T_NEAR);
    // Offset within the page.
    code.mov(tmp, vaddr);
    code.and_(tmp, static_cast<u32>(PAGE_SIZE - 1));
    return page_table + tmp;
}
/// Emits an inline page-table-based load of `bitsize` bits (8, 16, 32 or 64) for `inst`.
/// If the lookup jumps to its abort label, the matching pre-generated read fallback stub
/// is called from far code and execution resumes after the inline load.
void A64EmitX64::EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize) {
    Xbyak::Label slow_path, resume;

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
    Xbyak::Reg64 value = ctx.reg_alloc.ScratchGpr();

    // The result register doubles as the lookup's page pointer scratch.
    auto host_addr = EmitVAddrLookup(conf, code, ctx, slow_path, vaddr, value);
    if (bitsize == 8) {
        code.movzx(value.cvt32(), code.byte[host_addr]);
    } else if (bitsize == 16) {
        code.movzx(value.cvt32(), word[host_addr]);
    } else if (bitsize == 32) {
        code.mov(value.cvt32(), dword[host_addr]);
    } else if (bitsize == 64) {
        code.mov(value, qword[host_addr]);
    }
    code.L(resume);

    // Stub selected by access size and the host registers holding address and result.
    auto fallback = read_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())];

    code.SwitchToFarCode();
    code.L(slow_path);
    code.call(fallback);
    code.jmp(resume, code.T_NEAR);
    code.SwitchToNearCode();

    ctx.reg_alloc.DefineValue(inst, value);
}
/// Emits an inline page-table-based store of `bitsize` bits (8, 16, 32 or 64) for `inst`.
/// If the lookup jumps to its abort label, the matching pre-generated write fallback stub
/// is called from far code and execution resumes after the inline store.
void A64EmitX64::EmitDirectPageTableMemoryWrite(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize) {
    Xbyak::Label slow_path, resume;

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
    Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);

    auto host_addr = EmitVAddrLookup(conf, code, ctx, slow_path, vaddr);
    if (bitsize == 8) {
        code.mov(code.byte[host_addr], value.cvt8());
    } else if (bitsize == 16) {
        code.mov(word[host_addr], value.cvt16());
    } else if (bitsize == 32) {
        code.mov(dword[host_addr], value.cvt32());
    } else if (bitsize == 64) {
        code.mov(qword[host_addr], value);
    }
    code.L(resume);

    // Stub selected by access size and the host registers holding address and value.
    auto fallback = write_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())];

    code.SwitchToFarCode();
    code.L(slow_path);
    code.call(fallback);
    code.jmp(resume, code.T_NEAR);
    code.SwitchToNearCode();
}
/// Emits an 8-bit guest memory read: inline via the page table when one is configured,
/// otherwise as a host call to the MemoryRead8 user callback.
void A64EmitX64::EmitA64ReadMemory8(A64EmitContext& ctx, IR::Inst* inst) {
    if (!conf.page_table) {
        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
        ctx.reg_alloc.HostCall(inst, {}, args[0]);
        DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead8).EmitCall(code);
        return;
    }

    EmitDirectPageTableMemoryRead(ctx, inst, 8);
}
/// Emits a 16-bit guest memory read: inline via the page table when one is configured,
/// otherwise as a host call to the MemoryRead16 user callback.
void A64EmitX64::EmitA64ReadMemory16(A64EmitContext& ctx, IR::Inst* inst) {
    if (!conf.page_table) {
        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
        ctx.reg_alloc.HostCall(inst, {}, args[0]);
        DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead16).EmitCall(code);
        return;
    }

    EmitDirectPageTableMemoryRead(ctx, inst, 16);
}
/// Emits a 32-bit guest memory read: inline via the page table when one is configured,
/// otherwise as a host call to the MemoryRead32 user callback.
void A64EmitX64::EmitA64ReadMemory32(A64EmitContext& ctx, IR::Inst* inst) {
    if (!conf.page_table) {
        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
        ctx.reg_alloc.HostCall(inst, {}, args[0]);
        DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead32).EmitCall(code);
        return;
    }

    EmitDirectPageTableMemoryRead(ctx, inst, 32);
}
/// Emits a 64-bit guest memory read: inline via the page table when one is configured,
/// otherwise as a host call to the MemoryRead64 user callback.
void A64EmitX64::EmitA64ReadMemory64(A64EmitContext& ctx, IR::Inst* inst) {
    if (!conf.page_table) {
        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
        ctx.reg_alloc.HostCall(inst, {}, args[0]);
        DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead64).EmitCall(code);
        return;
    }

    EmitDirectPageTableMemoryRead(ctx, inst, 64);
}
void A64EmitX64::EmitA64ReadMemory128(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.page_table) {
Xbyak::Label abort, end;
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm();
auto src_ptr = EmitVAddrLookup(conf, code, ctx, abort, vaddr);
code.movups(value, xword[src_ptr]);
code.L(end);
code.SwitchToFarCode();
code.L(abort);
code.call(read_fallbacks[std::make_tuple(128, vaddr.getIdx(), value.getIdx())]);
code.jmp(end, code.T_NEAR);
code.SwitchToNearCode();
ctx.reg_alloc.DefineValue(inst, value);
return;
}
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
code.CallFunction(memory_read_128);
@ -421,30 +651,69 @@ void A64EmitX64::EmitA64ReadMemory128(A64EmitContext& ctx, IR::Inst* inst) {
}
/// Emits an 8-bit guest memory write: inline via the page table when one is configured,
/// otherwise as a host call to the MemoryWrite8 user callback.
void A64EmitX64::EmitA64WriteMemory8(A64EmitContext& ctx, IR::Inst* inst) {
    if (!conf.page_table) {
        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
        ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
        DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite8).EmitCall(code);
        return;
    }

    EmitDirectPageTableMemoryWrite(ctx, inst, 8);
}
/// Emits a 16-bit guest memory write: inline via the page table when one is configured,
/// otherwise as a host call to the MemoryWrite16 user callback.
void A64EmitX64::EmitA64WriteMemory16(A64EmitContext& ctx, IR::Inst* inst) {
    if (!conf.page_table) {
        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
        ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
        DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite16).EmitCall(code);
        return;
    }

    EmitDirectPageTableMemoryWrite(ctx, inst, 16);
}
/// Emits a 32-bit guest memory write: inline via the page table when one is configured,
/// otherwise as a host call to the MemoryWrite32 user callback.
void A64EmitX64::EmitA64WriteMemory32(A64EmitContext& ctx, IR::Inst* inst) {
    if (!conf.page_table) {
        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
        ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
        DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite32).EmitCall(code);
        return;
    }

    EmitDirectPageTableMemoryWrite(ctx, inst, 32);
}
/// Emits a 64-bit guest memory write: inline via the page table when one is configured,
/// otherwise as a host call to the MemoryWrite64 user callback.
void A64EmitX64::EmitA64WriteMemory64(A64EmitContext& ctx, IR::Inst* inst) {
    if (!conf.page_table) {
        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
        ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
        DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite64).EmitCall(code);
        return;
    }

    EmitDirectPageTableMemoryWrite(ctx, inst, 64);
}
void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.page_table) {
Xbyak::Label abort, end;
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
Xbyak::Xmm value = ctx.reg_alloc.UseXmm(args[1]);
auto dest_ptr = EmitVAddrLookup(conf, code, ctx, abort, vaddr);
code.movups(xword[dest_ptr], value);
code.L(end);
code.SwitchToFarCode();
code.L(abort);
code.call(write_fallbacks[std::make_tuple(128, vaddr.getIdx(), value.getIdx())]);
code.jmp(end, code.T_NEAR);
code.SwitchToNearCode();
return;
}
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.Use(args[0], ABI_PARAM2);
ctx.reg_alloc.Use(args[1], HostLoc::XMM0);

View file

@ -6,6 +6,9 @@
#pragma once
#include <map>
#include <tuple>
#include "backend_x64/a64_jitstate.h"
#include "backend_x64/block_range_information.h"
#include "backend_x64/emit_x64.h"
@ -48,6 +51,13 @@ protected:
void (*memory_write_128)();
void GenMemory128Accessors();
std::map<std::tuple<size_t, int, int>, void(*)()> read_fallbacks;
std::map<std::tuple<size_t, int, int>, void(*)()> write_fallbacks;
void GenFastmemFallbacks();
void EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize);
void EmitDirectPageTableMemoryWrite(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize);
// Microinstruction emitters
#define OPCODE(...)
#define A32OPC(...)

View file

@ -40,7 +40,9 @@ public:
: conf(conf)
, block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state})
, emitter(block_of_code, conf)
{}
{
ASSERT(conf.page_table_address_space_bits >= 12 && conf.page_table_address_space_bits <= 64);
}
~Impl() = default;

View file

@ -9,23 +9,23 @@
namespace Dynarmic::BackendX64 {
void SimpleCallback::EmitCall(BlockOfCode& code, std::function<void(RegList)> l) {
/// Invokes `l` with the four ABI parameter registers, then emits a call to `fn`.
void SimpleCallback::EmitCall(BlockOfCode& code, std::function<void(RegList)> l) const {
    // All four parameter registers are offered to the caller-supplied setup function.
    RegList param_regs{code.ABI_PARAM1, code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4};
    l(param_regs);

    code.CallFunction(fn);
}
void SimpleCallback::EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> l) {
/// Invokes `l` with ABI_PARAM1 as the return-pointer register and the remaining three ABI
/// parameter registers as the argument list, then emits a call to `fn`.
void SimpleCallback::EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> l) const {
    RegList remaining_params{code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4};
    l(code.ABI_PARAM1, remaining_params);

    code.CallFunction(fn);
}
void ArgCallback::EmitCall(BlockOfCode& code, std::function<void(RegList)> l) {
/// Invokes `l` with ABI_PARAM2..ABI_PARAM4, then loads the bound argument `arg` into
/// ABI_PARAM1 and emits a call to `fn`.
void ArgCallback::EmitCall(BlockOfCode& code, std::function<void(RegList)> l) const {
    // ABI_PARAM1 is withheld from the caller: it is filled with `arg` below.
    RegList free_params{code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4};
    l(free_params);

    code.mov(code.ABI_PARAM1, arg);
    code.CallFunction(fn);
}
void ArgCallback::EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> l) {
void ArgCallback::EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> l) const {
#if defined(WIN32) && !defined(__MINGW64__)
l(code.ABI_PARAM2, {code.ABI_PARAM3, code.ABI_PARAM4});
code.mov(code.ABI_PARAM1, arg);

View file

@ -23,8 +23,8 @@ class Callback {
public:
virtual ~Callback() = default;
virtual void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList){}) = 0;
virtual void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) = 0;
virtual void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList){}) const = 0;
virtual void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) const = 0;
};
class SimpleCallback final : public Callback {
@ -34,8 +34,8 @@ public:
~SimpleCallback() override = default;
void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList){}) override;
void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) override;
void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList){}) const override;
void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) const override;
private:
void (*fn)();
@ -48,8 +48,8 @@ public:
~ArgCallback() override = default;
void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList){}) override;
void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) override;
void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList){}) const override;
void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) const override;
private:
void (*fn)();

View file

@ -39,6 +39,16 @@ inline bool HostLocIsFlag(HostLoc reg) {
return reg >= HostLoc::CF && reg <= HostLoc::OF;
}
/// Converts a general-purpose register index in [0, 15] to the HostLoc with the same
/// underlying value.
inline HostLoc HostLocRegIdx(int idx) {
    ASSERT(idx >= 0 && idx <= 15);
    const auto loc = static_cast<HostLoc>(idx);
    return loc;
}
/// Converts an XMM register index in [0, 15] to its HostLoc, counted from HostLoc::XMM0.
inline HostLoc HostLocXmmIdx(int idx) {
    ASSERT(idx >= 0 && idx <= 15);
    const size_t xmm_base = static_cast<size_t>(HostLoc::XMM0);
    return static_cast<HostLoc>(xmm_base + idx);
}
/// Returns the HostLoc of spill slot `i`, counted from HostLoc::FirstSpill.
/// No range check is performed on `i`.
inline HostLoc HostLocSpill(size_t i) {
    const size_t first_spill = static_cast<size_t>(HostLoc::FirstSpill);
    return static_cast<HostLoc>(first_spill + i);
}

View file

@ -150,7 +150,7 @@ static void RunTestInstance(const std::array<u64, 31>& regs, const std::array<Ve
auto uni_iter = uni_env.modified_memory.begin();
auto jit_iter = jit_env.modified_memory.begin();
while (uni_iter != uni_env.modified_memory.end() || jit_iter != jit_env.modified_memory.end()) {
if (uni_iter == uni_env.modified_memory.end() || uni_iter->first > jit_iter->first) {
if (uni_iter == uni_env.modified_memory.end() || (jit_iter != jit_env.modified_memory.end() && uni_iter->first > jit_iter->first)) {
fmt::print("{:016x}: {:02x} *\n", jit_iter->first, jit_iter->second);
jit_iter++;
} else if (jit_iter == jit_env.modified_memory.end() || jit_iter->first > uni_iter->first) {