Implement direct page table access

This commit is contained in:
MerryMage 2018-02-12 19:49:56 +00:00
parent ef02658049
commit f45a5e17c6
4 changed files with 281 additions and 0 deletions

View file

@ -111,6 +111,9 @@ struct UserConfig {
/// emitted code.
const std::uint64_t* tpidrro_el0 = nullptr;
/// Pointer to the page table which we can use for direct page table access.
void** page_table = nullptr;
// Determines whether AddTicks and GetTicksRemaining are called.
// If false, execution will continue until soon after Jit::HaltExecution is called.
// bool enable_ticks = true; // TODO

View file

@ -4,6 +4,8 @@
* General Public License version 2 or any later version.
*/
#include <initializer_list>
#include <fmt/ostream.h>
#include "backend_x64/a64_emit_x64.h"
@ -53,6 +55,7 @@ A64EmitX64::A64EmitX64(BlockOfCode& code, A64::UserConfig conf)
: EmitX64(code), conf(conf)
{
GenMemory128Accessors();
GenFastmemFallbacks();
code.PreludeComplete();
}
@ -174,6 +177,94 @@ void A64EmitX64::GenMemory128Accessors() {
code.ret();
}
void A64EmitX64::GenFastmemFallbacks() {
const std::initializer_list<int> idxes{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
const std::vector<std::tuple<size_t, ArgCallback>> read_callbacks {
{8, DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead8)},
{16, DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead16)},
{32, DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead32)},
{64, DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead64)},
};
const std::vector<std::tuple<size_t, ArgCallback>> write_callbacks {
{8, DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite8)},
{16, DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite16)},
{32, DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite32)},
{64, DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite64)},
};
for (int vaddr_idx : idxes) {
if (vaddr_idx == 4 || vaddr_idx == 15) {
continue;
}
for (int value_idx : idxes) {
code.align();
read_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)] = code.getCurr<void(*)()>();
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(value_idx));
if (vaddr_idx != code.ABI_PARAM2.getIdx()) {
code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
}
code.call(memory_read_128);
if (value_idx != 0) {
code.movaps(Xbyak::Xmm{value_idx}, xmm0);
}
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(value_idx));
code.ret();
code.align();
write_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)] = code.getCurr<void(*)()>();
ABI_PushCallerSaveRegistersAndAdjustStack(code);
if (vaddr_idx != code.ABI_PARAM2.getIdx()) {
code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
}
if (value_idx != 0) {
code.movaps(xmm0, Xbyak::Xmm{value_idx});
}
code.call(memory_write_128);
ABI_PopCallerSaveRegistersAndAdjustStack(code);
code.ret();
if (vaddr_idx == value_idx || value_idx == 4 || value_idx == 15) {
continue;
}
for (auto& [bitsize, callback] : read_callbacks) {
code.align();
read_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void(*)()>();
ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx));
if (vaddr_idx != code.ABI_PARAM2.getIdx()) {
code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
}
callback.EmitCall(code);
if (value_idx != code.ABI_RETURN.getIdx()) {
code.mov(Xbyak::Reg64{value_idx}, code.ABI_RETURN);
}
ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx));
code.ret();
}
for (auto& [bitsize, callback] : write_callbacks) {
code.align();
write_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void(*)()>();
ABI_PushCallerSaveRegistersAndAdjustStack(code);
if (vaddr_idx == code.ABI_PARAM3.getIdx() && value_idx == code.ABI_PARAM2.getIdx()) {
code.xchg(code.ABI_PARAM2, code.ABI_PARAM3);
} else {
if (vaddr_idx != code.ABI_PARAM2.getIdx()) {
code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
}
if (value_idx != code.ABI_PARAM3.getIdx()) {
code.mov(code.ABI_PARAM3, Xbyak::Reg64{value_idx});
}
}
callback.EmitCall(code);
ABI_PopCallerSaveRegistersAndAdjustStack(code);
code.ret();
}
}
}
}
void A64EmitX64::EmitA64SetCheckBit(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8();
@ -389,31 +480,157 @@ void A64EmitX64::EmitA64GetTPIDRRO(A64EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.DefineValue(inst, result);
}
static std::tuple<Xbyak::Reg, Xbyak::RegExp> EmitVAddrLookup(const A64::UserConfig& conf, BlockOfCode& code, A64EmitContext& ctx, Xbyak::Reg64 vaddr, boost::optional<Xbyak::Reg64> arg_scratch = {}) {
constexpr int PAGE_BITS = 12;
constexpr u64 PAGE_SIZE = 1 << PAGE_BITS;
Xbyak::Reg64 page_table = arg_scratch.value_or_eval([&]{ return ctx.reg_alloc.ScratchGpr(); });
Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
code.mov(page_table, reinterpret_cast<u64>(conf.page_table));
code.mov(tmp, vaddr);
code.shr(tmp, PAGE_BITS);
code.mov(page_table, qword[page_table + tmp * sizeof(void*)]);
code.mov(tmp, vaddr);
code.and_(tmp, PAGE_SIZE - 1);
return {page_table, page_table + tmp};
}
void A64EmitX64::EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize) {
Xbyak::Label abort, end;
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
Xbyak::Reg64 value = ctx.reg_alloc.ScratchGpr();
auto [page, src_ptr] = EmitVAddrLookup(conf, code, ctx, vaddr, value);
code.test(page, page);
code.jz(abort, code.T_NEAR);
switch (bitsize) {
case 8:
code.movzx(value.cvt32(), code.byte[src_ptr]);
break;
case 16:
code.movzx(value.cvt32(), word[src_ptr]);
break;
case 32:
code.mov(value.cvt32(), dword[src_ptr]);
break;
case 64:
code.mov(value, qword[src_ptr]);
break;
}
code.L(end);
code.SwitchToFarCode();
code.L(abort);
code.call(read_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())]);
code.jmp(end, code.T_NEAR);
code.SwitchToNearCode();
ctx.reg_alloc.DefineValue(inst, value);
}
void A64EmitX64::EmitDirectPageTableMemoryWrite(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize) {
Xbyak::Label abort, end;
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
auto [page, dest_ptr] = EmitVAddrLookup(conf, code, ctx, vaddr);
code.test(page, page);
code.jz(abort, code.T_NEAR);
switch (bitsize) {
case 8:
code.mov(code.byte[dest_ptr], value.cvt8());
break;
case 16:
code.mov(word[dest_ptr], value.cvt16());
break;
case 32:
code.mov(dword[dest_ptr], value.cvt32());
break;
case 64:
code.mov(qword[dest_ptr], value);
break;
}
code.L(end);
code.SwitchToFarCode();
code.L(abort);
code.call(write_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())]);
code.jmp(end, code.T_NEAR);
code.SwitchToNearCode();
}
void A64EmitX64::EmitA64ReadMemory8(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.page_table) {
EmitDirectPageTableMemoryRead(ctx, inst, 8);
return;
}
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(inst, {}, args[0]);
DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead8).EmitCall(code);
}
void A64EmitX64::EmitA64ReadMemory16(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.page_table) {
EmitDirectPageTableMemoryRead(ctx, inst, 16);
return;
}
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(inst, {}, args[0]);
DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead16).EmitCall(code);
}
void A64EmitX64::EmitA64ReadMemory32(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.page_table) {
EmitDirectPageTableMemoryRead(ctx, inst, 32);
return;
}
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(inst, {}, args[0]);
DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead32).EmitCall(code);
}
void A64EmitX64::EmitA64ReadMemory64(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.page_table) {
EmitDirectPageTableMemoryRead(ctx, inst, 64);
return;
}
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(inst, {}, args[0]);
DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead64).EmitCall(code);
}
void A64EmitX64::EmitA64ReadMemory128(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.page_table) {
Xbyak::Label abort, end;
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm();
auto [page, dest_ptr] = EmitVAddrLookup(conf, code, ctx, vaddr);
code.test(page, page);
code.jz(abort, code.T_NEAR);
code.movups(value, xword[dest_ptr]);
code.L(end);
code.SwitchToFarCode();
code.L(abort);
code.call(read_fallbacks[std::make_tuple(128, vaddr.getIdx(), value.getIdx())]);
code.jmp(end, code.T_NEAR);
code.SwitchToNearCode();
ctx.reg_alloc.DefineValue(inst, value);
return;
}
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
code.CallFunction(memory_read_128);
@ -421,30 +638,71 @@ void A64EmitX64::EmitA64ReadMemory128(A64EmitContext& ctx, IR::Inst* inst) {
}
void A64EmitX64::EmitA64WriteMemory8(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.page_table) {
EmitDirectPageTableMemoryWrite(ctx, inst, 8);
return;
}
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite8).EmitCall(code);
}
void A64EmitX64::EmitA64WriteMemory16(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.page_table) {
EmitDirectPageTableMemoryWrite(ctx, inst, 16);
return;
}
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite16).EmitCall(code);
}
void A64EmitX64::EmitA64WriteMemory32(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.page_table) {
EmitDirectPageTableMemoryWrite(ctx, inst, 32);
return;
}
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite32).EmitCall(code);
}
void A64EmitX64::EmitA64WriteMemory64(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.page_table) {
EmitDirectPageTableMemoryWrite(ctx, inst, 64);
return;
}
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite64).EmitCall(code);
}
void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.page_table) {
Xbyak::Label abort, end;
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
Xbyak::Xmm value = ctx.reg_alloc.UseXmm(args[1]);
auto [page, dest_ptr] = EmitVAddrLookup(conf, code, ctx, vaddr);
code.test(page, page);
code.jz(abort, code.T_NEAR);
code.movups(xword[dest_ptr], value);
code.L(end);
code.SwitchToFarCode();
code.L(abort);
code.call(write_fallbacks[std::make_tuple(128, vaddr.getIdx(), value.getIdx())]);
code.jmp(end, code.T_NEAR);
code.SwitchToNearCode();
return;
}
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.Use(args[0], ABI_PARAM2);
ctx.reg_alloc.Use(args[1], HostLoc::XMM0);

View file

@ -6,6 +6,9 @@
#pragma once
#include <map>
#include <tuple>
#include "backend_x64/a64_jitstate.h"
#include "backend_x64/block_range_information.h"
#include "backend_x64/emit_x64.h"
@ -48,6 +51,13 @@ protected:
void (*memory_write_128)();
void GenMemory128Accessors();
std::map<std::tuple<size_t, int, int>, void(*)()> read_fallbacks;
std::map<std::tuple<size_t, int, int>, void(*)()> write_fallbacks;
void GenFastmemFallbacks();
void EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize);
void EmitDirectPageTableMemoryWrite(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize);
// Microinstruction emitters
#define OPCODE(...)
#define A32OPC(...)

View file

@ -39,6 +39,16 @@ inline bool HostLocIsFlag(HostLoc reg) {
return reg >= HostLoc::CF && reg <= HostLoc::OF;
}
inline HostLoc HostLocRegIdx(int idx) {
ASSERT(idx >= 0 && idx <= 15);
return static_cast<HostLoc>(idx);
}
inline HostLoc HostLocXmmIdx(int idx) {
ASSERT(idx >= 0 && idx <= 15);
return static_cast<HostLoc>(static_cast<size_t>(HostLoc::XMM0) + idx);
}
inline HostLoc HostLocSpill(size_t i) {
return static_cast<HostLoc>(static_cast<size_t>(HostLoc::FirstSpill) + i);
}