A64: Match A32 page_table code

Here we increase the similarity between the A64 and A32 front-ends in terms of their
page_table handling code. In this commit, we:

* Reserve and use r14 as a register to store the page_table pointer.
* Align the code to be more similar in structure.
* Add a conf member to A32EmitContext.
* Remove scratch argument from EmitVAddrLookup.
This commit is contained in:
MerryMage 2020-06-18 11:27:12 +01:00
parent 08350d06f1
commit 13367a7efd
5 changed files with 128 additions and 85 deletions

View file

@ -60,15 +60,15 @@ static Xbyak::Address MJitStateExtReg(A32::ExtReg reg) {
ASSERT_FALSE("Should never happen.");
}
A32EmitContext::A32EmitContext(RegAlloc& reg_alloc, IR::Block& block)
: EmitContext(reg_alloc, block) {}
A32EmitContext::A32EmitContext(const A32::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block)
: EmitContext(reg_alloc, block), conf(conf) {}
A32::LocationDescriptor A32EmitContext::Location() const {
return A32::LocationDescriptor{block.Location()};
}
bool A32EmitContext::IsSingleStep() const {
return A32::LocationDescriptor{block.Location()}.SingleStepping();
return Location().SingleStepping();
}
FP::FPCR A32EmitContext::FPCR() const {
@ -105,7 +105,7 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
}();
RegAlloc reg_alloc{code, A32JitState::SpillCount, SpillToOpArg<A32JitState>, gpr_order, any_xmm};
A32EmitContext ctx{reg_alloc, block};
A32EmitContext ctx{conf, reg_alloc, block};
// Start emitting.
code.align();
@ -876,11 +876,14 @@ FakeCall A32EmitX64::FastmemCallback(u64 rip_) {
return ret;
}
static void EmitDetectMisaignedVAddr(BlockOfCode& code, const A32::UserConfig& conf, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg32 vaddr, Xbyak::Reg32 tmp) {
namespace {
constexpr size_t page_bits = 12;
constexpr size_t page_size = 1 << page_bits;
constexpr size_t page_mask = (1 << page_bits) - 1;
if (bitsize == 8 || (conf.detect_misaligned_access_via_page_table & bitsize) == 0) {
void EmitDetectMisaignedVAddr(BlockOfCode& code, A32EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg32 vaddr, Xbyak::Reg32 tmp) {
if (bitsize == 8 || (ctx.conf.detect_misaligned_access_via_page_table & bitsize) == 0) {
return;
}
@ -898,7 +901,7 @@ static void EmitDetectMisaignedVAddr(BlockOfCode& code, const A32::UserConfig& c
code.test(vaddr, align_mask);
if (!conf.only_detect_misalignment_via_page_table_on_page_boundary) {
if (!ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) {
code.jnz(abort, code.T_NEAR);
return;
}
@ -920,12 +923,11 @@ static void EmitDetectMisaignedVAddr(BlockOfCode& code, const A32::UserConfig& c
code.SwitchToNearCode();
}
static Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, RegAlloc& reg_alloc, const A32::UserConfig& conf, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr, std::optional<Xbyak::Reg64> arg_scratch = {}) {
constexpr size_t page_bits = A32::UserConfig::PAGE_BITS;
const Xbyak::Reg64 page = arg_scratch ? *arg_scratch : reg_alloc.ScratchGpr();
const Xbyak::Reg32 tmp = conf.absolute_offset_page_table ? page.cvt32() : reg_alloc.ScratchGpr().cvt32();
Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, A32EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr) {
const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg32 tmp = ctx.conf.absolute_offset_page_table ? page.cvt32() : ctx.reg_alloc.ScratchGpr().cvt32();
EmitDetectMisaignedVAddr(code, conf, bitsize, abort, vaddr.cvt32(), tmp);
EmitDetectMisaignedVAddr(code, ctx, bitsize, abort, vaddr.cvt32(), tmp);
// TODO: This code assumes vaddr has been zext from 32-bits to 64-bits.
@ -933,18 +935,17 @@ static Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, RegAlloc& reg_alloc, con
code.shr(tmp, static_cast<int>(page_bits));
code.mov(page, qword[r14 + tmp * sizeof(void*)]);
code.test(page, page);
code.jz(abort);
if (conf.absolute_offset_page_table) {
code.jz(abort, code.T_NEAR);
if (ctx.conf.absolute_offset_page_table) {
return page + vaddr;
}
constexpr size_t page_mask = (1 << page_bits) - 1;
code.mov(tmp, vaddr.cvt32());
code.and_(tmp, static_cast<u32>(page_mask));
return page + tmp;
}
template<std::size_t bitsize>
static void EmitReadMemoryMov(BlockOfCode& code, const Xbyak::Reg64& value, const Xbyak::RegExp& addr) {
void EmitReadMemoryMov(BlockOfCode& code, const Xbyak::Reg64& value, const Xbyak::RegExp& addr) {
switch (bitsize) {
case 8:
code.movzx(value.cvt32(), code.byte[addr]);
@ -964,7 +965,7 @@ static void EmitReadMemoryMov(BlockOfCode& code, const Xbyak::Reg64& value, cons
}
template<std::size_t bitsize>
static void EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, const Xbyak::Reg64& value) {
void EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, const Xbyak::Reg64& value) {
switch (bitsize) {
case 8:
code.mov(code.byte[addr], value.cvt8());
@ -983,6 +984,8 @@ static void EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, con
}
}
} // anonymous namespace
template<std::size_t bitsize, auto callback>
void A32EmitX64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -1017,14 +1020,14 @@ void A32EmitX64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst) {
Xbyak::Label abort, end;
const auto src_ptr = EmitVAddrLookup(code, ctx.reg_alloc, conf, bitsize, abort, vaddr, value);
const auto src_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr);
EmitReadMemoryMov<bitsize>(code, value, src_ptr);
code.jmp(end);
code.L(end);
code.SwitchToFarCode();
code.L(abort);
code.call(wrapped_fn);
code.L(end);
code.jmp(end, code.T_NEAR);
code.SwitchToNearCode();
ctx.reg_alloc.DefineValue(inst, value);
@ -1063,14 +1066,14 @@ void A32EmitX64::WriteMemory(A32EmitContext& ctx, IR::Inst* inst) {
Xbyak::Label abort, end;
const auto dest_ptr = EmitVAddrLookup(code, ctx.reg_alloc, conf, bitsize, abort, vaddr);
const auto dest_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr);
EmitWriteMemoryMov<bitsize>(code, dest_ptr, value);
code.jmp(end);
code.L(end);
code.SwitchToFarCode();
code.L(abort);
code.call(wrapped_fn);
code.L(end);
code.jmp(end, code.T_NEAR);
code.SwitchToNearCode();
}

View file

@ -26,10 +26,13 @@ namespace Dynarmic::Backend::X64 {
class RegAlloc;
struct A32EmitContext final : public EmitContext {
A32EmitContext(RegAlloc& reg_alloc, IR::Block& block);
A32EmitContext(const A32::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block);
A32::LocationDescriptor Location() const;
bool IsSingleStep() const;
FP::FPCR FPCR() const override;
const A32::UserConfig& conf;
};
class A32EmitX64 final : public EmitX64 {

View file

@ -71,7 +71,15 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
code.EnableWriting();
SCOPE_EXIT { code.DisableWriting(); };
RegAlloc reg_alloc{code, A64JitState::SpillCount, SpillToOpArg<A64JitState>, any_gpr, any_xmm};
static const std::vector<HostLoc> gpr_order = [this]{
std::vector<HostLoc> gprs{any_gpr};
if (conf.page_table) {
gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14));
}
return gprs;
}();
RegAlloc reg_alloc{code, A64JitState::SpillCount, SpillToOpArg<A64JitState>, gpr_order, any_xmm};
A64EmitContext ctx{conf, reg_alloc, block};
// Start emitting.
@ -728,6 +736,7 @@ namespace {
constexpr size_t page_bits = 12;
constexpr size_t page_size = 1 << page_bits;
constexpr size_t page_mask = (1 << page_bits) - 1;
void EmitDetectMisaignedVAddr(BlockOfCode& code, A64EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr, Xbyak::Reg64 tmp) {
if (bitsize == 8 || (ctx.conf.detect_misaligned_access_via_page_table & bitsize) == 0) {
@ -772,16 +781,15 @@ void EmitDetectMisaignedVAddr(BlockOfCode& code, A64EmitContext& ctx, size_t bit
code.SwitchToNearCode();
}
Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, A64EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr, std::optional<Xbyak::Reg64> arg_scratch = {}) {
Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, A64EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr) {
const size_t valid_page_index_bits = ctx.conf.page_table_address_space_bits - page_bits;
const size_t unused_top_bits = 64 - ctx.conf.page_table_address_space_bits;
const Xbyak::Reg64 page_table = arg_scratch ? *arg_scratch : ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 tmp = ctx.conf.absolute_offset_page_table ? page : ctx.reg_alloc.ScratchGpr();
EmitDetectMisaignedVAddr(code, ctx, bitsize, abort, vaddr, tmp);
code.mov(page_table, reinterpret_cast<u64>(ctx.conf.page_table));
code.mov(tmp, vaddr);
if (unused_top_bits == 0) {
code.shr(tmp, int(page_bits));
@ -799,86 +807,108 @@ Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, A64EmitContext& ctx, size_t bit
code.test(tmp, u32(-(1 << valid_page_index_bits)));
code.jnz(abort, code.T_NEAR);
}
code.mov(page_table, qword[page_table + tmp * sizeof(void*)]);
code.test(page_table, page_table);
code.mov(page, qword[r14 + tmp * sizeof(void*)]);
code.test(page, page);
code.jz(abort, code.T_NEAR);
if (ctx.conf.absolute_offset_page_table) {
return page_table + vaddr;
return page + vaddr;
}
code.mov(tmp, vaddr);
code.and_(tmp, static_cast<u32>(page_size - 1));
return page_table + tmp;
code.and_(tmp, static_cast<u32>(page_mask));
return page + tmp;
}
template<std::size_t bitsize>
void EmitReadMemoryMov(BlockOfCode& code, const Xbyak::Reg64& value, const Xbyak::RegExp& addr) {
switch (bitsize) {
case 8:
code.movzx(value.cvt32(), code.byte[addr]);
return;
case 16:
code.movzx(value.cvt32(), word[addr]);
return;
case 32:
code.mov(value.cvt32(), dword[addr]);
return;
case 64:
code.mov(value, qword[addr]);
return;
default:
ASSERT_FALSE("Invalid bitsize");
}
}
template<std::size_t bitsize>
void EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, const Xbyak::Reg64& value) {
switch (bitsize) {
case 8:
code.mov(code.byte[addr], value.cvt8());
return;
case 16:
code.mov(word[addr], value.cvt16());
return;
case 32:
code.mov(dword[addr], value.cvt32());
return;
case 64:
code.mov(qword[addr], value);
return;
default:
ASSERT_FALSE("Invalid bitsize");
}
}
} // anonymous namepsace
void A64EmitX64::EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize) {
Xbyak::Label abort, end;
template<std::size_t bitsize>
void A64EmitX64::EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
const Xbyak::Reg64 value = ctx.reg_alloc.ScratchGpr();
const auto src_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr, value);
switch (bitsize) {
case 8:
code.movzx(value.cvt32(), code.byte[src_ptr]);
break;
case 16:
code.movzx(value.cvt32(), word[src_ptr]);
break;
case 32:
code.mov(value.cvt32(), dword[src_ptr]);
break;
case 64:
code.mov(value, qword[src_ptr]);
break;
}
const auto wrapped_fn = read_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())];
Xbyak::Label abort, end;
const auto src_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr);
EmitReadMemoryMov<bitsize>(code, value, src_ptr);
code.L(end);
code.SwitchToFarCode();
code.L(abort);
code.call(read_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())]);
code.call(wrapped_fn);
code.jmp(end, code.T_NEAR);
code.SwitchToNearCode();
ctx.reg_alloc.DefineValue(inst, value);
}
void A64EmitX64::EmitDirectPageTableMemoryWrite(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize) {
Xbyak::Label abort, end;
template<std::size_t bitsize>
void A64EmitX64::EmitDirectPageTableMemoryWrite(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
const auto wrapped_fn = write_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())];
Xbyak::Label abort, end;
const auto dest_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr);
switch (bitsize) {
case 8:
code.mov(code.byte[dest_ptr], value.cvt8());
break;
case 16:
code.mov(word[dest_ptr], value.cvt16());
break;
case 32:
code.mov(dword[dest_ptr], value.cvt32());
break;
case 64:
code.mov(qword[dest_ptr], value);
break;
}
EmitWriteMemoryMov<bitsize>(code, dest_ptr, value);
code.L(end);
code.SwitchToFarCode();
code.L(abort);
code.call(write_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())]);
code.call(wrapped_fn);
code.jmp(end, code.T_NEAR);
code.SwitchToNearCode();
}
void A64EmitX64::EmitA64ReadMemory8(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.page_table) {
EmitDirectPageTableMemoryRead(ctx, inst, 8);
EmitDirectPageTableMemoryRead<8>(ctx, inst);
return;
}
@ -889,7 +919,7 @@ void A64EmitX64::EmitA64ReadMemory8(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64ReadMemory16(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.page_table) {
EmitDirectPageTableMemoryRead(ctx, inst, 16);
EmitDirectPageTableMemoryRead<16>(ctx, inst);
return;
}
@ -900,7 +930,7 @@ void A64EmitX64::EmitA64ReadMemory16(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64ReadMemory32(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.page_table) {
EmitDirectPageTableMemoryRead(ctx, inst, 32);
EmitDirectPageTableMemoryRead<32>(ctx, inst);
return;
}
@ -911,7 +941,7 @@ void A64EmitX64::EmitA64ReadMemory32(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64ReadMemory64(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.page_table) {
EmitDirectPageTableMemoryRead(ctx, inst, 64);
EmitDirectPageTableMemoryRead<64>(ctx, inst);
return;
}
@ -950,7 +980,7 @@ void A64EmitX64::EmitA64ReadMemory128(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64WriteMemory8(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.page_table) {
EmitDirectPageTableMemoryWrite(ctx, inst, 8);
EmitDirectPageTableMemoryWrite<8>(ctx, inst);
return;
}
@ -961,7 +991,7 @@ void A64EmitX64::EmitA64WriteMemory8(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64WriteMemory16(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.page_table) {
EmitDirectPageTableMemoryWrite(ctx, inst, 16);
EmitDirectPageTableMemoryWrite<16>(ctx, inst);
return;
}
@ -972,7 +1002,7 @@ void A64EmitX64::EmitA64WriteMemory16(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64WriteMemory32(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.page_table) {
EmitDirectPageTableMemoryWrite(ctx, inst, 32);
EmitDirectPageTableMemoryWrite<32>(ctx, inst);
return;
}
@ -983,7 +1013,7 @@ void A64EmitX64::EmitA64WriteMemory32(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64WriteMemory64(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.page_table) {
EmitDirectPageTableMemoryWrite(ctx, inst, 64);
EmitDirectPageTableMemoryWrite<64>(ctx, inst);
return;
}

View file

@ -5,6 +5,7 @@
#pragma once
#include <array>
#include <map>
#include <tuple>
@ -79,8 +80,10 @@ protected:
FastDispatchEntry& (*fast_dispatch_table_lookup)(u64) = nullptr;
void GenTerminalHandlers();
void EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize);
void EmitDirectPageTableMemoryWrite(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize);
template<std::size_t bitsize>
void EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst);
template<std::size_t bitsize>
void EmitDirectPageTableMemoryWrite(A64EmitContext& ctx, IR::Inst* inst);
template<std::size_t bitsize, auto callback>
void EmitExclusiveReadMemory(A64EmitContext& ctx, IR::Inst* inst);
template<std::size_t bitsize, auto callback>

View file

@ -33,8 +33,12 @@ static RunCodeCallbacks GenRunCodeCallbacks(A64::UserCallbacks* cb, CodePtr (*Lo
};
}
static std::function<void(BlockOfCode&)> GenRCP(const A64::UserConfig&) {
return [](BlockOfCode&){};
static std::function<void(BlockOfCode&)> GenRCP(const A64::UserConfig& conf) {
return [conf](BlockOfCode& code) {
if (conf.page_table) {
code.mov(code.r14, Common::BitCast<u64>(conf.page_table));
}
};
}
struct Jit::Impl final {