From f45a5e17c6a7fcf4a8b1e551fe8113b21d16b0c9 Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Mon, 12 Feb 2018 19:49:56 +0000
Subject: [PATCH] Implement direct page table access

---
 include/dynarmic/A64/config.h    |   3 +
 src/backend_x64/a64_emit_x64.cpp | 258 +++++++++++++++++++++++++++++++
 src/backend_x64/a64_emit_x64.h   |  10 ++
 src/backend_x64/hostloc.h        |  10 ++
 4 files changed, 281 insertions(+)

diff --git a/include/dynarmic/A64/config.h b/include/dynarmic/A64/config.h
index 312f71a6..e8ef1b15 100644
--- a/include/dynarmic/A64/config.h
+++ b/include/dynarmic/A64/config.h
@@ -111,6 +111,9 @@ struct UserConfig {
     /// emitted code.
     const std::uint64_t* tpidrro_el0 = nullptr;
 
+    /// Pointer to the page table which we can use for direct page table access.
+    void** page_table = nullptr;
+
     // Determines whether AddTicks and GetTicksRemaining are called.
     // If false, execution will continue until soon after Jit::HaltExecution is called.
     // bool enable_ticks = true; // TODO

diff --git a/src/backend_x64/a64_emit_x64.cpp b/src/backend_x64/a64_emit_x64.cpp
index 412943b3..ea0f9d20 100644
--- a/src/backend_x64/a64_emit_x64.cpp
+++ b/src/backend_x64/a64_emit_x64.cpp
@@ -4,6 +4,8 @@
  * General Public License version 2 or any later version.
  */
 
+#include <initializer_list>
+
 #include <unordered_map>
 
 #include "backend_x64/a64_emit_x64.h"
@@ -53,6 +55,7 @@ A64EmitX64::A64EmitX64(BlockOfCode& code, A64::UserConfig conf)
     : EmitX64(code), conf(conf) {
     GenMemory128Accessors();
+    GenFastmemFallbacks();
     code.PreludeComplete();
 }
 
@@ -174,6 +177,94 @@ void A64EmitX64::GenMemory128Accessors() {
     code.ret();
 }
 
+void A64EmitX64::GenFastmemFallbacks() {
+    const std::initializer_list<int> idxes{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+    const std::vector<std::pair<size_t, ArgCallback>> read_callbacks {
+        {8, DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead8)},
+        {16, DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead16)},
+        {32, DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead32)},
+        {64, DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead64)},
+    };
+    const std::vector<std::pair<size_t, ArgCallback>> write_callbacks {
+        {8, DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite8)},
+        {16, DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite16)},
+        {32, DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite32)},
+        {64, DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite64)},
+    };
+
+    for (int vaddr_idx : idxes) {
+        if (vaddr_idx == 4 || vaddr_idx == 15) {
+            continue;
+        }
+
+        for (int value_idx : idxes) {
+            code.align();
+            read_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)] = code.getCurr<void(*)()>();
+            ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(value_idx));
+            if (vaddr_idx != code.ABI_PARAM2.getIdx()) {
+                code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
+            }
+            code.call(memory_read_128);
+            if (value_idx != 0) {
+                code.movaps(Xbyak::Xmm{value_idx}, xmm0);
+            }
+            ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(value_idx));
+            code.ret();
+
+            code.align();
+            write_fallbacks[std::make_tuple(128, vaddr_idx, value_idx)] = code.getCurr<void(*)()>();
+            ABI_PushCallerSaveRegistersAndAdjustStack(code);
+            if (vaddr_idx != code.ABI_PARAM2.getIdx()) {
+                code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
+            }
+            if (value_idx != 0) {
+                code.movaps(xmm0, Xbyak::Xmm{value_idx});
+            }
+            code.call(memory_write_128);
+            ABI_PopCallerSaveRegistersAndAdjustStack(code);
+            code.ret();
+
+            if (vaddr_idx == value_idx || value_idx == 4 || value_idx == 15) {
+                continue;
+            }
+
+            for (auto& [bitsize, callback] : read_callbacks) {
+                code.align();
+                read_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void(*)()>();
+                ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx));
+                if (vaddr_idx != code.ABI_PARAM2.getIdx()) {
+                    code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
+                }
+                callback.EmitCall(code);
+                if (value_idx != code.ABI_RETURN.getIdx()) {
+                    code.mov(Xbyak::Reg64{value_idx}, code.ABI_RETURN);
+                }
+                ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocRegIdx(value_idx));
+                code.ret();
+            }
+
+            for (auto& [bitsize, callback] : write_callbacks) {
+                code.align();
+                write_fallbacks[std::make_tuple(bitsize, vaddr_idx, value_idx)] = code.getCurr<void(*)()>();
+                ABI_PushCallerSaveRegistersAndAdjustStack(code);
+                if (vaddr_idx == code.ABI_PARAM3.getIdx() && value_idx == code.ABI_PARAM2.getIdx()) {
+                    code.xchg(code.ABI_PARAM2, code.ABI_PARAM3);
+                } else {
+                    if (vaddr_idx != code.ABI_PARAM2.getIdx()) {
+                        code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx});
+                    }
+                    if (value_idx != code.ABI_PARAM3.getIdx()) {
+                        code.mov(code.ABI_PARAM3, Xbyak::Reg64{value_idx});
+                    }
+                }
+                callback.EmitCall(code);
+                ABI_PopCallerSaveRegistersAndAdjustStack(code);
+                code.ret();
+            }
+        }
+    }
+}
+
 void A64EmitX64::EmitA64SetCheckBit(A64EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8();
@@ -389,31 +480,157 @@ void A64EmitX64::EmitA64GetTPIDRRO(A64EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
+static std::tuple<Xbyak::Reg64, Xbyak::RegExp> EmitVAddrLookup(const A64::UserConfig& conf, BlockOfCode& code, A64EmitContext& ctx, Xbyak::Reg64 vaddr, boost::optional<Xbyak::Reg64> arg_scratch = {}) {
+    constexpr int PAGE_BITS = 12;
+    constexpr u64 PAGE_SIZE = 1 << PAGE_BITS;
+
+    Xbyak::Reg64 page_table = arg_scratch.value_or_eval([&]{ return ctx.reg_alloc.ScratchGpr(); });
+    Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
+    code.mov(page_table, reinterpret_cast<u64>(conf.page_table));
+    code.mov(tmp, vaddr);
+    code.shr(tmp, PAGE_BITS);
+    code.mov(page_table, qword[page_table + tmp * sizeof(void*)]);
+    code.mov(tmp, vaddr);
+    code.and_(tmp, PAGE_SIZE - 1);
+    return {page_table, page_table + tmp};
+}
+
+void A64EmitX64::EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize) {
+    Xbyak::Label abort, end;
+
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
+    Xbyak::Reg64 value = ctx.reg_alloc.ScratchGpr();
+
+    auto [page, src_ptr] = EmitVAddrLookup(conf, code, ctx, vaddr, value);
+    code.test(page, page);
+    code.jz(abort, code.T_NEAR);
+    switch (bitsize) {
+    case 8:
+        code.movzx(value.cvt32(), code.byte[src_ptr]);
+        break;
+    case 16:
+        code.movzx(value.cvt32(), word[src_ptr]);
+        break;
+    case 32:
+        code.mov(value.cvt32(), dword[src_ptr]);
+        break;
+    case 64:
+        code.mov(value, qword[src_ptr]);
+        break;
+    }
+    code.L(end);
+
+    code.SwitchToFarCode();
+    code.L(abort);
+    code.call(read_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())]);
+    code.jmp(end, code.T_NEAR);
+    code.SwitchToNearCode();
+
+    ctx.reg_alloc.DefineValue(inst, value);
+}
+
+void A64EmitX64::EmitDirectPageTableMemoryWrite(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize) {
+    Xbyak::Label abort, end;
+
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
+    Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
+
+    auto [page, dest_ptr] = EmitVAddrLookup(conf, code, ctx, vaddr);
+    code.test(page, page);
+    code.jz(abort, code.T_NEAR);
+    switch (bitsize) {
+    case 8:
+        code.mov(code.byte[dest_ptr], value.cvt8());
+        break;
+    case 16:
+        code.mov(word[dest_ptr], value.cvt16());
+        break;
+    case 32:
+        code.mov(dword[dest_ptr], value.cvt32());
+        break;
+    case 64:
+        code.mov(qword[dest_ptr], value);
+        break;
+    }
+    code.L(end);
+
+    code.SwitchToFarCode();
+    code.L(abort);
+    code.call(write_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())]);
+    code.jmp(end, code.T_NEAR);
+    code.SwitchToNearCode();
+}
+
 void A64EmitX64::EmitA64ReadMemory8(A64EmitContext& ctx, IR::Inst* inst) {
+    if (conf.page_table) {
+        EmitDirectPageTableMemoryRead(ctx, inst, 8);
+        return;
+    }
+
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ctx.reg_alloc.HostCall(inst, {}, args[0]);
     DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead8).EmitCall(code);
 }
 
 void A64EmitX64::EmitA64ReadMemory16(A64EmitContext& ctx, IR::Inst* inst) {
+    if (conf.page_table) {
+        EmitDirectPageTableMemoryRead(ctx, inst, 16);
+        return;
+    }
+
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ctx.reg_alloc.HostCall(inst, {}, args[0]);
     DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead16).EmitCall(code);
 }
 
 void A64EmitX64::EmitA64ReadMemory32(A64EmitContext& ctx, IR::Inst* inst) {
+    if (conf.page_table) {
+        EmitDirectPageTableMemoryRead(ctx, inst, 32);
+        return;
+    }
+
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ctx.reg_alloc.HostCall(inst, {}, args[0]);
     DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead32).EmitCall(code);
 }
 
 void A64EmitX64::EmitA64ReadMemory64(A64EmitContext& ctx, IR::Inst* inst) {
+    if (conf.page_table) {
+        EmitDirectPageTableMemoryRead(ctx, inst, 64);
+        return;
+    }
+
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ctx.reg_alloc.HostCall(inst, {}, args[0]);
     DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryRead64).EmitCall(code);
 }
 
 void A64EmitX64::EmitA64ReadMemory128(A64EmitContext& ctx, IR::Inst* inst) {
+    if (conf.page_table) {
+        Xbyak::Label abort, end;
+
+        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+        Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
+        Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm();
+
+        auto [page, dest_ptr] = EmitVAddrLookup(conf, code, ctx, vaddr);
+        code.test(page, page);
+        code.jz(abort, code.T_NEAR);
+        code.movups(value, xword[dest_ptr]);
+        code.L(end);
+
+        code.SwitchToFarCode();
+        code.L(abort);
+        code.call(read_fallbacks[std::make_tuple(128, vaddr.getIdx(), value.getIdx())]);
+        code.jmp(end, code.T_NEAR);
+        code.SwitchToNearCode();
+
+        ctx.reg_alloc.DefineValue(inst, value);
+        return;
+    }
+
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
     code.CallFunction(memory_read_128);
@@ -421,30 +638,71 @@ void A64EmitX64::EmitA64ReadMemory128(A64EmitContext& ctx, IR::Inst* inst) {
 }
 
 void A64EmitX64::EmitA64WriteMemory8(A64EmitContext& ctx, IR::Inst* inst) {
+    if (conf.page_table) {
+        EmitDirectPageTableMemoryWrite(ctx, inst, 8);
+        return;
+    }
+
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
     DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite8).EmitCall(code);
 }
 
 void A64EmitX64::EmitA64WriteMemory16(A64EmitContext& ctx, IR::Inst* inst) {
+    if (conf.page_table) {
+        EmitDirectPageTableMemoryWrite(ctx, inst, 16);
+        return;
+    }
+
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
     DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite16).EmitCall(code);
 }
 
 void A64EmitX64::EmitA64WriteMemory32(A64EmitContext& ctx, IR::Inst* inst) {
+    if (conf.page_table) {
+        EmitDirectPageTableMemoryWrite(ctx, inst, 32);
+        return;
+    }
+
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
     DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite32).EmitCall(code);
 }
 
 void A64EmitX64::EmitA64WriteMemory64(A64EmitContext& ctx, IR::Inst* inst) {
+    if (conf.page_table) {
+        EmitDirectPageTableMemoryWrite(ctx, inst, 64);
+        return;
+    }
+
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
     DEVIRT(conf.callbacks, &A64::UserCallbacks::MemoryWrite64).EmitCall(code);
 }
 
 void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) {
+    if (conf.page_table) {
+        Xbyak::Label abort, end;
+
+        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+        Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
+        Xbyak::Xmm value = ctx.reg_alloc.UseXmm(args[1]);
+
+        auto [page, dest_ptr] = EmitVAddrLookup(conf, code, ctx, vaddr);
+        code.test(page, page);
+        code.jz(abort, code.T_NEAR);
+        code.movups(xword[dest_ptr], value);
+        code.L(end);
+
+        code.SwitchToFarCode();
+        code.L(abort);
+        code.call(write_fallbacks[std::make_tuple(128, vaddr.getIdx(), value.getIdx())]);
+        code.jmp(end, code.T_NEAR);
+        code.SwitchToNearCode();
+        return;
+    }
+
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ctx.reg_alloc.Use(args[0], ABI_PARAM2);
     ctx.reg_alloc.Use(args[1], HostLoc::XMM0);
diff --git a/src/backend_x64/a64_emit_x64.h b/src/backend_x64/a64_emit_x64.h
index 1ac71438..415958e5 100644
--- a/src/backend_x64/a64_emit_x64.h
+++ b/src/backend_x64/a64_emit_x64.h
@@ -6,6 +6,9 @@
 
 #pragma once
 
+#include <map>
+#include <tuple>
+
 #include "backend_x64/a64_jitstate.h"
 #include "backend_x64/block_range_information.h"
 #include "backend_x64/emit_x64.h"
@@ -48,6 +51,13 @@ protected:
     void (*memory_write_128)();
     void GenMemory128Accessors();
 
+    std::map<std::tuple<size_t, int, int>, void(*)()> read_fallbacks;
+    std::map<std::tuple<size_t, int, int>, void(*)()> write_fallbacks;
+    void GenFastmemFallbacks();
+
+    void EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize);
+    void EmitDirectPageTableMemoryWrite(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize);
+
     // Microinstruction emitters
 #define OPCODE(...)
 #define A32OPC(...)
diff --git a/src/backend_x64/hostloc.h b/src/backend_x64/hostloc.h
index bb6d22fd..59f48a23 100644
--- a/src/backend_x64/hostloc.h
+++ b/src/backend_x64/hostloc.h
@@ -39,6 +39,16 @@ inline bool HostLocIsFlag(HostLoc reg) {
     return reg >= HostLoc::CF && reg <= HostLoc::OF;
 }
 
+inline HostLoc HostLocRegIdx(int idx) {
+    ASSERT(idx >= 0 && idx <= 15);
+    return static_cast<HostLoc>(idx);
+}
+
+inline HostLoc HostLocXmmIdx(int idx) {
+    ASSERT(idx >= 0 && idx <= 15);
+    return static_cast<HostLoc>(static_cast<size_t>(HostLoc::XMM0) + idx);
+}
+
 inline HostLoc HostLocSpill(size_t i) {
     return static_cast<HostLoc>(static_cast<size_t>(HostLoc::FirstSpill) + i);
 }
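
Usage note (not part of the patch itself): the sketch below illustrates how an embedder might populate the new UserConfig::page_table field that the emitted code dereferences. The 4 KiB page granularity (PAGE_BITS = 12), the void** entry layout, and the fact that a nullptr entry makes the generated load/store branch to the MemoryRead*/MemoryWrite* fallback thunks are taken from the patch; the 36-bit address-space size, the MapHostMemory helper and all other names are assumptions for illustration only. Since EmitVAddrLookup indexes page_table[vaddr >> PAGE_BITS] without any bounds check, the table has to cover every virtual address the guest can access.

    // Hypothetical embedder-side setup (illustration only; names and sizes are assumptions).
    #include <cstddef>
    #include <cstdint>
    #include <memory>

    constexpr std::size_t PAGE_BITS = 12;          // must match PAGE_BITS in EmitVAddrLookup
    constexpr std::size_t ADDRESS_SPACE_BITS = 36; // assumed guest address-space size
    constexpr std::size_t NUM_PAGES = std::size_t(1) << (ADDRESS_SPACE_BITS - PAGE_BITS);

    // One entry per guest page. Entries left as nullptr take the slow path:
    // the emitted load/store tests the entry and jumps to the fallback thunk,
    // which calls the MemoryRead*/MemoryWrite* callbacks.
    std::unique_ptr<void*[]> page_table = std::make_unique<void*[]>(NUM_PAGES);

    // Point a range of guest pages at host memory; guest_vaddr and size are
    // assumed to be 4 KiB aligned.
    void MapHostMemory(std::uint64_t guest_vaddr, std::uint8_t* host_ptr, std::size_t size) {
        for (std::size_t offset = 0; offset < size; offset += std::size_t(1) << PAGE_BITS) {
            page_table[(guest_vaddr + offset) >> PAGE_BITS] = host_ptr + offset;
        }
    }

    // Dynarmic::A64::UserConfig config;
    // config.page_table = page_table.get();
    //
    // The generated fast path then resolves a guest access to vaddr as
    //     host_address = page_table[vaddr >> PAGE_BITS] + (vaddr & (PAGE_SIZE - 1))
    // and only falls back to the read_fallbacks/write_fallbacks thunks when the
    // looked-up entry is null.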