emit_arm64_memory: Initial fastmem implementation

parent f4727c4ddb
commit e07dde9ed5

7 changed files with 216 additions and 25 deletions

@@ -226,6 +226,9 @@ void A32AddressSpace::EmitPrelude() {
     if (conf.page_table) {
         code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
     }
+    if (conf.fastmem_pointer) {
+        code.MOV(Xfastmem, mcl::bit_cast<u64>(conf.fastmem_pointer));
+    }
 
     if (conf.enable_cycle_counting) {
         code.BL(prelude_info.get_ticks_remaining);
@@ -255,6 +258,9 @@ void A32AddressSpace::EmitPrelude() {
     if (conf.page_table) {
         code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
     }
+    if (conf.fastmem_pointer) {
+        code.MOV(Xfastmem, mcl::bit_cast<u64>(conf.fastmem_pointer));
+    }
 
     if (conf.enable_cycle_counting) {
         code.MOV(Xticks, 1);
@@ -358,6 +364,11 @@ EmitConfig A32AddressSpace::GetEmitConfig() {
         .detect_misaligned_access_via_page_table = conf.detect_misaligned_access_via_page_table,
         .only_detect_misalignment_via_page_table_on_page_boundary = conf.only_detect_misalignment_via_page_table_on_page_boundary,
 
+        .fastmem_pointer = mcl::bit_cast<u64>(conf.fastmem_pointer),
+        .recompile_on_fastmem_failure = conf.recompile_on_fastmem_failure,
+        .fastmem_address_space_bits = 32,
+        .silently_mirror_fastmem = true,
+
         .wall_clock_cntpct = conf.wall_clock_cntpct,
         .enable_cycle_counting = conf.enable_cycle_counting,
 
@@ -406,6 +406,9 @@ void A64AddressSpace::EmitPrelude() {
     if (conf.page_table) {
         code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
     }
+    if (conf.fastmem_pointer) {
+        code.MOV(Xfastmem, mcl::bit_cast<u64>(conf.fastmem_pointer));
+    }
 
     if (conf.enable_cycle_counting) {
         code.BL(prelude_info.get_ticks_remaining);
@@ -434,6 +437,9 @@ void A64AddressSpace::EmitPrelude() {
     if (conf.page_table) {
         code.MOV(Xpagetable, mcl::bit_cast<u64>(conf.page_table));
     }
+    if (conf.fastmem_pointer) {
+        code.MOV(Xfastmem, mcl::bit_cast<u64>(conf.fastmem_pointer));
+    }
 
     if (conf.enable_cycle_counting) {
         code.MOV(Xticks, 1);
@@ -536,6 +542,11 @@ EmitConfig A64AddressSpace::GetEmitConfig() {
         .detect_misaligned_access_via_page_table = conf.detect_misaligned_access_via_page_table,
         .only_detect_misalignment_via_page_table_on_page_boundary = conf.only_detect_misalignment_via_page_table_on_page_boundary,
 
+        .fastmem_pointer = mcl::bit_cast<u64>(conf.fastmem_pointer),
+        .recompile_on_fastmem_failure = conf.recompile_on_fastmem_failure,
+        .fastmem_address_space_bits = conf.fastmem_address_space_bits,
+        .silently_mirror_fastmem = conf.silently_mirror_fastmem,
+
         .wall_clock_cntpct = conf.wall_clock_cntpct,
         .enable_cycle_counting = conf.enable_cycle_counting,
 
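
Note (annotation, not part of the diff): the four new EmitConfig fields mirror the embedder-facing UserConfig. A hypothetical sketch of how a frontend might obtain a fastmem_pointer, assuming a POSIX host and a 4 GiB guest address space; the helper name and mmap flags are illustrative, only the config field names come from this commit:

    #include <sys/mman.h>
    #include <cstdint>

    // Reserve a contiguous 2^32-byte window. Guest address v is then serviced
    // at fastmem_pointer + v; pages left PROT_NONE fault into FastmemCallback.
    void* ReserveFastmemWindow() {
        void* base = mmap(nullptr, std::uint64_t(1) << 32, PROT_NONE,
                          MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
        return base == MAP_FAILED ? nullptr : base;
    }

    // conf.fastmem_pointer = ReserveFastmemWindow();
    // conf.recompile_on_fastmem_failure = true;  // see FastmemCallback below

A32 fixes fastmem_address_space_bits to 32 and silently_mirror_fastmem to true (see the A32AddressSpace::GetEmitConfig hunk above), while A64 forwards both from the user configuration.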
@@ -300,13 +300,15 @@ FakeCall AddressSpace::FastmemCallback(u64 host_pc) {
         goto fail;
     }
 
+    const auto result = iter->second.fc;
+
     if (iter->second.recompile) {
         const auto marker = iter->second.marker;
         fastmem_manager.MarkDoNotFastmem(marker);
         InvalidateBasicBlocks({std::get<0>(marker)});
     }
 
-    return iter->second.fc;
+    return result;
 }
 
 fail:
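
The hunk above fixes an order-of-operations hazard rather than adding a feature: InvalidateBasicBlocks can free the emitted block's storage, including the fastmem_patch_info entry that `iter` points into, so the FakeCall must be copied out first. A self-contained model of the bug, with stand-in types (the map name and fields here are hypothetical):

    #include <cstdint>
    #include <unordered_map>

    struct FakeCallSketch { std::uint64_t call_pc, ret_pc; };
    struct PatchInfoSketch { FakeCallSketch fc; bool recompile; };

    std::unordered_map<std::uint64_t, PatchInfoSketch> patch_info;

    FakeCallSketch Callback(std::uint64_t host_pc) {
        const auto iter = patch_info.find(host_pc);
        const auto result = iter->second.fc;  // copy out BEFORE any invalidation
        if (iter->second.recompile) {
            patch_info.erase(iter);  // models InvalidateBasicBlocks freeing the entry
        }
        return result;  // `return iter->second.fc;` here would read freed memory
    }

The same fix is applied to the x64 backend at the bottom of this commit.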
@@ -128,6 +128,12 @@ struct EmitConfig {
     u8 detect_misaligned_access_via_page_table;
     bool only_detect_misalignment_via_page_table_on_page_boundary;
 
+    // Fastmem
+    u64 fastmem_pointer;
+    bool recompile_on_fastmem_failure;
+    size_t fastmem_address_space_bits;
+    bool silently_mirror_fastmem;
+
     // Timing
     bool wall_clock_cntpct;
     bool enable_cycle_counting;
 
@@ -5,13 +5,16 @@
 
 #include "dynarmic/backend/arm64/emit_arm64_memory.h"
 
+#include <optional>
 #include <utility>
 
+#include <mcl/bit_cast.hpp>
 #include <oaknut/oaknut.hpp>
 
 #include "dynarmic/backend/arm64/abi.h"
 #include "dynarmic/backend/arm64/emit_arm64.h"
 #include "dynarmic/backend/arm64/emit_context.h"
+#include "dynarmic/backend/arm64/fastmem.h"
 #include "dynarmic/backend/arm64/fpsr_manager.h"
 #include "dynarmic/backend/arm64/reg_alloc.h"
 #include "dynarmic/interface/halt_reason.h"
@@ -249,7 +252,7 @@ void EmitDetectMisalignedVAddr(oaknut::CodeGenerator& code, EmitContext& ctx, oa
 // Address to read/write = [ret0 + ret1], ret0 is always Xscratch0 and ret1 is either Xaddr or Xscratch1
 // Trashes NZCV
 template<size_t bitsize>
-std::pair<oaknut::XReg, oaknut::XReg> EmitVAddrLookup(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::XReg Xaddr, const SharedLabel& fallback) {
+std::pair<oaknut::XReg, oaknut::XReg> InlinePageTableEmitVAddrLookup(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::XReg Xaddr, const SharedLabel& fallback) {
     const size_t valid_page_index_bits = ctx.conf.page_table_address_space_bits - page_bits;
     const size_t unused_top_bits = 64 - ctx.conf.page_table_address_space_bits;
 
@@ -280,23 +283,26 @@ std::pair<oaknut::XReg, oaknut::XReg> EmitVAddrLookup(oaknut::CodeGenerator& cod
 }
 
 template<std::size_t bitsize>
-const void* EmitMemoryLdr(oaknut::CodeGenerator& code, int value_idx, oaknut::XReg Xbase, oaknut::XReg Xoffset, bool ordered) {
-    const void* fastmem_location = code.ptr<void*>();
+CodePtr EmitMemoryLdr(oaknut::CodeGenerator& code, int value_idx, oaknut::XReg Xbase, oaknut::XReg Xoffset, bool ordered, bool extend32 = false) {
+    const auto ext = extend32 ? oaknut::IndexExt::UXTW : oaknut::IndexExt::LSL;
+    const auto Roffset = extend32 ? oaknut::RReg{Xoffset.toW()} : oaknut::RReg{Xoffset};
+
+    const CodePtr fastmem_location = code.ptr<CodePtr>();
     switch (bitsize) {
     case 8:
-        code.LDRB(oaknut::WReg{value_idx}, Xbase, Xoffset);
+        code.LDRB(oaknut::WReg{value_idx}, Xbase, Roffset, ext);
         break;
     case 16:
-        code.LDRH(oaknut::WReg{value_idx}, Xbase, Xoffset);
+        code.LDRH(oaknut::WReg{value_idx}, Xbase, Roffset, ext);
         break;
     case 32:
-        code.LDR(oaknut::WReg{value_idx}, Xbase, Xoffset);
+        code.LDR(oaknut::WReg{value_idx}, Xbase, Roffset, ext);
         break;
     case 64:
-        code.LDR(oaknut::XReg{value_idx}, Xbase, Xoffset);
+        code.LDR(oaknut::XReg{value_idx}, Xbase, Roffset, ext);
         break;
     case 128:
-        code.LDR(oaknut::QReg{value_idx}, Xbase, Xoffset);
+        code.LDR(oaknut::QReg{value_idx}, Xbase, Roffset, ext);
         break;
     default:
         ASSERT_FALSE("Invalid bitsize");
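
Why extend32: with a 32-bit guest and a 4 GiB host window, passing the offset as a W register with UXTW makes the CPU zero-extend the low 32 bits during address generation, so address mirroring costs no extra instruction. A plain C++ model of the two addressing modes used above (illustrative only, not code from the diff):

    #include <cstdint>

    // extend32 == true  -> LDR Wd, [Xbase, Woffset, UXTW]: offset zero-extended from 32 bits
    // extend32 == false -> LDR Wd, [Xbase, Xoffset]      : full 64-bit offset
    std::uint64_t EffectiveAddress(std::uint64_t base, std::uint64_t offset, bool extend32) {
        return extend32 ? base + static_cast<std::uint32_t>(offset)
                        : base + offset;
    }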
@@ -311,28 +317,31 @@ const void* EmitMemoryLdr(oaknut::CodeGenerator& code, int value_idx, oaknut::XR
 }
 
 template<std::size_t bitsize>
-const void* EmitMemoryStr(oaknut::CodeGenerator& code, int value_idx, oaknut::XReg Xbase, oaknut::XReg Xoffset, bool ordered) {
+CodePtr EmitMemoryStr(oaknut::CodeGenerator& code, int value_idx, oaknut::XReg Xbase, oaknut::XReg Xoffset, bool ordered, bool extend32 = false) {
+    const auto ext = extend32 ? oaknut::IndexExt::UXTW : oaknut::IndexExt::LSL;
+    const auto Roffset = extend32 ? oaknut::RReg{Xoffset.toW()} : oaknut::RReg{Xoffset};
+
     if (ordered) {
         // TODO: Use STLR
         code.DMB(oaknut::BarrierOp::ISH);
     }
 
-    const void* fastmem_location = code.ptr<void*>();
+    const CodePtr fastmem_location = code.ptr<CodePtr>();
     switch (bitsize) {
     case 8:
-        code.STRB(oaknut::WReg{value_idx}, Xbase, Xoffset);
+        code.STRB(oaknut::WReg{value_idx}, Xbase, Roffset, ext);
         break;
     case 16:
-        code.STRH(oaknut::WReg{value_idx}, Xbase, Xoffset);
+        code.STRH(oaknut::WReg{value_idx}, Xbase, Roffset, ext);
         break;
     case 32:
-        code.STR(oaknut::WReg{value_idx}, Xbase, Xoffset);
+        code.STR(oaknut::WReg{value_idx}, Xbase, Roffset, ext);
         break;
     case 64:
-        code.STR(oaknut::XReg{value_idx}, Xbase, Xoffset);
+        code.STR(oaknut::XReg{value_idx}, Xbase, Roffset, ext);
         break;
     case 128:
-        code.STR(oaknut::QReg{value_idx}, Xbase, Xoffset);
+        code.STR(oaknut::QReg{value_idx}, Xbase, Roffset, ext);
         break;
     default:
         ASSERT_FALSE("Invalid bitsize");
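
For ordered stores the emitter still uses a leading DMB ISH rather than STLR (per the TODO above). In C++ memory-model terms the pairing is roughly a release fence followed by a plain store, as in this sketch:

    #include <atomic>
    #include <cstdint>

    // ~ "DMB ISH; STR": the fence orders all prior accesses before the store,
    // giving it release semantics; a single STLR would achieve this more cheaply.
    void OrderedStore(std::atomic<std::uint32_t>& mem, std::uint32_t value) {
        std::atomic_thread_fence(std::memory_order_release);  // ~ DMB ISH
        mem.store(value, std::memory_order_relaxed);          // ~ STR
    }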
@@ -364,7 +373,7 @@ void InlinePageTableEmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx
 
     SharedLabel fallback = GenSharedLabel(), end = GenSharedLabel();
 
-    const auto [Xbase, Xoffset] = EmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
+    const auto [Xbase, Xoffset] = InlinePageTableEmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
     EmitMemoryLdr<bitsize>(code, Rvalue->index(), Xbase, Xoffset, ordered);
 
     ctx.deferred_emits.emplace_back([&code, &ctx, inst, Xaddr = *Xaddr, Rvalue = *Rvalue, ordered, fallback, end] {
@@ -404,7 +413,7 @@ void InlinePageTableEmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ct
 
     SharedLabel fallback = GenSharedLabel(), end = GenSharedLabel();
 
-    const auto [Xbase, Xoffset] = EmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
+    const auto [Xbase, Xoffset] = InlinePageTableEmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
     EmitMemoryStr<bitsize>(code, Rvalue->index(), Xbase, Xoffset, ordered);
 
     ctx.deferred_emits.emplace_back([&code, &ctx, inst, Xaddr = *Xaddr, Rvalue = *Rvalue, ordered, fallback, end] {
@@ -430,11 +439,155 @@ void InlinePageTableEmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ct
     code.l(*end);
 }
 
+std::optional<DoNotFastmemMarker> ShouldFastmem(EmitContext& ctx, IR::Inst* inst) {
+    if (!ctx.conf.fastmem_pointer || !ctx.fastmem.SupportsFastmem()) {
+        return std::nullopt;
+    }
+
+    const auto inst_offset = std::distance(ctx.block.begin(), IR::Block::iterator(inst));
+    const auto marker = std::make_tuple(ctx.block.Location(), inst_offset);
+    if (ctx.fastmem.ShouldFastmem(marker)) {
+        return marker;
+    }
+    return std::nullopt;
+}
+
+inline bool ShouldExt32(EmitContext& ctx) {
+    return ctx.conf.fastmem_address_space_bits == 32 && ctx.conf.silently_mirror_fastmem;
+}
+
+// May use Xscratch0 as scratch register
+// Address to read/write = [ret0 + ret1], ret0 is always Xfastmem and ret1 is either Xaddr or Xscratch0
+// Trashes NZCV
+template<size_t bitsize>
+std::pair<oaknut::XReg, oaknut::XReg> FastmemEmitVAddrLookup(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::XReg Xaddr, const SharedLabel& fallback) {
+    if (ctx.conf.fastmem_address_space_bits == 64 || ShouldExt32(ctx)) {
+        return std::make_pair(Xfastmem, Xaddr);
+    }
+
+    if (ctx.conf.silently_mirror_fastmem) {
+        code.UBFX(Xscratch0, Xaddr, 0, ctx.conf.fastmem_address_space_bits);
+        return std::make_pair(Xfastmem, Xscratch0);
+    }
+
+    code.LSR(Xscratch0, Xaddr, ctx.conf.fastmem_address_space_bits);
+    code.CBNZ(Xscratch0, *fallback);
+    return std::make_pair(Xfastmem, Xaddr);
+}
+
+template<size_t bitsize>
+void FastmemEmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, DoNotFastmemMarker marker) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto Xaddr = ctx.reg_alloc.ReadX(args[1]);
+    auto Rvalue = [&] {
+        if constexpr (bitsize == 128) {
+            return ctx.reg_alloc.WriteQ(inst);
+        } else {
+            return ctx.reg_alloc.WriteReg<std::max<std::size_t>(bitsize, 32)>(inst);
+        }
+    }();
+    const bool ordered = IsOrdered(args[2].GetImmediateAccType());
+    ctx.fpsr.Spill();
+    ctx.reg_alloc.SpillFlags();
+    RegAlloc::Realize(Xaddr, Rvalue);
+
+    SharedLabel fallback = GenSharedLabel(), end = GenSharedLabel();
+
+    const auto [Xbase, Xoffset] = FastmemEmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
+    const auto fastmem_location = EmitMemoryLdr<bitsize>(code, Rvalue->index(), Xbase, Xoffset, ordered, ShouldExt32(ctx));
+
+    ctx.deferred_emits.emplace_back([&code, &ctx, inst, marker, Xaddr = *Xaddr, Rvalue = *Rvalue, ordered, fallback, end, fastmem_location] {
+        ctx.ebi.fastmem_patch_info.emplace(
+            fastmem_location - ctx.ebi.entry_point,
+            FastmemPatchInfo{
+                .marker = marker,
+                .fc = FakeCall{
+                    .call_pc = mcl::bit_cast<u64>(code.ptr<void*>()),
+                    .ret_pc = 0,
+                },
+                .recompile = ctx.conf.recompile_on_fastmem_failure,
+            });
+
+        code.l(*fallback);
+        code.MOV(Xscratch0, Xaddr);
+        EmitRelocation(code, ctx, WrappedReadMemoryLinkTarget(bitsize));
+        if (ordered) {
+            code.DMB(oaknut::BarrierOp::ISH);
+        }
+        if constexpr (bitsize == 128) {
+            code.MOV(Rvalue.B16(), Q0.B16());
+        } else {
+            code.MOV(Rvalue.toX(), Xscratch0);
+        }
+        ctx.conf.emit_check_memory_abort(code, ctx, inst, *end);
+        code.B(*end);
+    });
+
+    code.l(*end);
+}
+
+template<size_t bitsize>
+void FastmemEmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst, DoNotFastmemMarker marker) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto Xaddr = ctx.reg_alloc.ReadX(args[1]);
+    auto Rvalue = [&] {
+        if constexpr (bitsize == 128) {
+            return ctx.reg_alloc.ReadQ(args[2]);
+        } else {
+            return ctx.reg_alloc.ReadReg<std::max<std::size_t>(bitsize, 32)>(args[2]);
+        }
+    }();
+    const bool ordered = IsOrdered(args[3].GetImmediateAccType());
+    ctx.fpsr.Spill();
+    ctx.reg_alloc.SpillFlags();
+    RegAlloc::Realize(Xaddr, Rvalue);
+
+    SharedLabel fallback = GenSharedLabel(), end = GenSharedLabel();
+
+    const auto [Xbase, Xoffset] = FastmemEmitVAddrLookup<bitsize>(code, ctx, Xaddr, fallback);
+    const auto fastmem_location = EmitMemoryStr<bitsize>(code, Rvalue->index(), Xbase, Xoffset, ordered, ShouldExt32(ctx));
+
+    ctx.deferred_emits.emplace_back([&code, &ctx, inst, marker, Xaddr = *Xaddr, Rvalue = *Rvalue, ordered, fallback, end, fastmem_location] {
+        ctx.ebi.fastmem_patch_info.emplace(
+            fastmem_location - ctx.ebi.entry_point,
+            FastmemPatchInfo{
+                .marker = marker,
+                .fc = FakeCall{
+                    .call_pc = mcl::bit_cast<u64>(code.ptr<void*>()),
+                    .ret_pc = 0,
+                },
+                .recompile = ctx.conf.recompile_on_fastmem_failure,
+            });
+
+        code.l(*fallback);
+        if constexpr (bitsize == 128) {
+            code.MOV(Xscratch0, Xaddr);
+            code.MOV(Q0.B16(), Rvalue.B16());
+        } else {
+            code.MOV(Xscratch0, Xaddr);
+            code.MOV(Xscratch1, Rvalue.toX());
+        }
+        if (ordered) {
+            code.DMB(oaknut::BarrierOp::ISH);
+        }
+        EmitRelocation(code, ctx, WrappedWriteMemoryLinkTarget(bitsize));
+        if (ordered) {
+            code.DMB(oaknut::BarrierOp::ISH);
+        }
+        ctx.conf.emit_check_memory_abort(code, ctx, inst, *end);
+        code.B(*end);
+    });
+
+    code.l(*end);
+}
+
 } // namespace
 
 template<size_t bitsize>
 void EmitReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    if (ctx.conf.page_table_pointer != 0) {
+    if (const auto marker = ShouldFastmem(ctx, inst)) {
+        FastmemEmitReadMemory<bitsize>(code, ctx, inst, *marker);
+    } else if (ctx.conf.page_table_pointer != 0) {
         InlinePageTableEmitReadMemory<bitsize>(code, ctx, inst);
     } else {
         CallbackOnlyEmitReadMemory<bitsize>(code, ctx, inst);
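
FastmemEmitVAddrLookup above selects one of three strategies: pass the address through unchanged (the full 64-bit window, or the 32-bit case where the UXTW addressing mode in EmitMemoryLdr/EmitMemoryStr does the masking for free), mask with UBFX to silently mirror out-of-range addresses, or bounds-check with LSR and CBNZ and branch to the slow fallback. A C++ model of the resulting offset computation (a sketch of the emitted logic, not code from the diff):

    #include <cstdint>
    #include <optional>

    std::optional<std::uint64_t> FastmemOffset(std::uint64_t vaddr, std::size_t bits, bool mirror) {
        if (bits == 64) {
            return vaddr;  // whole address space mapped; use Xaddr directly
        }
        if (mirror) {
            // UBFX Xscratch0, Xaddr, #0, #bits: wrap into the reserved window
            return vaddr & ((std::uint64_t(1) << bits) - 1);
        }
        // LSR Xscratch0, Xaddr, #bits; CBNZ -> fallback when any high bit is set
        if ((vaddr >> bits) != 0) {
            return std::nullopt;
        }
        return vaddr;
    }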
@@ -448,7 +601,9 @@ void EmitExclusiveReadMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::
 
 template<size_t bitsize>
 void EmitWriteMemory(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
-    if (ctx.conf.page_table_pointer != 0) {
+    if (const auto marker = ShouldFastmem(ctx, inst)) {
+        FastmemEmitWriteMemory<bitsize>(code, ctx, inst, *marker);
+    } else if (ctx.conf.page_table_pointer != 0) {
         InlinePageTableEmitWriteMemory<bitsize>(code, ctx, inst);
     } else {
         CallbackOnlyEmitWriteMemory<bitsize>(code, ctx, inst);
@@ -36,8 +36,12 @@ public:
     explicit FastmemManager(ExceptionHandler& eh)
         : exception_handler(eh) {}
 
+    bool SupportsFastmem() const {
+        return exception_handler.SupportsFastmem();
+    }
+
     bool ShouldFastmem(DoNotFastmemMarker marker) const {
-        return exception_handler.SupportsFastmem() && do_not_fastmem.count(marker) == 0;
+        return do_not_fastmem.count(marker) == 0;
     }
 
     void MarkDoNotFastmem(DoNotFastmemMarker marker) {
@@ -37,16 +37,18 @@ FakeCall AxxEmitX64::FastmemCallback(u64 rip_) {
         ASSERT_FALSE("iter != fastmem_patch_info.end()");
     }
 
+    FakeCall result{
+        .call_rip = iter->second.callback,
+        .ret_rip = iter->second.resume_rip,
+    };
+
     if (iter->second.recompile) {
         const auto marker = iter->second.marker;
         do_not_fastmem.emplace(marker);
         InvalidateBasicBlocks({std::get<0>(marker)});
    }
 
-    return FakeCall{
-        .call_rip = iter->second.callback,
-        .ret_rip = iter->second.resume_rip,
-    };
+    return result;
 }
 
 template<std::size_t bitsize, auto callback>