From 675efecf4708e70558630aa34d099b71e7a7cd22 Mon Sep 17 00:00:00 2001 From: merry Date: Tue, 29 Mar 2022 20:11:29 +0100 Subject: [PATCH] emit_x64_memory: Combine A32 and A64 memory code --- src/dynarmic/CMakeLists.txt | 1 + src/dynarmic/backend/x64/a32_emit_x64.h | 13 +- .../backend/x64/a32_emit_x64_memory.cpp | 372 +------------- .../backend/x64/a64_emit_x64_memory.cpp | 453 +---------------- .../backend/x64/emit_x64_memory.cpp.inc | 471 ++++++++++++++++++ 5 files changed, 502 insertions(+), 808 deletions(-) create mode 100644 src/dynarmic/backend/x64/emit_x64_memory.cpp.inc diff --git a/src/dynarmic/CMakeLists.txt b/src/dynarmic/CMakeLists.txt index c6423337..38b5b43e 100644 --- a/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/CMakeLists.txt @@ -285,6 +285,7 @@ if (ARCHITECTURE STREQUAL "x86_64") backend/x64/emit_x64_data_processing.cpp backend/x64/emit_x64_floating_point.cpp backend/x64/emit_x64_memory.h + backend/x64/emit_x64_memory.cpp.inc backend/x64/emit_x64_packed.cpp backend/x64/emit_x64_saturation.cpp backend/x64/emit_x64_sm4.cpp diff --git a/src/dynarmic/backend/x64/a32_emit_x64.h b/src/dynarmic/backend/x64/a32_emit_x64.h index 43f90f6a..e87454b4 100644 --- a/src/dynarmic/backend/x64/a32_emit_x64.h +++ b/src/dynarmic/backend/x64/a32_emit_x64.h @@ -71,6 +71,9 @@ protected: std::array fast_dispatch_table; void ClearFastDispatchTable(); + void (*memory_read_128)() = nullptr; // Dummy + void (*memory_write_128)() = nullptr; // Dummy + std::map, void (*)()> read_fallbacks; std::map, void (*)()> write_fallbacks; std::map, void (*)()> exclusive_write_fallbacks; @@ -99,7 +102,7 @@ protected: u64 resume_rip; u64 callback; DoNotFastmemMarker marker; - bool compile; + bool recompile; }; tsl::robin_map fastmem_patch_info; std::set do_not_fastmem; @@ -112,13 +115,13 @@ protected: template void EmitMemoryWrite(A32EmitContext& ctx, IR::Inst* inst); template - void ExclusiveReadMemory(A32EmitContext& ctx, IR::Inst* inst); + void EmitExclusiveReadMemory(A32EmitContext& ctx, IR::Inst* inst); template - void ExclusiveWriteMemory(A32EmitContext& ctx, IR::Inst* inst); + void EmitExclusiveWriteMemory(A32EmitContext& ctx, IR::Inst* inst); template - void ExclusiveReadMemoryInline(A32EmitContext& ctx, IR::Inst* inst); + void EmitExclusiveReadMemoryInline(A32EmitContext& ctx, IR::Inst* inst); template - void ExclusiveWriteMemoryInline(A32EmitContext& ctx, IR::Inst* inst); + void EmitExclusiveWriteMemoryInline(A32EmitContext& ctx, IR::Inst* inst); // Terminal instruction emitters void EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_location, IR::LocationDescriptor old_location); diff --git a/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp b/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp index 43d7c0c7..6608bc2c 100644 --- a/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp +++ b/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp @@ -123,146 +123,9 @@ void A32EmitX64::GenFastmemFallbacks() { } } -std::optional A32EmitX64::ShouldFastmem(A32EmitContext& ctx, IR::Inst* inst) const { - if (!conf.fastmem_pointer || !exception_handler.SupportsFastmem()) { - return std::nullopt; - } - - const auto marker = std::make_tuple(ctx.Location(), ctx.GetInstOffset(inst)); - if (do_not_fastmem.count(marker) > 0) { - return std::nullopt; - } - return marker; -} - -FakeCall A32EmitX64::FastmemCallback(u64 rip_) { - const auto iter = fastmem_patch_info.find(rip_); - - if (iter == fastmem_patch_info.end()) { - fmt::print("dynarmic: Segfault happened within JITted code at rip = {:016x}\n", rip_); - 
fmt::print("Segfault wasn't at a fastmem patch location!\n"); - fmt::print("Now dumping code.......\n\n"); - Common::DumpDisassembledX64((void*)(rip_ & ~u64(0xFFF)), 0x1000); - ASSERT_FALSE("iter != fastmem_patch_info.end()"); - } - - if (iter->second.compile) { - const auto marker = iter->second.marker; - do_not_fastmem.emplace(marker); - InvalidateBasicBlocks({std::get<0>(marker)}); - } - - return FakeCall{ - .call_rip = iter->second.callback, - .ret_rip = iter->second.resume_rip, - }; -} - -template -void A32EmitX64::EmitMemoryRead(A32EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const auto fastmem_marker = ShouldFastmem(ctx, inst); - - if (!conf.page_table && !fastmem_marker) { - // Neither fastmem nor page table: Use callbacks - ctx.reg_alloc.HostCall(inst, {}, args[0]); - Devirtualize(conf.callbacks).EmitCall(code); - code.ZeroExtendFrom(bitsize, code.ABI_RETURN); - return; - } - - const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); - const Xbyak::Reg64 value = ctx.reg_alloc.ScratchGpr(); - - const auto wrapped_fn = read_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())]; - - if (fastmem_marker) { - // Use fastmem - const auto src_ptr = r13 + vaddr; - - const auto location = code.getCurr(); - EmitReadMemoryMov(code, value.getIdx(), src_ptr); - - fastmem_patch_info.emplace( - Common::BitCast(location), - FastmemPatchInfo{ - Common::BitCast(code.getCurr()), - Common::BitCast(wrapped_fn), - *fastmem_marker, - conf.recompile_on_fastmem_failure, - }); - - ctx.reg_alloc.DefineValue(inst, value); - return; - } - - // Use page table - ASSERT(conf.page_table); - Xbyak::Label abort, end; - - const auto src_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr); - EmitReadMemoryMov(code, value.getIdx(), src_ptr); - code.L(end); - - code.SwitchToFarCode(); - code.L(abort); - code.call(wrapped_fn); - code.jmp(end, code.T_NEAR); - code.SwitchToNearCode(); - - ctx.reg_alloc.DefineValue(inst, value); -} - -template -void A32EmitX64::EmitMemoryWrite(A32EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const auto fastmem_marker = ShouldFastmem(ctx, inst); - - if (!conf.page_table && !fastmem_marker) { - // Neither fastmem nor page table: Use callbacks - ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]); - Devirtualize(conf.callbacks).EmitCall(code); - return; - } - - const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); - const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]); - - const auto wrapped_fn = write_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())]; - - if (fastmem_marker) { - // Use fastmem - const auto dest_ptr = r13 + vaddr; - - const auto location = code.getCurr(); - EmitWriteMemoryMov(code, dest_ptr, value.getIdx()); - - fastmem_patch_info.emplace( - Common::BitCast(location), - FastmemPatchInfo{ - Common::BitCast(code.getCurr()), - Common::BitCast(wrapped_fn), - *fastmem_marker, - conf.recompile_on_fastmem_failure, - }); - - return; - } - - // Use page table - ASSERT(conf.page_table); - Xbyak::Label abort, end; - - const auto dest_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr); - EmitWriteMemoryMov(code, dest_ptr, value.getIdx()); - code.L(end); - - code.SwitchToFarCode(); - code.L(abort); - code.call(wrapped_fn); - code.jmp(end, code.T_NEAR); - code.SwitchToNearCode(); -} +#define Axx A32 +#include "emit_x64_memory.cpp.inc" +#undef Axx void A32EmitX64::EmitA32ReadMemory8(A32EmitContext& ctx, IR::Inst* inst) { EmitMemoryRead<8, 
&A32::UserCallbacks::MemoryRead8>(ctx, inst); @@ -296,268 +159,71 @@ void A32EmitX64::EmitA32WriteMemory64(A32EmitContext& ctx, IR::Inst* inst) { EmitMemoryWrite<64, &A32::UserCallbacks::MemoryWrite64>(ctx, inst); } -template -void A32EmitX64::ExclusiveReadMemory(A32EmitContext& ctx, IR::Inst* inst) { - using T = mp::unsigned_integer_of_size; - - ASSERT(conf.global_monitor != nullptr); - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - ctx.reg_alloc.HostCall(inst, {}, args[0]); - - code.mov(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(1)); - code.mov(code.ABI_PARAM1, reinterpret_cast(&conf)); - code.CallLambda( - [](A32::UserConfig& conf, u32 vaddr) -> T { - return conf.global_monitor->ReadAndMark(conf.processor_id, vaddr, [&]() -> T { - return (conf.callbacks->*callback)(vaddr); - }); - }); - code.ZeroExtendFrom(bitsize, code.ABI_RETURN); -} - -template -void A32EmitX64::ExclusiveWriteMemory(A32EmitContext& ctx, IR::Inst* inst) { - using T = mp::unsigned_integer_of_size; - - ASSERT(conf.global_monitor != nullptr); - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - ctx.reg_alloc.HostCall(inst, {}, args[0], args[1]); - - Xbyak::Label end; - - code.mov(code.ABI_RETURN, u32(1)); - code.cmp(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0)); - code.je(end); - code.mov(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0)); - code.mov(code.ABI_PARAM1, reinterpret_cast(&conf)); - code.CallLambda( - [](A32::UserConfig& conf, u32 vaddr, T value) -> u32 { - return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, - [&](T expected) -> bool { - return (conf.callbacks->*callback)(vaddr, value, expected); - }) - ? 0 - : 1; - }); - code.L(end); -} - -template -void A32EmitX64::ExclusiveReadMemoryInline(A32EmitContext& ctx, IR::Inst* inst) { - ASSERT(conf.global_monitor && conf.fastmem_pointer); - if (!exception_handler.SupportsFastmem()) { - ExclusiveReadMemory(ctx, inst); - return; - } - - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); - const Xbyak::Reg64 value = ctx.reg_alloc.ScratchGpr(); - const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); - const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr(); - - const auto wrapped_fn = read_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())]; - - EmitExclusiveLock(code, conf, tmp, tmp2.cvt32()); - - code.mov(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(1)); - code.mov(tmp, Common::BitCast(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id))); - code.mov(qword[tmp], vaddr); - - const auto fastmem_marker = ShouldFastmem(ctx, inst); - if (fastmem_marker) { - Xbyak::Label end; - - const auto src_ptr = r13 + vaddr; - - const auto location = code.getCurr(); - EmitReadMemoryMov(code, value.getIdx(), src_ptr); - - fastmem_patch_info.emplace( - Common::BitCast(location), - FastmemPatchInfo{ - Common::BitCast(code.getCurr()), - Common::BitCast(wrapped_fn), - *fastmem_marker, - conf.recompile_on_exclusive_fastmem_failure, - }); - - code.L(end); - } else { - code.call(wrapped_fn); - } - - code.mov(tmp, Common::BitCast(GetExclusiveMonitorValuePointer(conf.global_monitor, conf.processor_id))); - EmitWriteMemoryMov(code, tmp, value.getIdx()); - - EmitExclusiveUnlock(code, conf, tmp, tmp2.cvt32()); - - ctx.reg_alloc.DefineValue(inst, value); -} - -template -void A32EmitX64::ExclusiveWriteMemoryInline(A32EmitContext& ctx, IR::Inst* inst) { - ASSERT(conf.global_monitor && conf.fastmem_pointer); - if 
(!exception_handler.SupportsFastmem()) { - ExclusiveWriteMemory(ctx, inst); - return; - } - - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - ctx.reg_alloc.ScratchGpr(HostLoc::RAX); - const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]); - const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); - const Xbyak::Reg32 status = ctx.reg_alloc.ScratchGpr().cvt32(); - const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); - - const auto fallback_fn = exclusive_write_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())]; - - EmitExclusiveLock(code, conf, tmp, eax); - - Xbyak::Label end; - - code.mov(tmp, Common::BitCast(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id))); - code.mov(status, u32(1)); - code.cmp(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0)); - code.je(end, code.T_NEAR); - code.cmp(qword[tmp], vaddr); - code.jne(end, code.T_NEAR); - - EmitExclusiveTestAndClear(code, conf, vaddr, tmp, rax); - - code.mov(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0)); - code.mov(tmp, Common::BitCast(GetExclusiveMonitorValuePointer(conf.global_monitor, conf.processor_id))); - - EmitReadMemoryMov(code, rax.getIdx(), tmp); - - const auto fastmem_marker = ShouldFastmem(ctx, inst); - if (fastmem_marker) { - const auto dest_ptr = r13 + vaddr; - - const auto location = code.getCurr(); - - switch (bitsize) { - case 8: - code.lock(); - code.cmpxchg(code.byte[dest_ptr], value.cvt8()); - break; - case 16: - code.lock(); - code.cmpxchg(word[dest_ptr], value.cvt16()); - break; - case 32: - code.lock(); - code.cmpxchg(dword[dest_ptr], value.cvt32()); - break; - case 64: - code.lock(); - code.cmpxchg(qword[dest_ptr], value.cvt64()); - break; - default: - UNREACHABLE(); - } - - code.setnz(status.cvt8()); - - code.SwitchToFarCode(); - - fastmem_patch_info.emplace( - Common::BitCast(location), - FastmemPatchInfo{ - Common::BitCast(code.getCurr()), - Common::BitCast(fallback_fn), - *fastmem_marker, - conf.recompile_on_exclusive_fastmem_failure, - }); - - code.cmp(al, 0); - code.setz(status.cvt8()); - code.movzx(status.cvt32(), status.cvt8()); - code.jmp(end, code.T_NEAR); - code.SwitchToNearCode(); - } else { - code.call(fallback_fn); - code.cmp(al, 0); - code.setz(status.cvt8()); - code.movzx(status.cvt32(), status.cvt8()); - } - - code.L(end); - - EmitExclusiveUnlock(code, conf, tmp, eax); - - ctx.reg_alloc.DefineValue(inst, status); -} - void A32EmitX64::EmitA32ClearExclusive(A32EmitContext&, IR::Inst*) { code.mov(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0)); } void A32EmitX64::EmitA32ExclusiveReadMemory8(A32EmitContext& ctx, IR::Inst* inst) { if (conf.fastmem_exclusive_access) { - ExclusiveReadMemoryInline<8, &A32::UserCallbacks::MemoryRead8>(ctx, inst); + EmitExclusiveReadMemoryInline<8, &A32::UserCallbacks::MemoryRead8>(ctx, inst); } else { - ExclusiveReadMemory<8, &A32::UserCallbacks::MemoryRead8>(ctx, inst); + EmitExclusiveReadMemory<8, &A32::UserCallbacks::MemoryRead8>(ctx, inst); } } void A32EmitX64::EmitA32ExclusiveReadMemory16(A32EmitContext& ctx, IR::Inst* inst) { if (conf.fastmem_exclusive_access) { - ExclusiveReadMemoryInline<16, &A32::UserCallbacks::MemoryRead16>(ctx, inst); + EmitExclusiveReadMemoryInline<16, &A32::UserCallbacks::MemoryRead16>(ctx, inst); } else { - ExclusiveReadMemory<16, &A32::UserCallbacks::MemoryRead16>(ctx, inst); + EmitExclusiveReadMemory<16, &A32::UserCallbacks::MemoryRead16>(ctx, inst); } } void A32EmitX64::EmitA32ExclusiveReadMemory32(A32EmitContext& ctx, IR::Inst* inst) { if 
(conf.fastmem_exclusive_access) { - ExclusiveReadMemoryInline<32, &A32::UserCallbacks::MemoryRead32>(ctx, inst); + EmitExclusiveReadMemoryInline<32, &A32::UserCallbacks::MemoryRead32>(ctx, inst); } else { - ExclusiveReadMemory<32, &A32::UserCallbacks::MemoryRead32>(ctx, inst); + EmitExclusiveReadMemory<32, &A32::UserCallbacks::MemoryRead32>(ctx, inst); } } void A32EmitX64::EmitA32ExclusiveReadMemory64(A32EmitContext& ctx, IR::Inst* inst) { if (conf.fastmem_exclusive_access) { - ExclusiveReadMemoryInline<64, &A32::UserCallbacks::MemoryRead64>(ctx, inst); + EmitExclusiveReadMemoryInline<64, &A32::UserCallbacks::MemoryRead64>(ctx, inst); } else { - ExclusiveReadMemory<64, &A32::UserCallbacks::MemoryRead64>(ctx, inst); + EmitExclusiveReadMemory<64, &A32::UserCallbacks::MemoryRead64>(ctx, inst); } } void A32EmitX64::EmitA32ExclusiveWriteMemory8(A32EmitContext& ctx, IR::Inst* inst) { if (conf.fastmem_exclusive_access) { - ExclusiveWriteMemoryInline<8, &A32::UserCallbacks::MemoryWriteExclusive8>(ctx, inst); + EmitExclusiveWriteMemoryInline<8, &A32::UserCallbacks::MemoryWriteExclusive8>(ctx, inst); } else { - ExclusiveWriteMemory<8, &A32::UserCallbacks::MemoryWriteExclusive8>(ctx, inst); + EmitExclusiveWriteMemory<8, &A32::UserCallbacks::MemoryWriteExclusive8>(ctx, inst); } } void A32EmitX64::EmitA32ExclusiveWriteMemory16(A32EmitContext& ctx, IR::Inst* inst) { if (conf.fastmem_exclusive_access) { - ExclusiveWriteMemoryInline<16, &A32::UserCallbacks::MemoryWriteExclusive16>(ctx, inst); + EmitExclusiveWriteMemoryInline<16, &A32::UserCallbacks::MemoryWriteExclusive16>(ctx, inst); } else { - ExclusiveWriteMemory<16, &A32::UserCallbacks::MemoryWriteExclusive16>(ctx, inst); + EmitExclusiveWriteMemory<16, &A32::UserCallbacks::MemoryWriteExclusive16>(ctx, inst); } } void A32EmitX64::EmitA32ExclusiveWriteMemory32(A32EmitContext& ctx, IR::Inst* inst) { if (conf.fastmem_exclusive_access) { - ExclusiveWriteMemoryInline<32, &A32::UserCallbacks::MemoryWriteExclusive32>(ctx, inst); + EmitExclusiveWriteMemoryInline<32, &A32::UserCallbacks::MemoryWriteExclusive32>(ctx, inst); } else { - ExclusiveWriteMemory<32, &A32::UserCallbacks::MemoryWriteExclusive32>(ctx, inst); + EmitExclusiveWriteMemory<32, &A32::UserCallbacks::MemoryWriteExclusive32>(ctx, inst); } } void A32EmitX64::EmitA32ExclusiveWriteMemory64(A32EmitContext& ctx, IR::Inst* inst) { if (conf.fastmem_exclusive_access) { - ExclusiveWriteMemoryInline<64, &A32::UserCallbacks::MemoryWriteExclusive64>(ctx, inst); + EmitExclusiveWriteMemoryInline<64, &A32::UserCallbacks::MemoryWriteExclusive64>(ctx, inst); } else { - ExclusiveWriteMemory<64, &A32::UserCallbacks::MemoryWriteExclusive64>(ctx, inst); + EmitExclusiveWriteMemory<64, &A32::UserCallbacks::MemoryWriteExclusive64>(ctx, inst); } } diff --git a/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp b/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp index 7755c9ff..9025b623 100644 --- a/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp +++ b/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp @@ -265,167 +265,9 @@ void A64EmitX64::GenFastmemFallbacks() { } } -std::optional A64EmitX64::ShouldFastmem(A64EmitContext& ctx, IR::Inst* inst) const { - if (!conf.fastmem_pointer || !exception_handler.SupportsFastmem()) { - return std::nullopt; - } - - const auto marker = std::make_tuple(ctx.Location(), ctx.GetInstOffset(inst)); - if (do_not_fastmem.count(marker) > 0) { - return std::nullopt; - } - return marker; -} - -FakeCall A64EmitX64::FastmemCallback(u64 rip_) { - const auto iter = fastmem_patch_info.find(rip_); - - if 
(iter == fastmem_patch_info.end()) { - fmt::print("dynarmic: Segfault happened within JITted code at rip = {:016x}\n", rip_); - fmt::print("Segfault wasn't at a fastmem patch location!\n"); - fmt::print("Now dumping code.......\n\n"); - Common::DumpDisassembledX64((void*)(rip_ & ~u64(0xFFF)), 0x1000); - ASSERT_FALSE("iter != fastmem_patch_info.end()"); - } - - if (iter->second.recompile) { - const auto marker = iter->second.marker; - do_not_fastmem.emplace(marker); - InvalidateBasicBlocks({std::get<0>(marker)}); - } - - return FakeCall{ - .call_rip = iter->second.callback, - .ret_rip = iter->second.resume_rip, - }; -} - -template -void A64EmitX64::EmitMemoryRead(A64EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const auto fastmem_marker = ShouldFastmem(ctx, inst); - - if (!conf.page_table && !fastmem_marker) { - // Neither fastmem nor page table: Use callbacks - if constexpr (bitsize == 128) { - ctx.reg_alloc.HostCall(nullptr, {}, args[0]); - code.CallFunction(memory_read_128); - ctx.reg_alloc.DefineValue(inst, xmm1); - } else { - ctx.reg_alloc.HostCall(inst, {}, args[0]); - Devirtualize(conf.callbacks).EmitCall(code); - code.ZeroExtendFrom(bitsize, code.ABI_RETURN); - } - return; - } - - const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); - const int value_idx = bitsize == 128 ? ctx.reg_alloc.ScratchXmm().getIdx() : ctx.reg_alloc.ScratchGpr().getIdx(); - - const auto wrapped_fn = read_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value_idx)]; - - Xbyak::Label abort, end; - bool require_abort_handling = false; - - if (fastmem_marker) { - // Use fastmem - const auto src_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling); - - const auto location = code.getCurr(); - EmitReadMemoryMov(code, value_idx, src_ptr); - - fastmem_patch_info.emplace( - Common::BitCast(location), - FastmemPatchInfo{ - Common::BitCast(code.getCurr()), - Common::BitCast(wrapped_fn), - *fastmem_marker, - conf.recompile_on_fastmem_failure, - }); - } else { - // Use page table - ASSERT(conf.page_table); - const auto src_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr); - require_abort_handling = true; - EmitReadMemoryMov(code, value_idx, src_ptr); - } - code.L(end); - - if (require_abort_handling) { - code.SwitchToFarCode(); - code.L(abort); - code.call(wrapped_fn); - code.jmp(end, code.T_NEAR); - code.SwitchToNearCode(); - } - - if constexpr (bitsize == 128) { - ctx.reg_alloc.DefineValue(inst, Xbyak::Xmm{value_idx}); - } else { - ctx.reg_alloc.DefineValue(inst, Xbyak::Reg64{value_idx}); - } -} - -template -void A64EmitX64::EmitMemoryWrite(A64EmitContext& ctx, IR::Inst* inst) { - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const auto fastmem_marker = ShouldFastmem(ctx, inst); - - if (!conf.page_table && !fastmem_marker) { - // Neither fastmem nor page table: Use callbacks - if constexpr (bitsize == 128) { - ctx.reg_alloc.Use(args[0], ABI_PARAM2); - ctx.reg_alloc.Use(args[1], HostLoc::XMM1); - ctx.reg_alloc.EndOfAllocScope(); - ctx.reg_alloc.HostCall(nullptr); - code.CallFunction(memory_write_128); - } else { - ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]); - Devirtualize(conf.callbacks).EmitCall(code); - } - return; - } - - const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); - const int value_idx = bitsize == 128 ? 
ctx.reg_alloc.UseXmm(args[1]).getIdx() : ctx.reg_alloc.UseGpr(args[1]).getIdx(); - - const auto wrapped_fn = write_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value_idx)]; - - Xbyak::Label abort, end; - bool require_abort_handling = false; - - if (fastmem_marker) { - // Use fastmem - const auto dest_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling); - - const auto location = code.getCurr(); - EmitWriteMemoryMov(code, dest_ptr, value_idx); - - fastmem_patch_info.emplace( - Common::BitCast(location), - FastmemPatchInfo{ - Common::BitCast(code.getCurr()), - Common::BitCast(wrapped_fn), - *fastmem_marker, - conf.recompile_on_fastmem_failure, - }); - } else { - // Use page table - ASSERT(conf.page_table); - const auto dest_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr); - require_abort_handling = true; - EmitWriteMemoryMov(code, dest_ptr, value_idx); - } - code.L(end); - - if (require_abort_handling) { - code.SwitchToFarCode(); - code.L(abort); - code.call(wrapped_fn); - code.jmp(end, code.T_NEAR); - code.SwitchToNearCode(); - } -} +#define Axx A64 +#include "emit_x64_memory.cpp.inc" +#undef Axx void A64EmitX64::EmitA64ReadMemory8(A64EmitContext& ctx, IR::Inst* inst) { EmitMemoryRead<8, &A64::UserCallbacks::MemoryRead8>(ctx, inst); @@ -467,295 +309,6 @@ void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) { EmitMemoryWrite<128, &A64::UserCallbacks::MemoryWrite64>(ctx, inst); } -template -void A64EmitX64::EmitExclusiveReadMemory(A64EmitContext& ctx, IR::Inst* inst) { - ASSERT(conf.global_monitor != nullptr); - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - if constexpr (bitsize != 128) { - using T = mp::unsigned_integer_of_size; - - ctx.reg_alloc.HostCall(inst, {}, args[0]); - - code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(1)); - code.mov(code.ABI_PARAM1, reinterpret_cast(&conf)); - code.CallLambda( - [](A64::UserConfig& conf, u64 vaddr) -> T { - return conf.global_monitor->ReadAndMark(conf.processor_id, vaddr, [&]() -> T { - return (conf.callbacks->*callback)(vaddr); - }); - }); - code.ZeroExtendFrom(bitsize, code.ABI_RETURN); - } else { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - ctx.reg_alloc.Use(args[0], ABI_PARAM2); - ctx.reg_alloc.EndOfAllocScope(); - ctx.reg_alloc.HostCall(nullptr); - - code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(1)); - code.mov(code.ABI_PARAM1, reinterpret_cast(&conf)); - ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE); - code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); - code.CallLambda( - [](A64::UserConfig& conf, u64 vaddr, A64::Vector& ret) { - ret = conf.global_monitor->ReadAndMark(conf.processor_id, vaddr, [&]() -> A64::Vector { - return (conf.callbacks->*callback)(vaddr); - }); - }); - code.movups(result, xword[rsp + ABI_SHADOW_SPACE]); - ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE); - - ctx.reg_alloc.DefineValue(inst, result); - } -} - -template -void A64EmitX64::EmitExclusiveWriteMemory(A64EmitContext& ctx, IR::Inst* inst) { - ASSERT(conf.global_monitor != nullptr); - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - if constexpr (bitsize != 128) { - ctx.reg_alloc.HostCall(inst, {}, args[0], args[1]); - } else { - ctx.reg_alloc.Use(args[0], ABI_PARAM2); - ctx.reg_alloc.Use(args[1], HostLoc::XMM1); - ctx.reg_alloc.EndOfAllocScope(); - ctx.reg_alloc.HostCall(inst); - } - - Xbyak::Label end; - - code.mov(code.ABI_RETURN, u32(1)); - code.cmp(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0)); - 
code.je(end); - code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0)); - code.mov(code.ABI_PARAM1, reinterpret_cast(&conf)); - if constexpr (bitsize != 128) { - using T = mp::unsigned_integer_of_size; - - code.CallLambda( - [](A64::UserConfig& conf, u64 vaddr, T value) -> u32 { - return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, - [&](T expected) -> bool { - return (conf.callbacks->*callback)(vaddr, value, expected); - }) - ? 0 - : 1; - }); - } else { - ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE); - code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); - code.movaps(xword[code.ABI_PARAM3], xmm1); - code.CallLambda( - [](A64::UserConfig& conf, u64 vaddr, A64::Vector& value) -> u32 { - return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, - [&](A64::Vector expected) -> bool { - return (conf.callbacks->*callback)(vaddr, value, expected); - }) - ? 0 - : 1; - }); - ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE); - } - code.L(end); -} - -template -void A64EmitX64::EmitExclusiveReadMemoryInline(A64EmitContext& ctx, IR::Inst* inst) { - ASSERT(conf.global_monitor && conf.fastmem_pointer); - if (!exception_handler.SupportsFastmem()) { - EmitExclusiveReadMemory(ctx, inst); - return; - } - - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); - const int value_idx = bitsize == 128 ? ctx.reg_alloc.ScratchXmm().getIdx() : ctx.reg_alloc.ScratchGpr().getIdx(); - const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); - const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr(); - - const auto wrapped_fn = read_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value_idx)]; - - EmitExclusiveLock(code, conf, tmp, tmp2.cvt32()); - - code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(1)); - code.mov(tmp, Common::BitCast(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id))); - code.mov(qword[tmp], vaddr); - - const auto fastmem_marker = ShouldFastmem(ctx, inst); - if (fastmem_marker) { - Xbyak::Label abort, end; - bool require_abort_handling = false; - - const auto src_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling); - - const auto location = code.getCurr(); - EmitReadMemoryMov(code, value_idx, src_ptr); - - fastmem_patch_info.emplace( - Common::BitCast(location), - FastmemPatchInfo{ - Common::BitCast(code.getCurr()), - Common::BitCast(wrapped_fn), - *fastmem_marker, - conf.recompile_on_exclusive_fastmem_failure, - }); - - code.L(end); - - if (require_abort_handling) { - code.SwitchToFarCode(); - code.L(abort); - code.call(wrapped_fn); - code.jmp(end, code.T_NEAR); - code.SwitchToNearCode(); - } - } else { - code.call(wrapped_fn); - } - - code.mov(tmp, Common::BitCast(GetExclusiveMonitorValuePointer(conf.global_monitor, conf.processor_id))); - EmitWriteMemoryMov(code, tmp, value_idx); - - EmitExclusiveUnlock(code, conf, tmp, tmp2.cvt32()); - - if constexpr (bitsize == 128) { - ctx.reg_alloc.DefineValue(inst, Xbyak::Xmm{value_idx}); - } else { - ctx.reg_alloc.DefineValue(inst, Xbyak::Reg64{value_idx}); - } -} - -template -void A64EmitX64::EmitExclusiveWriteMemoryInline(A64EmitContext& ctx, IR::Inst* inst) { - ASSERT(conf.global_monitor && conf.fastmem_pointer); - if (!exception_handler.SupportsFastmem()) { - EmitExclusiveWriteMemory(ctx, inst); - return; - } - - auto args = ctx.reg_alloc.GetArgumentInfo(inst); - - const auto value = [&] { - if constexpr (bitsize == 128) { - ctx.reg_alloc.ScratchGpr(HostLoc::RAX); - 
ctx.reg_alloc.ScratchGpr(HostLoc::RBX); - ctx.reg_alloc.ScratchGpr(HostLoc::RCX); - ctx.reg_alloc.ScratchGpr(HostLoc::RDX); - return ctx.reg_alloc.UseXmm(args[1]); - } else { - ctx.reg_alloc.ScratchGpr(HostLoc::RAX); - return ctx.reg_alloc.UseGpr(args[1]); - } - }(); - const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); - const Xbyak::Reg32 status = ctx.reg_alloc.ScratchGpr().cvt32(); - const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); - - const auto fallback_fn = exclusive_write_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())]; - - EmitExclusiveLock(code, conf, tmp, eax); - - Xbyak::Label end; - - code.mov(tmp, Common::BitCast(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id))); - code.mov(status, u32(1)); - code.cmp(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0)); - code.je(end, code.T_NEAR); - code.cmp(qword[tmp], vaddr); - code.jne(end, code.T_NEAR); - - EmitExclusiveTestAndClear(code, conf, vaddr, tmp, rax); - - code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0)); - code.mov(tmp, Common::BitCast(GetExclusiveMonitorValuePointer(conf.global_monitor, conf.processor_id))); - - if constexpr (bitsize == 128) { - code.mov(rax, qword[tmp + 0]); - code.mov(rdx, qword[tmp + 8]); - if (code.HasHostFeature(HostFeature::SSE41)) { - code.movq(rbx, value); - code.pextrq(rcx, value, 1); - } else { - code.movaps(xmm0, value); - code.movq(rbx, xmm0); - code.punpckhqdq(xmm0, xmm0); - code.movq(rcx, xmm0); - } - } else { - EmitReadMemoryMov(code, rax.getIdx(), tmp); - } - - const auto fastmem_marker = ShouldFastmem(ctx, inst); - if (fastmem_marker) { - Xbyak::Label abort; - bool require_abort_handling = false; - - const auto dest_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling, tmp); - - const auto location = code.getCurr(); - - if constexpr (bitsize == 128) { - code.lock(); - code.cmpxchg16b(ptr[dest_ptr]); - } else { - switch (bitsize) { - case 8: - code.lock(); - code.cmpxchg(code.byte[dest_ptr], value.cvt8()); - break; - case 16: - code.lock(); - code.cmpxchg(word[dest_ptr], value.cvt16()); - break; - case 32: - code.lock(); - code.cmpxchg(dword[dest_ptr], value.cvt32()); - break; - case 64: - code.lock(); - code.cmpxchg(qword[dest_ptr], value.cvt64()); - break; - default: - UNREACHABLE(); - } - } - - code.setnz(status.cvt8()); - - code.SwitchToFarCode(); - code.L(abort); - code.call(fallback_fn); - - fastmem_patch_info.emplace( - Common::BitCast(location), - FastmemPatchInfo{ - Common::BitCast(code.getCurr()), - Common::BitCast(fallback_fn), - *fastmem_marker, - conf.recompile_on_exclusive_fastmem_failure, - }); - - code.cmp(al, 0); - code.setz(status.cvt8()); - code.movzx(status.cvt32(), status.cvt8()); - code.jmp(end, code.T_NEAR); - code.SwitchToNearCode(); - } else { - code.call(fallback_fn); - code.cmp(al, 0); - code.setz(status.cvt8()); - code.movzx(status.cvt32(), status.cvt8()); - } - - code.L(end); - - EmitExclusiveUnlock(code, conf, tmp, eax); - - ctx.reg_alloc.DefineValue(inst, status); -} - void A64EmitX64::EmitA64ClearExclusive(A64EmitContext&, IR::Inst*) { code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0)); } diff --git a/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc b/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc new file mode 100644 index 00000000..1054f616 --- /dev/null +++ b/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc @@ -0,0 +1,471 @@ +/* This file is part of the dynarmic project. 
+ * Copyright (c) 2022 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#include "dynarmic/common/macro_util.h" + +#define AxxEmitX64 CONCATENATE_TOKENS(Axx, EmitX64) +#define AxxEmitContext CONCATENATE_TOKENS(Axx, EmitContext) +#define AxxJitState CONCATENATE_TOKENS(Axx, JitState) +#define AxxUserConfig Axx::UserConfig + +namespace { +using Vector = std::array; +} + +std::optional AxxEmitX64::ShouldFastmem(AxxEmitContext& ctx, IR::Inst* inst) const { + if (!conf.fastmem_pointer || !exception_handler.SupportsFastmem()) { + return std::nullopt; + } + + const auto marker = std::make_tuple(ctx.Location(), ctx.GetInstOffset(inst)); + if (do_not_fastmem.count(marker) > 0) { + return std::nullopt; + } + return marker; +} + +FakeCall AxxEmitX64::FastmemCallback(u64 rip_) { + const auto iter = fastmem_patch_info.find(rip_); + + if (iter == fastmem_patch_info.end()) { + fmt::print("dynarmic: Segfault happened within JITted code at rip = {:016x}\n", rip_); + fmt::print("Segfault wasn't at a fastmem patch location!\n"); + fmt::print("Now dumping code.......\n\n"); + Common::DumpDisassembledX64((void*)(rip_ & ~u64(0xFFF)), 0x1000); + ASSERT_FALSE("iter != fastmem_patch_info.end()"); + } + + if (iter->second.recompile) { + const auto marker = iter->second.marker; + do_not_fastmem.emplace(marker); + InvalidateBasicBlocks({std::get<0>(marker)}); + } + + return FakeCall{ + .call_rip = iter->second.callback, + .ret_rip = iter->second.resume_rip, + }; +} + +template +void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const auto fastmem_marker = ShouldFastmem(ctx, inst); + + if (!conf.page_table && !fastmem_marker) { + // Neither fastmem nor page table: Use callbacks + if constexpr (bitsize == 128) { + ctx.reg_alloc.HostCall(nullptr, {}, args[0]); + code.CallFunction(memory_read_128); + ctx.reg_alloc.DefineValue(inst, xmm1); + } else { + ctx.reg_alloc.HostCall(inst, {}, args[0]); + Devirtualize(conf.callbacks).EmitCall(code); + code.ZeroExtendFrom(bitsize, code.ABI_RETURN); + } + return; + } + + const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); + const int value_idx = bitsize == 128 ? 
ctx.reg_alloc.ScratchXmm().getIdx() : ctx.reg_alloc.ScratchGpr().getIdx(); + + const auto wrapped_fn = read_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value_idx)]; + + Xbyak::Label abort, end; + bool require_abort_handling = false; + + if (fastmem_marker) { + // Use fastmem + const auto src_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling); + + const auto location = code.getCurr(); + EmitReadMemoryMov(code, value_idx, src_ptr); + + fastmem_patch_info.emplace( + Common::BitCast(location), + FastmemPatchInfo{ + Common::BitCast(code.getCurr()), + Common::BitCast(wrapped_fn), + *fastmem_marker, + conf.recompile_on_fastmem_failure, + }); + } else { + // Use page table + ASSERT(conf.page_table); + const auto src_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr); + require_abort_handling = true; + EmitReadMemoryMov(code, value_idx, src_ptr); + } + code.L(end); + + if (require_abort_handling) { + code.SwitchToFarCode(); + code.L(abort); + code.call(wrapped_fn); + code.jmp(end, code.T_NEAR); + code.SwitchToNearCode(); + } + + if constexpr (bitsize == 128) { + ctx.reg_alloc.DefineValue(inst, Xbyak::Xmm{value_idx}); + } else { + ctx.reg_alloc.DefineValue(inst, Xbyak::Reg64{value_idx}); + } +} + +template +void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const auto fastmem_marker = ShouldFastmem(ctx, inst); + + if (!conf.page_table && !fastmem_marker) { + // Neither fastmem nor page table: Use callbacks + if constexpr (bitsize == 128) { + ctx.reg_alloc.Use(args[0], ABI_PARAM2); + ctx.reg_alloc.Use(args[1], HostLoc::XMM1); + ctx.reg_alloc.EndOfAllocScope(); + ctx.reg_alloc.HostCall(nullptr); + code.CallFunction(memory_write_128); + } else { + ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]); + Devirtualize(conf.callbacks).EmitCall(code); + } + return; + } + + const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); + const int value_idx = bitsize == 128 ? 
ctx.reg_alloc.UseXmm(args[1]).getIdx() : ctx.reg_alloc.UseGpr(args[1]).getIdx(); + + const auto wrapped_fn = write_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value_idx)]; + + Xbyak::Label abort, end; + bool require_abort_handling = false; + + if (fastmem_marker) { + // Use fastmem + const auto dest_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling); + + const auto location = code.getCurr(); + EmitWriteMemoryMov(code, dest_ptr, value_idx); + + fastmem_patch_info.emplace( + Common::BitCast(location), + FastmemPatchInfo{ + Common::BitCast(code.getCurr()), + Common::BitCast(wrapped_fn), + *fastmem_marker, + conf.recompile_on_fastmem_failure, + }); + } else { + // Use page table + ASSERT(conf.page_table); + const auto dest_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr); + require_abort_handling = true; + EmitWriteMemoryMov(code, dest_ptr, value_idx); + } + code.L(end); + + if (require_abort_handling) { + code.SwitchToFarCode(); + code.L(abort); + code.call(wrapped_fn); + code.jmp(end, code.T_NEAR); + code.SwitchToNearCode(); + } +} + +template +void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) { + ASSERT(conf.global_monitor != nullptr); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + if constexpr (bitsize != 128) { + using T = mp::unsigned_integer_of_size; + + ctx.reg_alloc.HostCall(inst, {}, args[0]); + + code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(1)); + code.mov(code.ABI_PARAM1, reinterpret_cast(&conf)); + code.CallLambda( + [](AxxUserConfig& conf, Axx::VAddr vaddr) -> T { + return conf.global_monitor->ReadAndMark(conf.processor_id, vaddr, [&]() -> T { + return (conf.callbacks->*callback)(vaddr); + }); + }); + code.ZeroExtendFrom(bitsize, code.ABI_RETURN); + } else { + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + ctx.reg_alloc.Use(args[0], ABI_PARAM2); + ctx.reg_alloc.EndOfAllocScope(); + ctx.reg_alloc.HostCall(nullptr); + + code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(1)); + code.mov(code.ABI_PARAM1, reinterpret_cast(&conf)); + ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE); + code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); + code.CallLambda( + [](AxxUserConfig& conf, Axx::VAddr vaddr, Vector& ret) { + ret = conf.global_monitor->ReadAndMark(conf.processor_id, vaddr, [&]() -> Vector { + return (conf.callbacks->*callback)(vaddr); + }); + }); + code.movups(result, xword[rsp + ABI_SHADOW_SPACE]); + ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE); + + ctx.reg_alloc.DefineValue(inst, result); + } +} + +template +void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) { + ASSERT(conf.global_monitor != nullptr); + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + if constexpr (bitsize != 128) { + ctx.reg_alloc.HostCall(inst, {}, args[0], args[1]); + } else { + ctx.reg_alloc.Use(args[0], ABI_PARAM2); + ctx.reg_alloc.Use(args[1], HostLoc::XMM1); + ctx.reg_alloc.EndOfAllocScope(); + ctx.reg_alloc.HostCall(inst); + } + + Xbyak::Label end; + + code.mov(code.ABI_RETURN, u32(1)); + code.cmp(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0)); + code.je(end); + code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0)); + code.mov(code.ABI_PARAM1, reinterpret_cast(&conf)); + if constexpr (bitsize != 128) { + using T = mp::unsigned_integer_of_size; + + code.CallLambda( + [](AxxUserConfig& conf, Axx::VAddr vaddr, T value) -> u32 { + return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, 
+ [&](T expected) -> bool { + return (conf.callbacks->*callback)(vaddr, value, expected); + }) + ? 0 + : 1; + }); + } else { + ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE); + code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); + code.movaps(xword[code.ABI_PARAM3], xmm1); + code.CallLambda( + [](AxxUserConfig& conf, Axx::VAddr vaddr, Vector& value) -> u32 { + return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, + [&](Vector expected) -> bool { + return (conf.callbacks->*callback)(vaddr, value, expected); + }) + ? 0 + : 1; + }); + ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE); + } + code.L(end); +} + +template +void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* inst) { + ASSERT(conf.global_monitor && conf.fastmem_pointer); + if (!exception_handler.SupportsFastmem()) { + EmitExclusiveReadMemory(ctx, inst); + return; + } + + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); + const int value_idx = bitsize == 128 ? ctx.reg_alloc.ScratchXmm().getIdx() : ctx.reg_alloc.ScratchGpr().getIdx(); + const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr(); + + const auto wrapped_fn = read_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value_idx)]; + + EmitExclusiveLock(code, conf, tmp, tmp2.cvt32()); + + code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(1)); + code.mov(tmp, Common::BitCast(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id))); + code.mov(qword[tmp], vaddr); + + const auto fastmem_marker = ShouldFastmem(ctx, inst); + if (fastmem_marker) { + Xbyak::Label abort, end; + bool require_abort_handling = false; + + const auto src_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling); + + const auto location = code.getCurr(); + EmitReadMemoryMov(code, value_idx, src_ptr); + + fastmem_patch_info.emplace( + Common::BitCast(location), + FastmemPatchInfo{ + Common::BitCast(code.getCurr()), + Common::BitCast(wrapped_fn), + *fastmem_marker, + conf.recompile_on_exclusive_fastmem_failure, + }); + + code.L(end); + + if (require_abort_handling) { + code.SwitchToFarCode(); + code.L(abort); + code.call(wrapped_fn); + code.jmp(end, code.T_NEAR); + code.SwitchToNearCode(); + } + } else { + code.call(wrapped_fn); + } + + code.mov(tmp, Common::BitCast(GetExclusiveMonitorValuePointer(conf.global_monitor, conf.processor_id))); + EmitWriteMemoryMov(code, tmp, value_idx); + + EmitExclusiveUnlock(code, conf, tmp, tmp2.cvt32()); + + if constexpr (bitsize == 128) { + ctx.reg_alloc.DefineValue(inst, Xbyak::Xmm{value_idx}); + } else { + ctx.reg_alloc.DefineValue(inst, Xbyak::Reg64{value_idx}); + } +} + +template +void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* inst) { + ASSERT(conf.global_monitor && conf.fastmem_pointer); + if (!exception_handler.SupportsFastmem()) { + EmitExclusiveWriteMemory(ctx, inst); + return; + } + + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const auto value = [&] { + if constexpr (bitsize == 128) { + ctx.reg_alloc.ScratchGpr(HostLoc::RAX); + ctx.reg_alloc.ScratchGpr(HostLoc::RBX); + ctx.reg_alloc.ScratchGpr(HostLoc::RCX); + ctx.reg_alloc.ScratchGpr(HostLoc::RDX); + return ctx.reg_alloc.UseXmm(args[1]); + } else { + ctx.reg_alloc.ScratchGpr(HostLoc::RAX); + return ctx.reg_alloc.UseGpr(args[1]); + } + }(); + const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); + const Xbyak::Reg32 status = 
ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); + + const auto fallback_fn = exclusive_write_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())]; + + EmitExclusiveLock(code, conf, tmp, eax); + + Xbyak::Label end; + + code.mov(tmp, Common::BitCast(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id))); + code.mov(status, u32(1)); + code.cmp(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0)); + code.je(end, code.T_NEAR); + code.cmp(qword[tmp], vaddr); + code.jne(end, code.T_NEAR); + + EmitExclusiveTestAndClear(code, conf, vaddr, tmp, rax); + + code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0)); + code.mov(tmp, Common::BitCast(GetExclusiveMonitorValuePointer(conf.global_monitor, conf.processor_id))); + + if constexpr (bitsize == 128) { + code.mov(rax, qword[tmp + 0]); + code.mov(rdx, qword[tmp + 8]); + if (code.HasHostFeature(HostFeature::SSE41)) { + code.movq(rbx, value); + code.pextrq(rcx, value, 1); + } else { + code.movaps(xmm0, value); + code.movq(rbx, xmm0); + code.punpckhqdq(xmm0, xmm0); + code.movq(rcx, xmm0); + } + } else { + EmitReadMemoryMov(code, rax.getIdx(), tmp); + } + + const auto fastmem_marker = ShouldFastmem(ctx, inst); + if (fastmem_marker) { + Xbyak::Label abort; + bool require_abort_handling = false; + + const auto dest_ptr = EmitFastmemVAddr(code, ctx, abort, vaddr, require_abort_handling, tmp); + + const auto location = code.getCurr(); + + if constexpr (bitsize == 128) { + code.lock(); + code.cmpxchg16b(ptr[dest_ptr]); + } else { + switch (bitsize) { + case 8: + code.lock(); + code.cmpxchg(code.byte[dest_ptr], value.cvt8()); + break; + case 16: + code.lock(); + code.cmpxchg(word[dest_ptr], value.cvt16()); + break; + case 32: + code.lock(); + code.cmpxchg(dword[dest_ptr], value.cvt32()); + break; + case 64: + code.lock(); + code.cmpxchg(qword[dest_ptr], value.cvt64()); + break; + default: + UNREACHABLE(); + } + } + + code.setnz(status.cvt8()); + + code.SwitchToFarCode(); + code.L(abort); + code.call(fallback_fn); + + fastmem_patch_info.emplace( + Common::BitCast(location), + FastmemPatchInfo{ + Common::BitCast(code.getCurr()), + Common::BitCast(fallback_fn), + *fastmem_marker, + conf.recompile_on_exclusive_fastmem_failure, + }); + + code.cmp(al, 0); + code.setz(status.cvt8()); + code.movzx(status.cvt32(), status.cvt8()); + code.jmp(end, code.T_NEAR); + code.SwitchToNearCode(); + } else { + code.call(fallback_fn); + code.cmp(al, 0); + code.setz(status.cvt8()); + code.movzx(status.cvt32(), status.cvt8()); + } + + code.L(end); + + EmitExclusiveUnlock(code, conf, tmp, eax); + + ctx.reg_alloc.DefineValue(inst, status); +} + +#undef AxxEmitX64 +#undef AxxEmitContext +#undef AxxJitState +#undef AxxUserConfig
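
A note on the include pattern introduced by emit_x64_memory.cpp.inc above: the shared memory emitters are written once against the Axx placeholder, and each frontend defines Axx to A32 or A64 before textually including the file, so CONCATENATE_TOKENS(Axx, EmitX64) (from dynarmic/common/macro_util.h) pastes into the concrete class name. The following is a minimal, self-contained sketch of that token-pasting idea only; CONCAT and the two toy emitter structs are illustrative stand-ins, not dynarmic's actual API.

// toy_emitters.cpp -- illustrative sketch of the Axx include pattern
#include <cstdio>

#define CONCAT_IMPL(a, b) a##b
#define CONCAT(a, b) CONCAT_IMPL(a, b)  // expand arguments first, then paste

struct A32EmitX64 { void EmitMemoryRead(); };
struct A64EmitX64 { void EmitMemoryRead(); };

// In dynarmic this body lives in emit_x64_memory.cpp.inc and is #include'd
// from a32_emit_x64_memory.cpp and a64_emit_x64_memory.cpp with Axx defined.
#define AxxEmitX64 CONCAT(Axx, EmitX64)

#define Axx A32
void AxxEmitX64::EmitMemoryRead() { std::puts("A32 read"); }  // expands to A32EmitX64::EmitMemoryRead
#undef Axx

#define Axx A64
void AxxEmitX64::EmitMemoryRead() { std::puts("A64 read"); }  // expands to A64EmitX64::EmitMemoryRead
#undef Axx

#undef AxxEmitX64

int main() {
    A32EmitX64{}.EmitMemoryRead();
    A64EmitX64{}.EmitMemoryRead();
    return 0;
}

This keeps a single copy of the fastmem, page-table and exclusive-access emitters while each of A32EmitX64 and A64EmitX64 compiles them against its own JitState, UserConfig and callbacks. Because the shared code names memory_read_128 and memory_write_128 even though A32 never emits 128-bit accesses, a32_emit_x64.h gains the dummy function pointers seen in the header hunk of this patch.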