diff --git a/include/dynarmic/A64/a64.h b/include/dynarmic/A64/a64.h
index 0e3808ff..02123f6a 100644
--- a/include/dynarmic/A64/a64.h
+++ b/include/dynarmic/A64/a64.h
@@ -100,6 +100,9 @@ public:
     /// Modify PSTATE
     void SetPstate(std::uint32_t value);
 
+    /// Clears exclusive state for this core.
+    void ClearExclusiveState();
+
     /**
      * Returns true if Jit::Run was called but hasn't returned yet.
      * i.e.: We're in a callback.
diff --git a/include/dynarmic/A64/config.h b/include/dynarmic/A64/config.h
index 5ba02e0e..93315d51 100644
--- a/include/dynarmic/A64/config.h
+++ b/include/dynarmic/A64/config.h
@@ -106,9 +106,14 @@ struct UserCallbacks {
     virtual std::uint64_t GetCNTPCT() = 0;
 };
 
+class ExclusiveMonitor;
+
 struct UserConfig {
     UserCallbacks* callbacks;
 
+    size_t processor_id = 0;
+    ExclusiveMonitor* global_monitor = nullptr;
+
     /// When set to true, UserCallbacks::DataCacheOperationRaised will be called when any
     /// data cache instruction is executed. Notably DC ZVA will not implicitly do anything.
     /// When set to false, UserCallbacks::DataCacheOperationRaised will never be called.
diff --git a/include/dynarmic/A64/exclusive_monitor.h b/include/dynarmic/A64/exclusive_monitor.h
new file mode 100644
index 00000000..16578f11
--- /dev/null
+++ b/include/dynarmic/A64/exclusive_monitor.h
@@ -0,0 +1,64 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#pragma once
+
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+
+namespace Dynarmic {
+namespace A64 {
+
+using VAddr = std::uint64_t;
+
+class ExclusiveMonitor {
+public:
+    /// @param processor_count Maximum number of processors using this global
+    ///                        exclusive monitor. Each processor must have a
+    ///                        unique id.
+    explicit ExclusiveMonitor(size_t processor_count);
+
+    size_t GetProcessorCount() const;
+
+    /// Marks the region containing [address, address+size) as exclusive to
+    /// processor processor_id.
+    void Mark(size_t processor_id, VAddr address, size_t size);
+
+    /// Checks whether processor processor_id has exclusive access to the
+    /// specified region. If it does, executes the operation, then clears
+    /// the exclusive state of every processor whose exclusive region
+    /// contains [address, address+size).
+    template <typename Function>
+    bool DoExclusiveOperation(size_t processor_id, VAddr address, size_t size, Function op) {
+        if (!CheckAndClear(processor_id, address, size)) {
+            return false;
+        }
+
+        op();
+
+        Unlock();
+        return true;
+    }
+
+    /// Unmark everything.
+    void Clear();
+
+private:
+    bool CheckAndClear(size_t processor_id, VAddr address, size_t size);
+
+    void Lock();
+    void Unlock();
+
+    static constexpr VAddr RESERVATION_GRANULE_MASK = 0xFFFF'FFFF'FFFF'FFF0ull;
+    static constexpr VAddr INVALID_EXCLUSIVE_ADDRESS = 0xDEAD'DEAD'DEAD'DEADull;
+    std::atomic_flag is_locked;
+    std::vector<VAddr> exclusive_addresses;
+};
+
+} // namespace A64
+} // namespace Dynarmic
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 1fc2e6be..118f8799 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -4,6 +4,9 @@ add_library(dynarmic
     ../include/dynarmic/A32/coprocessor.h
     ../include/dynarmic/A32/coprocessor_util.h
    ../include/dynarmic/A32/disassembler.h
+    ../include/dynarmic/A64/a64.h
+    ../include/dynarmic/A64/config.h
+    ../include/dynarmic/A64/exclusive_monitor.h
     common/address_range.h
     common/aes.cpp
     common/aes.h
@@ -84,10 +87,10 @@ add_library(dynarmic
     frontend/A64/translate/impl/data_processing_shift.cpp
     frontend/A64/translate/impl/exception_generating.cpp
     frontend/A64/translate/impl/floating_point_compare.cpp
-    frontend/A64/translate/impl/floating_point_conversion_fixed_point.cpp
-    frontend/A64/translate/impl/floating_point_conversion_integer.cpp
     frontend/A64/translate/impl/floating_point_conditional_compare.cpp
     frontend/A64/translate/impl/floating_point_conditional_select.cpp
+    frontend/A64/translate/impl/floating_point_conversion_fixed_point.cpp
+    frontend/A64/translate/impl/floating_point_conversion_integer.cpp
     frontend/A64/translate/impl/floating_point_data_processing_one_register.cpp
     frontend/A64/translate/impl/floating_point_data_processing_three_register.cpp
     frontend/A64/translate/impl/floating_point_data_processing_two_register.cpp
@@ -164,6 +167,7 @@ if (ARCHITECTURE_x86_64)
        backend_x64/a32_jitstate.h
        backend_x64/a64_emit_x64.cpp
        backend_x64/a64_emit_x64.h
+        backend_x64/a64_exclusive_monitor.cpp
        backend_x64/a64_interface.cpp
        backend_x64/a64_jitstate.cpp
        backend_x64/a64_jitstate.h
diff --git a/src/backend_x64/a64_emit_x64.cpp b/src/backend_x64/a64_emit_x64.cpp
index 006c18fa..54c4163c 100644
--- a/src/backend_x64/a64_emit_x64.cpp
+++ b/src/backend_x64/a64_emit_x64.cpp
@@ -6,6 +6,7 @@
 
 #include <initializer_list>
 
+#include <dynarmic/A64/exclusive_monitor.h>
 #include "backend_x64/a64_emit_x64.h"
@@ -485,6 +486,8 @@ void A64EmitX64::EmitA64CallSupervisor(A64EmitContext& ctx, IR::Inst* inst) {
     DEVIRT(conf.callbacks, &A64::UserCallbacks::CallSVC).EmitCall(code, [&](RegList param) {
         code.mov(param[0], imm);
     });
+    // The kernel would have to execute ERET to get here, which would clear exclusive state.
+    code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0));
 }
 
 void A64EmitX64::EmitA64ExceptionRaised(A64EmitContext& ctx, IR::Inst* inst) {
@@ -567,6 +570,21 @@ void A64EmitX64::EmitA64ClearExclusive(A64EmitContext&, IR::Inst*) {
 }
 
 void A64EmitX64::EmitA64SetExclusive(A64EmitContext& ctx, IR::Inst* inst) {
+    if (conf.global_monitor) {
+        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+        ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
+
+        code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(1));
+        code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
+        code.CallFunction(static_cast<void(*)(A64::UserConfig&, u64, u8)>(
+            [](A64::UserConfig& conf, u64 vaddr, u8 size) {
+                conf.global_monitor->Mark(conf.processor_id, vaddr, size);
+            }
+        ));
+
+        return;
+    }
+
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ASSERT(args[1].IsImmediate());
     Xbyak::Reg64 address = ctx.reg_alloc.UseGpr(args[0]);
@@ -817,7 +835,89 @@ void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) {
     code.CallFunction(memory_write_128);
 }
 
-void A64EmitX64::EmitExclusiveWrite(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize, Xbyak::Reg64 vaddr, int value_idx) {
+void A64EmitX64::EmitExclusiveWrite(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize) {
+    if (conf.global_monitor) {
+        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+        if (bitsize != 128) {
+            ctx.reg_alloc.HostCall(inst, {}, args[0], args[1]);
+        } else {
+            ctx.reg_alloc.Use(args[0], ABI_PARAM2);
+            ctx.reg_alloc.Use(args[1], HostLoc::XMM0);
+            ctx.reg_alloc.EndOfAllocScope();
+            ctx.reg_alloc.HostCall(inst);
+        }
+
+        Xbyak::Label end;
+
+        code.mov(code.ABI_RETURN, u32(1));
+        code.cmp(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0));
+        code.je(end);
+        code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
+        switch (bitsize) {
+        case 8:
+            code.CallFunction(static_cast<u32(*)(A64::UserConfig&, u64, u8)>(
+                [](A64::UserConfig& conf, u64 vaddr, u8 value) -> u32 {
+                    return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, 1, [&]{
+                        conf.callbacks->MemoryWrite8(vaddr, value);
+                    }) ? 0 : 1;
+                }
+            ));
+            break;
+        case 16:
+            code.CallFunction(static_cast<u32(*)(A64::UserConfig&, u64, u16)>(
+                [](A64::UserConfig& conf, u64 vaddr, u16 value) -> u32 {
+                    return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, 2, [&]{
+                        conf.callbacks->MemoryWrite16(vaddr, value);
+                    }) ? 0 : 1;
+                }
+            ));
+            break;
+        case 32:
+            code.CallFunction(static_cast<u32(*)(A64::UserConfig&, u64, u32)>(
+                [](A64::UserConfig& conf, u64 vaddr, u32 value) -> u32 {
+                    return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, 4, [&]{
+                        conf.callbacks->MemoryWrite32(vaddr, value);
+                    }) ? 0 : 1;
+                }
+            ));
+            break;
+        case 64:
+            code.CallFunction(static_cast<u32(*)(A64::UserConfig&, u64, u64)>(
+                [](A64::UserConfig& conf, u64 vaddr, u64 value) -> u32 {
+                    return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, 8, [&]{
+                        conf.callbacks->MemoryWrite64(vaddr, value);
+                    }) ? 0 : 1;
+                }
+            ));
+            break;
+        case 128:
+            code.sub(rsp, 8 + 16 + ABI_SHADOW_SPACE);
+            code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
+            code.movaps(xword[code.ABI_PARAM3], xmm0);
+            code.CallFunction(static_cast<u32(*)(A64::UserConfig&, u64, A64::Vector&)>(
+                [](A64::UserConfig& conf, u64 vaddr, A64::Vector& value) -> u32 {
+                    return conf.global_monitor->DoExclusiveOperation(conf.processor_id, vaddr, 16, [&]{
+                        conf.callbacks->MemoryWrite128(vaddr, value);
+                    }) ? 0 : 1;
+                }
+            ));
+            code.add(rsp, 8 + 16 + ABI_SHADOW_SPACE);
+            break;
+        default:
+            UNREACHABLE();
+        }
+        code.L(end);
+
+        return;
+    }
+
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
+    int value_idx = bitsize != 128
+                  ? ctx.reg_alloc.UseGpr(args[1]).getIdx()
+                  : ctx.reg_alloc.UseXmm(args[1]).getIdx();
+
     Xbyak::Label end;
     Xbyak::Reg32 passed = ctx.reg_alloc.ScratchGpr().cvt32();
     Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
@@ -838,38 +938,23 @@
 }
 
 void A64EmitX64::EmitA64ExclusiveWriteMemory8(A64EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
-    Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
-    EmitExclusiveWrite(ctx, inst, 8, vaddr, value.getIdx());
+    EmitExclusiveWrite(ctx, inst, 8);
 }
 
 void A64EmitX64::EmitA64ExclusiveWriteMemory16(A64EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
-    Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
-    EmitExclusiveWrite(ctx, inst, 16, vaddr, value.getIdx());
+    EmitExclusiveWrite(ctx, inst, 16);
 }
 
 void A64EmitX64::EmitA64ExclusiveWriteMemory32(A64EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
-    Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
-    EmitExclusiveWrite(ctx, inst, 32, vaddr, value.getIdx());
+    EmitExclusiveWrite(ctx, inst, 32);
 }
 
 void A64EmitX64::EmitA64ExclusiveWriteMemory64(A64EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
-    Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
-    EmitExclusiveWrite(ctx, inst, 64, vaddr, value.getIdx());
+    EmitExclusiveWrite(ctx, inst, 64);
 }
 
 void A64EmitX64::EmitA64ExclusiveWriteMemory128(A64EmitContext& ctx, IR::Inst* inst) {
-    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]);
-    Xbyak::Xmm value = ctx.reg_alloc.UseXmm(args[1]);
-    EmitExclusiveWrite(ctx, inst, 128, vaddr, value.getIdx());
+    EmitExclusiveWrite(ctx, inst, 128);
 }
 
 void A64EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor) {
diff --git a/src/backend_x64/a64_emit_x64.h b/src/backend_x64/a64_emit_x64.h
index 8108bda7..53e25853 100644
--- a/src/backend_x64/a64_emit_x64.h
+++ b/src/backend_x64/a64_emit_x64.h
@@ -60,7 +60,7 @@ protected:
     void EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize);
     void EmitDirectPageTableMemoryWrite(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize);
 
-    void EmitExclusiveWrite(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize, Xbyak::Reg64 vaddr, int value_idx);
+    void EmitExclusiveWrite(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize);
 
     // Microinstruction emitters
 #define OPCODE(...)
diff --git a/src/backend_x64/a64_exclusive_monitor.cpp b/src/backend_x64/a64_exclusive_monitor.cpp
new file mode 100644
index 00000000..e0373fde
--- /dev/null
+++ b/src/backend_x64/a64_exclusive_monitor.cpp
@@ -0,0 +1,65 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2018 MerryMage
+ * This software may be used and distributed according to the terms of the GNU
+ * General Public License version 2 or any later version.
+ */
+
+#include <algorithm>
+
+#include <dynarmic/A64/exclusive_monitor.h>
+#include "common/assert.h"
+
+namespace Dynarmic {
+namespace A64 {
+
+ExclusiveMonitor::ExclusiveMonitor(size_t processor_count) : exclusive_addresses(processor_count, INVALID_EXCLUSIVE_ADDRESS) {
+    Unlock();
+}
+
+size_t ExclusiveMonitor::GetProcessorCount() const {
+    return exclusive_addresses.size();
+}
+
+void ExclusiveMonitor::Mark(size_t processor_id, VAddr address, size_t size) {
+    ASSERT(size <= 16);
+    const VAddr masked_address = address & RESERVATION_GRANULE_MASK;
+
+    Lock();
+    exclusive_addresses[processor_id] = masked_address;
+    Unlock();
+}
+
+void ExclusiveMonitor::Lock() {
+    while (is_locked.test_and_set()) {}
+}
+
+void ExclusiveMonitor::Unlock() {
+    is_locked.clear();
+}
+
+bool ExclusiveMonitor::CheckAndClear(size_t processor_id, VAddr address, size_t size) {
+    ASSERT(size <= 16);
+    const VAddr masked_address = address & RESERVATION_GRANULE_MASK;
+
+    Lock();
+    if (exclusive_addresses[processor_id] != masked_address) {
+        Unlock();
+        return false;
+    }
+
+    for (VAddr& other_address : exclusive_addresses) {
+        if (other_address == masked_address) {
+            other_address = INVALID_EXCLUSIVE_ADDRESS;
+        }
+    }
+    return true;
+}
+
+void ExclusiveMonitor::Clear() {
+    Lock();
+    std::fill(exclusive_addresses.begin(), exclusive_addresses.end(), INVALID_EXCLUSIVE_ADDRESS);
+    Unlock();
+}
+
+} // namespace A64
+} // namespace Dynarmic
diff --git a/src/backend_x64/a64_interface.cpp b/src/backend_x64/a64_interface.cpp
index 8e3f88c1..6d55fbe9 100644
--- a/src/backend_x64/a64_interface.cpp
+++ b/src/backend_x64/a64_interface.cpp
@@ -167,6 +167,10 @@ public:
         jit_state.SetPstate(value);
     }
 
+    void ClearExclusiveState() {
+        jit_state.exclusive_state = 0;
+    }
+
     bool IsExecuting() const {
         return is_executing;
     }
@@ -338,6 +342,10 @@ void Jit::SetPstate(u32 value) {
     impl->SetPstate(value);
 }
 
+void Jit::ClearExclusiveState() {
+    impl->ClearExclusiveState();
+}
+
 bool Jit::IsExecuting() const {
     return impl->IsExecuting();
 }
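Usage sketch, not part of the patch: how an embedder might wire two emulated cores to the new global monitor. The function and variable names here are illustrative; ExclusiveMonitor, UserConfig::processor_id, UserConfig::global_monitor, and Jit::ClearExclusiveState are the additions introduced by this diff.

#include <dynarmic/A64/a64.h>
#include <dynarmic/A64/config.h>
#include <dynarmic/A64/exclusive_monitor.h>

// The callback objects are whatever A64::UserCallbacks implementation
// the embedder already has.
void SetUpTwoCores(Dynarmic::A64::UserCallbacks& cb0, Dynarmic::A64::UserCallbacks& cb1) {
    // One monitor shared by all cores; each core needs a unique
    // processor_id smaller than the processor_count given here.
    Dynarmic::A64::ExclusiveMonitor monitor{2};

    Dynarmic::A64::UserConfig config0;
    config0.callbacks = &cb0;
    config0.processor_id = 0;
    config0.global_monitor = &monitor;

    Dynarmic::A64::UserConfig config1;
    config1.callbacks = &cb1;
    config1.processor_id = 1;
    config1.global_monitor = &monitor;

    Dynarmic::A64::Jit core0{config0};
    Dynarmic::A64::Jit core1{config1};

    // ... run core0/core1 on host threads via Jit::Run() ...

    // On a guest context switch, drop the core-local exclusive state,
    // mirroring the effect an ERET has on hardware.
    core0.ClearExclusiveState();
}

Each processor_id indexes exclusive_addresses directly, hence the requirement that ids be unique and below the processor_count passed to the constructor.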
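A second sketch isolating the monitor's contract from the JIT (addresses and sizes are arbitrary example values). Two subtleties worth noting for review: DoExclusiveOperation returns true on success, which EmitExclusiveWrite inverts to STXR's status convention where 0 means the store succeeded; and CheckAndClear returns with the internal spinlock still held on success, with DoExclusiveOperation unlocking only after running op, so a successful store and the invalidation of other cores' reservations are atomic with respect to concurrent Mark calls.

#include <dynarmic/A64/exclusive_monitor.h>

void MonitorContractSketch() {
    Dynarmic::A64::ExclusiveMonitor monitor{1};

    // LDXR side (EmitA64SetExclusive): claim the 16-byte reservation
    // granule containing [0x1000, 0x1008) for processor 0.
    monitor.Mark(0, 0x1000, 8);

    // STXR side (EmitExclusiveWrite): the lambda runs only if the claim
    // still holds; overlapping reservations are invalidated with it.
    const bool success = monitor.DoExclusiveOperation(0, 0x1000, 8, [] {
        // The JIT performs the actual MemoryWrite* callback here.
    });

    // success == true here; the JIT maps this to STXR's status value 0.
    (void)success;
}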