A32 global exclusive monitor

MerryMage 2020-06-16 15:46:47 +01:00
parent 58abdcce5b
commit 2c1a4843ad
12 changed files with 260 additions and 56 deletions

View file

@@ -16,6 +16,7 @@ namespace A32 {
using VAddr = std::uint32_t;
class Coprocessor;
class ExclusiveMonitor;
enum class Exception {
/// An UndefinedFault occurred due to executing an instruction with an unallocated encoding
@@ -62,6 +63,12 @@ struct UserCallbacks {
virtual void MemoryWrite32(VAddr vaddr, std::uint32_t value) = 0;
virtual void MemoryWrite64(VAddr vaddr, std::uint64_t value) = 0;
// Writes through these callbacks may not be aligned.
virtual bool MemoryWriteExclusive8(VAddr /*vaddr*/, std::uint8_t /*value*/, std::uint8_t /*expected*/) { return false; }
virtual bool MemoryWriteExclusive16(VAddr /*vaddr*/, std::uint16_t /*value*/, std::uint16_t /*expected*/) { return false; }
virtual bool MemoryWriteExclusive32(VAddr /*vaddr*/, std::uint32_t /*value*/, std::uint32_t /*expected*/) { return false; }
virtual bool MemoryWriteExclusive64(VAddr /*vaddr*/, std::uint64_t /*value*/, std::uint64_t /*expected*/) { return false; }
// If this callback returns true, the JIT will assume MemoryRead* callbacks will always
// return the same value at any point in time for this vaddr. The JIT may use this information
// in optimizations.
@@ -86,6 +93,9 @@ struct UserCallbacks {
struct UserConfig {
UserCallbacks* callbacks;
size_t processor_id = 0;
ExclusiveMonitor* global_monitor = nullptr;
/// When set to false, this disables all optimizations that can't otherwise be disabled
/// by setting other configuration options. This includes:
/// - IR optimizations
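
For integrators, here is a minimal sketch of what one of the new exclusive-write callbacks could look like over a flat host-memory buffer. The buffer name g_memory is hypothetical, and the compare-exchange over reinterpreted guest memory is the common emulator idiom rather than anything this commit mandates; it also assumes vaddr is suitably aligned for the atomic access.

#include <atomic>
#include <cstdint>

extern std::uint8_t* g_memory; // hypothetical flat backing store for guest memory

bool MemoryWriteExclusive32(std::uint32_t vaddr, std::uint32_t value, std::uint32_t expected) {
    // Store value only if the location still holds expected; the return value
    // tells the JIT whether the exclusive store succeeded.
    auto* word = reinterpret_cast<std::atomic<std::uint32_t>*>(g_memory + vaddr);
    return word->compare_exchange_strong(expected, value);
}

A multi-core integration would additionally share one ExclusiveMonitor between all cores, pointing each core's UserConfig::global_monitor at it and giving each core a distinct processor_id.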

View file

@@ -0,0 +1,81 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <array>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <type_traits>
#include <vector>
namespace Dynarmic {
namespace A32 {
using VAddr = std::uint32_t;
class ExclusiveMonitor {
public:
/// @param processor_count Maximum number of processors using this global
/// exclusive monitor. Each processor must have a
/// unique id.
explicit ExclusiveMonitor(size_t processor_count);
size_t GetProcessorCount() const;
/// Reads a value of type T at address via op and marks the region containing
/// it as exclusive to processor processor_id.
template <typename T, typename Function>
T ReadAndMark(size_t processor_id, VAddr address, Function op) {
static_assert(std::is_trivially_copyable_v<T>);
const VAddr masked_address = address;
Lock();
exclusive_addresses[processor_id] = masked_address;
const T value = op();
std::memcpy(&exclusive_values[processor_id], &value, sizeof(T));
Unlock();
return value;
}
/// Checks to see if processor processor_id has exclusive access to the
/// marked region containing address. If it does, executes the operation,
/// then clears the exclusive state of every processor whose marked region
/// contains address.
template <typename T, typename Function>
bool DoExclusiveOperation(size_t processor_id, VAddr address, Function op) {
static_assert(std::is_trivially_copyable_v<T>);
if (!CheckAndClear(processor_id, address)) {
return false;
}
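// CheckAndClear has succeeded and still holds the monitor lock; the Unlock() below releases it.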
T saved_value;
std::memcpy(&saved_value, &exclusive_values[processor_id], sizeof(T));
const bool result = op(saved_value);
Unlock();
return result;
}
/// Unmark everything.
void Clear();
/// Unmark processor processor_id
void ClearProcessor(size_t processor_id);
private:
bool CheckAndClear(size_t processor_id, VAddr address);
void Lock();
void Unlock();
static constexpr VAddr INVALID_EXCLUSIVE_ADDRESS = 0xDEADDEAD;
std::atomic_flag is_locked;
std::vector<VAddr> exclusive_addresses;
std::vector<std::uint64_t> exclusive_values;
};
} // namespace A32
} // namespace Dynarmic
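
A minimal usage sketch of the monitor protocol, single-threaded for clarity; guest_word is a hypothetical stand-in for guest memory at addr:

#include <cstdint>
#include <dynarmic/A32/exclusive_monitor.h>

std::uint32_t guest_word = 42;

bool Demo() {
    Dynarmic::A32::ExclusiveMonitor monitor{2}; // two cores, ids 0 and 1
    const Dynarmic::A32::VAddr addr = 0x1000;

    // LDREX-style: read the current value and mark addr as exclusive to core 0.
    const std::uint32_t seen = monitor.ReadAndMark<std::uint32_t>(0, addr, [&] {
        return guest_word;
    });

    // STREX-style: the store is attempted only if core 0's marking survived.
    return monitor.DoExclusiveOperation<std::uint32_t>(0, addr, [&](std::uint32_t expected) {
        if (guest_word != expected) {
            return false; // memory changed since the marking; exclusive store fails
        }
        guest_word = seen + 1;
        return true;
    });
}

Between the two calls, a successful exclusive store by another core to the same address would clear core 0's marking (CheckAndClear invalidates every matching marking), causing DoExclusiveOperation to return false.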

View file

@@ -273,6 +273,7 @@ if (ARCHITECTURE_x86_64)
target_sources(dynarmic PRIVATE
backend/x64/a32_emit_x64.cpp
backend/x64/a32_emit_x64.h
backend/x64/a32_exclusive_monitor.cpp
backend/x64/a32_interface.cpp
backend/x64/a32_jitstate.cpp
backend/x64/a32_jitstate.h

View file

@@ -9,8 +9,10 @@
#include <fmt/format.h>
#include <fmt/ostream.h>
#include <mp/traits/integer_of_size.h>
#include <dynarmic/A32/coprocessor.h>
#include <dynarmic/A32/exclusive_monitor.h>
#include "backend/x64/a32_emit_x64.h"
#include "backend/x64/a32_jitstate.h"
@@ -848,15 +850,6 @@ void A32EmitX64::EmitA32ClearExclusive(A32EmitContext&, IR::Inst*) {
code.mov(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0));
}
void A32EmitX64::EmitA32SetExclusive(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(args[1].IsImmediate());
const Xbyak::Reg32 address = ctx.reg_alloc.UseGpr(args[0]).cvt32();
code.mov(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(1));
code.mov(dword[r15 + offsetof(A32JitState, exclusive_address)], address);
}
std::optional<A32EmitX64::DoNotFastmemMarker> A32EmitX64::ShouldFastmem(A32EmitContext& ctx, IR::Inst* inst) const {
if (!conf.fastmem_pointer || !exception_handler.SupportsFastmem()) {
return std::nullopt;
@@ -1062,43 +1055,82 @@ void A32EmitX64::EmitA32WriteMemory64(A32EmitContext& ctx, IR::Inst* inst) {
}
template <size_t bitsize, auto callback>
void A32EmitX64::ExclusiveWriteMemory(A32EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Reg32 passed = ctx.reg_alloc.ScratchGpr().cvt32();
    const Xbyak::Reg32 tmp = code.ABI_RETURN.cvt32(); // Use one of the unused HostCall registers.
    ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
    Xbyak::Label end;
    code.mov(passed, u32(1));
    code.cmp(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0));
    code.je(end);
    code.mov(tmp, code.ABI_PARAM2);
    code.xor_(tmp, dword[r15 + offsetof(A32JitState, exclusive_address)]);
    code.test(tmp, A32JitState::RESERVATION_GRANULE_MASK);
    code.jne(end);
    code.mov(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0));
    Devirtualize<callback>(conf.callbacks).EmitCall(code);
    code.xor_(passed, passed);
    code.L(end);
    ctx.reg_alloc.DefineValue(inst, passed);
}
template <size_t bitsize, auto callback>
void A32EmitX64::ExclusiveReadMemory(A32EmitContext& ctx, IR::Inst* inst) {
    using T = mp::unsigned_integer_of_size<bitsize>;
    ASSERT(conf.global_monitor != nullptr);
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    ctx.reg_alloc.HostCall(inst, {}, args[0]);
    code.mov(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(1));
    code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
    code.CallLambda(
        [](A32::UserConfig& conf, u32 vaddr) -> T {
            return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T {
                return (conf.callbacks->*callback)(vaddr);
            });
        }
    );
}
template <size_t bitsize, auto callback>
void A32EmitX64::ExclusiveWriteMemory(A32EmitContext& ctx, IR::Inst* inst) {
    using T = mp::unsigned_integer_of_size<bitsize>;
    ASSERT(conf.global_monitor != nullptr);
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    ctx.reg_alloc.HostCall(inst, {}, args[0], args[1]);
    Xbyak::Label end;
    code.mov(code.ABI_RETURN, u32(1));
    code.cmp(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0));
    code.je(end);
    code.mov(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0));
    code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
    code.CallLambda(
        [](A32::UserConfig& conf, u32 vaddr, T value) -> u32 {
            return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr,
                [&](T expected) -> bool {
                    return (conf.callbacks->*callback)(vaddr, value, expected);
                }) ? 0 : 1;
        }
    );
    code.L(end);
}
void A32EmitX64::EmitA32ExclusiveReadMemory8(A32EmitContext& ctx, IR::Inst* inst) {
ExclusiveReadMemory<8, &A32::UserCallbacks::MemoryRead8>(ctx, inst);
}
void A32EmitX64::EmitA32ExclusiveReadMemory16(A32EmitContext& ctx, IR::Inst* inst) {
ExclusiveReadMemory<16, &A32::UserCallbacks::MemoryRead16>(ctx, inst);
}
void A32EmitX64::EmitA32ExclusiveReadMemory32(A32EmitContext& ctx, IR::Inst* inst) {
ExclusiveReadMemory<32, &A32::UserCallbacks::MemoryRead32>(ctx, inst);
}
void A32EmitX64::EmitA32ExclusiveReadMemory64(A32EmitContext& ctx, IR::Inst* inst) {
ExclusiveReadMemory<64, &A32::UserCallbacks::MemoryRead64>(ctx, inst);
}
void A32EmitX64::EmitA32ExclusiveWriteMemory8(A32EmitContext& ctx, IR::Inst* inst) {
ExclusiveWriteMemory<8, &A32::UserCallbacks::MemoryWrite8>(ctx, inst);
ExclusiveWriteMemory<8, &A32::UserCallbacks::MemoryWriteExclusive8>(ctx, inst);
}
void A32EmitX64::EmitA32ExclusiveWriteMemory16(A32EmitContext& ctx, IR::Inst* inst) {
ExclusiveWriteMemory<16, &A32::UserCallbacks::MemoryWrite16>(ctx, inst);
ExclusiveWriteMemory<16, &A32::UserCallbacks::MemoryWriteExclusive16>(ctx, inst);
}
void A32EmitX64::EmitA32ExclusiveWriteMemory32(A32EmitContext& ctx, IR::Inst* inst) {
ExclusiveWriteMemory<32, &A32::UserCallbacks::MemoryWrite32>(ctx, inst);
ExclusiveWriteMemory<32, &A32::UserCallbacks::MemoryWriteExclusive32>(ctx, inst);
}
void A32EmitX64::EmitA32ExclusiveWriteMemory64(A32EmitContext& ctx, IR::Inst* inst) {
ExclusiveWriteMemory<64, &A32::UserCallbacks::MemoryWrite64>(ctx, inst);
ExclusiveWriteMemory<64, &A32::UserCallbacks::MemoryWriteExclusive64>(ctx, inst);
}
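
Taken together, the emitted fast path and fallback implement the following semantics, written out here as a plain C++ sketch; exclusive_state, monitor, processor_id and the two callback stand-ins are hypothetical names for state the generated code actually reaches through r15 and conf:

#include <cstddef>
#include <cstdint>
#include <dynarmic/A32/exclusive_monitor.h>

extern std::uint8_t exclusive_state;                 // stand-in for A32JitState::exclusive_state
extern Dynarmic::A32::ExclusiveMonitor* monitor;     // stand-in for conf.global_monitor
extern std::size_t processor_id;                     // stand-in for conf.processor_id
std::uint32_t ReadCallback32(std::uint32_t vaddr);   // stand-in for conf.callbacks->MemoryRead32
bool WriteExclusive32(std::uint32_t vaddr, std::uint32_t value, std::uint32_t expected);

std::uint32_t ExclusiveRead32(std::uint32_t vaddr) {
    exclusive_state = 1;
    return monitor->ReadAndMark<std::uint32_t>(processor_id, vaddr,
                                               [&] { return ReadCallback32(vaddr); });
}

std::uint32_t ExclusiveWrite32(std::uint32_t vaddr, std::uint32_t value) {
    if (exclusive_state == 0) {
        return 1; // fast path: no open exclusive access on this core, the store fails
    }
    exclusive_state = 0;
    const bool ok = monitor->DoExclusiveOperation<std::uint32_t>(processor_id, vaddr,
        [&](std::uint32_t expected) { return WriteExclusive32(vaddr, value, expected); });
    return ok ? 0 : 1; // STREX status convention: 0 = success, 1 = failure
}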
static void EmitCoprocessorException() {

View file

@@ -103,6 +103,8 @@ protected:
template<std::size_t bitsize, auto callback>
void WriteMemory(A32EmitContext& ctx, IR::Inst* inst);
template<std::size_t bitsize, auto callback>
void ExclusiveReadMemory(A32EmitContext& ctx, IR::Inst* inst);
template<std::size_t bitsize, auto callback>
void ExclusiveWriteMemory(A32EmitContext& ctx, IR::Inst* inst);
// Terminal instruction emitters

View file

@@ -0,0 +1,62 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include <algorithm>
#include <dynarmic/A32/exclusive_monitor.h>
#include "common/assert.h"
namespace Dynarmic {
namespace A32 {
ExclusiveMonitor::ExclusiveMonitor(size_t processor_count) :
exclusive_addresses(processor_count, INVALID_EXCLUSIVE_ADDRESS), exclusive_values(processor_count) {
Unlock();
}
size_t ExclusiveMonitor::GetProcessorCount() const {
return exclusive_addresses.size();
}
void ExclusiveMonitor::Lock() {
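// Spin until the flag is acquired; the acquire ordering pairs with the release in Unlock().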
while (is_locked.test_and_set(std::memory_order_acquire)) {}
}
void ExclusiveMonitor::Unlock() {
is_locked.clear(std::memory_order_release);
}
bool ExclusiveMonitor::CheckAndClear(size_t processor_id, VAddr address) {
const VAddr masked_address = address;
Lock();
if (exclusive_addresses[processor_id] != masked_address) {
Unlock();
return false;
}
for (VAddr& other_address : exclusive_addresses) {
if (other_address == masked_address) {
other_address = INVALID_EXCLUSIVE_ADDRESS;
}
}
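// Deliberately return with the lock still held; DoExclusiveOperation() unlocks after running op.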
return true;
}
void ExclusiveMonitor::Clear() {
Lock();
std::fill(exclusive_addresses.begin(), exclusive_addresses.end(), INVALID_EXCLUSIVE_ADDRESS);
Unlock();
}
void ExclusiveMonitor::ClearProcessor(size_t processor_id) {
Lock();
exclusive_addresses[processor_id] = INVALID_EXCLUSIVE_ADDRESS;
Unlock();
}
} // namespace A32
} // namespace Dynarmic

View file

@@ -55,9 +55,7 @@ struct A32JitState {
bool check_bit = false;
// Exclusive state
static constexpr u32 RESERVATION_GRANULE_MASK = 0xFFFFFFF8;
u32 exclusive_state = 0;
u32 exclusive_address = 0;
static constexpr size_t RSBSize = 8; // MUST be a power of 2.
static constexpr size_t RSBPtrMask = RSBSize - 1;
@@ -90,7 +88,6 @@ struct A32JitState {
fpsr_nzcv = src.fpsr_nzcv;
exclusive_state = 0;
exclusive_address = 0;
if (reset_rsb) {
ResetRSB();

View file

@@ -188,11 +188,6 @@ void IREmitter::ClearExclusive() {
Inst(Opcode::A32ClearExclusive);
}
void IREmitter::SetExclusive(const IR::U32& vaddr, size_t byte_size) {
ASSERT(byte_size == 1 || byte_size == 2 || byte_size == 4 || byte_size == 8 || byte_size == 16);
Inst(Opcode::A32SetExclusive, vaddr, Imm8(u8(byte_size)));
}
IR::UAny IREmitter::ReadMemory(size_t bitsize, const IR::U32& vaddr) {
switch (bitsize) {
case 8:
@@ -226,6 +221,31 @@ IR::U64 IREmitter::ReadMemory64(const IR::U32& vaddr) {
return current_location.EFlag() ? ByteReverseDual(value) : value;
}
IR::U8 IREmitter::ExclusiveReadMemory8(const IR::U32& vaddr) {
return Inst<IR::U8>(Opcode::A32ExclusiveReadMemory8, vaddr);
}
IR::U16 IREmitter::ExclusiveReadMemory16(const IR::U32& vaddr) {
const auto value = Inst<IR::U16>(Opcode::A32ExclusiveReadMemory16, vaddr);
return current_location.EFlag() ? ByteReverseHalf(value) : value;
}
IR::U32 IREmitter::ExclusiveReadMemory32(const IR::U32& vaddr) {
const auto value = Inst<IR::U32>(Opcode::A32ExclusiveReadMemory32, vaddr);
return current_location.EFlag() ? ByteReverseWord(value) : value;
}
std::pair<IR::U32, IR::U32> IREmitter::ExclusiveReadMemory64(const IR::U32& vaddr) {
const auto value = Inst<IR::U64>(Opcode::A32ExclusiveReadMemory64, vaddr);
const auto lo = LeastSignificantWord(value);
const auto hi = MostSignificantWord(value).result;
if (current_location.EFlag()) {
// DO NOT SWAP hi AND lo IN BIG ENDIAN MODE, THIS IS CORRECT BEHAVIOUR
return std::make_pair(ByteReverseWord(lo), ByteReverseWord(hi));
}
return std::make_pair(lo, hi);
}
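As a concrete example: if guest memory at vaddr holds the bytes 00 11 22 33 44 55 66 77, a little-endian read yields lo = 0x33221100 and hi = 0x77665544; with the E flag set, each word is byte-reversed to lo = 0x00112233 and hi = 0x44556677, but lo still comes from the lower address. BE-32 reverses the bytes within each word, not the order of the two words, which is why hi and lo must not be swapped.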
void IREmitter::WriteMemory(size_t bitsize, const IR::U32& vaddr, const IR::UAny& value) {
switch (bitsize) {
case 8:

View file

@@ -5,6 +5,8 @@
#pragma once
#include <utility>
#include "common/common_types.h"
#include "frontend/A32/location_descriptor.h"
#include "frontend/ir/ir_emitter.h"
@@ -71,12 +73,15 @@ public:
void SetFpscrNZCV(const IR::NZCV& new_fpscr_nzcv);
void ClearExclusive();
void SetExclusive(const IR::U32& vaddr, size_t byte_size);
IR::UAny ReadMemory(size_t bitsize, const IR::U32& vaddr);
IR::U8 ReadMemory8(const IR::U32& vaddr);
IR::U16 ReadMemory16(const IR::U32& vaddr);
IR::U32 ReadMemory32(const IR::U32& vaddr);
IR::U64 ReadMemory64(const IR::U32& vaddr);
IR::U8 ExclusiveReadMemory8(const IR::U32& vaddr);
IR::U16 ExclusiveReadMemory16(const IR::U32& vaddr);
IR::U32 ExclusiveReadMemory32(const IR::U32& vaddr);
std::pair<IR::U32, IR::U32> ExclusiveReadMemory64(const IR::U32& vaddr);
void WriteMemory(size_t bitsize, const IR::U32& vaddr, const IR::UAny& value);
void WriteMemory8(const IR::U32& vaddr, const IR::U8& value);
void WriteMemory16(const IR::U32& vaddr, const IR::U16& value);

View file

@@ -103,8 +103,7 @@ bool ArmTranslatorVisitor::arm_LDAEX(Cond cond, Reg n, Reg t) {
}
const auto address = ir.GetRegister(n);
ir.SetExclusive(address, 4);
ir.SetRegister(t, ir.ReadMemory32(address)); // AccType::Ordered
ir.SetRegister(t, ir.ExclusiveReadMemory32(address)); // AccType::Ordered
return true;
}
@@ -119,8 +118,7 @@ bool ArmTranslatorVisitor::arm_LDAEXB(Cond cond, Reg n, Reg t) {
}
const auto address = ir.GetRegister(n);
ir.SetExclusive(address, 1);
ir.SetRegister(t, ir.ZeroExtendByteToWord(ir.ReadMemory8(address))); // AccType::Ordered
ir.SetRegister(t, ir.ZeroExtendByteToWord(ir.ExclusiveReadMemory8(address))); // AccType::Ordered
return true;
}
@@ -135,12 +133,9 @@ bool ArmTranslatorVisitor::arm_LDAEXD(Cond cond, Reg n, Reg t) {
}
const auto address = ir.GetRegister(n);
ir.SetExclusive(address, 8);
const auto [lo, hi] = ir.ExclusiveReadMemory64(address); // AccType::Ordered
// DO NOT SWAP hi AND lo IN BIG ENDIAN MODE, THIS IS CORRECT BEHAVIOUR
const auto lo = ir.ReadMemory32(address); // AccType::Ordered
ir.SetRegister(t, lo);
const auto hi = ir.ReadMemory32(ir.Add(address, ir.Imm32(4))); // AccType::Ordered
ir.SetRegister(t+1, hi);
return true;
}
@@ -156,8 +151,7 @@ bool ArmTranslatorVisitor::arm_LDAEXH(Cond cond, Reg n, Reg t) {
}
const auto address = ir.GetRegister(n);
ir.SetExclusive(address, 2);
ir.SetRegister(t, ir.ZeroExtendHalfToWord(ir.ReadMemory16(address))); // AccType::Ordered
ir.SetRegister(t, ir.ZeroExtendHalfToWord(ir.ExclusiveReadMemory16(address))); // AccType::Ordered
return true;
}
@@ -302,8 +296,7 @@ bool ArmTranslatorVisitor::arm_LDREX(Cond cond, Reg n, Reg t) {
}
const auto address = ir.GetRegister(n);
ir.SetExclusive(address, 4);
ir.SetRegister(t, ir.ReadMemory32(address));
ir.SetRegister(t, ir.ExclusiveReadMemory32(address));
return true;
}
@@ -318,8 +311,7 @@ bool ArmTranslatorVisitor::arm_LDREXB(Cond cond, Reg n, Reg t) {
}
const auto address = ir.GetRegister(n);
ir.SetExclusive(address, 1);
ir.SetRegister(t, ir.ZeroExtendByteToWord(ir.ReadMemory8(address)));
ir.SetRegister(t, ir.ZeroExtendByteToWord(ir.ExclusiveReadMemory8(address)));
return true;
}
@@ -334,12 +326,9 @@ bool ArmTranslatorVisitor::arm_LDREXD(Cond cond, Reg n, Reg t) {
}
const auto address = ir.GetRegister(n);
ir.SetExclusive(address, 8);
const auto [lo, hi] = ir.ExclusiveReadMemory64(address);
// DO NOT SWAP hi AND lo IN BIG ENDIAN MODE, THIS IS CORRECT BEHAVIOUR
const auto lo = ir.ReadMemory32(address);
ir.SetRegister(t, lo);
const auto hi = ir.ReadMemory32(ir.Add(address, ir.Imm32(4)));
ir.SetRegister(t+1, hi);
return true;
}
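Note that LDREXD (and LDAEXD above) now performs a single 64-bit exclusive read instead of two ordinary 32-bit reads, so the doubleword is read under one monitor marking rather than as two unmarked loads.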
@@ -355,8 +344,7 @@ bool ArmTranslatorVisitor::arm_LDREXH(Cond cond, Reg n, Reg t) {
}
const auto address = ir.GetRegister(n);
ir.SetExclusive(address, 2);
ir.SetRegister(t, ir.ZeroExtendHalfToWord(ir.ReadMemory16(address)));
ir.SetRegister(t, ir.ZeroExtendHalfToWord(ir.ExclusiveReadMemory16(address)));
return true;
}

View file

@@ -101,6 +101,10 @@ bool Inst::IsSharedMemoryReadOrWrite() const {
bool Inst::IsExclusiveMemoryRead() const {
switch (op) {
case Opcode::A32ExclusiveReadMemory8:
case Opcode::A32ExclusiveReadMemory16:
case Opcode::A32ExclusiveReadMemory32:
case Opcode::A32ExclusiveReadMemory64:
case Opcode::A64ExclusiveReadMemory8:
case Opcode::A64ExclusiveReadMemory16:
case Opcode::A64ExclusiveReadMemory32:
@@ -487,7 +491,6 @@ bool Inst::CausesCPUException() const {
bool Inst::AltersExclusiveState() const {
return op == Opcode::A32ClearExclusive ||
op == Opcode::A32SetExclusive ||
op == Opcode::A64ClearExclusive ||
IsExclusiveMemoryRead() ||
IsExclusiveMemoryWrite();

View file

@@ -641,11 +641,14 @@ OPCODE(FPVectorToUnsignedFixed64, U128, U128
// A32 Memory access
A32OPC(ClearExclusive, Void, )
A32OPC(SetExclusive, Void, U32, U8 )
A32OPC(ReadMemory8, U8, U32 )
A32OPC(ReadMemory16, U16, U32 )
A32OPC(ReadMemory32, U32, U32 )
A32OPC(ReadMemory64, U64, U32 )
A32OPC(ExclusiveReadMemory8, U8, U32 )
A32OPC(ExclusiveReadMemory16, U16, U32 )
A32OPC(ExclusiveReadMemory32, U32, U32 )
A32OPC(ExclusiveReadMemory64, U64, U32 )
A32OPC(WriteMemory8, Void, U32, U8 )
A32OPC(WriteMemory16, Void, U32, U16 )
A32OPC(WriteMemory32, Void, U32, U32 )