diff --git a/src/backend_x64/a64_emit_x64.cpp b/src/backend_x64/a64_emit_x64.cpp index c253e6c9..ab920c98 100644 --- a/src/backend_x64/a64_emit_x64.cpp +++ b/src/backend_x64/a64_emit_x64.cpp @@ -224,7 +224,7 @@ void A64EmitX64::GenFastmemFallbacks() { ABI_PopCallerSaveRegistersAndAdjustStack(code); code.ret(); - if (vaddr_idx == value_idx || value_idx == 4 || value_idx == 15) { + if (value_idx == 4 || value_idx == 15) { continue; } @@ -249,13 +249,18 @@ void A64EmitX64::GenFastmemFallbacks() { ABI_PushCallerSaveRegistersAndAdjustStack(code); if (vaddr_idx == code.ABI_PARAM3.getIdx() && value_idx == code.ABI_PARAM2.getIdx()) { code.xchg(code.ABI_PARAM2, code.ABI_PARAM3); - } else { - if (vaddr_idx != code.ABI_PARAM2.getIdx()) { - code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); - } + } else if (vaddr_idx == code.ABI_PARAM3.getIdx()) { + code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); if (value_idx != code.ABI_PARAM3.getIdx()) { code.mov(code.ABI_PARAM3, Xbyak::Reg64{value_idx}); } + } else { + if (value_idx != code.ABI_PARAM3.getIdx()) { + code.mov(code.ABI_PARAM3, Xbyak::Reg64{value_idx}); + } + if (vaddr_idx != code.ABI_PARAM2.getIdx()) { + code.mov(code.ABI_PARAM2, Xbyak::Reg64{vaddr_idx}); + } } callback.EmitCall(code); ABI_PopCallerSaveRegistersAndAdjustStack(code); @@ -480,6 +485,19 @@ void A64EmitX64::EmitA64GetTPIDRRO(A64EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.DefineValue(inst, result); } +void A64EmitX64::EmitA64ClearExclusive(A64EmitContext&, IR::Inst*) { + code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0)); +} + +void A64EmitX64::EmitA64SetExclusive(A64EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + ASSERT(args[1].IsImmediate()); + Xbyak::Reg32 address = ctx.reg_alloc.UseGpr(args[0]).cvt32(); + + code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(1)); + code.mov(dword[r15 + offsetof(A64JitState, exclusive_address)], address); +} + static Xbyak::RegExp 
EmitVAddrLookup(const A64::UserConfig& conf, BlockOfCode& code, A64EmitContext& ctx, Xbyak::Label& abort, Xbyak::Reg64 vaddr, boost::optional arg_scratch = {}) { constexpr size_t PAGE_BITS = 12; constexpr size_t PAGE_SIZE = 1 << PAGE_BITS; @@ -722,6 +740,61 @@ void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) { code.CallFunction(memory_write_128); } +void A64EmitX64::EmitExclusiveWrite(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize, Xbyak::Reg64 vaddr, size_t value_idx) { + Xbyak::Label end; + Xbyak::Reg32 passed = ctx.reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); + + code.mov(passed, u32(1)); + code.cmp(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0)); + code.je(end); + code.mov(tmp, vaddr); + code.xor_(tmp, dword[r15 + offsetof(A64JitState, exclusive_address)]); + code.test(tmp, A64JitState::RESERVATION_GRANULE_MASK); + code.jne(end); + code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0)); + code.call(write_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value_idx)]); + code.xor_(passed, passed); + code.L(end); + + ctx.reg_alloc.DefineValue(inst, passed); +} + +void A64EmitX64::EmitA64ExclusiveWriteMemory8(A64EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); + Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]); + EmitExclusiveWrite(ctx, inst, 8, vaddr, value.getIdx()); +} + +void A64EmitX64::EmitA64ExclusiveWriteMemory16(A64EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); + Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]); + EmitExclusiveWrite(ctx, inst, 16, vaddr, value.getIdx()); +} + +void A64EmitX64::EmitA64ExclusiveWriteMemory32(A64EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 vaddr = 
ctx.reg_alloc.UseGpr(args[0]); + Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]); + EmitExclusiveWrite(ctx, inst, 32, vaddr, value.getIdx()); +} + +void A64EmitX64::EmitA64ExclusiveWriteMemory64(A64EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); + Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]); + EmitExclusiveWrite(ctx, inst, 64, vaddr, value.getIdx()); +} + +void A64EmitX64::EmitA64ExclusiveWriteMemory128(A64EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); + Xbyak::Xmm value = ctx.reg_alloc.UseXmm(args[1]); + EmitExclusiveWrite(ctx, inst, 128, vaddr, value.getIdx()); +} + void A64EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor) { code.SwitchMxcsrOnExit(); DEVIRT(conf.callbacks, &A64::UserCallbacks::InterpreterFallback).EmitCall(code, [&](RegList param) { diff --git a/src/backend_x64/a64_emit_x64.h b/src/backend_x64/a64_emit_x64.h index 415958e5..9ba3e5d4 100644 --- a/src/backend_x64/a64_emit_x64.h +++ b/src/backend_x64/a64_emit_x64.h @@ -57,6 +57,7 @@ protected: void EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize); void EmitDirectPageTableMemoryWrite(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize); + void EmitExclusiveWrite(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize, Xbyak::Reg64 vaddr, size_t value_idx); // Microinstruction emitters #define OPCODE(...) 
diff --git a/src/backend_x64/a64_jitstate.h b/src/backend_x64/a64_jitstate.h index 16297191..84b79e19 100644 --- a/src/backend_x64/a64_jitstate.h +++ b/src/backend_x64/a64_jitstate.h @@ -56,6 +56,11 @@ struct A64JitState { bool halt_requested = false; bool check_bit = false; + // Exclusive state + static constexpr u32 RESERVATION_GRANULE_MASK = 0xFFFFFFF8; + u32 exclusive_state = 0; + u32 exclusive_address = 0; + static constexpr size_t RSBSize = 8; // MUST be a power of 2. static constexpr size_t RSBPtrMask = RSBSize - 1; u32 rsb_ptr = 0; diff --git a/src/frontend/A64/decoder/a64.inc b/src/frontend/A64/decoder/a64.inc index f3fdbf4f..d6345a63 100644 --- a/src/frontend/A64/decoder/a64.inc +++ b/src/frontend/A64/decoder/a64.inc @@ -135,12 +135,12 @@ INST(LDx_mult_2, "LDx (multiple structures)", "0Q001 //INST(LD4R_2, "LD4R", "0Q001101111mmmmm1110zznnnnnttttt") // Loads and stores - Load/Store Exclusive -//INST(STXR, "STXRB, STXRH, STXR", "zz001000000sssss011111nnnnnttttt") -//INST(STLXR, "STLXRB, STLXRH, STLXR", "zz001000000sssss111111nnnnnttttt") +INST(STXR, "STXRB, STXRH, STXR", "zz001000000sssss011111nnnnnttttt") +INST(STLXR, "STLXRB, STLXRH, STLXR", "zz001000000sssss111111nnnnnttttt") //INST(STXP, "STXP", "1z001000001sssss0uuuuunnnnnttttt") //INST(STLXP, "STLXP", "1z001000001sssss1uuuuunnnnnttttt") -//INST(LDXR, "LDXRB, LDXRH, LDXR", "zz00100001011111011111nnnnnttttt") -//INST(LDAXRB, "LDAXRB", "zz00100001011111111111nnnnnttttt") +INST(LDXR, "LDXRB, LDXRH, LDXR", "zz00100001011111011111nnnnnttttt") +INST(LDAXR, "LDAXRB, LDAXRH, LDAXR", "zz00100001011111111111nnnnnttttt") //INST(LDXP, "LDXP", "1z001000011111110uuuuunnnnnttttt") //INST(LDAXP, "LDAXP", "1z001000011111111uuuuunnnnnttttt") INST(STLLR, "STLLRB, STLLRH, STLLR", "zz00100010011111011111nnnnnttttt") diff --git a/src/frontend/A64/ir_emitter.cpp b/src/frontend/A64/ir_emitter.cpp index 659ff131..204d76f1 100644 --- a/src/frontend/A64/ir_emitter.cpp +++ b/src/frontend/A64/ir_emitter.cpp @@ -61,6 +61,15 @@ 
IR::U64 IREmitter::GetTPIDRRO() { return Inst(Opcode::A64GetTPIDRRO); } +void IREmitter::ClearExclusive() { + Inst(Opcode::A64ClearExclusive); +} + +void IREmitter::SetExclusive(const IR::U64& vaddr, size_t byte_size) { + ASSERT(byte_size == 1 || byte_size == 2 || byte_size == 4 || byte_size == 8 || byte_size == 16); + Inst(Opcode::A64SetExclusive, vaddr, Imm8(u8(byte_size))); +} + IR::U8 IREmitter::ReadMemory8(const IR::U64& vaddr) { return Inst(Opcode::A64ReadMemory8, vaddr); } @@ -101,6 +110,26 @@ void IREmitter::WriteMemory128(const IR::U64& vaddr, const IR::U128& value) { Inst(Opcode::A64WriteMemory128, vaddr, value); } +IR::U32 IREmitter::ExclusiveWriteMemory8(const IR::U64& vaddr, const IR::U8& value) { + return Inst(Opcode::A64ExclusiveWriteMemory8, vaddr, value); +} + +IR::U32 IREmitter::ExclusiveWriteMemory16(const IR::U64& vaddr, const IR::U16& value) { + return Inst(Opcode::A64ExclusiveWriteMemory16, vaddr, value); +} + +IR::U32 IREmitter::ExclusiveWriteMemory32(const IR::U64& vaddr, const IR::U32& value) { + return Inst(Opcode::A64ExclusiveWriteMemory32, vaddr, value); +} + +IR::U32 IREmitter::ExclusiveWriteMemory64(const IR::U64& vaddr, const IR::U64& value) { + return Inst(Opcode::A64ExclusiveWriteMemory64, vaddr, value); +} + +IR::U32 IREmitter::ExclusiveWriteMemory128(const IR::U64& vaddr, const IR::U128& value) { + return Inst(Opcode::A64ExclusiveWriteMemory128, vaddr, value); +} + IR::U32 IREmitter::GetW(Reg reg) { if (reg == Reg::ZR) return Imm32(0); diff --git a/src/frontend/A64/ir_emitter.h b/src/frontend/A64/ir_emitter.h index 68784c6e..24d05f1b 100644 --- a/src/frontend/A64/ir_emitter.h +++ b/src/frontend/A64/ir_emitter.h @@ -47,6 +47,8 @@ public: IR::U32 GetDCZID(); IR::U64 GetTPIDRRO(); + void ClearExclusive(); + void SetExclusive(const IR::U64& vaddr, size_t byte_size); IR::U8 ReadMemory8(const IR::U64& vaddr); IR::U16 ReadMemory16(const IR::U64& vaddr); IR::U32 ReadMemory32(const IR::U64& vaddr); @@ -57,6 +59,11 @@ public: void 
WriteMemory32(const IR::U64& vaddr, const IR::U32& value); void WriteMemory64(const IR::U64& vaddr, const IR::U64& value); void WriteMemory128(const IR::U64& vaddr, const IR::U128& value); + IR::U32 ExclusiveWriteMemory8(const IR::U64& vaddr, const IR::U8& value); + IR::U32 ExclusiveWriteMemory16(const IR::U64& vaddr, const IR::U16& value); + IR::U32 ExclusiveWriteMemory32(const IR::U64& vaddr, const IR::U32& value); + IR::U32 ExclusiveWriteMemory64(const IR::U64& vaddr, const IR::U64& value); + IR::U32 ExclusiveWriteMemory128(const IR::U64& vaddr, const IR::U128& value); IR::U32 GetW(Reg source_reg); IR::U64 GetX(Reg source_reg); diff --git a/src/frontend/A64/translate/impl/impl.cpp b/src/frontend/A64/translate/impl/impl.cpp index bb7440e0..98261947 100644 --- a/src/frontend/A64/translate/impl/impl.cpp +++ b/src/frontend/A64/translate/impl/impl.cpp @@ -308,6 +308,24 @@ void TranslatorVisitor::Mem(IR::U64 address, size_t bytesize, AccType /*acctype* } } +IR::U32 TranslatorVisitor::ExclusiveMem(IR::U64 address, size_t bytesize, AccType /*acctype*/, IR::UAnyU128 value) { + switch (bytesize) { + case 1: + return ir.ExclusiveWriteMemory8(address, value); + case 2: + return ir.ExclusiveWriteMemory16(address, value); + case 4: + return ir.ExclusiveWriteMemory32(address, value); + case 8: + return ir.ExclusiveWriteMemory64(address, value); + case 16: + return ir.ExclusiveWriteMemory128(address, value); + default: + ASSERT_MSG(false, "Invalid bytesize parameter {}", bytesize); + return {}; + } +} + IR::U32U64 TranslatorVisitor::SignExtend(IR::UAny value, size_t to_size) { switch (to_size) { case 32: diff --git a/src/frontend/A64/translate/impl/impl.h b/src/frontend/A64/translate/impl/impl.h index 26aa7659..c9085ffe 100644 --- a/src/frontend/A64/translate/impl/impl.h +++ b/src/frontend/A64/translate/impl/impl.h @@ -63,6 +63,7 @@ struct TranslatorVisitor final { IR::UAnyU128 Mem(IR::U64 address, size_t size, AccType acctype); void Mem(IR::U64 address, size_t size, AccType 
acctype, IR::UAnyU128 value); + IR::U32 ExclusiveMem(IR::U64 address, size_t size, AccType acctype, IR::UAnyU128 value); IR::U32U64 SignExtend(IR::UAny value, size_t to_size); IR::U32U64 ZeroExtend(IR::UAny value, size_t to_size); @@ -211,7 +212,7 @@ struct TranslatorVisitor final { bool STXP(Imm<1> size, Reg Rs, Reg Rt2, Reg Rn, Reg Rt); bool STLXP(Imm<1> size, Reg Rs, Reg Rt2, Reg Rn, Reg Rt); bool LDXR(Imm<2> size, Reg Rn, Reg Rt); - bool LDAXRB(Imm<2> size, Reg Rn, Reg Rt); + bool LDAXR(Imm<2> size, Reg Rn, Reg Rt); bool LDXP(Imm<1> size, Reg Rt2, Reg Rn, Reg Rt); bool LDAXP(Imm<1> size, Reg Rt2, Reg Rn, Reg Rt); bool STLLR(Imm<2> size, Reg Rn, Reg Rt); diff --git a/src/frontend/A64/translate/impl/load_store_exclusive.cpp b/src/frontend/A64/translate/impl/load_store_exclusive.cpp index 70129ebf..3b4563c5 100644 --- a/src/frontend/A64/translate/impl/load_store_exclusive.cpp +++ b/src/frontend/A64/translate/impl/load_store_exclusive.cpp @@ -4,10 +4,85 @@ * General Public License version 2 or any later version. */ +#include <boost/optional.hpp> + #include "frontend/A64/translate/impl/impl.h" namespace Dynarmic::A64 { +static bool ExclusiveSharedDecodeAndOperation(TranslatorVisitor& tv, IREmitter& ir, size_t size, bool L, bool o0, boost::optional<Reg> Rs, Reg Rn, Reg Rt) { + // Shared Decode + + const AccType acctype = o0 ? AccType::ORDERED : AccType::ATOMIC; + const MemOp memop = L ? MemOp::LOAD : MemOp::STORE; + const size_t elsize = 8 << size; + const size_t regsize = elsize == 64 ? 
64 : 32; + const size_t datasize = elsize; + + // Operation + + const size_t dbytes = datasize / 8; + + if (memop == MemOp::STORE && *Rs == Rn && Rn != Reg::R31) { + return tv.UnpredictableInstruction(); + } + + IR::U64 address; + if (Rn == Reg::SP) { + // TODO: Check SP Alignment + address = tv.SP(64); + } else { + address = tv.X(64, Rn); + } + + switch (memop) { + case MemOp::STORE: { + IR::UAny data = tv.X(datasize, Rt); + IR::U32 status = tv.ExclusiveMem(address, dbytes, acctype, data); + tv.X(32, *Rs, status); + break; + } + case MemOp::LOAD: { + ir.SetExclusive(address, dbytes); + IR::UAny data = tv.Mem(address, dbytes, acctype); + tv.X(regsize, Rt, tv.ZeroExtend(data, regsize)); + break; + } + default: + UNREACHABLE(); + } + + return true; +} + +bool TranslatorVisitor::STXR(Imm<2> sz, Reg Rs, Reg Rn, Reg Rt) { + const size_t size = sz.ZeroExtend(); + const bool L = 0; + const bool o0 = 0; + return ExclusiveSharedDecodeAndOperation(*this, ir, size, L, o0, Rs, Rn, Rt); +} + +bool TranslatorVisitor::STLXR(Imm<2> sz, Reg Rs, Reg Rn, Reg Rt) { + const size_t size = sz.ZeroExtend(); + const bool L = 0; + const bool o0 = 1; + return ExclusiveSharedDecodeAndOperation(*this, ir, size, L, o0, Rs, Rn, Rt); +} + +bool TranslatorVisitor::LDXR(Imm<2> sz, Reg Rn, Reg Rt) { + const size_t size = sz.ZeroExtend(); + const bool L = 1; + const bool o0 = 0; + return ExclusiveSharedDecodeAndOperation(*this, ir, size, L, o0, {}, Rn, Rt); +} + +bool TranslatorVisitor::LDAXR(Imm<2> sz, Reg Rn, Reg Rt) { + const size_t size = sz.ZeroExtend(); + const bool L = 1; + const bool o0 = 1; + return ExclusiveSharedDecodeAndOperation(*this, ir, size, L, o0, {}, Rn, Rt); +} + static bool OrderedSharedDecodeAndOperation(TranslatorVisitor& tv, size_t size, bool L, bool o0, Reg Rn, Reg Rt) { // Shared Decode diff --git a/src/frontend/ir/microinstruction.cpp b/src/frontend/ir/microinstruction.cpp index dcdf2b68..7ccd2711 100644 --- a/src/frontend/ir/microinstruction.cpp +++ 
b/src/frontend/ir/microinstruction.cpp @@ -89,6 +89,11 @@ bool Inst::IsExclusiveMemoryWrite() const { case Opcode::A32ExclusiveWriteMemory16: case Opcode::A32ExclusiveWriteMemory32: case Opcode::A32ExclusiveWriteMemory64: + case Opcode::A64ExclusiveWriteMemory8: + case Opcode::A64ExclusiveWriteMemory16: + case Opcode::A64ExclusiveWriteMemory32: + case Opcode::A64ExclusiveWriteMemory64: + case Opcode::A64ExclusiveWriteMemory128: return true; default: @@ -249,6 +254,8 @@ bool Inst::CausesCPUException() const { bool Inst::AltersExclusiveState() const { return op == Opcode::A32ClearExclusive || op == Opcode::A32SetExclusive || + op == Opcode::A64ClearExclusive || + op == Opcode::A64SetExclusive || IsExclusiveMemoryWrite(); } diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index 461f663d..3796fbca 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -325,6 +325,8 @@ A32OPC(ExclusiveWriteMemory32, T::U32, T::U32, T::U32 A32OPC(ExclusiveWriteMemory64, T::U32, T::U32, T::U32, T::U32 ) // A64 Memory access +A64OPC(ClearExclusive, T::Void, ) +A64OPC(SetExclusive, T::Void, T::U64, T::U8 ) A64OPC(ReadMemory8, T::U8, T::U64 ) A64OPC(ReadMemory16, T::U16, T::U64 ) A64OPC(ReadMemory32, T::U32, T::U64 ) @@ -335,6 +337,11 @@ A64OPC(WriteMemory16, T::Void, T::U64, T::U16 A64OPC(WriteMemory32, T::Void, T::U64, T::U32 ) A64OPC(WriteMemory64, T::Void, T::U64, T::U64 ) A64OPC(WriteMemory128, T::Void, T::U64, T::U128 ) +A64OPC(ExclusiveWriteMemory8, T::U32, T::U64, T::U8 ) +A64OPC(ExclusiveWriteMemory16, T::U32, T::U64, T::U16 ) +A64OPC(ExclusiveWriteMemory32, T::U32, T::U64, T::U32 ) +A64OPC(ExclusiveWriteMemory64, T::U32, T::U64, T::U64 ) +A64OPC(ExclusiveWriteMemory128, T::U32, T::U64, T::U128 ) // Coprocessor A32OPC(CoprocInternalOperation, T::Void, T::CoprocInfo )