From d6f2a15834be4fe402832df3b7d07c2d55e7b664 Mon Sep 17 00:00:00 2001 From: Merry Date: Tue, 8 Nov 2022 21:39:37 +0000 Subject: [PATCH] backend/arm64: Simple block linking --- .../backend/arm64/a32_address_space.cpp | 61 ++++++++++++++++++- .../backend/arm64/a32_address_space.h | 5 +- src/dynarmic/backend/arm64/emit_arm64.cpp | 5 ++ src/dynarmic/backend/arm64/emit_arm64.h | 21 ++++++- src/dynarmic/backend/arm64/emit_arm64_a32.cpp | 22 +++++-- 5 files changed, 104 insertions(+), 10 deletions(-) diff --git a/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/backend/arm64/a32_address_space.cpp index a060b8f5..00a0a894 100644 --- a/src/dynarmic/backend/arm64/a32_address_space.cpp +++ b/src/dynarmic/backend/arm64/a32_address_space.cpp @@ -144,6 +144,7 @@ CodePtr A32AddressSpace::GetOrEmit(IR::LocationDescriptor descriptor) { void A32AddressSpace::ClearCache() { block_entries.clear(); block_infos.clear(); + block_references.clear(); code.set_ptr(prelude_info.end_of_prelude); } @@ -309,18 +310,57 @@ EmittedBlockInfo A32AddressSpace::Emit(IR::Block block) { .state_nzcv_offset = offsetof(A32JitState, cpsr_nzcv), .state_fpsr_offset = offsetof(A32JitState, fpsr), .coprocessors = conf.coprocessors, + .optimizations = conf.unsafe_optimizations ? conf.optimizations : conf.optimizations & all_safe_optimizations, }; EmittedBlockInfo block_info = EmitArm64(code, std::move(block), emit_conf); - Link(block_info); + Link(block.Location(), block_info); mem.invalidate(reinterpret_cast(block_info.entry_point), block_info.size); + + RelinkForDescriptor(block.Location()); + mem.protect(); return block_info; } -void A32AddressSpace::Link(EmittedBlockInfo& block_info) { +static void LinkBlockLinks(const CodePtr entry_point, const CodePtr target_ptr, const std::vector& block_relocations_list) { + using namespace oaknut; + using namespace oaknut::util; + + for (auto [ptr_offset, type] : block_relocations_list) { + CodeGenerator c{reinterpret_cast(entry_point + ptr_offset)}; + + switch (type) { + case BlockLinkType::LinkBlockUnconditionally: + if (target_ptr) { + c.B((void*)target_ptr); + } else { + c.NOP(); + } + break; + case BlockLinkType::LinkBlockIfGreater: + if (target_ptr) { + c.B(GE, (void*)target_ptr); + } else { + c.NOP(); + } + break; + case BlockLinkType::LinkBlockIfWscratch0IsZero: + if (target_ptr) { + c.CBZ(Wscratch0, (void*)target_ptr); + } else { + c.NOP(); + } + break; + default: + ASSERT_FALSE("Invalid block relocation type"); + } + } +} + +void A32AddressSpace::Link(IR::LocationDescriptor block_descriptor, EmittedBlockInfo& block_info) { using namespace oaknut; using namespace oaknut::util; @@ -401,6 +441,23 @@ void A32AddressSpace::Link(EmittedBlockInfo& block_info) { ASSERT_FALSE("Invalid relocation target"); } } + + for (auto [target_descriptor, list] : block_info.block_relocations) { + block_references[target_descriptor.Value()].emplace(block_descriptor.Value()); + LinkBlockLinks(block_info.entry_point, Get(target_descriptor), list); + } +} + +void A32AddressSpace::RelinkForDescriptor(IR::LocationDescriptor target_descriptor) { + for (auto block_descriptor : block_references[target_descriptor.Value()]) { + if (auto iter = block_infos.find(block_descriptor); iter != block_infos.end()) { + const EmittedBlockInfo& block_info = iter->second; + + LinkBlockLinks(block_info.entry_point, Get(target_descriptor), block_infos[block_descriptor].block_relocations[target_descriptor]); + + mem.invalidate(reinterpret_cast(block_info.entry_point), block_info.size); + } + } } } // namespace Dynarmic::Backend::Arm64 diff --git a/src/dynarmic/backend/arm64/a32_address_space.h b/src/dynarmic/backend/arm64/a32_address_space.h index 7bca0885..18ed118d 100644 --- a/src/dynarmic/backend/arm64/a32_address_space.h +++ b/src/dynarmic/backend/arm64/a32_address_space.h @@ -9,6 +9,7 @@ #include #include #include +#include #include "dynarmic/backend/arm64/emit_arm64.h" #include "dynarmic/interface/A32/config.h" @@ -39,7 +40,8 @@ private: size_t GetRemainingSize(); EmittedBlockInfo Emit(IR::Block ir_block); - void Link(EmittedBlockInfo& block); + void Link(IR::LocationDescriptor block_descriptor, EmittedBlockInfo& block); + void RelinkForDescriptor(IR::LocationDescriptor target_descriptor); const A32::UserConfig conf; @@ -48,6 +50,7 @@ private: tsl::robin_map block_entries; tsl::robin_map block_infos; + tsl::robin_map> block_references; struct PreludeInfo { u32* end_of_prelude; diff --git a/src/dynarmic/backend/arm64/emit_arm64.cpp b/src/dynarmic/backend/arm64/emit_arm64.cpp index fdd181ef..bd864dca 100644 --- a/src/dynarmic/backend/arm64/emit_arm64.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64.cpp @@ -216,4 +216,9 @@ void EmitRelocation(oaknut::CodeGenerator& code, EmitContext& ctx, LinkTarget li code.NOP(); } +void EmitRelocation(oaknut::CodeGenerator& code, EmitContext& ctx, BlockLinkType link_type, const IR::LocationDescriptor& descriptor) { + ctx.ebi.block_relocations[descriptor].emplace_back(BlockRelocation{code.ptr() - ctx.ebi.entry_point, link_type}); + code.NOP(); +} + } // namespace Dynarmic::Backend::Arm64 diff --git a/src/dynarmic/backend/arm64/emit_arm64.h b/src/dynarmic/backend/arm64/emit_arm64.h index ed03fa84..9f6767f4 100644 --- a/src/dynarmic/backend/arm64/emit_arm64.h +++ b/src/dynarmic/backend/arm64/emit_arm64.h @@ -11,8 +11,11 @@ #include #include +#include #include "dynarmic/interface/A32/coprocessor.h" +#include "dynarmic/interface/optimization_flags.h" +#include "dynarmic/ir/location_descriptor.h" namespace oaknut { struct PointerCodeGeneratorPolicy; @@ -29,7 +32,6 @@ class FPCR; namespace Dynarmic::IR { class Block; class Inst; -class LocationDescriptor; enum class Cond; enum class Opcode; } // namespace Dynarmic::IR @@ -64,15 +66,27 @@ enum class LinkTarget { GetTicksRemaining, }; +enum class BlockLinkType { + LinkBlockUnconditionally, + LinkBlockIfGreater, + LinkBlockIfWscratch0IsZero, +}; + struct Relocation { std::ptrdiff_t code_offset; LinkTarget target; }; +struct BlockRelocation { + std::ptrdiff_t code_offset; + BlockLinkType type; +}; + struct EmittedBlockInfo { CodePtr entry_point; size_t size; std::vector relocations; + tsl::robin_map> block_relocations; }; struct EmitConfig { @@ -86,6 +100,10 @@ struct EmitConfig { size_t state_fpsr_offset; std::array, 16> coprocessors{}; + + OptimizationFlag optimizations; + + bool HasOptimization(OptimizationFlag f) const { return (f & optimizations) != no_optimizations; } }; struct EmitContext; @@ -95,6 +113,7 @@ EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const E template void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst); void EmitRelocation(oaknut::CodeGenerator& code, EmitContext& ctx, LinkTarget link_target); +void EmitRelocation(oaknut::CodeGenerator& code, EmitContext& ctx, BlockLinkType type, const IR::LocationDescriptor& descriptor); oaknut::Label EmitA32Cond(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Cond cond); void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx); void EmitA32ConditionFailedTerminal(oaknut::CodeGenerator& code, EmitContext& ctx); diff --git a/src/dynarmic/backend/arm64/emit_arm64_a32.cpp b/src/dynarmic/backend/arm64/emit_arm64_a32.cpp index f713d688..9d9db953 100644 --- a/src/dynarmic/backend/arm64/emit_arm64_a32.cpp +++ b/src/dynarmic/backend/arm64/emit_arm64_a32.cpp @@ -58,24 +58,34 @@ void EmitSetUpperLocationDescriptor(oaknut::CodeGenerator& code, EmitContext& ct } } -void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool) { +void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool is_single_step) { EmitSetUpperLocationDescriptor(code, ctx, terminal.next, initial_location); + if (ctx.conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) { + if (ctx.conf.enable_cycle_counting) { + code.CMP(Xticks, 0); + EmitRelocation(code, ctx, BlockLinkType::LinkBlockIfGreater, terminal.next); + } else { + code.LDAR(Wscratch0, Xhalt); + EmitRelocation(code, ctx, BlockLinkType::LinkBlockIfWscratch0IsZero, terminal.next); + } + } + code.MOV(Wscratch0, A32::LocationDescriptor{terminal.next}.PC()); code.STR(Wscratch0, Xstate, offsetof(A32JitState, regs) + sizeof(u32) * 15); EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher); - - // TODO: Implement LinkBlock optimization } -void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool) { +void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) { EmitSetUpperLocationDescriptor(code, ctx, terminal.next, initial_location); + if (ctx.conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) { + EmitRelocation(code, ctx, BlockLinkType::LinkBlockUnconditionally, terminal.next); + } + code.MOV(Wscratch0, A32::LocationDescriptor{terminal.next}.PC()); code.STR(Wscratch0, Xstate, offsetof(A32JitState, regs) + sizeof(u32) * 15); EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher); - - // TODO: Implement LinkBlockFast optimization } void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::PopRSBHint, IR::LocationDescriptor, bool) {