backend/arm64: Simple block linking

This commit is contained in:
Merry 2022-11-08 21:39:37 +00:00
parent fa6b58d3a8
commit d6f2a15834
5 changed files with 104 additions and 10 deletions

View file

@ -144,6 +144,7 @@ CodePtr A32AddressSpace::GetOrEmit(IR::LocationDescriptor descriptor) {
void A32AddressSpace::ClearCache() { void A32AddressSpace::ClearCache() {
block_entries.clear(); block_entries.clear();
block_infos.clear(); block_infos.clear();
block_references.clear();
code.set_ptr(prelude_info.end_of_prelude); code.set_ptr(prelude_info.end_of_prelude);
} }
@ -309,18 +310,57 @@ EmittedBlockInfo A32AddressSpace::Emit(IR::Block block) {
.state_nzcv_offset = offsetof(A32JitState, cpsr_nzcv), .state_nzcv_offset = offsetof(A32JitState, cpsr_nzcv),
.state_fpsr_offset = offsetof(A32JitState, fpsr), .state_fpsr_offset = offsetof(A32JitState, fpsr),
.coprocessors = conf.coprocessors, .coprocessors = conf.coprocessors,
.optimizations = conf.unsafe_optimizations ? conf.optimizations : conf.optimizations & all_safe_optimizations,
}; };
EmittedBlockInfo block_info = EmitArm64(code, std::move(block), emit_conf); EmittedBlockInfo block_info = EmitArm64(code, std::move(block), emit_conf);
Link(block_info); Link(block.Location(), block_info);
mem.invalidate(reinterpret_cast<u32*>(block_info.entry_point), block_info.size); mem.invalidate(reinterpret_cast<u32*>(block_info.entry_point), block_info.size);
RelinkForDescriptor(block.Location());
mem.protect(); mem.protect();
return block_info; return block_info;
} }
// Rewrites the block-link placeholders of one emitted block.
//
// entry_point            - start of the emitted block whose placeholders are patched.
// target_ptr             - code pointer of the block being linked to; a null value means
//                          "no target", in which case every placeholder is written as a NOP.
// block_relocations_list - (offset, link type) pairs; each offset is added to entry_point
//                          to locate the placeholder to overwrite.
//
// An unrecognised link type trips ASSERT_FALSE.
void A32AddressSpace::Link(EmittedBlockInfo& block_info) { static void LinkBlockLinks(const CodePtr entry_point, const CodePtr target_ptr, const std::vector<BlockRelocation>& block_relocations_list) {
using namespace oaknut;
using namespace oaknut::util;
for (auto [ptr_offset, type] : block_relocations_list) {
// Code generator positioned at the placeholder inside the already-emitted block.
CodeGenerator c{reinterpret_cast<u32*>(entry_point + ptr_offset)};
switch (type) {
case BlockLinkType::LinkBlockUnconditionally:
// Unconditional branch to the target, or NOP when there is no target.
if (target_ptr) {
c.B((void*)target_ptr);
} else {
c.NOP();
}
break;
case BlockLinkType::LinkBlockIfGreater:
// Conditional branch on the current flags, or NOP when there is no target.
// NOTE(review): the condition passed is GE although the enumerator is named
// "IfGreater" - confirm this is the intended condition.
if (target_ptr) {
c.B(GE, (void*)target_ptr);
} else {
c.NOP();
}
break;
case BlockLinkType::LinkBlockIfWscratch0IsZero:
// Compare-and-branch on Wscratch0 being zero, or NOP when there is no target.
if (target_ptr) {
c.CBZ(Wscratch0, (void*)target_ptr);
} else {
c.NOP();
}
break;
default:
ASSERT_FALSE("Invalid block relocation type");
}
}
}
void A32AddressSpace::Link(IR::LocationDescriptor block_descriptor, EmittedBlockInfo& block_info) {
using namespace oaknut; using namespace oaknut;
using namespace oaknut::util; using namespace oaknut::util;
@ -401,6 +441,23 @@ void A32AddressSpace::Link(EmittedBlockInfo& block_info) {
ASSERT_FALSE("Invalid relocation target"); ASSERT_FALSE("Invalid relocation target");
} }
} }
for (auto [target_descriptor, list] : block_info.block_relocations) {
block_references[target_descriptor.Value()].emplace(block_descriptor.Value());
LinkBlockLinks(block_info.entry_point, Get(target_descriptor), list);
}
}
// Re-patches every recorded block that links to target_descriptor.
//
// Walks the set stored under target_descriptor.Value() in block_references. For each
// referencing descriptor that has an entry in block_infos, the block's relocations for
// target_descriptor are rewritten through LinkBlockLinks using the current
// Get(target_descriptor), and the block's code range is then passed to mem.invalidate.
// Referencing descriptors without a block_infos entry are ignored.
void A32AddressSpace::RelinkForDescriptor(IR::LocationDescriptor target_descriptor) {
// NOTE(review): operator[] presumably default-inserts an empty set when the key is
// absent (as std::unordered_map does) - confirm against tsl::robin_map.
for (auto block_descriptor : block_references[target_descriptor.Value()]) {
if (auto iter = block_infos.find(block_descriptor); iter != block_infos.end()) {
const EmittedBlockInfo& block_info = iter->second;
// NOTE(review): block_infos is looked up a second time here even though block_info
// already refers to the entry found above.
LinkBlockLinks(block_info.entry_point, Get(target_descriptor), block_infos[block_descriptor].block_relocations[target_descriptor]);
// Invalidate the whole referencing block after its placeholders were rewritten.
mem.invalidate(reinterpret_cast<u32*>(block_info.entry_point), block_info.size);
}
}
} }
} // namespace Dynarmic::Backend::Arm64 } // namespace Dynarmic::Backend::Arm64

View file

@ -9,6 +9,7 @@
#include <oaknut/code_block.hpp> #include <oaknut/code_block.hpp>
#include <oaknut/oaknut.hpp> #include <oaknut/oaknut.hpp>
#include <tsl/robin_map.h> #include <tsl/robin_map.h>
#include <tsl/robin_set.h>
#include "dynarmic/backend/arm64/emit_arm64.h" #include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/interface/A32/config.h" #include "dynarmic/interface/A32/config.h"
@ -39,7 +40,8 @@ private:
size_t GetRemainingSize(); size_t GetRemainingSize();
EmittedBlockInfo Emit(IR::Block ir_block); EmittedBlockInfo Emit(IR::Block ir_block);
void Link(EmittedBlockInfo& block); void Link(IR::LocationDescriptor block_descriptor, EmittedBlockInfo& block);
void RelinkForDescriptor(IR::LocationDescriptor target_descriptor);
const A32::UserConfig conf; const A32::UserConfig conf;
@ -48,6 +50,7 @@ private:
tsl::robin_map<u64, CodePtr> block_entries; tsl::robin_map<u64, CodePtr> block_entries;
tsl::robin_map<u64, EmittedBlockInfo> block_infos; tsl::robin_map<u64, EmittedBlockInfo> block_infos;
tsl::robin_map<u64, tsl::robin_set<u64>> block_references;
struct PreludeInfo { struct PreludeInfo {
u32* end_of_prelude; u32* end_of_prelude;

View file

@ -216,4 +216,9 @@ void EmitRelocation(oaknut::CodeGenerator& code, EmitContext& ctx, LinkTarget li
code.NOP(); code.NOP();
} }
// Registers a block-link patch site for `descriptor` at the current code position.
//
// The site is stored in ctx.ebi.block_relocations under `descriptor` as the offset of
// the current code pointer from ctx.ebi.entry_point, paired with `link_type`. A NOP is
// then emitted at that position as the placeholder.
void EmitRelocation(oaknut::CodeGenerator& code, EmitContext& ctx, BlockLinkType link_type, const IR::LocationDescriptor& descriptor) {
    // Distance of the placeholder from the start of the block being emitted.
    const auto offset_from_entry = code.ptr<CodePtr>() - ctx.ebi.entry_point;

    auto& sites_for_target = ctx.ebi.block_relocations[descriptor];
    sites_for_target.emplace_back(BlockRelocation{offset_from_entry, link_type});

    code.NOP();
}
} // namespace Dynarmic::Backend::Arm64 } // namespace Dynarmic::Backend::Arm64

View file

@ -11,8 +11,11 @@
#include <vector> #include <vector>
#include <mcl/stdint.hpp> #include <mcl/stdint.hpp>
#include <tsl/robin_map.h>
#include "dynarmic/interface/A32/coprocessor.h" #include "dynarmic/interface/A32/coprocessor.h"
#include "dynarmic/interface/optimization_flags.h"
#include "dynarmic/ir/location_descriptor.h"
namespace oaknut { namespace oaknut {
struct PointerCodeGeneratorPolicy; struct PointerCodeGeneratorPolicy;
@ -29,7 +32,6 @@ class FPCR;
namespace Dynarmic::IR { namespace Dynarmic::IR {
class Block; class Block;
class Inst; class Inst;
class LocationDescriptor;
enum class Cond; enum class Cond;
enum class Opcode; enum class Opcode;
} // namespace Dynarmic::IR } // namespace Dynarmic::IR
@ -64,15 +66,27 @@ enum class LinkTarget {
GetTicksRemaining, GetTicksRemaining,
}; };
// Form of the branch written at a block-link patch site.
// When the target block has no code pointer, LinkBlockLinks writes a NOP for every kind.
enum class BlockLinkType {
// Patched to an unconditional branch (B) to the target block.
LinkBlockUnconditionally,
// Patched to a conditional branch (B with condition GE) to the target block;
// the LinkBlock terminal emits it right after `CMP Xticks, 0`.
LinkBlockIfGreater,
// Patched to `CBZ Wscratch0, target`; the LinkBlock terminal emits it right after
// loading Wscratch0 from Xhalt with LDAR.
LinkBlockIfWscratch0IsZero,
};
struct Relocation { struct Relocation {
std::ptrdiff_t code_offset; std::ptrdiff_t code_offset;
LinkTarget target; LinkTarget target;
}; };
// One block-link patch site inside an emitted block.
struct BlockRelocation {
// Offset of the patch site from the owning block's entry point
// (recorded by EmitRelocation as code.ptr<CodePtr>() - ctx.ebi.entry_point).
std::ptrdiff_t code_offset;
// Which branch form is written at the patch site when the block is linked.
BlockLinkType type;
};
struct EmittedBlockInfo { struct EmittedBlockInfo {
CodePtr entry_point; CodePtr entry_point;
size_t size; size_t size;
std::vector<Relocation> relocations; std::vector<Relocation> relocations;
tsl::robin_map<IR::LocationDescriptor, std::vector<BlockRelocation>> block_relocations;
}; };
struct EmitConfig { struct EmitConfig {
@ -86,6 +100,10 @@ struct EmitConfig {
size_t state_fpsr_offset; size_t state_fpsr_offset;
std::array<std::shared_ptr<A32::Coprocessor>, 16> coprocessors{}; std::array<std::shared_ptr<A32::Coprocessor>, 16> coprocessors{};
OptimizationFlag optimizations;
bool HasOptimization(OptimizationFlag f) const { return (f & optimizations) != no_optimizations; }
}; };
struct EmitContext; struct EmitContext;
@ -95,6 +113,7 @@ EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const E
template<IR::Opcode op> template<IR::Opcode op>
void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst); void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst);
void EmitRelocation(oaknut::CodeGenerator& code, EmitContext& ctx, LinkTarget link_target); void EmitRelocation(oaknut::CodeGenerator& code, EmitContext& ctx, LinkTarget link_target);
void EmitRelocation(oaknut::CodeGenerator& code, EmitContext& ctx, BlockLinkType type, const IR::LocationDescriptor& descriptor);
oaknut::Label EmitA32Cond(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Cond cond); oaknut::Label EmitA32Cond(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Cond cond);
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx); void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx);
void EmitA32ConditionFailedTerminal(oaknut::CodeGenerator& code, EmitContext& ctx); void EmitA32ConditionFailedTerminal(oaknut::CodeGenerator& code, EmitContext& ctx);

View file

@ -58,24 +58,34 @@ void EmitSetUpperLocationDescriptor(oaknut::CodeGenerator& code, EmitContext& ct
} }
} }
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool) { void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
EmitSetUpperLocationDescriptor(code, ctx, terminal.next, initial_location); EmitSetUpperLocationDescriptor(code, ctx, terminal.next, initial_location);
code.MOV(Wscratch0, A32::LocationDescriptor{terminal.next}.PC()); if (ctx.conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) {
code.STR(Wscratch0, Xstate, offsetof(A32JitState, regs) + sizeof(u32) * 15); if (ctx.conf.enable_cycle_counting) {
EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher); code.CMP(Xticks, 0);
EmitRelocation(code, ctx, BlockLinkType::LinkBlockIfGreater, terminal.next);
// TODO: Implement LinkBlock optimization } else {
code.LDAR(Wscratch0, Xhalt);
EmitRelocation(code, ctx, BlockLinkType::LinkBlockIfWscratch0IsZero, terminal.next);
}
} }
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool) { code.MOV(Wscratch0, A32::LocationDescriptor{terminal.next}.PC());
code.STR(Wscratch0, Xstate, offsetof(A32JitState, regs) + sizeof(u32) * 15);
EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
}
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
EmitSetUpperLocationDescriptor(code, ctx, terminal.next, initial_location); EmitSetUpperLocationDescriptor(code, ctx, terminal.next, initial_location);
if (ctx.conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) {
EmitRelocation(code, ctx, BlockLinkType::LinkBlockUnconditionally, terminal.next);
}
code.MOV(Wscratch0, A32::LocationDescriptor{terminal.next}.PC()); code.MOV(Wscratch0, A32::LocationDescriptor{terminal.next}.PC());
code.STR(Wscratch0, Xstate, offsetof(A32JitState, regs) + sizeof(u32) * 15); code.STR(Wscratch0, Xstate, offsetof(A32JitState, regs) + sizeof(u32) * 15);
EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher); EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
// TODO: Implement LinkBlockFast optimization
} }
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::PopRSBHint, IR::LocationDescriptor, bool) { void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::PopRSBHint, IR::LocationDescriptor, bool) {