From c6a091d87416e3f03dc7e88889ded17763ccd4bb Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 13 Jan 2018 21:51:13 +0000 Subject: [PATCH] A64: Optimization: Merge interpret blocks --- src/CMakeLists.txt | 1 + src/backend_x64/a32_emit_x64.cpp | 1 + src/backend_x64/a64_emit_x64.cpp | 2 +- src/backend_x64/a64_interface.cpp | 1 + src/frontend/ir/basic_block.cpp | 5 ++ src/frontend/ir/basic_block.h | 2 + src/frontend/ir/terminal.h | 3 +- src/ir_opt/a64_merge_interpret_blocks.cpp | 61 +++++++++++++++++++++++ src/ir_opt/passes.h | 2 + 9 files changed, 76 insertions(+), 2 deletions(-) create mode 100644 src/ir_opt/a64_merge_interpret_blocks.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e42a92f3..864ff6cc 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -90,6 +90,7 @@ add_library(dynarmic frontend/ir/value.h ir_opt/a32_constant_memory_reads_pass.cpp ir_opt/a32_get_set_elimination_pass.cpp + ir_opt/a64_merge_interpret_blocks.cpp ir_opt/constant_propagation_pass.cpp ir_opt/dead_code_elimination_pass.cpp ir_opt/passes.h diff --git a/src/backend_x64/a32_emit_x64.cpp b/src/backend_x64/a32_emit_x64.cpp index 7291743c..e4ec3779 100644 --- a/src/backend_x64/a32_emit_x64.cpp +++ b/src/backend_x64/a32_emit_x64.cpp @@ -1075,6 +1075,7 @@ void A32EmitX64::EmitA32CoprocStoreWords(A32EmitContext& ctx, IR::Inst* inst) { void A32EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) { ASSERT_MSG(A32::LocationDescriptor{terminal.next}.TFlag() == A32::LocationDescriptor{initial_location}.TFlag(), "Unimplemented"); ASSERT_MSG(A32::LocationDescriptor{terminal.next}.EFlag() == A32::LocationDescriptor{initial_location}.EFlag(), "Unimplemented"); + ASSERT_MSG(terminal.num_instructions == 1, "Unimplemented"); code->mov(code->ABI_PARAM1.cvt32(), A32::LocationDescriptor{terminal.next}.PC()); code->mov(code->ABI_PARAM2, reinterpret_cast(jit_interface)); diff --git a/src/backend_x64/a64_emit_x64.cpp b/src/backend_x64/a64_emit_x64.cpp index 83d81163..dd31bdac 100644 --- a/src/backend_x64/a64_emit_x64.cpp +++ b/src/backend_x64/a64_emit_x64.cpp @@ -310,7 +310,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDesc DEVIRT(conf.callbacks, &A64::UserCallbacks::InterpreterFallback).EmitCall(code, [&](Xbyak::Reg64 param1, Xbyak::Reg64 param2) { code->mov(param1, A64::LocationDescriptor{terminal.next}.PC()); code->mov(qword[r15 + offsetof(A64JitState, pc)], param1); - code->mov(param2.cvt32(), 1); + code->mov(param2.cvt32(), terminal.num_instructions); }); code->ReturnFromRunCode(true); // TODO: Check cycles } diff --git a/src/backend_x64/a64_interface.cpp b/src/backend_x64/a64_interface.cpp index 01b82651..f5fed7c1 100644 --- a/src/backend_x64/a64_interface.cpp +++ b/src/backend_x64/a64_interface.cpp @@ -176,6 +176,7 @@ private: // JIT Compile IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); }); Optimization::DeadCodeElimination(ir_block); + Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks); // printf("%s\n", IR::DumpBlock(ir_block).c_str()); Optimization::VerificationPass(ir_block); return emitter.Emit(ir_block).entrypoint; diff --git a/src/frontend/ir/basic_block.cpp b/src/frontend/ir/basic_block.cpp index fa51487d..c0a608b5 100644 --- a/src/frontend/ir/basic_block.cpp +++ b/src/frontend/ir/basic_block.cpp @@ -90,6 +90,11 @@ void Block::SetTerminal(Terminal term) { terminal = term; } +void Block::ReplaceTerminal(Terminal term) { + ASSERT_MSG(HasTerminal(), "Terminal has not been set."); + terminal = term; +} + bool Block::HasTerminal() const { return terminal.which() != 0; } diff --git a/src/frontend/ir/basic_block.h b/src/frontend/ir/basic_block.h index 47b59aa9..852efd11 100644 --- a/src/frontend/ir/basic_block.h +++ b/src/frontend/ir/basic_block.h @@ -111,6 +111,8 @@ public: Terminal GetTerminal() const; /// Sets the terminal instruction for this basic block. void SetTerminal(Terminal term); + /// Replaces the terminal instruction for this basic block. + void ReplaceTerminal(Terminal term); /// Determines whether or not this basic block has a terminal instruction. bool HasTerminal() const; diff --git a/src/frontend/ir/terminal.h b/src/frontend/ir/terminal.h index 48deaec3..d62d307f 100644 --- a/src/frontend/ir/terminal.h +++ b/src/frontend/ir/terminal.h @@ -20,11 +20,12 @@ struct Invalid {}; /** * This terminal instruction calls the interpreter, starting at `next`. - * The interpreter must interpret exactly one instruction. + * The interpreter must interpret exactly `num_instructions` instructions. */ struct Interpret { explicit Interpret(const LocationDescriptor& next_) : next(next_) {} LocationDescriptor next; ///< Location at which interpretation starts. + size_t num_instructions = 1; }; /** diff --git a/src/ir_opt/a64_merge_interpret_blocks.cpp b/src/ir_opt/a64_merge_interpret_blocks.cpp new file mode 100644 index 00000000..f0be76c3 --- /dev/null +++ b/src/ir_opt/a64_merge_interpret_blocks.cpp @@ -0,0 +1,61 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2018 MerryMage + * This software may be used and distributed according to the terms of the GNU + * General Public License version 2 or any later version. + */ + +#include + +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "dynarmic/A64/config.h" +#include "frontend/A64/location_descriptor.h" +#include "frontend/A64/translate/translate.h" +#include "frontend/ir/basic_block.h" +#include "ir_opt/passes.h" + +namespace Dynarmic { +namespace Optimization { + +void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb) { + const auto is_interpret_instruction = [cb](A64::LocationDescriptor location) { + const u32 instruction = cb->MemoryReadCode(location.PC()); + + IR::Block new_block{location}; + A64::TranslateSingleInstruction(new_block, location, instruction); + + if (!new_block.Instructions().empty()) + return false; + + const IR::Terminal terminal = new_block.GetTerminal(); + if (auto term = boost::get(&terminal)) { + printf("INTERPET %08x \\x%02x\\x%02x\\x%02x\\x%02x \n", instruction, u8(instruction >> 0), u8(instruction >> 8), u8(instruction >> 16), u8(instruction >> 24)); + return term->next == location; + } + + return false; + }; + + IR::Terminal terminal = block.GetTerminal(); + auto term = boost::get(&terminal); + if (!term) + return; + + A64::LocationDescriptor location{term->next}; + size_t num_instructions = 1; + + while (is_interpret_instruction(location.AdvancePC(static_cast(num_instructions * 4)))) { + num_instructions++; + } + + printf("%zu\n", num_instructions); + + term->num_instructions = num_instructions; + block.ReplaceTerminal(terminal); + block.CycleCount() += num_instructions - 1; +} + +} // namespace Optimization +} // namespace Dynarmic diff --git a/src/ir_opt/passes.h b/src/ir_opt/passes.h index d94be7ad..0416d00a 100644 --- a/src/ir_opt/passes.h +++ b/src/ir_opt/passes.h @@ -7,6 +7,7 @@ #pragma once #include +#include namespace Dynarmic { namespace IR { @@ -19,6 +20,7 @@ namespace Optimization { void A32GetSetElimination(IR::Block& block); void A32ConstantMemoryReads(IR::Block& block, const A32::UserCallbacks::Memory& memory_callbacks); +void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb); void ConstantPropagation(IR::Block& block); void DeadCodeElimination(IR::Block& block); void VerificationPass(const IR::Block& block);