A32: Implement FastDispatchHint

This commit is contained in:
MerryMage 2018-09-07 21:30:12 +01:00
parent aa8d826c13
commit f96c43d422
11 changed files with 112 additions and 23 deletions

View file

@ -87,6 +87,9 @@ struct UserConfig {
/// instruction the ExceptionRaised callback is called. If this is true, we define /// instruction the ExceptionRaised callback is called. If this is true, we define
/// definite behaviour for some unpredictable instructions. /// definite behaviour for some unpredictable instructions.
bool define_unpredictable_behaviour = false; bool define_unpredictable_behaviour = false;
/// This enables the fast dispatcher.
bool enable_fast_dispatch = true;
}; };
} // namespace A32 } // namespace A32

View file

@ -79,10 +79,11 @@ bool A32EmitContext::FPSCR_DN() const {
} }
A32EmitX64::A32EmitX64(BlockOfCode& code, A32::UserConfig config, A32::Jit* jit_interface) A32EmitX64::A32EmitX64(BlockOfCode& code, A32::UserConfig config, A32::Jit* jit_interface)
: EmitX64(code), config(std::move(config)), jit_interface(jit_interface) : EmitX64(code), config(std::move(config)), jit_interface(jit_interface) {
{
GenMemoryAccessors(); GenMemoryAccessors();
GenTerminalHandlers();
code.PreludeComplete(); code.PreludeComplete();
ClearFastDispatchTable();
} }
A32EmitX64::~A32EmitX64() = default; A32EmitX64::~A32EmitX64() = default;
@ -148,10 +149,16 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
void A32EmitX64::ClearCache() { void A32EmitX64::ClearCache() {
EmitX64::ClearCache(); EmitX64::ClearCache();
block_ranges.ClearCache(); block_ranges.ClearCache();
ClearFastDispatchTable();
} }
void A32EmitX64::InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges) { void A32EmitX64::InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges) {
InvalidateBasicBlocks(block_ranges.InvalidateRanges(ranges)); InvalidateBasicBlocks(block_ranges.InvalidateRanges(ranges));
ClearFastDispatchTable();
}
/// Resets every entry of fast_dispatch_table to {0xFFFFFFFFFFFFFFFF, nullptr},
/// i.e. an all-ones location descriptor paired with a null code pointer.
/// Called from the constructor, ClearCache() and InvalidateCacheRanges().
// NOTE(review): the all-ones descriptor is presumably a sentinel that no real
// location descriptor compares equal to, so a reset entry always takes the
// miss path in the fast dispatch handler -- confirm against the descriptor encoding.
void A32EmitX64::ClearFastDispatchTable() {
fast_dispatch_table.fill({0xFFFFFFFFFFFFFFFFull, nullptr});
} }
void A32EmitX64::GenMemoryAccessors() { void A32EmitX64::GenMemoryAccessors() {
@ -220,6 +227,61 @@ void A32EmitX64::GenMemoryAccessors() {
PerfMapRegister(write_memory_64, code.getCurr(), "a32_write_memory_64"); PerfMapRegister(write_memory_64, code.getCurr(), "a32_write_memory_64");
} }
/// Emits the shared terminal handler stubs and records their entry addresses:
///  - terminal_handler_pop_rsb_hint: always emitted.
///  - terminal_handler_fast_dispatch_hint: emitted only when
///    config.enable_fast_dispatch is set.
/// Both stubs start by computing the current location descriptor from JIT state.
void A32EmitX64::GenTerminalHandlers() {
// PC ends up in ebp, location_descriptor ends up in rbx
// The emitted sequence builds rbx = (PC << 32) | FPSCR_mode | CPSR_et,
// using rcx as scratch. JIT state fields are addressed relative to r15.
const auto calculate_location_descriptor = [this] {
// This calculation has to match up with IREmitter::PushRSB
// TODO: Optimization is available here based on known state of FPSCR_mode and CPSR_et.
code.mov(ecx, MJitStateReg(A32::Reg::PC));
code.mov(ebp, ecx);
code.shl(rcx, 32);
code.mov(ebx, dword[r15 + offsetof(A32JitState, FPSCR_mode)]);
code.or_(ebx, dword[r15 + offsetof(A32JitState, CPSR_et)]);
code.or_(rbx, rcx);
};
Xbyak::Label fast_dispatch_cache_miss, rsb_cache_miss;
// --- PopRSBHint handler ---
code.align();
terminal_handler_pop_rsb_hint = code.getCurr<const void*>();
calculate_location_descriptor();
// Step rsb_ptr back by one slot (wrapped with RSBPtrMask) and store it back;
// eax is then the index of the slot to test.
code.mov(eax, dword[r15 + offsetof(A32JitState, rsb_ptr)]);
code.sub(eax, 1);
code.and_(eax, u32(A32JitState::RSBPtrMask));
code.mov(dword[r15 + offsetof(A32JitState, rsb_ptr)], eax);
// Compare the computed descriptor against rsb_location_descriptors[rax].
code.cmp(rbx, qword[r15 + offsetof(A32JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
if (config.enable_fast_dispatch) {
// On mismatch, fall into the fast dispatch lookup. rsb_cache_miss is bound
// after that handler's own descriptor calculation, so rbx/ebp computed
// above are reused rather than recomputed.
code.jne(rsb_cache_miss);
} else {
// Without fast dispatch, a mismatch jumps to the return-from-run-code address.
code.jne(code.GetReturnFromRunCodeAddress());
}
// Match: jump to the code pointer stored in rsb_codeptrs[rax].
code.mov(rax, qword[r15 + offsetof(A32JitState, rsb_codeptrs) + rax * sizeof(u64)]);
code.jmp(rax);
PerfMapRegister(terminal_handler_pop_rsb_hint, code.getCurr(), "a32_terminal_handler_pop_rsb_hint");
if (config.enable_fast_dispatch) {
// --- FastDispatchHint handler ---
code.align();
terminal_handler_fast_dispatch_hint = code.getCurr<const void*>();
calculate_location_descriptor();
// Entry point for the RSB miss path (rbx and ebp are already set up).
code.L(rsb_cache_miss);
// r12 = base address of the fast dispatch table.
code.mov(r12, reinterpret_cast<u64>(fast_dispatch_table.data()));
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE42)) {
// Mix the PC in ebp with the CRC32 instruction, using r12d as the source
// operand. Without SSE4.2 the PC bits are used unmodified.
code.crc32(ebp, r12d);
}
// Reduce to a byte offset into the table, then form the entry address in rbp.
code.and_(ebp, fast_dispatch_table_mask);
code.lea(rbp, ptr[r12 + rbp]);
// Hit: the entry's descriptor equals rbx, so jump straight to its code_ptr.
code.cmp(rbx, qword[rbp + offsetof(FastDispatchEntry, location_descriptor)]);
code.jne(fast_dispatch_cache_miss);
code.jmp(ptr[rbp + offsetof(FastDispatchEntry, code_ptr)]);
// Miss: claim the entry for this descriptor, look the block up, store the
// value left in rax as the entry's code_ptr, and jump to it.
// NOTE(review): this relies on rbp surviving the LookupBlock call;
// BlockOfCode::LookupBlock is documented as clobbering only ABI caller-save
// registers -- confirm rbp is preserved on all supported ABIs.
code.L(fast_dispatch_cache_miss);
code.mov(qword[rbp + offsetof(FastDispatchEntry, location_descriptor)], rbx);
code.LookupBlock();
code.mov(ptr[rbp + offsetof(FastDispatchEntry, code_ptr)], rax);
code.jmp(rax);
PerfMapRegister(terminal_handler_fast_dispatch_hint, code.getCurr(), "a32_terminal_handler_fast_dispatch_hint");
}
}
void A32EmitX64::EmitA32GetRegister(A32EmitContext& ctx, IR::Inst* inst) { void A32EmitX64::EmitA32GetRegister(A32EmitContext& ctx, IR::Inst* inst) {
A32::Reg reg = inst->GetArg(0).GetA32RegRef(); A32::Reg reg = inst->GetArg(0).GetA32RegRef();
@ -1222,16 +1284,15 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::Location
} }
void A32EmitX64::EmitTerminalImpl(IR::Term::PopRSBHint, IR::LocationDescriptor) { void A32EmitX64::EmitTerminalImpl(IR::Term::PopRSBHint, IR::LocationDescriptor) {
// This calculation has to match up with IREmitter::PushRSB code.jmp(terminal_handler_pop_rsb_hint);
// TODO: Optimization is available here based on known state of FPSCR_mode and CPSR_et. }
code.mov(ecx, MJitStateReg(A32::Reg::PC));
code.shl(rcx, 32);
code.mov(ebx, dword[r15 + offsetof(A32JitState, FPSCR_mode)]);
code.or_(ebx, dword[r15 + offsetof(A32JitState, CPSR_et)]);
code.or_(rbx, rcx);
void A32EmitX64::EmitTerminalImpl(IR::Term::FastDispatchHint, IR::LocationDescriptor initial_location) { void A32EmitX64::EmitTerminalImpl(IR::Term::FastDispatchHint, IR::LocationDescriptor) {
EmitTerminalImpl(IR::Term::ReturnToDispatch{}, initial_location); if (config.enable_fast_dispatch) {
code.jmp(terminal_handler_fast_dispatch_hint);
} else {
code.ReturnFromRunCode();
}
} }
void A32EmitX64::EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location) { void A32EmitX64::EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location) {

View file

@ -6,6 +6,8 @@
#pragma once #pragma once
#include <array>
#include <boost/optional.hpp> #include <boost/optional.hpp>
#include "backend/x64/a32_jitstate.h" #include "backend/x64/a32_jitstate.h"
@ -49,6 +51,16 @@ protected:
A32::Jit* jit_interface; A32::Jit* jit_interface;
BlockRangeInformation<u32> block_ranges; BlockRangeInformation<u32> block_ranges;
/// One slot of the fast dispatch table: a location descriptor paired with a
/// code pointer. Emitted code addresses both fields via offsetof, so the
/// field layout must not change.
struct FastDispatchEntry {
/// Location descriptor this slot currently holds (all-ones after a table reset).
u64 location_descriptor;
/// Code pointer jumped to when location_descriptor matches (nullptr after a table reset).
const void* code_ptr;
};
static_assert(sizeof(FastDispatchEntry) == 0x10);
static constexpr u64 fast_dispatch_table_mask = 0xFFFF0;
static constexpr size_t fast_dispatch_table_size = 0x10000;
std::array<FastDispatchEntry, fast_dispatch_table_size> fast_dispatch_table;
void ClearFastDispatchTable();
const void* read_memory_8; const void* read_memory_8;
const void* read_memory_16; const void* read_memory_16;
const void* read_memory_32; const void* read_memory_32;
@ -59,6 +71,10 @@ protected:
const void* write_memory_64; const void* write_memory_64;
void GenMemoryAccessors(); void GenMemoryAccessors();
const void* terminal_handler_pop_rsb_hint;
const void* terminal_handler_fast_dispatch_hint = nullptr;
void GenTerminalHandlers();
// Microinstruction emitters // Microinstruction emitters
#define OPCODE(...) #define OPCODE(...)
#define A32OPC(name, type, ...) void EmitA32##name(A32EmitContext& ctx, IR::Inst* inst); #define A32OPC(name, type, ...) void EmitA32##name(A32EmitContext& ctx, IR::Inst* inst);

View file

@ -257,6 +257,10 @@ void BlockOfCode::UpdateTicks() {
mov(qword[r15 + jsi.offsetof_cycles_remaining], ABI_RETURN); mov(qword[r15 + jsi.offsetof_cycles_remaining], ABI_RETURN);
} }
/// Code emitter: emits a call to the cb.LookupBlock callback into this code block.
// NOTE(review): callers in the A32 emitter read the result from rax after this
// call -- confirm that is where the callback's return value is left.
void BlockOfCode::LookupBlock() {
cb.LookupBlock->EmitCall(*this);
}
Xbyak::Address BlockOfCode::MConst(const Xbyak::AddressFrame& frame, u64 lower, u64 upper) { Xbyak::Address BlockOfCode::MConst(const Xbyak::AddressFrame& frame, u64 lower, u64 upper) {
return constant_pool.GetConstant(frame, lower, upper); return constant_pool.GetConstant(frame, lower, upper);
} }

View file

@ -57,8 +57,11 @@ public:
/// Code emitter: Makes saved host MXCSR the current MXCSR /// Code emitter: Makes saved host MXCSR the current MXCSR
void SwitchMxcsrOnExit(); void SwitchMxcsrOnExit();
/// Code emitter: Updates cycles remaining by calling cb.AddTicks and cb.GetTicksRemaining /// Code emitter: Updates cycles remaining by calling cb.AddTicks and cb.GetTicksRemaining
/// @note this clobbers ABI callee-save registers /// @note this clobbers ABI caller-save registers
void UpdateTicks(); void UpdateTicks();
/// Code emitter: Performs a block lookup based on current state
/// @note this clobbers ABI caller-save registers
void LookupBlock();
/// Code emitter: Calls the function /// Code emitter: Calls the function
template <typename FunctionPointer> template <typename FunctionPointer>

View file

@ -52,7 +52,7 @@ bool ArmTranslatorVisitor::arm_BLX_reg(Cond cond, Reg m) {
ir.PushRSB(ir.current_location.AdvancePC(4)); ir.PushRSB(ir.current_location.AdvancePC(4));
ir.BXWritePC(ir.GetRegister(m)); ir.BXWritePC(ir.GetRegister(m));
ir.SetRegister(Reg::LR, ir.Imm32(ir.current_location.PC() + 4)); ir.SetRegister(Reg::LR, ir.Imm32(ir.current_location.PC() + 4));
ir.SetTerm(IR::Term::ReturnToDispatch{}); ir.SetTerm(IR::Term::FastDispatchHint{});
return false; return false;
} }
return true; return true;
@ -65,7 +65,7 @@ bool ArmTranslatorVisitor::arm_BX(Cond cond, Reg m) {
if (m == Reg::R14) if (m == Reg::R14)
ir.SetTerm(IR::Term::PopRSBHint{}); ir.SetTerm(IR::Term::PopRSBHint{});
else else
ir.SetTerm(IR::Term::ReturnToDispatch{}); ir.SetTerm(IR::Term::FastDispatchHint{});
return false; return false;
} }
return true; return true;

View file

@ -66,7 +66,7 @@ bool ArmTranslatorVisitor::arm_LDR_lit(Cond cond, bool U, Reg t, Imm12 imm12) {
if (t == Reg::PC) { if (t == Reg::PC) {
ir.LoadWritePC(data); ir.LoadWritePC(data);
ir.SetTerm(IR::Term::ReturnToDispatch{}); ir.SetTerm(IR::Term::FastDispatchHint{});
return false; return false;
} }
@ -96,7 +96,7 @@ bool ArmTranslatorVisitor::arm_LDR_imm(Cond cond, bool P, bool U, bool W, Reg n,
if (!P && W && n == Reg::R13) if (!P && W && n == Reg::R13)
ir.SetTerm(IR::Term::PopRSBHint{}); ir.SetTerm(IR::Term::PopRSBHint{});
else else
ir.SetTerm(IR::Term::ReturnToDispatch{}); ir.SetTerm(IR::Term::FastDispatchHint{});
return false; return false;
} }
@ -121,7 +121,7 @@ bool ArmTranslatorVisitor::arm_LDR_reg(Cond cond, bool P, bool U, bool W, Reg n,
if (t == Reg::PC) { if (t == Reg::PC) {
ir.LoadWritePC(data); ir.LoadWritePC(data);
ir.SetTerm(IR::Term::ReturnToDispatch{}); ir.SetTerm(IR::Term::FastDispatchHint{});
return false; return false;
} }
@ -623,7 +623,7 @@ static bool LDMHelper(A32::IREmitter& ir, bool W, Reg n, RegList list, IR::U32 s
if (n == Reg::R13) if (n == Reg::R13)
ir.SetTerm(IR::Term::PopRSBHint{}); ir.SetTerm(IR::Term::PopRSBHint{});
else else
ir.SetTerm(IR::Term::ReturnToDispatch{}); ir.SetTerm(IR::Term::FastDispatchHint{});
return false; return false;
} }
return true; return true;

View file

@ -370,7 +370,7 @@ struct ThumbTranslatorVisitor final {
if (d == Reg::PC) { if (d == Reg::PC) {
ir.ALUWritePC(result.result); ir.ALUWritePC(result.result);
// Return to dispatch as we can't predict what PC is going to be. Stop compilation. // Return to dispatch as we can't predict what PC is going to be. Stop compilation.
ir.SetTerm(IR::Term::ReturnToDispatch{}); ir.SetTerm(IR::Term::FastDispatchHint{});
return false; return false;
} else { } else {
ir.SetRegister(d, result.result); ir.SetRegister(d, result.result);
@ -400,7 +400,7 @@ struct ThumbTranslatorVisitor final {
auto result = ir.GetRegister(m); auto result = ir.GetRegister(m);
if (d == Reg::PC) { if (d == Reg::PC) {
ir.ALUWritePC(result); ir.ALUWritePC(result);
ir.SetTerm(IR::Term::ReturnToDispatch{}); ir.SetTerm(IR::Term::FastDispatchHint{});
return false; return false;
} else { } else {
ir.SetRegister(d, result); ir.SetRegister(d, result);
@ -775,7 +775,7 @@ struct ThumbTranslatorVisitor final {
if (m == Reg::R14) if (m == Reg::R14)
ir.SetTerm(IR::Term::PopRSBHint{}); ir.SetTerm(IR::Term::PopRSBHint{});
else else
ir.SetTerm(IR::Term::ReturnToDispatch{}); ir.SetTerm(IR::Term::FastDispatchHint{});
return false; return false;
} }
@ -784,7 +784,7 @@ struct ThumbTranslatorVisitor final {
ir.PushRSB(ir.current_location.AdvancePC(2)); ir.PushRSB(ir.current_location.AdvancePC(2));
ir.BXWritePC(ir.GetRegister(m)); ir.BXWritePC(ir.GetRegister(m));
ir.SetRegister(Reg::LR, ir.Imm32((ir.current_location.PC() + 2) | 1)); ir.SetRegister(Reg::LR, ir.Imm32((ir.current_location.PC() + 2) | 1));
ir.SetTerm(IR::Term::ReturnToDispatch{}); ir.SetTerm(IR::Term::FastDispatchHint{});
return false; return false;
} }

View file

@ -84,7 +84,7 @@ bool TranslatorVisitor::MSR_reg(Imm<1> o0, Imm<3> op1, Imm<4> CRn, Imm<4> CRm, I
case SystemRegisterEncoding::FPCR: case SystemRegisterEncoding::FPCR:
ir.SetFPCR(X(32, Rt)); ir.SetFPCR(X(32, Rt));
ir.SetPC(ir.Imm64(ir.current_location->PC() + 4)); ir.SetPC(ir.Imm64(ir.current_location->PC() + 4));
ir.SetTerm(IR::Term::ReturnToDispatch{}); ir.SetTerm(IR::Term::FastDispatchHint{});
return false; return false;
case SystemRegisterEncoding::FPSR: case SystemRegisterEncoding::FPSR:
ir.SetFPSR(X(32, Rt)); ir.SetFPSR(X(32, Rt));

View file

@ -41,6 +41,7 @@ using Dynarmic::Common::Bits;
static Dynarmic::A32::UserConfig GetUserConfig(ArmTestEnv* testenv) { static Dynarmic::A32::UserConfig GetUserConfig(ArmTestEnv* testenv) {
Dynarmic::A32::UserConfig user_config; Dynarmic::A32::UserConfig user_config;
user_config.enable_fast_dispatch = false;
user_config.callbacks = testenv; user_config.callbacks = testenv;
return user_config; return user_config;
} }

View file

@ -32,6 +32,7 @@
static Dynarmic::A32::UserConfig GetUserConfig(ThumbTestEnv* testenv) { static Dynarmic::A32::UserConfig GetUserConfig(ThumbTestEnv* testenv) {
Dynarmic::A32::UserConfig user_config; Dynarmic::A32::UserConfig user_config;
user_config.enable_fast_dispatch = false;
user_config.callbacks = testenv; user_config.callbacks = testenv;
return user_config; return user_config;
} }