backend/arm64: Implement cycle counting
This commit is contained in:
parent
b5ad066372
commit
e476fad5a2
7 changed files with 138 additions and 81 deletions
|
@ -152,55 +152,6 @@ void A32AddressSpace::EmitPrelude() {
|
|||
|
||||
mem.unprotect();
|
||||
|
||||
prelude_info.run_code = code.ptr<PreludeInfo::RunCodeFuncType>();
|
||||
ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
|
||||
|
||||
code.MOV(Xstate, X1);
|
||||
code.MOV(Xhalt, X2);
|
||||
|
||||
code.LDR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor));
|
||||
code.AND(Wscratch0, Wscratch0, 0xffff0000);
|
||||
code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
|
||||
code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
|
||||
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
|
||||
|
||||
code.BR(X0);
|
||||
|
||||
prelude_info.step_code = code.ptr<PreludeInfo::RunCodeFuncType>();
|
||||
ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
|
||||
|
||||
code.MOV(Xstate, X1);
|
||||
code.MOV(Xhalt, X2);
|
||||
|
||||
code.LDR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor));
|
||||
code.AND(Wscratch0, Wscratch0, 0xffff0000);
|
||||
code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
|
||||
code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
|
||||
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
|
||||
|
||||
oaknut::Label step_hr_loop;
|
||||
code.l(step_hr_loop);
|
||||
code.LDAXR(Wscratch0, Xhalt);
|
||||
code.ORR(Wscratch0, Wscratch0, static_cast<u32>(HaltReason::Step));
|
||||
code.STLXR(Wscratch1, Wscratch0, Xhalt);
|
||||
code.CBNZ(Wscratch1, step_hr_loop);
|
||||
|
||||
code.BR(X0);
|
||||
|
||||
prelude_info.return_from_run_code = code.ptr<void*>();
|
||||
|
||||
code.LDR(Wscratch0, SP, offsetof(StackLayout, save_host_fpcr));
|
||||
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
|
||||
|
||||
oaknut::Label exit_hr_loop;
|
||||
code.l(exit_hr_loop);
|
||||
code.LDAXR(W0, Xhalt);
|
||||
code.STLXR(Wscratch0, WZR, Xhalt);
|
||||
code.CBNZ(Wscratch0, exit_hr_loop);
|
||||
|
||||
ABI_PopRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
|
||||
code.RET();
|
||||
|
||||
prelude_info.read_memory_8 = EmitCallTrampoline<&A32::UserCallbacks::MemoryRead8>(code, conf.callbacks);
|
||||
prelude_info.read_memory_16 = EmitCallTrampoline<&A32::UserCallbacks::MemoryRead16>(code, conf.callbacks);
|
||||
prelude_info.read_memory_32 = EmitCallTrampoline<&A32::UserCallbacks::MemoryRead32>(code, conf.callbacks);
|
||||
|
@ -223,6 +174,112 @@ void A32AddressSpace::EmitPrelude() {
|
|||
prelude_info.add_ticks = EmitCallTrampoline<&A32::UserCallbacks::AddTicks>(code, conf.callbacks);
|
||||
prelude_info.get_ticks_remaining = EmitCallTrampoline<&A32::UserCallbacks::GetTicksRemaining>(code, conf.callbacks);
|
||||
|
||||
oaknut::Label return_from_run_code;
|
||||
|
||||
prelude_info.run_code = code.ptr<PreludeInfo::RunCodeFuncType>();
|
||||
{
|
||||
ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
|
||||
|
||||
code.MOV(X19, X0);
|
||||
code.MOV(Xstate, X1);
|
||||
code.MOV(Xhalt, X2);
|
||||
|
||||
if (conf.enable_cycle_counting) {
|
||||
code.BL(prelude_info.get_ticks_remaining);
|
||||
code.MOV(Xticks, X0);
|
||||
code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run));
|
||||
}
|
||||
|
||||
code.LDR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor));
|
||||
code.AND(Wscratch0, Wscratch0, 0xffff0000);
|
||||
code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
|
||||
code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
|
||||
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
|
||||
|
||||
code.BR(X19);
|
||||
}
|
||||
|
||||
prelude_info.step_code = code.ptr<PreludeInfo::RunCodeFuncType>();
|
||||
{
|
||||
ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
|
||||
|
||||
code.MOV(X19, X0);
|
||||
code.MOV(Xstate, X1);
|
||||
code.MOV(Xhalt, X2);
|
||||
|
||||
if (conf.enable_cycle_counting) {
|
||||
code.MOV(Xticks, 1);
|
||||
code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run));
|
||||
}
|
||||
|
||||
code.LDR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor));
|
||||
code.AND(Wscratch0, Wscratch0, 0xffff0000);
|
||||
code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
|
||||
code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
|
||||
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
|
||||
|
||||
oaknut::Label step_hr_loop;
|
||||
code.l(step_hr_loop);
|
||||
code.LDAXR(Wscratch0, Xhalt);
|
||||
code.ORR(Wscratch0, Wscratch0, static_cast<u32>(HaltReason::Step));
|
||||
code.STLXR(Wscratch1, Wscratch0, Xhalt);
|
||||
code.CBNZ(Wscratch1, step_hr_loop);
|
||||
|
||||
code.BR(X19);
|
||||
}
|
||||
|
||||
prelude_info.return_to_dispatcher = code.ptr<void*>();
|
||||
{
|
||||
oaknut::Label l_this, l_addr;
|
||||
|
||||
code.LDAR(Wscratch0, Xhalt);
|
||||
code.CBNZ(Wscratch0, return_from_run_code);
|
||||
|
||||
if (conf.enable_cycle_counting) {
|
||||
code.CMP(Xticks, 0);
|
||||
code.B(LE, return_from_run_code);
|
||||
}
|
||||
|
||||
code.LDR(X0, l_this);
|
||||
code.MOV(X1, Xstate);
|
||||
code.LDR(Xscratch0, l_addr);
|
||||
code.BLR(Xscratch0);
|
||||
code.BR(X0);
|
||||
|
||||
const auto fn = [](A32AddressSpace& self, A32JitState& context) -> CodePtr {
|
||||
return self.GetOrEmit(context.GetLocationDescriptor());
|
||||
};
|
||||
|
||||
code.align(8);
|
||||
code.l(l_this);
|
||||
code.dx(mcl::bit_cast<u64>(this));
|
||||
code.l(l_addr);
|
||||
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
|
||||
}
|
||||
|
||||
prelude_info.return_from_run_code = code.ptr<void*>();
|
||||
{
|
||||
code.l(return_from_run_code);
|
||||
|
||||
if (conf.enable_cycle_counting) {
|
||||
code.LDR(X1, SP, offsetof(StackLayout, cycles_to_run));
|
||||
code.SUB(X1, X1, Xticks);
|
||||
code.BL(prelude_info.add_ticks);
|
||||
}
|
||||
|
||||
code.LDR(Wscratch0, SP, offsetof(StackLayout, save_host_fpcr));
|
||||
code.MSR(oaknut::SystemReg::FPCR, Xscratch0);
|
||||
|
||||
oaknut::Label exit_hr_loop;
|
||||
code.l(exit_hr_loop);
|
||||
code.LDAXR(W0, Xhalt);
|
||||
code.STLXR(Wscratch0, WZR, Xhalt);
|
||||
code.CBNZ(Wscratch0, exit_hr_loop);
|
||||
|
||||
ABI_PopRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
|
||||
code.RET();
|
||||
}
|
||||
|
||||
prelude_info.end_of_prelude = code.ptr<u32*>();
|
||||
|
||||
mem.invalidate_all();
|
||||
|
@ -267,6 +324,9 @@ void A32AddressSpace::Link(EmittedBlockInfo& block_info) {
|
|||
CodeGenerator c{reinterpret_cast<u32*>(block_info.entry_point + ptr_offset)};
|
||||
|
||||
switch (target) {
|
||||
case LinkTarget::ReturnToDispatcher:
|
||||
c.B(prelude_info.return_to_dispatcher);
|
||||
break;
|
||||
case LinkTarget::ReturnFromRunCode:
|
||||
c.B(prelude_info.return_from_run_code);
|
||||
break;
|
||||
|
|
|
@ -55,6 +55,7 @@ private:
|
|||
using RunCodeFuncType = HaltReason (*)(CodePtr entry_point, A32JitState* context, volatile u32* halt_reason);
|
||||
RunCodeFuncType run_code;
|
||||
RunCodeFuncType step_code;
|
||||
void* return_to_dispatcher;
|
||||
void* return_from_run_code;
|
||||
|
||||
void* read_memory_8;
|
||||
|
|
|
@ -14,6 +14,7 @@ namespace Dynarmic::Backend::Arm64 {
|
|||
|
||||
constexpr oaknut::XReg Xstate{28};
|
||||
constexpr oaknut::XReg Xhalt{27};
|
||||
constexpr oaknut::XReg Xticks{26};
|
||||
|
||||
constexpr oaknut::XReg Xscratch0{16}, Xscratch1{17};
|
||||
constexpr oaknut::WReg Wscratch0{16}, Wscratch1{17};
|
||||
|
@ -40,7 +41,7 @@ constexpr auto Rscratch1() {
|
|||
}
|
||||
}
|
||||
|
||||
constexpr std::initializer_list<int> GPR_ORDER{19, 20, 21, 22, 23, 24, 25, 26, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8};
|
||||
constexpr std::initializer_list<int> GPR_ORDER{19, 20, 21, 22, 23, 24, 25, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8};
|
||||
constexpr std::initializer_list<int> FPR_ORDER{8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
|
||||
|
||||
using RegisterList = u64;
|
||||
|
|
|
@ -135,15 +135,17 @@ void EmitIR<IR::Opcode::NZCVFromPackedFlags>(oaknut::CodeGenerator&, EmitContext
|
|||
ctx.reg_alloc.DefineAsExisting(inst, args[0]);
|
||||
}
|
||||
|
||||
static void EmitAddCycles(oaknut::CodeGenerator& code, EmitContext&, size_t cycles_to_add) {
|
||||
code.LDR(Xscratch0, SP, offsetof(StackLayout, cycles_remaining));
|
||||
static void EmitAddCycles(oaknut::CodeGenerator& code, EmitContext& ctx, size_t cycles_to_add) {
|
||||
if (!ctx.conf.enable_cycle_counting) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (oaknut::AddSubImm::is_valid(cycles_to_add)) {
|
||||
code.SUBS(Xscratch0, Xscratch0, cycles_to_add);
|
||||
code.SUB(Xticks, Xticks, cycles_to_add);
|
||||
} else {
|
||||
code.MOV(Xscratch1, cycles_to_add);
|
||||
code.SUBS(Xscratch0, Xscratch0, Xscratch1);
|
||||
code.SUB(Xticks, Xticks, Xscratch1);
|
||||
}
|
||||
code.STR(Xscratch0, SP, offsetof(StackLayout, cycles_remaining));
|
||||
}
|
||||
|
||||
EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const EmitConfig& conf) {
|
||||
|
@ -161,9 +163,7 @@ EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const E
|
|||
ASSERT(ctx.block.HasConditionFailedLocation());
|
||||
|
||||
oaknut::Label pass = EmitA32Cond(code, ctx, ctx.block.GetCondition());
|
||||
if (conf.enable_cycle_counting) {
|
||||
EmitAddCycles(code, ctx, ctx.block.ConditionFailedCycleCount());
|
||||
}
|
||||
EmitA32ConditionFailedTerminal(code, ctx);
|
||||
code.l(pass);
|
||||
}
|
||||
|
@ -201,10 +201,7 @@ EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const E
|
|||
|
||||
reg_alloc.AssertNoMoreUses();
|
||||
|
||||
if (ctx.conf.enable_cycle_counting) {
|
||||
EmitAddCycles(code, ctx, block.CycleCount());
|
||||
}
|
||||
|
||||
EmitA32Terminal(code, ctx);
|
||||
|
||||
ebi.size = code.ptr<CodePtr>() - ebi.entry_point;
|
||||
|
|
|
@ -39,6 +39,7 @@ namespace Dynarmic::Backend::Arm64 {
|
|||
using CodePtr = std::byte*;
|
||||
|
||||
enum class LinkTarget {
|
||||
ReturnToDispatcher,
|
||||
ReturnFromRunCode,
|
||||
ReadMemory8,
|
||||
ReadMemory16,
|
||||
|
|
|
@ -38,7 +38,7 @@ void EmitA32Terminal(oaknut::CodeGenerator&, EmitContext&, IR::Term::Interpret,
|
|||
}
|
||||
|
||||
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::ReturnToDispatch, IR::LocationDescriptor, bool) {
|
||||
EmitRelocation(code, ctx, LinkTarget::ReturnFromRunCode);
|
||||
EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
|
||||
}
|
||||
|
||||
void EmitSetUpperLocationDescriptor(oaknut::CodeGenerator& code, EmitContext& ctx, IR::LocationDescriptor new_location, IR::LocationDescriptor old_location) {
|
||||
|
@ -63,7 +63,7 @@ void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::Li
|
|||
|
||||
code.MOV(Wscratch0, A32::LocationDescriptor{terminal.next}.PC());
|
||||
code.STR(Wscratch0, Xstate, offsetof(A32JitState, regs) + sizeof(u32) * 15);
|
||||
EmitRelocation(code, ctx, LinkTarget::ReturnFromRunCode);
|
||||
EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
|
||||
|
||||
// TODO: Implement LinkBlock optimization
|
||||
}
|
||||
|
@ -73,19 +73,19 @@ void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::Li
|
|||
|
||||
code.MOV(Wscratch0, A32::LocationDescriptor{terminal.next}.PC());
|
||||
code.STR(Wscratch0, Xstate, offsetof(A32JitState, regs) + sizeof(u32) * 15);
|
||||
EmitRelocation(code, ctx, LinkTarget::ReturnFromRunCode);
|
||||
EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
|
||||
|
||||
// TODO: Implement LinkBlockFast optimization
|
||||
}
|
||||
|
||||
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::PopRSBHint, IR::LocationDescriptor, bool) {
|
||||
EmitRelocation(code, ctx, LinkTarget::ReturnFromRunCode);
|
||||
EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
|
||||
|
||||
// TODO: Implement PopRSBHint optimization
|
||||
}
|
||||
|
||||
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::FastDispatchHint, IR::LocationDescriptor, bool) {
|
||||
EmitRelocation(code, ctx, LinkTarget::ReturnFromRunCode);
|
||||
EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
|
||||
|
||||
// TODO: Implement FastDispatchHint optimization
|
||||
}
|
||||
|
@ -112,7 +112,7 @@ void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::Ch
|
|||
code.CBNZ(Wscratch0, fail);
|
||||
EmitA32Terminal(code, ctx, terminal.else_, initial_location, is_single_step);
|
||||
code.l(fail);
|
||||
EmitRelocation(code, ctx, LinkTarget::ReturnFromRunCode);
|
||||
EmitRelocation(code, ctx, LinkTarget::ReturnToDispatcher);
|
||||
}
|
||||
|
||||
void EmitA32Terminal(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Term::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
|
||||
|
@ -508,11 +508,9 @@ void EmitIR<IR::Opcode::A32CallSupervisor>(oaknut::CodeGenerator& code, EmitCont
|
|||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.PrepareForCall(nullptr);
|
||||
|
||||
static_assert(offsetof(StackLayout, cycles_remaining) + sizeof(u64) == offsetof(StackLayout, cycles_to_run));
|
||||
|
||||
if (ctx.conf.enable_cycle_counting) {
|
||||
code.LDP(Xscratch0, Xscratch1, SP, offsetof(StackLayout, cycles_remaining));
|
||||
code.SUB(Xscratch0, Xscratch1, Xscratch0);
|
||||
code.LDR(Xscratch0, SP, offsetof(StackLayout, cycles_to_run));
|
||||
code.SUB(Xscratch0, Xscratch0, Xticks);
|
||||
EmitRelocation(code, ctx, LinkTarget::AddTicks);
|
||||
}
|
||||
|
||||
|
@ -521,7 +519,8 @@ void EmitIR<IR::Opcode::A32CallSupervisor>(oaknut::CodeGenerator& code, EmitCont
|
|||
|
||||
if (ctx.conf.enable_cycle_counting) {
|
||||
EmitRelocation(code, ctx, LinkTarget::GetTicksRemaining);
|
||||
code.STP(X0, X0, SP, offsetof(StackLayout, cycles_remaining));
|
||||
code.STR(X0, SP, offsetof(StackLayout, cycles_to_run));
|
||||
code.MOV(Xticks, X0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -530,11 +529,9 @@ void EmitIR<IR::Opcode::A32ExceptionRaised>(oaknut::CodeGenerator& code, EmitCon
|
|||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ctx.reg_alloc.PrepareForCall(nullptr);
|
||||
|
||||
static_assert(offsetof(StackLayout, cycles_remaining) + sizeof(u64) == offsetof(StackLayout, cycles_to_run));
|
||||
|
||||
if (ctx.conf.enable_cycle_counting) {
|
||||
code.LDP(Xscratch0, Xscratch1, SP, offsetof(StackLayout, cycles_remaining));
|
||||
code.SUB(Xscratch0, Xscratch1, Xscratch0);
|
||||
code.LDR(Xscratch0, SP, offsetof(StackLayout, cycles_to_run));
|
||||
code.SUB(Xscratch0, Xscratch0, Xticks);
|
||||
EmitRelocation(code, ctx, LinkTarget::AddTicks);
|
||||
}
|
||||
|
||||
|
@ -544,7 +541,8 @@ void EmitIR<IR::Opcode::A32ExceptionRaised>(oaknut::CodeGenerator& code, EmitCon
|
|||
|
||||
if (ctx.conf.enable_cycle_counting) {
|
||||
EmitRelocation(code, ctx, LinkTarget::GetTicksRemaining);
|
||||
code.STP(X0, X0, SP, offsetof(StackLayout, cycles_remaining));
|
||||
code.STR(X0, SP, offsetof(StackLayout, cycles_to_run));
|
||||
code.MOV(Xticks, X0);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -19,11 +19,10 @@ constexpr size_t SpillCount = 64;
|
|||
#endif
|
||||
|
||||
struct alignas(16) StackLayout {
|
||||
s64 cycles_remaining;
|
||||
s64 cycles_to_run;
|
||||
|
||||
std::array<std::array<u64, 2>, SpillCount> spill;
|
||||
|
||||
s64 cycles_to_run;
|
||||
|
||||
u32 save_host_fpcr;
|
||||
|
||||
bool check_bit;
|
||||
|
|
Loading…
Reference in a new issue