Implement enable_cycle_counting

This commit is contained in:
merry 2022-04-03 16:10:32 +01:00
parent aac1f6ab1b
commit 644172477e
12 changed files with 143 additions and 52 deletions

View file

@ -149,7 +149,9 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
reg_alloc.AssertNoMoreUses();
EmitAddCycles(block.CycleCount());
if (conf.enable_cycle_counting) {
EmitAddCycles(block.CycleCount());
}
EmitX64::EmitTerminal(block.GetTerminal(), ctx.Location().SetSingleStepping(false), ctx.IsSingleStep());
code.int3();
@ -184,7 +186,9 @@ void A32EmitX64::EmitCondPrelude(const A32EmitContext& ctx) {
ASSERT(ctx.block.HasConditionFailedLocation());
Xbyak::Label pass = EmitCond(ctx.block.GetCondition());
EmitAddCycles(ctx.block.ConditionFailedCycleCount());
if (conf.enable_cycle_counting) {
EmitAddCycles(ctx.block.ConditionFailedCycleCount());
}
EmitTerminal(IR::Term::LinkBlock{ctx.block.ConditionFailedLocation()}, ctx.Location().SetSingleStepping(false), ctx.IsSingleStep());
code.L(pass);
}
@ -715,31 +719,37 @@ void A32EmitX64::EmitA32UpdateUpperLocationDescriptor(A32EmitContext& ctx, IR::I
}
void A32EmitX64::EmitA32CallSupervisor(A32EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(nullptr);
code.SwitchMxcsrOnExit();
code.mov(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
code.sub(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);
Devirtualize<&A32::UserCallbacks::AddTicks>(conf.callbacks).EmitCall(code);
ctx.reg_alloc.EndOfAllocScope();
if (conf.enable_cycle_counting) {
ctx.reg_alloc.HostCall(nullptr);
code.mov(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
code.sub(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);
Devirtualize<&A32::UserCallbacks::AddTicks>(conf.callbacks).EmitCall(code);
ctx.reg_alloc.EndOfAllocScope();
}
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
Devirtualize<&A32::UserCallbacks::CallSVC>(conf.callbacks).EmitCall(code);
Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(conf.callbacks).EmitCall(code);
code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], code.ABI_RETURN);
code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], code.ABI_RETURN);
code.SwitchMxcsrOnEntry();
if (conf.enable_cycle_counting) {
Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(conf.callbacks).EmitCall(code);
code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], code.ABI_RETURN);
code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], code.ABI_RETURN);
code.SwitchMxcsrOnEntry();
}
}
void A32EmitX64::EmitA32ExceptionRaised(A32EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(nullptr);
code.SwitchMxcsrOnExit();
code.mov(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
code.sub(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);
Devirtualize<&A32::UserCallbacks::AddTicks>(conf.callbacks).EmitCall(code);
ctx.reg_alloc.HostCall(nullptr);
if (conf.enable_cycle_counting) {
code.mov(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
code.sub(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);
Devirtualize<&A32::UserCallbacks::AddTicks>(conf.callbacks).EmitCall(code);
}
ctx.reg_alloc.EndOfAllocScope();
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@ -751,10 +761,12 @@ void A32EmitX64::EmitA32ExceptionRaised(A32EmitContext& ctx, IR::Inst* inst) {
code.mov(param[1], exception);
});
Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(conf.callbacks).EmitCall(code);
code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], code.ABI_RETURN);
code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], code.ABI_RETURN);
code.SwitchMxcsrOnEntry();
if (conf.enable_cycle_counting) {
Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(conf.callbacks).EmitCall(code);
code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], code.ABI_RETURN);
code.mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], code.ABI_RETURN);
code.SwitchMxcsrOnEntry();
}
}
static u32 GetFpscrImpl(A32JitState* jit_state) {
@ -1134,14 +1146,26 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
return;
}
code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
if (conf.enable_cycle_counting) {
code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
patch_information[terminal.next].jg.emplace_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJg(terminal.next, next_bb->entrypoint);
patch_information[terminal.next].jg.emplace_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJg(terminal.next, next_bb->entrypoint);
} else {
EmitPatchJg(terminal.next);
}
} else {
EmitPatchJg(terminal.next);
code.cmp(dword[r15 + offsetof(A32JitState, halt_reason)], 0);
patch_information[terminal.next].jz.emplace_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJz(terminal.next, next_bb->entrypoint);
} else {
EmitPatchJz(terminal.next);
}
}
Xbyak::Label dest;
code.jmp(dest, Xbyak::CodeGenerator::T_NEAR);
@ -1222,6 +1246,17 @@ void A32EmitX64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr
code.EnsurePatchLocationSize(patch_location, 14);
}
void A32EmitX64::EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
const CodePtr patch_location = code.getCurr();
if (target_code_ptr) {
code.jz(target_code_ptr);
} else {
code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{target_desc}.PC());
code.jz(code.GetReturnFromRunCodeAddress());
}
code.EnsurePatchLocationSize(patch_location, 14);
}
void A32EmitX64::EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
const CodePtr patch_location = code.getCurr();
if (target_code_ptr) {

View file

@ -138,6 +138,7 @@ protected:
// Patching
void Unpatch(const IR::LocationDescriptor& target_desc) override;
void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override;
};

View file

@ -32,11 +32,12 @@ namespace Dynarmic::A32 {
using namespace Backend::X64;
static RunCodeCallbacks GenRunCodeCallbacks(A32::UserCallbacks* cb, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg) {
static RunCodeCallbacks GenRunCodeCallbacks(A32::UserCallbacks* cb, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg, const A32::UserConfig& conf) {
return RunCodeCallbacks{
std::make_unique<ArgCallback>(LookupBlock, reinterpret_cast<u64>(arg)),
std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::AddTicks>(cb)),
std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(cb)),
conf.enable_cycle_counting,
};
}
@ -59,7 +60,7 @@ static Optimization::PolyfillOptions GenPolyfillOptions(const BlockOfCode& code)
struct Jit::Impl {
Impl(Jit* jit, A32::UserConfig conf)
: block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf))
: block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this, conf), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf))
, emitter(block_of_code, conf, jit)
, polyfill_options(GenPolyfillOptions(block_of_code))
, conf(std::move(conf))

View file

@ -121,7 +121,9 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) {
reg_alloc.AssertNoMoreUses();
EmitAddCycles(block.CycleCount());
if (conf.enable_cycle_counting) {
EmitAddCycles(block.CycleCount());
}
EmitX64::EmitTerminal(block.GetTerminal(), ctx.Location().SetSingleStepping(false), ctx.IsSingleStep());
code.int3();
@ -619,14 +621,26 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
return;
}
code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
if (conf.enable_cycle_counting) {
code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
patch_information[terminal.next].jg.emplace_back(code.getCurr());
if (auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJg(terminal.next, next_bb->entrypoint);
patch_information[terminal.next].jg.emplace_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJg(terminal.next, next_bb->entrypoint);
} else {
EmitPatchJg(terminal.next);
}
} else {
EmitPatchJg(terminal.next);
code.cmp(dword[r15 + offsetof(A64JitState, halt_reason)], 0);
patch_information[terminal.next].jz.emplace_back(code.getCurr());
if (const auto next_bb = GetBasicBlock(terminal.next)) {
EmitPatchJz(terminal.next, next_bb->entrypoint);
} else {
EmitPatchJz(terminal.next);
}
}
code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
code.ForceReturnFromRunCode();
@ -708,6 +722,18 @@ void A64EmitX64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr
code.EnsurePatchLocationSize(patch_location, 23);
}
void A64EmitX64::EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
const CodePtr patch_location = code.getCurr();
if (target_code_ptr) {
code.jz(target_code_ptr);
} else {
code.mov(rax, A64::LocationDescriptor{target_desc}.PC());
code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
code.jz(code.GetReturnFromRunCodeAddress());
}
code.EnsurePatchLocationSize(patch_location, 23);
}
void A64EmitX64::EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) {
const CodePtr patch_location = code.getCurr();
if (target_code_ptr) {

View file

@ -135,6 +135,7 @@ protected:
// Patching
void Unpatch(const IR::LocationDescriptor& target_desc) override;
void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override;
};

View file

@ -27,11 +27,12 @@ namespace Dynarmic::A64 {
using namespace Backend::X64;
static RunCodeCallbacks GenRunCodeCallbacks(A64::UserCallbacks* cb, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg) {
static RunCodeCallbacks GenRunCodeCallbacks(A64::UserCallbacks* cb, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg, const A64::UserConfig& conf) {
return RunCodeCallbacks{
std::make_unique<ArgCallback>(LookupBlock, reinterpret_cast<u64>(arg)),
std::make_unique<ArgCallback>(Devirtualize<&A64::UserCallbacks::AddTicks>(cb)),
std::make_unique<ArgCallback>(Devirtualize<&A64::UserCallbacks::GetTicksRemaining>(cb)),
conf.enable_cycle_counting,
};
}
@ -56,7 +57,7 @@ struct Jit::Impl final {
public:
Impl(Jit* jit, UserConfig conf)
: conf(conf)
, block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf))
, block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this, conf), JitStateInfo{jit_state}, conf.code_cache_size, conf.far_code_offset, GenRCP(conf))
, emitter(block_of_code, conf, jit)
, polyfill_options(GenPolyfillOptions(block_of_code)) {
ASSERT(conf.page_table_address_space_bits >= 12 && conf.page_table_address_space_bits <= 64);

View file

@ -238,9 +238,11 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
mov(r15, ABI_PARAM1);
mov(rbx, ABI_PARAM2); // save temporarily in non-volatile register
cb.GetTicksRemaining->EmitCall(*this);
mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], ABI_RETURN);
mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], ABI_RETURN);
if (cb.enable_cycle_counting) {
cb.GetTicksRemaining->EmitCall(*this);
mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], ABI_RETURN);
mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], ABI_RETURN);
}
rcp(*this);
@ -257,8 +259,10 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
mov(r15, ABI_PARAM1);
mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], 1);
mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 1);
if (cb.enable_cycle_counting) {
mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], 1);
mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 1);
}
rcp(*this);
@ -277,8 +281,10 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
jne(return_to_caller);
cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
jng(return_to_caller);
if (cb.enable_cycle_counting) {
cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
jng(return_to_caller);
}
cb.LookupBlock->EmitCall(*this);
jmp(ABI_RETURN);
@ -287,8 +293,10 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
jne(return_to_caller_mxcsr_already_exited);
cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
jng(return_to_caller_mxcsr_already_exited);
if (cb.enable_cycle_counting) {
cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
jng(return_to_caller_mxcsr_already_exited);
}
SwitchMxcsrOnEntry();
cb.LookupBlock->EmitCall(*this);
jmp(ABI_RETURN);
@ -303,10 +311,12 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
return_from_run_code[MXCSR_ALREADY_EXITED | FORCE_RETURN] = getCurr<const void*>();
L(return_to_caller_mxcsr_already_exited);
cb.AddTicks->EmitCall(*this, [this](RegList param) {
mov(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
sub(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);
});
if (cb.enable_cycle_counting) {
cb.AddTicks->EmitCall(*this, [this](RegList param) {
mov(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
sub(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);
});
}
xor_(eax, eax);
lock();
@ -339,6 +349,10 @@ void BlockOfCode::LeaveStandardASIMD() {
}
void BlockOfCode::UpdateTicks() {
if (!cb.enable_cycle_counting) {
return;
}
cb.AddTicks->EmitCall(*this, [this](RegList param) {
mov(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
sub(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);

View file

@ -30,6 +30,7 @@ struct RunCodeCallbacks {
std::unique_ptr<Callback> LookupBlock;
std::unique_ptr<Callback> AddTicks;
std::unique_ptr<Callback> GetTicksRemaining;
bool enable_cycle_counting;
};
class BlockOfCode final : public Xbyak::CodeGenerator {

View file

@ -306,6 +306,11 @@ void EmitX64::Patch(const IR::LocationDescriptor& target_desc, CodePtr target_co
EmitPatchJg(target_desc, target_code_ptr);
}
for (CodePtr location : patch_info.jz) {
code.SetCodePtr(location);
EmitPatchJz(target_desc, target_code_ptr);
}
for (CodePtr location : patch_info.jmp) {
code.SetCodePtr(location);
EmitPatchJmp(target_desc, target_code_ptr);

View file

@ -111,12 +111,14 @@ protected:
// Patching
struct PatchInformation {
std::vector<CodePtr> jg;
std::vector<CodePtr> jz;
std::vector<CodePtr> jmp;
std::vector<CodePtr> mov_rcx;
};
void Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr);
virtual void Unpatch(const IR::LocationDescriptor& target_desc);
virtual void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
virtual void EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
virtual void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
virtual void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) = 0;

View file

@ -208,6 +208,10 @@ struct UserConfig {
/// to avoid writting certain unnecessary code only needed for cycle timers.
bool wall_clock_cntpct = false;
/// This option allows you to disable cycle counting. If this is set to false,
/// AddTicks and GetTicksRemaining are never called, and no cycle counting is done.
bool enable_cycle_counting = true;
/// This option relates to the CPSR.E flag. Enabling this option disables modification
/// of CPSR.E by the emulated program, forcing it to 0.
/// NOTE: Calling Jit::SetCpsr with CPSR.E=1 while this option is enabled may result

View file

@ -273,9 +273,9 @@ struct UserConfig {
/// to avoid writting certain unnecessary code only needed for cycle timers.
bool wall_clock_cntpct = false;
// Determines whether AddTicks and GetTicksRemaining are called.
// If false, execution will continue until soon after Jit::HaltExecution is called.
// bool enable_ticks = true; // TODO
/// This option allows you to disable cycle counting. If this is set to false,
/// AddTicks and GetTicksRemaining are never called, and no cycle counting is done.
bool enable_cycle_counting = true;
// Minimum size is about 8MiB. Maximum size is about 2GiB. Maximum size is limited by
// the maximum length of a x64 jump.