core/arm: Improve timing accuracy before service calls in JIT
We also correct the CPU JIT's implementation of Step.
This commit is contained in:
parent
dc030c78c3
commit
7cd8b437aa
9 changed files with 59 additions and 53 deletions
2
externals/dynarmic
vendored
2
externals/dynarmic
vendored
|
@ -1 +1 @@
|
||||||
Subproject commit dfbd3912a4b8e0d28e1a4045893a750f0107fbaa
|
Subproject commit f343c56268ef3f8fbed5bbc513fbc56430a47255
|
|
@ -24,19 +24,11 @@ public:
|
||||||
u32 fpexc;
|
u32 fpexc;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/// Runs the CPU until an event happens
|
||||||
* Runs the CPU for the given number of instructions
|
virtual void Run() = 0;
|
||||||
* @param num_instructions Number of instructions to run
|
|
||||||
*/
|
|
||||||
void Run(int num_instructions) {
|
|
||||||
ExecuteInstructions(num_instructions);
|
|
||||||
this->num_instructions += num_instructions;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Step CPU by one instruction
|
/// Step CPU by one instruction
|
||||||
void Step() {
|
virtual void Step() = 0;
|
||||||
Run(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Clear all instruction cache
|
/// Clear all instruction cache
|
||||||
virtual void ClearInstructionCache() = 0;
|
virtual void ClearInstructionCache() = 0;
|
||||||
|
@ -138,19 +130,4 @@ public:
|
||||||
|
|
||||||
/// Prepare core for thread reschedule (if needed to correctly handle state)
|
/// Prepare core for thread reschedule (if needed to correctly handle state)
|
||||||
virtual void PrepareReschedule() = 0;
|
virtual void PrepareReschedule() = 0;
|
||||||
|
|
||||||
/// Getter for num_instructions
|
|
||||||
u64 GetNumInstructions() const {
|
|
||||||
return num_instructions;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected:
|
|
||||||
/**
|
|
||||||
* Executes the given number of instructions
|
|
||||||
* @param num_instructions Number of instructions to execute
|
|
||||||
*/
|
|
||||||
virtual void ExecuteInstructions(int num_instructions) = 0;
|
|
||||||
|
|
||||||
private:
|
|
||||||
u64 num_instructions = 0; ///< Number of instructions executed
|
|
||||||
};
|
};
|
||||||
|
|
|
@ -40,11 +40,20 @@ static bool IsReadOnlyMemory(u32 vaddr) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void AddTicks(u64 ticks) {
|
||||||
|
CoreTiming::AddTicks(ticks);
|
||||||
|
}
|
||||||
|
|
||||||
|
static u64 GetTicksRemaining() {
|
||||||
|
int ticks = CoreTiming::GetDowncount();
|
||||||
|
return static_cast<u64>(ticks <= 0 ? 0 : ticks);
|
||||||
|
}
|
||||||
|
|
||||||
static Dynarmic::UserCallbacks GetUserCallbacks(
|
static Dynarmic::UserCallbacks GetUserCallbacks(
|
||||||
const std::shared_ptr<ARMul_State>& interpeter_state, Memory::PageTable* current_page_table) {
|
const std::shared_ptr<ARMul_State>& interpreter_state, Memory::PageTable* current_page_table) {
|
||||||
Dynarmic::UserCallbacks user_callbacks{};
|
Dynarmic::UserCallbacks user_callbacks{};
|
||||||
user_callbacks.InterpreterFallback = &InterpreterFallback;
|
user_callbacks.InterpreterFallback = &InterpreterFallback;
|
||||||
user_callbacks.user_arg = static_cast<void*>(interpeter_state.get());
|
user_callbacks.user_arg = static_cast<void*>(interpreter_state.get());
|
||||||
user_callbacks.CallSVC = &SVC::CallSVC;
|
user_callbacks.CallSVC = &SVC::CallSVC;
|
||||||
user_callbacks.memory.IsReadOnlyMemory = &IsReadOnlyMemory;
|
user_callbacks.memory.IsReadOnlyMemory = &IsReadOnlyMemory;
|
||||||
user_callbacks.memory.ReadCode = &Memory::Read32;
|
user_callbacks.memory.ReadCode = &Memory::Read32;
|
||||||
|
@ -56,8 +65,10 @@ static Dynarmic::UserCallbacks GetUserCallbacks(
|
||||||
user_callbacks.memory.Write16 = &Memory::Write16;
|
user_callbacks.memory.Write16 = &Memory::Write16;
|
||||||
user_callbacks.memory.Write32 = &Memory::Write32;
|
user_callbacks.memory.Write32 = &Memory::Write32;
|
||||||
user_callbacks.memory.Write64 = &Memory::Write64;
|
user_callbacks.memory.Write64 = &Memory::Write64;
|
||||||
|
user_callbacks.AddTicks = &AddTicks;
|
||||||
|
user_callbacks.GetTicksRemaining = &GetTicksRemaining;
|
||||||
user_callbacks.page_table = &current_page_table->pointers;
|
user_callbacks.page_table = &current_page_table->pointers;
|
||||||
user_callbacks.coprocessors[15] = std::make_shared<DynarmicCP15>(interpeter_state);
|
user_callbacks.coprocessors[15] = std::make_shared<DynarmicCP15>(interpreter_state);
|
||||||
return user_callbacks;
|
return user_callbacks;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -66,6 +77,19 @@ ARM_Dynarmic::ARM_Dynarmic(PrivilegeMode initial_mode) {
|
||||||
PageTableChanged();
|
PageTableChanged();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MICROPROFILE_DEFINE(ARM_Jit, "ARM JIT", "ARM JIT", MP_RGB(255, 64, 64));
|
||||||
|
|
||||||
|
void ARM_Dynarmic::Run() {
|
||||||
|
ASSERT(Memory::GetCurrentPageTable() == current_page_table);
|
||||||
|
MICROPROFILE_SCOPE(ARM_Jit);
|
||||||
|
|
||||||
|
jit->Run(GetTicksRemaining());
|
||||||
|
}
|
||||||
|
|
||||||
|
void ARM_Dynarmic::Step() {
|
||||||
|
InterpreterFallback(jit->Regs()[15], jit, static_cast<void*>(interpreter_state.get()));
|
||||||
|
}
|
||||||
|
|
||||||
void ARM_Dynarmic::SetPC(u32 pc) {
|
void ARM_Dynarmic::SetPC(u32 pc) {
|
||||||
jit->Regs()[15] = pc;
|
jit->Regs()[15] = pc;
|
||||||
}
|
}
|
||||||
|
@ -124,17 +148,6 @@ void ARM_Dynarmic::SetCP15Register(CP15Register reg, u32 value) {
|
||||||
interpreter_state->CP15[reg] = value;
|
interpreter_state->CP15[reg] = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
MICROPROFILE_DEFINE(ARM_Jit, "ARM JIT", "ARM JIT", MP_RGB(255, 64, 64));
|
|
||||||
|
|
||||||
void ARM_Dynarmic::ExecuteInstructions(int num_instructions) {
|
|
||||||
ASSERT(Memory::GetCurrentPageTable() == current_page_table);
|
|
||||||
MICROPROFILE_SCOPE(ARM_Jit);
|
|
||||||
|
|
||||||
std::size_t ticks_executed = jit->Run(static_cast<unsigned>(num_instructions));
|
|
||||||
|
|
||||||
CoreTiming::AddTicks(ticks_executed);
|
|
||||||
}
|
|
||||||
|
|
||||||
void ARM_Dynarmic::SaveContext(ARM_Interface::ThreadContext& ctx) {
|
void ARM_Dynarmic::SaveContext(ARM_Interface::ThreadContext& ctx) {
|
||||||
memcpy(ctx.cpu_registers, jit->Regs().data(), sizeof(ctx.cpu_registers));
|
memcpy(ctx.cpu_registers, jit->Regs().data(), sizeof(ctx.cpu_registers));
|
||||||
memcpy(ctx.fpu_registers, jit->ExtRegs().data(), sizeof(ctx.fpu_registers));
|
memcpy(ctx.fpu_registers, jit->ExtRegs().data(), sizeof(ctx.fpu_registers));
|
||||||
|
@ -168,6 +181,7 @@ void ARM_Dynarmic::PrepareReschedule() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARM_Dynarmic::ClearInstructionCache() {
|
void ARM_Dynarmic::ClearInstructionCache() {
|
||||||
|
// TODO: Clear interpreter cache when appropriate.
|
||||||
for (const auto& j : jits) {
|
for (const auto& j : jits) {
|
||||||
j.second->ClearCache();
|
j.second->ClearCache();
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,9 @@ class ARM_Dynarmic final : public ARM_Interface {
|
||||||
public:
|
public:
|
||||||
ARM_Dynarmic(PrivilegeMode initial_mode);
|
ARM_Dynarmic(PrivilegeMode initial_mode);
|
||||||
|
|
||||||
|
void Run() override;
|
||||||
|
void Step() override;
|
||||||
|
|
||||||
void SetPC(u32 pc) override;
|
void SetPC(u32 pc) override;
|
||||||
u32 GetPC() const override;
|
u32 GetPC() const override;
|
||||||
u32 GetReg(int index) const override;
|
u32 GetReg(int index) const override;
|
||||||
|
@ -36,7 +39,6 @@ public:
|
||||||
void LoadContext(const ThreadContext& ctx) override;
|
void LoadContext(const ThreadContext& ctx) override;
|
||||||
|
|
||||||
void PrepareReschedule() override;
|
void PrepareReschedule() override;
|
||||||
void ExecuteInstructions(int num_instructions) override;
|
|
||||||
|
|
||||||
void ClearInstructionCache() override;
|
void ClearInstructionCache() override;
|
||||||
void PageTableChanged() override;
|
void PageTableChanged() override;
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include "core/arm/dyncom/arm_dyncom.h"
|
#include "core/arm/dyncom/arm_dyncom.h"
|
||||||
|
@ -20,6 +21,14 @@ ARM_DynCom::ARM_DynCom(PrivilegeMode initial_mode) {
|
||||||
|
|
||||||
ARM_DynCom::~ARM_DynCom() {}
|
ARM_DynCom::~ARM_DynCom() {}
|
||||||
|
|
||||||
|
void ARM_DynCom::Run() {
|
||||||
|
ExecuteInstructions(std::max(CoreTiming::GetDowncount(), 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ARM_DynCom::Step() {
|
||||||
|
ExecuteInstructions(1);
|
||||||
|
}
|
||||||
|
|
||||||
void ARM_DynCom::ClearInstructionCache() {
|
void ARM_DynCom::ClearInstructionCache() {
|
||||||
state->instruction_cache.clear();
|
state->instruction_cache.clear();
|
||||||
trans_cache_buf_top = 0;
|
trans_cache_buf_top = 0;
|
||||||
|
@ -79,10 +88,6 @@ void ARM_DynCom::SetCP15Register(CP15Register reg, u32 value) {
|
||||||
|
|
||||||
void ARM_DynCom::ExecuteInstructions(int num_instructions) {
|
void ARM_DynCom::ExecuteInstructions(int num_instructions) {
|
||||||
state->NumInstrsToExecute = num_instructions;
|
state->NumInstrsToExecute = num_instructions;
|
||||||
|
|
||||||
// Dyncom only breaks on instruction dispatch. This only happens on every instruction when
|
|
||||||
// executing one instruction at a time. Otherwise, if a block is being executed, more
|
|
||||||
// instructions may actually be executed than specified.
|
|
||||||
unsigned ticks_executed = InterpreterMainLoop(state.get());
|
unsigned ticks_executed = InterpreterMainLoop(state.get());
|
||||||
CoreTiming::AddTicks(ticks_executed);
|
CoreTiming::AddTicks(ticks_executed);
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,6 +15,9 @@ public:
|
||||||
ARM_DynCom(PrivilegeMode initial_mode);
|
ARM_DynCom(PrivilegeMode initial_mode);
|
||||||
~ARM_DynCom();
|
~ARM_DynCom();
|
||||||
|
|
||||||
|
void Run() override;
|
||||||
|
void Step() override;
|
||||||
|
|
||||||
void ClearInstructionCache() override;
|
void ClearInstructionCache() override;
|
||||||
void PageTableChanged() override;
|
void PageTableChanged() override;
|
||||||
|
|
||||||
|
@ -35,8 +38,9 @@ public:
|
||||||
void LoadContext(const ThreadContext& ctx) override;
|
void LoadContext(const ThreadContext& ctx) override;
|
||||||
|
|
||||||
void PrepareReschedule() override;
|
void PrepareReschedule() override;
|
||||||
void ExecuteInstructions(int num_instructions) override;
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
void ExecuteInstructions(int num_instructions);
|
||||||
|
|
||||||
std::unique_ptr<ARMul_State> state;
|
std::unique_ptr<ARMul_State> state;
|
||||||
};
|
};
|
||||||
|
|
|
@ -27,7 +27,7 @@ namespace Core {
|
||||||
|
|
||||||
/*static*/ System System::s_instance;
|
/*static*/ System System::s_instance;
|
||||||
|
|
||||||
System::ResultStatus System::RunLoop(int tight_loop) {
|
System::ResultStatus System::RunLoop(bool tight_loop) {
|
||||||
status = ResultStatus::Success;
|
status = ResultStatus::Success;
|
||||||
if (!cpu_core) {
|
if (!cpu_core) {
|
||||||
return ResultStatus::ErrorNotInitialized;
|
return ResultStatus::ErrorNotInitialized;
|
||||||
|
@ -57,7 +57,11 @@ System::ResultStatus System::RunLoop(int tight_loop) {
|
||||||
PrepareReschedule();
|
PrepareReschedule();
|
||||||
} else {
|
} else {
|
||||||
CoreTiming::Advance();
|
CoreTiming::Advance();
|
||||||
cpu_core->Run(tight_loop);
|
if (tight_loop) {
|
||||||
|
cpu_core->Run();
|
||||||
|
} else {
|
||||||
|
cpu_core->Step();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
HW::Update();
|
HW::Update();
|
||||||
|
@ -67,7 +71,7 @@ System::ResultStatus System::RunLoop(int tight_loop) {
|
||||||
}
|
}
|
||||||
|
|
||||||
System::ResultStatus System::SingleStep() {
|
System::ResultStatus System::SingleStep() {
|
||||||
return RunLoop(1);
|
return RunLoop(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
System::ResultStatus System::Load(EmuWindow* emu_window, const std::string& filepath) {
|
System::ResultStatus System::Load(EmuWindow* emu_window, const std::string& filepath) {
|
||||||
|
|
|
@ -50,10 +50,10 @@ public:
|
||||||
* is not required to do a full dispatch with each instruction. NOTE: the number of instructions
|
* is not required to do a full dispatch with each instruction. NOTE: the number of instructions
|
||||||
* requested is not guaranteed to run, as this will be interrupted preemptively if a hardware
|
* requested is not guaranteed to run, as this will be interrupted preemptively if a hardware
|
||||||
* update is requested (e.g. on a thread switch).
|
* update is requested (e.g. on a thread switch).
|
||||||
* @param tight_loop Number of instructions to execute.
|
* @param tight_loop If false, the CPU single-steps.
|
||||||
* @return Result status, indicating whether or not the operation succeeded.
|
* @return Result status, indicating whether or not the operation succeeded.
|
||||||
*/
|
*/
|
||||||
ResultStatus RunLoop(int tight_loop = 1000);
|
ResultStatus RunLoop(bool tight_loop = true);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Step the CPU one instruction
|
* Step the CPU one instruction
|
||||||
|
|
|
@ -34,7 +34,7 @@ TEST_CASE("ARM_DynCom (vfp): vadd", "[arm_dyncom]") {
|
||||||
dyncom.SetVFPSystemReg(VFP_FPSCR, test_case.initial_fpscr);
|
dyncom.SetVFPSystemReg(VFP_FPSCR, test_case.initial_fpscr);
|
||||||
dyncom.SetVFPReg(4, test_case.a);
|
dyncom.SetVFPReg(4, test_case.a);
|
||||||
dyncom.SetVFPReg(6, test_case.b);
|
dyncom.SetVFPReg(6, test_case.b);
|
||||||
dyncom.ExecuteInstructions(1);
|
dyncom.Step();
|
||||||
if (dyncom.GetVFPReg(2) != test_case.result ||
|
if (dyncom.GetVFPReg(2) != test_case.result ||
|
||||||
dyncom.GetVFPSystemReg(VFP_FPSCR) != test_case.final_fpscr) {
|
dyncom.GetVFPSystemReg(VFP_FPSCR) != test_case.final_fpscr) {
|
||||||
printf("f: %x\n", test_case.initial_fpscr);
|
printf("f: %x\n", test_case.initial_fpscr);
|
||||||
|
|
Loading…
Reference in a new issue