From 7cd8b437aa0b32638fe23c6bfb6eccc78d0c9d53 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 3 Dec 2017 02:57:08 +0000 Subject: [PATCH 1/2] core/arm: Improve timing accuracy before service calls in JIT We also correct the CPU JIT's implementation of Step. --- externals/dynarmic | 2 +- src/core/arm/arm_interface.h | 29 ++----------- src/core/arm/dynarmic/arm_dynarmic.cpp | 42 ++++++++++++------- src/core/arm/dynarmic/arm_dynarmic.h | 4 +- src/core/arm/dyncom/arm_dyncom.cpp | 13 ++++-- src/core/arm/dyncom/arm_dyncom.h | 6 ++- src/core/core.cpp | 10 +++-- src/core/core.h | 4 +- .../core/arm/dyncom/arm_dyncom_vfp_tests.cpp | 2 +- 9 files changed, 59 insertions(+), 53 deletions(-) diff --git a/externals/dynarmic b/externals/dynarmic index dfbd3912a..f343c5626 160000 --- a/externals/dynarmic +++ b/externals/dynarmic @@ -1 +1 @@ -Subproject commit dfbd3912a4b8e0d28e1a4045893a750f0107fbaa +Subproject commit f343c56268ef3f8fbed5bbc513fbc56430a47255 diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index ba528403c..436425c83 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h @@ -24,19 +24,11 @@ public: u32 fpexc; }; - /** - * Runs the CPU for the given number of instructions - * @param num_instructions Number of instructions to run - */ - void Run(int num_instructions) { - ExecuteInstructions(num_instructions); - this->num_instructions += num_instructions; - } + /// Runs the CPU until an event happens + virtual void Run() = 0; /// Step CPU by one instruction - void Step() { - Run(1); - } + virtual void Step() = 0; /// Clear all instruction cache virtual void ClearInstructionCache() = 0; @@ -138,19 +130,4 @@ public: /// Prepare core for thread reschedule (if needed to correctly handle state) virtual void PrepareReschedule() = 0; - - /// Getter for num_instructions - u64 GetNumInstructions() const { - return num_instructions; - } - -protected: - /** - * Executes the given number of instructions - * @param num_instructions Number of instructions to executes - */ - virtual void ExecuteInstructions(int num_instructions) = 0; - -private: - u64 num_instructions = 0; ///< Number of instructions executed }; diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index 0a6d5c260..1c99fc9b8 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp @@ -40,11 +40,20 @@ static bool IsReadOnlyMemory(u32 vaddr) { return false; } +static void AddTicks(u64 ticks) { + CoreTiming::AddTicks(ticks); +} + +static u64 GetTicksRemaining() { + int ticks = CoreTiming::GetDowncount(); + return static_cast(ticks <= 0 ? 0 : ticks); +} + static Dynarmic::UserCallbacks GetUserCallbacks( - const std::shared_ptr& interpeter_state, Memory::PageTable* current_page_table) { + const std::shared_ptr& interpreter_state, Memory::PageTable* current_page_table) { Dynarmic::UserCallbacks user_callbacks{}; user_callbacks.InterpreterFallback = &InterpreterFallback; - user_callbacks.user_arg = static_cast(interpeter_state.get()); + user_callbacks.user_arg = static_cast(interpreter_state.get()); user_callbacks.CallSVC = &SVC::CallSVC; user_callbacks.memory.IsReadOnlyMemory = &IsReadOnlyMemory; user_callbacks.memory.ReadCode = &Memory::Read32; @@ -56,8 +65,10 @@ static Dynarmic::UserCallbacks GetUserCallbacks( user_callbacks.memory.Write16 = &Memory::Write16; user_callbacks.memory.Write32 = &Memory::Write32; user_callbacks.memory.Write64 = &Memory::Write64; + user_callbacks.AddTicks = &AddTicks; + user_callbacks.GetTicksRemaining = &GetTicksRemaining; user_callbacks.page_table = ¤t_page_table->pointers; - user_callbacks.coprocessors[15] = std::make_shared(interpeter_state); + user_callbacks.coprocessors[15] = std::make_shared(interpreter_state); return user_callbacks; } @@ -66,6 +77,19 @@ ARM_Dynarmic::ARM_Dynarmic(PrivilegeMode initial_mode) { PageTableChanged(); } +MICROPROFILE_DEFINE(ARM_Jit, "ARM JIT", "ARM JIT", MP_RGB(255, 64, 64)); + +void ARM_Dynarmic::Run() { + ASSERT(Memory::GetCurrentPageTable() == current_page_table); + MICROPROFILE_SCOPE(ARM_Jit); + + jit->Run(GetTicksRemaining()); +} + +void ARM_Dynarmic::Step() { + InterpreterFallback(jit->Regs()[15], jit, static_cast(interpreter_state.get())); +} + void ARM_Dynarmic::SetPC(u32 pc) { jit->Regs()[15] = pc; } @@ -124,17 +148,6 @@ void ARM_Dynarmic::SetCP15Register(CP15Register reg, u32 value) { interpreter_state->CP15[reg] = value; } -MICROPROFILE_DEFINE(ARM_Jit, "ARM JIT", "ARM JIT", MP_RGB(255, 64, 64)); - -void ARM_Dynarmic::ExecuteInstructions(int num_instructions) { - ASSERT(Memory::GetCurrentPageTable() == current_page_table); - MICROPROFILE_SCOPE(ARM_Jit); - - std::size_t ticks_executed = jit->Run(static_cast(num_instructions)); - - CoreTiming::AddTicks(ticks_executed); -} - void ARM_Dynarmic::SaveContext(ARM_Interface::ThreadContext& ctx) { memcpy(ctx.cpu_registers, jit->Regs().data(), sizeof(ctx.cpu_registers)); memcpy(ctx.fpu_registers, jit->ExtRegs().data(), sizeof(ctx.fpu_registers)); @@ -168,6 +181,7 @@ void ARM_Dynarmic::PrepareReschedule() { } void ARM_Dynarmic::ClearInstructionCache() { + // TODO: Clear interpreter cache when appropriate. for (const auto& j : jits) { j.second->ClearCache(); } diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h index 0b00158a5..ffedfbc91 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.h +++ b/src/core/arm/dynarmic/arm_dynarmic.h @@ -19,6 +19,9 @@ class ARM_Dynarmic final : public ARM_Interface { public: ARM_Dynarmic(PrivilegeMode initial_mode); + void Run() override; + void Step() override; + void SetPC(u32 pc) override; u32 GetPC() const override; u32 GetReg(int index) const override; @@ -36,7 +39,6 @@ public: void LoadContext(const ThreadContext& ctx) override; void PrepareReschedule() override; - void ExecuteInstructions(int num_instructions) override; void ClearInstructionCache() override; void PageTableChanged() override; diff --git a/src/core/arm/dyncom/arm_dyncom.cpp b/src/core/arm/dyncom/arm_dyncom.cpp index 4d72aef77..7e0624e16 100644 --- a/src/core/arm/dyncom/arm_dyncom.cpp +++ b/src/core/arm/dyncom/arm_dyncom.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include #include "core/arm/dyncom/arm_dyncom.h" @@ -20,6 +21,14 @@ ARM_DynCom::ARM_DynCom(PrivilegeMode initial_mode) { ARM_DynCom::~ARM_DynCom() {} +void ARM_DynCom::Run() { + ExecuteInstructions(std::max(CoreTiming::GetDowncount(), 0)); +} + +void ARM_DynCom::Step() { + ExecuteInstructions(1); +} + void ARM_DynCom::ClearInstructionCache() { state->instruction_cache.clear(); trans_cache_buf_top = 0; @@ -79,10 +88,6 @@ void ARM_DynCom::SetCP15Register(CP15Register reg, u32 value) { void ARM_DynCom::ExecuteInstructions(int num_instructions) { state->NumInstrsToExecute = num_instructions; - - // Dyncom only breaks on instruction dispatch. This only happens on every instruction when - // executing one instruction at a time. Otherwise, if a block is being executed, more - // instructions may actually be executed than specified. unsigned ticks_executed = InterpreterMainLoop(state.get()); CoreTiming::AddTicks(ticks_executed); } diff --git a/src/core/arm/dyncom/arm_dyncom.h b/src/core/arm/dyncom/arm_dyncom.h index fc1ffed6a..3c6cc3bda 100644 --- a/src/core/arm/dyncom/arm_dyncom.h +++ b/src/core/arm/dyncom/arm_dyncom.h @@ -15,6 +15,9 @@ public: ARM_DynCom(PrivilegeMode initial_mode); ~ARM_DynCom(); + void Run() override; + void Step() override; + void ClearInstructionCache() override; void PageTableChanged() override; @@ -35,8 +38,9 @@ public: void LoadContext(const ThreadContext& ctx) override; void PrepareReschedule() override; - void ExecuteInstructions(int num_instructions) override; private: + void ExecuteInstructions(int num_instructions); + std::unique_ptr state; }; diff --git a/src/core/core.cpp b/src/core/core.cpp index d18532190..44f304b15 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -27,7 +27,7 @@ namespace Core { /*static*/ System System::s_instance; -System::ResultStatus System::RunLoop(int tight_loop) { +System::ResultStatus System::RunLoop(bool tight_loop) { status = ResultStatus::Success; if (!cpu_core) { return ResultStatus::ErrorNotInitialized; @@ -57,7 +57,11 @@ System::ResultStatus System::RunLoop(int tight_loop) { PrepareReschedule(); } else { CoreTiming::Advance(); - cpu_core->Run(tight_loop); + if (tight_loop) { + cpu_core->Run(); + } else { + cpu_core->Step(); + } } HW::Update(); @@ -67,7 +71,7 @@ System::ResultStatus System::RunLoop(int tight_loop) { } System::ResultStatus System::SingleStep() { - return RunLoop(1); + return RunLoop(false); } System::ResultStatus System::Load(EmuWindow* emu_window, const std::string& filepath) { diff --git a/src/core/core.h b/src/core/core.h index 9805cc694..3bf0b2402 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -50,10 +50,10 @@ public: * is not required to do a full dispatch with each instruction. NOTE: the number of instructions * requested is not guaranteed to run, as this will be interrupted preemptively if a hardware * update is requested (e.g. on a thread switch). - * @param tight_loop Number of instructions to execute. + * @param tight_loop If false, the CPU single-steps. * @return Result status, indicating whethor or not the operation succeeded. */ - ResultStatus RunLoop(int tight_loop = 1000); + ResultStatus RunLoop(bool tight_loop = true); /** * Step the CPU one instruction diff --git a/src/tests/core/arm/dyncom/arm_dyncom_vfp_tests.cpp b/src/tests/core/arm/dyncom/arm_dyncom_vfp_tests.cpp index 83719a58e..04ac83d2b 100644 --- a/src/tests/core/arm/dyncom/arm_dyncom_vfp_tests.cpp +++ b/src/tests/core/arm/dyncom/arm_dyncom_vfp_tests.cpp @@ -34,7 +34,7 @@ TEST_CASE("ARM_DynCom (vfp): vadd", "[arm_dyncom]") { dyncom.SetVFPSystemReg(VFP_FPSCR, test_case.initial_fpscr); dyncom.SetVFPReg(4, test_case.a); dyncom.SetVFPReg(6, test_case.b); - dyncom.ExecuteInstructions(1); + dyncom.Step(); if (dyncom.GetVFPReg(2) != test_case.result || dyncom.GetVFPSystemReg(VFP_FPSCR) != test_case.final_fpscr) { printf("f: %x\n", test_case.initial_fpscr); From f6dfdc35886e4fae989c7ab5ab56b4a75314b721 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 3 Dec 2017 16:40:21 +0000 Subject: [PATCH 2/2] core/arm: Improve timing accuracy before service calls in CPU interpreter --- src/core/arm/dyncom/arm_dyncom_interpreter.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp index 3522d1e82..8f7126ae3 100644 --- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp +++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp @@ -18,6 +18,7 @@ #include "core/arm/skyeye_common/armstate.h" #include "core/arm/skyeye_common/armsupp.h" #include "core/arm/skyeye_common/vfp/vfp.h" +#include "core/core_timing.h" #include "core/gdbstub/gdbstub.h" #include "core/hle/svc.h" #include "core/memory.h" @@ -3859,6 +3860,10 @@ SUB_INST : { SWI_INST : { if (inst_base->cond == ConditionCode::AL || CondPassed(cpu, inst_base->cond)) { swi_inst* const inst_cream = (swi_inst*)inst_base->component; + CoreTiming::AddTicks(num_instrs); + cpu->NumInstrsToExecute = + num_instrs >= cpu->NumInstrsToExecute ? 0 : cpu->NumInstrsToExecute - num_instrs; + num_instrs = 0; SVC::CallSVC(inst_cream->num & 0xFFFF); }