From 555866f8dcb98897688d5d7b0e6c6cca55ac069f Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 9 Sep 2019 21:37:29 -0400 Subject: [PATCH 1/7] Core Timing: Rework Core Timing to run all cores evenly. --- src/core/arm/dynarmic/arm_dynarmic.cpp | 2 +- src/core/arm/unicorn/arm_unicorn.cpp | 2 +- src/core/core_cpu.cpp | 14 ++---- src/core/core_timing.cpp | 66 +++++++++++++++++++------- src/core/core_timing.h | 24 ++++++++-- src/core/cpu_core_manager.cpp | 19 +++++--- 6 files changed, 89 insertions(+), 38 deletions(-) diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index f1506b3728..4d2e99ed00 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp @@ -116,7 +116,7 @@ public: num_interpreted_instructions = 0; } u64 GetTicksRemaining() override { - return std::max(parent.system.CoreTiming().GetDowncount(), 0); + return std::max(parent.system.CoreTiming().GetDowncount(), 0LL); } u64 GetCNTPCT() override { return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks()); diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp index 97d5c2a8ad..3f91b06d4a 100644 --- a/src/core/arm/unicorn/arm_unicorn.cpp +++ b/src/core/arm/unicorn/arm_unicorn.cpp @@ -156,7 +156,7 @@ void ARM_Unicorn::Run() { if (GDBStub::IsServerEnabled()) { ExecuteInstructions(std::max(4000000, 0)); } else { - ExecuteInstructions(std::max(system.CoreTiming().GetDowncount(), 0)); + ExecuteInstructions(std::max(system.CoreTiming().GetDowncount(), 0LL)); } } diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp index 21c410e342..6bd9639c6e 100644 --- a/src/core/core_cpu.cpp +++ b/src/core/core_cpu.cpp @@ -85,24 +85,16 @@ void Cpu::RunLoop(bool tight_loop) { // instead advance to the next event and try to yield to the next thread if (Kernel::GetCurrentThread() == nullptr) { LOG_TRACE(Core, "Core-{} idling", core_index); - - if (IsMainCore()) { - // TODO(Subv): Only 
let CoreTiming idle if all 4 cores are idling. - core_timing.Idle(); - core_timing.Advance(); - } - + core_timing.Idle(); + core_timing.Advance(); PrepareReschedule(); } else { - if (IsMainCore()) { - core_timing.Advance(); - } - if (tight_loop) { arm_interface->Run(); } else { arm_interface->Step(); } + core_timing.Advance(); } Reschedule(); diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index a58f7b131d..6da2dcfb46 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -15,7 +15,7 @@ namespace Core::Timing { -constexpr int MAX_SLICE_LENGTH = 20000; +constexpr int MAX_SLICE_LENGTH = 10000; struct CoreTiming::Event { s64 time; @@ -38,10 +38,14 @@ CoreTiming::CoreTiming() = default; CoreTiming::~CoreTiming() = default; void CoreTiming::Initialize() { - downcount = MAX_SLICE_LENGTH; + for (std::size_t core = 0; core < num_cpu_cores; core++) { + downcounts[core] = MAX_SLICE_LENGTH; + time_slice[core] = MAX_SLICE_LENGTH; + } slice_length = MAX_SLICE_LENGTH; global_timer = 0; idled_cycles = 0; + current_context = 0; // The time between CoreTiming being initialized and the first call to Advance() is considered // the slice boundary between slice -1 and slice 0. 
Dispatcher loops must call Advance() before @@ -110,7 +114,7 @@ void CoreTiming::UnscheduleEvent(const EventType* event_type, u64 userdata) { u64 CoreTiming::GetTicks() const { u64 ticks = static_cast(global_timer); if (!is_global_timer_sane) { - ticks += slice_length - downcount; + ticks += time_slice[current_context] - downcounts[current_context]; } return ticks; } @@ -120,7 +124,7 @@ u64 CoreTiming::GetIdleTicks() const { } void CoreTiming::AddTicks(u64 ticks) { - downcount -= static_cast(ticks); + downcounts[current_context] -= static_cast(ticks); } void CoreTiming::ClearPendingEvents() { @@ -141,22 +145,36 @@ void CoreTiming::RemoveEvent(const EventType* event_type) { void CoreTiming::ForceExceptionCheck(s64 cycles) { cycles = std::max(0, cycles); - if (downcount <= cycles) { + if (downcounts[current_context] <= cycles) { return; } // downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int // here. Account for cycles already executed by adjusting the g.slice_length - slice_length -= downcount - static_cast(cycles); - downcount = static_cast(cycles); + slice_length -= downcounts[current_context] - static_cast(cycles); + downcounts[current_context] = static_cast(cycles); +} + +std::optional CoreTiming::NextAvailableCore(const s64 needed_ticks) const { + const u64 original_context = current_context; + u64 next_context = (original_context + 1) % num_cpu_cores; + while (next_context != original_context) { + if (time_slice[next_context] >= needed_ticks) { + return {next_context}; + } else if (time_slice[next_context] >= 0) { + return {}; + } + next_context = (next_context + 1) % num_cpu_cores; + } + return {}; } void CoreTiming::Advance() { std::unique_lock guard(inner_mutex); - const int cycles_executed = slice_length - downcount; + const int cycles_executed = time_slice[current_context] - downcounts[current_context]; + time_slice[current_context] = std::max(0, downcounts[current_context]); global_timer += cycles_executed; - 
slice_length = MAX_SLICE_LENGTH; is_global_timer_sane = true; @@ -173,24 +191,40 @@ void CoreTiming::Advance() { // Still events left (scheduled in the future) if (!event_queue.empty()) { - slice_length = static_cast( - std::min(event_queue.front().time - global_timer, MAX_SLICE_LENGTH)); + s64 needed_ticks = std::min(event_queue.front().time - global_timer, MAX_SLICE_LENGTH); + const auto next_core = NextAvailableCore(needed_ticks); + if (next_core) { + downcounts[*next_core] = needed_ticks; + } } - downcount = slice_length; + downcounts[current_context] = time_slice[current_context]; +} + +void CoreTiming::ResetRun() { + for (std::size_t core = 0; core < num_cpu_cores; core++) { + downcounts[core] = MAX_SLICE_LENGTH; + time_slice[core] = MAX_SLICE_LENGTH; + } + current_context = 0; + // Still events left (scheduled in the future) + if (!event_queue.empty()) { + s64 needed_ticks = std::min(event_queue.front().time - global_timer, MAX_SLICE_LENGTH); + downcounts[current_context] = needed_ticks; + } } void CoreTiming::Idle() { - idled_cycles += downcount; - downcount = 0; + idled_cycles += downcounts[current_context]; + downcounts[current_context] = 0; } std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const { return std::chrono::microseconds{GetTicks() * 1000000 / BASE_CLOCK_RATE}; } -int CoreTiming::GetDowncount() const { - return downcount; +s64 CoreTiming::GetDowncount() const { + return downcounts[current_context]; } } // namespace Core::Timing diff --git a/src/core/core_timing.h b/src/core/core_timing.h index 161c7007da..ec0a6d2c09 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -104,7 +105,19 @@ public: std::chrono::microseconds GetGlobalTimeUs() const; - int GetDowncount() const; + void ResetRun(); + + s64 GetDowncount() const; + + void SwitchContext(u64 new_context) { + current_context = new_context; + } + + bool CurrentContextCanRun() const { + return 
time_slice[current_context] > 0; + } + + std::optional NextAvailableCore(const s64 needed_ticks) const; private: struct Event; @@ -112,10 +125,15 @@ private: /// Clear all pending events. This should ONLY be done on exit. void ClearPendingEvents(); + static constexpr u64 num_cpu_cores = 4; + s64 global_timer = 0; s64 idled_cycles = 0; - int slice_length = 0; - int downcount = 0; + s64 slice_length = 0; + std::array downcounts{}; + // Slice of time assigned to each core per run. + std::array time_slice{}; + u64 current_context = 0; // Are we in a function that has been called from Advance() // If events are scheduled from a function that gets called from Advance(), diff --git a/src/core/cpu_core_manager.cpp b/src/core/cpu_core_manager.cpp index 8fcb4eeb15..e022e6a608 100644 --- a/src/core/cpu_core_manager.cpp +++ b/src/core/cpu_core_manager.cpp @@ -6,6 +6,7 @@ #include "core/arm/exclusive_monitor.h" #include "core/core.h" #include "core/core_cpu.h" +#include "core/core_timing.h" #include "core/cpu_core_manager.h" #include "core/gdbstub/gdbstub.h" #include "core/settings.h" @@ -122,13 +123,19 @@ void CpuCoreManager::RunLoop(bool tight_loop) { } } - for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) { - cores[active_core]->RunLoop(tight_loop); - if (Settings::values.use_multi_core) { - // Cores 1-3 are run on other threads in this mode - break; + auto& core_timing = system.CoreTiming(); + core_timing.ResetRun(); + bool keep_running{}; + do { + keep_running = false; + for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) { + core_timing.SwitchContext(active_core); + if (core_timing.CurrentContextCanRun()) { + cores[active_core]->RunLoop(tight_loop); + } + keep_running |= core_timing.CurrentContextCanRun(); } - } + } while (keep_running); if (GDBStub::IsServerEnabled()) { GDBStub::SetCpuStepFlag(false); From c9a1129c95a4295a9e0c4ea2f420da7b1cfb3f3d Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 30 Sep 2019 21:59:21 -0400 Subject: 
[PATCH 2/7] Tests: Eliminate old Core Timing Tests --- src/tests/core/core_timing.cpp | 193 --------------------------------- 1 file changed, 193 deletions(-) diff --git a/src/tests/core/core_timing.cpp b/src/tests/core/core_timing.cpp index f8be8fd19f..596a2f4aac 100644 --- a/src/tests/core/core_timing.cpp +++ b/src/tests/core/core_timing.cpp @@ -52,196 +52,3 @@ static void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, int REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags); REQUIRE(downcount == core_timing.GetDowncount()); } - -TEST_CASE("CoreTiming[BasicOrder]", "[core]") { - ScopeInit guard; - auto& core_timing = guard.core_timing; - - Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>); - Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>); - Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>); - Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", CallbackTemplate<3>); - Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", CallbackTemplate<4>); - - // Enter slice 0 - core_timing.Advance(); - - // D -> B -> C -> A -> E - core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]); - REQUIRE(1000 == core_timing.GetDowncount()); - core_timing.ScheduleEvent(500, cb_b, CB_IDS[1]); - REQUIRE(500 == core_timing.GetDowncount()); - core_timing.ScheduleEvent(800, cb_c, CB_IDS[2]); - REQUIRE(500 == core_timing.GetDowncount()); - core_timing.ScheduleEvent(100, cb_d, CB_IDS[3]); - REQUIRE(100 == core_timing.GetDowncount()); - core_timing.ScheduleEvent(1200, cb_e, CB_IDS[4]); - REQUIRE(100 == core_timing.GetDowncount()); - - AdvanceAndCheck(core_timing, 3, 400); - AdvanceAndCheck(core_timing, 1, 300); - AdvanceAndCheck(core_timing, 2, 200); - AdvanceAndCheck(core_timing, 0, 200); - AdvanceAndCheck(core_timing, 4, MAX_SLICE_LENGTH); -} - -TEST_CASE("CoreTiming[Threadsave]", "[core]") { - 
ScopeInit guard; - auto& core_timing = guard.core_timing; - - Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>); - Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>); - Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>); - Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", CallbackTemplate<3>); - Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", CallbackTemplate<4>); - - // Enter slice 0 - core_timing.Advance(); - - // D -> B -> C -> A -> E - core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]); - // Manually force since ScheduleEvent doesn't call it - core_timing.ForceExceptionCheck(1000); - REQUIRE(1000 == core_timing.GetDowncount()); - core_timing.ScheduleEvent(500, cb_b, CB_IDS[1]); - // Manually force since ScheduleEvent doesn't call it - core_timing.ForceExceptionCheck(500); - REQUIRE(500 == core_timing.GetDowncount()); - core_timing.ScheduleEvent(800, cb_c, CB_IDS[2]); - // Manually force since ScheduleEvent doesn't call it - core_timing.ForceExceptionCheck(800); - REQUIRE(500 == core_timing.GetDowncount()); - core_timing.ScheduleEvent(100, cb_d, CB_IDS[3]); - // Manually force since ScheduleEvent doesn't call it - core_timing.ForceExceptionCheck(100); - REQUIRE(100 == core_timing.GetDowncount()); - core_timing.ScheduleEvent(1200, cb_e, CB_IDS[4]); - // Manually force since ScheduleEvent doesn't call it - core_timing.ForceExceptionCheck(1200); - REQUIRE(100 == core_timing.GetDowncount()); - - AdvanceAndCheck(core_timing, 3, 400); - AdvanceAndCheck(core_timing, 1, 300); - AdvanceAndCheck(core_timing, 2, 200); - AdvanceAndCheck(core_timing, 0, 200); - AdvanceAndCheck(core_timing, 4, MAX_SLICE_LENGTH); -} - -namespace SharedSlotTest { -static unsigned int counter = 0; - -template -void FifoCallback(u64 userdata, s64 cycles_late) { - static_assert(ID < CB_IDS.size(), "ID out of range"); - 
callbacks_ran_flags.set(ID); - REQUIRE(CB_IDS[ID] == userdata); - REQUIRE(ID == counter); - REQUIRE(lateness == cycles_late); - ++counter; -} -} // namespace SharedSlotTest - -TEST_CASE("CoreTiming[SharedSlot]", "[core]") { - using namespace SharedSlotTest; - - ScopeInit guard; - auto& core_timing = guard.core_timing; - - Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", FifoCallback<0>); - Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", FifoCallback<1>); - Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", FifoCallback<2>); - Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", FifoCallback<3>); - Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", FifoCallback<4>); - - core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]); - core_timing.ScheduleEvent(1000, cb_b, CB_IDS[1]); - core_timing.ScheduleEvent(1000, cb_c, CB_IDS[2]); - core_timing.ScheduleEvent(1000, cb_d, CB_IDS[3]); - core_timing.ScheduleEvent(1000, cb_e, CB_IDS[4]); - - // Enter slice 0 - core_timing.Advance(); - REQUIRE(1000 == core_timing.GetDowncount()); - - callbacks_ran_flags = 0; - counter = 0; - lateness = 0; - core_timing.AddTicks(core_timing.GetDowncount()); - core_timing.Advance(); - REQUIRE(MAX_SLICE_LENGTH == core_timing.GetDowncount()); - REQUIRE(0x1FULL == callbacks_ran_flags.to_ullong()); -} - -TEST_CASE("Core::Timing[PredictableLateness]", "[core]") { - ScopeInit guard; - auto& core_timing = guard.core_timing; - - Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>); - Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>); - - // Enter slice 0 - core_timing.Advance(); - - core_timing.ScheduleEvent(100, cb_a, CB_IDS[0]); - core_timing.ScheduleEvent(200, cb_b, CB_IDS[1]); - - AdvanceAndCheck(core_timing, 0, 90, 10, -10); // (100 - 10) - AdvanceAndCheck(core_timing, 1, MAX_SLICE_LENGTH, 50, -50); -} - -namespace 
ChainSchedulingTest { -static int reschedules = 0; - -static void RescheduleCallback(Core::Timing::CoreTiming& core_timing, u64 userdata, - s64 cycles_late) { - --reschedules; - REQUIRE(reschedules >= 0); - REQUIRE(lateness == cycles_late); - - if (reschedules > 0) { - core_timing.ScheduleEvent(1000, reinterpret_cast(userdata), - userdata); - } -} -} // namespace ChainSchedulingTest - -TEST_CASE("CoreTiming[ChainScheduling]", "[core]") { - using namespace ChainSchedulingTest; - - ScopeInit guard; - auto& core_timing = guard.core_timing; - - Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>); - Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>); - Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>); - Core::Timing::EventType* cb_rs = core_timing.RegisterEvent( - "callbackReschedule", [&core_timing](u64 userdata, s64 cycles_late) { - RescheduleCallback(core_timing, userdata, cycles_late); - }); - - // Enter slice 0 - core_timing.Advance(); - - core_timing.ScheduleEvent(800, cb_a, CB_IDS[0]); - core_timing.ScheduleEvent(1000, cb_b, CB_IDS[1]); - core_timing.ScheduleEvent(2200, cb_c, CB_IDS[2]); - core_timing.ScheduleEvent(1000, cb_rs, reinterpret_cast(cb_rs)); - REQUIRE(800 == core_timing.GetDowncount()); - - reschedules = 3; - AdvanceAndCheck(core_timing, 0, 200); // cb_a - AdvanceAndCheck(core_timing, 1, 1000); // cb_b, cb_rs - REQUIRE(2 == reschedules); - - core_timing.AddTicks(core_timing.GetDowncount()); - core_timing.Advance(); // cb_rs - REQUIRE(1 == reschedules); - REQUIRE(200 == core_timing.GetDowncount()); - - AdvanceAndCheck(core_timing, 2, 800); // cb_c - - core_timing.AddTicks(core_timing.GetDowncount()); - core_timing.Advance(); // cb_rs - REQUIRE(0 == reschedules); - REQUIRE(MAX_SLICE_LENGTH == core_timing.GetDowncount()); -} From 65aff6930bdfe1ecbf7e3c7eec967c2c6149aaef Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 8 
Oct 2019 17:18:06 -0400 Subject: [PATCH 3/7] Core Timing: General corrections and added tests. --- src/core/core_timing.cpp | 15 +++- src/core/core_timing.h | 1 + src/tests/core/core_timing.cpp | 156 ++++++++++++++++++++++++++++++++- 3 files changed, 165 insertions(+), 7 deletions(-) diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 6da2dcfb46..0ed6f9b193 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -13,6 +13,8 @@ #include "common/thread.h" #include "core/core_timing_util.h" +#pragma optoimize("", off) + namespace Core::Timing { constexpr int MAX_SLICE_LENGTH = 10000; @@ -114,7 +116,7 @@ void CoreTiming::UnscheduleEvent(const EventType* event_type, u64 userdata) { u64 CoreTiming::GetTicks() const { u64 ticks = static_cast(global_timer); if (!is_global_timer_sane) { - ticks += time_slice[current_context] - downcounts[current_context]; + ticks += accumulated_ticks; } return ticks; } @@ -124,6 +126,7 @@ u64 CoreTiming::GetIdleTicks() const { } void CoreTiming::AddTicks(u64 ticks) { + accumulated_ticks += ticks; downcounts[current_context] -= static_cast(ticks); } @@ -151,7 +154,6 @@ void CoreTiming::ForceExceptionCheck(s64 cycles) { // downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int // here. 
Account for cycles already executed by adjusting the g.slice_length - slice_length -= downcounts[current_context] - static_cast(cycles); downcounts[current_context] = static_cast(cycles); } @@ -172,8 +174,8 @@ std::optional CoreTiming::NextAvailableCore(const s64 needed_ticks) const { void CoreTiming::Advance() { std::unique_lock guard(inner_mutex); - const int cycles_executed = time_slice[current_context] - downcounts[current_context]; - time_slice[current_context] = std::max(0, downcounts[current_context]); + const int cycles_executed = accumulated_ticks; + time_slice[current_context] = std::max(0, time_slice[current_context] - accumulated_ticks); global_timer += cycles_executed; is_global_timer_sane = true; @@ -198,6 +200,8 @@ void CoreTiming::Advance() { } } + accumulated_ticks = 0; + downcounts[current_context] = time_slice[current_context]; } @@ -212,6 +216,9 @@ void CoreTiming::ResetRun() { s64 needed_ticks = std::min(event_queue.front().time - global_timer, MAX_SLICE_LENGTH); downcounts[current_context] = needed_ticks; } + + is_global_timer_sane = false; + accumulated_ticks = 0; } void CoreTiming::Idle() { diff --git a/src/core/core_timing.h b/src/core/core_timing.h index ec0a6d2c09..8bba45beba 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -130,6 +130,7 @@ private: s64 global_timer = 0; s64 idled_cycles = 0; s64 slice_length = 0; + u64 accumulated_ticks = 0; std::array downcounts{}; // Slice of time assigned to each core per run. std::array time_slice{}; diff --git a/src/tests/core/core_timing.cpp b/src/tests/core/core_timing.cpp index 596a2f4aac..467eb4736a 100644 --- a/src/tests/core/core_timing.cpp +++ b/src/tests/core/core_timing.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include "common/file_util.h" #include "core/core.h" @@ -13,7 +14,7 @@ // Numbers are chosen randomly to make sure the correct one is given. 
static constexpr std::array CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}}; -static constexpr int MAX_SLICE_LENGTH = 20000; // Copied from CoreTiming internals +static constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals static std::bitset callbacks_ran_flags; static u64 expected_callback = 0; @@ -28,6 +29,12 @@ void CallbackTemplate(u64 userdata, s64 cycles_late) { REQUIRE(lateness == cycles_late); } +static u64 callbacks_done = 0; + +void EmptyCallback(u64 userdata, s64 cycles_late) { + ++callbacks_done; +} + struct ScopeInit final { ScopeInit() { core_timing.Initialize(); @@ -39,16 +46,159 @@ struct ScopeInit final { Core::Timing::CoreTiming core_timing; }; -static void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, int downcount, +static void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, u32 context = 0, int expected_lateness = 0, int cpu_downcount = 0) { callbacks_ran_flags = 0; expected_callback = CB_IDS[idx]; lateness = expected_lateness; // Pretend we executed X cycles of instructions. 
+ core_timing.SwitchContext(context); core_timing.AddTicks(core_timing.GetDowncount() - cpu_downcount); core_timing.Advance(); + core_timing.SwitchContext((context + 1) % 4); REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags); - REQUIRE(downcount == core_timing.GetDowncount()); +} + +TEST_CASE("CoreTiming[BasicOrder]", "[core]") { + ScopeInit guard; + auto& core_timing = guard.core_timing; + + Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>); + Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>); + Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>); + Core::Timing::EventType* cb_d = core_timing.RegisterEvent("callbackD", CallbackTemplate<3>); + Core::Timing::EventType* cb_e = core_timing.RegisterEvent("callbackE", CallbackTemplate<4>); + + // Enter slice 0 + core_timing.ResetRun(); + + // D -> B -> C -> A -> E + core_timing.SwitchContext(0); + core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]); + REQUIRE(1000 == core_timing.GetDowncount()); + core_timing.ScheduleEvent(500, cb_b, CB_IDS[1]); + REQUIRE(500 == core_timing.GetDowncount()); + core_timing.ScheduleEvent(800, cb_c, CB_IDS[2]); + REQUIRE(500 == core_timing.GetDowncount()); + core_timing.ScheduleEvent(100, cb_d, CB_IDS[3]); + REQUIRE(100 == core_timing.GetDowncount()); + core_timing.ScheduleEvent(1200, cb_e, CB_IDS[4]); + REQUIRE(100 == core_timing.GetDowncount()); + + AdvanceAndCheck(core_timing, 3, 0); + AdvanceAndCheck(core_timing, 1, 1); + AdvanceAndCheck(core_timing, 2, 2); + AdvanceAndCheck(core_timing, 0, 3); + AdvanceAndCheck(core_timing, 4, 0); +} + +TEST_CASE("CoreTiming[FairSharing]", "[core]") { + + ScopeInit guard; + auto& core_timing = guard.core_timing; + + Core::Timing::EventType* empty_callback = + core_timing.RegisterEvent("empty_callback", EmptyCallback); + + callbacks_done = 0; + u64 MAX_CALLBACKS = 10; + for (std::size_t i = 0; i < 10; i++) { + 
core_timing.ScheduleEvent(i * 3333U, empty_callback, 0); + } + + const s64 advances = MAX_SLICE_LENGTH / 10; + core_timing.ResetRun(); + u64 current_time = core_timing.GetTicks(); + bool keep_running{}; + do { + keep_running = false; + for (u32 active_core = 0; active_core < 4; ++active_core) { + core_timing.SwitchContext(active_core); + if (core_timing.CurrentContextCanRun()) { + core_timing.AddTicks(std::min(advances, core_timing.GetDowncount())); + core_timing.Advance(); + } + keep_running |= core_timing.CurrentContextCanRun(); + } + } while (keep_running); + u64 current_time_2 = core_timing.GetTicks(); + + REQUIRE(MAX_CALLBACKS == callbacks_done); + REQUIRE(current_time_2 == current_time + MAX_SLICE_LENGTH * 4); +} + +TEST_CASE("Core::Timing[PredictableLateness]", "[core]") { + ScopeInit guard; + auto& core_timing = guard.core_timing; + + Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>); + Core::Timing::EventType* cb_b = core_timing.RegisterEvent("callbackB", CallbackTemplate<1>); + + // Enter slice 0 + core_timing.ResetRun(); + + core_timing.ScheduleEvent(100, cb_a, CB_IDS[0]); + core_timing.ScheduleEvent(200, cb_b, CB_IDS[1]); + + AdvanceAndCheck(core_timing, 0, 0, 10, -10); // (100 - 10) + AdvanceAndCheck(core_timing, 1, 1, 50, -50); +} + +namespace ChainSchedulingTest { +static int reschedules = 0; + +static void RescheduleCallback(Core::Timing::CoreTiming& core_timing, u64 userdata, + s64 cycles_late) { + --reschedules; + REQUIRE(reschedules >= 0); + REQUIRE(lateness == cycles_late); + + if (reschedules > 0) { + core_timing.ScheduleEvent(1000, reinterpret_cast(userdata), + userdata); + } +} +} // namespace ChainSchedulingTest + +TEST_CASE("CoreTiming[ChainScheduling]", "[core]") { + using namespace ChainSchedulingTest; + + ScopeInit guard; + auto& core_timing = guard.core_timing; + + Core::Timing::EventType* cb_a = core_timing.RegisterEvent("callbackA", CallbackTemplate<0>); + Core::Timing::EventType* cb_b = 
core_timing.RegisterEvent("callbackB", CallbackTemplate<1>); + Core::Timing::EventType* cb_c = core_timing.RegisterEvent("callbackC", CallbackTemplate<2>); + Core::Timing::EventType* cb_rs = core_timing.RegisterEvent( + "callbackReschedule", [&core_timing](u64 userdata, s64 cycles_late) { + RescheduleCallback(core_timing, userdata, cycles_late); + }); + + // Enter slice 0 + core_timing.ResetRun(); + + core_timing.ScheduleEvent(800, cb_a, CB_IDS[0]); + core_timing.ScheduleEvent(1000, cb_b, CB_IDS[1]); + core_timing.ScheduleEvent(2200, cb_c, CB_IDS[2]); + core_timing.ScheduleEvent(1000, cb_rs, reinterpret_cast(cb_rs)); + REQUIRE(800 == core_timing.GetDowncount()); + + reschedules = 3; + AdvanceAndCheck(core_timing, 0, 0); // cb_a + AdvanceAndCheck(core_timing, 1, 1); // cb_b, cb_rs + REQUIRE(2 == reschedules); + + core_timing.AddTicks(core_timing.GetDowncount()); + core_timing.Advance(); // cb_rs + core_timing.SwitchContext(3); + REQUIRE(1 == reschedules); + REQUIRE(200 == core_timing.GetDowncount()); + + AdvanceAndCheck(core_timing, 2, 3); // cb_c + + core_timing.AddTicks(core_timing.GetDowncount()); + core_timing.Advance(); // cb_rs + REQUIRE(0 == reschedules); } From 96f2b16356dfee3b4b1c75f8ef96d81e7923dfc9 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 8 Oct 2019 18:29:30 -0400 Subject: [PATCH 4/7] Core Timing: Correct Idle and remove leftover pragma --- src/core/core_timing.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 0ed6f9b193..3ca265b4f2 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -13,8 +13,6 @@ #include "common/thread.h" #include "core/core_timing_util.h" -#pragma optoimize("", off) - namespace Core::Timing { constexpr int MAX_SLICE_LENGTH = 10000; @@ -222,6 +220,7 @@ void CoreTiming::ResetRun() { } void CoreTiming::Idle() { + accumulated_ticks += downcounts[current_context]; idled_cycles += downcounts[current_context]; 
downcounts[current_context] = 0; } From e0650a2034026d8292196128d2f9decb50eeb0f3 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 11 Oct 2019 14:44:14 -0400 Subject: [PATCH 5/7] Core_Timing: Address Feedback and suppress warnings. --- src/core/arm/dynarmic/arm_dynarmic.cpp | 2 +- src/core/arm/unicorn/arm_unicorn.cpp | 2 +- src/core/core_timing.cpp | 15 +++++++-------- src/core/core_timing.h | 2 +- src/core/cpu_core_manager.cpp | 4 ++-- 5 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index 4d2e99ed00..700c4afff9 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp @@ -116,7 +116,7 @@ public: num_interpreted_instructions = 0; } u64 GetTicksRemaining() override { - return std::max(parent.system.CoreTiming().GetDowncount(), 0LL); + return std::max(parent.system.CoreTiming().GetDowncount(), s64{0}); } u64 GetCNTPCT() override { return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks()); diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp index 3f91b06d4a..d4f41bfc18 100644 --- a/src/core/arm/unicorn/arm_unicorn.cpp +++ b/src/core/arm/unicorn/arm_unicorn.cpp @@ -156,7 +156,7 @@ void ARM_Unicorn::Run() { if (GDBStub::IsServerEnabled()) { ExecuteInstructions(std::max(4000000, 0)); } else { - ExecuteInstructions(std::max(system.CoreTiming().GetDowncount(), 0LL)); + ExecuteInstructions(std::max(system.CoreTiming().GetDowncount(), s64{0})); } } diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 3ca265b4f2..780c6843a4 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -38,10 +38,8 @@ CoreTiming::CoreTiming() = default; CoreTiming::~CoreTiming() = default; void CoreTiming::Initialize() { - for (std::size_t core = 0; core < num_cpu_cores; core++) { - downcounts[core] = MAX_SLICE_LENGTH; - time_slice[core] = MAX_SLICE_LENGTH; - } + 
downcounts.fill(MAX_SLICE_LENGTH); + time_slice.fill(MAX_SLICE_LENGTH); slice_length = MAX_SLICE_LENGTH; global_timer = 0; idled_cycles = 0; @@ -162,17 +160,17 @@ std::optional CoreTiming::NextAvailableCore(const s64 needed_ticks) const { if (time_slice[next_context] >= needed_ticks) { return {next_context}; } else if (time_slice[next_context] >= 0) { - return {}; + return std::nullopt; } next_context = (next_context + 1) % num_cpu_cores; } - return {}; + return std::nullopt; } void CoreTiming::Advance() { std::unique_lock guard(inner_mutex); - const int cycles_executed = accumulated_ticks; + const u64 cycles_executed = accumulated_ticks; time_slice[current_context] = std::max(0, time_slice[current_context] - accumulated_ticks); global_timer += cycles_executed; @@ -191,7 +189,8 @@ void CoreTiming::Advance() { // Still events left (scheduled in the future) if (!event_queue.empty()) { - s64 needed_ticks = std::min(event_queue.front().time - global_timer, MAX_SLICE_LENGTH); + const s64 needed_ticks = + std::min(event_queue.front().time - global_timer, MAX_SLICE_LENGTH); const auto next_core = NextAvailableCore(needed_ticks); if (next_core) { downcounts[*next_core] = needed_ticks; diff --git a/src/core/core_timing.h b/src/core/core_timing.h index 8bba45beba..3bb88c810e 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -113,7 +113,7 @@ public: current_context = new_context; } - bool CurrentContextCanRun() const { + bool CanCurrentContextRun() const { return time_slice[current_context] > 0; } diff --git a/src/core/cpu_core_manager.cpp b/src/core/cpu_core_manager.cpp index e022e6a608..16b384076e 100644 --- a/src/core/cpu_core_manager.cpp +++ b/src/core/cpu_core_manager.cpp @@ -130,10 +130,10 @@ void CpuCoreManager::RunLoop(bool tight_loop) { keep_running = false; for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) { core_timing.SwitchContext(active_core); - if (core_timing.CurrentContextCanRun()) { + if (core_timing.CanCurrentContextRun()) 
{ cores[active_core]->RunLoop(tight_loop); } - keep_running |= core_timing.CurrentContextCanRun(); + keep_running |= core_timing.CanCurrentContextRun(); } } while (keep_running); From 91f6333e238eb3be972c4f5e1634948a0e414ba4 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 12 Oct 2019 07:23:08 -0400 Subject: [PATCH 6/7] Core_Timing: Fix tests. --- src/tests/core/core_timing.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/core/core_timing.cpp b/src/tests/core/core_timing.cpp index 467eb4736a..3443bf05e9 100644 --- a/src/tests/core/core_timing.cpp +++ b/src/tests/core/core_timing.cpp @@ -116,11 +116,11 @@ TEST_CASE("CoreTiming[FairSharing]", "[core]") { keep_running = false; for (u32 active_core = 0; active_core < 4; ++active_core) { core_timing.SwitchContext(active_core); - if (core_timing.CurrentContextCanRun()) { + if (core_timing.CanCurrentContextRun()) { core_timing.AddTicks(std::min(advances, core_timing.GetDowncount())); core_timing.Advance(); } - keep_running |= core_timing.CurrentContextCanRun(); + keep_running |= core_timing.CanCurrentContextRun(); } } while (keep_running); u64 current_time_2 = core_timing.GetTicks(); From a4ae11d63e83323c30e07f3eef1cc7e7829df6df Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 12 Oct 2019 07:26:38 -0400 Subject: [PATCH 7/7] Core_Timing: Address Remaining feedback. 
--- src/core/core_timing.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 780c6843a4..0e95706856 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -203,14 +203,13 @@ void CoreTiming::Advance() { } void CoreTiming::ResetRun() { - for (std::size_t core = 0; core < num_cpu_cores; core++) { - downcounts[core] = MAX_SLICE_LENGTH; - time_slice[core] = MAX_SLICE_LENGTH; - } + downcounts.fill(MAX_SLICE_LENGTH); + time_slice.fill(MAX_SLICE_LENGTH); current_context = 0; // Still events left (scheduled in the future) if (!event_queue.empty()) { - s64 needed_ticks = std::min(event_queue.front().time - global_timer, MAX_SLICE_LENGTH); + const s64 needed_ticks = + std::min(event_queue.front().time - global_timer, MAX_SLICE_LENGTH); downcounts[current_context] = needed_ticks; }