From b164d8ee536dba526f9da2083433d529daf7b37b Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 29 Mar 2019 17:01:17 -0400 Subject: [PATCH 01/29] Implement a new Core Scheduler --- src/core/hle/kernel/scheduler.cpp | 455 +++++++++++++++++++----------- src/core/hle/kernel/scheduler.h | 234 +++++++-------- 2 files changed, 421 insertions(+), 268 deletions(-) diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index e8447b69a1..878aeed6d6 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -3,6 +3,8 @@ // Refer to the license.txt file included. #include +#include +#include #include #include "common/assert.h" @@ -17,57 +19,286 @@ namespace Kernel { -std::mutex Scheduler::scheduler_mutex; +void GlobalScheduler::AddThread(SharedPtr thread) { + thread_list.push_back(std::move(thread)); +} -Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core) - : cpu_core{cpu_core}, system{system} {} +void GlobalScheduler::RemoveThread(Thread* thread) { + thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread), + thread_list.end()); +} -Scheduler::~Scheduler() { - for (auto& thread : thread_list) { - thread->Stop(); +/* + * SelectThreads, Yield functions originally by TuxSH. + * licensed under GPLv2 or later under exception provided by the author. 
+ */ + +void GlobalScheduler::UnloadThread(s32 core) { + Scheduler& sched = Core::System::GetInstance().Scheduler(core); + sched.UnloadThread(); +} + +void GlobalScheduler::SelectThread(u32 core) { + auto update_thread = [](Thread* thread, Scheduler& sched) { + if (thread != sched.selected_thread) { + if (thread == nullptr) { + ++sched.idle_selection_count; + } + sched.selected_thread = thread; + } + sched.context_switch_pending = sched.selected_thread != sched.current_thread; + std::atomic_thread_fence(std::memory_order_seq_cst); + }; + Scheduler& sched = Core::System::GetInstance().Scheduler(core); + Thread* current_thread = nullptr; + current_thread = scheduled_queue[core].empty() ? nullptr : scheduled_queue[core].front(); + if (!current_thread) { + Thread* winner = nullptr; + std::set sug_cores; + for (auto thread : suggested_queue[core]) { + s32 this_core = thread->GetProcessorID(); + Thread* thread_on_core = nullptr; + if (this_core >= 0) { + thread_on_core = scheduled_queue[this_core].front(); + } + if (this_core < 0 || thread != thread_on_core) { + winner = thread; + break; + } + sug_cores.insert(this_core); + } + if (winner && winner->GetPriority() > 2) { + if (winner->IsRunning()) { + UnloadThread(winner->GetProcessorID()); + } + TransferToCore(winner->GetPriority(), core, winner); + current_thread = winner; + } else { + for (auto& src_core : sug_cores) { + auto it = scheduled_queue[src_core].begin(); + it++; + if (it != scheduled_queue[src_core].end()) { + Thread* thread_on_core = scheduled_queue[src_core].front(); + Thread* to_change = *it; + if (thread_on_core->IsRunning() || to_change->IsRunning()) { + UnloadThread(src_core); + } + TransferToCore(thread_on_core->GetPriority(), core, thread_on_core); + current_thread = thread_on_core; + } + } + } + } + update_thread(current_thread, sched); +} + +void GlobalScheduler::SelectThreads() { + auto update_thread = [](Thread* thread, Scheduler& sched) { + if (thread != sched.selected_thread) { + if (thread == 
nullptr) { + ++sched.idle_selection_count; + } + sched.selected_thread = thread; + } + sched.context_switch_pending = sched.selected_thread != sched.current_thread; + std::atomic_thread_fence(std::memory_order_seq_cst); + }; + + auto& system = Core::System::GetInstance(); + + std::unordered_set picked_threads; + // This maintain the "current thread is on front of queue" invariant + std::array current_threads; + for (u32 i = 0; i < NUM_CPU_CORES; i++) { + Scheduler& sched = system.Scheduler(i); + current_threads[i] = scheduled_queue[i].empty() ? nullptr : scheduled_queue[i].front(); + if (current_threads[i]) + picked_threads.insert(current_threads[i]); + update_thread(current_threads[i], sched); + } + + // Do some load-balancing. Allow second pass. + std::array current_threads_2 = current_threads; + for (u32 i = 0; i < NUM_CPU_CORES; i++) { + if (!scheduled_queue[i].empty()) { + continue; + } + Thread* winner = nullptr; + for (auto thread : suggested_queue[i]) { + if (thread->GetProcessorID() < 0 || thread != current_threads[i]) { + if (picked_threads.count(thread) == 0 && !thread->IsRunning()) { + winner = thread; + break; + } + } + } + if (winner) { + TransferToCore(winner->GetPriority(), i, winner); + current_threads_2[i] = winner; + picked_threads.insert(winner); + } + } + + // See which to-be-current threads have changed & update accordingly + for (u32 i = 0; i < NUM_CPU_CORES; i++) { + Scheduler& sched = system.Scheduler(i); + if (current_threads_2[i] != current_threads[i]) { + update_thread(current_threads_2[i], sched); + } + } + + reselection_pending.store(false, std::memory_order_release); +} + +void GlobalScheduler::YieldThread(Thread* yielding_thread) { + // Note: caller should use critical section, etc. 
+ u32 core_id = static_cast(yielding_thread->GetProcessorID()); + u32 priority = yielding_thread->GetPriority(); + + // Yield the thread + ASSERT_MSG(yielding_thread == scheduled_queue[core_id].front(priority), + "Thread yielding without being in front"); + scheduled_queue[core_id].yield(priority); + + Thread* winner = scheduled_queue[core_id].front(priority); + AskForReselectionOrMarkRedundant(yielding_thread, winner); +} + +void GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { + // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section, + // etc. + u32 core_id = static_cast(yielding_thread->GetProcessorID()); + u32 priority = yielding_thread->GetPriority(); + + // Yield the thread + ASSERT_MSG(yielding_thread == scheduled_queue[core_id].front(priority), + "Thread yielding without being in front"); + scheduled_queue[core_id].yield(priority); + + std::array current_threads; + for (u32 i = 0; i < NUM_CPU_CORES; i++) { + current_threads[i] = scheduled_queue[i].empty() ? 
nullptr : scheduled_queue[i].front(); + } + + Thread* next_thread = scheduled_queue[core_id].front(priority); + Thread* winner = nullptr; + for (auto& thread : suggested_queue[core_id]) { + s32 source_core = thread->GetProcessorID(); + if (source_core >= 0) { + if (current_threads[source_core] != nullptr) { + if (thread == current_threads[source_core] || + current_threads[source_core]->GetPriority() < min_regular_priority) + continue; + } + if (next_thread->GetLastRunningTicks() >= thread->GetLastRunningTicks() || + next_thread->GetPriority() < thread->GetPriority()) { + if (thread->GetPriority() <= priority) { + winner = thread; + break; + } + } + } + } + + if (winner != nullptr) { + if (winner != yielding_thread) { + if (winner->IsRunning()) + UnloadThread(winner->GetProcessorID()); + TransferToCore(winner->GetPriority(), core_id, winner); + } + } else { + winner = next_thread; + } + + AskForReselectionOrMarkRedundant(yielding_thread, winner); +} + +void GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread) { + // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section, + // etc. + Thread* winner = nullptr; + u32 core_id = static_cast(yielding_thread->GetProcessorID()); + + // Remove the thread from its scheduled mlq, put it on the corresponding "suggested" one instead + TransferToCore(yielding_thread->GetPriority(), -1, yielding_thread); + + // If the core is idle, perform load balancing, excluding the threads that have just used this + // function... + if (scheduled_queue[core_id].empty()) { + // Here, "current_threads" is calculated after the ""yield"", unlike yield -1 + std::array current_threads; + for (u32 i = 0; i < NUM_CPU_CORES; i++) { + current_threads[i] = scheduled_queue[i].empty() ? 
nullptr : scheduled_queue[i].front(); + } + for (auto& thread : suggested_queue[core_id]) { + s32 source_core = thread->GetProcessorID(); + if (source_core < 0 || thread == current_threads[source_core]) + continue; + if (current_threads[source_core] == nullptr || + current_threads[source_core]->GetPriority() >= min_regular_priority) { + winner = thread; + } + break; + } + if (winner != nullptr) { + if (winner != yielding_thread) { + if (winner->IsRunning()) + UnloadThread(winner->GetProcessorID()); + TransferToCore(winner->GetPriority(), core_id, winner); + } + } else { + winner = yielding_thread; + } + } + + AskForReselectionOrMarkRedundant(yielding_thread, winner); +} + +void GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread, Thread* winner) { + if (current_thread == winner) { + // Nintendo (not us) has a nullderef bug on current_thread->owner, but which is never + // triggered. + // current_thread->SetRedundantSchedulerOperation(); + } else { + reselection_pending.store(true, std::memory_order_release); } } +GlobalScheduler::~GlobalScheduler() = default; + +Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, u32 id) + : system(system), cpu_core(cpu_core), id(id) {} + +Scheduler::~Scheduler() {} + bool Scheduler::HaveReadyThreads() const { - std::lock_guard lock{scheduler_mutex}; - return !ready_queue.empty(); + return system.GlobalScheduler().HaveReadyThreads(id); } Thread* Scheduler::GetCurrentThread() const { return current_thread.get(); } +Thread* Scheduler::GetSelectedThread() const { + return selected_thread.get(); +} + +void Scheduler::SelectThreads() { + system.GlobalScheduler().SelectThread(id); +} + u64 Scheduler::GetLastContextSwitchTicks() const { return last_context_switch_time; } -Thread* Scheduler::PopNextReadyThread() { - Thread* next = nullptr; - Thread* thread = GetCurrentThread(); - - if (thread && thread->GetStatus() == ThreadStatus::Running) { - if (ready_queue.empty()) { - return thread; - } - // 
We have to do better than the current thread. - // This call returns null when that's not possible. - next = ready_queue.front(); - if (next == nullptr || next->GetPriority() >= thread->GetPriority()) { - next = thread; - } - } else { - if (ready_queue.empty()) { - return nullptr; - } - next = ready_queue.front(); - } - - return next; +void Scheduler::TryDoContextSwitch() { + if (context_switch_pending) + SwitchContext(); } -void Scheduler::SwitchContext(Thread* new_thread) { - Thread* previous_thread = GetCurrentThread(); - Process* const previous_process = system.Kernel().CurrentProcess(); +void Scheduler::UnloadThread() { + Thread* const previous_thread = GetCurrentThread(); + Process* const previous_process = Core::CurrentProcess(); UpdateLastContextSwitchTime(previous_thread, previous_process); @@ -80,23 +311,51 @@ void Scheduler::SwitchContext(Thread* new_thread) { if (previous_thread->GetStatus() == ThreadStatus::Running) { // This is only the case when a reschedule is triggered without the current thread // yielding execution (i.e. an event triggered, system core time-sliced, etc) - ready_queue.add(previous_thread, previous_thread->GetPriority(), false); previous_thread->SetStatus(ThreadStatus::Ready); } + previous_thread->SetIsRunning(false); + } + current_thread = nullptr; +} + +void Scheduler::SwitchContext() { + Thread* const previous_thread = GetCurrentThread(); + Thread* const new_thread = GetSelectedThread(); + + context_switch_pending = false; + if (new_thread == previous_thread) + return; + + Process* const previous_process = Core::CurrentProcess(); + + UpdateLastContextSwitchTime(previous_thread, previous_process); + + // Save context for previous thread + if (previous_thread) { + cpu_core.SaveContext(previous_thread->GetContext()); + // Save the TPIDR_EL0 system register in case it was modified. 
+ previous_thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0()); + + if (previous_thread->GetStatus() == ThreadStatus::Running) { + // This is only the case when a reschedule is triggered without the current thread + // yielding execution (i.e. an event triggered, system core time-sliced, etc) + previous_thread->SetStatus(ThreadStatus::Ready); + } + previous_thread->SetIsRunning(false); } // Load context of new thread if (new_thread) { + ASSERT_MSG(new_thread->GetProcessorID() == this->id, + "Thread must be assigned to this core."); ASSERT_MSG(new_thread->GetStatus() == ThreadStatus::Ready, "Thread must be ready to become running."); // Cancel any outstanding wakeup events for this thread new_thread->CancelWakeupTimer(); - current_thread = new_thread; - - ready_queue.remove(new_thread, new_thread->GetPriority()); new_thread->SetStatus(ThreadStatus::Running); + new_thread->SetIsRunning(true); auto* const thread_owner_process = current_thread->GetOwnerProcess(); if (previous_process != thread_owner_process) { @@ -116,7 +375,7 @@ void Scheduler::SwitchContext(Thread* new_thread) { void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) { const u64 prev_switch_ticks = last_context_switch_time; - const u64 most_recent_switch_ticks = system.CoreTiming().GetTicks(); + const u64 most_recent_switch_ticks = Core::System::GetInstance().CoreTiming().GetTicks(); const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks; if (thread != nullptr) { @@ -130,124 +389,4 @@ void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) { last_context_switch_time = most_recent_switch_ticks; } -void Scheduler::Reschedule() { - std::lock_guard lock{scheduler_mutex}; - - Thread* cur = GetCurrentThread(); - Thread* next = PopNextReadyThread(); - - if (cur && next) { - LOG_TRACE(Kernel, "context switch {} -> {}", cur->GetObjectId(), next->GetObjectId()); - } else if (cur) { - LOG_TRACE(Kernel, "context switch {} -> idle", cur->GetObjectId()); - } 
else if (next) { - LOG_TRACE(Kernel, "context switch idle -> {}", next->GetObjectId()); - } - - SwitchContext(next); -} - -void Scheduler::AddThread(SharedPtr thread) { - std::lock_guard lock{scheduler_mutex}; - - thread_list.push_back(std::move(thread)); -} - -void Scheduler::RemoveThread(Thread* thread) { - std::lock_guard lock{scheduler_mutex}; - - thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread), - thread_list.end()); -} - -void Scheduler::ScheduleThread(Thread* thread, u32 priority) { - std::lock_guard lock{scheduler_mutex}; - - ASSERT(thread->GetStatus() == ThreadStatus::Ready); - ready_queue.add(thread, priority); -} - -void Scheduler::UnscheduleThread(Thread* thread, u32 priority) { - std::lock_guard lock{scheduler_mutex}; - - ASSERT(thread->GetStatus() == ThreadStatus::Ready); - ready_queue.remove(thread, priority); -} - -void Scheduler::SetThreadPriority(Thread* thread, u32 priority) { - std::lock_guard lock{scheduler_mutex}; - if (thread->GetPriority() == priority) { - return; - } - - // If thread was ready, adjust queues - if (thread->GetStatus() == ThreadStatus::Ready) - ready_queue.adjust(thread, thread->GetPriority(), priority); -} - -Thread* Scheduler::GetNextSuggestedThread(u32 core, u32 maximum_priority) const { - std::lock_guard lock{scheduler_mutex}; - - const u32 mask = 1U << core; - for (auto* thread : ready_queue) { - if ((thread->GetAffinityMask() & mask) != 0 && thread->GetPriority() < maximum_priority) { - return thread; - } - } - return nullptr; -} - -void Scheduler::YieldWithoutLoadBalancing(Thread* thread) { - ASSERT(thread != nullptr); - // Avoid yielding if the thread isn't even running. 
- ASSERT(thread->GetStatus() == ThreadStatus::Running); - - // Sanity check that the priority is valid - ASSERT(thread->GetPriority() < THREADPRIO_COUNT); - - // Yield this thread -- sleep for zero time and force reschedule to different thread - GetCurrentThread()->Sleep(0); -} - -void Scheduler::YieldWithLoadBalancing(Thread* thread) { - ASSERT(thread != nullptr); - const auto priority = thread->GetPriority(); - const auto core = static_cast(thread->GetProcessorID()); - - // Avoid yielding if the thread isn't even running. - ASSERT(thread->GetStatus() == ThreadStatus::Running); - - // Sanity check that the priority is valid - ASSERT(priority < THREADPRIO_COUNT); - - // Sleep for zero time to be able to force reschedule to different thread - GetCurrentThread()->Sleep(0); - - Thread* suggested_thread = nullptr; - - // Search through all of the cpu cores (except this one) for a suggested thread. - // Take the first non-nullptr one - for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) { - const auto res = - system.CpuCore(cur_core).Scheduler().GetNextSuggestedThread(core, priority); - - // If scheduler provides a suggested thread - if (res != nullptr) { - // And its better than the current suggested thread (or is the first valid one) - if (suggested_thread == nullptr || - suggested_thread->GetPriority() > res->GetPriority()) { - suggested_thread = res; - } - } - } - - // If a suggested thread was found, queue that for this core - if (suggested_thread != nullptr) - suggested_thread->ChangeCore(core, suggested_thread->GetAffinityMask()); -} - -void Scheduler::YieldAndWaitForLoadBalancing(Thread* thread) { - UNIMPLEMENTED_MSG("Wait for load balancing thread yield type is not implemented!"); -} - } // namespace Kernel diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index b29bf7be85..50fa7376b4 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -20,124 +20,141 @@ namespace Kernel { class 
Process; -class Scheduler final { +class GlobalScheduler final { public: - explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core); - ~Scheduler(); - - /// Returns whether there are any threads that are ready to run. - bool HaveReadyThreads() const; - - /// Reschedules to the next available thread (call after current thread is suspended) - void Reschedule(); - - /// Gets the current running thread - Thread* GetCurrentThread() const; - - /// Gets the timestamp for the last context switch in ticks. - u64 GetLastContextSwitchTicks() const; + static constexpr u32 NUM_CPU_CORES = 4; + GlobalScheduler() { + reselection_pending = false; + } + ~GlobalScheduler(); /// Adds a new thread to the scheduler void AddThread(SharedPtr thread); /// Removes a thread from the scheduler void RemoveThread(Thread* thread); - /// Schedules a thread that has become "ready" - void ScheduleThread(Thread* thread, u32 priority); - - /// Unschedules a thread that was already scheduled - void UnscheduleThread(Thread* thread, u32 priority); - - /// Sets the priority of a thread in the scheduler - void SetThreadPriority(Thread* thread, u32 priority); - - /// Gets the next suggested thread for load balancing - Thread* GetNextSuggestedThread(u32 core, u32 minimum_priority) const; - - /** - * YieldWithoutLoadBalancing -- analogous to normal yield on a system - * Moves the thread to the end of the ready queue for its priority, and then reschedules the - * system to the new head of the queue. 
- * - * Example (Single Core -- but can be extrapolated to multi): - * ready_queue[prio=0]: ThreadA, ThreadB, ThreadC (->exec order->) - * Currently Running: ThreadR - * - * ThreadR calls YieldWithoutLoadBalancing - * - * ThreadR is moved to the end of ready_queue[prio=0]: - * ready_queue[prio=0]: ThreadA, ThreadB, ThreadC, ThreadR (->exec order->) - * Currently Running: Nothing - * - * System is rescheduled (ThreadA is popped off of queue): - * ready_queue[prio=0]: ThreadB, ThreadC, ThreadR (->exec order->) - * Currently Running: ThreadA - * - * If the queue is empty at time of call, no yielding occurs. This does not cross between cores - * or priorities at all. - */ - void YieldWithoutLoadBalancing(Thread* thread); - - /** - * YieldWithLoadBalancing -- yield but with better selection of the new running thread - * Moves the current thread to the end of the ready queue for its priority, then selects a - * 'suggested thread' (a thread on a different core that could run on this core) from the - * scheduler, changes its core, and reschedules the current core to that thread. - * - * Example (Dual Core -- can be extrapolated to Quad Core, this is just normal yield if it were - * single core): - * ready_queue[core=0][prio=0]: ThreadA, ThreadB (affinities not pictured as irrelevant - * ready_queue[core=1][prio=0]: ThreadC[affinity=both], ThreadD[affinity=core1only] - * Currently Running: ThreadQ on Core 0 || ThreadP on Core 1 - * - * ThreadQ calls YieldWithLoadBalancing - * - * ThreadQ is moved to the end of ready_queue[core=0][prio=0]: - * ready_queue[core=0][prio=0]: ThreadA, ThreadB - * ready_queue[core=1][prio=0]: ThreadC[affinity=both], ThreadD[affinity=core1only] - * Currently Running: ThreadQ on Core 0 || ThreadP on Core 1 - * - * A list of suggested threads for each core is compiled - * Suggested Threads: {ThreadC on Core 1} - * If this were quad core (as the switch is), there could be between 0 and 3 threads in this - * list. 
If there are more than one, the thread is selected by highest prio. - * - * ThreadC is core changed to Core 0: - * ready_queue[core=0][prio=0]: ThreadC, ThreadA, ThreadB, ThreadQ - * ready_queue[core=1][prio=0]: ThreadD - * Currently Running: None on Core 0 || ThreadP on Core 1 - * - * System is rescheduled (ThreadC is popped off of queue): - * ready_queue[core=0][prio=0]: ThreadA, ThreadB, ThreadQ - * ready_queue[core=1][prio=0]: ThreadD - * Currently Running: ThreadC on Core 0 || ThreadP on Core 1 - * - * If no suggested threads can be found this will behave just as normal yield. If there are - * multiple candidates for the suggested thread on a core, the highest prio is taken. - */ - void YieldWithLoadBalancing(Thread* thread); - - /// Currently unknown -- asserts as unimplemented on call - void YieldAndWaitForLoadBalancing(Thread* thread); - /// Returns a list of all threads managed by the scheduler const std::vector>& GetThreadList() const { return thread_list; } -private: - /** - * Pops and returns the next thread from the thread queue - * @return A pointer to the next ready thread - */ - Thread* PopNextReadyThread(); + void Suggest(u32 priority, u32 core, Thread* thread) { + suggested_queue[core].add(thread, priority); + } + void Unsuggest(u32 priority, u32 core, Thread* thread) { + suggested_queue[core].remove(thread, priority); + } + + void Schedule(u32 priority, u32 core, Thread* thread) { + ASSERT_MSG(thread->GetProcessorID() == core, + "Thread must be assigned to this core."); + scheduled_queue[core].add(thread, priority); + } + + void SchedulePrepend(u32 priority, u32 core, Thread* thread) { + ASSERT_MSG(thread->GetProcessorID() == core, + "Thread must be assigned to this core."); + scheduled_queue[core].add(thread, priority, false); + } + + void Reschedule(u32 priority, u32 core, Thread* thread) { + scheduled_queue[core].remove(thread, priority); + scheduled_queue[core].add(thread, priority); + } + + void Unschedule(u32 priority, u32 core, Thread* 
thread) { + scheduled_queue[core].remove(thread, priority); + } + + void TransferToCore(u32 priority, s32 destination_core, Thread* thread) { + bool schedulable = thread->GetPriority() < THREADPRIO_COUNT; + s32 source_core = thread->GetProcessorID(); + if (source_core == destination_core || !schedulable) + return; + thread->SetProcessorID(destination_core); + if (source_core >= 0) + Unschedule(priority, source_core, thread); + if (destination_core >= 0) { + Unsuggest(priority, destination_core, thread); + Schedule(priority, destination_core, thread); + } + if (source_core >= 0) + Suggest(priority, source_core, thread); + } + + void UnloadThread(s32 core); + + void SelectThreads(); + void SelectThread(u32 core); + + bool HaveReadyThreads(u32 core_id) { + return !scheduled_queue[core_id].empty(); + } + + void YieldThread(Thread* thread); + void YieldThreadAndBalanceLoad(Thread* thread); + void YieldThreadAndWaitForLoadBalancing(Thread* thread); + + u32 CpuCoresCount() const { + return NUM_CPU_CORES; + } + + void SetReselectionPending() { + reselection_pending.store(true, std::memory_order_release); + } + + bool IsReselectionPending() { + return reselection_pending.load(std::memory_order_acquire); + } + +private: + void AskForReselectionOrMarkRedundant(Thread* current_thread, Thread* winner); + + static constexpr u32 min_regular_priority = 2; + std::array, NUM_CPU_CORES> scheduled_queue; + std::array, NUM_CPU_CORES> suggested_queue; + std::atomic reselection_pending; + + /// Lists all thread ids that aren't deleted/etc. + std::vector> thread_list; +}; + +class Scheduler final { +public: + explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, const u32 id); + ~Scheduler(); + + /// Returns whether there are any threads that are ready to run. 
+ bool HaveReadyThreads() const; + + /// Reschedules to the next available thread (call after current thread is suspended) + void TryDoContextSwitch(); + + void UnloadThread(); + + void SelectThreads(); + + /// Gets the current running thread + Thread* GetCurrentThread() const; + + Thread* GetSelectedThread() const; + + /// Gets the timestamp for the last context switch in ticks. + u64 GetLastContextSwitchTicks() const; + + bool ContextSwitchPending() const { + return context_switch_pending; + } + +private: + friend class GlobalScheduler; /** * Switches the CPU's active thread context to that of the specified thread * @param new_thread The thread to switch to */ - void SwitchContext(Thread* new_thread); + void SwitchContext(); /** * Called on every context switch to update the internal timestamp @@ -152,19 +169,16 @@ private: */ void UpdateLastContextSwitchTime(Thread* thread, Process* process); - /// Lists all thread ids that aren't deleted/etc. - std::vector> thread_list; - - /// Lists only ready thread ids. 
- Common::MultiLevelQueue ready_queue; - SharedPtr current_thread = nullptr; - - Core::ARM_Interface& cpu_core; - u64 last_context_switch_time = 0; + SharedPtr selected_thread = nullptr; Core::System& system; - static std::mutex scheduler_mutex; + Core::ARM_Interface& cpu_core; + u64 last_context_switch_time = 0; + u64 idle_selection_count = 0; + const u32 id; + + bool context_switch_pending = false; }; } // namespace Kernel From a1ac0c6cb47e10863b0bfbb1a6aadc71ccc513ab Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 29 Mar 2019 17:01:46 -0400 Subject: [PATCH 02/29] Addapt thread class to the new Scheduler --- src/core/hle/kernel/thread.cpp | 242 +++++++++++++++++++++++++-------- src/core/hle/kernel/thread.h | 55 +++++++- 2 files changed, 237 insertions(+), 60 deletions(-) diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index ec529e7f2d..d0fa7b3702 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -45,15 +45,7 @@ void Thread::Stop() { callback_handle); kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle); callback_handle = 0; - - // Clean up thread from ready queue - // This is only needed when the thread is terminated forcefully (SVC TerminateProcess) - if (status == ThreadStatus::Ready || status == ThreadStatus::Paused) { - scheduler->UnscheduleThread(this, current_priority); - } - - status = ThreadStatus::Dead; - + SetStatus(ThreadStatus::Dead); WakeupAllWaitingThreads(); // Clean up any dangling references in objects that this thread was waiting for @@ -132,13 +124,11 @@ void Thread::ResumeFromWait() { wakeup_callback = nullptr; if (activity == ThreadActivity::Paused) { - status = ThreadStatus::Paused; + SetStatus(ThreadStatus::Paused); return; } - status = ThreadStatus::Ready; - - ChangeScheduler(); + SetStatus(ThreadStatus::Ready); } void Thread::CancelWait() { @@ -205,9 +195,9 @@ ResultVal> Thread::Create(KernelCore& kernel, std::string name thread->name = std::move(name); 
thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap(); thread->owner_process = &owner_process; + auto& scheduler = kernel.GlobalScheduler(); + scheduler.AddThread(thread); thread->tls_address = thread->owner_process->CreateTLSRegion(); - thread->scheduler = &system.Scheduler(processor_id); - thread->scheduler->AddThread(thread); thread->owner_process->RegisterThread(thread.get()); @@ -250,6 +240,22 @@ void Thread::SetStatus(ThreadStatus new_status) { return; } + switch (new_status) { + case ThreadStatus::Ready: + case ThreadStatus::Running: + SetSchedulingStatus(ThreadSchedStatus::Runnable); + break; + case ThreadStatus::Dormant: + SetSchedulingStatus(ThreadSchedStatus::None); + break; + case ThreadStatus::Dead: + SetSchedulingStatus(ThreadSchedStatus::Exited); + break; + default: + SetSchedulingStatus(ThreadSchedStatus::Paused); + break; + } + if (status == ThreadStatus::Running) { last_running_ticks = Core::System::GetInstance().CoreTiming().GetTicks(); } @@ -311,8 +317,7 @@ void Thread::UpdatePriority() { return; } - scheduler->SetThreadPriority(this, new_priority); - current_priority = new_priority; + SetCurrentPriority(new_priority); if (!lock_owner) { return; @@ -328,47 +333,7 @@ void Thread::UpdatePriority() { } void Thread::ChangeCore(u32 core, u64 mask) { - ideal_core = core; - affinity_mask = mask; - ChangeScheduler(); -} - -void Thread::ChangeScheduler() { - if (status != ThreadStatus::Ready) { - return; - } - - auto& system = Core::System::GetInstance(); - std::optional new_processor_id{GetNextProcessorId(affinity_mask)}; - - if (!new_processor_id) { - new_processor_id = processor_id; - } - if (ideal_core != -1 && system.Scheduler(ideal_core).GetCurrentThread() == nullptr) { - new_processor_id = ideal_core; - } - - ASSERT(*new_processor_id < 4); - - // Add thread to new core's scheduler - auto& next_scheduler = system.Scheduler(*new_processor_id); - - if (*new_processor_id != processor_id) { - // Remove thread from 
previous core's scheduler - scheduler->RemoveThread(this); - next_scheduler.AddThread(this); - } - - processor_id = *new_processor_id; - - // If the thread was ready, unschedule from the previous core and schedule on the new core - scheduler->UnscheduleThread(this, current_priority); - next_scheduler.ScheduleThread(this, current_priority); - - // Change thread's scheduler - scheduler = &next_scheduler; - - system.CpuCore(processor_id).PrepareReschedule(); + SetCoreAndAffinityMask(core, mask); } bool Thread::AllWaitObjectsReady() const { @@ -391,7 +356,7 @@ void Thread::SetActivity(ThreadActivity value) { if (status == ThreadStatus::Ready) { status = ThreadStatus::Paused; } else if (status == ThreadStatus::Running) { - status = ThreadStatus::Paused; + SetStatus(ThreadStatus::Paused); Core::System::GetInstance().CpuCore(processor_id).PrepareReschedule(); } } else if (status == ThreadStatus::Paused) { @@ -408,6 +373,165 @@ void Thread::Sleep(s64 nanoseconds) { WakeAfterDelay(nanoseconds); } +void Thread::YieldType0() { + auto& scheduler = kernel.GlobalScheduler(); + scheduler.YieldThread(this); +} + +void Thread::YieldType1() { + auto& scheduler = kernel.GlobalScheduler(); + scheduler.YieldThreadAndBalanceLoad(this); +} + +void Thread::YieldType2() { + auto& scheduler = kernel.GlobalScheduler(); + scheduler.YieldThreadAndWaitForLoadBalancing(this); +} + +void Thread::SetSchedulingStatus(ThreadSchedStatus new_status) { + u32 old_flags = scheduling_state; + scheduling_state = + (scheduling_state & ThreadSchedMasks::HighMask) | static_cast(new_status); + AdjustSchedulingOnStatus(old_flags); +} + +void Thread::SetCurrentPriority(u32 new_priority) { + u32 old_priority = current_priority; + current_priority = new_priority; + AdjustSchedulingOnPriority(old_priority); +} + +ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) { + auto HighestSetCore = [](u64 mask, u32 max_cores) { + for (s32 core = max_cores - 1; core >= 0; core--) { + if (((mask >> 
core) & 1) != 0) + return core; + } + return -1; + }; + bool use_override = affinity_override_count != 0; + // The value -3 is "do not change the ideal core". + if (new_core == -3) { + new_core = use_override ? ideal_core_override : ideal_core; + if ((new_affinity_mask & (1 << new_core)) == 0) { + return ERR_INVALID_COMBINATION; + } + } + if (use_override) { + ideal_core_override = new_core; + affinity_mask_override = new_affinity_mask; + } else { + u64 old_affinity_mask = affinity_mask; + ideal_core = new_core; + affinity_mask = new_affinity_mask; + if (old_affinity_mask != new_affinity_mask) { + s32 old_core = processor_id; + if (processor_id >= 0 && ((affinity_mask >> processor_id) & 1) == 0) { + if (ideal_core < 0) { + processor_id = HighestSetCore(affinity_mask, GlobalScheduler::NUM_CPU_CORES); + } else { + processor_id = ideal_core; + } + } + AdjustSchedulingOnAffinity(old_affinity_mask, old_core); + } + } + return RESULT_SUCCESS; +} + +void Thread::AdjustSchedulingOnStatus(u32 old_flags) { + if (old_flags == scheduling_state) + return; + + auto& scheduler = kernel.GlobalScheduler(); + if (static_cast(old_flags & ThreadSchedMasks::LowMask) == + ThreadSchedStatus::Runnable) { + // In this case the thread was running, now it's pausing/exitting + if (processor_id >= 0) + scheduler.Unschedule(current_priority, processor_id, this); + + for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + if (core != processor_id && ((affinity_mask >> core) & 1) != 0) + scheduler.Unsuggest(current_priority, core, this); + } + } else if (GetSchedulingStatus() == ThreadSchedStatus::Runnable) { + // The thread is now set to running from being stopped + if (processor_id >= 0) + scheduler.Schedule(current_priority, processor_id, this); + + for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + if (core != processor_id && ((affinity_mask >> core) & 1) != 0) + scheduler.Suggest(current_priority, core, this); + } + } + + scheduler.SetReselectionPending(); +} 
+ +void Thread::AdjustSchedulingOnPriority(u32 old_priority) { + if (GetSchedulingStatus() != ThreadSchedStatus::Runnable) { + return; + } + auto& scheduler = Core::System::GetInstance().GlobalScheduler(); + if (processor_id >= 0) { + scheduler.Unschedule(old_priority, processor_id, this); + } + + for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + if (core != processor_id && ((affinity_mask >> core) & 1) != 0) { + scheduler.Unsuggest(old_priority, core, this); + } + } + + // Add thread to the new priority queues. + Thread* current_thread = GetCurrentThread(); + + if (processor_id >= 0) { + if (current_thread == this) { + scheduler.SchedulePrepend(current_priority, processor_id, this); + } else { + scheduler.Schedule(current_priority, processor_id, this); + } + } + + for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + if (core != processor_id && ((affinity_mask >> core) & 1) != 0) { + scheduler.Suggest(current_priority, core, this); + } + } + + scheduler.SetReselectionPending(); +} + +void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) { + auto& scheduler = Core::System::GetInstance().GlobalScheduler(); + if (GetSchedulingStatus() != ThreadSchedStatus::Runnable || + current_priority >= THREADPRIO_COUNT) + return; + + for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + if (((old_affinity_mask >> core) & 1) != 0) { + if (core == old_core) { + scheduler.Unschedule(current_priority, core, this); + } else { + scheduler.Unsuggest(current_priority, core, this); + } + } + } + + for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + if (((affinity_mask >> core) & 1) != 0) { + if (core == processor_id) { + scheduler.Schedule(current_priority, core, this); + } else { + scheduler.Suggest(current_priority, core, this); + } + } + } + + scheduler.SetReselectionPending(); +} + //////////////////////////////////////////////////////////////////////////////////////////////////// /** diff 
--git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 07e989637e..c426a72096 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -75,6 +75,21 @@ enum class ThreadActivity : u32 { Paused = 1, }; +enum class ThreadSchedStatus : u32 { None = 0, Paused = 1, Runnable = 2, Exited = 3 }; + +enum ThreadSchedFlags : u32 { + ProcessPauseFlag = 1 << 4, + ThreadPauseFlag = 1 << 5, + ProcessDebugPauseFlag = 1 << 6, + KernelInitPauseFlag = 1 << 8, +}; + +enum ThreadSchedMasks : u32 { + LowMask = 0x000f, + HighMask = 0xfff0, + ForcePauseMask = 0x0070, +}; + class Thread final : public WaitObject { public: using MutexWaitingThreads = std::vector>; @@ -278,6 +293,10 @@ public: return processor_id; } + void SetProcessorID(s32 new_core) { + processor_id = new_core; + } + Process* GetOwnerProcess() { return owner_process; } @@ -383,11 +402,38 @@ public: /// Sleeps this thread for the given amount of nanoseconds. void Sleep(s64 nanoseconds); + /// Yields this thread without rebalancing loads. + void YieldType0(); + + /// Yields this thread and does a load rebalancing. 
+ void YieldType1(); + + /// Yields this thread and if the core is left idle, loads are rebalanced + void YieldType2(); + + ThreadSchedStatus GetSchedulingStatus() { + return static_cast(scheduling_state & ThreadSchedMasks::LowMask); + } + + bool IsRunning() const { + return is_running; + } + + void SetIsRunning(bool value) { + is_running = value; + } + private: explicit Thread(KernelCore& kernel); ~Thread() override; - void ChangeScheduler(); + void SetSchedulingStatus(ThreadSchedStatus new_status); + void SetCurrentPriority(u32 new_priority); + ResultCode SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask); + + void AdjustSchedulingOnStatus(u32 old_flags); + void AdjustSchedulingOnPriority(u32 old_priority); + void AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core); Core::ARM_Interface::ThreadContext context{}; @@ -453,6 +499,13 @@ private: ThreadActivity activity = ThreadActivity::Normal; + s32 ideal_core_override = -1; + u64 affinity_mask_override = 0x1; + u32 affinity_override_count = 0; + + u32 scheduling_state = 0; + bool is_running = false; + std::string name; }; From 57a71f899a95ccaa2984c1cb35c083221a29fd6e Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 29 Mar 2019 17:02:57 -0400 Subject: [PATCH 03/29] Add interfacing to the Global Scheduler --- src/core/core.cpp | 10 ++++++++++ src/core/core.h | 7 +++++++ src/core/hle/kernel/kernel.cpp | 10 ++++++++++ src/core/hle/kernel/kernel.h | 7 +++++++ 4 files changed, 34 insertions(+) diff --git a/src/core/core.cpp b/src/core/core.cpp index 4d0ac72a51..5565840fd9 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -444,6 +444,16 @@ const Kernel::Scheduler& System::Scheduler(std::size_t core_index) const { return CpuCore(core_index).Scheduler(); } +/// Gets the global scheduler +Kernel::GlobalScheduler& System::GlobalScheduler() { + return impl->kernel.GlobalScheduler(); +} + +/// Gets the global scheduler +const Kernel::GlobalScheduler& System::GlobalScheduler() const { + 
return impl->kernel.GlobalScheduler(); +} + Kernel::Process* System::CurrentProcess() { return impl->kernel.CurrentProcess(); } diff --git a/src/core/core.h b/src/core/core.h index 90e7ac6075..2a002f6d77 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -27,6 +27,7 @@ namespace Kernel { class KernelCore; class Process; class Scheduler; +class GlobalScheduler; } // namespace Kernel namespace Loader { @@ -238,6 +239,12 @@ public: /// Gets the scheduler for the CPU core with the specified index const Kernel::Scheduler& Scheduler(std::size_t core_index) const; + /// Gets the global scheduler + Kernel::GlobalScheduler& GlobalScheduler(); + + /// Gets the global scheduler + const Kernel::GlobalScheduler& GlobalScheduler() const; + /// Provides a pointer to the current process Kernel::Process* CurrentProcess(); diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 799e5e0d85..b4fd1d3f32 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -18,6 +18,7 @@ #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/resource_limit.h" +#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/thread.h" #include "core/hle/lock.h" #include "core/hle/result.h" @@ -140,6 +141,7 @@ struct KernelCore::Impl { // Lists all processes that exist in the current session. 
std::vector> process_list; Process* current_process = nullptr; + Kernel::GlobalScheduler global_scheduler; SharedPtr system_resource_limit; @@ -203,6 +205,14 @@ const std::vector>& KernelCore::GetProcessList() const { return impl->process_list; } +Kernel::GlobalScheduler& KernelCore::GlobalScheduler() { + return impl->global_scheduler; +} + +const Kernel::GlobalScheduler& KernelCore::GlobalScheduler() const { + return impl->global_scheduler; +} + void KernelCore::AddNamedPort(std::string name, SharedPtr port) { impl->named_ports.emplace(std::move(name), std::move(port)); } diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 0cc44ee765..f9f5bdc88f 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -25,6 +25,7 @@ class HandleTable; class Process; class ResourceLimit; class Thread; +class GlobalScheduler; /// Represents a single instance of the kernel. class KernelCore { @@ -75,6 +76,12 @@ public: /// Retrieves the list of processes. const std::vector>& GetProcessList() const; + /// Gets the sole instance of the global scheduler + Kernel::GlobalScheduler& GlobalScheduler(); + + /// Gets the sole instance of the global scheduler + const Kernel::GlobalScheduler& GlobalScheduler() const; + /// Adds a port to the named port table void AddNamedPort(std::string name, SharedPtr port); From 47c6c78c031b33af877a64aa1da2705558ab02c2 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 29 Mar 2019 17:09:10 -0400 Subject: [PATCH 04/29] Redesign CPU Cores to work with the new scheduler --- src/core/core_cpu.cpp | 23 ++++++++++------------- src/core/core_cpu.h | 2 ++ 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp index 6bd9639c6e..2a7c3af240 100644 --- a/src/core/core_cpu.cpp +++ b/src/core/core_cpu.cpp @@ -52,7 +52,8 @@ bool CpuBarrier::Rendezvous() { Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, std::size_t 
core_index) - : cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} { + : cpu_barrier{cpu_barrier}, global_scheduler{system.GlobalScheduler()}, + core_timing{system.CoreTiming()}, core_index{core_index} { #ifdef ARCHITECTURE_x86_64 arm_interface = std::make_unique(system, exclusive_monitor, core_index); #else @@ -60,7 +61,7 @@ Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_ba LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); #endif - scheduler = std::make_unique(system, *arm_interface); + scheduler = std::make_unique(system, *arm_interface, core_index); } Cpu::~Cpu() = default; @@ -81,21 +82,21 @@ void Cpu::RunLoop(bool tight_loop) { return; } + Reschedule(); + // If we don't have a currently active thread then don't execute instructions, // instead advance to the next event and try to yield to the next thread if (Kernel::GetCurrentThread() == nullptr) { LOG_TRACE(Core, "Core-{} idling", core_index); core_timing.Idle(); - core_timing.Advance(); - PrepareReschedule(); } else { if (tight_loop) { arm_interface->Run(); } else { arm_interface->Step(); } - core_timing.Advance(); } + core_timing.Advance(); Reschedule(); } @@ -106,18 +107,14 @@ void Cpu::SingleStep() { void Cpu::PrepareReschedule() { arm_interface->PrepareReschedule(); - reschedule_pending = true; } void Cpu::Reschedule() { - if (!reschedule_pending) { - return; - } - - reschedule_pending = false; // Lock the global kernel mutex when we manipulate the HLE state - std::lock_guard lock{HLE::g_hle_lock}; - scheduler->Reschedule(); + std::lock_guard lock(HLE::g_hle_lock); + + global_scheduler.SelectThread(core_index); + scheduler->TryDoContextSwitch(); } } // namespace Core diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h index 7589beb8c8..5dde2994ce 100644 --- a/src/core/core_cpu.h +++ b/src/core/core_cpu.h @@ -13,6 +13,7 @@ namespace Kernel { class Scheduler; +class GlobalScheduler; } namespace Core { @@ -90,6 +91,7 @@ 
private: std::unique_ptr arm_interface; CpuBarrier& cpu_barrier; + Kernel::GlobalScheduler& global_scheduler; std::unique_ptr scheduler; Timing::CoreTiming& core_timing; From 9031502974fa25c9c8521ad96ecc8126fcac51c6 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 29 Mar 2019 17:11:25 -0400 Subject: [PATCH 05/29] Correct Supervisor Calls to work with the new scheduler, --- src/core/hle/kernel/svc.cpp | 67 +++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 26 deletions(-) diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 1fd1a732a9..ee1e9f006a 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -534,6 +534,8 @@ static ResultCode CancelSynchronization(Core::System& system, Handle thread_hand } thread->CancelWait(); + if (thread->GetProcessorID() >= 0) + Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); return RESULT_SUCCESS; } @@ -1066,6 +1068,9 @@ static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 act } thread->SetActivity(static_cast(activity)); + + if (thread->GetProcessorID() >= 0) + Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); return RESULT_SUCCESS; } @@ -1147,7 +1152,8 @@ static ResultCode SetThreadPriority(Core::System& system, Handle handle, u32 pri thread->SetPriority(priority); - system.CpuCore(thread->GetProcessorID()).PrepareReschedule(); + if (thread->GetProcessorID() >= 0) + Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); return RESULT_SUCCESS; } @@ -1503,7 +1509,8 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e thread->SetName( fmt::format("thread[entry_point={:X}, handle={:X}]", entry_point, *new_thread_handle)); - system.CpuCore(thread->GetProcessorID()).PrepareReschedule(); + if (thread->GetProcessorID() >= 0) + Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); return 
RESULT_SUCCESS; } @@ -1525,7 +1532,10 @@ static ResultCode StartThread(Core::System& system, Handle thread_handle) { thread->ResumeFromWait(); if (thread->GetStatus() == ThreadStatus::Ready) { - system.CpuCore(thread->GetProcessorID()).PrepareReschedule(); + if (thread->GetProcessorID() >= 0) + Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); + else + Core::System::GetInstance().GlobalScheduler().SetReselectionPending(); } return RESULT_SUCCESS; @@ -1537,7 +1547,7 @@ static void ExitThread(Core::System& system) { auto* const current_thread = system.CurrentScheduler().GetCurrentThread(); current_thread->Stop(); - system.CurrentScheduler().RemoveThread(current_thread); + system.GlobalScheduler().RemoveThread(current_thread); system.PrepareReschedule(); } @@ -1557,13 +1567,13 @@ static void SleepThread(Core::System& system, s64 nanoseconds) { if (nanoseconds <= 0) { switch (static_cast(nanoseconds)) { case SleepType::YieldWithoutLoadBalancing: - scheduler.YieldWithoutLoadBalancing(current_thread); + current_thread->YieldType0(); break; case SleepType::YieldWithLoadBalancing: - scheduler.YieldWithLoadBalancing(current_thread); + current_thread->YieldType1(); break; case SleepType::YieldAndWaitForLoadBalancing: - scheduler.YieldAndWaitForLoadBalancing(current_thread); + current_thread->YieldType2(); break; default: UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds); @@ -1632,24 +1642,16 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var LOG_TRACE(Kernel_SVC, "called, condition_variable_addr=0x{:X}, target=0x{:08X}", condition_variable_addr, target); - const auto RetrieveWaitingThreads = [&system](std::size_t core_index, - std::vector>& waiting_threads, - VAddr condvar_addr) { - const auto& scheduler = system.Scheduler(core_index); - const auto& thread_list = scheduler.GetThreadList(); - - for (const auto& thread : thread_list) { - if (thread->GetCondVarWaitAddress() == 
condvar_addr) - waiting_threads.push_back(thread); - } - }; - // Retrieve a list of all threads that are waiting for this condition variable. std::vector> waiting_threads; - RetrieveWaitingThreads(0, waiting_threads, condition_variable_addr); - RetrieveWaitingThreads(1, waiting_threads, condition_variable_addr); - RetrieveWaitingThreads(2, waiting_threads, condition_variable_addr); - RetrieveWaitingThreads(3, waiting_threads, condition_variable_addr); + const auto& scheduler = Core::System::GetInstance().GlobalScheduler(); + const auto& thread_list = scheduler.GetThreadList(); + + for (const auto& thread : thread_list) { + if (thread->GetCondVarWaitAddress() == condition_variable_addr) + waiting_threads.push_back(thread); + } + // Sort them by priority, such that the highest priority ones come first. std::sort(waiting_threads.begin(), waiting_threads.end(), [](const SharedPtr& lhs, const SharedPtr& rhs) { @@ -1704,7 +1706,8 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var thread->SetLockOwner(nullptr); thread->SetMutexWaitAddress(0); thread->SetWaitHandle(0); - system.CpuCore(thread->GetProcessorID()).PrepareReschedule(); + if (thread->GetProcessorID() >= 0) + Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); } else { // Atomically signal that the mutex now has a waiting thread. 
do { @@ -1728,6 +1731,8 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var thread->SetStatus(ThreadStatus::WaitMutex); owner->AddMutexWaiter(thread); + if (thread->GetProcessorID() >= 0) + Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); } } @@ -1753,8 +1758,14 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, } const auto arbitration_type = static_cast(type); - auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter(); - return address_arbiter.WaitForAddress(address, arbitration_type, value, timeout); + auto& address_arbiter = + system.Kernel().CurrentProcess()->GetAddressArbiter(); + ResultCode result = address_arbiter.WaitForAddress(address, arbitration_type, value, timeout); + if (result == RESULT_SUCCESS) + Core::System::GetInstance() + .CpuCore(GetCurrentThread()->GetProcessorID()) + .PrepareReschedule(); + return result; } // Signals to an address (via Address Arbiter) @@ -2040,7 +2051,10 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle, return ERR_INVALID_HANDLE; } + Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); thread->ChangeCore(core, affinity_mask); + if (thread->GetProcessorID() >= 0) + Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); return RESULT_SUCCESS; } @@ -2151,6 +2165,7 @@ static ResultCode SignalEvent(Core::System& system, Handle handle) { } writable_event->Signal(); + Core::System::GetInstance().PrepareReschedule(); return RESULT_SUCCESS; } From b8b7ebcece955316680a09eb68b891e0acff9fcc Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 29 Mar 2019 17:12:02 -0400 Subject: [PATCH 06/29] Correct compiling errors and addapt to the new interface. 
--- src/core/gdbstub/gdbstub.cpp | 32 +++++++++++++------------------- src/core/hle/kernel/process.cpp | 5 +---- src/yuzu/debugger/wait_tree.cpp | 5 +---- 3 files changed, 15 insertions(+), 27 deletions(-) diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp index db51d722f0..20bb50868c 100644 --- a/src/core/gdbstub/gdbstub.cpp +++ b/src/core/gdbstub/gdbstub.cpp @@ -202,13 +202,11 @@ void RegisterModule(std::string name, VAddr beg, VAddr end, bool add_elf_ext) { } static Kernel::Thread* FindThreadById(s64 id) { - for (u32 core = 0; core < Core::NUM_CPU_CORES; core++) { - const auto& threads = Core::System::GetInstance().Scheduler(core).GetThreadList(); - for (auto& thread : threads) { - if (thread->GetThreadID() == static_cast(id)) { - current_core = core; - return thread.get(); - } + const auto& threads = Core::System::GetInstance().GlobalScheduler().GetThreadList(); + for (auto& thread : threads) { + if (thread->GetThreadID() == static_cast(id)) { + current_core = thread->GetProcessorID(); + return thread.get(); } } return nullptr; @@ -647,11 +645,9 @@ static void HandleQuery() { SendReply(buffer.c_str()); } else if (strncmp(query, "fThreadInfo", strlen("fThreadInfo")) == 0) { std::string val = "m"; - for (u32 core = 0; core < Core::NUM_CPU_CORES; core++) { - const auto& threads = Core::System::GetInstance().Scheduler(core).GetThreadList(); - for (const auto& thread : threads) { - val += fmt::format("{:x},", thread->GetThreadID()); - } + const auto& threads = Core::System::GetInstance().GlobalScheduler().GetThreadList(); + for (const auto& thread : threads) { + val += fmt::format("{:x},", thread->GetThreadID()); } val.pop_back(); SendReply(val.c_str()); @@ -661,13 +657,11 @@ static void HandleQuery() { std::string buffer; buffer += "l"; buffer += ""; - for (u32 core = 0; core < Core::NUM_CPU_CORES; core++) { - const auto& threads = Core::System::GetInstance().Scheduler(core).GetThreadList(); - for (const auto& thread : threads) { - buffer += - 
fmt::format(R"*()*", - thread->GetThreadID(), core, thread->GetThreadID()); - } + const auto& threads = Core::System::GetInstance().GlobalScheduler().GetThreadList(); + for (const auto& thread : threads) { + buffer += + fmt::format(R"*()*", + thread->GetThreadID(), thread->GetProcessorID(), thread->GetThreadID()); } buffer += ""; SendReply(buffer.c_str()); diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index e80a12ac35..12a900bcc4 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -213,10 +213,7 @@ void Process::PrepareForTermination() { } }; - stop_threads(system.Scheduler(0).GetThreadList()); - stop_threads(system.Scheduler(1).GetThreadList()); - stop_threads(system.Scheduler(2).GetThreadList()); - stop_threads(system.Scheduler(3).GetThreadList()); + stop_threads(system.GlobalScheduler().GetThreadList()); FreeTLSRegion(tls_region_address); tls_region_address = 0; diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp index cd8180f8bc..c5b9aa08f3 100644 --- a/src/yuzu/debugger/wait_tree.cpp +++ b/src/yuzu/debugger/wait_tree.cpp @@ -66,10 +66,7 @@ std::vector> WaitTreeItem::MakeThreadItemList() }; const auto& system = Core::System::GetInstance(); - add_threads(system.Scheduler(0).GetThreadList()); - add_threads(system.Scheduler(1).GetThreadList()); - add_threads(system.Scheduler(2).GetThreadList()); - add_threads(system.Scheduler(3).GetThreadList()); + add_threads(system.GlobalScheduler().GetThreadList()); return item_list; } From b5d1e447821eb21158669e0ef1d24d630602f1fe Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 29 Mar 2019 17:13:00 -0400 Subject: [PATCH 07/29] Add PrepareReschedule where required. 
--- src/core/hle/kernel/address_arbiter.cpp | 28 +++++++++++-------------- src/core/hle/kernel/mutex.cpp | 2 ++ src/core/hle/kernel/wait_object.cpp | 4 ++++ 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp index c8842410b1..77f7bb451a 100644 --- a/src/core/hle/kernel/address_arbiter.cpp +++ b/src/core/hle/kernel/address_arbiter.cpp @@ -22,6 +22,8 @@ namespace Kernel { namespace { // Wake up num_to_wake (or all) threads in a vector. void WakeThreads(const std::vector>& waiting_threads, s32 num_to_wake) { + + auto& system = Core::System::GetInstance(); // Only process up to 'target' threads, unless 'target' is <= 0, in which case process // them all. std::size_t last = waiting_threads.size(); @@ -35,6 +37,8 @@ void WakeThreads(const std::vector>& waiting_threads, s32 num_ waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS); waiting_threads[i]->SetArbiterWaitAddress(0); waiting_threads[i]->ResumeFromWait(); + if (waiting_threads[i]->GetProcessorID() >= 0) + system.CpuCore(waiting_threads[i]->GetProcessorID()).PrepareReschedule(); } } } // Anonymous namespace @@ -174,25 +178,17 @@ ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) { } std::vector> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) const { - const auto RetrieveWaitingThreads = [this](std::size_t core_index, - std::vector>& waiting_threads, - VAddr arb_addr) { - const auto& scheduler = system.Scheduler(core_index); - const auto& thread_list = scheduler.GetThreadList(); - - for (const auto& thread : thread_list) { - if (thread->GetArbiterWaitAddress() == arb_addr) { - waiting_threads.push_back(thread); - } - } - }; // Retrieve all threads that are waiting for this address. 
std::vector> threads; - RetrieveWaitingThreads(0, threads, address); - RetrieveWaitingThreads(1, threads, address); - RetrieveWaitingThreads(2, threads, address); - RetrieveWaitingThreads(3, threads, address); + const auto& scheduler = system.GlobalScheduler(); + const auto& thread_list = scheduler.GetThreadList(); + + for (const auto& thread : thread_list) { + if (thread->GetArbiterWaitAddress() == address) { + threads.push_back(thread); + } + } // Sort them by priority, such that the highest priority ones come first. std::sort(threads.begin(), threads.end(), diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp index 98e87313bb..57f2d8bf30 100644 --- a/src/core/hle/kernel/mutex.cpp +++ b/src/core/hle/kernel/mutex.cpp @@ -140,6 +140,8 @@ ResultCode Mutex::Release(VAddr address) { thread->SetMutexWaitAddress(0); thread->SetWaitHandle(0); + Core::System::GetInstance().PrepareReschedule(); + return RESULT_SUCCESS; } } // namespace Kernel diff --git a/src/core/hle/kernel/wait_object.cpp b/src/core/hle/kernel/wait_object.cpp index 0e96ba8723..e035a67e99 100644 --- a/src/core/hle/kernel/wait_object.cpp +++ b/src/core/hle/kernel/wait_object.cpp @@ -6,6 +6,8 @@ #include "common/assert.h" #include "common/common_types.h" #include "common/logging/log.h" +#include "core/core.h" +#include "core/core_cpu.h" #include "core/hle/kernel/object.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/thread.h" @@ -95,6 +97,8 @@ void WaitObject::WakeupWaitingThread(SharedPtr thread) { } if (resume) { thread->ResumeFromWait(); + if (thread->GetProcessorID() >= 0) + Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); } } From 3a94e7ea3386cbd14e74255e0a4c7f8615a396c9 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 2 Apr 2019 08:03:44 -0400 Subject: [PATCH 08/29] Comment and reorganize the scheduler --- src/core/hle/kernel/scheduler.cpp | 170 +++++++++++++----------------- src/core/hle/kernel/scheduler.h | 38 
++++++- 2 files changed, 107 insertions(+), 101 deletions(-) diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 878aeed6d6..5376401526 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -19,6 +19,11 @@ namespace Kernel { +/* + * SelectThreads, Yield functions originally by TuxSH. + * licensed under GPLv2 or later under exception provided by the author. + */ + void GlobalScheduler::AddThread(SharedPtr thread) { thread_list.push_back(std::move(thread)); } @@ -29,15 +34,23 @@ void GlobalScheduler::RemoveThread(Thread* thread) { } /* - * SelectThreads, Yield functions originally by TuxSH. - * licensed under GPLv2 or later under exception provided by the author. + * UnloadThread selects a core and forces it to unload its current thread's context */ - void GlobalScheduler::UnloadThread(s32 core) { Scheduler& sched = Core::System::GetInstance().Scheduler(core); sched.UnloadThread(); } +/* + * SelectThread takes care of selecting the new scheduled thread. + * It does it in 3 steps: + * - First a thread is selected from the top of the priority queue. If no thread + * is obtained then we move to step two, else we are done. + * - Second we try to get a suggested thread that's not assigned to any core or + * that is not the top thread in that core. + * - Third is no suggested thread is found, we do a second pass and pick a running + * thread in another core and swap it with its current thread. + */ void GlobalScheduler::SelectThread(u32 core) { auto update_thread = [](Thread* thread, Scheduler& sched) { if (thread != sched.selected_thread) { @@ -51,105 +64,58 @@ void GlobalScheduler::SelectThread(u32 core) { }; Scheduler& sched = Core::System::GetInstance().Scheduler(core); Thread* current_thread = nullptr; + // Step 1: Get top thread in schedule queue. current_thread = scheduled_queue[core].empty() ? 
nullptr : scheduled_queue[core].front(); - if (!current_thread) { - Thread* winner = nullptr; - std::set sug_cores; - for (auto thread : suggested_queue[core]) { - s32 this_core = thread->GetProcessorID(); - Thread* thread_on_core = nullptr; - if (this_core >= 0) { - thread_on_core = scheduled_queue[this_core].front(); - } - if (this_core < 0 || thread != thread_on_core) { - winner = thread; - break; - } - sug_cores.insert(this_core); + if (current_thread) { + update_thread(current_thread, sched); + return; + } + // Step 2: Try selecting a suggested thread. + Thread* winner = nullptr; + std::set sug_cores; + for (auto thread : suggested_queue[core]) { + s32 this_core = thread->GetProcessorID(); + Thread* thread_on_core = nullptr; + if (this_core >= 0) { + thread_on_core = scheduled_queue[this_core].front(); } - if (winner && winner->GetPriority() > 2) { - if (winner->IsRunning()) { - UnloadThread(winner->GetProcessorID()); - } - TransferToCore(winner->GetPriority(), core, winner); - current_thread = winner; - } else { - for (auto& src_core : sug_cores) { - auto it = scheduled_queue[src_core].begin(); - it++; - if (it != scheduled_queue[src_core].end()) { - Thread* thread_on_core = scheduled_queue[src_core].front(); - Thread* to_change = *it; - if (thread_on_core->IsRunning() || to_change->IsRunning()) { - UnloadThread(src_core); - } - TransferToCore(thread_on_core->GetPriority(), core, thread_on_core); - current_thread = thread_on_core; - } + if (this_core < 0 || thread != thread_on_core) { + winner = thread; + break; + } + sug_cores.insert(this_core); + } + // if we got a suggested thread, select it, else do a second pass. 
+ if (winner && winner->GetPriority() > 2) { + if (winner->IsRunning()) { + UnloadThread(winner->GetProcessorID()); + } + TransferToCore(winner->GetPriority(), core, winner); + update_thread(winner, sched); + return; + } + // Step 3: Select a suggested thread from another core + for (auto& src_core : sug_cores) { + auto it = scheduled_queue[src_core].begin(); + it++; + if (it != scheduled_queue[src_core].end()) { + Thread* thread_on_core = scheduled_queue[src_core].front(); + Thread* to_change = *it; + if (thread_on_core->IsRunning() || to_change->IsRunning()) { + UnloadThread(src_core); } + TransferToCore(thread_on_core->GetPriority(), core, thread_on_core); + current_thread = thread_on_core; + break; } } update_thread(current_thread, sched); } -void GlobalScheduler::SelectThreads() { - auto update_thread = [](Thread* thread, Scheduler& sched) { - if (thread != sched.selected_thread) { - if (thread == nullptr) { - ++sched.idle_selection_count; - } - sched.selected_thread = thread; - } - sched.context_switch_pending = sched.selected_thread != sched.current_thread; - std::atomic_thread_fence(std::memory_order_seq_cst); - }; - - auto& system = Core::System::GetInstance(); - - std::unordered_set picked_threads; - // This maintain the "current thread is on front of queue" invariant - std::array current_threads; - for (u32 i = 0; i < NUM_CPU_CORES; i++) { - Scheduler& sched = system.Scheduler(i); - current_threads[i] = scheduled_queue[i].empty() ? nullptr : scheduled_queue[i].front(); - if (current_threads[i]) - picked_threads.insert(current_threads[i]); - update_thread(current_threads[i], sched); - } - - // Do some load-balancing. Allow second pass. 
- std::array current_threads_2 = current_threads; - for (u32 i = 0; i < NUM_CPU_CORES; i++) { - if (!scheduled_queue[i].empty()) { - continue; - } - Thread* winner = nullptr; - for (auto thread : suggested_queue[i]) { - if (thread->GetProcessorID() < 0 || thread != current_threads[i]) { - if (picked_threads.count(thread) == 0 && !thread->IsRunning()) { - winner = thread; - break; - } - } - } - if (winner) { - TransferToCore(winner->GetPriority(), i, winner); - current_threads_2[i] = winner; - picked_threads.insert(winner); - } - } - - // See which to-be-current threads have changed & update accordingly - for (u32 i = 0; i < NUM_CPU_CORES; i++) { - Scheduler& sched = system.Scheduler(i); - if (current_threads_2[i] != current_threads[i]) { - update_thread(current_threads_2[i], sched); - } - } - - reselection_pending.store(false, std::memory_order_release); -} - +/* + * YieldThread takes a thread and moves it to the back of the it's priority list + * This operation can be redundant and no scheduling is changed if marked as so. + */ void GlobalScheduler::YieldThread(Thread* yielding_thread) { // Note: caller should use critical section, etc. u32 core_id = static_cast(yielding_thread->GetProcessorID()); @@ -164,6 +130,12 @@ void GlobalScheduler::YieldThread(Thread* yielding_thread) { AskForReselectionOrMarkRedundant(yielding_thread, winner); } +/* + * YieldThreadAndBalanceLoad takes a thread and moves it to the back of the it's priority list. + * Afterwards, tries to pick a suggested thread from the suggested queue that has worse time or + * a better priority than the next thread in the core. + * This operation can be redundant and no scheduling is changed if marked as so. + */ void GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section, // etc. 
@@ -213,6 +185,12 @@ void GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { AskForReselectionOrMarkRedundant(yielding_thread, winner); } +/* + * YieldThreadAndWaitForLoadBalancing takes a thread and moves it out of the scheduling queue + * and into the suggested queue. If no thread can be squeduled afterwards in that core, + * a suggested thread is obtained instead. + * This operation can be redundant and no scheduling is changed if marked as so. + */ void GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread) { // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section, // etc. @@ -256,8 +234,8 @@ void GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread void GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread, Thread* winner) { if (current_thread == winner) { - // Nintendo (not us) has a nullderef bug on current_thread->owner, but which is never - // triggered. + // TODO(blinkhawk): manage redundant operations, this is not implemented. + // as its mostly an optimization. 
// current_thread->SetRedundantSchedulerOperation(); } else { reselection_pending.store(true, std::memory_order_release); diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 50fa7376b4..82ed64b556 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -48,14 +48,12 @@ public: } void Schedule(u32 priority, u32 core, Thread* thread) { - ASSERT_MSG(thread->GetProcessorID() == core, - "Thread must be assigned to this core."); + ASSERT_MSG(thread->GetProcessorID() == core, "Thread must be assigned to this core."); scheduled_queue[core].add(thread, priority); } void SchedulePrepend(u32 priority, u32 core, Thread* thread) { - ASSERT_MSG(thread->GetProcessorID() == core, - "Thread must be assigned to this core."); + ASSERT_MSG(thread->GetProcessorID() == core, "Thread must be assigned to this core."); scheduled_queue[core].add(thread, priority, false); } @@ -84,17 +82,47 @@ public: Suggest(priority, source_core, thread); } + /* + * UnloadThread selects a core and forces it to unload its current thread's context + */ void UnloadThread(s32 core); - void SelectThreads(); + /* + * SelectThread takes care of selecting the new scheduled thread. + * It does it in 3 steps: + * - First a thread is selected from the top of the priority queue. If no thread + * is obtained then we move to step two, else we are done. + * - Second we try to get a suggested thread that's not assigned to any core or + * that is not the top thread in that core. + * - Third is no suggested thread is found, we do a second pass and pick a running + * thread in another core and swap it with its current thread. + */ void SelectThread(u32 core); bool HaveReadyThreads(u32 core_id) { return !scheduled_queue[core_id].empty(); } + /* + * YieldThread takes a thread and moves it to the back of the it's priority list + * This operation can be redundant and no scheduling is changed if marked as so. 
+ */ void YieldThread(Thread* thread); + + /* + * YieldThreadAndBalanceLoad takes a thread and moves it to the back of the it's priority list. + * Afterwards, tries to pick a suggested thread from the suggested queue that has worse time or + * a better priority than the next thread in the core. + * This operation can be redundant and no scheduling is changed if marked as so. + */ void YieldThreadAndBalanceLoad(Thread* thread); + + /* + * YieldThreadAndWaitForLoadBalancing takes a thread and moves it out of the scheduling queue + * and into the suggested queue. If no thread can be squeduled afterwards in that core, + * a suggested thread is obtained instead. + * This operation can be redundant and no scheduling is changed if marked as so. + */ void YieldThreadAndWaitForLoadBalancing(Thread* thread); u32 CpuCoresCount() const { From fcc6b34fff3c9322a35e6457a699e70585a7e014 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 2 Apr 2019 09:22:53 -0400 Subject: [PATCH 09/29] Correct PrepareReschedule --- src/core/core.cpp | 5 +++ src/core/core.h | 3 ++ src/core/core_cpu.h | 2 +- src/core/hle/kernel/address_arbiter.cpp | 5 +-- src/core/hle/kernel/svc.cpp | 49 +++++++++---------------- src/core/hle/kernel/wait_object.cpp | 3 +- 6 files changed, 29 insertions(+), 38 deletions(-) diff --git a/src/core/core.cpp b/src/core/core.cpp index 5565840fd9..4a95630bd0 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -404,6 +404,11 @@ void System::PrepareReschedule() { CurrentCpuCore().PrepareReschedule(); } +void System::PrepareReschedule(s32 core_index) { + if (core_index >= 0) + CpuCore(core_index).PrepareReschedule(); +} + PerfStatsResults System::GetAndResetPerfStats() { return impl->GetAndResetPerfStats(); } diff --git a/src/core/core.h b/src/core/core.h index 2a002f6d77..0d1008895d 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -185,6 +185,9 @@ public: /// Prepare the core emulation for a reschedule void PrepareReschedule(); + /// Prepare the core 
emulation for a reschedule + void PrepareReschedule(s32 core_index); + /// Gets and resets core performance statistics PerfStatsResults GetAndResetPerfStats(); diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h index 5dde2994ce..0cde547878 100644 --- a/src/core/core_cpu.h +++ b/src/core/core_cpu.h @@ -14,7 +14,7 @@ namespace Kernel { class Scheduler; class GlobalScheduler; -} +} // namespace Kernel namespace Core { class System; diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp index 77f7bb451a..c66cd16ef3 100644 --- a/src/core/hle/kernel/address_arbiter.cpp +++ b/src/core/hle/kernel/address_arbiter.cpp @@ -37,8 +37,7 @@ void WakeThreads(const std::vector>& waiting_threads, s32 num_ waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS); waiting_threads[i]->SetArbiterWaitAddress(0); waiting_threads[i]->ResumeFromWait(); - if (waiting_threads[i]->GetProcessorID() >= 0) - system.CpuCore(waiting_threads[i]->GetProcessorID()).PrepareReschedule(); + system.PrepareReschedule(waiting_threads[i]->GetProcessorID()); } } } // Anonymous namespace @@ -173,7 +172,7 @@ ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) { current_thread->WakeAfterDelay(timeout); - system.CpuCore(current_thread->GetProcessorID()).PrepareReschedule(); + system.PrepareReschedule(current_thread->GetProcessorID()); return RESULT_TIMEOUT; } diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index ee1e9f006a..560ac39455 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -516,7 +516,7 @@ static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr thread->WakeAfterDelay(nano_seconds); thread->SetWakeupCallback(DefaultThreadWakeupCallback); - system.CpuCore(thread->GetProcessorID()).PrepareReschedule(); + system.PrepareReschedule(thread->GetProcessorID()); return RESULT_TIMEOUT; } @@ -534,8 +534,7 @@ static ResultCode CancelSynchronization(Core::System& 
system, Handle thread_hand } thread->CancelWait(); - if (thread->GetProcessorID() >= 0) - Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); + system.PrepareReschedule(thread->GetProcessorID()); return RESULT_SUCCESS; } @@ -1069,8 +1068,7 @@ static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 act thread->SetActivity(static_cast(activity)); - if (thread->GetProcessorID() >= 0) - Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); + system.PrepareReschedule(thread->GetProcessorID()); return RESULT_SUCCESS; } @@ -1152,8 +1150,7 @@ static ResultCode SetThreadPriority(Core::System& system, Handle handle, u32 pri thread->SetPriority(priority); - if (thread->GetProcessorID() >= 0) - Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); + system.PrepareReschedule(thread->GetProcessorID()); return RESULT_SUCCESS; } @@ -1509,8 +1506,7 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e thread->SetName( fmt::format("thread[entry_point={:X}, handle={:X}]", entry_point, *new_thread_handle)); - if (thread->GetProcessorID() >= 0) - Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); + system.PrepareReschedule(thread->GetProcessorID()); return RESULT_SUCCESS; } @@ -1532,10 +1528,7 @@ static ResultCode StartThread(Core::System& system, Handle thread_handle) { thread->ResumeFromWait(); if (thread->GetStatus() == ThreadStatus::Ready) { - if (thread->GetProcessorID() >= 0) - Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); - else - Core::System::GetInstance().GlobalScheduler().SetReselectionPending(); + system.PrepareReschedule(thread->GetProcessorID()); } return RESULT_SUCCESS; @@ -1582,10 +1575,7 @@ static void SleepThread(Core::System& system, s64 nanoseconds) { current_thread->Sleep(nanoseconds); } - // Reschedule all CPU cores - for (std::size_t i = 0; i < 
Core::NUM_CPU_CORES; ++i) { - system.CpuCore(i).PrepareReschedule(); - } + system.PrepareReschedule(current_thread->GetProcessorID()); } /// Wait process wide key atomic @@ -1632,7 +1622,7 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add // Note: Deliberately don't attempt to inherit the lock owner's priority. - system.CpuCore(current_thread->GetProcessorID()).PrepareReschedule(); + system.PrepareReschedule(current_thread->GetProcessorID()); return RESULT_SUCCESS; } @@ -1644,7 +1634,7 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var // Retrieve a list of all threads that are waiting for this condition variable. std::vector> waiting_threads; - const auto& scheduler = Core::System::GetInstance().GlobalScheduler(); + const auto& scheduler = system.GlobalScheduler(); const auto& thread_list = scheduler.GetThreadList(); for (const auto& thread : thread_list) { @@ -1706,8 +1696,7 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var thread->SetLockOwner(nullptr); thread->SetMutexWaitAddress(0); thread->SetWaitHandle(0); - if (thread->GetProcessorID() >= 0) - Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); + system.PrepareReschedule(thread->GetProcessorID()); } else { // Atomically signal that the mutex now has a waiting thread. 
do { @@ -1731,8 +1720,7 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var thread->SetStatus(ThreadStatus::WaitMutex); owner->AddMutexWaiter(thread); - if (thread->GetProcessorID() >= 0) - Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); + system.PrepareReschedule(thread->GetProcessorID()); } } @@ -1758,13 +1746,10 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, } const auto arbitration_type = static_cast(type); - auto& address_arbiter = - system.Kernel().CurrentProcess()->GetAddressArbiter(); + auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter(); ResultCode result = address_arbiter.WaitForAddress(address, arbitration_type, value, timeout); if (result == RESULT_SUCCESS) - Core::System::GetInstance() - .CpuCore(GetCurrentThread()->GetProcessorID()) - .PrepareReschedule(); + system.PrepareReschedule(); return result; } @@ -2051,10 +2036,10 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle, return ERR_INVALID_HANDLE; } - Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); + system.PrepareReschedule(thread->GetProcessorID()); thread->ChangeCore(core, affinity_mask); - if (thread->GetProcessorID() >= 0) - Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); + system.PrepareReschedule(thread->GetProcessorID()); + return RESULT_SUCCESS; } @@ -2165,7 +2150,7 @@ static ResultCode SignalEvent(Core::System& system, Handle handle) { } writable_event->Signal(); - Core::System::GetInstance().PrepareReschedule(); + system.PrepareReschedule(); return RESULT_SUCCESS; } diff --git a/src/core/hle/kernel/wait_object.cpp b/src/core/hle/kernel/wait_object.cpp index e035a67e99..a65ec7dbc3 100644 --- a/src/core/hle/kernel/wait_object.cpp +++ b/src/core/hle/kernel/wait_object.cpp @@ -97,8 +97,7 @@ void WaitObject::WakeupWaitingThread(SharedPtr thread) { } if (resume) { 
thread->ResumeFromWait(); - if (thread->GetProcessorID() >= 0) - Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); + Core::System::GetInstance().PrepareReschedule(thread->GetProcessorID()); } } From 82218c925af8bcbaa05ae9f39af2d2393de7681f Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 19 Jun 2019 09:11:18 -0400 Subject: [PATCH 10/29] Kernel: Style and Corrections --- src/core/core.cpp | 5 +- src/core/core.h | 4 +- src/core/core_cpu.cpp | 2 +- src/core/core_cpu.h | 2 +- src/core/hle/kernel/address_arbiter.cpp | 1 - src/core/hle/kernel/kernel.cpp | 2 +- src/core/hle/kernel/mutex.cpp | 2 +- src/core/hle/kernel/scheduler.cpp | 78 +++++++++++++++---------- src/core/hle/kernel/scheduler.h | 53 ++++++++++------- src/core/hle/kernel/svc.cpp | 15 +++-- src/core/hle/kernel/thread.cpp | 54 +++++++++-------- src/core/hle/kernel/thread.h | 15 +++-- 12 files changed, 137 insertions(+), 96 deletions(-) diff --git a/src/core/core.cpp b/src/core/core.cpp index 4a95630bd0..d79045eeaa 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -404,9 +404,10 @@ void System::PrepareReschedule() { CurrentCpuCore().PrepareReschedule(); } -void System::PrepareReschedule(s32 core_index) { - if (core_index >= 0) +void System::PrepareReschedule(const u32 core_index) { + if (core_index < GlobalScheduler().CpuCoresCount()) { CpuCore(core_index).PrepareReschedule(); + } } PerfStatsResults System::GetAndResetPerfStats() { diff --git a/src/core/core.h b/src/core/core.h index 0d1008895d..984074ce35 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -24,10 +24,10 @@ class VfsFilesystem; } // namespace FileSys namespace Kernel { +class GlobalScheduler; class KernelCore; class Process; class Scheduler; -class GlobalScheduler; } // namespace Kernel namespace Loader { @@ -186,7 +186,7 @@ public: void PrepareReschedule(); /// Prepare the core emulation for a reschedule - void PrepareReschedule(s32 core_index); + void PrepareReschedule(u32 core_index); /// 
Gets and resets core performance statistics PerfStatsResults GetAndResetPerfStats(); diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp index 2a7c3af240..a6f63e437a 100644 --- a/src/core/core_cpu.cpp +++ b/src/core/core_cpu.cpp @@ -111,7 +111,7 @@ void Cpu::PrepareReschedule() { void Cpu::Reschedule() { // Lock the global kernel mutex when we manipulate the HLE state - std::lock_guard lock(HLE::g_hle_lock); + std::lock_guard lock(HLE::g_hle_lock); global_scheduler.SelectThread(core_index); scheduler->TryDoContextSwitch(); diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h index 0cde547878..80261daf75 100644 --- a/src/core/core_cpu.h +++ b/src/core/core_cpu.h @@ -12,8 +12,8 @@ #include "common/common_types.h" namespace Kernel { -class Scheduler; class GlobalScheduler; +class Scheduler; } // namespace Kernel namespace Core { diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp index c66cd16ef3..4c1d3fd180 100644 --- a/src/core/hle/kernel/address_arbiter.cpp +++ b/src/core/hle/kernel/address_arbiter.cpp @@ -22,7 +22,6 @@ namespace Kernel { namespace { // Wake up num_to_wake (or all) threads in a vector. void WakeThreads(const std::vector>& waiting_threads, s32 num_to_wake) { - auto& system = Core::System::GetInstance(); // Only process up to 'target' threads, unless 'target' is <= 0, in which case process // them all. 
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index b4fd1d3f32..600d6ec742 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -89,7 +89,7 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_ } struct KernelCore::Impl { - explicit Impl(Core::System& system) : system{system} {} + explicit Impl(Core::System& system) : system{system}, global_scheduler{system} {} void Initialize(KernelCore& kernel) { Shutdown(); diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp index 57f2d8bf30..eb919246cd 100644 --- a/src/core/hle/kernel/mutex.cpp +++ b/src/core/hle/kernel/mutex.cpp @@ -140,7 +140,7 @@ ResultCode Mutex::Release(VAddr address) { thread->SetMutexWaitAddress(0); thread->SetWaitHandle(0); - Core::System::GetInstance().PrepareReschedule(); + system.PrepareReschedule(); return RESULT_SUCCESS; } diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 5376401526..df4e9b7994 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -1,6 +1,9 @@ // Copyright 2018 yuzu emulator team // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +// +// SelectThreads, Yield functions originally by TuxSH. +// licensed under GPLv2 or later under exception provided by the author. #include #include @@ -19,16 +22,15 @@ namespace Kernel { -/* - * SelectThreads, Yield functions originally by TuxSH. - * licensed under GPLv2 or later under exception provided by the author. 
- */ +GlobalScheduler::GlobalScheduler(Core::System& system) : system{system} { + reselection_pending = false; +} void GlobalScheduler::AddThread(SharedPtr thread) { thread_list.push_back(std::move(thread)); } -void GlobalScheduler::RemoveThread(Thread* thread) { +void GlobalScheduler::RemoveThread(const Thread* thread) { thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread), thread_list.end()); } @@ -37,7 +39,7 @@ void GlobalScheduler::RemoveThread(Thread* thread) { * UnloadThread selects a core and forces it to unload its current thread's context */ void GlobalScheduler::UnloadThread(s32 core) { - Scheduler& sched = Core::System::GetInstance().Scheduler(core); + Scheduler& sched = system.Scheduler(core); sched.UnloadThread(); } @@ -52,7 +54,7 @@ void GlobalScheduler::UnloadThread(s32 core) { * thread in another core and swap it with its current thread. */ void GlobalScheduler::SelectThread(u32 core) { - auto update_thread = [](Thread* thread, Scheduler& sched) { + const auto update_thread = [](Thread* thread, Scheduler& sched) { if (thread != sched.selected_thread) { if (thread == nullptr) { ++sched.idle_selection_count; @@ -62,7 +64,7 @@ void GlobalScheduler::SelectThread(u32 core) { sched.context_switch_pending = sched.selected_thread != sched.current_thread; std::atomic_thread_fence(std::memory_order_seq_cst); }; - Scheduler& sched = Core::System::GetInstance().Scheduler(core); + Scheduler& sched = system.Scheduler(core); Thread* current_thread = nullptr; // Step 1: Get top thread in schedule queue. current_thread = scheduled_queue[core].empty() ? nullptr : scheduled_queue[core].front(); @@ -118,8 +120,8 @@ void GlobalScheduler::SelectThread(u32 core) { */ void GlobalScheduler::YieldThread(Thread* yielding_thread) { // Note: caller should use critical section, etc. 
- u32 core_id = static_cast(yielding_thread->GetProcessorID()); - u32 priority = yielding_thread->GetPriority(); + const u32 core_id = static_cast(yielding_thread->GetProcessorID()); + const u32 priority = yielding_thread->GetPriority(); // Yield the thread ASSERT_MSG(yielding_thread == scheduled_queue[core_id].front(priority), @@ -139,8 +141,8 @@ void GlobalScheduler::YieldThread(Thread* yielding_thread) { void GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section, // etc. - u32 core_id = static_cast(yielding_thread->GetProcessorID()); - u32 priority = yielding_thread->GetPriority(); + const u32 core_id = static_cast(yielding_thread->GetProcessorID()); + const u32 priority = yielding_thread->GetPriority(); // Yield the thread ASSERT_MSG(yielding_thread == scheduled_queue[core_id].front(priority), @@ -155,12 +157,13 @@ void GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { Thread* next_thread = scheduled_queue[core_id].front(priority); Thread* winner = nullptr; for (auto& thread : suggested_queue[core_id]) { - s32 source_core = thread->GetProcessorID(); + const s32 source_core = thread->GetProcessorID(); if (source_core >= 0) { if (current_threads[source_core] != nullptr) { if (thread == current_threads[source_core] || - current_threads[source_core]->GetPriority() < min_regular_priority) + current_threads[source_core]->GetPriority() < min_regular_priority) { continue; + } } if (next_thread->GetLastRunningTicks() >= thread->GetLastRunningTicks() || next_thread->GetPriority() < thread->GetPriority()) { @@ -174,8 +177,9 @@ void GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { if (winner != nullptr) { if (winner != yielding_thread) { - if (winner->IsRunning()) + if (winner->IsRunning()) { UnloadThread(winner->GetProcessorID()); + } TransferToCore(winner->GetPriority(), core_id, winner); } } else { @@ -195,7 +199,7 @@ 
void GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section, // etc. Thread* winner = nullptr; - u32 core_id = static_cast(yielding_thread->GetProcessorID()); + const u32 core_id = static_cast(yielding_thread->GetProcessorID()); // Remove the thread from its scheduled mlq, put it on the corresponding "suggested" one instead TransferToCore(yielding_thread->GetPriority(), -1, yielding_thread); @@ -209,9 +213,10 @@ void GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread current_threads[i] = scheduled_queue[i].empty() ? nullptr : scheduled_queue[i].front(); } for (auto& thread : suggested_queue[core_id]) { - s32 source_core = thread->GetProcessorID(); - if (source_core < 0 || thread == current_threads[source_core]) + const s32 source_core = thread->GetProcessorID(); + if (source_core < 0 || thread == current_threads[source_core]) { continue; + } if (current_threads[source_core] == nullptr || current_threads[source_core]->GetPriority() >= min_regular_priority) { winner = thread; @@ -220,8 +225,9 @@ void GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread } if (winner != nullptr) { if (winner != yielding_thread) { - if (winner->IsRunning()) + if (winner->IsRunning()) { UnloadThread(winner->GetProcessorID()); + } TransferToCore(winner->GetPriority(), core_id, winner); } } else { @@ -232,6 +238,16 @@ void GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread AskForReselectionOrMarkRedundant(yielding_thread, winner); } +void GlobalScheduler::Schedule(u32 priority, u32 core, Thread* thread) { + ASSERT_MSG(thread->GetProcessorID() == core, "Thread must be assigned to this core."); + scheduled_queue[core].add(thread, priority); +} + +void GlobalScheduler::SchedulePrepend(u32 priority, u32 core, Thread* thread) { + ASSERT_MSG(thread->GetProcessorID() == core, "Thread must be assigned to this 
core."); + scheduled_queue[core].add(thread, priority, false); +} + void GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread, Thread* winner) { if (current_thread == winner) { // TODO(blinkhawk): manage redundant operations, this is not implemented. @@ -244,13 +260,13 @@ void GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread, T GlobalScheduler::~GlobalScheduler() = default; -Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, u32 id) - : system(system), cpu_core(cpu_core), id(id) {} +Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, u32 core_id) + : system(system), cpu_core(cpu_core), core_id(core_id) {} -Scheduler::~Scheduler() {} +Scheduler::~Scheduler() = default; bool Scheduler::HaveReadyThreads() const { - return system.GlobalScheduler().HaveReadyThreads(id); + return system.GlobalScheduler().HaveReadyThreads(core_id); } Thread* Scheduler::GetCurrentThread() const { @@ -262,7 +278,7 @@ Thread* Scheduler::GetSelectedThread() const { } void Scheduler::SelectThreads() { - system.GlobalScheduler().SelectThread(id); + system.GlobalScheduler().SelectThread(core_id); } u64 Scheduler::GetLastContextSwitchTicks() const { @@ -270,13 +286,14 @@ u64 Scheduler::GetLastContextSwitchTicks() const { } void Scheduler::TryDoContextSwitch() { - if (context_switch_pending) + if (context_switch_pending) { SwitchContext(); + } } void Scheduler::UnloadThread() { Thread* const previous_thread = GetCurrentThread(); - Process* const previous_process = Core::CurrentProcess(); + Process* const previous_process = system.Kernel().CurrentProcess(); UpdateLastContextSwitchTime(previous_thread, previous_process); @@ -301,10 +318,11 @@ void Scheduler::SwitchContext() { Thread* const new_thread = GetSelectedThread(); context_switch_pending = false; - if (new_thread == previous_thread) + if (new_thread == previous_thread) { return; + } - Process* const previous_process = Core::CurrentProcess(); + Process* 
const previous_process = system.Kernel().CurrentProcess(); UpdateLastContextSwitchTime(previous_thread, previous_process); @@ -324,7 +342,7 @@ void Scheduler::SwitchContext() { // Load context of new thread if (new_thread) { - ASSERT_MSG(new_thread->GetProcessorID() == this->id, + ASSERT_MSG(new_thread->GetProcessorID() == this->core_id, "Thread must be assigned to this core."); ASSERT_MSG(new_thread->GetStatus() == ThreadStatus::Ready, "Thread must be ready to become running."); @@ -353,7 +371,7 @@ void Scheduler::SwitchContext() { void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) { const u64 prev_switch_ticks = last_context_switch_time; - const u64 most_recent_switch_ticks = Core::System::GetInstance().CoreTiming().GetTicks(); + const u64 most_recent_switch_ticks = system.CoreTiming().GetTicks(); const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks; if (thread != nullptr) { diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 82ed64b556..1c9d8a30f8 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -24,62 +24,70 @@ class GlobalScheduler final { public: static constexpr u32 NUM_CPU_CORES = 4; - GlobalScheduler() { - reselection_pending = false; - } + explicit GlobalScheduler(Core::System& system); ~GlobalScheduler(); /// Adds a new thread to the scheduler void AddThread(SharedPtr thread); /// Removes a thread from the scheduler - void RemoveThread(Thread* thread); + void RemoveThread(const Thread* thread); /// Returns a list of all threads managed by the scheduler const std::vector>& GetThreadList() const { return thread_list; } + // Add a thread to the suggested queue of a cpu core. Suggested threads may be + // picked if no thread is scheduled to run on the core. void Suggest(u32 priority, u32 core, Thread* thread) { suggested_queue[core].add(thread, priority); } + // Remove a thread to the suggested queue of a cpu core. 
Suggested threads may be + // picked if no thread is scheduled to run on the core. void Unsuggest(u32 priority, u32 core, Thread* thread) { suggested_queue[core].remove(thread, priority); } - void Schedule(u32 priority, u32 core, Thread* thread) { - ASSERT_MSG(thread->GetProcessorID() == core, "Thread must be assigned to this core."); - scheduled_queue[core].add(thread, priority); - } + // Add a thread to the scheduling queue of a cpu core. The thread is added at the + // back the queue in its priority level + void Schedule(u32 priority, u32 core, Thread* thread); - void SchedulePrepend(u32 priority, u32 core, Thread* thread) { - ASSERT_MSG(thread->GetProcessorID() == core, "Thread must be assigned to this core."); - scheduled_queue[core].add(thread, priority, false); - } + // Add a thread to the scheduling queue of a cpu core. The thread is added at the + // front the queue in its priority level + void SchedulePrepend(u32 priority, u32 core, Thread* thread); + // Reschedule an already scheduled thread based on a new priority void Reschedule(u32 priority, u32 core, Thread* thread) { scheduled_queue[core].remove(thread, priority); scheduled_queue[core].add(thread, priority); } + // Unschedule a thread. void Unschedule(u32 priority, u32 core, Thread* thread) { scheduled_queue[core].remove(thread, priority); } + // Transfers a thread into an specific core. If the destination_core is -1 + // it will be unscheduled from its source code and added into its suggested + // queue. 
void TransferToCore(u32 priority, s32 destination_core, Thread* thread) { - bool schedulable = thread->GetPriority() < THREADPRIO_COUNT; - s32 source_core = thread->GetProcessorID(); - if (source_core == destination_core || !schedulable) + const bool schedulable = thread->GetPriority() < THREADPRIO_COUNT; + const s32 source_core = thread->GetProcessorID(); + if (source_core == destination_core || !schedulable) { return; + } thread->SetProcessorID(destination_core); - if (source_core >= 0) + if (source_core >= 0) { Unschedule(priority, source_core, thread); + } if (destination_core >= 0) { Unsuggest(priority, destination_core, thread); Schedule(priority, destination_core, thread); } - if (source_core >= 0) + if (source_core >= 0) { Suggest(priority, source_core, thread); + } } /* @@ -99,7 +107,7 @@ public: */ void SelectThread(u32 core); - bool HaveReadyThreads(u32 core_id) { + bool HaveReadyThreads(u32 core_id) const { return !scheduled_queue[core_id].empty(); } @@ -133,8 +141,8 @@ public: reselection_pending.store(true, std::memory_order_release); } - bool IsReselectionPending() { - return reselection_pending.load(std::memory_order_acquire); + bool IsReselectionPending() const { + return reselection_pending.load(); } private: @@ -147,11 +155,12 @@ private: /// Lists all thread ids that aren't deleted/etc. std::vector> thread_list; + Core::System& system; }; class Scheduler final { public: - explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, const u32 id); + explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, const u32 core_id); ~Scheduler(); /// Returns whether there are any threads that are ready to run. 
@@ -204,7 +213,7 @@ private: Core::ARM_Interface& cpu_core; u64 last_context_switch_time = 0; u64 idle_selection_count = 0; - const u32 id; + const u32 core_id; bool context_switch_pending = false; }; diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 560ac39455..d520ed0332 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -1560,13 +1560,13 @@ static void SleepThread(Core::System& system, s64 nanoseconds) { if (nanoseconds <= 0) { switch (static_cast(nanoseconds)) { case SleepType::YieldWithoutLoadBalancing: - current_thread->YieldType0(); + current_thread->YieldSimple(); break; case SleepType::YieldWithLoadBalancing: - current_thread->YieldType1(); + current_thread->YieldAndBalanceLoad(); break; case SleepType::YieldAndWaitForLoadBalancing: - current_thread->YieldType2(); + current_thread->YieldAndWaitForLoadBalancing(); break; default: UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds); @@ -1638,8 +1638,9 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var const auto& thread_list = scheduler.GetThreadList(); for (const auto& thread : thread_list) { - if (thread->GetCondVarWaitAddress() == condition_variable_addr) + if (thread->GetCondVarWaitAddress() == condition_variable_addr) { waiting_threads.push_back(thread); + } } // Sort them by priority, such that the highest priority ones come first. 
@@ -1747,9 +1748,11 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, const auto arbitration_type = static_cast(type); auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter(); - ResultCode result = address_arbiter.WaitForAddress(address, arbitration_type, value, timeout); - if (result == RESULT_SUCCESS) + const ResultCode result = + address_arbiter.WaitForAddress(address, arbitration_type, value, timeout); + if (result == RESULT_SUCCESS) { system.PrepareReschedule(); + } return result; } diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index d0fa7b3702..8cf0a7ec77 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -373,43 +373,44 @@ void Thread::Sleep(s64 nanoseconds) { WakeAfterDelay(nanoseconds); } -void Thread::YieldType0() { +void Thread::YieldSimple() { auto& scheduler = kernel.GlobalScheduler(); scheduler.YieldThread(this); } -void Thread::YieldType1() { +void Thread::YieldAndBalanceLoad() { auto& scheduler = kernel.GlobalScheduler(); scheduler.YieldThreadAndBalanceLoad(this); } -void Thread::YieldType2() { +void Thread::YieldAndWaitForLoadBalancing() { auto& scheduler = kernel.GlobalScheduler(); scheduler.YieldThreadAndWaitForLoadBalancing(this); } void Thread::SetSchedulingStatus(ThreadSchedStatus new_status) { - u32 old_flags = scheduling_state; + const u32 old_flags = scheduling_state; scheduling_state = (scheduling_state & ThreadSchedMasks::HighMask) | static_cast(new_status); AdjustSchedulingOnStatus(old_flags); } void Thread::SetCurrentPriority(u32 new_priority) { - u32 old_priority = current_priority; - current_priority = new_priority; + u32 old_priority = std::exchange(current_priority, new_priority); AdjustSchedulingOnPriority(old_priority); } ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) { - auto HighestSetCore = [](u64 mask, u32 max_cores) { + const auto HighestSetCore = [](u64 mask, u32 max_cores) { 
for (s32 core = max_cores - 1; core >= 0; core--) { - if (((mask >> core) & 1) != 0) + if (((mask >> core) & 1) != 0) { return core; + } } return -1; }; - bool use_override = affinity_override_count != 0; + + const bool use_override = affinity_override_count != 0; // The value -3 is "do not change the ideal core". if (new_core == -3) { new_core = use_override ? ideal_core_override : ideal_core; @@ -421,11 +422,10 @@ ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) { ideal_core_override = new_core; affinity_mask_override = new_affinity_mask; } else { - u64 old_affinity_mask = affinity_mask; + const u64 old_affinity_mask = std::exchange(affinity_mask, new_affinity_mask); ideal_core = new_core; - affinity_mask = new_affinity_mask; if (old_affinity_mask != new_affinity_mask) { - s32 old_core = processor_id; + const s32 old_core = processor_id; if (processor_id >= 0 && ((affinity_mask >> processor_id) & 1) == 0) { if (ideal_core < 0) { processor_id = HighestSetCore(affinity_mask, GlobalScheduler::NUM_CPU_CORES); @@ -440,28 +440,33 @@ ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) { } void Thread::AdjustSchedulingOnStatus(u32 old_flags) { - if (old_flags == scheduling_state) + if (old_flags == scheduling_state) { return; + } auto& scheduler = kernel.GlobalScheduler(); if (static_cast(old_flags & ThreadSchedMasks::LowMask) == ThreadSchedStatus::Runnable) { // In this case the thread was running, now it's pausing/exitting - if (processor_id >= 0) + if (processor_id >= 0) { scheduler.Unschedule(current_priority, processor_id, this); + } - for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { - if (core != processor_id && ((affinity_mask >> core) & 1) != 0) + for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + if (core != processor_id && ((affinity_mask >> core) & 1) != 0) { scheduler.Unsuggest(current_priority, core, this); + } } } else if (GetSchedulingStatus() == 
ThreadSchedStatus::Runnable) { // The thread is now set to running from being stopped - if (processor_id >= 0) + if (processor_id >= 0) { scheduler.Schedule(current_priority, processor_id, this); + } - for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { - if (core != processor_id && ((affinity_mask >> core) & 1) != 0) + for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + if (core != processor_id && ((affinity_mask >> core) & 1) != 0) { scheduler.Suggest(current_priority, core, this); + } } } @@ -477,7 +482,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { scheduler.Unschedule(old_priority, processor_id, this); } - for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { if (core != processor_id && ((affinity_mask >> core) & 1) != 0) { scheduler.Unsuggest(old_priority, core, this); } @@ -494,7 +499,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { } } - for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { if (core != processor_id && ((affinity_mask >> core) & 1) != 0) { scheduler.Suggest(current_priority, core, this); } @@ -506,10 +511,11 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) { auto& scheduler = Core::System::GetInstance().GlobalScheduler(); if (GetSchedulingStatus() != ThreadSchedStatus::Runnable || - current_priority >= THREADPRIO_COUNT) + current_priority >= THREADPRIO_COUNT) { return; + } - for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { if (((old_affinity_mask >> core) & 1) != 0) { if (core == old_core) { scheduler.Unschedule(current_priority, core, this); @@ -519,7 +525,7 @@ void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 
old_core) { } } - for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { if (((affinity_mask >> core) & 1) != 0) { if (core == processor_id) { scheduler.Schedule(current_priority, core, this); diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index c426a72096..bf0cae959a 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -75,7 +75,12 @@ enum class ThreadActivity : u32 { Paused = 1, }; -enum class ThreadSchedStatus : u32 { None = 0, Paused = 1, Runnable = 2, Exited = 3 }; +enum class ThreadSchedStatus : u32 { + None = 0, + Paused = 1, + Runnable = 2, + Exited = 3, +}; enum ThreadSchedFlags : u32 { ProcessPauseFlag = 1 << 4, @@ -403,15 +408,15 @@ public: void Sleep(s64 nanoseconds); /// Yields this thread without rebalancing loads. - void YieldType0(); + void YieldSimple(); /// Yields this thread and does a load rebalancing. - void YieldType1(); + void YieldAndBalanceLoad(); /// Yields this thread and if the core is left idle, loads are rebalanced - void YieldType2(); + void YieldAndWaitForLoadBalancing(); - ThreadSchedStatus GetSchedulingStatus() { + ThreadSchedStatus GetSchedulingStatus() const { return static_cast(scheduling_state & ThreadSchedMasks::LowMask); } From 103f3a2fe51a09caf3f478226b6957b23c6eff79 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 10 Sep 2019 10:23:43 -0400 Subject: [PATCH 11/29] Scheduler: Add protections for Yield bombing In case of redundant yields, the scheduler will now idle the core for its timeslice, in order to avoid continuously yielding the same thing over and over. 
--- src/core/hle/kernel/scheduler.cpp | 16 +++++++++------- src/core/hle/kernel/scheduler.h | 8 ++++---- src/core/hle/kernel/svc.cpp | 13 +++++++++---- src/core/hle/kernel/thread.cpp | 12 ++++++------ src/core/hle/kernel/thread.h | 6 +++--- 5 files changed, 31 insertions(+), 24 deletions(-) diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index df4e9b7994..451fd8077d 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -118,7 +118,7 @@ void GlobalScheduler::SelectThread(u32 core) { * YieldThread takes a thread and moves it to the back of the it's priority list * This operation can be redundant and no scheduling is changed if marked as so. */ -void GlobalScheduler::YieldThread(Thread* yielding_thread) { +bool GlobalScheduler::YieldThread(Thread* yielding_thread) { // Note: caller should use critical section, etc. const u32 core_id = static_cast(yielding_thread->GetProcessorID()); const u32 priority = yielding_thread->GetPriority(); @@ -129,7 +129,7 @@ void GlobalScheduler::YieldThread(Thread* yielding_thread) { scheduled_queue[core_id].yield(priority); Thread* winner = scheduled_queue[core_id].front(priority); - AskForReselectionOrMarkRedundant(yielding_thread, winner); + return AskForReselectionOrMarkRedundant(yielding_thread, winner); } /* @@ -138,7 +138,7 @@ void GlobalScheduler::YieldThread(Thread* yielding_thread) { * a better priority than the next thread in the core. * This operation can be redundant and no scheduling is changed if marked as so. */ -void GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { +bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section, // etc. 
const u32 core_id = static_cast(yielding_thread->GetProcessorID()); @@ -186,7 +186,7 @@ void GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { winner = next_thread; } - AskForReselectionOrMarkRedundant(yielding_thread, winner); + return AskForReselectionOrMarkRedundant(yielding_thread, winner); } /* @@ -195,7 +195,7 @@ void GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { * a suggested thread is obtained instead. * This operation can be redundant and no scheduling is changed if marked as so. */ -void GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread) { +bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread) { // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section, // etc. Thread* winner = nullptr; @@ -235,7 +235,7 @@ void GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread } } - AskForReselectionOrMarkRedundant(yielding_thread, winner); + return AskForReselectionOrMarkRedundant(yielding_thread, winner); } void GlobalScheduler::Schedule(u32 priority, u32 core, Thread* thread) { @@ -248,13 +248,15 @@ void GlobalScheduler::SchedulePrepend(u32 priority, u32 core, Thread* thread) { scheduled_queue[core].add(thread, priority, false); } -void GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread, Thread* winner) { +bool GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread, Thread* winner) { if (current_thread == winner) { // TODO(blinkhawk): manage redundant operations, this is not implemented. // as its mostly an optimization. 
// current_thread->SetRedundantSchedulerOperation(); + return true; } else { reselection_pending.store(true, std::memory_order_release); + return false; } } diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 1c9d8a30f8..8fcc86bae1 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -115,7 +115,7 @@ public: * YieldThread takes a thread and moves it to the back of the it's priority list * This operation can be redundant and no scheduling is changed if marked as so. */ - void YieldThread(Thread* thread); + bool YieldThread(Thread* thread); /* * YieldThreadAndBalanceLoad takes a thread and moves it to the back of the it's priority list. @@ -123,7 +123,7 @@ public: * a better priority than the next thread in the core. * This operation can be redundant and no scheduling is changed if marked as so. */ - void YieldThreadAndBalanceLoad(Thread* thread); + bool YieldThreadAndBalanceLoad(Thread* thread); /* * YieldThreadAndWaitForLoadBalancing takes a thread and moves it out of the scheduling queue @@ -131,7 +131,7 @@ public: * a suggested thread is obtained instead. * This operation can be redundant and no scheduling is changed if marked as so. 
*/ - void YieldThreadAndWaitForLoadBalancing(Thread* thread); + bool YieldThreadAndWaitForLoadBalancing(Thread* thread); u32 CpuCoresCount() const { return NUM_CPU_CORES; @@ -146,7 +146,7 @@ public: } private: - void AskForReselectionOrMarkRedundant(Thread* current_thread, Thread* winner); + bool AskForReselectionOrMarkRedundant(Thread* current_thread, Thread* winner); static constexpr u32 min_regular_priority = 2; std::array, NUM_CPU_CORES> scheduled_queue; diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index d520ed0332..bd67fc96d7 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -1556,17 +1556,18 @@ static void SleepThread(Core::System& system, s64 nanoseconds) { auto& scheduler = system.CurrentScheduler(); auto* const current_thread = scheduler.GetCurrentThread(); + bool redundant = false; if (nanoseconds <= 0) { switch (static_cast(nanoseconds)) { case SleepType::YieldWithoutLoadBalancing: - current_thread->YieldSimple(); + redundant = current_thread->YieldSimple(); break; case SleepType::YieldWithLoadBalancing: - current_thread->YieldAndBalanceLoad(); + redundant = current_thread->YieldAndBalanceLoad(); break; case SleepType::YieldAndWaitForLoadBalancing: - current_thread->YieldAndWaitForLoadBalancing(); + redundant = current_thread->YieldAndWaitForLoadBalancing(); break; default: UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds); @@ -1575,7 +1576,11 @@ static void SleepThread(Core::System& system, s64 nanoseconds) { current_thread->Sleep(nanoseconds); } - system.PrepareReschedule(current_thread->GetProcessorID()); + if (redundant) { + system.CoreTiming().Idle(); + } else { + system.PrepareReschedule(current_thread->GetProcessorID()); + } } /// Wait process wide key atomic diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 8cf0a7ec77..ae62609e38 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -373,19 +373,19 @@ void 
Thread::Sleep(s64 nanoseconds) { WakeAfterDelay(nanoseconds); } -void Thread::YieldSimple() { +bool Thread::YieldSimple() { auto& scheduler = kernel.GlobalScheduler(); - scheduler.YieldThread(this); + return scheduler.YieldThread(this); } -void Thread::YieldAndBalanceLoad() { +bool Thread::YieldAndBalanceLoad() { auto& scheduler = kernel.GlobalScheduler(); - scheduler.YieldThreadAndBalanceLoad(this); + return scheduler.YieldThreadAndBalanceLoad(this); } -void Thread::YieldAndWaitForLoadBalancing() { +bool Thread::YieldAndWaitForLoadBalancing() { auto& scheduler = kernel.GlobalScheduler(); - scheduler.YieldThreadAndWaitForLoadBalancing(this); + return scheduler.YieldThreadAndWaitForLoadBalancing(this); } void Thread::SetSchedulingStatus(ThreadSchedStatus new_status) { diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index bf0cae959a..88255099f5 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -408,13 +408,13 @@ public: void Sleep(s64 nanoseconds); /// Yields this thread without rebalancing loads. - void YieldSimple(); + bool YieldSimple(); /// Yields this thread and does a load rebalancing. - void YieldAndBalanceLoad(); + bool YieldAndBalanceLoad(); /// Yields this thread and if the core is left idle, loads are rebalanced - void YieldAndWaitForLoadBalancing(); + bool YieldAndWaitForLoadBalancing(); ThreadSchedStatus GetSchedulingStatus() const { return static_cast(scheduling_state & ThreadSchedMasks::LowMask); From b49c0dab8772afb06358e5d19af092226b3a59bb Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 10 Sep 2019 11:04:40 -0400 Subject: [PATCH 12/29] Kernel: Initial implementation of thread preemption. 
--- src/core/hle/kernel/kernel.cpp | 16 ++++++++++++++++ src/core/hle/kernel/scheduler.cpp | 10 ++++++++++ src/core/hle/kernel/scheduler.h | 4 ++++ 3 files changed, 30 insertions(+) diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 600d6ec742..7a913520dc 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -12,6 +12,7 @@ #include "core/core.h" #include "core/core_timing.h" +#include "core/core_timing_util.h" #include "core/hle/kernel/address_arbiter.h" #include "core/hle/kernel/client_port.h" #include "core/hle/kernel/handle_table.h" @@ -96,6 +97,7 @@ struct KernelCore::Impl { InitializeSystemResourceLimit(kernel); InitializeThreads(); + InitializePreemption(); } void Shutdown() { @@ -111,6 +113,7 @@ struct KernelCore::Impl { thread_wakeup_callback_handle_table.Clear(); thread_wakeup_event_type = nullptr; + preemption_event = nullptr; named_ports.clear(); } @@ -133,6 +136,18 @@ struct KernelCore::Impl { system.CoreTiming().RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback); } + void InitializePreemption() { + preemption_event = system.CoreTiming().RegisterEvent( + "PreemptionCallback", [this](u64 userdata, s64 cycles_late) { + global_scheduler.PreemptThreads(); + s64 time_interval = Core::Timing::msToCycles(std::chrono::milliseconds(10)); + system.CoreTiming().ScheduleEvent(time_interval, preemption_event); + }); + + s64 time_interval = Core::Timing::msToCycles(std::chrono::milliseconds(10)); + system.CoreTiming().ScheduleEvent(time_interval, preemption_event); + } + std::atomic next_object_id{0}; std::atomic next_kernel_process_id{Process::InitialKIPIDMin}; std::atomic next_user_process_id{Process::ProcessIDMin}; @@ -146,6 +161,7 @@ struct KernelCore::Impl { SharedPtr system_resource_limit; Core::Timing::EventType* thread_wakeup_event_type = nullptr; + Core::Timing::EventType* preemption_event = nullptr; // TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the 
future, // allowing us to simply use a pool index or similar. Kernel::HandleTable thread_wakeup_callback_handle_table; diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 451fd8077d..0d45307cd3 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -238,6 +238,16 @@ bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread return AskForReselectionOrMarkRedundant(yielding_thread, winner); } +void GlobalScheduler::PreemptThreads() { + for (std::size_t core_id = 0; core_id < NUM_CPU_CORES; core_id++) { + const u64 priority = preemption_priorities[core_id]; + if (scheduled_queue[core_id].size(priority) > 1) { + scheduled_queue[core_id].yield(priority); + reselection_pending.store(true, std::memory_order_release); + } + } +} + void GlobalScheduler::Schedule(u32 priority, u32 core, Thread* thread) { ASSERT_MSG(thread->GetProcessorID() == core, "Thread must be assigned to this core."); scheduled_queue[core].add(thread, priority); diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 8fcc86bae1..c13a368fd4 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -133,6 +133,8 @@ public: */ bool YieldThreadAndWaitForLoadBalancing(Thread* thread); + void PreemptThreads(); + u32 CpuCoresCount() const { return NUM_CPU_CORES; } @@ -153,6 +155,8 @@ private: std::array, NUM_CPU_CORES> suggested_queue; std::atomic reselection_pending; + std::array preemption_priorities = {59, 59, 59, 62}; + /// Lists all thread ids that aren't deleted/etc. std::vector> thread_list; Core::System& system; From 2d382de6fa79123fae7842246588651ee99b15e2 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 10 Sep 2019 15:26:24 -0400 Subject: [PATCH 13/29] Scheduler: Corrections to YieldAndBalanceLoad and Yield bombing protection. 
--- src/core/hle/kernel/scheduler.cpp | 14 +++++++------- src/core/hle/kernel/scheduler.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 0d45307cd3..78463cef55 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -165,12 +165,12 @@ bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { continue; } } - if (next_thread->GetLastRunningTicks() >= thread->GetLastRunningTicks() || - next_thread->GetPriority() < thread->GetPriority()) { - if (thread->GetPriority() <= priority) { - winner = thread; - break; - } + } + if (next_thread->GetLastRunningTicks() >= thread->GetLastRunningTicks() || + next_thread->GetPriority() < thread->GetPriority()) { + if (thread->GetPriority() <= priority) { + winner = thread; + break; } } } @@ -240,7 +240,7 @@ bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread void GlobalScheduler::PreemptThreads() { for (std::size_t core_id = 0; core_id < NUM_CPU_CORES; core_id++) { - const u64 priority = preemption_priorities[core_id]; + const u32 priority = preemption_priorities[core_id]; if (scheduled_queue[core_id].size(priority) > 1) { scheduled_queue[core_id].yield(priority); reselection_pending.store(true, std::memory_order_release); diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index c13a368fd4..408e20c883 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -155,7 +155,7 @@ private: std::array, NUM_CPU_CORES> suggested_queue; std::atomic reselection_pending; - std::array preemption_priorities = {59, 59, 59, 62}; + std::array preemption_priorities = {59, 59, 59, 62}; /// Lists all thread ids that aren't deleted/etc. 
std::vector> thread_list; From 0cf26cee593c3c6abe909f3db52d972f846b13a9 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 11 Sep 2019 12:14:37 -0400 Subject: [PATCH 14/29] Scheduler: Implement Yield Count and Core migration on Thread Preemption. --- src/core/hle/kernel/scheduler.cpp | 81 +++++++++++++++++++++++++++++-- src/core/hle/kernel/thread.h | 9 ++++ 2 files changed, 85 insertions(+), 5 deletions(-) diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 78463cef55..5581c43bf8 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -241,10 +241,83 @@ bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread void GlobalScheduler::PreemptThreads() { for (std::size_t core_id = 0; core_id < NUM_CPU_CORES; core_id++) { const u32 priority = preemption_priorities[core_id]; - if (scheduled_queue[core_id].size(priority) > 1) { + + if (scheduled_queue[core_id].size(priority) > 0) { + scheduled_queue[core_id].front(priority)->IncrementYieldCount(); scheduled_queue[core_id].yield(priority); - reselection_pending.store(true, std::memory_order_release); + if (scheduled_queue[core_id].size(priority) > 1) { + scheduled_queue[core_id].front(priority)->IncrementYieldCount(); + } } + + Thread* current_thread = + scheduled_queue[core_id].empty() ? nullptr : scheduled_queue[core_id].front(); + Thread* winner = nullptr; + for (auto& thread : suggested_queue[core_id]) { + const s32 source_core = thread->GetProcessorID(); + if (thread->GetPriority() != priority) { + continue; + } + if (source_core >= 0) { + Thread* next_thread = scheduled_queue[source_core].empty() + ? 
nullptr + : scheduled_queue[source_core].front(); + if (next_thread != nullptr && next_thread->GetPriority() < 2) { + break; + } + if (next_thread == thread) { + continue; + } + } + if (current_thread != nullptr && + current_thread->GetLastRunningTicks() >= thread->GetLastRunningTicks()) { + winner = thread; + break; + } + } + + if (winner != nullptr) { + if (winner->IsRunning()) { + UnloadThread(winner->GetProcessorID()); + } + TransferToCore(winner->GetPriority(), core_id, winner); + current_thread = winner->GetPriority() <= current_thread->GetPriority() ? winner : current_thread; + } + + if (current_thread != nullptr && current_thread->GetPriority() > priority) { + for (auto& thread : suggested_queue[core_id]) { + const s32 source_core = thread->GetProcessorID(); + if (thread->GetPriority() > priority) { + continue; + } + if (source_core >= 0) { + Thread* next_thread = scheduled_queue[source_core].empty() + ? nullptr + : scheduled_queue[source_core].front(); + if (next_thread != nullptr && next_thread->GetPriority() < 2) { + break; + } + if (next_thread == thread) { + continue; + } + } + if (current_thread != nullptr && + current_thread->GetLastRunningTicks() >= thread->GetLastRunningTicks()) { + winner = thread; + break; + } + } + + if (winner != nullptr) { + if (winner->IsRunning()) { + UnloadThread(winner->GetProcessorID()); + } + TransferToCore(winner->GetPriority(), core_id, winner); + current_thread = winner; + } + } + + reselection_pending.store(true, std::memory_order_release); } } @@ -260,9 +333,7 @@ void GlobalScheduler::SchedulePrepend(u32 priority, u32 core, Thread* thread) { bool GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread, Thread* winner) { if (current_thread == winner) { - // TODO(blinkhawk): manage redundant operations, this is not implemented. - // as its mostly an optimization. 
- // current_thread->SetRedundantSchedulerOperation(); + current_thread->IncrementYieldCount(); return true; } else { reselection_pending.store(true, std::memory_order_release); diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 88255099f5..bec23a0e0a 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -416,6 +416,14 @@ public: /// Yields this thread and if the core is left idle, loads are rebalanced bool YieldAndWaitForLoadBalancing(); + void IncrementYieldCount() { + yield_count++; + } + + u64 GetYieldCount() const { + return yield_count; + } + ThreadSchedStatus GetSchedulingStatus() const { return static_cast(scheduling_state & ThreadSchedMasks::LowMask); } @@ -460,6 +468,7 @@ private: u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks. u64 last_running_ticks = 0; ///< CPU tick when thread was last running + u64 yield_count = 0; ///< Number of innecessaries yields occured. s32 processor_id = 0; From e05a8c2385a68be6b1f6079c656fa46336546927 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 11 Sep 2019 12:47:37 -0400 Subject: [PATCH 15/29] Kernel: Remove global system accessor from WaitObject --- src/core/hle/kernel/kernel.cpp | 8 ++++++++ src/core/hle/kernel/kernel.h | 6 ++++++ src/core/hle/kernel/scheduler.cpp | 2 +- src/core/hle/kernel/wait_object.cpp | 3 ++- 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 7a913520dc..77edbcd1f1 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -229,6 +229,14 @@ const Kernel::GlobalScheduler& KernelCore::GlobalScheduler() const { return impl->global_scheduler; } +Core::System& KernelCore::System() { + return impl->system; +} + +const Core::System& KernelCore::System() const { + return impl->system; +} + void KernelCore::AddNamedPort(std::string name, SharedPtr port) { impl->named_ports.emplace(std::move(name), std::move(port)); } diff 
--git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index f9f5bdc88f..0fc4d1f363 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -82,6 +82,12 @@ public: /// Gets the sole instance of the global scheduler const Kernel::GlobalScheduler& GlobalScheduler() const; + /// Gets the sole instance of the system + Core::System& System(); + + /// Gets the sole instance of the system + const Core::System& System() const; + /// Adds a port to the named port table void AddNamedPort(std::string name, SharedPtr port); diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 5581c43bf8..60d936c9ac 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -287,7 +287,7 @@ void GlobalScheduler::PreemptThreads() { if (current_thread != nullptr && current_thread->GetPriority() > priority) { for (auto& thread : suggested_queue[core_id]) { const s32 source_core = thread->GetProcessorID(); - if (thread->GetPriority() > priority) { + if (thread->GetPriority() < priority) { continue; } if (source_core >= 0) { diff --git a/src/core/hle/kernel/wait_object.cpp b/src/core/hle/kernel/wait_object.cpp index a65ec7dbc3..50ed2a2f15 100644 --- a/src/core/hle/kernel/wait_object.cpp +++ b/src/core/hle/kernel/wait_object.cpp @@ -8,6 +8,7 @@ #include "common/logging/log.h" #include "core/core.h" #include "core/core_cpu.h" +#include "core/hle/kernel/kernel.h" #include "core/hle/kernel/object.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/thread.h" @@ -97,7 +98,7 @@ void WaitObject::WakeupWaitingThread(SharedPtr thread) { } if (resume) { thread->ResumeFromWait(); - Core::System::GetInstance().PrepareReschedule(thread->GetProcessorID()); + kernel.System().PrepareReschedule(thread->GetProcessorID()); } } From 1ec1e8137356c64d624d90cd67acebb10f056abd Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 30 Sep 2019 20:50:59 -0400 Subject: [PATCH 16/29] Kernel: Clang Format --- 
src/core/hle/kernel/scheduler.cpp | 3 ++- src/core/hle/kernel/thread.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 60d936c9ac..226d15d888 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -281,7 +281,8 @@ void GlobalScheduler::PreemptThreads() { UnloadThread(winner->GetProcessorID()); } TransferToCore(winner->GetPriority(), core_id, winner); - current_thread = winner->GetPriority() <= current_thread->GetPriority() ? winner : current_thread; + current_thread = + winner->GetPriority() <= current_thread->GetPriority() ? winner : current_thread; } if (current_thread != nullptr && current_thread->GetPriority() > priority) { diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index bec23a0e0a..4d220c4f98 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -468,7 +468,7 @@ private: u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks. u64 last_running_ticks = 0; ///< CPU tick when thread was last running - u64 yield_count = 0; ///< Number of innecessaries yields occured. + u64 yield_count = 0; ///< Number of innecessaries yields occured. 
s32 processor_id = 0; From 44e09e5f21915391672558940842b92e3a64cb1b Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 7 Oct 2019 18:57:13 -0400 Subject: [PATCH 17/29] Kernel: Correct Results in Condition Variables and Mutexes --- src/core/hle/kernel/kernel.cpp | 13 +++++++------ src/core/hle/kernel/mutex.cpp | 1 + src/core/hle/kernel/svc.cpp | 27 +++++++++------------------ 3 files changed, 17 insertions(+), 24 deletions(-) diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 77edbcd1f1..002c5af2bb 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -15,6 +15,7 @@ #include "core/core_timing_util.h" #include "core/hle/kernel/address_arbiter.h" #include "core/hle/kernel/client_port.h" +#include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/process.h" @@ -60,12 +61,8 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_ if (thread->HasWakeupCallback()) { resume = thread->InvokeWakeupCallback(ThreadWakeupReason::Timeout, thread, nullptr, 0); } - } - - if (thread->GetMutexWaitAddress() != 0 || thread->GetCondVarWaitAddress() != 0 || - thread->GetWaitHandle() != 0) { - ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex || - thread->GetStatus() == ThreadStatus::WaitCondVar); + } else if (thread->GetStatus() == ThreadStatus::WaitMutex || + thread->GetStatus() == ThreadStatus::WaitCondVar) { thread->SetMutexWaitAddress(0); thread->SetCondVarWaitAddress(0); thread->SetWaitHandle(0); @@ -85,6 +82,10 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_ } if (resume) { + if (thread->GetStatus() == ThreadStatus::WaitCondVar || + thread->GetStatus() == ThreadStatus::WaitArb) { + thread->SetWaitSynchronizationResult(RESULT_TIMEOUT); + } thread->ResumeFromWait(); } } diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp index 
eb919246cd..663d0f4b6e 100644 --- a/src/core/hle/kernel/mutex.cpp +++ b/src/core/hle/kernel/mutex.cpp @@ -139,6 +139,7 @@ ResultCode Mutex::Release(VAddr address) { thread->SetCondVarWaitAddress(0); thread->SetMutexWaitAddress(0); thread->SetWaitHandle(0); + thread->SetWaitSynchronizationResult(RESULT_SUCCESS); system.PrepareReschedule(); diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index bd67fc96d7..823d1d4032 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -1677,18 +1677,20 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var // Atomically read the value of the mutex. u32 mutex_val = 0; + u32 update_val = 0; + const VAddr mutex_address = thread->GetMutexWaitAddress(); do { - monitor.SetExclusive(current_core, thread->GetMutexWaitAddress()); + monitor.SetExclusive(current_core, mutex_address); // If the mutex is not yet acquired, acquire it. - mutex_val = Memory::Read32(thread->GetMutexWaitAddress()); + mutex_val = Memory::Read32(mutex_address); if (mutex_val != 0) { - monitor.ClearExclusive(); - break; + update_val = mutex_val | Mutex::MutexHasWaitersFlag; + } else { + update_val = thread->GetWaitHandle(); } - } while (!monitor.ExclusiveWrite32(current_core, thread->GetMutexWaitAddress(), - thread->GetWaitHandle())); + } while (!monitor.ExclusiveWrite32(current_core, mutex_address, update_val)); if (mutex_val == 0) { // We were able to acquire the mutex, resume this thread. ASSERT(thread->GetStatus() == ThreadStatus::WaitCondVar); @@ -1702,20 +1704,9 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var thread->SetLockOwner(nullptr); thread->SetMutexWaitAddress(0); thread->SetWaitHandle(0); + thread->SetWaitSynchronizationResult(RESULT_SUCCESS); system.PrepareReschedule(thread->GetProcessorID()); } else { - // Atomically signal that the mutex now has a waiting thread. 
- do { - monitor.SetExclusive(current_core, thread->GetMutexWaitAddress()); - - // Ensure that the mutex value is still what we expect. - u32 value = Memory::Read32(thread->GetMutexWaitAddress()); - // TODO(Subv): When this happens, the kernel just clears the exclusive state and - // retries the initial read for this thread. - ASSERT_MSG(mutex_val == value, "Unhandled synchronization primitive case"); - } while (!monitor.ExclusiveWrite32(current_core, thread->GetMutexWaitAddress(), - mutex_val | Mutex::MutexHasWaitersFlag)); - // The mutex is already owned by some other thread, make this thread wait on it. const Handle owner_handle = static_cast(mutex_val & Mutex::MutexOwnerMask); const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); From 71768571775ff989a577a773574f5f5bdeb14d33 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 7 Oct 2019 19:09:57 -0400 Subject: [PATCH 18/29] Kernel: Corrections to ModifyByWaitingCountAndSignalToAddressIfEqual --- src/core/hle/kernel/address_arbiter.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp index 4c1d3fd180..de0a9064ef 100644 --- a/src/core/hle/kernel/address_arbiter.cpp +++ b/src/core/hle/kernel/address_arbiter.cpp @@ -91,12 +91,20 @@ ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr a // Determine the modified value depending on the waiting count. 
s32 updated_value; - if (waiting_threads.empty()) { - updated_value = value + 1; - } else if (num_to_wake <= 0 || waiting_threads.size() <= static_cast(num_to_wake)) { - updated_value = value - 1; + if (num_to_wake <= 0) { + if (waiting_threads.empty()) { + updated_value = value + 1; + } else { + updated_value = value - 1; + } } else { - updated_value = value; + if (waiting_threads.empty()) { + updated_value = value + 1; + } else if (waiting_threads.size() <= static_cast(num_to_wake)) { + updated_value = value - 1; + } else { + updated_value = value; + } } if (static_cast(Memory::Read32(address)) != value) { From 27d571c08436e7131f67fed2771434a571c1e976 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 8 Oct 2019 18:35:04 -0400 Subject: [PATCH 19/29] Kernel: Correct redundant yields to only advance time forward. --- src/core/hle/kernel/svc.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 823d1d4032..101f72b7d4 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -1577,10 +1577,12 @@ static void SleepThread(Core::System& system, s64 nanoseconds) { } if (redundant) { - system.CoreTiming().Idle(); - } else { - system.PrepareReschedule(current_thread->GetProcessorID()); + // If it's redundant, the core is pretty much idle. Some games keep idling + // a core while it's doing nothing, we advance timing to avoid costly continuos + // calls. + system.CoreTiming().AddTicks(2000); } + system.PrepareReschedule(current_thread->GetProcessorID()); } /// Wait process wide key atomic From 1c6a11ab142d18c3444629940f183b7c1865a5e2 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 10 Oct 2019 08:04:14 -0400 Subject: [PATCH 20/29] Kernel: Corrections to Wait Objects clearing in which a thread could still be signalled after a timeout or a cancel. 
--- src/core/hle/kernel/thread.cpp | 1 + src/core/hle/kernel/thread.h | 3 +++ src/core/hle/kernel/wait_object.cpp | 3 --- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index ae62609e38..563a99bfc7 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -133,6 +133,7 @@ void Thread::ResumeFromWait() { void Thread::CancelWait() { ASSERT(GetStatus() == ThreadStatus::WaitSynch); + ClearWaitObjects(); SetWaitSynchronizationResult(ERR_SYNCHRONIZATION_CANCELED); ResumeFromWait(); } diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 4d220c4f98..ceb4d51591 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -319,6 +319,9 @@ public: } void ClearWaitObjects() { + for (const auto& waiting_object : wait_objects) { + waiting_object->RemoveWaitingThread(this); + } wait_objects.clear(); } diff --git a/src/core/hle/kernel/wait_object.cpp b/src/core/hle/kernel/wait_object.cpp index 50ed2a2f15..0f833fb3ac 100644 --- a/src/core/hle/kernel/wait_object.cpp +++ b/src/core/hle/kernel/wait_object.cpp @@ -85,9 +85,6 @@ void WaitObject::WakeupWaitingThread(SharedPtr thread) { const std::size_t index = thread->GetWaitObjectIndex(this); - for (const auto& object : thread->GetWaitObjects()) { - object->RemoveWaitingThread(thread.get()); - } thread->ClearWaitObjects(); thread->CancelWakeupTimer(); From 96b1b144afff4ae4dd2d33547b8a62c46c920a84 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 10 Oct 2019 08:50:41 -0400 Subject: [PATCH 21/29] Kernel: Correct Paused scheduling --- src/core/hle/kernel/thread.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 563a99bfc7..0871a2f004 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -354,9 +354,7 @@ void Thread::SetActivity(ThreadActivity value) { if (value 
== ThreadActivity::Paused) { // Set status if not waiting - if (status == ThreadStatus::Ready) { - status = ThreadStatus::Paused; - } else if (status == ThreadStatus::Running) { + if (status == ThreadStatus::Ready || status == ThreadStatus::Running) { SetStatus(ThreadStatus::Paused); Core::System::GetInstance().CpuCore(processor_id).PrepareReschedule(); } From 0b72b34d89d8e3dd06fadfded728f7202bc34741 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 12 Oct 2019 07:57:32 -0400 Subject: [PATCH 22/29] KernelSVC: Assert that condition variable address is aligned to 4 bytes. --- src/core/hle/kernel/svc.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 101f72b7d4..151acf33a2 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -1608,6 +1608,8 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add return ERR_INVALID_ADDRESS; } + ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4)); + auto* const current_process = system.Kernel().CurrentProcess(); const auto& handle_table = current_process->GetHandleTable(); SharedPtr thread = handle_table.Get(thread_handle); @@ -1639,6 +1641,8 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var LOG_TRACE(Kernel_SVC, "called, condition_variable_addr=0x{:X}, target=0x{:08X}", condition_variable_addr, target); + ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4)); + // Retrieve a list of all threads that are waiting for this condition variable. std::vector> waiting_threads; const auto& scheduler = system.GlobalScheduler(); From b3c1deba494d78158ea6764802880b249fe64416 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 12 Oct 2019 08:02:34 -0400 Subject: [PATCH 23/29] Kernel_Thread: Eliminate most global accessors. 
--- src/core/hle/kernel/thread.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 0871a2f004..7208bbb110 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -41,8 +41,8 @@ Thread::~Thread() = default; void Thread::Stop() { // Cancel any outstanding wakeup events for this thread - Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), - callback_handle); + kernel.System().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), + callback_handle); kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle); callback_handle = 0; SetStatus(ThreadStatus::Dead); @@ -68,13 +68,13 @@ void Thread::WakeAfterDelay(s64 nanoseconds) { // This function might be called from any thread so we have to be cautious and use the // thread-safe version of ScheduleEvent. const s64 cycles = Core::Timing::nsToCycles(std::chrono::nanoseconds{nanoseconds}); - Core::System::GetInstance().CoreTiming().ScheduleEvent( - cycles, kernel.ThreadWakeupCallbackEventType(), callback_handle); + kernel.System().CoreTiming().ScheduleEvent(cycles, kernel.ThreadWakeupCallbackEventType(), + callback_handle); } void Thread::CancelWakeupTimer() { - Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), - callback_handle); + kernel.System().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), + callback_handle); } static std::optional GetNextProcessorId(u64 mask) { @@ -176,7 +176,7 @@ ResultVal> Thread::Create(KernelCore& kernel, std::string name return ResultCode(-1); } - auto& system = Core::System::GetInstance(); + auto& system = kernel.System(); SharedPtr thread(new Thread(kernel)); thread->thread_id = kernel.CreateNewThreadID(); @@ -258,7 +258,7 @@ void Thread::SetStatus(ThreadStatus new_status) { } if (status == ThreadStatus::Running) { - 
last_running_ticks = Core::System::GetInstance().CoreTiming().GetTicks(); + last_running_ticks = kernel.System().CoreTiming().GetTicks(); } status = new_status; @@ -356,7 +356,7 @@ void Thread::SetActivity(ThreadActivity value) { // Set status if not waiting if (status == ThreadStatus::Ready || status == ThreadStatus::Running) { SetStatus(ThreadStatus::Paused); - Core::System::GetInstance().CpuCore(processor_id).PrepareReschedule(); + kernel.System().CpuCore(processor_id).PrepareReschedule(); } } else if (status == ThreadStatus::Paused) { // Ready to reschedule @@ -476,7 +476,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { if (GetSchedulingStatus() != ThreadSchedStatus::Runnable) { return; } - auto& scheduler = Core::System::GetInstance().GlobalScheduler(); + auto& scheduler = kernel.System().GlobalScheduler(); if (processor_id >= 0) { scheduler.Unschedule(old_priority, processor_id, this); } @@ -508,7 +508,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { } void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) { - auto& scheduler = Core::System::GetInstance().GlobalScheduler(); + auto& scheduler = kernel.System().GlobalScheduler(); if (GetSchedulingStatus() != ThreadSchedStatus::Runnable || current_priority >= THREADPRIO_COUNT) { return; From 25f8606a6dab595eb7a92fce9be32e0489079964 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 12 Oct 2019 08:21:51 -0400 Subject: [PATCH 24/29] Kernel Scheduler: Make sure the global scheduler shutdowns correctly. 
--- src/common/multi_level_queue.h | 7 +++++++ src/core/core_cpu.cpp | 4 ++++ src/core/core_cpu.h | 2 ++ src/core/cpu_core_manager.cpp | 1 + src/core/hle/kernel/kernel.cpp | 2 ++ src/core/hle/kernel/scheduler.cpp | 8 ++++++++ src/core/hle/kernel/scheduler.h | 7 +++++++ 7 files changed, 31 insertions(+) diff --git a/src/common/multi_level_queue.h b/src/common/multi_level_queue.h index 9cb448f567..50acfdbf2f 100644 --- a/src/common/multi_level_queue.h +++ b/src/common/multi_level_queue.h @@ -304,6 +304,13 @@ public: return levels[priority == Depth ? 63 : priority].back(); } + void clear() { + used_priorities = 0; + for (std::size_t i = 0; i < Depth; i++) { + levels[i].clear(); + } + } + private: using const_list_iterator = typename std::list::const_iterator; diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp index a6f63e437a..233ea572ca 100644 --- a/src/core/core_cpu.cpp +++ b/src/core/core_cpu.cpp @@ -117,4 +117,8 @@ void Cpu::Reschedule() { scheduler->TryDoContextSwitch(); } +void Cpu::Shutdown() { + scheduler->Shutdown(); +} + } // namespace Core diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h index 80261daf75..cafca8df7f 100644 --- a/src/core/core_cpu.h +++ b/src/core/core_cpu.h @@ -84,6 +84,8 @@ public: return core_index; } + void Shutdown(); + static std::unique_ptr MakeExclusiveMonitor(std::size_t num_cores); private: diff --git a/src/core/cpu_core_manager.cpp b/src/core/cpu_core_manager.cpp index 16b384076e..8efd410bba 100644 --- a/src/core/cpu_core_manager.cpp +++ b/src/core/cpu_core_manager.cpp @@ -58,6 +58,7 @@ void CpuCoreManager::Shutdown() { thread_to_cpu.clear(); for (auto& cpu_core : cores) { + cpu_core->Shutdown(); cpu_core.reset(); } diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 002c5af2bb..0d6286f845 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -116,6 +116,8 @@ struct KernelCore::Impl { thread_wakeup_event_type = nullptr; preemption_event = nullptr; + 
global_scheduler.Shutdown(); + named_ports.clear(); } diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 226d15d888..122106267a 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -342,6 +342,14 @@ bool GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread, T } } +void GlobalScheduler::Shutdown() { + for (std::size_t core = 0; core < NUM_CPU_CORES; core++) { + scheduled_queue[core].clear(); + suggested_queue[core].clear(); + } + thread_list.clear(); +} + GlobalScheduler::~GlobalScheduler() = default; Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, u32 core_id) diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 408e20c883..617553ae36 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -147,6 +147,8 @@ public: return reselection_pending.load(); } + void Shutdown(); + private: bool AskForReselectionOrMarkRedundant(Thread* current_thread, Thread* winner); @@ -189,6 +191,11 @@ public: return context_switch_pending; } + void Shutdown() { + current_thread = nullptr; + selected_thread = nullptr; + } + private: friend class GlobalScheduler; /** From 3073615dbc214a53badc88da68eecbaaa73898de Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 12 Oct 2019 10:13:25 -0400 Subject: [PATCH 25/29] Kernel: Address Feedback. 
--- src/core/hle/kernel/kernel.h | 2 +- src/core/hle/kernel/scheduler.cpp | 53 ++++++++++++++++++++++--- src/core/hle/kernel/scheduler.h | 65 ++++++++++++------------------- src/core/hle/kernel/svc.cpp | 12 +++--- src/core/hle/kernel/thread.cpp | 17 ++++---- src/core/hle/kernel/thread.h | 16 ++++++-- 6 files changed, 98 insertions(+), 67 deletions(-) diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 0fc4d1f363..9fb8f52ec9 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -21,11 +21,11 @@ namespace Kernel { class AddressArbiter; class ClientPort; +class GlobalScheduler; class HandleTable; class Process; class ResourceLimit; class Thread; -class GlobalScheduler; /// Represents a single instance of the kernel. class KernelCore { diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 122106267a..dabeb05d68 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -23,7 +23,7 @@ namespace Kernel { GlobalScheduler::GlobalScheduler(Core::System& system) : system{system} { - reselection_pending = false; + is_reselection_pending = false; } void GlobalScheduler::AddThread(SharedPtr thread) { @@ -61,7 +61,7 @@ void GlobalScheduler::SelectThread(u32 core) { } sched.selected_thread = thread; } - sched.context_switch_pending = sched.selected_thread != sched.current_thread; + sched.is_context_switch_pending = sched.selected_thread != sched.current_thread; std::atomic_thread_fence(std::memory_order_seq_cst); }; Scheduler& sched = system.Scheduler(core); @@ -318,10 +318,18 @@ void GlobalScheduler::PreemptThreads() { } } - reselection_pending.store(true, std::memory_order_release); + is_reselection_pending.store(true, std::memory_order_release); } } +void GlobalScheduler::Suggest(u32 priority, u32 core, Thread* thread) { + suggested_queue[core].add(thread, priority); +} + +void GlobalScheduler::Unsuggest(u32 priority, u32 core, Thread* thread) { + 
suggested_queue[core].remove(thread, priority); +} + void GlobalScheduler::Schedule(u32 priority, u32 core, Thread* thread) { ASSERT_MSG(thread->GetProcessorID() == core, "Thread must be assigned to this core."); scheduled_queue[core].add(thread, priority); @@ -332,12 +340,40 @@ void GlobalScheduler::SchedulePrepend(u32 priority, u32 core, Thread* thread) { scheduled_queue[core].add(thread, priority, false); } +void GlobalScheduler::Reschedule(u32 priority, u32 core, Thread* thread) { + scheduled_queue[core].remove(thread, priority); + scheduled_queue[core].add(thread, priority); +} + +void GlobalScheduler::Unschedule(u32 priority, u32 core, Thread* thread) { + scheduled_queue[core].remove(thread, priority); +} + +void GlobalScheduler::TransferToCore(u32 priority, s32 destination_core, Thread* thread) { + const bool schedulable = thread->GetPriority() < THREADPRIO_COUNT; + const s32 source_core = thread->GetProcessorID(); + if (source_core == destination_core || !schedulable) { + return; + } + thread->SetProcessorID(destination_core); + if (source_core >= 0) { + Unschedule(priority, source_core, thread); + } + if (destination_core >= 0) { + Unsuggest(priority, destination_core, thread); + Schedule(priority, destination_core, thread); + } + if (source_core >= 0) { + Suggest(priority, source_core, thread); + } +} + bool GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread, Thread* winner) { if (current_thread == winner) { current_thread->IncrementYieldCount(); return true; } else { - reselection_pending.store(true, std::memory_order_release); + is_reselection_pending.store(true, std::memory_order_release); return false; } } @@ -378,7 +414,7 @@ u64 Scheduler::GetLastContextSwitchTicks() const { } void Scheduler::TryDoContextSwitch() { - if (context_switch_pending) { + if (is_context_switch_pending ) { SwitchContext(); } } @@ -409,7 +445,7 @@ void Scheduler::SwitchContext() { Thread* const previous_thread = GetCurrentThread(); Thread* const 
new_thread = GetSelectedThread(); - context_switch_pending = false; + is_context_switch_pending = false; if (new_thread == previous_thread) { return; } @@ -477,4 +513,9 @@ void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) { last_context_switch_time = most_recent_switch_ticks; } +void Scheduler::Shutdown() { + current_thread = nullptr; + selected_thread = nullptr; +} + } // namespace Kernel diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 617553ae36..fcae28e0a4 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -39,15 +39,11 @@ public: // Add a thread to the suggested queue of a cpu core. Suggested threads may be // picked if no thread is scheduled to run on the core. - void Suggest(u32 priority, u32 core, Thread* thread) { - suggested_queue[core].add(thread, priority); - } + void Suggest(u32 priority, u32 core, Thread* thread); // Remove a thread to the suggested queue of a cpu core. Suggested threads may be // picked if no thread is scheduled to run on the core. - void Unsuggest(u32 priority, u32 core, Thread* thread) { - suggested_queue[core].remove(thread, priority); - } + void Unsuggest(u32 priority, u32 core, Thread* thread); // Add a thread to the scheduling queue of a cpu core. The thread is added at the // back the queue in its priority level @@ -58,37 +54,15 @@ public: void SchedulePrepend(u32 priority, u32 core, Thread* thread); // Reschedule an already scheduled thread based on a new priority - void Reschedule(u32 priority, u32 core, Thread* thread) { - scheduled_queue[core].remove(thread, priority); - scheduled_queue[core].add(thread, priority); - } + void Reschedule(u32 priority, u32 core, Thread* thread); // Unschedule a thread. - void Unschedule(u32 priority, u32 core, Thread* thread) { - scheduled_queue[core].remove(thread, priority); - } + void Unschedule(u32 priority, u32 core, Thread* thread); // Transfers a thread into an specific core. 
If the destination_core is -1 // it will be unscheduled from its source code and added into its suggested // queue. - void TransferToCore(u32 priority, s32 destination_core, Thread* thread) { - const bool schedulable = thread->GetPriority() < THREADPRIO_COUNT; - const s32 source_core = thread->GetProcessorID(); - if (source_core == destination_core || !schedulable) { - return; - } - thread->SetProcessorID(destination_core); - if (source_core >= 0) { - Unschedule(priority, source_core, thread); - } - if (destination_core >= 0) { - Unsuggest(priority, destination_core, thread); - Schedule(priority, destination_core, thread); - } - if (source_core >= 0) { - Suggest(priority, source_core, thread); - } - } + void TransferToCore(u32 priority, s32 destination_core, Thread* thread); /* * UnloadThread selects a core and forces it to unload its current thread's context @@ -133,6 +107,12 @@ public: */ bool YieldThreadAndWaitForLoadBalancing(Thread* thread); + /* + * PreemptThreads this operation rotates the scheduling queues of threads at + * a preemption priority and then does some core rebalancing. Preemption priorities + * can be found in the array 'preemption_priorities'. This operation happens + * every 10ms. + */ void PreemptThreads(); u32 CpuCoresCount() const { @@ -140,11 +120,11 @@ public: } void SetReselectionPending() { - reselection_pending.store(true, std::memory_order_release); + is_reselection_pending.store(true, std::memory_order_release); } bool IsReselectionPending() const { - return reselection_pending.load(); + return is_reselection_pending.load(std::memory_order_acquire); } void Shutdown(); @@ -155,8 +135,10 @@ private: static constexpr u32 min_regular_priority = 2; std::array, NUM_CPU_CORES> scheduled_queue; std::array, NUM_CPU_CORES> suggested_queue; - std::atomic reselection_pending; + std::atomic is_reselection_pending; + // `preemption_priorities` are the priority levels at which the global scheduler + // preempts threads every 10 ms. 
They are ordered from Core 0 to Core 3 std::array preemption_priorities = {59, 59, 59, 62}; /// Lists all thread ids that aren't deleted/etc. @@ -166,7 +148,7 @@ private: class Scheduler final { public: - explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, const u32 core_id); + explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, u32 core_id); ~Scheduler(); /// Returns whether there are any threads that are ready to run. @@ -175,26 +157,27 @@ public: /// Reschedules to the next available thread (call after current thread is suspended) void TryDoContextSwitch(); + /// Unloads currently running thread void UnloadThread(); + /// Select the threads in top of the scheduling multilist. void SelectThreads(); /// Gets the current running thread Thread* GetCurrentThread() const; + /// Gets the currently selected thread from the top of the multilevel queue Thread* GetSelectedThread() const; /// Gets the timestamp for the last context switch in ticks. u64 GetLastContextSwitchTicks() const; bool ContextSwitchPending() const { - return context_switch_pending; + return is_context_switch_pending; } - void Shutdown() { - current_thread = nullptr; - selected_thread = nullptr; - } + /// Shutdowns the scheduler. 
+ void Shutdown(); private: friend class GlobalScheduler; @@ -226,7 +209,7 @@ private: u64 idle_selection_count = 0; const u32 core_id; - bool context_switch_pending = false; + bool is_context_switch_pending = false; }; } // namespace Kernel diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 151acf33a2..f64236be1a 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -1556,18 +1556,18 @@ static void SleepThread(Core::System& system, s64 nanoseconds) { auto& scheduler = system.CurrentScheduler(); auto* const current_thread = scheduler.GetCurrentThread(); - bool redundant = false; + bool is_redundant = false; if (nanoseconds <= 0) { switch (static_cast(nanoseconds)) { case SleepType::YieldWithoutLoadBalancing: - redundant = current_thread->YieldSimple(); + is_redundant = current_thread->YieldSimple(); break; case SleepType::YieldWithLoadBalancing: - redundant = current_thread->YieldAndBalanceLoad(); + is_redundant = current_thread->YieldAndBalanceLoad(); break; case SleepType::YieldAndWaitForLoadBalancing: - redundant = current_thread->YieldAndWaitForLoadBalancing(); + is_redundant = current_thread->YieldAndWaitForLoadBalancing(); break; default: UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds); @@ -1576,9 +1576,9 @@ static void SleepThread(Core::System& system, s64 nanoseconds) { current_thread->Sleep(nanoseconds); } - if (redundant) { + if (is_redundant) { // If it's redundant, the core is pretty much idle. Some games keep idling - // a core while it's doing nothing, we advance timing to avoid costly continuos + // a core while it's doing nothing, we advance timing to avoid costly continuous // calls. 
system.CoreTiming().AddTicks(2000); } diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 7208bbb110..8663fe5eed 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -389,13 +389,13 @@ bool Thread::YieldAndWaitForLoadBalancing() { void Thread::SetSchedulingStatus(ThreadSchedStatus new_status) { const u32 old_flags = scheduling_state; - scheduling_state = - (scheduling_state & ThreadSchedMasks::HighMask) | static_cast(new_status); + scheduling_state = (scheduling_state & static_cast(ThreadSchedMasks::HighMask)) | + static_cast(new_status); AdjustSchedulingOnStatus(old_flags); } void Thread::SetCurrentPriority(u32 new_priority) { - u32 old_priority = std::exchange(current_priority, new_priority); + const u32 old_priority = std::exchange(current_priority, new_priority); AdjustSchedulingOnPriority(old_priority); } @@ -410,10 +410,9 @@ ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) { }; const bool use_override = affinity_override_count != 0; - // The value -3 is "do not change the ideal core". - if (new_core == -3) { + if (new_core == static_cast(CoreFlags::DontChangeIdealCore)) { new_core = use_override ? 
ideal_core_override : ideal_core; - if ((new_affinity_mask & (1 << new_core)) == 0) { + if ((new_affinity_mask & (1ULL << new_core)) == 0) { return ERR_INVALID_COMBINATION; } } @@ -444,14 +443,14 @@ void Thread::AdjustSchedulingOnStatus(u32 old_flags) { } auto& scheduler = kernel.GlobalScheduler(); - if (static_cast(old_flags & ThreadSchedMasks::LowMask) == + if (static_cast(old_flags & static_cast(ThreadSchedMasks::LowMask)) == ThreadSchedStatus::Runnable) { // In this case the thread was running, now it's pausing/exitting if (processor_id >= 0) { scheduler.Unschedule(current_priority, processor_id, this); } - for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { if (core != processor_id && ((affinity_mask >> core) & 1) != 0) { scheduler.Unsuggest(current_priority, core, this); } @@ -462,7 +461,7 @@ void Thread::AdjustSchedulingOnStatus(u32 old_flags) { scheduler.Schedule(current_priority, processor_id, this); } - for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { if (core != processor_id && ((affinity_mask >> core) & 1) != 0) { scheduler.Suggest(current_priority, core, this); } diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index ceb4d51591..e0f3b62047 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -82,19 +82,25 @@ enum class ThreadSchedStatus : u32 { Exited = 3, }; -enum ThreadSchedFlags : u32 { +enum class ThreadSchedFlags : u32 { ProcessPauseFlag = 1 << 4, ThreadPauseFlag = 1 << 5, ProcessDebugPauseFlag = 1 << 6, KernelInitPauseFlag = 1 << 8, }; -enum ThreadSchedMasks : u32 { +enum class ThreadSchedMasks : u32 { LowMask = 0x000f, HighMask = 0xfff0, ForcePauseMask = 0x0070, }; +enum class CoreFlags : s32 { + IgnoreIdealCore = -1, + ProcessIdealCore = -2, + DontChangeIdealCore = -3, +}; + class Thread final : public WaitObject { public: 
using MutexWaitingThreads = std::vector>; @@ -428,7 +434,8 @@ public: } ThreadSchedStatus GetSchedulingStatus() const { - return static_cast(scheduling_state & ThreadSchedMasks::LowMask); + return static_cast(scheduling_state & + static_cast(ThreadSchedMasks::LowMask)); } bool IsRunning() const { @@ -471,7 +478,8 @@ private: u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks. u64 last_running_ticks = 0; ///< CPU tick when thread was last running - u64 yield_count = 0; ///< Number of innecessaries yields occured. + u64 yield_count = 0; ///< Number of redundant yields carried by this thread. + ///< a redundant yield is one where no scheduling is changed s32 processor_id = 0; From c32520ceb7cf2180fbbed11e9bd5f9df03409e1d Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 12 Oct 2019 10:21:33 -0400 Subject: [PATCH 26/29] Kernel: Reverse global accessor removal. --- src/core/hle/kernel/kernel.cpp | 8 -------- src/core/hle/kernel/kernel.h | 6 ------ src/core/hle/kernel/thread.cpp | 16 ++++++++-------- src/core/hle/kernel/wait_object.cpp | 2 +- 4 files changed, 9 insertions(+), 23 deletions(-) diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 0d6286f845..f94ac150d0 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -232,14 +232,6 @@ const Kernel::GlobalScheduler& KernelCore::GlobalScheduler() const { return impl->global_scheduler; } -Core::System& KernelCore::System() { - return impl->system; -} - -const Core::System& KernelCore::System() const { - return impl->system; -} - void KernelCore::AddNamedPort(std::string name, SharedPtr port) { impl->named_ports.emplace(std::move(name), std::move(port)); } diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 9fb8f52ec9..c4397fc77a 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -82,12 +82,6 @@ public: /// Gets the sole instance of the global scheduler const Kernel::GlobalScheduler& 
GlobalScheduler() const; - /// Gets the sole instance of the system - Core::System& System(); - - /// Gets the sole instance of the system - const Core::System& System() const; - /// Adds a port to the named port table void AddNamedPort(std::string name, SharedPtr port); diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 8663fe5eed..0c11da1e0e 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -41,7 +41,7 @@ Thread::~Thread() = default; void Thread::Stop() { // Cancel any outstanding wakeup events for this thread - kernel.System().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), + Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), callback_handle); kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle); callback_handle = 0; @@ -68,12 +68,12 @@ void Thread::WakeAfterDelay(s64 nanoseconds) { // This function might be called from any thread so we have to be cautious and use the // thread-safe version of ScheduleEvent. 
const s64 cycles = Core::Timing::nsToCycles(std::chrono::nanoseconds{nanoseconds}); - kernel.System().CoreTiming().ScheduleEvent(cycles, kernel.ThreadWakeupCallbackEventType(), + Core::System::GetInstance().CoreTiming().ScheduleEvent(cycles, kernel.ThreadWakeupCallbackEventType(), callback_handle); } void Thread::CancelWakeupTimer() { - kernel.System().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), + Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), callback_handle); } @@ -176,7 +176,7 @@ ResultVal> Thread::Create(KernelCore& kernel, std::string name return ResultCode(-1); } - auto& system = kernel.System(); + auto& system = Core::System::GetInstance(); SharedPtr thread(new Thread(kernel)); thread->thread_id = kernel.CreateNewThreadID(); @@ -258,7 +258,7 @@ void Thread::SetStatus(ThreadStatus new_status) { } if (status == ThreadStatus::Running) { - last_running_ticks = kernel.System().CoreTiming().GetTicks(); + last_running_ticks = Core::System::GetInstance().CoreTiming().GetTicks(); } status = new_status; @@ -356,7 +356,7 @@ void Thread::SetActivity(ThreadActivity value) { // Set status if not waiting if (status == ThreadStatus::Ready || status == ThreadStatus::Running) { SetStatus(ThreadStatus::Paused); - kernel.System().CpuCore(processor_id).PrepareReschedule(); + Core::System::GetInstance().CpuCore(processor_id).PrepareReschedule(); } } else if (status == ThreadStatus::Paused) { // Ready to reschedule @@ -475,7 +475,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { if (GetSchedulingStatus() != ThreadSchedStatus::Runnable) { return; } - auto& scheduler = kernel.System().GlobalScheduler(); + auto& scheduler = Core::System::GetInstance().GlobalScheduler(); if (processor_id >= 0) { scheduler.Unschedule(old_priority, processor_id, this); } @@ -507,7 +507,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { } void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, 
s32 old_core) { - auto& scheduler = kernel.System().GlobalScheduler(); + auto& scheduler = Core::System::GetInstance().GlobalScheduler(); if (GetSchedulingStatus() != ThreadSchedStatus::Runnable || current_priority >= THREADPRIO_COUNT) { return; diff --git a/src/core/hle/kernel/wait_object.cpp b/src/core/hle/kernel/wait_object.cpp index 0f833fb3ac..c00cef0629 100644 --- a/src/core/hle/kernel/wait_object.cpp +++ b/src/core/hle/kernel/wait_object.cpp @@ -95,7 +95,7 @@ void WaitObject::WakeupWaitingThread(SharedPtr thread) { } if (resume) { thread->ResumeFromWait(); - kernel.System().PrepareReschedule(thread->GetProcessorID()); + Core::System::GetInstance().PrepareReschedule(thread->GetProcessorID()); } } From a3524879be351f3726a622217d5c2d928ae92b42 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 12 Oct 2019 10:28:44 -0400 Subject: [PATCH 27/29] Kernel: Clang Format --- src/core/hle/kernel/scheduler.cpp | 2 +- src/core/hle/kernel/thread.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index dabeb05d68..e6dcb96394 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -414,7 +414,7 @@ u64 Scheduler::GetLastContextSwitchTicks() const { } void Scheduler::TryDoContextSwitch() { - if (is_context_switch_pending ) { + if (is_context_switch_pending) { SwitchContext(); } } diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 0c11da1e0e..3408658e5a 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -42,7 +42,7 @@ Thread::~Thread() = default; void Thread::Stop() { // Cancel any outstanding wakeup events for this thread Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), - callback_handle); + callback_handle); kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle); callback_handle = 0; SetStatus(ThreadStatus::Dead); @@ 
-68,13 +68,13 @@ void Thread::WakeAfterDelay(s64 nanoseconds) { // This function might be called from any thread so we have to be cautious and use the // thread-safe version of ScheduleEvent. const s64 cycles = Core::Timing::nsToCycles(std::chrono::nanoseconds{nanoseconds}); - Core::System::GetInstance().CoreTiming().ScheduleEvent(cycles, kernel.ThreadWakeupCallbackEventType(), - callback_handle); + Core::System::GetInstance().CoreTiming().ScheduleEvent( + cycles, kernel.ThreadWakeupCallbackEventType(), callback_handle); } void Thread::CancelWakeupTimer() { Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), - callback_handle); + callback_handle); } static std::optional GetNextProcessorId(u64 mask) { From e28c7f521765a85e27259539f0873b15c18a98f8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 12 Oct 2019 10:38:55 -0400 Subject: [PATCH 28/29] Kernel: Address Feedback 2 --- src/core/hle/kernel/thread.cpp | 6 +++--- src/core/hle/kernel/thread.h | 9 +++------ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 3408658e5a..aeb20b24b0 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -410,7 +410,7 @@ ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) { }; const bool use_override = affinity_override_count != 0; - if (new_core == static_cast(CoreFlags::DontChangeIdealCore)) { + if (new_core == THREADDONTCHANGE_IDEAL) { new_core = use_override ? 
ideal_core_override : ideal_core; if ((new_affinity_mask & (1ULL << new_core)) == 0) { return ERR_INVALID_COMBINATION; @@ -452,7 +452,7 @@ void Thread::AdjustSchedulingOnStatus(u32 old_flags) { for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { if (core != processor_id && ((affinity_mask >> core) & 1) != 0) { - scheduler.Unsuggest(current_priority, core, this); + scheduler.Unsuggest(current_priority, static_cast(core), this); } } } else if (GetSchedulingStatus() == ThreadSchedStatus::Runnable) { @@ -463,7 +463,7 @@ void Thread::AdjustSchedulingOnStatus(u32 old_flags) { for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { if (core != processor_id && ((affinity_mask >> core) & 1) != 0) { - scheduler.Suggest(current_priority, core, this); + scheduler.Suggest(current_priority, static_cast(core), this); } } } diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index e0f3b62047..7ee437e174 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -35,6 +35,9 @@ enum ThreadProcessorId : s32 { /// Run thread on the ideal core specified by the process. THREADPROCESSORID_IDEAL = -2, + /// when setting Core and Affiny, keeps the ideal core intact + THREADDONTCHANGE_IDEAL = -3, + /// Indicates that the preferred processor ID shouldn't be updated in /// a core mask setting operation. THREADPROCESSORID_DONT_UPDATE = -3, @@ -95,12 +98,6 @@ enum class ThreadSchedMasks : u32 { ForcePauseMask = 0x0070, }; -enum class CoreFlags : s32 { - IgnoreIdealCore = -1, - ProcessIdealCore = -2, - DontChangeIdealCore = -3, -}; - class Thread final : public WaitObject { public: using MutexWaitingThreads = std::vector>; From 64e652d8cbcc4cc67442879ab7e379d62b72703c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 12 Oct 2019 10:55:34 -0400 Subject: [PATCH 29/29] Kernel Thread: Cleanup THREADPROCESSORID_DONT_UPDATE. 
--- src/core/hle/kernel/thread.cpp | 2 +- src/core/hle/kernel/thread.h | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index aeb20b24b0..962530d2d2 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -410,7 +410,7 @@ ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) { }; const bool use_override = affinity_override_count != 0; - if (new_core == THREADDONTCHANGE_IDEAL) { + if (new_core == THREADPROCESSORID_DONT_UPDATE) { new_core = use_override ? ideal_core_override : ideal_core; if ((new_affinity_mask & (1ULL << new_core)) == 0) { return ERR_INVALID_COMBINATION; diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 7ee437e174..c9870873d5 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -35,9 +35,6 @@ enum ThreadProcessorId : s32 { /// Run thread on the ideal core specified by the process. THREADPROCESSORID_IDEAL = -2, - /// when setting Core and Affiny, keeps the ideal core intact - THREADDONTCHANGE_IDEAL = -3, - /// Indicates that the preferred processor ID shouldn't be updated in /// a core mask setting operation. THREADPROCESSORID_DONT_UPDATE = -3,