Merge pull request #9889 from Morph1984/time-is-ticking
core_timing: Reduce CPU usage on Windows
This commit is contained in:
commit
a7792e5ff8
16 changed files with 324 additions and 65 deletions
|
@ -477,8 +477,8 @@ if (APPLE)
|
||||||
find_library(COCOA_LIBRARY Cocoa)
|
find_library(COCOA_LIBRARY Cocoa)
|
||||||
set(PLATFORM_LIBRARIES ${COCOA_LIBRARY} ${IOKIT_LIBRARY} ${COREVIDEO_LIBRARY})
|
set(PLATFORM_LIBRARIES ${COCOA_LIBRARY} ${IOKIT_LIBRARY} ${COREVIDEO_LIBRARY})
|
||||||
elseif (WIN32)
|
elseif (WIN32)
|
||||||
# WSAPoll and SHGetKnownFolderPath (AppData/Roaming) didn't exist before WinNT 6.x (Vista)
|
# Target Windows 10
|
||||||
add_definitions(-D_WIN32_WINNT=0x0600 -DWINVER=0x0600)
|
add_definitions(-D_WIN32_WINNT=0x0A00 -DWINVER=0x0A00)
|
||||||
set(PLATFORM_LIBRARIES winmm ws2_32 iphlpapi)
|
set(PLATFORM_LIBRARIES winmm ws2_32 iphlpapi)
|
||||||
if (MINGW)
|
if (MINGW)
|
||||||
# PSAPI is the Process Status API
|
# PSAPI is the Process Status API
|
||||||
|
|
6
dist/yuzu.manifest
vendored
6
dist/yuzu.manifest
vendored
|
@ -36,12 +36,6 @@ SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
<application>
|
<application>
|
||||||
<!-- Windows 10 -->
|
<!-- Windows 10 -->
|
||||||
<supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/>
|
<supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/>
|
||||||
<!-- Windows 8.1 -->
|
|
||||||
<supportedOS Id="{1f676c76-80e1-4239-95bb-83d0f6d0da78}"/>
|
|
||||||
<!-- Windows 8 -->
|
|
||||||
<supportedOS Id="{4a2f28e3-53b9-4441-ba9c-d69d4a4a6e38}"/>
|
|
||||||
<!-- Windows 7 -->
|
|
||||||
<supportedOS Id="{35138b9a-5d96-4fbd-8e2d-a2440225f93a}"/>
|
|
||||||
</application>
|
</application>
|
||||||
</compatibility>
|
</compatibility>
|
||||||
<trustInfo
|
<trustInfo
|
||||||
|
|
|
@ -113,6 +113,8 @@ add_library(common STATIC
|
||||||
socket_types.h
|
socket_types.h
|
||||||
spin_lock.cpp
|
spin_lock.cpp
|
||||||
spin_lock.h
|
spin_lock.h
|
||||||
|
steady_clock.cpp
|
||||||
|
steady_clock.h
|
||||||
stream.cpp
|
stream.cpp
|
||||||
stream.h
|
stream.h
|
||||||
string_util.cpp
|
string_util.cpp
|
||||||
|
@ -142,6 +144,14 @@ add_library(common STATIC
|
||||||
zstd_compression.h
|
zstd_compression.h
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if (WIN32)
|
||||||
|
target_sources(common PRIVATE
|
||||||
|
windows/timer_resolution.cpp
|
||||||
|
windows/timer_resolution.h
|
||||||
|
)
|
||||||
|
target_link_libraries(common PRIVATE ntdll)
|
||||||
|
endif()
|
||||||
|
|
||||||
if(ARCHITECTURE_x86_64)
|
if(ARCHITECTURE_x86_64)
|
||||||
target_sources(common
|
target_sources(common
|
||||||
PRIVATE
|
PRIVATE
|
||||||
|
|
56
src/common/steady_clock.cpp
Normal file
56
src/common/steady_clock.cpp
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#if defined(_WIN32)
|
||||||
|
#include <windows.h>
|
||||||
|
#else
|
||||||
|
#include <time.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "common/steady_clock.h"
|
||||||
|
|
||||||
|
namespace Common {
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
/// Returns the frequency reported by QueryPerformanceFrequency, i.e. the number of
/// performance-counter ticks per second used by SteadyClock::Now() for conversion.
static s64 WindowsQueryPerformanceFrequency() {
    LARGE_INTEGER ticks_per_second;
    QueryPerformanceFrequency(&ticks_per_second);
    return ticks_per_second.QuadPart;
}
|
||||||
|
|
||||||
|
/// Returns the current raw value reported by QueryPerformanceCounter, in ticks.
static s64 WindowsQueryPerformanceCounter() {
    LARGE_INTEGER ticks;
    QueryPerformanceCounter(&ticks);
    return ticks.QuadPart;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/// Returns the current time point of the steady clock, expressed in nanoseconds.
///
/// Windows reads the performance counter, macOS uses clock_gettime_nsec_np with
/// CLOCK_MONOTONIC_RAW, and every other platform uses clock_gettime with CLOCK_MONOTONIC.
SteadyClock::time_point SteadyClock::Now() noexcept {
#if defined(_WIN32)
    // The frequency is queried on the first call only and cached in a function-local static.
    static const auto ticks_per_second = WindowsQueryPerformanceFrequency();
    const auto ticks = WindowsQueryPerformanceCounter();

    // Fast path for a 10 MHz performance counter, a very common frequency on modern PCs.
    // A single multiplication replaces the division-based conversion below, which can
    // roughly double the performance of this function.
    static constexpr s64 TenMHz = 10'000'000;

    if (ticks_per_second == TenMHz) [[likely]] {
        static_assert(period::den % TenMHz == 0);
        static constexpr s64 NsPerTick = period::den / TenMHz;
        return time_point{duration{ticks * NsPerTick}};
    }

    // Generic path: convert whole seconds and the sub-second remainder separately.
    const auto whole_seconds_ns = (ticks / ticks_per_second) * period::den;
    const auto remainder_ns = (ticks % ticks_per_second) * period::den / ticks_per_second;
    return time_point{duration{whole_seconds_ns + remainder_ns}};
#elif defined(__APPLE__)
    return time_point{duration{clock_gettime_nsec_np(CLOCK_MONOTONIC_RAW)}};
#else
    timespec monotonic_now;
    clock_gettime(CLOCK_MONOTONIC, &monotonic_now);
    return time_point{std::chrono::seconds{monotonic_now.tv_sec} +
                      std::chrono::nanoseconds{monotonic_now.tv_nsec}};
#endif
}
|
||||||
|
|
||||||
|
}; // namespace Common
|
23
src/common/steady_clock.h
Normal file
23
src/common/steady_clock.h
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <chrono>
|
||||||
|
|
||||||
|
#include "common/common_types.h"
|
||||||
|
|
||||||
|
namespace Common {
|
||||||
|
|
||||||
|
struct SteadyClock {
|
||||||
|
using rep = s64;
|
||||||
|
using period = std::nano;
|
||||||
|
using duration = std::chrono::nanoseconds;
|
||||||
|
using time_point = std::chrono::time_point<SteadyClock>;
|
||||||
|
|
||||||
|
static constexpr bool is_steady = true;
|
||||||
|
|
||||||
|
[[nodiscard]] static time_point Now() noexcept;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Common
|
|
@ -1,6 +1,7 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include "common/steady_clock.h"
|
||||||
#include "common/uint128.h"
|
#include "common/uint128.h"
|
||||||
#include "common/wall_clock.h"
|
#include "common/wall_clock.h"
|
||||||
|
|
||||||
|
@ -11,45 +12,32 @@
|
||||||
|
|
||||||
namespace Common {
|
namespace Common {
|
||||||
|
|
||||||
using base_timer = std::chrono::steady_clock;
|
|
||||||
using base_time_point = std::chrono::time_point<base_timer>;
|
|
||||||
|
|
||||||
class StandardWallClock final : public WallClock {
|
class StandardWallClock final : public WallClock {
|
||||||
public:
|
public:
|
||||||
explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_)
|
explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_)
|
||||||
: WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, false) {
|
: WallClock{emulated_cpu_frequency_, emulated_clock_frequency_, false},
|
||||||
start_time = base_timer::now();
|
start_time{SteadyClock::Now()} {}
|
||||||
}
|
|
||||||
|
|
||||||
std::chrono::nanoseconds GetTimeNS() override {
|
std::chrono::nanoseconds GetTimeNS() override {
|
||||||
base_time_point current = base_timer::now();
|
return SteadyClock::Now() - start_time;
|
||||||
auto elapsed = current - start_time;
|
|
||||||
return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::chrono::microseconds GetTimeUS() override {
|
std::chrono::microseconds GetTimeUS() override {
|
||||||
base_time_point current = base_timer::now();
|
return std::chrono::duration_cast<std::chrono::microseconds>(GetTimeNS());
|
||||||
auto elapsed = current - start_time;
|
|
||||||
return std::chrono::duration_cast<std::chrono::microseconds>(elapsed);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::chrono::milliseconds GetTimeMS() override {
|
std::chrono::milliseconds GetTimeMS() override {
|
||||||
base_time_point current = base_timer::now();
|
return std::chrono::duration_cast<std::chrono::milliseconds>(GetTimeNS());
|
||||||
auto elapsed = current - start_time;
|
|
||||||
return std::chrono::duration_cast<std::chrono::milliseconds>(elapsed);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 GetClockCycles() override {
|
u64 GetClockCycles() override {
|
||||||
std::chrono::nanoseconds time_now = GetTimeNS();
|
const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_clock_frequency);
|
||||||
const u128 temporary =
|
return Common::Divide128On32(temp, NS_RATIO).first;
|
||||||
Common::Multiply64Into128(time_now.count(), emulated_clock_frequency);
|
|
||||||
return Common::Divide128On32(temporary, 1000000000).first;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 GetCPUCycles() override {
|
u64 GetCPUCycles() override {
|
||||||
std::chrono::nanoseconds time_now = GetTimeNS();
|
const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_cpu_frequency);
|
||||||
const u128 temporary = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency);
|
return Common::Divide128On32(temp, NS_RATIO).first;
|
||||||
return Common::Divide128On32(temporary, 1000000000).first;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Pause([[maybe_unused]] bool is_paused) override {
|
void Pause([[maybe_unused]] bool is_paused) override {
|
||||||
|
@ -57,7 +45,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
base_time_point start_time;
|
SteadyClock::time_point start_time;
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef ARCHITECTURE_x86_64
|
#ifdef ARCHITECTURE_x86_64
|
||||||
|
@ -93,4 +81,9 @@ std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency,
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/// Creates a StandardWallClock for the given emulated CPU and clock frequencies and
/// returns it through the WallClock interface.
std::unique_ptr<WallClock> CreateStandardWallClock(u64 emulated_cpu_frequency,
                                                   u64 emulated_clock_frequency) {
    auto standard_clock =
        std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency);
    return standard_clock;
}
|
||||||
|
|
||||||
} // namespace Common
|
} // namespace Common
|
||||||
|
|
|
@ -55,4 +55,7 @@ private:
|
||||||
[[nodiscard]] std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency,
|
[[nodiscard]] std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency,
|
||||||
u64 emulated_clock_frequency);
|
u64 emulated_clock_frequency);
|
||||||
|
|
||||||
|
[[nodiscard]] std::unique_ptr<WallClock> CreateStandardWallClock(u64 emulated_cpu_frequency,
|
||||||
|
u64 emulated_clock_frequency);
|
||||||
|
|
||||||
} // namespace Common
|
} // namespace Common
|
||||||
|
|
109
src/common/windows/timer_resolution.cpp
Normal file
109
src/common/windows/timer_resolution.cpp
Normal file
|
@ -0,0 +1,109 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include <windows.h>
|
||||||
|
|
||||||
|
#include "common/windows/timer_resolution.h"
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
// http://undocumented.ntinternals.net/index.html?page=UserMode%2FUndocumented%20Functions%2FTime%2FNtQueryTimerResolution.html
|
||||||
|
NTSYSAPI LONG NTAPI NtQueryTimerResolution(PULONG MinimumResolution, PULONG MaximumResolution,
|
||||||
|
PULONG CurrentResolution);
|
||||||
|
|
||||||
|
// http://undocumented.ntinternals.net/index.html?page=UserMode%2FUndocumented%20Functions%2FTime%2FNtSetTimerResolution.html
|
||||||
|
NTSYSAPI LONG NTAPI NtSetTimerResolution(ULONG DesiredResolution, BOOLEAN SetResolution,
|
||||||
|
PULONG CurrentResolution);
|
||||||
|
|
||||||
|
// http://undocumented.ntinternals.net/index.html?page=UserMode%2FUndocumented%20Functions%2FNT%20Objects%2FThread%2FNtDelayExecution.html
|
||||||
|
NTSYSAPI LONG NTAPI NtDelayExecution(BOOLEAN Alertable, PLARGE_INTEGER DelayInterval);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Defines for compatibility with older Windows 10 SDKs.
|
||||||
|
|
||||||
|
#ifndef PROCESS_POWER_THROTTLING_EXECUTION_SPEED
|
||||||
|
#define PROCESS_POWER_THROTTLING_EXECUTION_SPEED 0x1
|
||||||
|
#endif
|
||||||
|
#ifndef PROCESS_POWER_THROTTLING_IGNORE_TIMER_RESOLUTION
|
||||||
|
#define PROCESS_POWER_THROTTLING_IGNORE_TIMER_RESOLUTION 0x4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace Common::Windows {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
using namespace std::chrono;
|
||||||
|
|
||||||
|
/// Converts a count of 100-nanosecond units into nanoseconds.
///
/// The count is widened to the 64-bit duration representation before multiplying, so the
/// product cannot wrap around in the narrower ULONG type.
constexpr nanoseconds ToNS(ULONG hundred_ns) {
    return nanoseconds{static_cast<nanoseconds::rep>(hundred_ns) * 100};
}
|
||||||
|
|
||||||
|
/// Converts a nanosecond duration into a count of 100-nanosecond units.
///
/// The division is done in the 64-bit duration representation and only the quotient is
/// narrowed to ULONG; narrowing first would truncate large nanosecond counts before dividing.
constexpr ULONG ToHundredNS(nanoseconds ns) {
    return static_cast<ULONG>(ns.count() / 100);
}
|
||||||
|
|
||||||
|
struct TimerResolution {
|
||||||
|
std::chrono::nanoseconds minimum;
|
||||||
|
std::chrono::nanoseconds maximum;
|
||||||
|
std::chrono::nanoseconds current;
|
||||||
|
};
|
||||||
|
|
||||||
|
TimerResolution GetTimerResolution() {
|
||||||
|
ULONG MinimumTimerResolution;
|
||||||
|
ULONG MaximumTimerResolution;
|
||||||
|
ULONG CurrentTimerResolution;
|
||||||
|
NtQueryTimerResolution(&MinimumTimerResolution, &MaximumTimerResolution,
|
||||||
|
&CurrentTimerResolution);
|
||||||
|
return {
|
||||||
|
.minimum{ToNS(MinimumTimerResolution)},
|
||||||
|
.maximum{ToNS(MaximumTimerResolution)},
|
||||||
|
.current{ToNS(CurrentTimerResolution)},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetHighQoS() {
|
||||||
|
// https://learn.microsoft.com/en-us/windows/win32/procthread/quality-of-service
|
||||||
|
PROCESS_POWER_THROTTLING_STATE PowerThrottling{
|
||||||
|
.Version{PROCESS_POWER_THROTTLING_CURRENT_VERSION},
|
||||||
|
.ControlMask{PROCESS_POWER_THROTTLING_EXECUTION_SPEED |
|
||||||
|
PROCESS_POWER_THROTTLING_IGNORE_TIMER_RESOLUTION},
|
||||||
|
.StateMask{},
|
||||||
|
};
|
||||||
|
SetProcessInformation(GetCurrentProcess(), ProcessPowerThrottling, &PowerThrottling,
|
||||||
|
sizeof(PROCESS_POWER_THROTTLING_STATE));
|
||||||
|
}
|
||||||
|
|
||||||
|
} // Anonymous namespace
|
||||||
|
|
||||||
|
/// Returns the minimum timer resolution reported by the system, in nanoseconds.
nanoseconds GetMinimumTimerResolution() {
    const auto resolution = GetTimerResolution();
    return resolution.minimum;
}
|
||||||
|
|
||||||
|
/// Returns the maximum timer resolution reported by the system, in nanoseconds.
nanoseconds GetMaximumTimerResolution() {
    const auto resolution = GetTimerResolution();
    return resolution.maximum;
}
|
||||||
|
|
||||||
|
/// Returns the timer resolution currently in effect, in nanoseconds.
nanoseconds GetCurrentTimerResolution() {
    const auto resolution = GetTimerResolution();
    return resolution.current;
}
|
||||||
|
|
||||||
|
/// Requests a new timer resolution and returns the resolution reported back by the call.
///
/// @param timer_resolution Desired timer resolution in nanoseconds.
///
/// @returns The current timer resolution as written by NtSetTimerResolution.
nanoseconds SetCurrentTimerResolution(nanoseconds timer_resolution) {
    const auto DesiredTimerResolution = ToHundredNS(timer_resolution);
    // Zero-initialized so that an indeterminate value is never converted should the call
    // fail without writing its output (the returned status is not inspected).
    ULONG CurrentTimerResolution{};
    // TRUE is passed as SetResolution, i.e. the resolution is being requested.
    NtSetTimerResolution(DesiredTimerResolution, TRUE, &CurrentTimerResolution);
    return ToNS(CurrentTimerResolution);
}
|
||||||
|
|
||||||
|
/// Applies the high-QoS process settings, then requests the maximum supported timer
/// resolution and returns the resolution reported back.
nanoseconds SetCurrentTimerResolutionToMaximum() {
    SetHighQoS();
    const nanoseconds maximum_resolution = GetMaximumTimerResolution();
    return SetCurrentTimerResolution(maximum_resolution);
}
|
||||||
|
|
||||||
|
void SleepForOneTick() {
|
||||||
|
LARGE_INTEGER DelayInterval{
|
||||||
|
.QuadPart{-1},
|
||||||
|
};
|
||||||
|
NtDelayExecution(FALSE, &DelayInterval);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Common::Windows
|
38
src/common/windows/timer_resolution.h
Normal file
38
src/common/windows/timer_resolution.h
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <chrono>
|
||||||
|
|
||||||
|
namespace Common::Windows {
|
||||||
|
|
||||||
|
/// Returns the minimum (least precise) supported timer resolution in nanoseconds.
|
||||||
|
std::chrono::nanoseconds GetMinimumTimerResolution();
|
||||||
|
|
||||||
|
/// Returns the maximum (most precise) supported timer resolution in nanoseconds.
|
||||||
|
std::chrono::nanoseconds GetMaximumTimerResolution();
|
||||||
|
|
||||||
|
/// Returns the current timer resolution in nanoseconds.
|
||||||
|
std::chrono::nanoseconds GetCurrentTimerResolution();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the current timer resolution.
|
||||||
|
*
|
||||||
|
* @param timer_resolution Timer resolution in nanoseconds.
|
||||||
|
*
|
||||||
|
* @returns The current timer resolution.
|
||||||
|
*/
|
||||||
|
std::chrono::nanoseconds SetCurrentTimerResolution(std::chrono::nanoseconds timer_resolution);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the current timer resolution to the maximum supported timer resolution.
|
||||||
|
*
|
||||||
|
* @returns The current timer resolution.
|
||||||
|
*/
|
||||||
|
std::chrono::nanoseconds SetCurrentTimerResolutionToMaximum();
|
||||||
|
|
||||||
|
/// Sleep for one tick of the current timer resolution.
|
||||||
|
void SleepForOneTick();
|
||||||
|
|
||||||
|
} // namespace Common::Windows
|
|
@ -6,6 +6,7 @@
|
||||||
#include <thread>
|
#include <thread>
|
||||||
|
|
||||||
#include "common/atomic_ops.h"
|
#include "common/atomic_ops.h"
|
||||||
|
#include "common/steady_clock.h"
|
||||||
#include "common/uint128.h"
|
#include "common/uint128.h"
|
||||||
#include "common/x64/native_clock.h"
|
#include "common/x64/native_clock.h"
|
||||||
|
|
||||||
|
@ -39,6 +40,12 @@ static u64 FencedRDTSC() {
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
template <u64 Nearest>
|
||||||
|
static u64 RoundToNearest(u64 value) {
|
||||||
|
const auto mod = value % Nearest;
|
||||||
|
return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod);
|
||||||
|
}
|
||||||
|
|
||||||
u64 EstimateRDTSCFrequency() {
|
u64 EstimateRDTSCFrequency() {
|
||||||
// Discard the first result measuring the rdtsc.
|
// Discard the first result measuring the rdtsc.
|
||||||
FencedRDTSC();
|
FencedRDTSC();
|
||||||
|
@ -46,18 +53,18 @@ u64 EstimateRDTSCFrequency() {
|
||||||
FencedRDTSC();
|
FencedRDTSC();
|
||||||
|
|
||||||
// Get the current time.
|
// Get the current time.
|
||||||
const auto start_time = std::chrono::steady_clock::now();
|
const auto start_time = Common::SteadyClock::Now();
|
||||||
const u64 tsc_start = FencedRDTSC();
|
const u64 tsc_start = FencedRDTSC();
|
||||||
// Wait for 200 milliseconds.
|
// Wait for 250 milliseconds.
|
||||||
std::this_thread::sleep_for(std::chrono::milliseconds{200});
|
std::this_thread::sleep_for(std::chrono::milliseconds{250});
|
||||||
const auto end_time = std::chrono::steady_clock::now();
|
const auto end_time = Common::SteadyClock::Now();
|
||||||
const u64 tsc_end = FencedRDTSC();
|
const u64 tsc_end = FencedRDTSC();
|
||||||
// Calculate differences.
|
// Calculate differences.
|
||||||
const u64 timer_diff = static_cast<u64>(
|
const u64 timer_diff = static_cast<u64>(
|
||||||
std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count());
|
std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count());
|
||||||
const u64 tsc_diff = tsc_end - tsc_start;
|
const u64 tsc_diff = tsc_end - tsc_start;
|
||||||
const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff);
|
const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff);
|
||||||
return tsc_freq;
|
return RoundToNearest<1000>(tsc_freq);
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace X64 {
|
namespace X64 {
|
||||||
|
|
|
@ -6,6 +6,10 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
#include "common/windows/timer_resolution.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "common/microprofile.h"
|
#include "common/microprofile.h"
|
||||||
#include "core/core_timing.h"
|
#include "core/core_timing.h"
|
||||||
#include "core/core_timing_util.h"
|
#include "core/core_timing_util.h"
|
||||||
|
@ -38,7 +42,8 @@ struct CoreTiming::Event {
|
||||||
};
|
};
|
||||||
|
|
||||||
CoreTiming::CoreTiming()
|
CoreTiming::CoreTiming()
|
||||||
: clock{Common::CreateBestMatchingClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)} {}
|
: cpu_clock{Common::CreateBestMatchingClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)},
|
||||||
|
event_clock{Common::CreateStandardWallClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)} {}
|
||||||
|
|
||||||
CoreTiming::~CoreTiming() {
|
CoreTiming::~CoreTiming() {
|
||||||
Reset();
|
Reset();
|
||||||
|
@ -185,15 +190,15 @@ void CoreTiming::ResetTicks() {
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 CoreTiming::GetCPUTicks() const {
|
u64 CoreTiming::GetCPUTicks() const {
|
||||||
if (is_multicore) {
|
if (is_multicore) [[likely]] {
|
||||||
return clock->GetCPUCycles();
|
return cpu_clock->GetCPUCycles();
|
||||||
}
|
}
|
||||||
return ticks;
|
return ticks;
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 CoreTiming::GetClockTicks() const {
|
u64 CoreTiming::GetClockTicks() const {
|
||||||
if (is_multicore) {
|
if (is_multicore) [[likely]] {
|
||||||
return clock->GetClockCycles();
|
return cpu_clock->GetClockCycles();
|
||||||
}
|
}
|
||||||
return CpuCyclesToClockCycles(ticks);
|
return CpuCyclesToClockCycles(ticks);
|
||||||
}
|
}
|
||||||
|
@ -252,21 +257,20 @@ void CoreTiming::ThreadLoop() {
|
||||||
const auto next_time = Advance();
|
const auto next_time = Advance();
|
||||||
if (next_time) {
|
if (next_time) {
|
||||||
// There are more events left in the queue, wait until the next event.
|
// There are more events left in the queue, wait until the next event.
|
||||||
const auto wait_time = *next_time - GetGlobalTimeNs().count();
|
auto wait_time = *next_time - GetGlobalTimeNs().count();
|
||||||
if (wait_time > 0) {
|
if (wait_time > 0) {
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
// Assume a timer resolution of 1ms.
|
const auto timer_resolution_ns =
|
||||||
static constexpr s64 TimerResolutionNS = 1000000;
|
Common::Windows::GetCurrentTimerResolution().count();
|
||||||
|
|
||||||
// Sleep in discrete intervals of the timer resolution, and spin the rest.
|
while (!paused && !event.IsSet() && wait_time > 0) {
|
||||||
const auto sleep_time = wait_time - (wait_time % TimerResolutionNS);
|
wait_time = *next_time - GetGlobalTimeNs().count();
|
||||||
if (sleep_time > 0) {
|
|
||||||
event.WaitFor(std::chrono::nanoseconds(sleep_time));
|
|
||||||
}
|
|
||||||
|
|
||||||
while (!paused && !event.IsSet() && GetGlobalTimeNs().count() < *next_time) {
|
if (wait_time >= timer_resolution_ns) {
|
||||||
// Yield to reduce thread starvation.
|
Common::Windows::SleepForOneTick();
|
||||||
std::this_thread::yield();
|
} else {
|
||||||
|
std::this_thread::yield();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (event.IsSet()) {
|
if (event.IsSet()) {
|
||||||
|
@ -285,9 +289,9 @@ void CoreTiming::ThreadLoop() {
|
||||||
}
|
}
|
||||||
|
|
||||||
paused_set = true;
|
paused_set = true;
|
||||||
clock->Pause(true);
|
event_clock->Pause(true);
|
||||||
pause_event.Wait();
|
pause_event.Wait();
|
||||||
clock->Pause(false);
|
event_clock->Pause(false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -303,16 +307,23 @@ void CoreTiming::Reset() {
|
||||||
has_started = false;
|
has_started = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::chrono::nanoseconds CoreTiming::GetCPUTimeNs() const {
|
||||||
|
if (is_multicore) [[likely]] {
|
||||||
|
return cpu_clock->GetTimeNS();
|
||||||
|
}
|
||||||
|
return CyclesToNs(ticks);
|
||||||
|
}
|
||||||
|
|
||||||
std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const {
|
std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const {
|
||||||
if (is_multicore) {
|
if (is_multicore) [[likely]] {
|
||||||
return clock->GetTimeNS();
|
return event_clock->GetTimeNS();
|
||||||
}
|
}
|
||||||
return CyclesToNs(ticks);
|
return CyclesToNs(ticks);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
|
std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
|
||||||
if (is_multicore) {
|
if (is_multicore) [[likely]] {
|
||||||
return clock->GetTimeUS();
|
return event_clock->GetTimeUS();
|
||||||
}
|
}
|
||||||
return CyclesToUs(ticks);
|
return CyclesToUs(ticks);
|
||||||
}
|
}
|
||||||
|
|
|
@ -122,6 +122,9 @@ public:
|
||||||
/// Returns current time in emulated in Clock cycles
|
/// Returns current time in emulated in Clock cycles
|
||||||
u64 GetClockTicks() const;
|
u64 GetClockTicks() const;
|
||||||
|
|
||||||
|
/// Returns current time in nanoseconds.
|
||||||
|
std::chrono::nanoseconds GetCPUTimeNs() const;
|
||||||
|
|
||||||
/// Returns current time in microseconds.
|
/// Returns current time in microseconds.
|
||||||
std::chrono::microseconds GetGlobalTimeUs() const;
|
std::chrono::microseconds GetGlobalTimeUs() const;
|
||||||
|
|
||||||
|
@ -139,7 +142,8 @@ private:
|
||||||
|
|
||||||
void Reset();
|
void Reset();
|
||||||
|
|
||||||
std::unique_ptr<Common::WallClock> clock;
|
std::unique_ptr<Common::WallClock> cpu_clock;
|
||||||
|
std::unique_ptr<Common::WallClock> event_clock;
|
||||||
|
|
||||||
s64 global_timer = 0;
|
s64 global_timer = 0;
|
||||||
|
|
||||||
|
|
|
@ -13,11 +13,9 @@ namespace Core {
|
||||||
|
|
||||||
namespace Hardware {
|
namespace Hardware {
|
||||||
|
|
||||||
// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
|
constexpr u64 BASE_CLOCK_RATE = 1'020'000'000; // Default CPU Frequency = 1020 MHz
|
||||||
// The exact value used is of course unverified.
|
constexpr u64 CNTFREQ = 19'200'000; // CNTPCT_EL0 Frequency = 19.2 MHz
|
||||||
constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch cpu frequency is 1020MHz un/docked
|
constexpr u32 NUM_CPU_CORES = 4; // Number of CPU Cores
|
||||||
constexpr u64 CNTFREQ = 19200000; // Switch's hardware clock speed
|
|
||||||
constexpr u32 NUM_CPU_CORES = 4; // Number of CPU Cores
|
|
||||||
|
|
||||||
// Virtual to Physical core map.
|
// Virtual to Physical core map.
|
||||||
constexpr std::array<s32, Common::BitSize<u64>()> VirtualToPhysicalCoreMap{
|
constexpr std::array<s32, Common::BitSize<u64>()> VirtualToPhysicalCoreMap{
|
||||||
|
|
|
@ -197,7 +197,7 @@ struct GPU::Impl {
|
||||||
constexpr u64 gpu_ticks_num = 384;
|
constexpr u64 gpu_ticks_num = 384;
|
||||||
constexpr u64 gpu_ticks_den = 625;
|
constexpr u64 gpu_ticks_den = 625;
|
||||||
|
|
||||||
u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count();
|
u64 nanoseconds = system.CoreTiming().GetCPUTimeNs().count();
|
||||||
if (Settings::values.use_fast_gpu_time.GetValue()) {
|
if (Settings::values.use_fast_gpu_time.GetValue()) {
|
||||||
nanoseconds /= 256;
|
nanoseconds /= 256;
|
||||||
}
|
}
|
||||||
|
|
|
@ -91,6 +91,9 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
|
||||||
#include "common/microprofile.h"
|
#include "common/microprofile.h"
|
||||||
#include "common/scm_rev.h"
|
#include "common/scm_rev.h"
|
||||||
#include "common/scope_exit.h"
|
#include "common/scope_exit.h"
|
||||||
|
#ifdef _WIN32
|
||||||
|
#include "common/windows/timer_resolution.h"
|
||||||
|
#endif
|
||||||
#ifdef ARCHITECTURE_x86_64
|
#ifdef ARCHITECTURE_x86_64
|
||||||
#include "common/x64/cpu_detect.h"
|
#include "common/x64/cpu_detect.h"
|
||||||
#endif
|
#endif
|
||||||
|
@ -377,6 +380,12 @@ GMainWindow::GMainWindow(std::unique_ptr<Config> config_, bool has_broken_vulkan
|
||||||
LOG_INFO(Frontend, "Host RAM: {:.2f} GiB",
|
LOG_INFO(Frontend, "Host RAM: {:.2f} GiB",
|
||||||
Common::GetMemInfo().TotalPhysicalMemory / f64{1_GiB});
|
Common::GetMemInfo().TotalPhysicalMemory / f64{1_GiB});
|
||||||
LOG_INFO(Frontend, "Host Swap: {:.2f} GiB", Common::GetMemInfo().TotalSwapMemory / f64{1_GiB});
|
LOG_INFO(Frontend, "Host Swap: {:.2f} GiB", Common::GetMemInfo().TotalSwapMemory / f64{1_GiB});
|
||||||
|
#ifdef _WIN32
|
||||||
|
LOG_INFO(Frontend, "Host Timer Resolution: {:.4f} ms",
|
||||||
|
std::chrono::duration_cast<std::chrono::duration<f64, std::milli>>(
|
||||||
|
Common::Windows::SetCurrentTimerResolutionToMaximum())
|
||||||
|
.count());
|
||||||
|
#endif
|
||||||
UpdateWindowTitle();
|
UpdateWindowTitle();
|
||||||
|
|
||||||
show();
|
show();
|
||||||
|
|
|
@ -42,6 +42,8 @@
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
|
|
||||||
#include <shellapi.h>
|
#include <shellapi.h>
|
||||||
|
|
||||||
|
#include "common/windows/timer_resolution.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#undef _UNICODE
|
#undef _UNICODE
|
||||||
|
@ -314,6 +316,8 @@ int main(int argc, char** argv) {
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
LocalFree(argv_w);
|
LocalFree(argv_w);
|
||||||
|
|
||||||
|
Common::Windows::SetCurrentTimerResolutionToMaximum();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
MicroProfileOnThreadCreate("EmuThread");
|
MicroProfileOnThreadCreate("EmuThread");
|
||||||
|
|
Loading…
Add table
Reference in a new issue