From e3a8292495cf0fb4297be27c696649dc44e011f0 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Fri, 29 Apr 2016 00:07:10 -0700 Subject: [PATCH] Common: Remove section measurement from profiler (#1731) This has been entirely superseded by MicroProfile. The rest of the code can go when a simpler frametime/FPS meter is added to the GUI. --- src/citra_qt/debugger/profiler.cpp | 26 +-- src/common/CMakeLists.txt | 1 - src/common/microprofileui.h | 3 + src/common/profiler.cpp | 82 ---------- src/common/profiler.h | 152 ------------------ src/common/profiler_reporting.h | 27 +--- .../arm/dyncom/arm_dyncom_interpreter.cpp | 7 - src/core/hle/service/gsp_gpu.cpp | 1 - src/core/hle/svc.cpp | 4 - src/video_core/command_processor.cpp | 4 - src/video_core/rasterizer.cpp | 3 - .../renderer_opengl/gl_rasterizer.cpp | 1 - src/video_core/shader/shader.cpp | 3 - 13 files changed, 8 insertions(+), 306 deletions(-) delete mode 100644 src/common/profiler.h diff --git a/src/citra_qt/debugger/profiler.cpp b/src/citra_qt/debugger/profiler.cpp index e90973b8e..7bb010f77 100644 --- a/src/citra_qt/debugger/profiler.cpp +++ b/src/citra_qt/debugger/profiler.cpp @@ -9,6 +9,7 @@ #include "citra_qt/debugger/profiler.h" #include "citra_qt/util/util.h" +#include "common/common_types.h" #include "common/microprofile.h" #include "common/profiler_reporting.h" @@ -36,21 +37,9 @@ static QVariant GetDataForColumn(int col, const AggregatedDuration& duration) } } -static const TimingCategoryInfo* GetCategoryInfo(int id) -{ - const auto& categories = GetProfilingManager().GetTimingCategoriesInfo(); - if ((size_t)id >= categories.size()) { - return nullptr; - } else { - return &categories[id]; - } -} - ProfilerModel::ProfilerModel(QObject* parent) : QAbstractItemModel(parent) { updateProfilingInfo(); - const auto& categories = GetProfilingManager().GetTimingCategoriesInfo(); - results.time_per_category.resize(categories.size()); } QVariant ProfilerModel::headerData(int section, Qt::Orientation orientation, int role) const @@ -87,7 +76,7 @@ int ProfilerModel::rowCount(const QModelIndex& parent) const if (parent.isValid()) { return 0; } else { - return static_cast(results.time_per_category.size() + 2); + return 2; } } @@ -106,17 +95,6 @@ QVariant ProfilerModel::data(const QModelIndex& index, int role) const } else { return GetDataForColumn(index.column(), results.interframe_time); } - } else { - if (index.column() == 0) { - const TimingCategoryInfo* info = GetCategoryInfo(index.row() - 2); - return info != nullptr ? QString(info->name) : QVariant(); - } else { - if (index.row() - 2 < (int)results.time_per_category.size()) { - return GetDataForColumn(index.column(), results.time_per_category[index.row() - 2]); - } else { - return QVariant(); - } - } } } diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index c839ce173..aa6eee2a3 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -47,7 +47,6 @@ set(HEADERS microprofile.h microprofileui.h platform.h - profiler.h profiler_reporting.h scm_rev.h scope_exit.h diff --git a/src/common/microprofileui.h b/src/common/microprofileui.h index 97c369bd9..41abe6b75 100644 --- a/src/common/microprofileui.h +++ b/src/common/microprofileui.h @@ -13,4 +13,7 @@ #define MICROPROFILE_HELP_ALT "Right-Click" #define MICROPROFILE_HELP_MOD "Ctrl" +// This isn't included by microprofileui.h :( +#include // For std::abs + #include diff --git a/src/common/profiler.cpp b/src/common/profiler.cpp index 7792edd2f..49eb3f40c 100644 --- a/src/common/profiler.cpp +++ b/src/common/profiler.cpp @@ -7,71 +7,16 @@ #include #include "common/assert.h" -#include "common/profiler.h" #include "common/profiler_reporting.h" #include "common/synchronized_wrapper.h" -#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013. - #define WIN32_LEAN_AND_MEAN - #include // For QueryPerformanceCounter/Frequency -#endif - namespace Common { namespace Profiling { -#if ENABLE_PROFILING -thread_local Timer* Timer::current_timer = nullptr; -#endif - -#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013 -QPCClock::time_point QPCClock::now() { - static LARGE_INTEGER freq; - // Use this dummy local static to ensure this gets initialized once. - static BOOL dummy = QueryPerformanceFrequency(&freq); - - LARGE_INTEGER ticks; - QueryPerformanceCounter(&ticks); - - // This is prone to overflow when multiplying, which is why I'm using micro instead of nano. The - // correct way to approach this would be to just return ticks as a time_point and then subtract - // and do this conversion when creating a duration from two time_points, however, as far as I - // could tell the C++ requirements for these types are incompatible with this approach. - return time_point(duration(ticks.QuadPart * std::micro::den / freq.QuadPart)); -} -#endif - -TimingCategory::TimingCategory(const char* name, TimingCategory* parent) - : accumulated_duration(0) { - - ProfilingManager& manager = GetProfilingManager(); - category_id = manager.RegisterTimingCategory(this, name); - if (parent != nullptr) - manager.SetTimingCategoryParent(category_id, parent->category_id); -} - ProfilingManager::ProfilingManager() : last_frame_end(Clock::now()), this_frame_start(Clock::now()) { } -unsigned int ProfilingManager::RegisterTimingCategory(TimingCategory* category, const char* name) { - TimingCategoryInfo info; - info.category = category; - info.name = name; - info.parent = TimingCategoryInfo::NO_PARENT; - - unsigned int id = (unsigned int)timing_categories.size(); - timing_categories.push_back(std::move(info)); - - return id; -} - -void ProfilingManager::SetTimingCategoryParent(unsigned int category, unsigned int parent) { - ASSERT(category < timing_categories.size()); - ASSERT(parent < timing_categories.size()); - - timing_categories[category].parent = parent; -} - void ProfilingManager::BeginFrame() { this_frame_start = Clock::now(); } @@ -82,11 +27,6 @@ void ProfilingManager::FinishFrame() { results.interframe_time = now - last_frame_end; results.frame_time = now - this_frame_start; - results.time_per_category.resize(timing_categories.size()); - for (size_t i = 0; i < timing_categories.size(); ++i) { - results.time_per_category[i] = timing_categories[i].category->GetAccumulatedTime(); - } - last_frame_end = now; } @@ -100,26 +40,9 @@ void TimingResultsAggregator::Clear() { window_size = cursor = 0; } -void TimingResultsAggregator::SetNumberOfCategories(size_t n) { - size_t old_size = times_per_category.size(); - if (n == old_size) - return; - - times_per_category.resize(n); - - for (size_t i = old_size; i < n; ++i) { - times_per_category[i].resize(max_window_size, Duration::zero()); - } -} - void TimingResultsAggregator::AddFrame(const ProfilingFrameResult& frame_result) { - SetNumberOfCategories(frame_result.time_per_category.size()); - interframe_times[cursor] = frame_result.interframe_time; frame_times[cursor] = frame_result.frame_time; - for (size_t i = 0; i < frame_result.time_per_category.size(); ++i) { - times_per_category[i][cursor] = frame_result.time_per_category[i]; - } ++cursor; if (cursor == max_window_size) @@ -162,11 +85,6 @@ AggregatedFrameResult TimingResultsAggregator::GetAggregatedResults() const { result.fps = 0.0f; } - result.time_per_category.resize(times_per_category.size()); - for (size_t i = 0; i < times_per_category.size(); ++i) { - result.time_per_category[i] = AggregateField(times_per_category[i], window_size); - } - return result; } diff --git a/src/common/profiler.h b/src/common/profiler.h deleted file mode 100644 index 3e967b4bc..000000000 --- a/src/common/profiler.h +++ /dev/null @@ -1,152 +0,0 @@ -// Copyright 2015 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "common/assert.h" -#include "common/thread.h" - -namespace Common { -namespace Profiling { - -// If this is defined to 0, it turns all Timers into no-ops. -#ifndef ENABLE_PROFILING -#define ENABLE_PROFILING 1 -#endif - -#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013 -// MSVC up to 2013 doesn't use QueryPerformanceCounter for high_resolution_clock, so it has bad -// precision. We manually implement a clock based on QPC to get good results. - -struct QPCClock { - using duration = std::chrono::microseconds; - using time_point = std::chrono::time_point; - using rep = duration::rep; - using period = duration::period; - static const bool is_steady = false; - - static time_point now(); -}; - -using Clock = QPCClock; -#else -using Clock = std::chrono::high_resolution_clock; -#endif - -using Duration = Clock::duration; - -/** - * Represents a timing category that measured time can be accounted towards. Should be declared as a - * global variable and passed to Timers. - */ -class TimingCategory final { -public: - TimingCategory(const char* name, TimingCategory* parent = nullptr); - - unsigned int GetCategoryId() const { - return category_id; - } - - /// Adds some time to this category. Can safely be called from multiple threads at the same time. - void AddTime(Duration amount) { - std::atomic_fetch_add_explicit( - &accumulated_duration, amount.count(), - std::memory_order_relaxed); - } - - /** - * Atomically retrieves the accumulated measured time for this category and resets the counter - * to zero. Can be safely called concurrently with AddTime. - */ - Duration GetAccumulatedTime() { - return Duration(std::atomic_exchange_explicit( - &accumulated_duration, (Duration::rep)0, - std::memory_order_relaxed)); - } - -private: - unsigned int category_id; - std::atomic accumulated_duration; -}; - -/** - * Measures time elapsed between a call to Start and a call to Stop and attributes it to the given - * TimingCategory. Start/Stop can be called multiple times on the same timer, but each call must be - * appropriately paired. - * - * When a Timer is started, it automatically pauses a previously running timer on the same thread, - * which is resumed when it is stopped. As such, no special action needs to be taken to avoid - * double-accounting of time on two categories. - */ -class Timer { -public: - Timer(TimingCategory& category) : category(category) { - } - - void Start() { -#if ENABLE_PROFILING - ASSERT(!running); - previous_timer = current_timer; - current_timer = this; - if (previous_timer != nullptr) - previous_timer->StopTiming(); - - StartTiming(); -#endif - } - - void Stop() { -#if ENABLE_PROFILING - ASSERT(running); - StopTiming(); - - if (previous_timer != nullptr) - previous_timer->StartTiming(); - current_timer = previous_timer; -#endif - } - -private: -#if ENABLE_PROFILING - void StartTiming() { - start = Clock::now(); - running = true; - } - - void StopTiming() { - auto duration = Clock::now() - start; - running = false; - category.AddTime(std::chrono::duration_cast(duration)); - } - - Clock::time_point start; - bool running = false; - - Timer* previous_timer; - static thread_local Timer* current_timer; -#endif - - TimingCategory& category; -}; - -/** - * A Timer that automatically starts timing when created and stops at the end of the scope. Should - * be used in the majority of cases. - */ -class ScopeTimer : public Timer { -public: - ScopeTimer(TimingCategory& category) : Timer(category) { - Start(); - } - - ~ScopeTimer() { - Stop(); - } -}; - -} // namespace Profiling -} // namespace Common diff --git a/src/common/profiler_reporting.h b/src/common/profiler_reporting.h index df98e05b7..fa1ac883f 100644 --- a/src/common/profiler_reporting.h +++ b/src/common/profiler_reporting.h @@ -4,22 +4,17 @@ #pragma once +#include #include #include -#include "common/profiler.h" #include "common/synchronized_wrapper.h" namespace Common { namespace Profiling { -struct TimingCategoryInfo { - static const unsigned int NO_PARENT = -1; - - TimingCategory* category; - const char* name; - unsigned int parent; -}; +using Clock = std::chrono::high_resolution_clock; +using Duration = Clock::duration; struct ProfilingFrameResult { /// Time since the last delivered frame @@ -27,22 +22,12 @@ struct ProfilingFrameResult { /// Time spent processing a frame, excluding VSync Duration frame_time; - - /// Total amount of time spent inside each category in this frame. Indexed by the category id - std::vector time_per_category; }; class ProfilingManager final { public: ProfilingManager(); - unsigned int RegisterTimingCategory(TimingCategory* category, const char* name); - void SetTimingCategoryParent(unsigned int category, unsigned int parent); - - const std::vector& GetTimingCategoriesInfo() const { - return timing_categories; - } - /// This should be called after swapping screen buffers. void BeginFrame(); /// This should be called before swapping screen buffers. @@ -54,7 +39,6 @@ public: } private: - std::vector timing_categories; Clock::time_point last_frame_end; Clock::time_point this_frame_start; @@ -73,9 +57,6 @@ struct AggregatedFrameResult { AggregatedDuration frame_time; float fps; - - /// Total amount of time spent inside each category in this frame. Indexed by the category id - std::vector time_per_category; }; class TimingResultsAggregator final { @@ -83,7 +64,6 @@ public: TimingResultsAggregator(size_t window_size); void Clear(); - void SetNumberOfCategories(size_t n); void AddFrame(const ProfilingFrameResult& frame_result); @@ -95,7 +75,6 @@ public: std::vector interframe_times; std::vector frame_times; - std::vector> times_per_category; }; ProfilingManager& GetProfilingManager(); diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp index 647784208..8d4b26815 100644 --- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp +++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp @@ -10,7 +10,6 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "common/microprofile.h" -#include "common/profiler.h" #include "core/memory.h" #include "core/hle/svc.h" @@ -25,9 +24,6 @@ #include "core/gdbstub/gdbstub.h" -Common::Profiling::TimingCategory profile_execute("DynCom::Execute"); -Common::Profiling::TimingCategory profile_decode("DynCom::Decode"); - enum { COND = (1 << 0), NON_BRANCH = (1 << 1), @@ -3496,7 +3492,6 @@ static unsigned int InterpreterTranslateInstruction(const ARMul_State* cpu, cons } static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr) { - Common::Profiling::ScopeTimer timer_decode(profile_decode); MICROPROFILE_SCOPE(DynCom_Decode); // Decode instruction, get index @@ -3530,7 +3525,6 @@ static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr) } static int InterpreterTranslateSingle(ARMul_State* cpu, int& bb_start, u32 addr) { - Common::Profiling::ScopeTimer timer_decode(profile_decode); MICROPROFILE_SCOPE(DynCom_Decode); ARM_INST_PTR inst_base = nullptr; @@ -3565,7 +3559,6 @@ static int clz(unsigned int x) { MICROPROFILE_DEFINE(DynCom_Execute, "DynCom", "Execute", MP_RGB(255, 0, 0)); unsigned InterpreterMainLoop(ARMul_State* cpu) { - Common::Profiling::ScopeTimer timer_execute(profile_execute); MICROPROFILE_SCOPE(DynCom_Execute); GDBStub::BreakpointAddress breakpoint_data; diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp index 211fcf599..233592d7f 100644 --- a/src/core/hle/service/gsp_gpu.cpp +++ b/src/core/hle/service/gsp_gpu.cpp @@ -4,7 +4,6 @@ #include "common/bit_field.h" #include "common/microprofile.h" -#include "common/profiler.h" #include "core/memory.h" #include "core/hle/kernel/event.h" diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp index ae54afb1c..a9a1a3244 100644 --- a/src/core/hle/svc.cpp +++ b/src/core/hle/svc.cpp @@ -6,7 +6,6 @@ #include "common/logging/log.h" #include "common/microprofile.h" -#include "common/profiler.h" #include "common/string_util.h" #include "common/symbols.h" @@ -1031,8 +1030,6 @@ static const FunctionDef SVC_Table[] = { {0x7D, HLE::Wrap, "QueryProcessMemory"}, }; -Common::Profiling::TimingCategory profiler_svc("SVC Calls"); - static const FunctionDef* GetSVCInfo(u32 func_num) { if (func_num >= ARRAY_SIZE(SVC_Table)) { LOG_ERROR(Kernel_SVC, "unknown svc=0x%02X", func_num); @@ -1044,7 +1041,6 @@ static const FunctionDef* GetSVCInfo(u32 func_num) { MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70)); void CallSVC(u32 immediate) { - Common::Profiling::ScopeTimer timer_svc(profiler_svc); MICROPROFILE_SCOPE(Kernel_SVC); const FunctionDef* info = GetSVCInfo(immediate); diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 3abe79c09..97ba8214e 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -7,7 +7,6 @@ #include "common/alignment.h" #include "common/microprofile.h" -#include "common/profiler.h" #include "core/settings.h" #include "core/hle/service/gsp_gpu.h" @@ -35,8 +34,6 @@ static int default_attr_counter = 0; static u32 default_attr_write_buffer[3]; -Common::Profiling::TimingCategory category_drawing("Drawing"); - // Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF static const u32 expand_bits_to_bytes[] = { 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, @@ -186,7 +183,6 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX(trigger_draw): case PICA_REG_INDEX(trigger_draw_indexed): { - Common::Profiling::ScopeTimer scope_timer(category_drawing); MICROPROFILE_SCOPE(GPU_Drawing); #if PICA_LOG_TEV diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 0434ad05a..9cf77b1f2 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -9,7 +9,6 @@ #include "common/common_types.h" #include "common/math_util.h" #include "common/microprofile.h" -#include "common/profiler.h" #include "core/memory.h" #include "core/hw/gpu.h" @@ -287,7 +286,6 @@ static int SignedArea (const Math::Vec2& vtx1, return Math::Cross(vec1, vec2).z; }; -static Common::Profiling::TimingCategory rasterization_category("Rasterization"); MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 240)); /** @@ -300,7 +298,6 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, bool reversed = false) { const auto& regs = g_state.regs; - Common::Profiling::ScopeTimer timer(rasterization_category); MICROPROFILE_SCOPE(GPU_Rasterization); // vertex positions in rasterizer coordinates diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 30187d4cf..a8c775c80 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -11,7 +11,6 @@ #include "common/file_util.h" #include "common/math_util.h" #include "common/microprofile.h" -#include "common/profiler.h" #include "core/memory.h" #include "core/settings.h" diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 75301accd..043e99190 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -9,7 +9,6 @@ #include "common/hash.h" #include "common/microprofile.h" -#include "common/profiler.h" #include "video_core/debug_utils/debug_utils.h" #include "video_core/pica.h" @@ -57,13 +56,11 @@ void Shutdown() { #endif // ARCHITECTURE_x86_64 } -static Common::Profiling::TimingCategory shader_category("Vertex Shader"); MICROPROFILE_DEFINE(GPU_VertexShader, "GPU", "Vertex Shader", MP_RGB(50, 50, 240)); OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) { auto& config = g_state.regs.vs; - Common::Profiling::ScopeTimer timer(shader_category); MICROPROFILE_SCOPE(GPU_VertexShader); state.program_counter = config.main_offset;