From 1fb516cd979ed0dbf8fa7cb4f6a334932dfb6434 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow
Date: Thu, 20 Feb 2020 11:55:32 -0400
Subject: [PATCH] GPU: Implement Flush Requests for Async mode.

---
 src/video_core/gpu.cpp                        | 22 +++++++++++++++++++
 src/video_core/gpu.h                          | 21 ++++++++++++++++++
 src/video_core/gpu_thread.cpp                 | 17 +++++++++-----
 src/video_core/gpu_thread.h                   |  8 +++++--
 .../renderer_opengl/gl_rasterizer.cpp         |  6 +++++
 .../renderer_vulkan/vk_rasterizer.cpp         |  4 ++++
 6 files changed, 70 insertions(+), 8 deletions(-)

Review notes (this free-form area is not part of any hunk):
 * GPU::TickWork releases/re-acquires the queue lock through the
   std::unique_lock guard instead of calling the raw mutex directly, so the
   guard's ownership state stays correct if FlushRegion throws.
 * GPU::CurrentFlushRequestFence loads with acquire ordering so a waiter that
   observes the new fence also observes the flush that preceded the store.
 * Template argument lists and line structure were restored; hunk line counts
   are unchanged.

diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 19d3bd3052..85a6c7bb5a 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -125,6 +125,28 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
     return true;
 }
 
+u64 GPU::RequestFlush(CacheAddr addr, std::size_t size) {
+    std::unique_lock lck{flush_request_mutex};
+    const u64 fence = ++last_flush_fence; // fences are handed out in increasing order
+    flush_requests.emplace_back(fence, addr, size);
+    return fence; // caller compares this against CurrentFlushRequestFence()
+}
+
+void GPU::TickWork() {
+    std::unique_lock lck{flush_request_mutex};
+    while (!flush_requests.empty()) {
+        auto& request = flush_requests.front();
+        const u64 fence = request.fence; // copy the fields out before pop_front destroys the node
+        const CacheAddr addr = request.addr;
+        const std::size_t size = request.size;
+        flush_requests.pop_front();
+        lck.unlock(); // do not hold the queue lock while flushing; RequestFlush takes the same mutex
+        renderer->Rasterizer().FlushRegion(addr, size);
+        current_flush_fence.store(fence); // publish completion of this request
+        lck.lock(); // re-acquire before testing the queue again
+    }
+}
+
 u64 GPU::GetTicks() const {
     // This values were reversed engineered by fincs from NVN
     // The gpu clock is reported in units of 385/625 nanoseconds
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index fa9991c871..943a5b1100 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -159,6 +159,14 @@ public:
     void SyncGuestHost();
     virtual void OnCommandListEnd();
 
+    u64 RequestFlush(CacheAddr addr, std::size_t size); // queues a flush, returns its fence
+
+    u64 CurrentFlushRequestFence() const { // fence of the last request TickWork completed
+        return current_flush_fence.load(std::memory_order_acquire);
+    }
+
+    void TickWork(); // drains the pending flush requests
+
     /// Returns a reference to the Maxwell3D GPU engine.
     Engines::Maxwell3D& Maxwell3D();
 
@@ -327,6 +335,19 @@ private:
 
     std::condition_variable sync_cv;
 
+    struct FlushRequest {
+        FlushRequest(u64 fence, CacheAddr addr, std::size_t size)
+            : fence{fence}, addr{addr}, size{size} {}
+        u64 fence;
+        CacheAddr addr;
+        std::size_t size;
+    };
+
+    std::list<FlushRequest> flush_requests; // guarded by flush_request_mutex
+    std::atomic<u64> current_flush_fence{}; // written by TickWork, polled by waiters
+    u64 last_flush_fence{}; // guarded by flush_request_mutex
+    std::mutex flush_request_mutex;
+
     const bool is_async;
 };
 
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 3e2be00e9d..9460364a3e 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -15,8 +15,9 @@
 namespace VideoCommon::GPUThread {
 
 /// Runs the GPU thread
-static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
-                      Tegra::DmaPusher& dma_pusher, SynchState& state) {
+static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
+                      Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher,
+                      SynchState& state) {
     MicroProfileOnThreadCreate("GpuThread");
 
     // Wait for first GPU command before acquiring the window context
@@ -40,6 +41,8 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic
             renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
         } else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) {
             renderer.Rasterizer().ReleaseFences();
+        } else if (const auto data = std::get_if<GPUTickCommand>(&next.data)) {
+            system.GPU().TickWork(); // service flush requests queued via GPU::RequestFlush
         } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
             renderer.Rasterizer().FlushRegion(data->addr, data->size);
         } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
@@ -68,8 +71,8 @@ ThreadManager::~ThreadManager() {
 void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
                                 Core::Frontend::GraphicsContext& context,
                                 Tegra::DmaPusher& dma_pusher) {
-    thread = std::thread{RunThread, std::ref(renderer), std::ref(context), std::ref(dma_pusher),
-                         std::ref(state)};
+    thread = std::thread{RunThread, std::ref(system), std::ref(renderer),
+                         std::ref(context), std::ref(dma_pusher), std::ref(state)};
 }
 
 void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
@@ -85,8 +88,10 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
         return;
     }
     if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) {
-        u64 fence = PushCommand(FlushRegionCommand(addr, size));
-        while (fence > state.signaled_fence.load(std::memory_order_relaxed)) {
+        auto& gpu = system.GPU();
+        u64 fence = gpu.RequestFlush(addr, size);
+        PushCommand(GPUTickCommand()); // make the GPU thread call TickWork
+        while (fence > gpu.CurrentFlushRequestFence()) { // spin until our request has been flushed
         }
     }
 }
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 9d08779216..5a28335d61 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -70,12 +70,16 @@ struct FlushAndInvalidateRegionCommand final {
     u64 size;
 };
 
-/// Command to signal to the GPU thread that processing has ended
+/// Command called within the gpu, to schedule actions after a command list end
 struct OnCommandListEndCommand final {};
 
+/// Command to make the gpu look into pending requests
+struct GPUTickCommand final {};
+
 using CommandData =
     std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
-                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand>;
+                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand,
+                 GPUTickCommand>;
 
 struct CommandDataContainer {
     CommandDataContainer() = default;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e52e5961f1..fbd81b895f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -601,6 +601,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
     EndTransformFeedback();
 
     ++num_queued_commands;
+
+    system.GPU().TickWork();
 }
 
 void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
@@ -628,6 +630,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
     const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
     glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
     ++num_queued_commands;
+    system.GPU().TickWork();
 }
 
 void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
@@ -652,6 +655,9 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
 }
 
 bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
+    if (!Settings::IsGPULevelExtreme()) {
+        return buffer_cache.MustFlushRegion(addr, size); // below Extreme only the buffer cache is consulted
+    }
     return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
 }
 
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 507262c8fe..926ecf38ea 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -365,6 +365,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
     });
 
     EndTransformFeedback();
+
+    system.GPU().TickWork();
 }
 
 void RasterizerVulkan::Clear() {
@@ -492,6 +494,8 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
                                   descriptor_set, {});
         cmdbuf.Dispatch(grid_x, grid_y, grid_z);
     });
+
+    system.GPU().TickWork();
 }
 
 void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {