GPU_Async: Correct fences, display events and more.

This commit uses guest fences on the vSync event instead of the artificial
fake fence we had.
It also keeps signaling display events while the game is loading, as the
OS is supposed to send buffers to vSync during that time.
This commit is contained in:
Fernando Sahmkow 2019-09-25 19:43:23 -04:00 committed by FernandoS27
parent 4e9f975935
commit 5b5e60ffec
6 changed files with 38 additions and 21 deletions

View file

@ -36,6 +36,10 @@ NVFlinger::NVFlinger(Core::System& system) : system(system) {
displays.emplace_back(3, "Internal", system); displays.emplace_back(3, "Internal", system);
displays.emplace_back(4, "Null", system); displays.emplace_back(4, "Null", system);
for (auto& display : displays) {
display.SignalVSyncEvent();
}
// Schedule the screen composition events // Schedule the screen composition events
composition_event = system.CoreTiming().RegisterEvent( composition_event = system.CoreTiming().RegisterEvent(
"ScreenComposition", [this](u64 userdata, s64 cycles_late) { "ScreenComposition", [this](u64 userdata, s64 cycles_late) {
@ -173,7 +177,13 @@ void NVFlinger::Compose() {
bool trigger_event = false; bool trigger_event = false;
// Trigger vsync for this display at the end of drawing // Trigger vsync for this display at the end of drawing
SCOPE_EXIT({ SCOPE_EXIT({
if (trigger_event) { // TODO(Blinkhawk): Correctly send buffers through nvflinger while
// loading the game through the OS.
// During loading, the OS takes care of sending buffers to vsync,
// so the vsync event triggers. Since this is not properly emulated due
// to HLE complications, we allow it to signal until the game enqueues
// its first buffer.
if (trigger_event || !first_buffer_enqueued) {
display.SignalVSyncEvent(); display.SignalVSyncEvent();
} }
}); });
@ -193,13 +203,20 @@ void NVFlinger::Compose() {
if (!buffer) { if (!buffer) {
// There was no queued buffer to draw, render previous frame // There was no queued buffer to draw, render previous frame
system.GetPerfStats().EndGameFrame();
system.GPU().SwapBuffers({}); system.GPU().SwapBuffers({});
continue; continue;
} }
const auto& igbp_buffer = buffer->get().igbp_buffer; const auto& igbp_buffer = buffer->get().igbp_buffer;
trigger_event = true; trigger_event = true;
first_buffer_enqueued = true;
const auto& gpu = system.GPU();
const auto& multi_fence = buffer->get().multi_fence;
for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
const auto& fence = multi_fence.fences[fence_id];
gpu.WaitFence(fence.id, fence.value);
}
// Now send the buffer to the GPU for drawing. // Now send the buffer to the GPU for drawing.
// TODO(Subv): Support more than just disp0. The display device selection is probably based // TODO(Subv): Support more than just disp0. The display device selection is probably based

View file

@ -102,6 +102,8 @@ private:
u32 swap_interval = 1; u32 swap_interval = 1;
bool first_buffer_enqueued{};
/// Event that handles screen composition. /// Event that handles screen composition.
Core::Timing::EventType* composition_event; Core::Timing::EventType* composition_event;

View file

@ -3,6 +3,7 @@
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include "common/assert.h" #include "common/assert.h"
#include "common/microprofile.h"
#include "core/core.h" #include "core/core.h"
#include "core/core_timing.h" #include "core/core_timing.h"
#include "core/memory.h" #include "core/memory.h"
@ -17,6 +18,8 @@
namespace Tegra { namespace Tegra {
MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async)
: system{system}, renderer{renderer}, is_async{is_async} { : system{system}, renderer{renderer}, is_async{is_async} {
auto& rasterizer{renderer.Rasterizer()}; auto& rasterizer{renderer.Rasterizer()};
@ -63,6 +66,16 @@ const DmaPusher& GPU::DmaPusher() const {
return *dma_pusher; return *dma_pusher;
} }
/// Blocks the caller until the syncpoint `syncpoint_id` has reached at least `value`.
/// Returns immediately when the GPU is not running asynchronously.
void GPU::WaitFence(u32 syncpoint_id, u32 value) const {
    // A synchronous GPU is always in sync, so only the async case has to wait.
    if (is_async) {
        MICROPROFILE_SCOPE(GPU_wait);
        const auto& syncpoint = syncpoints[syncpoint_id];
        // Busy-wait (no sleep or yield) until the counter catches up with the fence value.
        while (syncpoint.load() < value) {
        }
    }
}
void GPU::IncrementSyncPoint(const u32 syncpoint_id) { void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
syncpoints[syncpoint_id]++; syncpoints[syncpoint_id]++;
std::lock_guard lock{sync_mutex}; std::lock_guard lock{sync_mutex};

View file

@ -177,6 +177,9 @@ public:
/// Returns a reference to the GPU DMA pusher. /// Returns a reference to the GPU DMA pusher.
Tegra::DmaPusher& DmaPusher(); Tegra::DmaPusher& DmaPusher();
/// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
void WaitFence(u32 syncpoint_id, u32 value) const;
void IncrementSyncPoint(u32 syncpoint_id); void IncrementSyncPoint(u32 syncpoint_id);
u32 GetSyncpointValue(u32 syncpoint_id) const; u32 GetSyncpointValue(u32 syncpoint_id) const;

View file

@ -5,8 +5,6 @@
#include "common/assert.h" #include "common/assert.h"
#include "common/microprofile.h" #include "common/microprofile.h"
#include "core/core.h" #include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/frontend/scope_acquire_window_context.h" #include "core/frontend/scope_acquire_window_context.h"
#include "video_core/dma_pusher.h" #include "video_core/dma_pusher.h"
#include "video_core/gpu.h" #include "video_core/gpu.h"
@ -68,14 +66,10 @@ ThreadManager::~ThreadManager() {
void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) {
thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)};
synchronization_event = system.CoreTiming().RegisterEvent(
"GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); });
} }
void ThreadManager::SubmitList(Tegra::CommandList&& entries) { void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; PushCommand(SubmitListCommand(std::move(entries)));
const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})};
system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence);
} }
void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
@ -102,10 +96,4 @@ u64 ThreadManager::PushCommand(CommandData&& command_data) {
return fence; return fence;
} }
MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
void SynchState::WaitForSynchronization(u64 fence) {
while (signaled_fence.load() < fence)
;
}
} // namespace VideoCommon::GPUThread } // namespace VideoCommon::GPUThread

View file

@ -21,9 +21,6 @@ class DmaPusher;
namespace Core { namespace Core {
class System; class System;
namespace Timing {
struct EventType;
} // namespace Timing
} // namespace Core } // namespace Core
namespace VideoCommon::GPUThread { namespace VideoCommon::GPUThread {
@ -89,8 +86,6 @@ struct CommandDataContainer {
struct SynchState final { struct SynchState final {
std::atomic_bool is_running{true}; std::atomic_bool is_running{true};
void WaitForSynchronization(u64 fence);
using CommandQueue = Common::SPSCQueue<CommandDataContainer>; using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
CommandQueue queue; CommandQueue queue;
u64 last_fence{}; u64 last_fence{};
@ -128,7 +123,6 @@ private:
private: private:
SynchState state; SynchState state;
Core::System& system; Core::System& system;
Core::Timing::EventType* synchronization_event{};
std::thread thread; std::thread thread;
std::thread::id thread_id; std::thread::id thread_id;
}; };