vk_scheduler: Delegate commands to a worker thread and state track

Introduce a worker thread approach for delegating Vulkan work derived
from dxvk's approach. https://github.com/doitsujin/dxvk

Now that the scheduler handles all Vulkan work related to command
streaming, store state tracking in the scheduler itself. This way we
know when to re-upload Vulkan dynamic state to the queue (unlike NVN,
this state is invalidated between command buffers). We can also store
the renderpass state and the bound graphics pipeline to avoid redundant
binds and renderpass begins/ends.
This commit is contained in:
ReinUsesLisp 2019-12-13 02:24:48 -03:00
parent 8fc49a83b6
commit 2df9a2dcaf
No known key found for this signature in database
GPG key ID: 2DFC508897B39CFE
2 changed files with 315 additions and 41 deletions

View file

@ -3,7 +3,7 @@
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include "common/assert.h" #include "common/assert.h"
#include "common/logging/log.h" #include "common/microprofile.h"
#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_resource_manager.h"
@ -11,46 +11,172 @@
namespace Vulkan { namespace Vulkan {
VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager) MICROPROFILE_DECLARE(Vulkan_WaitForWorker);
: device{device}, resource_manager{resource_manager} {
next_fence = &resource_manager.CommitFence(); void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf,
AllocateNewContext(); const vk::DispatchLoaderDynamic& dld) {
auto command = first;
while (command != nullptr) {
auto next = command->GetNext();
command->Execute(cmdbuf, dld);
command->~Command();
command = next;
}
command_offset = 0;
first = nullptr;
last = nullptr;
} }
VKScheduler::~VKScheduler() = default; VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager)
: device{device}, resource_manager{resource_manager}, next_fence{
&resource_manager.CommitFence()} {
AcquireNewChunk();
AllocateNewContext();
worker_thread = std::thread(&VKScheduler::WorkerThread, this);
}
// Requests the worker thread to stop and blocks until it has exited.
VKScheduler::~VKScheduler() {
    {
        // Publish the quit request while holding the mutex the worker waits on. If the flag were
        // written without the lock, the worker could evaluate its wait predicate, miss both the
        // store and the notification below, and then sleep forever, leaving join() blocked.
        std::unique_lock lock{mutex};
        quit = true;
    }
    cv.notify_all();
    worker_thread.join();
}
void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) { void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) {
SubmitExecution(semaphore); SubmitExecution(semaphore);
if (release_fence) if (release_fence) {
current_fence->Release(); current_fence->Release();
}
AllocateNewContext(); AllocateNewContext();
} }
void VKScheduler::Finish(bool release_fence, vk::Semaphore semaphore) { void VKScheduler::Finish(bool release_fence, vk::Semaphore semaphore) {
SubmitExecution(semaphore); SubmitExecution(semaphore);
current_fence->Wait(); current_fence->Wait();
if (release_fence) if (release_fence) {
current_fence->Release(); current_fence->Release();
}
AllocateNewContext(); AllocateNewContext();
} }
// Hands the pending chunk to the worker and spins until the chunk queue has been drained.
void VKScheduler::WaitWorker() {
    MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
    DispatchWork();

    for (;;) {
        // Keep waking the worker, then take the mutex to observe the queue.
        cv.notify_all();
        std::unique_lock lock{mutex};
        if (chunk_queue.Empty()) {
            break;
        }
    }
}
// Queues the chunk being recorded for the worker thread and starts a fresh one.
// Does nothing when no command has been recorded in the current chunk.
void VKScheduler::DispatchWork() {
    if (!chunk->Empty()) {
        chunk_queue.Push(std::move(chunk));
        cv.notify_all();
        AcquireNewChunk();
    }
}
// Makes renderpass_bi the active renderpass. Nothing is recorded when the tracked renderpass
// compares equal to the requested one; otherwise the tracked renderpass (if any) is ended before
// the new one begins.
// NOTE(review): renderpass_bi is captured by value, so any pointers it carries presumably have to
// stay valid until the worker executes the command - confirm at the call sites.
void VKScheduler::RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi) {
    const bool has_active_renderpass = state.renderpass.has_value();
    if (has_active_renderpass && renderpass_bi == *state.renderpass) {
        return;
    }
    state.renderpass = renderpass_bi;

    Record([renderpass_bi, must_end = has_active_renderpass](auto cmdbuf, auto& dld) {
        if (must_end) {
            cmdbuf.endRenderPass(dld);
        }
        cmdbuf.beginRenderPass(renderpass_bi, vk::SubpassContents::eInline, dld);
    });
}
// Ends the tracked renderpass, if there is one, by forwarding to EndRenderPass(), so that
// commands recorded afterwards are placed outside of a renderpass.
void VKScheduler::RequestOutsideRenderPassOperationContext() {
EndRenderPass();
}
// Records a graphics pipeline bind unless the same pipeline is already tracked as bound.
void VKScheduler::BindGraphicsPipeline(vk::Pipeline pipeline) {
    if (state.graphics_pipeline != pipeline) {
        state.graphics_pipeline = pipeline;
        Record([pipeline](auto cmdbuf, auto& dld) {
            cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline, dld);
        });
    }
}
// Entry point of the worker thread: executes queued chunks until a quit is requested.
// The mutex is held for the whole loop and only released while blocked in cv.wait().
void VKScheduler::WorkerThread() {
    std::unique_lock lock{mutex};
    while (true) {
        cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; });
        if (quit) {
            break;
        }

        auto pending = std::move(chunk_queue.Front());
        chunk_queue.Pop();
        pending->ExecuteAll(current_cmdbuf, device.GetDispatchLoader());
        // Hand the executed chunk back so AcquireNewChunk() can reuse it.
        chunk_reserve.Push(std::move(pending));
    }
}
void VKScheduler::SubmitExecution(vk::Semaphore semaphore) { void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
EndPendingOperations();
InvalidateState();
WaitWorker();
std::unique_lock lock{mutex};
const auto queue = device.GetGraphicsQueue();
const auto& dld = device.GetDispatchLoader(); const auto& dld = device.GetDispatchLoader();
current_cmdbuf.end(dld); current_cmdbuf.end(dld);
const auto queue = device.GetGraphicsQueue(); const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1U : 0U,
const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1u : 0u,
&semaphore); &semaphore);
queue.submit({submit_info}, *current_fence, dld); queue.submit({submit_info}, static_cast<vk::Fence>(*current_fence), dld);
} }
void VKScheduler::AllocateNewContext() { void VKScheduler::AllocateNewContext() {
std::unique_lock lock{mutex};
current_fence = next_fence; current_fence = next_fence;
current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
next_fence = &resource_manager.CommitFence(); next_fence = &resource_manager.CommitFence();
const auto& dld = device.GetDispatchLoader(); current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, dld); current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit},
device.GetDispatchLoader());
}
// Forgets the tracked pipeline and dynamic state so that they are recorded again.
// The tracked renderpass is not touched here.
void VKScheduler::InvalidateState() {
    state.graphics_pipeline = nullptr;

    // Clear every "already set" flag consulted by the Touch*() helpers.
    state.viewports = state.scissors = false;
    state.depth_bias = state.blend_constants = false;
    state.depth_bounds = state.stencil_values = false;
}
// Closes operations that are still open in the current context; at present this only ends the
// tracked renderpass via EndRenderPass().
void VKScheduler::EndPendingOperations() {
EndRenderPass();
}
// Records an endRenderPass command and clears the tracked renderpass.
// Does nothing when no renderpass is tracked.
void VKScheduler::EndRenderPass() {
    if (state.renderpass.has_value()) {
        state.renderpass.reset();
        Record([](auto cmdbuf, auto& dld) { cmdbuf.endRenderPass(dld); });
    }
}
// Replaces the current chunk, reusing one from the reserve when available and allocating a new
// one otherwise.
void VKScheduler::AcquireNewChunk() {
    if (!chunk_reserve.Empty()) {
        chunk = std::move(chunk_reserve.Front());
        chunk_reserve.Pop();
        return;
    }
    chunk = std::make_unique<CommandChunk>();
}
} // namespace Vulkan } // namespace Vulkan

View file

@ -4,7 +4,14 @@
#pragma once #pragma once
#include <array>
#include <condition_variable>
#include <cstddef>
#include <memory>
#include <mutex>
#include <new>
#include <optional>
#include <stack>
#include <thread>
#include <utility>
#include "common/common_types.h" #include "common/common_types.h"
#include "common/threadsafe_queue.h"
#include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/declarations.h"
namespace Vulkan { namespace Vulkan {
@ -30,23 +37,6 @@ private:
VKFence* const& fence; VKFence* const& fence;
}; };
class VKCommandBufferView {
public:
VKCommandBufferView() = default;
VKCommandBufferView(const vk::CommandBuffer& cmdbuf) : cmdbuf{cmdbuf} {}
const vk::CommandBuffer* operator->() const noexcept {
return &cmdbuf;
}
operator vk::CommandBuffer() const noexcept {
return cmdbuf;
}
private:
const vk::CommandBuffer& cmdbuf;
};
/// The scheduler abstracts command buffer and fence management with an interface that's able to do /// The scheduler abstracts command buffer and fence management with an interface that's able to do
/// OpenGL-like operations on Vulkan command buffers. /// OpenGL-like operations on Vulkan command buffers.
class VKScheduler { class VKScheduler {
@ -54,32 +44,190 @@ public:
explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager); explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
~VKScheduler(); ~VKScheduler();
/// Gets a reference to the current fence.
VKFenceView GetFence() const {
return current_fence;
}
/// Gets a reference to the current command buffer.
VKCommandBufferView GetCommandBuffer() const {
return current_cmdbuf;
}
/// Sends the current execution context to the GPU. /// Sends the current execution context to the GPU.
void Flush(bool release_fence = true, vk::Semaphore semaphore = nullptr); void Flush(bool release_fence = true, vk::Semaphore semaphore = nullptr);
/// Sends the current execution context to the GPU and waits for it to complete. /// Sends the current execution context to the GPU and waits for it to complete.
void Finish(bool release_fence = true, vk::Semaphore semaphore = nullptr); void Finish(bool release_fence = true, vk::Semaphore semaphore = nullptr);
/// Waits for the worker thread to finish executing everything. After this function returns it's
/// safe to touch worker resources.
void WaitWorker();
/// Sends currently recorded work to the worker thread.
void DispatchWork();
/// Requests to begin a renderpass.
void RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi);
/// Requests the current execution context to be able to execute operations only allowed outside
/// of a renderpass.
void RequestOutsideRenderPassOperationContext();
/// Binds a pipeline to the current execution context.
void BindGraphicsPipeline(vk::Pipeline pipeline);
/// Marks viewports as set and returns true when they had already been set in the current
/// command buffer.
bool TouchViewports() {
    const bool was_set = state.viewports;
    state.viewports = true;
    return was_set;
}
/// Marks scissors as set and returns true when they had already been set in the current
/// command buffer.
bool TouchScissors() {
    const bool was_set = state.scissors;
    state.scissors = true;
    return was_set;
}
/// Marks depth bias as set and returns true when it had already been set in the current
/// command buffer.
bool TouchDepthBias() {
    const bool was_set = state.depth_bias;
    state.depth_bias = true;
    return was_set;
}
/// Marks blend constants as set and returns true when they had already been set in the current
/// command buffer.
bool TouchBlendConstants() {
    const bool was_set = state.blend_constants;
    state.blend_constants = true;
    return was_set;
}
/// Marks depth bounds as set and returns true when they had already been set in the current
/// command buffer.
bool TouchDepthBounds() {
    const bool was_set = state.depth_bounds;
    state.depth_bounds = true;
    return was_set;
}
/// Marks stencil values as set and returns true when they had already been set in the current
/// command buffer.
bool TouchStencilValues() {
    const bool was_set = state.stencil_values;
    state.stencil_values = true;
    return was_set;
}
/// Appends a command to the chunk being recorded; chunks are executed by the worker thread once
/// they have been dispatched.
template <typename T>
void Record(T&& command) {
    if (!chunk->Record(command)) {
        // The chunk had no room left: hand it to the worker and retry on the fresh chunk.
        DispatchWork();
        (void)chunk->Record(command);
    }
}
/// Gets a reference to the current fence.
/// @return A VKFenceView built from the scheduler's current_fence member.
VKFenceView GetFence() const {
return current_fence;
}
private: private:
/// Base class of a recorded command. Each command stores a pointer to the command recorded after
/// it, which is how a chunk walks its commands in order.
class Command {
public:
    virtual ~Command() = default;

    /// Runs the command against the given command buffer.
    virtual void Execute(vk::CommandBuffer cmdbuf,
                         const vk::DispatchLoaderDynamic& dld) const = 0;

    /// Sets the command that follows this one.
    void SetNext(Command* next_) {
        next = next_;
    }

    /// Returns the command that follows this one, or nullptr when there is none.
    Command* GetNext() const {
        return next;
    }

private:
    Command* next = nullptr;
};
/// Command that owns a callable of type T and invokes it when executed.
template <typename T>
class TypedCommand final : public Command {
public:
    /// Takes ownership of the callable by moving it into the command.
    explicit TypedCommand(T&& command_) : command{std::move(command_)} {}
    ~TypedCommand() override = default;

    // Commands are constructed in place and never relocated.
    TypedCommand(TypedCommand&&) = delete;
    TypedCommand& operator=(TypedCommand&&) = delete;

    /// Invokes the stored callable with the command buffer and dispatch loader.
    void Execute(vk::CommandBuffer cmdbuf,
                 const vk::DispatchLoaderDynamic& dld) const override {
        command(cmdbuf, dld);
    }

private:
    T command;
};
class CommandChunk final {
public:
void ExecuteAll(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld);
template <typename T>
bool Record(T& command) {
using FuncType = TypedCommand<T>;
static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large");
if (command_offset > sizeof(data) - sizeof(FuncType)) {
return false;
}
Command* current_last = last;
last = new (data.data() + command_offset) FuncType(std::move(command));
if (current_last) {
current_last->SetNext(last);
} else {
first = last;
}
command_offset += sizeof(FuncType);
return true;
}
bool Empty() const {
return command_offset == 0;
}
private:
Command* first = nullptr;
Command* last = nullptr;
std::size_t command_offset = 0;
std::array<u8, 0x8000> data{};
};
void WorkerThread();
void SubmitExecution(vk::Semaphore semaphore); void SubmitExecution(vk::Semaphore semaphore);
void AllocateNewContext(); void AllocateNewContext();
void InvalidateState();
void EndPendingOperations();
void EndRenderPass();
void AcquireNewChunk();
const VKDevice& device; const VKDevice& device;
VKResourceManager& resource_manager; VKResourceManager& resource_manager;
vk::CommandBuffer current_cmdbuf; vk::CommandBuffer current_cmdbuf;
VKFence* current_fence = nullptr; VKFence* current_fence = nullptr;
VKFence* next_fence = nullptr; VKFence* next_fence = nullptr;
/// State tracked by the scheduler to skip redundant renderpass begins/ends, pipeline binds and
/// dynamic state uploads.
struct State {
/// Begin info of the renderpass requested last; empty when no renderpass is tracked.
std::optional<vk::RenderPassBeginInfo> renderpass;
/// Graphics pipeline bound last; reset to null by InvalidateState().
vk::Pipeline graphics_pipeline;
// The flags below are set by the matching Touch*() helper and cleared by InvalidateState().
bool viewports = false;
bool scissors = false;
bool depth_bias = false;
bool blend_constants = false;
bool depth_bounds = false;
bool stencil_values = false;
} state;
std::unique_ptr<CommandChunk> chunk;
std::thread worker_thread;
Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue;
Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
std::mutex mutex;
std::condition_variable cv;
bool quit = false;
}; };
} // namespace Vulkan } // namespace Vulkan