82c2601555
Reimplement the buffer cache using cached bindings and page level granularity for modification tracking. This also drops the usage of shared pointers and virtual functions from the cache. - Bindings are cached, allowing to skip work when the game changes few bits between draws. - OpenGL Assembly shaders no longer copy when a region has been modified from the GPU to emulate constant buffers, instead GL_EXT_memory_object is used to alias sub-buffers within the same allocation. - OpenGL Assembly shaders stream constant buffer data using glProgramBufferParametersIuivNV, from NV_parameter_buffer_object. In theory this should save one hash table resolve inside the driver compared to glBufferSubData. - A new OpenGL stream buffer is implemented based on fences for drivers that are not Nvidia's proprietary, due to their low performance on partial glBufferSubData calls synchronized with 3D rendering (that some games use a lot). - Most optimizations are shared between APIs now, allowing Vulkan to cache more bindings than before, skipping unnecesarry work. This commit adds the necessary infrastructure to use Vulkan object from OpenGL. Overall, it improves performance and fixes some bugs present on the old cache. There are still some edge cases hit by some games that harm performance on some vendors, this are planned to be fixed in later commits.
214 lines
5.8 KiB
C++
214 lines
5.8 KiB
C++
// Copyright 2019 yuzu Emulator Project
|
|
// Licensed under GPLv2 or any later version
|
|
// Refer to the license.txt file included.
|
|
|
|
#pragma once
|
|
|
|
#include <atomic>
|
|
#include <condition_variable>
|
|
#include <cstddef>
|
|
#include <memory>
|
|
#include <stack>
|
|
#include <thread>
|
|
#include <utility>
|
|
#include "common/alignment.h"
|
|
#include "common/common_types.h"
|
|
#include "common/threadsafe_queue.h"
|
|
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
|
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
|
|
|
namespace Vulkan {
|
|
|
|
class CommandPool;
|
|
class Device;
|
|
class Framebuffer;
|
|
class StateTracker;
|
|
class VKQueryCache;
|
|
|
|
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
|
|
/// OpenGL-like operations on Vulkan command buffers.
|
|
class VKScheduler {
|
|
public:
|
|
explicit VKScheduler(const Device& device, StateTracker& state_tracker);
|
|
~VKScheduler();
|
|
|
|
/// Sends the current execution context to the GPU.
|
|
void Flush(VkSemaphore semaphore = nullptr);
|
|
|
|
/// Sends the current execution context to the GPU and waits for it to complete.
|
|
void Finish(VkSemaphore semaphore = nullptr);
|
|
|
|
/// Waits for the worker thread to finish executing everything. After this function returns it's
|
|
/// safe to touch worker resources.
|
|
void WaitWorker();
|
|
|
|
/// Sends currently recorded work to the worker thread.
|
|
void DispatchWork();
|
|
|
|
/// Requests to begin a renderpass.
|
|
void RequestRenderpass(const Framebuffer* framebuffer);
|
|
|
|
/// Requests the current executino context to be able to execute operations only allowed outside
|
|
/// of a renderpass.
|
|
void RequestOutsideRenderPassOperationContext();
|
|
|
|
/// Binds a pipeline to the current execution context.
|
|
void BindGraphicsPipeline(VkPipeline pipeline);
|
|
|
|
/// Invalidates current command buffer state except for render passes
|
|
void InvalidateState();
|
|
|
|
/// Assigns the query cache.
|
|
void SetQueryCache(VKQueryCache& query_cache_) {
|
|
query_cache = &query_cache_;
|
|
}
|
|
|
|
/// Send work to a separate thread.
|
|
template <typename T>
|
|
void Record(T&& command) {
|
|
if (chunk->Record(command)) {
|
|
return;
|
|
}
|
|
DispatchWork();
|
|
(void)chunk->Record(command);
|
|
}
|
|
|
|
/// Returns the current command buffer tick.
|
|
[[nodiscard]] u64 CurrentTick() const noexcept {
|
|
return master_semaphore->CurrentTick();
|
|
}
|
|
|
|
/// Returns true when a tick has been triggered by the GPU.
|
|
[[nodiscard]] bool IsFree(u64 tick) const noexcept {
|
|
return master_semaphore->IsFree(tick);
|
|
}
|
|
|
|
/// Waits for the given tick to trigger on the GPU.
|
|
void Wait(u64 tick) {
|
|
master_semaphore->Wait(tick);
|
|
}
|
|
|
|
/// Returns the master timeline semaphore.
|
|
[[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept {
|
|
return *master_semaphore;
|
|
}
|
|
|
|
private:
|
|
class Command {
|
|
public:
|
|
virtual ~Command() = default;
|
|
|
|
virtual void Execute(vk::CommandBuffer cmdbuf) const = 0;
|
|
|
|
Command* GetNext() const {
|
|
return next;
|
|
}
|
|
|
|
void SetNext(Command* next_) {
|
|
next = next_;
|
|
}
|
|
|
|
private:
|
|
Command* next = nullptr;
|
|
};
|
|
|
|
template <typename T>
|
|
class TypedCommand final : public Command {
|
|
public:
|
|
explicit TypedCommand(T&& command_) : command{std::move(command_)} {}
|
|
~TypedCommand() override = default;
|
|
|
|
TypedCommand(TypedCommand&&) = delete;
|
|
TypedCommand& operator=(TypedCommand&&) = delete;
|
|
|
|
void Execute(vk::CommandBuffer cmdbuf) const override {
|
|
command(cmdbuf);
|
|
}
|
|
|
|
private:
|
|
T command;
|
|
};
|
|
|
|
class CommandChunk final {
|
|
public:
|
|
void ExecuteAll(vk::CommandBuffer cmdbuf);
|
|
|
|
template <typename T>
|
|
bool Record(T& command) {
|
|
using FuncType = TypedCommand<T>;
|
|
static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large");
|
|
|
|
command_offset = Common::AlignUp(command_offset, alignof(FuncType));
|
|
if (command_offset > sizeof(data) - sizeof(FuncType)) {
|
|
return false;
|
|
}
|
|
Command* const current_last = last;
|
|
last = new (data.data() + command_offset) FuncType(std::move(command));
|
|
|
|
if (current_last) {
|
|
current_last->SetNext(last);
|
|
} else {
|
|
first = last;
|
|
}
|
|
command_offset += sizeof(FuncType);
|
|
return true;
|
|
}
|
|
|
|
bool Empty() const {
|
|
return command_offset == 0;
|
|
}
|
|
|
|
private:
|
|
Command* first = nullptr;
|
|
Command* last = nullptr;
|
|
|
|
size_t command_offset = 0;
|
|
alignas(std::max_align_t) std::array<u8, 0x8000> data{};
|
|
};
|
|
|
|
struct State {
|
|
VkRenderPass renderpass = nullptr;
|
|
VkFramebuffer framebuffer = nullptr;
|
|
VkExtent2D render_area = {0, 0};
|
|
VkPipeline graphics_pipeline = nullptr;
|
|
};
|
|
|
|
void WorkerThread();
|
|
|
|
void SubmitExecution(VkSemaphore semaphore);
|
|
|
|
void AllocateNewContext();
|
|
|
|
void EndPendingOperations();
|
|
|
|
void EndRenderPass();
|
|
|
|
void AcquireNewChunk();
|
|
|
|
const Device& device;
|
|
StateTracker& state_tracker;
|
|
|
|
std::unique_ptr<MasterSemaphore> master_semaphore;
|
|
std::unique_ptr<CommandPool> command_pool;
|
|
|
|
VKQueryCache* query_cache = nullptr;
|
|
|
|
vk::CommandBuffer current_cmdbuf;
|
|
|
|
std::unique_ptr<CommandChunk> chunk;
|
|
std::thread worker_thread;
|
|
|
|
State state;
|
|
|
|
u32 num_renderpass_images = 0;
|
|
std::array<VkImage, 9> renderpass_images{};
|
|
std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{};
|
|
|
|
Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue;
|
|
Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
|
|
std::mutex mutex;
|
|
std::condition_variable cv;
|
|
bool quit = false;
|
|
};
|
|
|
|
} // namespace Vulkan
|