citra/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
GPUCode dfa2fd0e0d
Add vulkan backend (#6512)
* code: Prepare frontend for vulkan support

* citra_qt: Add vulkan options to the GUI

* vk_instance: Collect tooling info

* renderer_vulkan: Add vulkan backend

* qt: Fix fullscreen and resize issues on macOS. (#47)

* qt: Fix bugged macOS full screen transition.

* renderer/vulkan: Fix swapchain recreation destroying in-use semaphore.

* renderer/vulkan: Make gl_Position invariant. (#48)

This fixes an issue with black artifacts in Pokemon games on Apple GPUs.
If the vertex calculations differ slightly between render passes, it can
cause parts of model faces to fail depth test.

* vk_renderpass_cache: Bump pixel format count

* android: Custom driver code

* vk_instance: Set moltenvk configuration

* rasterizer_cache: Proper surface unregister

* citra_qt: Fix invalid characters

* vk_rasterizer: Correct special unbind

* android: Allow async presentation toggle

* vk_graphics_pipeline: Fix async shader compilation

* We were actually waiting for the pipelines regardless of the setting, oops

* vk_rasterizer: More robust attribute loading

* android: Move PollEvents to OpenGL window

* Vulkan does not need this and it causes problems

* vk_instance: Enable robust buffer access

* Improves stability on mali devices

* vk_renderpass_cache: Bring back renderpass flushing

* externals: Update vulkan-headers

* gl_rasterizer: Separable shaders for everyone

* vk_blit_helper: Corect depth to color convertion

* renderer_vulkan: Implement reinterpretation with copy

* Allows reinterpreteration with simply copy on AMD

* vk_graphics_pipeline: Only fast compile if no shaders are pending

* With this shaders weren't being compiled in parallel

* vk_swapchain: Ensure vsync doesn't lock framerate

* vk_present_window: Match guest swapchain size to vulkan image count

* Less latency and fixes crashes that were caused by images being deleted before free

* vk_instance: Blacklist VK_EXT_pipeline_creation_cache_control with nvidia gpus

* Resolves crashes when async shader compilation is enabled

* vk_rasterizer: Bump async threshold to 6

* Many games have fullscreen quads with 6 vertices. Fixes pokemon textures missing with async shaders

* android: More robust surface recreation

* renderer_vulkan: Fix dynamic state being lost

* vk_pipeline_cache: Skip cache save when no pipeline cache exists

* This is the cache when loading a save state

* sdl: Fix surface initialization on macOS. (#49)

* sdl: Fix surface initialization on macOS.

* sdl: Fix render window events not being handled under Vulkan.

* renderer/vulkan: Fix binding/unbinding of shadow rendering buffer.

* vk_stream_buffer: Respect non coherent access alignment

* Required by nvidia GPUs on MacOS

* renderer/vulkan: Support VK_EXT_fragment_shader_interlock for shadow rendering. (#51)

* renderer_vulkan: Port some recent shader fixes

* vk_pipeline_cache: Improve shadow detection

* vk_swapchain: Add missing check

* renderer_vulkan: Fix hybrid screen

* Revert "gl_rasterizer: Separable shaders for everyone"

Causes crashes on mali GPUs, will need separate PR

This reverts commit d22d556d30.

* renderer_vulkan: Fix flipped screenshot

---------

Co-authored-by: Steveice10 <1269164+Steveice10@users.noreply.github.com>
2023-09-13 01:28:50 +03:00

208 lines
6.7 KiB
C++

// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <limits>
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
namespace Vulkan {
constexpr u64 WAIT_TIMEOUT = std::numeric_limits<u64>::max();
MasterSemaphoreTimeline::MasterSemaphoreTimeline(const Instance& instance_) : instance{instance_} {
const vk::StructureChain semaphore_chain = {
vk::SemaphoreCreateInfo{},
vk::SemaphoreTypeCreateInfoKHR{
.semaphoreType = vk::SemaphoreType::eTimeline,
.initialValue = 0,
},
};
semaphore = instance.GetDevice().createSemaphoreUnique(semaphore_chain.get());
}
MasterSemaphoreTimeline::~MasterSemaphoreTimeline() = default;
void MasterSemaphoreTimeline::Refresh() {
u64 this_tick{};
u64 counter{};
do {
this_tick = gpu_tick.load(std::memory_order_acquire);
counter = instance.GetDevice().getSemaphoreCounterValueKHR(*semaphore);
if (counter < this_tick) {
return;
}
} while (!gpu_tick.compare_exchange_weak(this_tick, counter, std::memory_order_release,
std::memory_order_relaxed));
}
void MasterSemaphoreTimeline::Wait(u64 tick) {
// No need to wait if the GPU is ahead of the tick
if (IsFree(tick)) {
return;
}
// Update the GPU tick and try again
Refresh();
if (IsFree(tick)) {
return;
}
// If none of the above is hit, fallback to a regular wait
const vk::SemaphoreWaitInfoKHR wait_info = {
.semaphoreCount = 1,
.pSemaphores = &semaphore.get(),
.pValues = &tick,
};
while (instance.GetDevice().waitSemaphoresKHR(&wait_info, WAIT_TIMEOUT) !=
vk::Result::eSuccess) {
}
Refresh();
}
void MasterSemaphoreTimeline::SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait,
vk::Semaphore signal, u64 signal_value) {
cmdbuf.end();
const u32 num_signal_semaphores = signal ? 2U : 1U;
const std::array signal_values{signal_value, u64(0)};
const std::array signal_semaphores{Handle(), signal};
const u32 num_wait_semaphores = wait ? 2U : 1U;
const std::array wait_values{signal_value - 1, u64(1)};
const std::array wait_semaphores{Handle(), wait};
static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks = {
vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eColorAttachmentOutput,
};
const vk::TimelineSemaphoreSubmitInfoKHR timeline_si = {
.waitSemaphoreValueCount = num_wait_semaphores,
.pWaitSemaphoreValues = wait_values.data(),
.signalSemaphoreValueCount = num_signal_semaphores,
.pSignalSemaphoreValues = signal_values.data(),
};
const vk::SubmitInfo submit_info = {
.pNext = &timeline_si,
.waitSemaphoreCount = num_wait_semaphores,
.pWaitSemaphores = wait_semaphores.data(),
.pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1u,
.pCommandBuffers = &cmdbuf,
.signalSemaphoreCount = num_signal_semaphores,
.pSignalSemaphores = signal_semaphores.data(),
};
try {
instance.GetGraphicsQueue().submit(submit_info);
} catch (vk::DeviceLostError& err) {
LOG_CRITICAL(Render_Vulkan, "Device lost during submit: {}", err.what());
UNREACHABLE();
}
}
constexpr u64 FENCE_RESERVE = 8;
MasterSemaphoreFence::MasterSemaphoreFence(const Instance& instance_) : instance{instance_} {
const vk::Device device{instance.GetDevice()};
for (u64 i = 0; i < FENCE_RESERVE; i++) {
free_queue.push(device.createFenceUnique({}));
}
wait_thread = std::jthread([this](std::stop_token token) { WaitThread(token); });
}
MasterSemaphoreFence::~MasterSemaphoreFence() = default;
void MasterSemaphoreFence::Refresh() {}
void MasterSemaphoreFence::Wait(u64 tick) {
while (true) {
u64 current_value = gpu_tick.load(std::memory_order_relaxed);
if (current_value >= tick) {
return;
}
gpu_tick.wait(current_value);
}
}
void MasterSemaphoreFence::SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait,
vk::Semaphore signal, u64 signal_value) {
cmdbuf.end();
const u32 num_signal_semaphores = signal ? 1U : 0U;
const u32 num_wait_semaphores = wait ? 1U : 0U;
static constexpr std::array<vk::PipelineStageFlags, 1> wait_stage_masks = {
vk::PipelineStageFlagBits::eColorAttachmentOutput,
};
const vk::SubmitInfo submit_info = {
.waitSemaphoreCount = num_wait_semaphores,
.pWaitSemaphores = &wait,
.pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1u,
.pCommandBuffers = &cmdbuf,
.signalSemaphoreCount = num_signal_semaphores,
.pSignalSemaphores = &signal,
};
vk::UniqueFence fence{GetFreeFence()};
try {
instance.GetGraphicsQueue().submit(submit_info, *fence);
} catch (vk::DeviceLostError& err) {
LOG_CRITICAL(Render_Vulkan, "Device lost during submit: {}", err.what());
UNREACHABLE();
}
std::scoped_lock lock{wait_mutex};
wait_queue.push({
.handle = std::move(fence),
.signal_value = signal_value,
});
wait_cv.notify_one();
}
void MasterSemaphoreFence::WaitThread(std::stop_token token) {
const vk::Device device{instance.GetDevice()};
while (!token.stop_requested()) {
Fence fence;
{
std::unique_lock lock{wait_mutex};
Common::CondvarWait(wait_cv, lock, token, [this] { return !wait_queue.empty(); });
if (token.stop_requested()) {
return;
}
fence = std::move(wait_queue.front());
wait_queue.pop();
}
const vk::Result result = device.waitForFences(*fence.handle, true, WAIT_TIMEOUT);
if (result != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Fence wait failed with error {}", vk::to_string(result));
UNREACHABLE();
}
device.resetFences(*fence.handle);
gpu_tick.store(fence.signal_value);
gpu_tick.notify_all();
std::scoped_lock lock{free_mutex};
free_queue.push(std::move(fence.handle));
}
}
vk::UniqueFence MasterSemaphoreFence::GetFreeFence() {
std::scoped_lock lock{free_mutex};
if (free_queue.empty()) {
return instance.GetDevice().createFenceUnique({});
}
vk::UniqueFence fence{std::move(free_queue.front())};
free_queue.pop();
return fence;
}
} // namespace Vulkan