renderer_vulkan: Add vulkan backend
This commit is contained in:
parent
a8d590ae80
commit
59549a2eb6
45 changed files with 8914 additions and 21 deletions
|
@ -242,6 +242,9 @@ void Java_org_citra_citra_1emu_NativeLibrary_SurfaceChanged(JNIEnv* env,
|
|||
if (window) {
|
||||
window->OnSurfaceChanged(s_surf);
|
||||
}
|
||||
if (VideoCore::g_renderer) {
|
||||
VideoCore::g_renderer->NotifySurfaceChanged();
|
||||
}
|
||||
|
||||
LOG_INFO(Frontend, "Surface changed");
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include "citra_qt/configuration/configure_graphics.h"
|
||||
#include "common/settings.h"
|
||||
#include "ui_configure_graphics.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
|
||||
ConfigureGraphics::ConfigureGraphics(std::span<const QString> physical_devices, bool is_powered_on,
|
||||
QWidget* parent)
|
||||
|
|
|
@ -31,6 +31,8 @@ std::string_view GetGraphicsAPIName(GraphicsAPI api) {
|
|||
return "Software";
|
||||
case GraphicsAPI::OpenGL:
|
||||
return "OpenGL";
|
||||
case GraphicsAPI::Vulkan:
|
||||
return "Vulkan";
|
||||
default:
|
||||
return "Invalid";
|
||||
}
|
||||
|
|
|
@ -101,18 +101,47 @@ add_library(video_core STATIC
|
|||
renderer_software/sw_texturing.cpp
|
||||
renderer_software/sw_texturing.h
|
||||
renderer_vulkan/pica_to_vk.h
|
||||
renderer_vulkan/renderer_vulkan.cpp
|
||||
renderer_vulkan/renderer_vulkan.h
|
||||
renderer_vulkan/vk_blit_helper.cpp
|
||||
renderer_vulkan/vk_blit_helper.h
|
||||
renderer_vulkan/vk_common.cpp
|
||||
renderer_vulkan/vk_common.h
|
||||
renderer_vulkan/vk_descriptor_pool.cpp
|
||||
renderer_vulkan/vk_descriptor_pool.h
|
||||
renderer_vulkan/vk_graphics_pipeline.cpp
|
||||
renderer_vulkan/vk_graphics_pipeline.h
|
||||
renderer_vulkan/vk_master_semaphore.cpp
|
||||
renderer_vulkan/vk_master_semaphore.h
|
||||
renderer_vulkan/vk_rasterizer.cpp
|
||||
renderer_vulkan/vk_rasterizer.h
|
||||
renderer_vulkan/vk_rasterizer_cache.cpp
|
||||
renderer_vulkan/vk_scheduler.cpp
|
||||
renderer_vulkan/vk_scheduler.h
|
||||
renderer_vulkan/vk_resource_pool.cpp
|
||||
renderer_vulkan/vk_resource_pool.h
|
||||
renderer_vulkan/vk_instance.cpp
|
||||
renderer_vulkan/vk_instance.h
|
||||
renderer_vulkan/vk_pipeline_cache.cpp
|
||||
renderer_vulkan/vk_pipeline_cache.h
|
||||
renderer_vulkan/vk_platform.cpp
|
||||
renderer_vulkan/vk_platform.h
|
||||
renderer_vulkan/vk_present_window.cpp
|
||||
renderer_vulkan/vk_present_window.h
|
||||
renderer_vulkan/vk_renderpass_cache.cpp
|
||||
renderer_vulkan/vk_renderpass_cache.h
|
||||
renderer_vulkan/vk_shader_gen.cpp
|
||||
renderer_vulkan/vk_shader_gen.h
|
||||
renderer_vulkan/vk_shader_gen_spv.cpp
|
||||
renderer_vulkan/vk_shader_gen_spv.h
|
||||
renderer_vulkan/vk_shader_util.cpp
|
||||
renderer_vulkan/vk_shader_util.h
|
||||
renderer_vulkan/vk_stream_buffer.cpp
|
||||
renderer_vulkan/vk_stream_buffer.h
|
||||
renderer_vulkan/vk_swapchain.cpp
|
||||
renderer_vulkan/vk_swapchain.h
|
||||
renderer_vulkan/vk_texture_runtime.cpp
|
||||
renderer_vulkan/vk_texture_runtime.h
|
||||
shader/debug_data.h
|
||||
shader/shader.cpp
|
||||
shader/shader.h
|
||||
|
|
|
@ -708,8 +708,8 @@ FramebufferHelper<T> RasterizerCache<T>::GetFramebufferSurfaces(bool using_color
|
|||
fb_rect = depth_rect;
|
||||
}
|
||||
|
||||
const Surface* color_surface = color_id ? &slot_surfaces[color_id] : nullptr;
|
||||
const Surface* depth_surface = depth_id ? &slot_surfaces[depth_id] : nullptr;
|
||||
Surface* color_surface = color_id ? &slot_surfaces[color_id] : nullptr;
|
||||
Surface* depth_surface = depth_id ? &slot_surfaces[depth_id] : nullptr;
|
||||
|
||||
if (color_id) {
|
||||
color_level = color_surface->LevelOf(color_params.addr);
|
||||
|
@ -722,7 +722,7 @@ FramebufferHelper<T> RasterizerCache<T>::GetFramebufferSurfaces(bool using_color
|
|||
boost::icl::length(depth_vp_interval));
|
||||
}
|
||||
|
||||
fb_params = FramebufferParams{
|
||||
const FramebufferParams fb_params = {
|
||||
.color_id = color_id,
|
||||
.depth_id = depth_id,
|
||||
.color_level = color_level,
|
||||
|
|
|
@ -26,7 +26,7 @@ using DiskResourceLoadCallback = std::function<void(LoadCallbackStage, std::size
|
|||
|
||||
class RasterizerInterface {
|
||||
public:
|
||||
virtual ~RasterizerInterface() {}
|
||||
virtual ~RasterizerInterface() = default;
|
||||
|
||||
/// Queues the primitive formed by the given vertices for rendering
|
||||
virtual void AddTriangle(const Pica::Shader::OutputVertex& v0,
|
||||
|
|
|
@ -159,6 +159,7 @@ struct FramebufferRegs {
|
|||
} stencil_test;
|
||||
|
||||
union {
|
||||
u32 depth_color_mask;
|
||||
BitField<0, 1, u32> depth_test_enable;
|
||||
BitField<4, 3, CompareFunc> depth_test_func;
|
||||
BitField<8, 1, u32> red_enable;
|
||||
|
|
|
@ -63,6 +63,9 @@ public:
|
|||
/// Synchronizes fixed function renderer state
|
||||
virtual void Sync() {}
|
||||
|
||||
/// This is called to notify the rendering backend of a surface change
|
||||
virtual void NotifySurfaceChanged() {}
|
||||
|
||||
/// Returns the resolution scale factor relative to the native 3DS screen resolution
|
||||
u32 GetResolutionScaleFactor();
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_opengl/gl_state.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_mailbox.h"
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include "common/logging/log.h"
|
||||
#include "core/core.h"
|
||||
#include "core/telemetry_session.h"
|
||||
#include "video_core/regs.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
|
@ -172,7 +173,10 @@ inline vk::PrimitiveTopology PrimitiveTopology(Pica::PipelineRegs::TriangleTopol
|
|||
return vk::PrimitiveTopology::eTriangleList;
|
||||
case Pica::PipelineRegs::TriangleTopology::Strip:
|
||||
return vk::PrimitiveTopology::eTriangleStrip;
|
||||
default:
|
||||
UNREACHABLE_MSG("Unknown triangle topology {}", topology);
|
||||
}
|
||||
return vk::PrimitiveTopology::eTriangleList;
|
||||
}
|
||||
|
||||
inline vk::CullModeFlags CullMode(Pica::RasterizerRegs::CullMode mode) {
|
||||
|
@ -182,7 +186,10 @@ inline vk::CullModeFlags CullMode(Pica::RasterizerRegs::CullMode mode) {
|
|||
case Pica::RasterizerRegs::CullMode::KeepClockWise:
|
||||
case Pica::RasterizerRegs::CullMode::KeepCounterClockWise:
|
||||
return vk::CullModeFlagBits::eBack;
|
||||
default:
|
||||
UNREACHABLE_MSG("Unknown cull mode {}", mode);
|
||||
}
|
||||
return vk::CullModeFlagBits::eNone;
|
||||
}
|
||||
|
||||
inline vk::FrontFace FrontFace(Pica::RasterizerRegs::CullMode mode) {
|
||||
|
@ -192,7 +199,10 @@ inline vk::FrontFace FrontFace(Pica::RasterizerRegs::CullMode mode) {
|
|||
return vk::FrontFace::eCounterClockwise;
|
||||
case Pica::RasterizerRegs::CullMode::KeepCounterClockWise:
|
||||
return vk::FrontFace::eClockwise;
|
||||
default:
|
||||
UNREACHABLE_MSG("Unknown cull mode {}", mode);
|
||||
}
|
||||
return vk::FrontFace::eClockwise;
|
||||
}
|
||||
|
||||
inline Common::Vec4f ColorRGBA8(const u32 color) {
|
||||
|
|
1098
src/video_core/renderer_vulkan/renderer_vulkan.cpp
Normal file
1098
src/video_core/renderer_vulkan/renderer_vulkan.cpp
Normal file
File diff suppressed because it is too large
Load diff
139
src/video_core/renderer_vulkan/renderer_vulkan.h
Normal file
139
src/video_core/renderer_vulkan/renderer_vulkan.h
Normal file
|
@ -0,0 +1,139 @@
|
|||
// Copyright 2023 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <condition_variable>
|
||||
#include <mutex>
|
||||
#include "common/common_types.h"
|
||||
#include "common/math_util.h"
|
||||
#include "core/hw/gpu.h"
|
||||
#include "video_core/renderer_base.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_present_window.h"
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer.h"
|
||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
class TelemetrySession;
|
||||
} // namespace Core
|
||||
|
||||
namespace Memory {
|
||||
class MemorySystem;
|
||||
}
|
||||
|
||||
namespace Layout {
|
||||
struct FramebufferLayout;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Groups a Vulkan image, its image view and its VMA allocation handle with
/// width, height and pixel format fields. Used as the `texture` member of ScreenInfo.
struct TextureInfo {
|
||||
// Image width (units not visible here; presumably pixels - confirm against the .cpp).
u32 width;
|
||||
// Image height, same units as `width`.
u32 height;
|
||||
// Guest (GPU register) framebuffer pixel format associated with the image.
GPU::Regs::PixelFormat format;
|
||||
vk::Image image;
|
||||
vk::ImageView image_view;
|
||||
// VMA allocation handle; NOTE(review): presumably the memory backing `image` - confirm.
VmaAllocation allocation;
|
||||
};
|
||||
|
||||
/// Per-screen presentation state: a TextureInfo, a rectangle of texture
/// coordinates and an image view.
struct ScreenInfo {
|
||||
TextureInfo texture;
|
||||
// Texture coordinate rectangle, stored as floats.
Common::Rectangle<f32> texcoords;
|
||||
// NOTE(review): separate from `texture.image_view`; which view is sampled when
// presenting is decided in the .cpp, which is not visible here.
vk::ImageView image_view;
|
||||
};
|
||||
|
||||
struct PresentUniformData {
|
||||
std::array<f32, 4 * 4> modelview;
|
||||
Common::Vec4f i_resolution;
|
||||
Common::Vec4f o_resolution;
|
||||
int screen_id_l = 0;
|
||||
int screen_id_r = 0;
|
||||
int layer = 0;
|
||||
int reverse_interlaced = 0;
|
||||
};
|
||||
static_assert(sizeof(PresentUniformData) == 112,
|
||||
"PresentUniformData does not structure in shader!");
|
||||
|
||||
class RendererVulkan : public VideoCore::RendererBase {
|
||||
static constexpr std::size_t PRESENT_PIPELINES = 3;
|
||||
|
||||
public:
|
||||
explicit RendererVulkan(Core::System& system, Frontend::EmuWindow& window,
|
||||
Frontend::EmuWindow* secondary_window);
|
||||
~RendererVulkan() override;
|
||||
|
||||
[[nodiscard]] VideoCore::RasterizerInterface* Rasterizer() override {
|
||||
return &rasterizer;
|
||||
}
|
||||
|
||||
void NotifySurfaceChanged() override {
|
||||
main_window.NotifySurfaceChanged();
|
||||
}
|
||||
|
||||
void SwapBuffers() override;
|
||||
void TryPresent(int timeout_ms, bool is_secondary) override {}
|
||||
void Sync() override;
|
||||
|
||||
private:
|
||||
void ReportDriver() const;
|
||||
void ReloadPipeline();
|
||||
void CompileShaders();
|
||||
void BuildLayouts();
|
||||
void BuildPipelines();
|
||||
void ConfigureFramebufferTexture(TextureInfo& texture,
|
||||
const GPU::Regs::FramebufferConfig& framebuffer);
|
||||
void ConfigureRenderPipeline();
|
||||
void PrepareRendertarget();
|
||||
void RenderScreenshot();
|
||||
void PrepareDraw(Frame* frame, const Layout::FramebufferLayout& layout);
|
||||
void RenderToWindow(PresentWindow& window, const Layout::FramebufferLayout& layout,
|
||||
bool flipped);
|
||||
|
||||
void DrawScreens(Frame* frame, const Layout::FramebufferLayout& layout, bool flipped);
|
||||
void DrawBottomScreen(const Layout::FramebufferLayout& layout,
|
||||
const Common::Rectangle<u32>& bottom_screen);
|
||||
void DrawTopScreen(const Layout::FramebufferLayout& layout,
|
||||
const Common::Rectangle<u32>& top_screen);
|
||||
void DrawSingleScreen(u32 screen_id, float x, float y, float w, float h,
|
||||
Layout::DisplayOrientation orientation);
|
||||
void DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r, float x, float y, float w,
|
||||
float h, Layout::DisplayOrientation orientation);
|
||||
void LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer,
|
||||
ScreenInfo& screen_info, bool right_eye);
|
||||
void LoadColorToActiveVkTexture(u8 color_r, u8 color_g, u8 color_b, const TextureInfo& texture);
|
||||
|
||||
private:
|
||||
Memory::MemorySystem& memory;
|
||||
Core::TelemetrySession& telemetry_session;
|
||||
|
||||
Instance instance;
|
||||
Scheduler scheduler;
|
||||
RenderpassCache renderpass_cache;
|
||||
DescriptorPool pool;
|
||||
PresentWindow main_window;
|
||||
StreamBuffer vertex_buffer;
|
||||
RasterizerVulkan rasterizer;
|
||||
std::unique_ptr<PresentWindow> second_window;
|
||||
|
||||
vk::UniquePipelineLayout present_pipeline_layout;
|
||||
DescriptorSetProvider present_set_provider;
|
||||
std::array<vk::Pipeline, PRESENT_PIPELINES> present_pipelines;
|
||||
std::array<vk::ShaderModule, PRESENT_PIPELINES> present_shaders;
|
||||
std::array<vk::Sampler, 2> present_samplers;
|
||||
vk::ShaderModule present_vertex_shader;
|
||||
u32 current_pipeline = 0;
|
||||
|
||||
std::array<ScreenInfo, 3> screen_infos{};
|
||||
std::array<DescriptorData, 3> present_textures{};
|
||||
PresentUniformData draw_info{};
|
||||
vk::ClearColorValue clear_color{};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
548
src/video_core/renderer_vulkan/vk_blit_helper.cpp
Normal file
548
src/video_core/renderer_vulkan/vk_blit_helper.cpp
Normal file
|
@ -0,0 +1,548 @@
|
|||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/vector_math.h"
|
||||
#include "video_core/renderer_vulkan/vk_blit_helper.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
|
||||
|
||||
#include "video_core/host_shaders/format_reinterpreter/vulkan_d24s8_to_rgba8_comp_spv.h"
|
||||
#include "video_core/host_shaders/full_screen_triangle_vert_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_depth_to_buffer_comp_spv.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using VideoCore::PixelFormat;
|
||||
|
||||
namespace {
|
||||
/// Push-constant payload for the vertex stage of the blit pipeline.
/// BindBlitState fills `tex_scale` with the source rectangle's width/height and
/// `tex_offset` with its left/bottom coordinates, all converted to float.
struct PushConstants {
|
||||
std::array<float, 2> tex_scale;
|
||||
std::array<float, 2> tex_offset;
|
||||
};
|
||||
|
||||
/// Push-constant payload for the depth-to-buffer compute pass.
/// DepthToBuffer sets `src_offset` from the copy's texture rectangle (left, bottom)
/// and `src_extent` from the source surface extent (width, height).
struct ComputeInfo {
|
||||
Common::Vec2i src_offset;
|
||||
Common::Vec2i src_extent;
|
||||
};
|
||||
|
||||
inline constexpr vk::PushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{
|
||||
.stageFlags = vk::ShaderStageFlagBits::eCompute,
|
||||
.offset = 0,
|
||||
.size = 2 * sizeof(Common::Vec2i),
|
||||
};
|
||||
|
||||
constexpr std::array<vk::DescriptorSetLayoutBinding, 3> COMPUTE_BINDINGS = {{
|
||||
{0, vk::DescriptorType::eSampledImage, 1, vk::ShaderStageFlagBits::eCompute},
|
||||
{1, vk::DescriptorType::eSampledImage, 1, vk::ShaderStageFlagBits::eCompute},
|
||||
{2, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eCompute},
|
||||
}};
|
||||
|
||||
constexpr std::array<vk::DescriptorSetLayoutBinding, 3> COMPUTE_BUFFER_BINDINGS = {{
|
||||
{0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eCompute},
|
||||
{1, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eCompute},
|
||||
{2, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eCompute},
|
||||
}};
|
||||
|
||||
constexpr std::array<vk::DescriptorSetLayoutBinding, 2> TWO_TEXTURES_BINDINGS = {{
|
||||
{0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment},
|
||||
{1, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment},
|
||||
}};
|
||||
|
||||
inline constexpr vk::PushConstantRange PUSH_CONSTANT_RANGE{
|
||||
.stageFlags = vk::ShaderStageFlagBits::eVertex,
|
||||
.offset = 0,
|
||||
.size = sizeof(PushConstants),
|
||||
};
|
||||
constexpr vk::PipelineVertexInputStateCreateInfo PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO{
|
||||
.vertexBindingDescriptionCount = 0,
|
||||
.pVertexBindingDescriptions = nullptr,
|
||||
.vertexAttributeDescriptionCount = 0,
|
||||
.pVertexAttributeDescriptions = nullptr,
|
||||
};
|
||||
constexpr vk::PipelineInputAssemblyStateCreateInfo PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO{
|
||||
.topology = vk::PrimitiveTopology::eTriangleList,
|
||||
.primitiveRestartEnable = VK_FALSE,
|
||||
};
|
||||
constexpr vk::PipelineViewportStateCreateInfo PIPELINE_VIEWPORT_STATE_CREATE_INFO{
|
||||
.viewportCount = 1,
|
||||
.pViewports = nullptr,
|
||||
.scissorCount = 1,
|
||||
.pScissors = nullptr,
|
||||
};
|
||||
constexpr vk::PipelineRasterizationStateCreateInfo PIPELINE_RASTERIZATION_STATE_CREATE_INFO{
|
||||
.depthClampEnable = VK_FALSE,
|
||||
.rasterizerDiscardEnable = VK_FALSE,
|
||||
.polygonMode = vk::PolygonMode::eFill,
|
||||
.cullMode = vk::CullModeFlagBits::eBack,
|
||||
.frontFace = vk::FrontFace::eClockwise,
|
||||
.depthBiasEnable = VK_FALSE,
|
||||
.depthBiasConstantFactor = 0.0f,
|
||||
.depthBiasClamp = 0.0f,
|
||||
.depthBiasSlopeFactor = 0.0f,
|
||||
.lineWidth = 1.0f,
|
||||
};
|
||||
constexpr vk::PipelineMultisampleStateCreateInfo PIPELINE_MULTISAMPLE_STATE_CREATE_INFO{
|
||||
.rasterizationSamples = vk::SampleCountFlagBits::e1,
|
||||
.sampleShadingEnable = VK_FALSE,
|
||||
.minSampleShading = 0.0f,
|
||||
.pSampleMask = nullptr,
|
||||
.alphaToCoverageEnable = VK_FALSE,
|
||||
.alphaToOneEnable = VK_FALSE,
|
||||
};
|
||||
constexpr std::array DYNAMIC_STATES{
|
||||
vk::DynamicState::eViewport,
|
||||
vk::DynamicState::eScissor,
|
||||
};
|
||||
constexpr vk::PipelineDynamicStateCreateInfo PIPELINE_DYNAMIC_STATE_CREATE_INFO{
|
||||
.dynamicStateCount = static_cast<u32>(DYNAMIC_STATES.size()),
|
||||
.pDynamicStates = DYNAMIC_STATES.data(),
|
||||
};
|
||||
constexpr vk::PipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO{
|
||||
.logicOpEnable = VK_FALSE,
|
||||
.logicOp = vk::LogicOp::eClear,
|
||||
.attachmentCount = 0,
|
||||
.pAttachments = nullptr,
|
||||
.blendConstants = std::array{0.0f, 0.0f, 0.0f, 0.0f},
|
||||
};
|
||||
constexpr vk::PipelineDepthStencilStateCreateInfo PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO{
|
||||
.depthTestEnable = VK_TRUE,
|
||||
.depthWriteEnable = VK_TRUE,
|
||||
.depthCompareOp = vk::CompareOp::eAlways,
|
||||
.depthBoundsTestEnable = VK_FALSE,
|
||||
.stencilTestEnable = VK_FALSE,
|
||||
.front = vk::StencilOpState{},
|
||||
.back = vk::StencilOpState{},
|
||||
.minDepthBounds = 0.0f,
|
||||
.maxDepthBounds = 0.0f,
|
||||
};
|
||||
|
||||
template <vk::Filter filter>
|
||||
inline constexpr vk::SamplerCreateInfo SAMPLER_CREATE_INFO{
|
||||
.magFilter = filter,
|
||||
.minFilter = filter,
|
||||
.mipmapMode = vk::SamplerMipmapMode::eNearest,
|
||||
.addressModeU = vk::SamplerAddressMode::eClampToBorder,
|
||||
.addressModeV = vk::SamplerAddressMode::eClampToBorder,
|
||||
.addressModeW = vk::SamplerAddressMode::eClampToBorder,
|
||||
.mipLodBias = 0.0f,
|
||||
.anisotropyEnable = VK_FALSE,
|
||||
.maxAnisotropy = 0.0f,
|
||||
.compareEnable = VK_FALSE,
|
||||
.compareOp = vk::CompareOp::eNever,
|
||||
.minLod = 0.0f,
|
||||
.maxLod = 0.0f,
|
||||
.borderColor = vk::BorderColor::eFloatOpaqueWhite,
|
||||
.unnormalizedCoordinates = VK_FALSE,
|
||||
};
|
||||
|
||||
/// Builds a pipeline layout description with exactly one descriptor set layout
/// and one push-constant range.
/// @param set_layout Pointer stored in the result; it must stay valid until the
///                   layout has been created.
/// @param compute    Selects COMPUTE_PUSH_CONSTANT_RANGE when true,
///                   PUSH_CONSTANT_RANGE otherwise.
constexpr vk::PipelineLayoutCreateInfo PipelineLayoutCreateInfo(
    const vk::DescriptorSetLayout* set_layout, bool compute = false) {
    const vk::PushConstantRange* const push_range =
        compute ? &COMPUTE_PUSH_CONSTANT_RANGE : &PUSH_CONSTANT_RANGE;
    return {
        .setLayoutCount = 1,
        .pSetLayouts = set_layout,
        .pushConstantRangeCount = 1,
        .pPushConstantRanges = push_range,
    };
}
|
||||
|
||||
/// Describes the two shader stages of a graphics pipeline, vertex stage first
/// and fragment stage second, both with entry point "main".
constexpr std::array<vk::PipelineShaderStageCreateInfo, 2> MakeStages(
    vk::ShaderModule vertex_shader, vk::ShaderModule fragment_shader) {
    const vk::PipelineShaderStageCreateInfo vertex_stage = {
        .stage = vk::ShaderStageFlagBits::eVertex,
        .module = vertex_shader,
        .pName = "main",
    };
    const vk::PipelineShaderStageCreateInfo fragment_stage = {
        .stage = vk::ShaderStageFlagBits::eFragment,
        .module = fragment_shader,
        .pName = "main",
    };
    return {vertex_stage, fragment_stage};
}
|
||||
|
||||
/// Describes the single compute stage of a compute pipeline, with entry point "main".
constexpr vk::PipelineShaderStageCreateInfo MakeStages(vk::ShaderModule compute_shader) {
    return {
        .stage = vk::ShaderStageFlagBits::eCompute,
        .module = compute_shader,
        .pName = "main",
    };
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Creates every Vulkan object the helper needs up front: three descriptor set
/// providers, three pipeline layouts, four shader modules, three pipelines and
/// two samplers. All of them are released in ~BlitHelper.
///
/// Later initialisers read earlier members (`device`, the providers' layouts, the
/// shader modules and the pipeline layouts).
/// NOTE(review): this relies on the member declaration order in the header,
/// which is not visible here - confirm before reordering anything.
BlitHelper::BlitHelper(const Instance& instance_, Scheduler& scheduler_, DescriptorPool& pool,
|
||||
RenderpassCache& renderpass_cache_)
|
||||
: instance{instance_}, scheduler{scheduler_}, renderpass_cache{renderpass_cache_},
|
||||
device{instance.GetDevice()}, compute_provider{instance, pool, COMPUTE_BINDINGS},
|
||||
compute_buffer_provider{instance, pool, COMPUTE_BUFFER_BINDINGS},
|
||||
two_textures_provider{instance, pool, TWO_TEXTURES_BINDINGS},
|
||||
// The two compute layouts use the compute push-constant range (second argument true).
compute_pipeline_layout{
|
||||
device.createPipelineLayout(PipelineLayoutCreateInfo(&compute_provider.Layout(), true))},
|
||||
compute_buffer_pipeline_layout{device.createPipelineLayout(
|
||||
PipelineLayoutCreateInfo(&compute_buffer_provider.Layout(), true))},
|
||||
// The graphics layout uses the default (vertex-stage) push-constant range.
two_textures_pipeline_layout{
|
||||
device.createPipelineLayout(PipelineLayoutCreateInfo(&two_textures_provider.Layout()))},
|
||||
// Shader modules built from the embedded SPIR-V blobs.
full_screen_vert{CompileSPV(FULL_SCREEN_TRIANGLE_VERT_SPV, device)},
|
||||
d24s8_to_rgba8_comp{CompileSPV(VULKAN_D24S8_TO_RGBA8_COMP_SPV, device)},
|
||||
depth_to_buffer_comp{CompileSPV(VULKAN_DEPTH_TO_BUFFER_COMP_SPV, device)},
|
||||
blit_depth_stencil_frag{CompileSPV(VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV, device)},
|
||||
// Pipelines: each compute pipeline pairs a compute shader with its matching layout.
d24s8_to_rgba8_pipeline{MakeComputePipeline(d24s8_to_rgba8_comp, compute_pipeline_layout)},
|
||||
depth_to_buffer_pipeline{
|
||||
MakeComputePipeline(depth_to_buffer_comp, compute_buffer_pipeline_layout)},
|
||||
depth_blit_pipeline{MakeDepthStencilBlitPipeline()},
|
||||
linear_sampler{device.createSampler(SAMPLER_CREATE_INFO<vk::Filter::eLinear>)},
|
||||
nearest_sampler{device.createSampler(SAMPLER_CREATE_INFO<vk::Filter::eNearest>)} {}
|
||||
|
||||
/// Destroys every Vulkan object created by the constructor, grouped by kind:
/// pipeline layouts, shader modules, pipelines, then samplers.
BlitHelper::~BlitHelper() {
    for (const vk::PipelineLayout layout :
         {compute_pipeline_layout, compute_buffer_pipeline_layout, two_textures_pipeline_layout}) {
        device.destroyPipelineLayout(layout);
    }
    for (const vk::ShaderModule shader :
         {full_screen_vert, d24s8_to_rgba8_comp, depth_to_buffer_comp, blit_depth_stencil_frag}) {
        device.destroyShaderModule(shader);
    }
    for (const vk::Pipeline pipeline :
         {depth_to_buffer_pipeline, d24s8_to_rgba8_pipeline, depth_blit_pipeline}) {
        device.destroyPipeline(pipeline);
    }
    for (const vk::Sampler sampler : {linear_sampler, nearest_sampler}) {
        device.destroySampler(sampler);
    }
}
|
||||
|
||||
/// Records the dynamic state for a full-screen-triangle blit.
///
/// The viewport and scissor both cover the destination rectangle. The source
/// rectangle's size and left/bottom origin are pushed to the vertex stage as
/// PushConstants (tex_scale / tex_offset).
/// @param cmdbuf Command buffer to record into.
/// @param layout Pipeline layout whose vertex-stage push-constant range is written.
/// @param blit   Source and destination rectangles of the blit.
void BindBlitState(vk::CommandBuffer cmdbuf, vk::PipelineLayout layout,
                   const VideoCore::TextureBlit& blit) {
    const auto& dst = blit.dst_rect;
    const auto& src = blit.src_rect;

    // Take the smaller edge on each axis as the origin of the destination area.
    const vk::Offset2D dst_origin = {
        .x = std::min<s32>(dst.left, dst.right),
        .y = std::min<s32>(dst.bottom, dst.top),
    };
    const vk::Extent2D dst_size = {
        .width = dst.GetWidth(),
        .height = dst.GetHeight(),
    };

    const vk::Viewport viewport = {
        .x = static_cast<float>(dst_origin.x),
        .y = static_cast<float>(dst_origin.y),
        .width = static_cast<float>(dst_size.width),
        .height = static_cast<float>(dst_size.height),
        .minDepth = 0.0f,
        .maxDepth = 1.0f,
    };
    const vk::Rect2D scissor = {
        .offset = dst_origin,
        .extent = dst_size,
    };

    const PushConstants constants = {
        .tex_scale = {static_cast<float>(src.GetWidth()), static_cast<float>(src.GetHeight())},
        .tex_offset = {static_cast<float>(src.left), static_cast<float>(src.bottom)},
    };

    cmdbuf.setViewport(0, viewport);
    cmdbuf.setScissor(0, scissor);
    cmdbuf.pushConstants(layout, vk::ShaderStageFlagBits::eVertex, 0, sizeof(constants),
                         &constants);
}
|
||||
|
||||
/// Blits a depth-stencil surface into another by drawing a full-screen triangle
/// that samples the source depth and stencil views.
///
/// @param source Surface whose depth and stencil views are sampled.
/// @param dest   Surface whose framebuffer is rendered into.
/// @param blit   Source and destination rectangles.
/// @return false (after logging an error) when the instance does not report
///         shader stencil export support; true once the draw has been recorded.
bool BlitHelper::BlitDepthStencil(Surface& source, Surface& dest,
                                  const VideoCore::TextureBlit& blit) {
    const bool can_export_stencil = instance.IsShaderStencilExportSupported();
    if (!can_export_stencil) {
        LOG_ERROR(Render_Vulkan, "Unable to emulate depth stencil images");
        return false;
    }

    // The render area spans the whole scaled destination surface.
    const vk::Rect2D dst_render_area = {
        .offset = vk::Offset2D{.x = 0, .y = 0},
        .extent = vk::Extent2D{.width = dest.GetScaledWidth(), .height = dest.GetScaledHeight()},
    };

    // Both source views are sampled with the nearest sampler in the general layout.
    const auto sampled_in_general = [this](vk::ImageView view) {
        return vk::DescriptorImageInfo{
            .sampler = nearest_sampler,
            .imageView = view,
            .imageLayout = vk::ImageLayout::eGeneral,
        };
    };
    std::array<DescriptorData, 2> bindings{};
    bindings[0].image_info = sampled_in_general(source.DepthView());
    bindings[1].image_info = sampled_in_general(source.StencilView());

    const auto descriptor_set = two_textures_provider.Acquire(bindings);

    // Depth-only render pass: no colour attachment (PixelFormat::Invalid).
    const RenderPass depth_pass = {
        .framebuffer = dest.Framebuffer(),
        .render_pass =
            renderpass_cache.GetRenderpass(PixelFormat::Invalid, dest.pixel_format, false),
        .render_area = dst_render_area,
    };
    renderpass_cache.BeginRendering(depth_pass);

    scheduler.Record([blit, descriptor_set, this](vk::CommandBuffer cmdbuf) {
        cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, depth_blit_pipeline);
        cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, two_textures_pipeline_layout,
                                  0, descriptor_set, {});
        BindBlitState(cmdbuf, two_textures_pipeline_layout, blit);
        // Three vertices: a single triangle, no vertex buffers bound.
        cmdbuf.draw(3, 1, 0, 0);
    });

    // A different pipeline was bound above, so flag the scheduler's pipeline state as dirty.
    scheduler.MakeDirty(StateFlags::Pipeline);
    return true;
}
|
||||
|
||||
/// Converts a depth-stencil surface into a colour surface with a compute shader.
///
/// Recorded work: transition the source to a read-only depth-stencil layout and
/// the destination to the general layout, dispatch the D24S8 -> RGBA8 compute
/// pipeline over the source rectangle, then transition the source back to the
/// general layout and make the destination writes visible to transfer reads.
///
/// @param source Depth-stencil surface that is read.
/// @param dest   Colour surface that is written through a storage image.
/// @param blit   Only `src_rect` is used (push-constant offset and dispatch size).
/// @return Always true.
bool BlitHelper::ConvertDS24S8ToRGBA8(Surface& source, Surface& dest,
                                      const VideoCore::TextureBlit& blit) {
    // Bindings 0/1 read the source depth and stencil views; binding 2 is the destination.
    const auto read_only_view = [](vk::ImageView view) {
        return vk::DescriptorImageInfo{
            .imageView = view,
            .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
        };
    };
    std::array<DescriptorData, 3> bindings{};
    bindings[0].image_info = read_only_view(source.DepthView());
    bindings[1].image_info = read_only_view(source.StencilView());
    bindings[2].image_info = vk::DescriptorImageInfo{
        .imageView = dest.ImageView(),
        .imageLayout = vk::ImageLayout::eGeneral,
    };

    const auto descriptor_set = compute_provider.Acquire(bindings);

    // The barriers and dispatch below are recorded after ending the current render pass.
    renderpass_cache.EndRendering();
    scheduler.Record([this, descriptor_set, blit, src_image = source.Image(),
                      dst_image = dest.Image()](vk::CommandBuffer cmdbuf) {
        // Every barrier below covers all mip levels and array layers of its image.
        const auto whole_image = [](vk::ImageAspectFlags aspect) {
            return vk::ImageSubresourceRange{
                .aspectMask = aspect,
                .baseMipLevel = 0,
                .levelCount = VK_REMAINING_MIP_LEVELS,
                .baseArrayLayer = 0,
                .layerCount = VK_REMAINING_ARRAY_LAYERS,
            };
        };
        const vk::ImageSubresourceRange depth_stencil_range =
            whole_image(vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil);
        const vk::ImageSubresourceRange color_range =
            whole_image(vk::ImageAspectFlagBits::eColor);

        const std::array pre_barriers = {
            // Source: attachment writes -> shader reads, general -> read-only layout.
            vk::ImageMemoryBarrier{
                .srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite,
                .dstAccessMask = vk::AccessFlagBits::eShaderRead,
                .oldLayout = vk::ImageLayout::eGeneral,
                .newLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .image = src_image,
                .subresourceRange = depth_stencil_range,
            },
            // Destination: transitioned from the undefined layout to general for shader writes.
            vk::ImageMemoryBarrier{
                .srcAccessMask = vk::AccessFlagBits::eNone,
                .dstAccessMask = vk::AccessFlagBits::eShaderWrite,
                .oldLayout = vk::ImageLayout::eUndefined,
                .newLayout = vk::ImageLayout::eGeneral,
                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .image = dst_image,
                .subresourceRange = color_range,
            },
        };
        const std::array post_barriers = {
            // Source: back to the general layout for depth-stencil attachment access.
            vk::ImageMemoryBarrier{
                .srcAccessMask = vk::AccessFlagBits::eShaderRead,
                .dstAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite |
                                 vk::AccessFlagBits::eDepthStencilAttachmentRead,
                .oldLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
                .newLayout = vk::ImageLayout::eGeneral,
                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .image = src_image,
                .subresourceRange = depth_stencil_range,
            },
            // Destination: shader writes -> transfer reads, layout unchanged.
            vk::ImageMemoryBarrier{
                .srcAccessMask = vk::AccessFlagBits::eShaderWrite,
                .dstAccessMask = vk::AccessFlagBits::eTransferRead,
                .oldLayout = vk::ImageLayout::eGeneral,
                .newLayout = vk::ImageLayout::eGeneral,
                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .image = dst_image,
                .subresourceRange = color_range,
            },
        };

        cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eEarlyFragmentTests |
                                   vk::PipelineStageFlagBits::eLateFragmentTests,
                               vk::PipelineStageFlagBits::eComputeShader,
                               vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers);

        cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0,
                                  descriptor_set, {});
        cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, d24s8_to_rgba8_pipeline);

        // Only the source offset is pushed: a single Vec2i-sized write at offset 0.
        const auto src_offset = Common::MakeVec(blit.src_rect.left, blit.src_rect.bottom);
        cmdbuf.pushConstants(compute_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0,
                             sizeof(Common::Vec2i), src_offset.AsArray());

        // NOTE(review): the integer division drops any remainder, so dimensions that are
        // not multiples of 8 are only partially covered - confirm callers guarantee this.
        cmdbuf.dispatch(blit.src_rect.GetWidth() / 8, blit.src_rect.GetHeight() / 8, 1);

        cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
                               vk::PipelineStageFlagBits::eEarlyFragmentTests |
                                   vk::PipelineStageFlagBits::eLateFragmentTests |
                                   vk::PipelineStageFlagBits::eTransfer,
                               vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers);
    });
    return true;
}
|
||||
|
||||
/// Records a compute dispatch that reads the depth and stencil views of `source` and
/// writes the result into `buffer`, using the range described by `copy`.
/// The source image is moved from eGeneral to eDepthStencilReadOnlyOptimal for the
/// dispatch and back to eGeneral afterwards. Always returns true.
bool BlitHelper::DepthToBuffer(Surface& source, vk::Buffer buffer,
                               const VideoCore::BufferTextureCopy& copy) {
    // Slot 0: depth view, slot 1: stencil view, slot 2: destination buffer range.
    std::array<DescriptorData, 3> descriptors{};
    descriptors[0].image_info = vk::DescriptorImageInfo{
        .sampler = nearest_sampler,
        .imageView = source.DepthView(),
        .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
    };
    descriptors[1].image_info = vk::DescriptorImageInfo{
        .sampler = nearest_sampler,
        .imageView = source.StencilView(),
        .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
    };
    descriptors[2].buffer_info = vk::DescriptorBufferInfo{
        .buffer = buffer,
        .offset = copy.buffer_offset,
        .range = copy.buffer_size,
    };

    const auto descriptor_set = compute_buffer_provider.Acquire(descriptors);

    // The dispatch and its barriers are recorded after the active render pass is ended.
    renderpass_cache.EndRendering();
    scheduler.Record([this, descriptor_set, copy, src_image = source.Image(),
                      extent = source.RealExtent(false)](vk::CommandBuffer cmdbuf) {
        // Both barriers cover every mip level and layer of the depth and stencil aspects.
        const vk::ImageSubresourceRange whole_image = {
            .aspectMask = vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil,
            .baseMipLevel = 0,
            .levelCount = VK_REMAINING_MIP_LEVELS,
            .baseArrayLayer = 0,
            .layerCount = VK_REMAINING_ARRAY_LAYERS,
        };
        const vk::PipelineStageFlags fragment_tests =
            vk::PipelineStageFlagBits::eEarlyFragmentTests |
            vk::PipelineStageFlagBits::eLateFragmentTests;

        // Attachment writes -> shader reads, eGeneral -> read-only layout.
        const vk::ImageMemoryBarrier to_shader_read = {
            .srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite,
            .dstAccessMask = vk::AccessFlagBits::eShaderRead,
            .oldLayout = vk::ImageLayout::eGeneral,
            .newLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .image = src_image,
            .subresourceRange = whole_image,
        };
        // Shader reads -> attachment access, read-only layout -> eGeneral.
        const vk::ImageMemoryBarrier to_attachment = {
            .srcAccessMask = vk::AccessFlagBits::eShaderRead,
            .dstAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite |
                             vk::AccessFlagBits::eDepthStencilAttachmentRead,
            .oldLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
            .newLayout = vk::ImageLayout::eGeneral,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .image = src_image,
            .subresourceRange = whole_image,
        };

        cmdbuf.pipelineBarrier(fragment_tests, vk::PipelineStageFlagBits::eComputeShader,
                               vk::DependencyFlagBits::eByRegion, {}, {}, to_shader_read);

        cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_buffer_pipeline_layout,
                                  0, descriptor_set, {});
        cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, depth_to_buffer_pipeline);

        const Common::Vec2i src_offset{static_cast<int>(copy.texture_rect.left),
                                       static_cast<int>(copy.texture_rect.bottom)};
        const Common::Vec2i src_extent{static_cast<int>(extent.width),
                                       static_cast<int>(extent.height)};
        const ComputeInfo info = {
            .src_offset = src_offset,
            .src_extent = src_extent,
        };
        cmdbuf.pushConstants(compute_buffer_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0,
                             sizeof(ComputeInfo), &info);

        // One workgroup per 8x8 block; the division truncates.
        // NOTE(review): presumably the rect dimensions are multiples of 8 - confirm.
        const auto groups_x = copy.texture_rect.GetWidth() / 8;
        const auto groups_y = copy.texture_rect.GetHeight() / 8;
        cmdbuf.dispatch(groups_x, groups_y, 1);

        // NOTE(review): only the image is covered by this barrier; no buffer barrier is
        // recorded here for the data written by the dispatch - confirm callers synchronize.
        cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
                               fragment_tests | vk::PipelineStageFlagBits::eTransfer,
                               vk::DependencyFlagBits::eByRegion, {}, {}, to_attachment);
    });
    return true;
}
|
||||
|
||||
/// Creates a compute pipeline from `shader` using the given pipeline `layout`.
/// Logs a critical error and hits UNREACHABLE() when the driver reports a failure.
vk::Pipeline BlitHelper::MakeComputePipeline(vk::ShaderModule shader, vk::PipelineLayout layout) {
    const vk::ComputePipelineCreateInfo compute_info = {
        .stage = MakeStages(shader),
        .layout = layout,
    };

    const auto result = device.createComputePipeline({}, compute_info);
    if (result.result == vk::Result::eSuccess) {
        return result.value;
    }

    LOG_CRITICAL(Render_Vulkan, "Compute pipeline creation failed!");
    UNREACHABLE();
    // Mirrors MakeDepthStencilBlitPipeline: never fall off the end of a non-void function,
    // even if UNREACHABLE() returns in some build configuration.
    return VK_NULL_HANDLE;
}
|
||||
|
||||
/// Builds the graphics pipeline used for depth/stencil blits.
/// Returns VK_NULL_HANDLE when the device does not support shader stencil export.
vk::Pipeline BlitHelper::MakeDepthStencilBlitPipeline() {
    if (!instance.IsShaderStencilExportSupported()) {
        return VK_NULL_HANDLE;
    }

    const std::array shader_stages = MakeStages(full_screen_vert, blit_depth_stencil_frag);
    // Render pass without a colour attachment and with a D24S8 depth attachment.
    const auto depth_renderpass = renderpass_cache.GetRenderpass(
        VideoCore::PixelFormat::Invalid, VideoCore::PixelFormat::D24S8, false);

    const vk::GraphicsPipelineCreateInfo create_info = {
        .stageCount = static_cast<u32>(shader_stages.size()),
        .pStages = shader_stages.data(),
        .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
        .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
        .pTessellationState = nullptr,
        .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
        .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
        .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
        .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
        .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO,
        .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
        .layout = two_textures_pipeline_layout,
        .renderPass = depth_renderpass,
    };

    const auto creation = device.createGraphicsPipeline({}, create_info);
    if (creation.result != vk::Result::eSuccess) {
        LOG_CRITICAL(Render_Vulkan, "Depth stencil blit pipeline creation failed!");
        UNREACHABLE();
        return VK_NULL_HANDLE;
    }
    return creation.value;
}
|
||||
|
||||
} // namespace Vulkan
|
71
src/video_core/renderer_vulkan/vk_blit_helper.h
Normal file
71
src/video_core/renderer_vulkan/vk_blit_helper.h
Normal file
|
@ -0,0 +1,71 @@
|
|||
// Copyright 2023 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/rasterizer_cache/pixel_format.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
|
||||
namespace VideoCore {
|
||||
struct TextureBlit;
|
||||
struct BufferTextureCopy;
|
||||
} // namespace VideoCore
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Instance;
|
||||
class RenderpassCache;
|
||||
class Scheduler;
|
||||
class Surface;
|
||||
|
||||
class BlitHelper {
|
||||
friend class TextureRuntime;
|
||||
|
||||
public:
|
||||
BlitHelper(const Instance& instance, Scheduler& scheduler, DescriptorPool& pool,
|
||||
RenderpassCache& renderpass_cache);
|
||||
~BlitHelper();
|
||||
|
||||
bool BlitDepthStencil(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit);
|
||||
|
||||
bool ConvertDS24S8ToRGBA8(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit);
|
||||
|
||||
bool DepthToBuffer(Surface& source, vk::Buffer buffer,
|
||||
const VideoCore::BufferTextureCopy& copy);
|
||||
|
||||
private:
|
||||
/// Creates compute pipelines used for blit
|
||||
vk::Pipeline MakeComputePipeline(vk::ShaderModule shader, vk::PipelineLayout layout);
|
||||
|
||||
/// Creates graphics pipelines used for blit
|
||||
vk::Pipeline MakeDepthStencilBlitPipeline();
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
Scheduler& scheduler;
|
||||
RenderpassCache& renderpass_cache;
|
||||
|
||||
vk::Device device;
|
||||
vk::RenderPass r32_renderpass;
|
||||
|
||||
DescriptorSetProvider compute_provider;
|
||||
DescriptorSetProvider compute_buffer_provider;
|
||||
DescriptorSetProvider two_textures_provider;
|
||||
vk::PipelineLayout compute_pipeline_layout;
|
||||
vk::PipelineLayout compute_buffer_pipeline_layout;
|
||||
vk::PipelineLayout two_textures_pipeline_layout;
|
||||
|
||||
vk::ShaderModule full_screen_vert;
|
||||
vk::ShaderModule d24s8_to_rgba8_comp;
|
||||
vk::ShaderModule depth_to_buffer_comp;
|
||||
vk::ShaderModule blit_depth_stencil_frag;
|
||||
|
||||
vk::Pipeline d24s8_to_rgba8_pipeline;
|
||||
vk::Pipeline depth_to_buffer_pipeline;
|
||||
vk::Pipeline depth_blit_pipeline;
|
||||
vk::Sampler linear_sampler;
|
||||
vk::Sampler nearest_sampler;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
|
@ -9,6 +9,7 @@
|
|||
#define VK_NO_PROTOTYPES
|
||||
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#define VULKAN_HPP_NO_UNION_CONSTRUCTORS
|
||||
#define VULKAN_HPP_NO_STRUCT_SETTERS
|
||||
#include <vulkan/vulkan.hpp>
|
||||
|
||||
|
|
141
src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
Normal file
141
src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
Normal file
|
@ -0,0 +1,141 @@
|
|||
// Copyright 2023 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
MICROPROFILE_DEFINE(Vulkan_DescriptorSetAcquire, "Vulkan", "Descriptor Set Acquire",
|
||||
MP_RGB(64, 128, 256));
|
||||
|
||||
constexpr u32 MAX_BATCH_SIZE = 8;
|
||||
|
||||
/// Creates the first descriptor pool so that Allocate() always has a pool to try.
DescriptorPool::DescriptorPool(const Instance& instance_) : instance{instance_} {
    pools.push_back(CreatePool());
}
|
||||
|
||||
DescriptorPool::~DescriptorPool() = default;
|
||||
|
||||
/// Allocates `num_sets` descriptor sets that all use `layout`.
/// Pools are tried in creation order; when every existing pool is exhausted (or too
/// fragmented) a new pool is created and used. Returns an empty vector for `num_sets == 0`.
std::vector<vk::DescriptorSet> DescriptorPool::Allocate(vk::DescriptorSetLayout layout,
                                                        u32 num_sets) {
    // Vulkan requires descriptorSetCount to be greater than zero.
    if (num_sets == 0) {
        return {};
    }

    // One layout handle per requested set. Sized from num_sets so that requests larger
    // than MAX_BATCH_SIZE cannot make the driver read past the end of the array.
    const std::vector<vk::DescriptorSetLayout> layouts(num_sets, layout);

    std::size_t current_pool = 0;
    vk::DescriptorSetAllocateInfo alloc_info = {
        .descriptorPool = *pools[current_pool],
        .descriptorSetCount = num_sets,
        .pSetLayouts = layouts.data(),
    };

    // Moves on to the next pool, creating one when the list is exhausted.
    const auto use_next_pool = [&] {
        current_pool++;
        if (current_pool == pools.size()) {
            LOG_INFO(Render_Vulkan, "Run out of pools, creating new one!");
            pools.push_back(CreatePool());
        }
        alloc_info.descriptorPool = *pools[current_pool];
    };

    while (true) {
        try {
            return instance.GetDevice().allocateDescriptorSets(alloc_info);
        } catch (const vk::OutOfPoolMemoryError&) {
            use_next_pool();
        } catch (const vk::FragmentedPoolError&) {
            // A fragmented pool cannot satisfy this request either; treat it like a full pool.
            use_next_pool();
        }
    }
}
|
||||
|
||||
/// Convenience overload that allocates a single descriptor set with `layout`.
vk::DescriptorSet DescriptorPool::Allocate(vk::DescriptorSetLayout layout) {
    return Allocate(layout, 1).front();
}
|
||||
|
||||
/// Creates one descriptor pool with fixed per-type descriptor counts.
vk::UniqueDescriptorPool DescriptorPool::CreatePool() {
    // Choose a sane pool size good for most games
    static constexpr std::array<vk::DescriptorPoolSize, 6> pool_sizes = {{
        {.type = vk::DescriptorType::eUniformBufferDynamic, .descriptorCount = 64},
        {.type = vk::DescriptorType::eUniformTexelBuffer, .descriptorCount = 64},
        {.type = vk::DescriptorType::eCombinedImageSampler, .descriptorCount = 4096},
        {.type = vk::DescriptorType::eSampledImage, .descriptorCount = 256},
        {.type = vk::DescriptorType::eStorageImage, .descriptorCount = 256},
        {.type = vk::DescriptorType::eStorageBuffer, .descriptorCount = 32},
    }};

    const vk::DescriptorPoolCreateInfo pool_info = {
        .maxSets = 4098,
        .poolSizeCount = static_cast<u32>(pool_sizes.size()),
        .pPoolSizes = pool_sizes.data(),
    };

    const vk::Device device = instance.GetDevice();
    return device.createDescriptorPoolUnique(pool_info);
}
|
||||
|
||||
/// Creates the descriptor set layout for `bindings` and an update template that reads
/// binding `i` from element `i` of a tightly packed DescriptorData array.
/// NOTE(review): bindings.size() is assumed to be at most MAX_DESCRIPTORS; larger
/// inputs would write past `entries` - confirm at call sites.
DescriptorSetProvider::DescriptorSetProvider(
    const Instance& instance, DescriptorPool& pool_,
    std::span<const vk::DescriptorSetLayoutBinding> bindings)
    : pool{pool_}, device{instance.GetDevice()} {
    const u32 binding_count = static_cast<u32>(bindings.size());

    const vk::DescriptorSetLayoutCreateInfo layout_info = {
        .bindingCount = binding_count,
        .pBindings = bindings.data(),
    };
    layout = device.createDescriptorSetLayoutUnique(layout_info);

    std::array<vk::DescriptorUpdateTemplateEntry, MAX_DESCRIPTORS> entries;
    for (u32 slot = 0; slot < bindings.size(); ++slot) {
        const vk::DescriptorSetLayoutBinding& binding = bindings[slot];
        entries[slot] = vk::DescriptorUpdateTemplateEntry{
            .dstBinding = binding.binding,
            .dstArrayElement = 0,
            .descriptorCount = binding.descriptorCount,
            .descriptorType = binding.descriptorType,
            .offset = slot * sizeof(DescriptorData),
            .stride = sizeof(DescriptorData),
        };
    }

    const vk::DescriptorUpdateTemplateCreateInfo template_info = {
        .descriptorUpdateEntryCount = binding_count,
        .pDescriptorUpdateEntries = entries.data(),
        .templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet,
        .descriptorSetLayout = *layout,
    };
    update_template = device.createDescriptorUpdateTemplateUnique(template_info);
}
|
||||
|
||||
DescriptorSetProvider::~DescriptorSetProvider() = default;
|
||||
|
||||
/// Returns a descriptor set whose bindings contain `data`.
/// Sets are cached by the raw bytes of `data`: an identical request returns the cached
/// set, otherwise a free set is taken (allocating a new batch if none is left) and
/// written through the update template.
/// `data` must hold between 1 and MAX_DESCRIPTORS entries; otherwise a critical error is
/// logged and a null descriptor set is returned.
vk::DescriptorSet DescriptorSetProvider::Acquire(std::span<const DescriptorData> data) {
    MICROPROFILE_SCOPE(Vulkan_DescriptorSetAcquire);

    // The key is a fixed-size array and data[0] is read below, so reject sizes that would
    // overflow the key or index an empty span.
    if (data.empty() || data.size() > MAX_DESCRIPTORS) {
        LOG_CRITICAL(Render_Vulkan, "Invalid descriptor count {} (expected 1 to {})",
                     data.size(), MAX_DESCRIPTORS);
        return vk::DescriptorSet{};
    }

    // Unused trailing slots stay zeroed so that equal inputs produce equal keys.
    DescriptorSetData key{};
    std::memcpy(key.data(), data.data(), data.size_bytes());

    const auto [it, new_set] = descriptor_set_map.try_emplace(key);
    if (!new_set) {
        return it->second;
    }

    if (free_sets.empty()) {
        free_sets = pool.Allocate(*layout, MAX_BATCH_SIZE);
    }
    it.value() = free_sets.back();
    free_sets.pop_back();

    device.updateDescriptorSetWithTemplate(it->second, *update_template, data[0]);
    return it->second;
}
|
||||
|
||||
/// Drops every cached descriptor set whose key references `image_view` and returns the
/// set to the free list for reuse.
void DescriptorSetProvider::FreeWithImage(vk::ImageView image_view) {
    // Every slot is inspected through its image_info member, whatever was stored in it.
    const auto references_view = [image_view](auto& slot) {
        return slot.image_info.imageView == image_view;
    };

    auto it = descriptor_set_map.begin();
    while (it != descriptor_set_map.end()) {
        const auto& [key, cached_set] = *it;
        if (!std::any_of(key.begin(), key.end(), references_view)) {
            it++;
            continue;
        }
        free_sets.push_back(cached_set);
        it = descriptor_set_map.erase(it);
    }
}
|
||||
|
||||
} // namespace Vulkan
|
92
src/video_core/renderer_vulkan/vk_descriptor_pool.h
Normal file
92
src/video_core/renderer_vulkan/vk_descriptor_pool.h
Normal file
|
@ -0,0 +1,92 @@
|
|||
// Copyright 2023 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
#include <vector>
|
||||
#include <tsl/robin_map.h>
|
||||
|
||||
#include "common/hash.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Instance;
|
||||
|
||||
constexpr u32 MAX_DESCRIPTORS = 7;
|
||||
|
||||
union DescriptorData {
|
||||
vk::DescriptorImageInfo image_info;
|
||||
vk::DescriptorBufferInfo buffer_info;
|
||||
vk::BufferView buffer_view;
|
||||
|
||||
bool operator==(const DescriptorData& other) const noexcept {
|
||||
return std::memcmp(this, &other, sizeof(DescriptorData)) == 0;
|
||||
}
|
||||
};
|
||||
|
||||
using DescriptorSetData = std::array<DescriptorData, MAX_DESCRIPTORS>;
|
||||
|
||||
struct DataHasher {
|
||||
u64 operator()(const DescriptorSetData& data) const noexcept {
|
||||
return Common::ComputeHash64(data.data(), sizeof(data));
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* An interface for allocating descriptor sets that manages a collection of descriptor pools.
|
||||
*/
|
||||
class DescriptorPool {
|
||||
public:
|
||||
explicit DescriptorPool(const Instance& instance);
|
||||
~DescriptorPool();
|
||||
|
||||
std::vector<vk::DescriptorSet> Allocate(vk::DescriptorSetLayout layout, u32 num_sets);
|
||||
|
||||
vk::DescriptorSet Allocate(vk::DescriptorSetLayout layout);
|
||||
|
||||
private:
|
||||
vk::UniqueDescriptorPool CreatePool();
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
std::vector<vk::UniqueDescriptorPool> pools;
|
||||
};
|
||||
|
||||
/**
|
||||
* Allocates and caches descriptor sets of a specific layout.
|
||||
*/
|
||||
class DescriptorSetProvider {
|
||||
public:
|
||||
explicit DescriptorSetProvider(const Instance& instance, DescriptorPool& pool,
|
||||
std::span<const vk::DescriptorSetLayoutBinding> bindings);
|
||||
~DescriptorSetProvider();
|
||||
|
||||
vk::DescriptorSet Acquire(std::span<const DescriptorData> data);
|
||||
|
||||
void FreeWithImage(vk::ImageView image_view);
|
||||
|
||||
[[nodiscard]] vk::DescriptorSetLayout Layout() const noexcept {
|
||||
return *layout;
|
||||
}
|
||||
|
||||
[[nodiscard]] vk::DescriptorSetLayout& Layout() noexcept {
|
||||
return layout.get();
|
||||
}
|
||||
|
||||
[[nodiscard]] vk::DescriptorUpdateTemplate UpdateTemplate() const noexcept {
|
||||
return *update_template;
|
||||
}
|
||||
|
||||
private:
|
||||
DescriptorPool& pool;
|
||||
vk::Device device;
|
||||
vk::UniqueDescriptorSetLayout layout;
|
||||
vk::UniqueDescriptorUpdateTemplate update_template;
|
||||
std::vector<vk::DescriptorSet> free_sets;
|
||||
tsl::robin_map<DescriptorSetData, vk::DescriptorSet, DataHasher> descriptor_set_map;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
290
src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
Normal file
290
src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
Normal file
|
@ -0,0 +1,290 @@
|
|||
// Copyright 2023 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include "common/hash.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/renderer_vulkan/pica_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
MICROPROFILE_DEFINE(Vulkan_Pipeline, "Vulkan", "Pipeline Building", MP_RGB(0, 192, 32));
|
||||
|
||||
/// Maps a stage slot index (0 = vertex, 1 = fragment, 2 = geometry) to its Vulkan stage
/// flag. Any other index is reported as a critical error.
vk::ShaderStageFlagBits MakeShaderStage(std::size_t index) {
    static constexpr std::array stage_by_index = {
        vk::ShaderStageFlagBits::eVertex,
        vk::ShaderStageFlagBits::eFragment,
        vk::ShaderStageFlagBits::eGeometry,
    };
    if (index < stage_by_index.size()) {
        return stage_by_index[index];
    }

    LOG_CRITICAL(Render_Vulkan, "Invalid shader stage index!");
    UNREACHABLE();
    return vk::ShaderStageFlagBits::eVertex;
}
|
||||
|
||||
/// Hashes the state that is baked into a pipeline object.
/// Rasterization and depth/stencil state only contribute when extended dynamic state
/// is not supported by the instance.
u64 PipelineInfo::Hash(const Instance& instance) const {
    const auto mix = [](u64 seed, const auto& state) -> u64 {
        const u64 state_hash = Common::ComputeStructHash64(state);
        return Common::HashCombine(seed, state_hash);
    };

    u64 hash = mix(0, vertex_layout);
    hash = mix(hash, attachments);
    hash = mix(hash, blending);

    if (instance.IsExtendedDynamicStateSupported()) {
        return hash;
    }

    hash = mix(hash, rasterization);
    hash = mix(hash, depth_stencil);
    return hash;
}
|
||||
|
||||
/// Creates an empty shader bound to the instance's device; the module is filled in later.
Shader::Shader(const Instance& instance) : device{instance.GetDevice()} {}

/// Compiles `code` for `stage` immediately and marks the shader as done.
Shader::Shader(const Instance& instance, vk::ShaderStageFlagBits stage, std::string code)
    : Shader{instance} {
    // `device` was set from instance.GetDevice() by the delegated constructor.
    module = Compile(code, stage, device);
    MarkDone();
}
|
||||
|
||||
/// Destroys the shader module, if both the device and the module are valid.
Shader::~Shader() {
    if (!device || !module) {
        return;
    }
    device.destroyShaderModule(module);
}
|
||||
|
||||
/// Stores everything needed to build the pipeline later; nothing is compiled here.
GraphicsPipeline::GraphicsPipeline(const Instance& instance_, RenderpassCache& renderpass_cache_,
                                   const PipelineInfo& info_, vk::PipelineCache pipeline_cache_,
                                   vk::PipelineLayout layout_, std::array<Shader*, 3> stages_,
                                   Common::ThreadWorker* worker_)
    : instance{instance_},
      renderpass_cache{renderpass_cache_},
      worker{worker_},
      pipeline_layout{layout_},
      pipeline_cache{pipeline_cache_},
      info{info_},
      stages{stages_} {}
|
||||
|
||||
GraphicsPipeline::~GraphicsPipeline() = default;
|
||||
|
||||
bool GraphicsPipeline::TryBuild(bool wait_built) {
|
||||
if (is_pending) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// If the shaders haven't been compiled yet, we cannot proceed
|
||||
const bool shaders_pending = std::any_of(
|
||||
stages.begin(), stages.end(), [](Shader* shader) { return shader && !shader->IsDone(); });
|
||||
if (!wait_built && shaders_pending) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Ask the driver if it can give us the pipeline quickly
|
||||
if (!wait_built && instance.IsPipelineCreationCacheControlSupported() && Build(true)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Fallback to (a)synchronous compilation
|
||||
if (worker) {
|
||||
worker->QueueWork([this] { Build(); });
|
||||
is_pending = true;
|
||||
} else {
|
||||
Build();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GraphicsPipeline::Build(bool fail_on_compile_required) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Pipeline);
|
||||
const vk::Device device = instance.GetDevice();
|
||||
|
||||
std::array<vk::VertexInputBindingDescription, MAX_VERTEX_BINDINGS> bindings;
|
||||
for (u32 i = 0; i < info.vertex_layout.binding_count; i++) {
|
||||
const auto& binding = info.vertex_layout.bindings[i];
|
||||
bindings[i] = vk::VertexInputBindingDescription{
|
||||
.binding = binding.binding,
|
||||
.stride = binding.stride,
|
||||
.inputRate = binding.fixed.Value() ? vk::VertexInputRate::eInstance
|
||||
: vk::VertexInputRate::eVertex,
|
||||
};
|
||||
}
|
||||
|
||||
std::array<vk::VertexInputAttributeDescription, MAX_VERTEX_ATTRIBUTES> attributes;
|
||||
for (u32 i = 0; i < info.vertex_layout.attribute_count; i++) {
|
||||
const auto& attr = info.vertex_layout.attributes[i];
|
||||
const FormatTraits& traits = instance.GetTraits(attr.type, attr.size);
|
||||
attributes[i] = vk::VertexInputAttributeDescription{
|
||||
.location = attr.location,
|
||||
.binding = attr.binding,
|
||||
.format = traits.native,
|
||||
.offset = attr.offset,
|
||||
};
|
||||
|
||||
// At the end there's always the fixed binding which takes up
|
||||
// at least 16 bytes so we should always be able to alias.
|
||||
if (traits.needs_emulation) {
|
||||
const FormatTraits& comp_four_traits = instance.GetTraits(attr.type, 4);
|
||||
attributes[i].format = comp_four_traits.native;
|
||||
}
|
||||
}
|
||||
|
||||
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
|
||||
.vertexBindingDescriptionCount = info.vertex_layout.binding_count,
|
||||
.pVertexBindingDescriptions = bindings.data(),
|
||||
.vertexAttributeDescriptionCount = info.vertex_layout.attribute_count,
|
||||
.pVertexAttributeDescriptions = attributes.data(),
|
||||
};
|
||||
|
||||
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
|
||||
.topology = PicaToVK::PrimitiveTopology(info.rasterization.topology),
|
||||
.primitiveRestartEnable = false,
|
||||
};
|
||||
|
||||
const vk::PipelineRasterizationStateCreateInfo raster_state = {
|
||||
.depthClampEnable = false,
|
||||
.rasterizerDiscardEnable = false,
|
||||
.cullMode = PicaToVK::CullMode(info.rasterization.cull_mode),
|
||||
.frontFace = PicaToVK::FrontFace(info.rasterization.cull_mode),
|
||||
.depthBiasEnable = false,
|
||||
.lineWidth = 1.0f,
|
||||
};
|
||||
|
||||
const vk::PipelineMultisampleStateCreateInfo multisampling = {
|
||||
.rasterizationSamples = vk::SampleCountFlagBits::e1,
|
||||
.sampleShadingEnable = false,
|
||||
};
|
||||
|
||||
const vk::PipelineColorBlendAttachmentState colorblend_attachment = {
|
||||
.blendEnable = info.blending.blend_enable,
|
||||
.srcColorBlendFactor = PicaToVK::BlendFunc(info.blending.src_color_blend_factor),
|
||||
.dstColorBlendFactor = PicaToVK::BlendFunc(info.blending.dst_color_blend_factor),
|
||||
.colorBlendOp = PicaToVK::BlendEquation(info.blending.color_blend_eq),
|
||||
.srcAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.src_alpha_blend_factor),
|
||||
.dstAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.dst_alpha_blend_factor),
|
||||
.alphaBlendOp = PicaToVK::BlendEquation(info.blending.alpha_blend_eq),
|
||||
.colorWriteMask = static_cast<vk::ColorComponentFlags>(info.blending.color_write_mask),
|
||||
};
|
||||
|
||||
const vk::PipelineColorBlendStateCreateInfo color_blending = {
|
||||
.logicOpEnable = !info.blending.blend_enable && !instance.NeedsLogicOpEmulation(),
|
||||
.logicOp = PicaToVK::LogicOp(info.blending.logic_op),
|
||||
.attachmentCount = 1,
|
||||
.pAttachments = &colorblend_attachment,
|
||||
.blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f},
|
||||
};
|
||||
|
||||
const vk::Viewport viewport = {
|
||||
.x = 0.0f,
|
||||
.y = 0.0f,
|
||||
.width = 1.0f,
|
||||
.height = 1.0f,
|
||||
.minDepth = 0.0f,
|
||||
.maxDepth = 1.0f,
|
||||
};
|
||||
|
||||
const vk::Rect2D scissor = {
|
||||
.offset = {0, 0},
|
||||
.extent = {1, 1},
|
||||
};
|
||||
|
||||
const vk::PipelineViewportStateCreateInfo viewport_info = {
|
||||
.viewportCount = 1,
|
||||
.pViewports = &viewport,
|
||||
.scissorCount = 1,
|
||||
.pScissors = &scissor,
|
||||
};
|
||||
|
||||
boost::container::static_vector<vk::DynamicState, 20> dynamic_states = {
|
||||
vk::DynamicState::eViewport, vk::DynamicState::eScissor,
|
||||
vk::DynamicState::eStencilCompareMask, vk::DynamicState::eStencilWriteMask,
|
||||
vk::DynamicState::eStencilReference, vk::DynamicState::eBlendConstants,
|
||||
};
|
||||
|
||||
if (instance.IsExtendedDynamicStateSupported()) {
|
||||
constexpr std::array extended = {
|
||||
vk::DynamicState::eCullModeEXT, vk::DynamicState::eDepthCompareOpEXT,
|
||||
vk::DynamicState::eDepthTestEnableEXT, vk::DynamicState::eDepthWriteEnableEXT,
|
||||
vk::DynamicState::eFrontFaceEXT, vk::DynamicState::ePrimitiveTopologyEXT,
|
||||
vk::DynamicState::eStencilOpEXT, vk::DynamicState::eStencilTestEnableEXT,
|
||||
};
|
||||
dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end());
|
||||
}
|
||||
|
||||
const vk::PipelineDynamicStateCreateInfo dynamic_info = {
|
||||
.dynamicStateCount = static_cast<u32>(dynamic_states.size()),
|
||||
.pDynamicStates = dynamic_states.data(),
|
||||
};
|
||||
|
||||
const vk::StencilOpState stencil_op_state = {
|
||||
.failOp = PicaToVK::StencilOp(info.depth_stencil.stencil_fail_op),
|
||||
.passOp = PicaToVK::StencilOp(info.depth_stencil.stencil_pass_op),
|
||||
.depthFailOp = PicaToVK::StencilOp(info.depth_stencil.stencil_depth_fail_op),
|
||||
.compareOp = PicaToVK::CompareFunc(info.depth_stencil.stencil_compare_op),
|
||||
};
|
||||
|
||||
const vk::PipelineDepthStencilStateCreateInfo depth_info = {
|
||||
.depthTestEnable = static_cast<u32>(info.depth_stencil.depth_test_enable.Value()),
|
||||
.depthWriteEnable = static_cast<u32>(info.depth_stencil.depth_write_enable.Value()),
|
||||
.depthCompareOp = PicaToVK::CompareFunc(info.depth_stencil.depth_compare_op),
|
||||
.depthBoundsTestEnable = false,
|
||||
.stencilTestEnable = static_cast<u32>(info.depth_stencil.stencil_test_enable.Value()),
|
||||
.front = stencil_op_state,
|
||||
.back = stencil_op_state,
|
||||
};
|
||||
|
||||
u32 shader_count = 0;
|
||||
std::array<vk::PipelineShaderStageCreateInfo, MAX_SHADER_STAGES> shader_stages;
|
||||
for (std::size_t i = 0; i < stages.size(); i++) {
|
||||
Shader* shader = stages[i];
|
||||
if (!shader) {
|
||||
continue;
|
||||
}
|
||||
|
||||
shader->WaitDone();
|
||||
shader_stages[shader_count++] = vk::PipelineShaderStageCreateInfo{
|
||||
.stage = MakeShaderStage(i),
|
||||
.module = shader->Handle(),
|
||||
.pName = "main",
|
||||
};
|
||||
}
|
||||
|
||||
vk::GraphicsPipelineCreateInfo pipeline_info = {
|
||||
.stageCount = shader_count,
|
||||
.pStages = shader_stages.data(),
|
||||
.pVertexInputState = &vertex_input_info,
|
||||
.pInputAssemblyState = &input_assembly,
|
||||
.pViewportState = &viewport_info,
|
||||
.pRasterizationState = &raster_state,
|
||||
.pMultisampleState = &multisampling,
|
||||
.pDepthStencilState = &depth_info,
|
||||
.pColorBlendState = &color_blending,
|
||||
.pDynamicState = &dynamic_info,
|
||||
.layout = pipeline_layout,
|
||||
.renderPass =
|
||||
renderpass_cache.GetRenderpass(info.attachments.color, info.attachments.depth, false),
|
||||
};
|
||||
|
||||
if (fail_on_compile_required) {
|
||||
pipeline_info.flags |= vk::PipelineCreateFlagBits::eFailOnPipelineCompileRequiredEXT;
|
||||
}
|
||||
|
||||
auto result = device.createGraphicsPipelineUnique(pipeline_cache, pipeline_info);
|
||||
if (result.result == vk::Result::eSuccess) {
|
||||
pipeline = std::move(result.value);
|
||||
} else if (result.result == vk::Result::eErrorPipelineCompileRequiredEXT) {
|
||||
return false;
|
||||
} else {
|
||||
UNREACHABLE_MSG("Graphics pipeline creation failed!");
|
||||
}
|
||||
|
||||
MarkDone();
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
192
src/video_core/renderer_vulkan/vk_graphics_pipeline.h
Normal file
192
src/video_core/renderer_vulkan/vk_graphics_pipeline.h
Normal file
|
@ -0,0 +1,192 @@
|
|||
// Copyright 2023 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once

#include <array>
#include <atomic>
#include <condition_variable>
#include <cstring>
#include <mutex>
#include <string>

#include "common/thread_worker.h"
#include "video_core/rasterizer_cache/pixel_format.h"
#include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/renderer_vulkan/vk_shader_gen.h"
|
||||
|
||||
namespace Common {
|
||||
|
||||
/**
 * Completion flag for work that may finish on another thread.
 * The producer calls MarkDone(); consumers either poll IsDone() or block in WaitDone().
 */
struct AsyncHandle {
public:
    AsyncHandle(bool is_done_ = false) : is_done{is_done_} {}

    /// Returns the current completion state without blocking.
    /// The acquire load pairs with the store in MarkDone(), so a caller that observes
    /// `true` also observes everything the producer wrote before calling MarkDone().
    [[nodiscard]] bool IsDone() const noexcept {
        return is_done.load(std::memory_order_acquire);
    }

    /// Blocks the calling thread until the handle has been marked as done.
    void WaitDone() noexcept {
        std::unique_lock lock{mutex};
        // The mutex already orders this load against the store in MarkDone().
        condvar.wait(lock, [this] { return is_done.load(std::memory_order_relaxed); });
    }

    /// Sets the completion state to `done` and wakes every thread blocked in WaitDone().
    void MarkDone(bool done = true) noexcept {
        std::scoped_lock lock{mutex};
        is_done.store(done);
        condvar.notify_all();
    }

private:
    std::condition_variable condvar;
    std::mutex mutex;
    std::atomic_bool is_done{false};
};
|
||||
|
||||
} // namespace Common
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Instance;
|
||||
class RenderpassCache;
|
||||
|
||||
constexpr u32 MAX_SHADER_STAGES = 3;
|
||||
constexpr u32 MAX_VERTEX_ATTRIBUTES = 16;
|
||||
constexpr u32 MAX_VERTEX_BINDINGS = 16;
|
||||
|
||||
/**
|
||||
* The pipeline state is tightly packed with bitfields to reduce
|
||||
* the overhead of hashing as much as possible
|
||||
*/
|
||||
union RasterizationState {
|
||||
u8 value = 0;
|
||||
BitField<0, 2, Pica::PipelineRegs::TriangleTopology> topology;
|
||||
BitField<4, 2, Pica::RasterizerRegs::CullMode> cull_mode;
|
||||
};
|
||||
|
||||
union DepthStencilState {
|
||||
u32 value = 0;
|
||||
BitField<0, 1, u32> depth_test_enable;
|
||||
BitField<1, 1, u32> depth_write_enable;
|
||||
BitField<2, 1, u32> stencil_test_enable;
|
||||
BitField<3, 3, Pica::FramebufferRegs::CompareFunc> depth_compare_op;
|
||||
BitField<6, 3, Pica::FramebufferRegs::StencilAction> stencil_fail_op;
|
||||
BitField<9, 3, Pica::FramebufferRegs::StencilAction> stencil_pass_op;
|
||||
BitField<12, 3, Pica::FramebufferRegs::StencilAction> stencil_depth_fail_op;
|
||||
BitField<15, 3, Pica::FramebufferRegs::CompareFunc> stencil_compare_op;
|
||||
};
|
||||
|
||||
struct BlendingState {
|
||||
u16 blend_enable;
|
||||
u16 color_write_mask;
|
||||
Pica::FramebufferRegs::LogicOp logic_op;
|
||||
union {
|
||||
u32 value = 0;
|
||||
BitField<0, 4, Pica::FramebufferRegs::BlendFactor> src_color_blend_factor;
|
||||
BitField<4, 4, Pica::FramebufferRegs::BlendFactor> dst_color_blend_factor;
|
||||
BitField<8, 3, Pica::FramebufferRegs::BlendEquation> color_blend_eq;
|
||||
BitField<11, 4, Pica::FramebufferRegs::BlendFactor> src_alpha_blend_factor;
|
||||
BitField<15, 4, Pica::FramebufferRegs::BlendFactor> dst_alpha_blend_factor;
|
||||
BitField<19, 3, Pica::FramebufferRegs::BlendEquation> alpha_blend_eq;
|
||||
};
|
||||
};
|
||||
|
||||
struct DynamicState {
|
||||
u32 blend_color = 0;
|
||||
u8 stencil_reference;
|
||||
u8 stencil_compare_mask;
|
||||
u8 stencil_write_mask;
|
||||
|
||||
bool operator==(const DynamicState& other) const noexcept {
|
||||
return std::memcmp(this, &other, sizeof(DynamicState)) == 0;
|
||||
}
|
||||
};
|
||||
|
||||
union VertexBinding {
|
||||
u16 value = 0;
|
||||
BitField<0, 4, u16> binding;
|
||||
BitField<4, 1, u16> fixed;
|
||||
BitField<5, 11, u16> stride;
|
||||
};
|
||||
|
||||
union VertexAttribute {
|
||||
u32 value = 0;
|
||||
BitField<0, 4, u32> binding;
|
||||
BitField<4, 4, u32> location;
|
||||
BitField<8, 3, Pica::PipelineRegs::VertexAttributeFormat> type;
|
||||
BitField<11, 3, u32> size;
|
||||
BitField<14, 11, u32> offset;
|
||||
};
|
||||
|
||||
struct VertexLayout {
|
||||
u8 binding_count;
|
||||
u8 attribute_count;
|
||||
std::array<VertexBinding, MAX_VERTEX_BINDINGS> bindings;
|
||||
std::array<VertexAttribute, MAX_VERTEX_ATTRIBUTES> attributes;
|
||||
};
|
||||
|
||||
struct AttachmentInfo {
|
||||
VideoCore::PixelFormat color;
|
||||
VideoCore::PixelFormat depth;
|
||||
};
|
||||
|
||||
/**
|
||||
* Information about a graphics/compute pipeline
|
||||
*/
|
||||
struct PipelineInfo {
|
||||
VertexLayout vertex_layout;
|
||||
BlendingState blending;
|
||||
AttachmentInfo attachments;
|
||||
RasterizationState rasterization;
|
||||
DepthStencilState depth_stencil;
|
||||
DynamicState dynamic;
|
||||
|
||||
[[nodiscard]] u64 Hash(const Instance& instance) const;
|
||||
|
||||
[[nodiscard]] bool IsDepthWriteEnabled() const noexcept {
|
||||
const bool has_stencil = attachments.depth == VideoCore::PixelFormat::D24S8;
|
||||
const bool depth_write =
|
||||
depth_stencil.depth_test_enable && depth_stencil.depth_write_enable;
|
||||
const bool stencil_write =
|
||||
has_stencil && depth_stencil.stencil_test_enable && dynamic.stencil_write_mask != 0;
|
||||
|
||||
return depth_write || stencil_write;
|
||||
}
|
||||
};
|
||||
|
||||
struct Shader : public Common::AsyncHandle {
|
||||
explicit Shader(const Instance& instance);
|
||||
explicit Shader(const Instance& instance, vk::ShaderStageFlagBits stage, std::string code);
|
||||
~Shader();
|
||||
|
||||
[[nodiscard]] vk::ShaderModule Handle() const noexcept {
|
||||
return module;
|
||||
}
|
||||
|
||||
vk::ShaderModule module;
|
||||
vk::Device device;
|
||||
std::string program;
|
||||
};
|
||||
|
||||
class GraphicsPipeline : public Common::AsyncHandle {
|
||||
public:
|
||||
explicit GraphicsPipeline(const Instance& instance, RenderpassCache& renderpass_cache,
|
||||
const PipelineInfo& info, vk::PipelineCache pipeline_cache,
|
||||
vk::PipelineLayout layout, std::array<Shader*, 3> stages,
|
||||
Common::ThreadWorker* worker);
|
||||
~GraphicsPipeline();
|
||||
|
||||
bool TryBuild(bool wait_built);
|
||||
|
||||
bool Build(bool fail_on_compile_required = false);
|
||||
|
||||
[[nodiscard]] vk::Pipeline Handle() const noexcept {
|
||||
return *pipeline;
|
||||
}
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
RenderpassCache& renderpass_cache;
|
||||
Common::ThreadWorker* worker;
|
||||
|
||||
vk::UniquePipeline pipeline;
|
||||
vk::PipelineLayout pipeline_layout;
|
||||
vk::PipelineCache pipeline_cache;
|
||||
|
||||
PipelineInfo info;
|
||||
std::array<Shader*, 3> stages;
|
||||
bool is_pending{};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
|
@ -210,12 +210,16 @@ FormatTraits Instance::DetermineTraits(VideoCore::PixelFormat pixel_format, vk::
|
|||
best_usage |= vk::ImageUsageFlagBits::eSampled | vk::ImageUsageFlagBits::eTransferDst |
|
||||
vk::ImageUsageFlagBits::eTransferSrc;
|
||||
}
|
||||
if (supports_attachment) {
|
||||
// Attachment flag is only needed for color and depth formats.
|
||||
if (supports_attachment &&
|
||||
VideoCore::GetFormatType(pixel_format) != VideoCore::SurfaceType::Texture) {
|
||||
best_usage |= (format_aspect & vk::ImageAspectFlagBits::eDepth)
|
||||
? vk::ImageUsageFlagBits::eDepthStencilAttachment
|
||||
: vk::ImageUsageFlagBits::eColorAttachment;
|
||||
}
|
||||
if (supports_storage) {
|
||||
// Storage flag is only needed for shadow rendering with RGBA8 texture.
|
||||
// Keeping it disables can boost performance on mobile drivers.
|
||||
if (supports_storage && pixel_format == VideoCore::PixelFormat::RGBA8) {
|
||||
best_usage |= vk::ImageUsageFlagBits::eStorage;
|
||||
}
|
||||
|
||||
|
|
207
src/video_core/renderer_vulkan/vk_master_semaphore.cpp
Normal file
207
src/video_core/renderer_vulkan/vk_master_semaphore.cpp
Normal file
|
@ -0,0 +1,207 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <limits>
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Timeout passed to the Vulkan wait calls in this file: the largest u64 value.
constexpr u64 WAIT_TIMEOUT = std::numeric_limits<u64>::max();
|
||||
|
||||
/// Creates the timeline semaphore with an initial counter value of zero.
MasterSemaphoreTimeline::MasterSemaphoreTimeline(const Instance& instance_) : instance{instance_} {
    // The semaphore type is supplied through the pNext chain of the create info.
    const vk::SemaphoreTypeCreateInfoKHR timeline_info = {
        .semaphoreType = vk::SemaphoreType::eTimeline,
        .initialValue = 0,
    };
    const vk::SemaphoreCreateInfo create_info = {
        .pNext = &timeline_info,
    };
    semaphore = instance.GetDevice().createSemaphoreUnique(create_info);
}
|
||||
|
||||
/// Defaulted: the semaphore is held in a vk::UniqueSemaphore member.
MasterSemaphoreTimeline::~MasterSemaphoreTimeline() = default;
|
||||
|
||||
void MasterSemaphoreTimeline::Refresh() {
|
||||
u64 this_tick{};
|
||||
u64 counter{};
|
||||
do {
|
||||
this_tick = gpu_tick.load(std::memory_order_acquire);
|
||||
counter = instance.GetDevice().getSemaphoreCounterValueKHR(*semaphore);
|
||||
if (counter < this_tick) {
|
||||
return;
|
||||
}
|
||||
} while (!gpu_tick.compare_exchange_weak(this_tick, counter, std::memory_order_release,
|
||||
std::memory_order_relaxed));
|
||||
}
|
||||
|
||||
/**
 * Blocks until the GPU has reached the given tick.
 * @param tick Timeline value to wait for.
 */
void MasterSemaphoreTimeline::Wait(u64 tick) {
    // No need to wait if the GPU is already known to be ahead of the tick
    if (IsFree(tick)) {
        return;
    }

    // Update the known GPU tick and try again
    Refresh();
    if (IsFree(tick)) {
        return;
    }

    // If none of the above is hit, fall back to a regular wait
    const vk::SemaphoreWaitInfoKHR wait_info = {
        .semaphoreCount = 1,
        .pSemaphores = &semaphore.get(),
        .pValues = &tick,
    };

    // Retry only on timeout. Any other non-success result is an error (e.g. device loss)
    // that will not resolve by waiting again, so report it instead of spinning forever.
    vk::Result result;
    do {
        result = instance.GetDevice().waitSemaphoresKHR(&wait_info, WAIT_TIMEOUT);
    } while (result == vk::Result::eTimeout);

    if (result != vk::Result::eSuccess) {
        LOG_CRITICAL(Render_Vulkan, "Semaphore wait failed with error {}", vk::to_string(result));
        UNREACHABLE();
    }

    Refresh();
}
|
||||
|
||||
/**
 * Ends the command buffer and submits it to the graphics queue.
 * The submission waits on the timeline semaphore at `signal_value - 1` and signals it
 * with `signal_value`.
 * @param cmdbuf       Command buffer to end and submit
 * @param wait         Optional extra semaphore to wait on (may be null)
 * @param signal       Optional extra semaphore to signal (may be null)
 * @param signal_value Timeline value signalled by this submission
 */
void MasterSemaphoreTimeline::SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait,
                                         vk::Semaphore signal, u64 signal_value) {
    cmdbuf.end();

    // Slot 0 always holds the timeline semaphore; slot 1 is only counted
    // when the corresponding optional semaphore was provided.
    const std::array<vk::Semaphore, 2> wait_list{Handle(), wait};
    const std::array<u64, 2> wait_ticks{signal_value - 1, u64(1)};
    const u32 wait_count = wait ? 2U : 1U;

    const std::array<vk::Semaphore, 2> signal_list{Handle(), signal};
    const std::array<u64, 2> signal_ticks{signal_value, u64(0)};
    const u32 signal_count = signal ? 2U : 1U;

    static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks = {
        vk::PipelineStageFlagBits::eAllCommands,
        vk::PipelineStageFlagBits::eColorAttachmentOutput,
    };

    const vk::TimelineSemaphoreSubmitInfoKHR timeline_info = {
        .waitSemaphoreValueCount = wait_count,
        .pWaitSemaphoreValues = wait_ticks.data(),
        .signalSemaphoreValueCount = signal_count,
        .pSignalSemaphoreValues = signal_ticks.data(),
    };

    const vk::SubmitInfo submit = {
        .pNext = &timeline_info,
        .waitSemaphoreCount = wait_count,
        .pWaitSemaphores = wait_list.data(),
        .pWaitDstStageMask = wait_stage_masks.data(),
        .commandBufferCount = 1u,
        .pCommandBuffers = &cmdbuf,
        .signalSemaphoreCount = signal_count,
        .pSignalSemaphores = signal_list.data(),
    };

    try {
        instance.GetGraphicsQueue().submit(submit);
    } catch (const vk::DeviceLostError& err) {
        LOG_CRITICAL(Render_Vulkan, "Device lost during submit: {}", err.what());
        UNREACHABLE();
    }
}
|
||||
|
||||
/// Number of fences MasterSemaphoreFence creates up front in its constructor.
constexpr u64 FENCE_RESERVE = 8;
|
||||
|
||||
/// Pre-creates FENCE_RESERVE fences and starts the thread that waits on submitted fences.
MasterSemaphoreFence::MasterSemaphoreFence(const Instance& instance_) : instance{instance_} {
    const vk::Device device{instance.GetDevice()};
    for (u64 created = 0; created != FENCE_RESERVE; ++created) {
        free_queue.push(device.createFenceUnique({}));
    }

    wait_thread = std::jthread([this](std::stop_token token) { WaitThread(token); });
}
|
||||
|
||||
/// Defaulted: no explicit teardown, members are released by their own destructors.
MasterSemaphoreFence::~MasterSemaphoreFence() = default;
|
||||
|
||||
/// Intentionally empty: in this implementation gpu_tick is updated by WaitThread.
void MasterSemaphoreFence::Refresh() {}
|
||||
|
||||
/// Blocks the caller until gpu_tick is greater than or equal to `tick`.
void MasterSemaphoreFence::Wait(u64 tick) {
    // Sleep on the atomic until its value changes, then re-check against the target.
    for (u64 observed = gpu_tick.load(std::memory_order_relaxed); observed < tick;
         observed = gpu_tick.load(std::memory_order_relaxed)) {
        gpu_tick.wait(observed);
    }
}
|
||||
|
||||
/**
 * Ends the command buffer, submits it with a fence and queues that fence for WaitThread.
 * @param cmdbuf       Command buffer to end and submit
 * @param wait         Optional semaphore to wait on (may be null)
 * @param signal       Optional semaphore to signal (may be null)
 * @param signal_value Tick stored in gpu_tick once the fence has been waited on
 */
void MasterSemaphoreFence::SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait,
                                      vk::Semaphore signal, u64 signal_value) {
    cmdbuf.end();

    static constexpr std::array<vk::PipelineStageFlags, 1> wait_stage_masks = {
        vk::PipelineStageFlagBits::eColorAttachmentOutput,
    };

    // The counts are zero when the optional semaphores are null.
    const u32 wait_count = wait ? 1U : 0U;
    const u32 signal_count = signal ? 1U : 0U;

    const vk::SubmitInfo submit = {
        .waitSemaphoreCount = wait_count,
        .pWaitSemaphores = &wait,
        .pWaitDstStageMask = wait_stage_masks.data(),
        .commandBufferCount = 1u,
        .pCommandBuffers = &cmdbuf,
        .signalSemaphoreCount = signal_count,
        .pSignalSemaphores = &signal,
    };

    vk::UniqueFence submit_fence{GetFreeFence()};
    try {
        instance.GetGraphicsQueue().submit(submit, *submit_fence);
    } catch (const vk::DeviceLostError& err) {
        LOG_CRITICAL(Render_Vulkan, "Device lost during submit: {}", err.what());
        UNREACHABLE();
    }

    // Hand the fence over to the wait thread together with the tick it stands for.
    std::scoped_lock lock{wait_mutex};
    wait_queue.push(Fence{
        .handle = std::move(submit_fence),
        .signal_value = signal_value,
    });
    wait_cv.notify_one();
}
|
||||
|
||||
/**
 * Thread body: takes submitted fences in order, waits for each, publishes its tick
 * through gpu_tick and returns the fence to the free queue.
 * @param token Stop token; the function returns once a stop has been requested.
 */
void MasterSemaphoreFence::WaitThread(std::stop_token token) {
    const vk::Device device{instance.GetDevice()};

    while (!token.stop_requested()) {
        Fence pending;
        {
            // Sleep until a fence has been queued or a stop is requested.
            std::unique_lock lock{wait_mutex};
            Common::CondvarWait(wait_cv, lock, token, [this] { return !wait_queue.empty(); });
            if (token.stop_requested()) {
                return;
            }
            pending = std::move(wait_queue.front());
            wait_queue.pop();
        }

        if (const vk::Result result = device.waitForFences(*pending.handle, true, WAIT_TIMEOUT);
            result != vk::Result::eSuccess) {
            LOG_CRITICAL(Render_Vulkan, "Fence wait failed with error {}", vk::to_string(result));
            UNREACHABLE();
        }
        device.resetFences(*pending.handle);

        // Publish the reached tick and wake any thread blocked in Wait().
        gpu_tick.store(pending.signal_value);
        gpu_tick.notify_all();

        // The fence has been reset, so it can be handed out again.
        std::scoped_lock lock{free_mutex};
        free_queue.push(std::move(pending.handle));
    }
}
|
||||
|
||||
/// Returns a fence from the free queue, or creates a new one when the queue is empty.
vk::UniqueFence MasterSemaphoreFence::GetFreeFence() {
    std::scoped_lock lock{free_mutex};

    if (!free_queue.empty()) {
        vk::UniqueFence recycled{std::move(free_queue.front())};
        free_queue.pop();
        return recycled;
    }

    return instance.GetDevice().createFenceUnique({});
}
|
||||
|
||||
} // namespace Vulkan
|
107
src/video_core/renderer_vulkan/vk_master_semaphore.h
Normal file
107
src/video_core/renderer_vulkan/vk_master_semaphore.h
Normal file
|
@ -0,0 +1,107 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
#include <queue>
|
||||
#include "common/common_types.h"
|
||||
#include "common/polyfill_thread.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Instance;
|
||||
class Scheduler;
|
||||
|
||||
class MasterSemaphore {
|
||||
public:
|
||||
virtual ~MasterSemaphore() = default;
|
||||
|
||||
[[nodiscard]] u64 CurrentTick() const noexcept {
|
||||
return current_tick.load(std::memory_order_acquire);
|
||||
}
|
||||
|
||||
[[nodiscard]] u64 KnownGpuTick() const noexcept {
|
||||
return gpu_tick.load(std::memory_order_acquire);
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsFree(u64 tick) const noexcept {
|
||||
return KnownGpuTick() >= tick;
|
||||
}
|
||||
|
||||
[[nodiscard]] u64 NextTick() noexcept {
|
||||
return current_tick.fetch_add(1, std::memory_order_release);
|
||||
}
|
||||
|
||||
/// Refresh the known GPU tick
|
||||
virtual void Refresh() = 0;
|
||||
|
||||
/// Waits for a tick to be hit on the GPU
|
||||
virtual void Wait(u64 tick) = 0;
|
||||
|
||||
/// Submits the provided command buffer for execution
|
||||
virtual void SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, vk::Semaphore signal,
|
||||
u64 signal_value) = 0;
|
||||
|
||||
protected:
|
||||
std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick.
|
||||
std::atomic<u64> current_tick{1}; ///< Current logical tick.
|
||||
};
|
||||
|
||||
class MasterSemaphoreTimeline : public MasterSemaphore {
|
||||
public:
|
||||
explicit MasterSemaphoreTimeline(const Instance& instance);
|
||||
~MasterSemaphoreTimeline() override;
|
||||
|
||||
[[nodiscard]] vk::Semaphore Handle() const noexcept {
|
||||
return semaphore.get();
|
||||
}
|
||||
|
||||
void Refresh() override;
|
||||
|
||||
void Wait(u64 tick) override;
|
||||
|
||||
void SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, vk::Semaphore signal,
|
||||
u64 signal_value) override;
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
vk::UniqueSemaphore semaphore; ///< Timeline semaphore.
|
||||
};
|
||||
|
||||
class MasterSemaphoreFence : public MasterSemaphore {
|
||||
public:
|
||||
explicit MasterSemaphoreFence(const Instance& instance);
|
||||
~MasterSemaphoreFence() override;
|
||||
|
||||
void Refresh() override;
|
||||
|
||||
void Wait(u64 tick) override;
|
||||
|
||||
void SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, vk::Semaphore signal,
|
||||
u64 signal_value) override;
|
||||
|
||||
private:
|
||||
void WaitThread(std::stop_token token);
|
||||
|
||||
vk::UniqueFence GetFreeFence();
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
|
||||
struct Fence {
|
||||
vk::UniqueFence handle;
|
||||
u64 signal_value;
|
||||
};
|
||||
|
||||
std::queue<vk::UniqueFence> free_queue;
|
||||
std::queue<Fence> wait_queue;
|
||||
std::mutex free_mutex;
|
||||
std::mutex wait_mutex;
|
||||
std::condition_variable_any wait_cv;
|
||||
std::jthread wait_thread;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
519
src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
Normal file
519
src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
Normal file
|
@ -0,0 +1,519 @@
|
|||
// Copyright 2023 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include "common/common_paths.h"
|
||||
#include "common/file_util.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "common/settings.h"
|
||||
#include "video_core/renderer_vulkan/pica_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_gen_spv.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
|
||||
// Profiling scope used by PipelineCache::BindPipeline.
MICROPROFILE_DEFINE(Vulkan_Bind, "Vulkan", "Pipeline Bind", MP_RGB(192, 32, 32));
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Index of each shader stage inside current_shaders / shader_hashes.
enum ProgramType : u32 {
    VS = 0, ///< Vertex stage
    FS = 1, ///< Fragment stage
    GS = 2, ///< Geometry stage
};
|
||||
|
||||
/**
 * Returns the byte size of an attribute with `size` components of the given format.
 * Formats not handled by the switch yield 0.
 */
u32 AttribBytes(Pica::PipelineRegs::VertexAttributeFormat format, u32 size) {
    using Format = Pica::PipelineRegs::VertexAttributeFormat;

    // Size of one component; stays 0 for unrecognised formats.
    u32 component_bytes = 0;
    switch (format) {
    case Format::FLOAT:
        component_bytes = static_cast<u32>(sizeof(float));
        break;
    case Format::SHORT:
        component_bytes = static_cast<u32>(sizeof(u16));
        break;
    case Format::BYTE:
    case Format::UBYTE:
        component_bytes = static_cast<u32>(sizeof(u8));
        break;
    }

    return component_bytes * size;
}
|
||||
|
||||
/// Maps a vertex attribute format to a load flag:
/// BYTE/SHORT -> Sint, UBYTE -> Uint, anything else -> Float.
AttribLoadFlags MakeAttribLoadFlag(Pica::PipelineRegs::VertexAttributeFormat format) {
    using Format = Pica::PipelineRegs::VertexAttributeFormat;

    if (format == Format::BYTE || format == Format::SHORT) {
        return AttribLoadFlags::Sint;
    }
    if (format == Format::UBYTE) {
        return AttribLoadFlags::Uint;
    }
    return AttribLoadFlags::Float;
}
|
||||
|
||||
constexpr std::array<vk::DescriptorSetLayoutBinding, 5> BUFFER_BINDINGS = {{
|
||||
{0, vk::DescriptorType::eUniformBufferDynamic, 1, vk::ShaderStageFlagBits::eVertex},
|
||||
{1, vk::DescriptorType::eUniformBufferDynamic, 1,
|
||||
vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eGeometry |
|
||||
vk::ShaderStageFlagBits::eFragment},
|
||||
{2, vk::DescriptorType::eUniformTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment},
|
||||
{3, vk::DescriptorType::eUniformTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment},
|
||||
{4, vk::DescriptorType::eUniformTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment},
|
||||
}};
|
||||
|
||||
constexpr std::array<vk::DescriptorSetLayoutBinding, 4> TEXTURE_BINDINGS = {{
|
||||
{0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment},
|
||||
{1, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment},
|
||||
{2, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment},
|
||||
{3, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment},
|
||||
}};
|
||||
|
||||
// TODO: Use descriptor array for shadow cube
|
||||
constexpr std::array<vk::DescriptorSetLayoutBinding, 7> SHADOW_BINDINGS = {{
|
||||
{0, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
|
||||
{1, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
|
||||
{2, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
|
||||
{3, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
|
||||
{4, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
|
||||
{5, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
|
||||
{6, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
|
||||
}};
|
||||
|
||||
/**
 * Sets up the worker pool (at least two threads), one descriptor set provider per
 * binding table, the trivial vertex shader, and finally the pipeline layout.
 */
PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
                             RenderpassCache& renderpass_cache_, DescriptorPool& pool_)
    : instance{instance_},
      scheduler{scheduler_},
      renderpass_cache{renderpass_cache_},
      pool{pool_},
      num_worker_threads{std::max(std::thread::hardware_concurrency(), 2U)},
      workers{num_worker_threads, "Pipeline workers"},
      descriptor_set_providers{
          DescriptorSetProvider{instance, pool, BUFFER_BINDINGS},
          DescriptorSetProvider{instance, pool, TEXTURE_BINDINGS},
          DescriptorSetProvider{instance, pool, SHADOW_BINDINGS},
      },
      trivial_vertex_shader{
          instance,
          vk::ShaderStageFlagBits::eVertex,
          GenerateTrivialVertexShader(instance.IsShaderClipDistanceSupported()),
      } {
    BuildLayout();
}
|
||||
|
||||
void PipelineCache::BuildLayout() {
|
||||
std::array<vk::DescriptorSetLayout, NUM_RASTERIZER_SETS> descriptor_set_layouts;
|
||||
std::transform(descriptor_set_providers.begin(), descriptor_set_providers.end(),
|
||||
descriptor_set_layouts.begin(),
|
||||
[](const auto& provider) { return provider.Layout(); });
|
||||
|
||||
const vk::PipelineLayoutCreateInfo layout_info = {
|
||||
.setLayoutCount = NUM_RASTERIZER_SETS,
|
||||
.pSetLayouts = descriptor_set_layouts.data(),
|
||||
.pushConstantRangeCount = 0,
|
||||
.pPushConstantRanges = nullptr,
|
||||
};
|
||||
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
|
||||
}
|
||||
|
||||
/// Writes the pipeline cache to disk before the object is destroyed.
PipelineCache::~PipelineCache() {
    this->SaveDiskCache();
}
|
||||
|
||||
void PipelineCache::LoadDiskCache() {
|
||||
if (!Settings::values.use_disk_shader_cache || !EnsureDirectories()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const std::string cache_file_path = fmt::format("{}{:x}{:x}.bin", GetPipelineCacheDir(),
|
||||
instance.GetVendorID(), instance.GetDeviceID());
|
||||
vk::PipelineCacheCreateInfo cache_info = {
|
||||
.initialDataSize = 0,
|
||||
.pInitialData = nullptr,
|
||||
};
|
||||
|
||||
std::vector<u8> cache_data;
|
||||
FileUtil::IOFile cache_file{cache_file_path, "r"};
|
||||
if (cache_file.IsOpen()) {
|
||||
LOG_INFO(Render_Vulkan, "Loading pipeline cache");
|
||||
|
||||
const u64 cache_file_size = cache_file.GetSize();
|
||||
cache_data.resize(cache_file_size);
|
||||
if (cache_file.ReadBytes(cache_data.data(), cache_file_size)) {
|
||||
if (!IsCacheValid(cache_data)) {
|
||||
LOG_WARNING(Render_Vulkan, "Pipeline cache provided invalid, ignoring");
|
||||
} else {
|
||||
cache_info.initialDataSize = cache_file_size;
|
||||
cache_info.pInitialData = cache_data.data();
|
||||
}
|
||||
}
|
||||
|
||||
cache_file.Close();
|
||||
}
|
||||
|
||||
vk::Device device = instance.GetDevice();
|
||||
pipeline_cache = device.createPipelineCacheUnique(cache_info);
|
||||
}
|
||||
|
||||
void PipelineCache::SaveDiskCache() {
|
||||
if (!Settings::values.use_disk_shader_cache || !EnsureDirectories()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const std::string cache_file_path = fmt::format("{}{:x}{:x}.bin", GetPipelineCacheDir(),
|
||||
instance.GetVendorID(), instance.GetDeviceID());
|
||||
FileUtil::IOFile cache_file{cache_file_path, "wb"};
|
||||
if (!cache_file.IsOpen()) {
|
||||
LOG_ERROR(Render_Vulkan, "Unable to open pipeline cache for writing");
|
||||
return;
|
||||
}
|
||||
|
||||
vk::Device device = instance.GetDevice();
|
||||
auto cache_data = device.getPipelineCacheData(*pipeline_cache);
|
||||
if (!cache_file.WriteBytes(cache_data.data(), cache_data.size())) {
|
||||
LOG_ERROR(Render_Vulkan, "Error during pipeline cache write");
|
||||
return;
|
||||
}
|
||||
|
||||
cache_file.Close();
|
||||
}
|
||||
|
||||
bool PipelineCache::BindPipeline(const PipelineInfo& info, bool wait_built) {
|
||||
MICROPROFILE_SCOPE(Vulkan_Bind);
|
||||
|
||||
u64 shader_hash = 0;
|
||||
for (u32 i = 0; i < MAX_SHADER_STAGES; i++) {
|
||||
shader_hash = Common::HashCombine(shader_hash, shader_hashes[i]);
|
||||
}
|
||||
|
||||
const u64 info_hash = info.Hash(instance);
|
||||
const u64 pipeline_hash = Common::HashCombine(shader_hash, info_hash);
|
||||
|
||||
auto [it, new_pipeline] = graphics_pipelines.try_emplace(pipeline_hash);
|
||||
if (new_pipeline) {
|
||||
it.value() = std::make_unique<GraphicsPipeline>(
|
||||
instance, renderpass_cache, info, *pipeline_cache, *pipeline_layout, current_shaders,
|
||||
wait_built ? nullptr : &workers);
|
||||
}
|
||||
|
||||
GraphicsPipeline* const pipeline{it->second.get()};
|
||||
if (!pipeline->IsDone() && !pipeline->TryBuild(wait_built)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < NUM_RASTERIZER_SETS; i++) {
|
||||
if (!set_dirty[i]) {
|
||||
continue;
|
||||
}
|
||||
bound_descriptor_sets[i] = descriptor_set_providers[i].Acquire(update_data[i]);
|
||||
set_dirty[i] = false;
|
||||
}
|
||||
|
||||
const bool is_dirty = scheduler.IsStateDirty(StateFlags::Pipeline);
|
||||
const bool pipeline_dirty = (current_pipeline != pipeline) || is_dirty;
|
||||
scheduler.Record([this, is_dirty, pipeline_dirty, pipeline,
|
||||
current_dynamic = current_info.dynamic, dynamic = info.dynamic,
|
||||
descriptor_sets = bound_descriptor_sets, offsets = offsets,
|
||||
current_rasterization = current_info.rasterization,
|
||||
current_depth_stencil = current_info.depth_stencil,
|
||||
rasterization = info.rasterization,
|
||||
depth_stencil = info.depth_stencil](vk::CommandBuffer cmdbuf) {
|
||||
if (dynamic.stencil_compare_mask != current_dynamic.stencil_compare_mask || is_dirty) {
|
||||
cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack,
|
||||
dynamic.stencil_compare_mask);
|
||||
}
|
||||
|
||||
if (dynamic.stencil_write_mask != current_dynamic.stencil_write_mask || is_dirty) {
|
||||
cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack,
|
||||
dynamic.stencil_write_mask);
|
||||
}
|
||||
|
||||
if (dynamic.stencil_reference != current_dynamic.stencil_reference || is_dirty) {
|
||||
cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack,
|
||||
dynamic.stencil_reference);
|
||||
}
|
||||
|
||||
if (dynamic.blend_color != current_dynamic.blend_color || is_dirty) {
|
||||
const Common::Vec4f color = PicaToVK::ColorRGBA8(dynamic.blend_color);
|
||||
cmdbuf.setBlendConstants(color.AsArray());
|
||||
}
|
||||
|
||||
if (instance.IsExtendedDynamicStateSupported()) {
|
||||
if (rasterization.cull_mode != current_rasterization.cull_mode || is_dirty) {
|
||||
cmdbuf.setCullModeEXT(PicaToVK::CullMode(rasterization.cull_mode));
|
||||
cmdbuf.setFrontFaceEXT(PicaToVK::FrontFace(rasterization.cull_mode));
|
||||
}
|
||||
|
||||
if (depth_stencil.depth_compare_op != current_depth_stencil.depth_compare_op ||
|
||||
is_dirty) {
|
||||
cmdbuf.setDepthCompareOpEXT(PicaToVK::CompareFunc(depth_stencil.depth_compare_op));
|
||||
}
|
||||
|
||||
if (depth_stencil.depth_test_enable != current_depth_stencil.depth_test_enable ||
|
||||
is_dirty) {
|
||||
cmdbuf.setDepthTestEnableEXT(depth_stencil.depth_test_enable);
|
||||
}
|
||||
|
||||
if (depth_stencil.depth_write_enable != current_depth_stencil.depth_write_enable ||
|
||||
is_dirty) {
|
||||
cmdbuf.setDepthWriteEnableEXT(depth_stencil.depth_write_enable);
|
||||
}
|
||||
|
||||
if (rasterization.topology != current_rasterization.topology || is_dirty) {
|
||||
cmdbuf.setPrimitiveTopologyEXT(PicaToVK::PrimitiveTopology(rasterization.topology));
|
||||
}
|
||||
|
||||
if (depth_stencil.stencil_test_enable != current_depth_stencil.stencil_test_enable ||
|
||||
is_dirty) {
|
||||
cmdbuf.setStencilTestEnableEXT(depth_stencil.stencil_test_enable);
|
||||
}
|
||||
|
||||
if (depth_stencil.stencil_fail_op != current_depth_stencil.stencil_fail_op ||
|
||||
depth_stencil.stencil_pass_op != current_depth_stencil.stencil_pass_op ||
|
||||
depth_stencil.stencil_depth_fail_op !=
|
||||
current_depth_stencil.stencil_depth_fail_op ||
|
||||
depth_stencil.stencil_compare_op != current_depth_stencil.stencil_compare_op ||
|
||||
is_dirty) {
|
||||
cmdbuf.setStencilOpEXT(vk::StencilFaceFlagBits::eFrontAndBack,
|
||||
PicaToVK::StencilOp(depth_stencil.stencil_fail_op),
|
||||
PicaToVK::StencilOp(depth_stencil.stencil_pass_op),
|
||||
PicaToVK::StencilOp(depth_stencil.stencil_depth_fail_op),
|
||||
PicaToVK::CompareFunc(depth_stencil.stencil_compare_op));
|
||||
}
|
||||
}
|
||||
|
||||
if (pipeline_dirty) {
|
||||
if (!pipeline->IsDone()) {
|
||||
pipeline->WaitDone();
|
||||
}
|
||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
|
||||
}
|
||||
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0,
|
||||
descriptor_sets, offsets);
|
||||
});
|
||||
|
||||
current_info = info;
|
||||
current_pipeline = pipeline;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Selects the programmable vertex shader for the given registers and vertex layout,
 * generating it and queueing its compilation on first use.
 * @return false when no shader could be generated for this configuration, true otherwise
 */
bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs,
                                                Pica::Shader::ShaderSetup& setup,
                                                const VertexLayout& layout) {
    PicaVSConfig config{regs.rasterizer, regs.vs, setup, instance};
    config.state.use_geometry_shader = instance.UseGeometryShaders();

    // Record per-location load flags for attributes whose format traits require it.
    for (u32 index = 0; index < layout.attribute_count; index++) {
        const VertexAttribute& attribute = layout.attributes[index];
        const FormatTraits& traits = instance.GetTraits(attribute.type, attribute.size);
        AttribLoadFlags& load_flags = config.state.load_flags[attribute.location.Value()];

        if (traits.needs_conversion) {
            load_flags = MakeAttribLoadFlag(attribute.type);
        }
        if (traits.needs_emulation) {
            load_flags |= AttribLoadFlags::ZeroW;
        }
    }

    auto [config_it, config_inserted] = programmable_vertex_map.try_emplace(config);
    if (config_inserted) {
        auto generated = GenerateVertexShader(setup, config);
        if (!generated) {
            LOG_ERROR(Render_Vulkan, "Failed to retrieve programmable vertex shader");
            programmable_vertex_map[config] = nullptr;
            return false;
        }

        // Shaders are shared between configurations that generate the same program text.
        std::string& source = generated.value();
        auto [program_it, program_inserted] =
            programmable_vertex_cache.try_emplace(source, instance);
        auto& cached_shader = program_it->second;

        if (program_inserted) {
            cached_shader.program = std::move(source);
            const vk::Device device = instance.GetDevice();
            workers.QueueWork([device, &cached_shader] {
                cached_shader.module =
                    Compile(cached_shader.program, vk::ShaderStageFlagBits::eVertex, device);
                cached_shader.MarkDone();
            });
        }

        config_it->second = &cached_shader;
    }

    // A null entry means no shader is available for this configuration.
    if (Shader* const selected = config_it->second) {
        current_shaders[ProgramType::VS] = selected;
        shader_hashes[ProgramType::VS] = config.Hash();
        return true;
    }

    LOG_ERROR(Render_Vulkan, "Failed to retrieve programmable vertex shader");
    return false;
}
|
||||
|
||||
/// Selects the built-in trivial vertex shader; its stage hash is 0.
void PipelineCache::UseTrivialVertexShader() {
    shader_hashes[ProgramType::VS] = 0;
    current_shaders[ProgramType::VS] = &trivial_vertex_shader;
}
|
||||
|
||||
/**
 * Selects the fixed-function geometry shader for the given registers, generating and
 * compiling it on a worker on first use. Falls back to no geometry shader when the
 * instance does not use geometry shaders.
 * @return always true
 */
bool PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) {
    if (instance.UseGeometryShaders()) {
        const PicaFixedGSConfig gs_config{regs, instance};
        auto [entry, inserted] = fixed_geometry_shaders.try_emplace(gs_config, instance);
        auto& shader = entry->second;

        if (inserted) {
            // Generation and compilation both happen on the worker pool.
            workers.QueueWork([gs_config, device = instance.GetDevice(), &shader]() {
                const std::string code = GenerateFixedGeometryShader(gs_config);
                shader.module = Compile(code, vk::ShaderStageFlagBits::eGeometry, device);
                shader.MarkDone();
            });
        }

        current_shaders[ProgramType::GS] = &shader;
        shader_hashes[ProgramType::GS] = gs_config.Hash();
    } else {
        UseTrivialGeometryShader();
    }

    return true;
}
|
||||
|
||||
/// Clears the geometry stage: no shader is bound and its stage hash is 0.
void PipelineCache::UseTrivialGeometryShader() {
    shader_hashes[ProgramType::GS] = 0;
    current_shaders[ProgramType::GS] = nullptr;
}
|
||||
|
||||
/**
 * Selects the fragment shader for the given registers, creating it on first use.
 * With SPIR-V generation enabled and no shadow rendering the module is produced
 * immediately; otherwise generation and compilation are queued on a worker.
 */
void PipelineCache::UseFragmentShader(const Pica::Regs& regs) {
    const PicaFSConfig config{regs, instance};

    const auto [entry, inserted] = fragment_shaders.try_emplace(config, instance);
    auto& shader = entry->second;

    if (inserted) {
        const bool generate_spirv = Settings::values.spirv_shader_gen.GetValue() &&
                                    !config.state.shadow_rendering.Value();
        if (!generate_spirv) {
            workers.QueueWork([config, device = instance.GetDevice(), &shader]() {
                const std::string code = GenerateFragmentShader(config);
                shader.module = Compile(code, vk::ShaderStageFlagBits::eFragment, device);
                shader.MarkDone();
            });
        } else {
            const std::vector code = GenerateFragmentShaderSPV(config);
            shader.module = CompileSPV(code, instance.GetDevice());
            shader.MarkDone();
        }
    }

    current_shaders[ProgramType::FS] = &shader;
    shader_hashes[ProgramType::FS] = config.Hash();
}
|
||||
|
||||
/// Stores an image view/sampler pair for `binding` of descriptor set 1 and marks the set
/// dirty. Does nothing when the same pair is already stored.
void PipelineCache::BindTexture(u32 binding, vk::ImageView image_view, vk::Sampler sampler) {
    auto& slot = update_data[1][binding].image_info;
    const bool changed = slot.imageView != image_view || slot.sampler != sampler;
    if (changed) {
        set_dirty[1] = true;
        slot = vk::DescriptorImageInfo{
            .sampler = sampler,
            .imageView = image_view,
            .imageLayout = vk::ImageLayout::eGeneral,
        };
    }
}
|
||||
|
||||
/// Stores a storage image view for `binding` of descriptor set 2 and marks the set dirty.
/// Does nothing when the same view is already stored.
void PipelineCache::BindStorageImage(u32 binding, vk::ImageView image_view) {
    auto& slot = update_data[2][binding].image_info;
    if (slot.imageView != image_view) {
        set_dirty[2] = true;
        slot = vk::DescriptorImageInfo{
            .imageView = image_view,
            .imageLayout = vk::ImageLayout::eGeneral,
        };
    }
}
|
||||
|
||||
/// Stores a buffer range for `binding` of descriptor set 0 and marks the set dirty.
/// Does nothing when the same buffer, offset and size are already stored.
void PipelineCache::BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size) {
    auto& slot = update_data[0][binding].buffer_info;
    const bool changed = slot.buffer != buffer || slot.offset != offset || slot.range != size;
    if (changed) {
        set_dirty[0] = true;
        slot = vk::DescriptorBufferInfo{
            .buffer = buffer,
            .offset = offset,
            .range = size,
        };
    }
}
|
||||
|
||||
/// Stages a texel buffer view for `binding` of descriptor set 0.
/// Re-binding the view that is already staged is a no-op.
void PipelineCache::BindTexelBuffer(u32 binding, vk::BufferView buffer_view) {
    auto& staged_view = update_data[0][binding].buffer_view;
    if (staged_view == buffer_view) {
        return;
    }

    set_dirty[0] = true;
    staged_view = buffer_view;
}
|
||||
|
||||
/// Records the dynamic offset for `binding`. The offset is narrowed to 32 bits
/// before being stored; `binding` is not range-checked here.
void PipelineCache::SetBufferOffset(u32 binding, size_t offset) {
    const auto dynamic_offset = static_cast<u32>(offset);
    offsets[binding] = dynamic_offset;
}
|
||||
|
||||
/// Checks whether a pipeline cache blob read from disk was produced by the
/// current device/driver by validating its version-one header.
/// @param data Raw bytes of the cache file.
/// @return true when the header is well formed and its vendor ID, device ID and
///         pipeline cache UUID match the values reported by `instance`;
///         otherwise logs the reason and returns false.
bool PipelineCache::IsCacheValid(std::span<const u8> data) const {
    using Header = vk::PipelineCacheHeaderVersionOne;

    // The blob must at least be large enough to contain the header itself.
    if (data.size() < sizeof(Header)) {
        LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header");
        return false;
    }

    // Copy the header out instead of casting, the byte span has no alignment guarantee.
    Header cache_header;
    std::memcpy(&cache_header, data.data(), sizeof(Header));

    if (cache_header.headerSize < sizeof(Header)) {
        LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header length");
        return false;
    }

    if (cache_header.headerVersion != vk::PipelineCacheHeaderVersion::eOne) {
        LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header version");
        return false;
    }

    const u32 expected_vendor = instance.GetVendorID();
    if (cache_header.vendorID != expected_vendor) {
        LOG_ERROR(
            Render_Vulkan,
            "Pipeline cache failed validation: Incorrect vendor ID (file: {:#X}, device: {:#X})",
            cache_header.vendorID, expected_vendor);
        return false;
    }

    const u32 expected_device = instance.GetDeviceID();
    if (cache_header.deviceID != expected_device) {
        LOG_ERROR(
            Render_Vulkan,
            "Pipeline cache failed validation: Incorrect device ID (file: {:#X}, device: {:#X})",
            cache_header.deviceID, expected_device);
        return false;
    }

    if (cache_header.pipelineCacheUUID != instance.GetPipelineCacheUUID()) {
        LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Incorrect UUID");
        return false;
    }

    return true;
}
|
||||
|
||||
bool PipelineCache::EnsureDirectories() const {
|
||||
const auto create_dir = [](const std::string& dir) {
|
||||
if (!FileUtil::CreateDir(dir)) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to create directory={}", dir);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
return create_dir(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir)) &&
|
||||
create_dir(GetPipelineCacheDir());
|
||||
}
|
||||
|
||||
std::string PipelineCache::GetPipelineCacheDir() const {
|
||||
return FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + "vulkan" + DIR_SEP;
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
123
src/video_core/renderer_vulkan/vk_pipeline_cache.h
Normal file
123
src/video_core/renderer_vulkan/vk_pipeline_cache.h
Normal file
|
@ -0,0 +1,123 @@
|
|||
// Copyright 2023 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <bitset>
|
||||
#include <tsl/robin_map.h>
|
||||
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
|
||||
namespace Pica {
|
||||
struct Regs;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Instance;
|
||||
class Scheduler;
|
||||
class RenderpassCache;
|
||||
class DescriptorPool;
|
||||
|
||||
/// Number of descriptor sets tracked by PipelineCache (sizes its per-set arrays).
constexpr u32 NUM_RASTERIZER_SETS{3};
/// Number of dynamic buffer offsets tracked by PipelineCache.
constexpr u32 NUM_DYNAMIC_OFFSETS{2};
|
||||
|
||||
/**
|
||||
* Stores a collection of rasterizer pipelines used during rendering.
|
||||
*/
|
||||
class PipelineCache {
|
||||
public:
|
||||
explicit PipelineCache(const Instance& instance, Scheduler& scheduler,
|
||||
RenderpassCache& renderpass_cache, DescriptorPool& pool);
|
||||
~PipelineCache();
|
||||
|
||||
[[nodiscard]] DescriptorSetProvider& TextureProvider() noexcept {
|
||||
return descriptor_set_providers[1];
|
||||
}
|
||||
|
||||
/// Loads the pipeline cache stored to disk
|
||||
void LoadDiskCache();
|
||||
|
||||
/// Stores the generated pipeline cache to disk
|
||||
void SaveDiskCache();
|
||||
|
||||
/// Binds a pipeline using the provided information
|
||||
bool BindPipeline(const PipelineInfo& info, bool wait_built = false);
|
||||
|
||||
/// Binds a PICA decompiled vertex shader
|
||||
bool UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup,
|
||||
const VertexLayout& layout);
|
||||
|
||||
/// Binds a passthrough vertex shader
|
||||
void UseTrivialVertexShader();
|
||||
|
||||
/// Binds a PICA decompiled geometry shader
|
||||
bool UseFixedGeometryShader(const Pica::Regs& regs);
|
||||
|
||||
/// Binds a passthrough geometry shader
|
||||
void UseTrivialGeometryShader();
|
||||
|
||||
/// Binds a fragment shader generated from PICA state
|
||||
void UseFragmentShader(const Pica::Regs& regs);
|
||||
|
||||
/// Binds a texture to the specified binding
|
||||
void BindTexture(u32 binding, vk::ImageView image_view, vk::Sampler sampler);
|
||||
|
||||
/// Binds a storage image to the specified binding
|
||||
void BindStorageImage(u32 binding, vk::ImageView image_view);
|
||||
|
||||
/// Binds a buffer to the specified binding
|
||||
void BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size);
|
||||
|
||||
/// Binds a buffer to the specified binding
|
||||
void BindTexelBuffer(u32 binding, vk::BufferView buffer_view);
|
||||
|
||||
/// Sets the dynamic offset for the uniform buffer at binding
|
||||
void SetBufferOffset(u32 binding, size_t offset);
|
||||
|
||||
private:
|
||||
/// Builds the rasterizer pipeline layout
|
||||
void BuildLayout();
|
||||
|
||||
/// Returns true when the disk data can be used by the current driver
|
||||
bool IsCacheValid(std::span<const u8> cache_data) const;
|
||||
|
||||
/// Create shader disk cache directories. Returns true on success.
|
||||
bool EnsureDirectories() const;
|
||||
|
||||
/// Returns the pipeline cache storage dir
|
||||
std::string GetPipelineCacheDir() const;
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
Scheduler& scheduler;
|
||||
RenderpassCache& renderpass_cache;
|
||||
DescriptorPool& pool;
|
||||
|
||||
vk::UniquePipelineCache pipeline_cache;
|
||||
vk::UniquePipelineLayout pipeline_layout;
|
||||
std::size_t num_worker_threads;
|
||||
Common::ThreadWorker workers;
|
||||
PipelineInfo current_info{};
|
||||
GraphicsPipeline* current_pipeline{};
|
||||
tsl::robin_map<u64, std::unique_ptr<GraphicsPipeline>, Common::IdentityHash<u64>>
|
||||
graphics_pipelines;
|
||||
|
||||
std::array<DescriptorSetProvider, NUM_RASTERIZER_SETS> descriptor_set_providers;
|
||||
std::array<DescriptorSetData, NUM_RASTERIZER_SETS> update_data{};
|
||||
std::array<vk::DescriptorSet, NUM_RASTERIZER_SETS> bound_descriptor_sets{};
|
||||
std::array<u32, NUM_DYNAMIC_OFFSETS> offsets{};
|
||||
std::bitset<NUM_RASTERIZER_SETS> set_dirty{};
|
||||
|
||||
std::array<u64, MAX_SHADER_STAGES> shader_hashes;
|
||||
std::array<Shader*, MAX_SHADER_STAGES> current_shaders;
|
||||
std::unordered_map<PicaVSConfig, Shader*> programmable_vertex_map;
|
||||
std::unordered_map<std::string, Shader> programmable_vertex_cache;
|
||||
std::unordered_map<PicaFixedGSConfig, Shader> fixed_geometry_shaders;
|
||||
std::unordered_map<PicaFSConfig, Shader> fragment_shaders;
|
||||
Shader trivial_vertex_shader;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
514
src/video_core/renderer_vulkan/vk_present_window.cpp
Normal file
514
src/video_core/renderer_vulkan/vk_present_window.cpp
Normal file
|
@ -0,0 +1,514 @@
|
|||
// Copyright 2023 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/microprofile.h"
|
||||
#include "common/settings.h"
|
||||
#include "common/thread.h"
|
||||
#include "core/frontend/emu_window.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_platform.h"
|
||||
#include "video_core/renderer_vulkan/vk_present_window.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
||||
|
||||
#include <vk_mem_alloc.h>
|
||||
|
||||
MICROPROFILE_DEFINE(Vulkan_WaitPresent, "Vulkan", "Wait For Present", MP_RGB(128, 128, 128));
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
|
||||
/// Returns true when `format` can be used as a blit destination with optimal
/// tiling on the given physical device.
bool CanBlitToSwapchain(const vk::PhysicalDevice& physical_device, vk::Format format) {
    const vk::FormatFeatureFlags optimal_features =
        physical_device.getFormatProperties(format).optimalTilingFeatures;
    return static_cast<bool>(optimal_features & vk::FormatFeatureFlagBits::eBlitDst);
}
|
||||
|
||||
/// Describes the single color layer (mip 0, array layer 0) used on both sides
/// of the frame -> swapchain transfer.
[[nodiscard]] vk::ImageSubresourceLayers MakeImageSubresourceLayers() {
    const vk::ImageSubresourceLayers color_layer{
        .aspectMask = vk::ImageAspectFlagBits::eColor,
        .mipLevel = 0,
        .baseArrayLayer = 0,
        .layerCount = 1,
    };
    return color_layer;
}
|
||||
|
||||
/// Builds a blit region that stretches the whole frame
/// (0,0)-(frame_width,frame_height) over the whole swapchain image
/// (0,0)-(swapchain_width,swapchain_height).
[[nodiscard]] vk::ImageBlit MakeImageBlit(s32 frame_width, s32 frame_height, s32 swapchain_width,
                                          s32 swapchain_height) {
    const vk::Offset3D origin{.x = 0, .y = 0, .z = 0};
    const vk::Offset3D frame_end{.x = frame_width, .y = frame_height, .z = 1};
    const vk::Offset3D swapchain_end{.x = swapchain_width, .y = swapchain_height, .z = 1};

    return vk::ImageBlit{
        .srcSubresource = MakeImageSubresourceLayers(),
        .srcOffsets = std::array{origin, frame_end},
        .dstSubresource = MakeImageSubresourceLayers(),
        .dstOffsets = std::array{origin, swapchain_end},
    };
}
|
||||
|
||||
/// Builds an unscaled copy region anchored at the origin of both images. The
/// copied extent is clamped to the smaller of the frame and the swapchain in
/// each dimension.
[[nodiscard]] vk::ImageCopy MakeImageCopy(u32 frame_width, u32 frame_height, u32 swapchain_width,
                                          u32 swapchain_height) {
    const vk::Offset3D origin{.x = 0, .y = 0, .z = 0};
    const vk::Extent3D shared_extent{
        .width = std::min(frame_width, swapchain_width),
        .height = std::min(frame_height, swapchain_height),
        .depth = 1,
    };

    return vk::ImageCopy{
        .srcSubresource = MakeImageSubresourceLayers(),
        .srcOffset = origin,
        .dstSubresource = MakeImageSubresourceLayers(),
        .dstOffset = origin,
        .extent = shared_extent,
    };
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Creates the surface, swapchain and present render pass, then allocates the
/// per-frame command buffers and synchronization objects. Every frame starts
/// out in the free queue. When asynchronous presentation is enabled in the
/// settings the present thread is started last, once all frames are set up.
PresentWindow::PresentWindow(Frontend::EmuWindow& emu_window_, const Instance& instance_,
                             Scheduler& scheduler_)
    : emu_window{emu_window_}, instance{instance_}, scheduler{scheduler_},
      surface{CreateSurface(instance.GetInstance(), emu_window)},
      swapchain{instance, emu_window.GetFramebufferLayout().width,
                emu_window.GetFramebufferLayout().height, surface},
      graphics_queue{instance.GetGraphicsQueue()}, present_renderpass{CreateRenderpass()},
      vsync_enabled{Settings::values.use_vsync_new.GetValue()},
      blit_supported{
          CanBlitToSwapchain(instance.GetPhysicalDevice(), swapchain.GetSurfaceFormat().format)},
      use_present_thread{Settings::values.async_presentation.GetValue()},
      last_render_surface{emu_window.GetWindowInfo().render_surface} {

    const vk::Device device = instance.GetDevice();

    // A single pool on the graphics queue family backs every frame command buffer.
    command_pool = device.createCommandPool(vk::CommandPoolCreateInfo{
        .flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer |
                 vk::CommandPoolCreateFlagBits::eTransient,
        .queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex(),
    });

    const std::vector frame_cmdbufs = device.allocateCommandBuffers(vk::CommandBufferAllocateInfo{
        .commandPool = command_pool,
        .level = vk::CommandBufferLevel::ePrimary,
        .commandBufferCount = SWAP_CHAIN_SIZE,
    });

    std::size_t index = 0;
    for (Frame& frame : swap_chain) {
        frame.cmdbuf = frame_cmdbufs[index++];
        frame.render_ready = device.createSemaphore({});
        // Created signaled so the first GetRenderFrame() on this frame does not block.
        frame.present_done = device.createFence({.flags = vk::FenceCreateFlagBits::eSignaled});
        free_queue.push(&frame);
    }

    if (use_present_thread) {
        present_thread = std::jthread([this](std::stop_token token) { PresentThread(token); });
    }
}
|
||||
|
||||
/// Tears down the presentation resources.
///
/// The present thread is stopped and joined before any Vulkan object is
/// destroyed. Data members (including `present_thread`) are only destroyed
/// after this body has run, so relying on the jthread destructor alone would
/// let the thread keep using the command pool, fences and frame images while
/// they are being destroyed below.
PresentWindow::~PresentWindow() {
    scheduler.Finish();

    // Not joinable when asynchronous presentation is disabled (thread never started).
    if (present_thread.joinable()) {
        present_thread.request_stop();
        present_thread.join();
    }

    const vk::Device device = instance.GetDevice();
    device.destroyCommandPool(command_pool);
    device.destroyRenderPass(present_renderpass);
    for (auto& frame : swap_chain) {
        device.destroyImageView(frame.image_view);
        device.destroyFramebuffer(frame.framebuffer);
        device.destroySemaphore(frame.render_ready);
        device.destroyFence(frame.present_done);
        vmaDestroyImage(instance.GetAllocator(), frame.image, frame.allocation);
    }
}
|
||||
|
||||
/// Rebuilds the image, image view and framebuffer of `frame` for the given size.
/// Any previously created objects of the frame are destroyed first. The image
/// uses the swapchain surface format and is usable as a color attachment and as
/// a transfer source. Allocation failure is treated as fatal.
void PresentWindow::RecreateFrame(Frame* frame, u32 width, u32 height) {
    const vk::Device device = instance.GetDevice();

    // Release whatever the frame currently holds; null handles are skipped.
    if (frame->framebuffer) {
        device.destroyFramebuffer(frame->framebuffer);
    }
    if (frame->image_view) {
        device.destroyImageView(frame->image_view);
    }
    if (frame->image) {
        vmaDestroyImage(instance.GetAllocator(), frame->image, frame->allocation);
    }

    const vk::Format surface_format = swapchain.GetSurfaceFormat().format;

    // VMA works with the C structures, so describe the image with Vulkan-Hpp and convert.
    const vk::ImageCreateInfo image_desc = {
        .imageType = vk::ImageType::e2D,
        .format = surface_format,
        .extent = {width, height, 1},
        .mipLevels = 1,
        .arrayLayers = 1,
        .samples = vk::SampleCountFlagBits::e1,
        .usage = vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eTransferSrc,
    };
    const VkImageCreateInfo raw_image_desc = static_cast<VkImageCreateInfo>(image_desc);

    const VmaAllocationCreateInfo allocation_desc = {
        .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT,
        .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
        .requiredFlags = 0,
        .preferredFlags = 0,
        .pool = VK_NULL_HANDLE,
        .pUserData = nullptr,
    };

    VkImage raw_image{};
    const VkResult alloc_result =
        vmaCreateImage(instance.GetAllocator(), &raw_image_desc, &allocation_desc, &raw_image,
                       &frame->allocation, nullptr);
    if (alloc_result != VK_SUCCESS) [[unlikely]] {
        LOG_CRITICAL(Render_Vulkan, "Failed allocating texture with error {}", alloc_result);
        UNREACHABLE();
    }
    frame->image = vk::Image{raw_image};

    frame->image_view = device.createImageView(vk::ImageViewCreateInfo{
        .image = frame->image,
        .viewType = vk::ImageViewType::e2D,
        .format = surface_format,
        .subresourceRange{
            .aspectMask = vk::ImageAspectFlagBits::eColor,
            .baseMipLevel = 0,
            .levelCount = 1,
            .baseArrayLayer = 0,
            .layerCount = 1,
        },
    });

    frame->framebuffer = device.createFramebuffer(vk::FramebufferCreateInfo{
        .renderPass = present_renderpass,
        .attachmentCount = 1,
        .pAttachments = &frame->image_view,
        .width = width,
        .height = height,
        .layers = 1,
    });

    frame->width = width;
    frame->height = height;
}
|
||||
|
||||
/// Blocks until a frame is available in the free queue, removes it from the
/// queue, waits for its previous presentation to finish and resets its fence.
/// @return The frame, now owned by the caller until it is passed to Present().
Frame* PresentWindow::GetRenderFrame() {
    MICROPROFILE_SCOPE(Vulkan_WaitPresent);

    // Wait for free presentation frames
    std::unique_lock free_lock{free_mutex};
    free_cv.wait(free_lock, [this] { return !free_queue.empty(); });

    // Take the frame from the queue
    Frame* const frame = free_queue.front();
    free_queue.pop();

    const vk::Device device = instance.GetDevice();

    // Wait for the presentation to be finished so all frame resources are free.
    // Every non-success result is retried:
    //  - eTimeout simply means the wait has to be repeated.
    //  - eErrorInitializationFailed occurs on Mali GPU drivers due to them using the
    //    ppoll() syscall which isn't correctly restarted after a signal, so the wait
    //    has to be retried manually in that case.
    vk::Result wait_result{};
    do {
        wait_result =
            device.waitForFences(frame->present_done, false, std::numeric_limits<u64>::max());
    } while (wait_result != vk::Result::eSuccess);

    device.resetFences(frame->present_done);
    return frame;
}
|
||||
|
||||
/// Hands a rendered frame over for presentation.
/// With the present thread enabled the frame is queued from a scheduler
/// command and the present thread is notified. Otherwise the scheduler worker
/// is waited on, the frame is copied to the swapchain right here and then
/// returned to the free queue.
void PresentWindow::Present(Frame* frame) {
    if (use_present_thread) {
        scheduler.Record([this, frame](vk::CommandBuffer) {
            std::unique_lock queue_lock{queue_mutex};
            present_queue.push(frame);
            frame_cv.notify_one();
        });
        return;
    }

    scheduler.WaitWorker();
    CopyToSwapchain(frame);
    free_queue.push(frame);
}
|
||||
|
||||
void PresentWindow::WaitPresent() {
|
||||
if (!use_present_thread) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Wait for the present queue to be empty
|
||||
{
|
||||
std::unique_lock queue_lock{queue_mutex};
|
||||
frame_cv.wait(queue_lock, [this] { return present_queue.empty(); });
|
||||
}
|
||||
|
||||
// The above condition will be satisfied when the last frame is taken from the queue.
|
||||
// To ensure that frame has been presented as well take hold of the swapchain
|
||||
// mutex.
|
||||
std::scoped_lock swapchain_lock{swapchain_mutex};
|
||||
}
|
||||
|
||||
/// Body of the present thread: takes frames from the present queue, copies
/// them to the swapchain and returns them to the free queue, until a stop is
/// requested through `token`.
void PresentWindow::PresentThread(std::stop_token token) {
    Common::SetCurrentThreadName("VulkanPresent");

    while (!token.stop_requested()) {
        std::unique_lock queue_lock{queue_mutex};

        // Wait for presentation frames
        Common::CondvarWait(frame_cv, queue_lock, token,
                            [this] { return !present_queue.empty(); });
        if (token.stop_requested()) {
            return;
        }

        // Take the frame and notify anyone waiting
        Frame* const frame = present_queue.front();
        present_queue.pop();
        frame_cv.notify_one();

        // Acquire the swapchain lock before letting go of the queue lock. This way
        // the swapchain lock in WaitPresent is guaranteed to occur after here.
        std::unique_lock swapchain_lock{swapchain_mutex};
        queue_lock.unlock();

        CopyToSwapchain(frame);

        // Free the frame for reuse
        std::scoped_lock free_lock{free_mutex};
        free_queue.push(frame);
        free_cv.notify_one();
    }
}
|
||||
|
||||
/// Wakes up a CopyToSwapchain() call waiting on the surface condition
/// variable. Only has an effect on Android builds.
void PresentWindow::NotifySurfaceChanged() {
#ifdef ANDROID
    const std::lock_guard surface_guard{recreate_surface_mutex};
    recreate_surface_cv.notify_one();
#endif
}
|
||||
|
||||
/// Copies the contents of `frame` into the next swapchain image and presents it.
///
/// Steps: recreate the surface/swapchain when required, acquire a swapchain
/// image, record the transfer (a scaled blit when supported, a plain copy
/// otherwise) into the frame's command buffer, submit it on the graphics queue
/// signalling the frame's `present_done` fence, and finally present.
void PresentWindow::CopyToSwapchain(Frame* frame) {
    const auto recreate_swapchain = [&] { swapchain.Create(frame->width, frame->height, surface); };

#ifdef ANDROID
    // Held until the end of the function.
    std::unique_lock surface_lock{recreate_surface_mutex};

    recreate_surface_cv.wait_for(surface_lock, std::chrono::milliseconds(400), [&]() {
        return last_render_surface == emu_window.GetWindowInfo().render_surface;
    });

    // If the frontend recreated the surface, recreate the renderer surface and swapchain.
    void* const current_surface = emu_window.GetWindowInfo().render_surface;
    if (last_render_surface != current_surface) {
        last_render_surface = current_surface;
        surface = CreateSurface(instance.GetInstance(), emu_window);
        recreate_swapchain();
    }
#else
    const bool use_vsync = Settings::values.use_vsync_new.GetValue();
    const bool needs_resize =
        swapchain.GetWidth() != frame->width || swapchain.GetHeight() != frame->height;
    if (vsync_enabled != use_vsync || needs_resize) [[unlikely]] {
        vsync_enabled = use_vsync;
        recreate_swapchain();
    }
#endif

    // Keep recreating the swapchain until an image can be acquired.
    while (!swapchain.AcquireNextImage()) {
        recreate_swapchain();
    }

    const vk::Image swapchain_image = swapchain.Image();

    const vk::CommandBuffer cmdbuf = frame->cmdbuf;
    cmdbuf.begin(vk::CommandBufferBeginInfo{
        .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
    });

    const vk::Extent2D extent = swapchain.GetExtent();

    // All barriers below cover the same color subresource range.
    const vk::ImageSubresourceRange color_range{
        .aspectMask = vk::ImageAspectFlagBits::eColor,
        .baseMipLevel = 0,
        .levelCount = 1,
        .baseArrayLayer = 0,
        .layerCount = VK_REMAINING_ARRAY_LAYERS,
    };

    const std::array pre_barriers{
        // Swapchain image: previous contents are discarded, prepare it as transfer destination.
        vk::ImageMemoryBarrier{
            .srcAccessMask = vk::AccessFlagBits::eNone,
            .dstAccessMask = vk::AccessFlagBits::eTransferWrite,
            .oldLayout = vk::ImageLayout::eUndefined,
            .newLayout = vk::ImageLayout::eTransferDstOptimal,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .image = swapchain_image,
            .subresourceRange = color_range,
        },
        // Frame image: make the color attachment writes visible to the transfer read.
        vk::ImageMemoryBarrier{
            .srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite,
            .dstAccessMask = vk::AccessFlagBits::eTransferRead,
            .oldLayout = vk::ImageLayout::eTransferSrcOptimal,
            .newLayout = vk::ImageLayout::eTransferSrcOptimal,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .image = frame->image,
            .subresourceRange = color_range,
        },
    };

    // Swapchain image: transition to the presentable layout once the transfer is done.
    const vk::ImageMemoryBarrier post_barrier{
        .srcAccessMask = vk::AccessFlagBits::eTransferWrite,
        .dstAccessMask = vk::AccessFlagBits::eMemoryRead,
        .oldLayout = vk::ImageLayout::eTransferDstOptimal,
        .newLayout = vk::ImageLayout::ePresentSrcKHR,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .image = swapchain_image,
        .subresourceRange = color_range,
    };

    cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput,
                           vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,
                           {}, {}, pre_barriers);

    if (blit_supported) {
        // Scaled transfer: the frame is stretched over the whole swapchain image.
        cmdbuf.blitImage(frame->image, vk::ImageLayout::eTransferSrcOptimal, swapchain_image,
                         vk::ImageLayout::eTransferDstOptimal,
                         MakeImageBlit(frame->width, frame->height, extent.width, extent.height),
                         vk::Filter::eLinear);
    } else {
        // Unscaled fallback for surface formats that cannot be blitted to.
        cmdbuf.copyImage(frame->image, vk::ImageLayout::eTransferSrcOptimal, swapchain_image,
                         vk::ImageLayout::eTransferDstOptimal,
                         MakeImageCopy(frame->width, frame->height, extent.width, extent.height));
    }

    cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
                           vk::PipelineStageFlagBits::eAllCommands,
                           vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier);

    cmdbuf.end();

    // One wait stage per wait semaphore, in the same order as `wait_semaphores`.
    static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks = {
        vk::PipelineStageFlagBits::eColorAttachmentOutput,
        vk::PipelineStageFlagBits::eAllGraphics,
    };

    const vk::Semaphore present_ready = swapchain.GetPresentReadySemaphore();
    const vk::Semaphore image_acquired = swapchain.GetImageAcquiredSemaphore();
    const std::array wait_semaphores = {image_acquired, frame->render_ready};

    const vk::SubmitInfo submit_info = {
        .waitSemaphoreCount = static_cast<u32>(wait_semaphores.size()),
        .pWaitSemaphores = wait_semaphores.data(),
        .pWaitDstStageMask = wait_stage_masks.data(),
        .commandBufferCount = 1u,
        .pCommandBuffers = &cmdbuf,
        .signalSemaphoreCount = 1,
        .pSignalSemaphores = &present_ready,
    };

    // The submit mutex stays locked for both the submit and the present below.
    std::scoped_lock submit_lock{scheduler.submit_mutex};

    try {
        graphics_queue.submit(submit_info, frame->present_done);
    } catch (const vk::DeviceLostError& err) {
        LOG_CRITICAL(Render_Vulkan, "Device lost during present submit: {}", err.what());
        UNREACHABLE();
    }

    swapchain.Present();
}
|
||||
|
||||
/// Creates the render pass used to draw into the frame images: a single
/// subpass with one color attachment in the swapchain surface format. The
/// attachment is cleared on load, stored afterwards and left in
/// eTransferSrcOptimal, the layout CopyToSwapchain() reads it from.
vk::RenderPass PresentWindow::CreateRenderpass() {
    const vk::AttachmentDescription frame_attachment = {
        .format = swapchain.GetSurfaceFormat().format,
        .loadOp = vk::AttachmentLoadOp::eClear,
        .storeOp = vk::AttachmentStoreOp::eStore,
        .stencilLoadOp = vk::AttachmentLoadOp::eDontCare,
        .stencilStoreOp = vk::AttachmentStoreOp::eDontCare,
        .initialLayout = vk::ImageLayout::eUndefined,
        .finalLayout = vk::ImageLayout::eTransferSrcOptimal,
    };

    const vk::AttachmentReference frame_ref = {
        .attachment = 0,
        .layout = vk::ImageLayout::eGeneral,
    };

    const vk::SubpassDescription draw_subpass = {
        .pipelineBindPoint = vk::PipelineBindPoint::eGraphics,
        .inputAttachmentCount = 0,
        .pInputAttachments = nullptr,
        .colorAttachmentCount = 1u,
        .pColorAttachments = &frame_ref,
        .pResolveAttachments = nullptr,
        .pDepthStencilAttachment = nullptr,
    };

    const vk::RenderPassCreateInfo create_info = {
        .attachmentCount = 1,
        .pAttachments = &frame_attachment,
        .subpassCount = 1,
        .pSubpasses = &draw_subpass,
        .dependencyCount = 0,
        .pDependencies = nullptr,
    };

    return instance.GetDevice().createRenderPass(create_info);
}
|
||||
|
||||
} // namespace Vulkan
|
101
src/video_core/renderer_vulkan/vk_present_window.h
Normal file
101
src/video_core/renderer_vulkan/vk_present_window.h
Normal file
|
@ -0,0 +1,101 @@
|
|||
// Copyright 2023 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
#include <mutex>
|
||||
#include <queue>
|
||||
#include "common/polyfill_thread.h"
|
||||
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
||||
|
||||
VK_DEFINE_HANDLE(VmaAllocation)
|
||||
|
||||
namespace Frontend {
|
||||
class EmuWindow;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Instance;
|
||||
class Swapchain;
|
||||
class Scheduler;
|
||||
class RenderpassCache;
|
||||
|
||||
struct Frame {
|
||||
u32 width;
|
||||
u32 height;
|
||||
VmaAllocation allocation;
|
||||
vk::Framebuffer framebuffer;
|
||||
vk::Image image;
|
||||
vk::ImageView image_view;
|
||||
vk::Semaphore render_ready;
|
||||
vk::Fence present_done;
|
||||
vk::CommandBuffer cmdbuf;
|
||||
};
|
||||
|
||||
class PresentWindow final {
|
||||
static constexpr std::size_t SWAP_CHAIN_SIZE = 6;
|
||||
|
||||
public:
|
||||
explicit PresentWindow(Frontend::EmuWindow& emu_window, const Instance& instance,
|
||||
Scheduler& scheduler);
|
||||
~PresentWindow();
|
||||
|
||||
/// Waits for all queued frames to finish presenting.
|
||||
void WaitPresent();
|
||||
|
||||
/// Returns the last used render frame.
|
||||
Frame* GetRenderFrame();
|
||||
|
||||
/// Recreates the render frame to match provided parameters.
|
||||
void RecreateFrame(Frame* frame, u32 width, u32 height);
|
||||
|
||||
/// Queues the provided frame for presentation.
|
||||
void Present(Frame* frame);
|
||||
|
||||
/// This is called to notify the rendering backend of a surface change
|
||||
void NotifySurfaceChanged();
|
||||
|
||||
[[nodiscard]] vk::RenderPass Renderpass() const noexcept {
|
||||
return present_renderpass;
|
||||
}
|
||||
|
||||
u32 ImageCount() const noexcept {
|
||||
return swapchain.GetImageCount();
|
||||
}
|
||||
|
||||
private:
|
||||
void PresentThread(std::stop_token token);
|
||||
|
||||
void CopyToSwapchain(Frame* frame);
|
||||
|
||||
vk::RenderPass CreateRenderpass();
|
||||
|
||||
private:
|
||||
Frontend::EmuWindow& emu_window;
|
||||
const Instance& instance;
|
||||
Scheduler& scheduler;
|
||||
vk::SurfaceKHR surface;
|
||||
Swapchain swapchain;
|
||||
vk::CommandPool command_pool;
|
||||
vk::Queue graphics_queue;
|
||||
vk::RenderPass present_renderpass;
|
||||
std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{};
|
||||
std::queue<Frame*> free_queue;
|
||||
std::queue<Frame*> present_queue;
|
||||
std::condition_variable free_cv;
|
||||
std::condition_variable recreate_surface_cv;
|
||||
std::condition_variable_any frame_cv;
|
||||
std::mutex swapchain_mutex;
|
||||
std::mutex recreate_surface_mutex;
|
||||
std::mutex queue_mutex;
|
||||
std::mutex free_mutex;
|
||||
std::jthread present_thread;
|
||||
bool vsync_enabled{};
|
||||
bool blit_supported;
|
||||
bool use_present_thread{true};
|
||||
void* last_render_surface{};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
1138
src/video_core/renderer_vulkan/vk_rasterizer.cpp
Normal file
1138
src/video_core/renderer_vulkan/vk_rasterizer.cpp
Normal file
File diff suppressed because it is too large
Load diff
171
src/video_core/renderer_vulkan/vk_rasterizer.h
Normal file
171
src/video_core/renderer_vulkan/vk_rasterizer.h
Normal file
|
@ -0,0 +1,171 @@
|
|||
// Copyright 2023 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "core/hw/gpu.h"
|
||||
#include "video_core/rasterizer_accelerated.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
|
||||
|
||||
namespace Frontend {
|
||||
class EmuWindow;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class CustomTexManager;
|
||||
class RendererBase;
|
||||
} // namespace VideoCore
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
struct ScreenInfo;
|
||||
|
||||
class Instance;
|
||||
class Scheduler;
|
||||
class RenderpassCache;
|
||||
class DescriptorPool;
|
||||
|
||||
class RasterizerVulkan : public VideoCore::RasterizerAccelerated {
|
||||
public:
|
||||
explicit RasterizerVulkan(Memory::MemorySystem& memory,
|
||||
VideoCore::CustomTexManager& custom_tex_manager,
|
||||
VideoCore::RendererBase& renderer, Frontend::EmuWindow& emu_window,
|
||||
const Instance& instance, Scheduler& scheduler, DescriptorPool& pool,
|
||||
RenderpassCache& renderpass_cache, u32 image_count);
|
||||
~RasterizerVulkan() override;
|
||||
|
||||
void TickFrame();
|
||||
void LoadDiskResources(const std::atomic_bool& stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback) override;
|
||||
|
||||
void DrawTriangles() override;
|
||||
void FlushAll() override;
|
||||
void FlushRegion(PAddr addr, u32 size) override;
|
||||
void InvalidateRegion(PAddr addr, u32 size) override;
|
||||
void FlushAndInvalidateRegion(PAddr addr, u32 size) override;
|
||||
void ClearAll(bool flush) override;
|
||||
bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override;
|
||||
bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override;
|
||||
bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override;
|
||||
bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr,
|
||||
u32 pixel_stride, ScreenInfo& screen_info);
|
||||
bool AccelerateDrawBatch(bool is_indexed) override;
|
||||
|
||||
void SyncFixedState() override;
|
||||
|
||||
private:
|
||||
void NotifyFixedFunctionPicaRegisterChanged(u32 id) override;
|
||||
|
||||
/// Syncs the clip enabled status to match the PICA register
|
||||
void SyncClipEnabled();
|
||||
|
||||
/// Syncs the cull mode to match the PICA register
|
||||
void SyncCullMode();
|
||||
|
||||
/// Syncs the blend enabled status to match the PICA register
|
||||
void SyncBlendEnabled();
|
||||
|
||||
/// Syncs the blend functions to match the PICA register
|
||||
void SyncBlendFuncs();
|
||||
|
||||
/// Syncs the blend color to match the PICA register
|
||||
void SyncBlendColor();
|
||||
|
||||
/// Syncs the logic op states to match the PICA register
|
||||
void SyncLogicOp();
|
||||
|
||||
/// Syncs the color write mask to match the PICA register state
|
||||
void SyncColorWriteMask();
|
||||
|
||||
/// Syncs the stencil write mask to match the PICA register state
|
||||
void SyncStencilWriteMask();
|
||||
|
||||
/// Syncs the depth write mask to match the PICA register state
|
||||
void SyncDepthWriteMask();
|
||||
|
||||
/// Syncs the stencil test states to match the PICA register
|
||||
void SyncStencilTest();
|
||||
|
||||
/// Syncs the depth test states to match the PICA register
|
||||
void SyncDepthTest();
|
||||
|
||||
/// Syncs and uploads the lighting, fog and proctex LUTs
|
||||
void SyncAndUploadLUTs();
|
||||
void SyncAndUploadLUTsLF();
|
||||
|
||||
/// Syncs all enabled PICA texture units
|
||||
void SyncTextureUnits(const Framebuffer* framebuffer);
|
||||
|
||||
/// Binds the PICA shadow cube required for shadow mapping
|
||||
void BindShadowCube(const Pica::TexturingRegs::FullTextureConfig& texture);
|
||||
|
||||
/// Binds a texture cube to texture unit 0
|
||||
void BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture);
|
||||
|
||||
/// Makes a temporary copy of the framebuffer if a feedback loop is detected
|
||||
bool IsFeedbackLoop(u32 texture_index, const Framebuffer* framebuffer, Surface& surface,
|
||||
Sampler& sampler);
|
||||
|
||||
/// Unbinds all special texture unit 0 texture configurations
|
||||
void UnbindSpecial();
|
||||
|
||||
/// Upload the uniform blocks to the uniform buffer object
|
||||
void UploadUniforms(bool accelerate_draw);
|
||||
|
||||
/// Generic draw function for DrawTriangles and AccelerateDrawBatch
|
||||
bool Draw(bool accelerate, bool is_indexed);
|
||||
|
||||
/// Internal implementation for AccelerateDrawBatch
|
||||
bool AccelerateDrawBatchInternal(bool is_indexed);
|
||||
|
||||
/// Setup index array for AccelerateDrawBatch
|
||||
void SetupIndexArray();
|
||||
|
||||
/// Setup vertex array for AccelerateDrawBatch
|
||||
void SetupVertexArray();
|
||||
|
||||
/// Setup the fixed attribute emulation in vulkan
|
||||
void SetupFixedAttribs();
|
||||
|
||||
/// Setup vertex shader for AccelerateDrawBatch
|
||||
bool SetupVertexShader();
|
||||
|
||||
/// Setup geometry shader for AccelerateDrawBatch
|
||||
bool SetupGeometryShader();
|
||||
|
||||
/// Creates the vertex layout struct used for software shader pipelines
|
||||
void MakeSoftwareVertexLayout();
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
Scheduler& scheduler;
|
||||
RenderpassCache& renderpass_cache;
|
||||
PipelineCache pipeline_cache;
|
||||
TextureRuntime runtime;
|
||||
RasterizerCache res_cache;
|
||||
|
||||
VertexLayout software_layout;
|
||||
std::array<u32, 16> binding_offsets{};
|
||||
std::array<bool, 16> enable_attributes{};
|
||||
std::array<vk::Buffer, 16> vertex_buffers;
|
||||
VertexArrayInfo vertex_info;
|
||||
PipelineInfo pipeline_info;
|
||||
|
||||
StreamBuffer stream_buffer; ///< Vertex+Index buffer
|
||||
StreamBuffer uniform_buffer; ///< Uniform buffer
|
||||
StreamBuffer texture_buffer; ///< Texture buffer
|
||||
StreamBuffer texture_lf_buffer; ///< Texture Light-Fog buffer
|
||||
vk::UniqueBufferView texture_lf_view;
|
||||
vk::UniqueBufferView texture_rg_view;
|
||||
vk::UniqueBufferView texture_rgba_view;
|
||||
u64 uniform_buffer_alignment;
|
||||
u64 uniform_size_aligned_vs;
|
||||
u64 uniform_size_aligned_fs;
|
||||
bool async_shaders{false};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
10
src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp
Normal file
10
src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp
Normal file
|
@ -0,0 +1,10 @@
|
|||
// Copyright 2023 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "video_core/rasterizer_cache/rasterizer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
|
||||
|
||||
namespace VideoCore {
|
||||
// Explicit instantiation of the RasterizerCache template for the Vulkan traits type.
template class RasterizerCache<Vulkan::Traits>;
|
||||
} // namespace VideoCore
|
211
src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
Normal file
211
src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
Normal file
|
@ -0,0 +1,211 @@
|
|||
// Copyright 2023 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <limits>
|
||||
#include "common/assert.h"
|
||||
#include "video_core/rasterizer_cache/pixel_format.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using VideoCore::PixelFormat;
|
||||
using VideoCore::SurfaceType;
|
||||
|
||||
/// Stores references to the instance and scheduler; no Vulkan objects are created here.
RenderpassCache::RenderpassCache(const Instance& instance_, Scheduler& scheduler_)
    : instance{instance_}, scheduler{scheduler_} {}

/// Members release themselves; nothing extra to do.
RenderpassCache::~RenderpassCache() = default;
|
||||
|
||||
void RenderpassCache::BeginRendering(const Framebuffer* framebuffer,
|
||||
Common::Rectangle<u32> draw_rect) {
|
||||
const vk::Rect2D render_area = {
|
||||
.offset{
|
||||
.x = static_cast<s32>(draw_rect.left),
|
||||
.y = static_cast<s32>(draw_rect.bottom),
|
||||
},
|
||||
.extent{
|
||||
.width = draw_rect.GetWidth(),
|
||||
.height = draw_rect.GetHeight(),
|
||||
},
|
||||
};
|
||||
const RenderPass new_pass = {
|
||||
.framebuffer = framebuffer->Handle(),
|
||||
.render_pass = framebuffer->RenderPass(),
|
||||
.render_area = render_area,
|
||||
.clear = {},
|
||||
.do_clear = false,
|
||||
};
|
||||
images = framebuffer->Images();
|
||||
aspects = framebuffer->Aspects();
|
||||
BeginRendering(new_pass);
|
||||
}
|
||||
|
||||
void RenderpassCache::BeginRendering(const RenderPass& new_pass) {
|
||||
if (pass == new_pass) [[likely]] {
|
||||
return;
|
||||
}
|
||||
|
||||
EndRendering();
|
||||
scheduler.Record([info = new_pass](vk::CommandBuffer cmdbuf) {
|
||||
const vk::RenderPassBeginInfo renderpass_begin_info = {
|
||||
.renderPass = info.render_pass,
|
||||
.framebuffer = info.framebuffer,
|
||||
.renderArea = info.render_area,
|
||||
.clearValueCount = info.do_clear ? 1u : 0u,
|
||||
.pClearValues = &info.clear,
|
||||
};
|
||||
cmdbuf.beginRenderPass(renderpass_begin_info, vk::SubpassContents::eInline);
|
||||
});
|
||||
|
||||
pass = new_pass;
|
||||
}
|
||||
|
||||
void RenderpassCache::EndRendering() {
|
||||
if (!pass.render_pass) {
|
||||
return;
|
||||
}
|
||||
|
||||
pass.render_pass = vk::RenderPass{};
|
||||
scheduler.Record([images = images, aspects = aspects](vk::CommandBuffer cmdbuf) {
|
||||
u32 num_barriers = 0;
|
||||
vk::PipelineStageFlags pipeline_flags{};
|
||||
std::array<vk::ImageMemoryBarrier, 2> barriers;
|
||||
for (u32 i = 0; i < images.size(); i++) {
|
||||
if (!images[i]) {
|
||||
continue;
|
||||
}
|
||||
const bool is_color = static_cast<bool>(aspects[i] & vk::ImageAspectFlagBits::eColor);
|
||||
if (is_color) {
|
||||
pipeline_flags |= vk::PipelineStageFlagBits::eColorAttachmentOutput;
|
||||
} else {
|
||||
pipeline_flags |= vk::PipelineStageFlagBits::eEarlyFragmentTests |
|
||||
vk::PipelineStageFlagBits::eLateFragmentTests;
|
||||
}
|
||||
barriers[num_barriers++] = vk::ImageMemoryBarrier{
|
||||
.srcAccessMask = is_color ? vk::AccessFlagBits::eColorAttachmentWrite
|
||||
: vk::AccessFlagBits::eDepthStencilAttachmentWrite,
|
||||
.dstAccessMask =
|
||||
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead,
|
||||
.oldLayout = vk::ImageLayout::eGeneral,
|
||||
.newLayout = vk::ImageLayout::eGeneral,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = images[i],
|
||||
.subresourceRange{
|
||||
.aspectMask = aspects[i],
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = 1,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
};
|
||||
}
|
||||
cmdbuf.endRenderPass();
|
||||
cmdbuf.pipelineBarrier(pipeline_flags,
|
||||
vk::PipelineStageFlagBits::eFragmentShader |
|
||||
vk::PipelineStageFlagBits::eTransfer,
|
||||
vk::DependencyFlagBits::eByRegion, 0, nullptr, 0, nullptr,
|
||||
num_barriers, barriers.data());
|
||||
});
|
||||
}
|
||||
|
||||
/// Looks up the renderpass for a color/depth format pair, creating and caching it on first
/// use. An Invalid format selects the extra trailing slot of its dimension.
vk::RenderPass RenderpassCache::GetRenderpass(VideoCore::PixelFormat color,
                                              VideoCore::PixelFormat depth, bool is_clear) {
    std::scoped_lock lock{cache_mutex};

    u32 color_index = static_cast<u32>(MAX_COLOR_FORMATS);
    if (color != VideoCore::PixelFormat::Invalid) {
        color_index = static_cast<u32>(color);
    }

    u32 depth_index = static_cast<u32>(MAX_DEPTH_FORMATS);
    if (depth != VideoCore::PixelFormat::Invalid) {
        // Depth formats are rebased by 14 to obtain a zero-based index
        depth_index = static_cast<u32>(depth) - 14;
    }

    ASSERT_MSG(color_index <= MAX_COLOR_FORMATS && depth_index <= MAX_DEPTH_FORMATS,
               "Invalid color index {} and/or depth_index {}", color_index, depth_index);

    vk::UniqueRenderPass& slot = cached_renderpasses[color_index][depth_index][is_clear];
    if (slot) {
        return *slot;
    }

    // First request for this combination: build the renderpass and keep it in the cache.
    const vk::Format color_format = instance.GetTraits(color).native;
    const vk::Format depth_format = instance.GetTraits(depth).native;
    vk::AttachmentLoadOp load_op = vk::AttachmentLoadOp::eLoad;
    if (is_clear) {
        load_op = vk::AttachmentLoadOp::eClear;
    }
    slot = CreateRenderPass(color_format, depth_format, load_op);
    return *slot;
}
|
||||
|
||||
/// Builds a single-subpass renderpass with up to one color and one depth-stencil attachment.
/// A format of eUndefined omits the corresponding attachment. All attachments use the
/// general layout and are stored at the end of the pass.
vk::UniqueRenderPass RenderpassCache::CreateRenderPass(vk::Format color, vk::Format depth,
                                                       vk::AttachmentLoadOp load_op) const {
    const bool has_color = color != vk::Format::eUndefined;
    const bool has_depth = depth != vk::Format::eUndefined;

    std::array<vk::AttachmentDescription, 2> descriptions;
    u32 used = 0;

    // Appends a description for the given format and returns a reference to its slot.
    const auto add_attachment = [&](vk::Format format, vk::AttachmentLoadOp stencil_load,
                                    vk::AttachmentStoreOp stencil_store) {
        descriptions[used] = vk::AttachmentDescription{
            .format = format,
            .loadOp = load_op,
            .storeOp = vk::AttachmentStoreOp::eStore,
            .stencilLoadOp = stencil_load,
            .stencilStoreOp = stencil_store,
            .initialLayout = vk::ImageLayout::eGeneral,
            .finalLayout = vk::ImageLayout::eGeneral,
        };
        return vk::AttachmentReference{
            .attachment = used++,
            .layout = vk::ImageLayout::eGeneral,
        };
    };

    // Color comes first so it takes the lower attachment index when both are present.
    vk::AttachmentReference color_ref{};
    if (has_color) {
        color_ref = add_attachment(color, vk::AttachmentLoadOp::eDontCare,
                                   vk::AttachmentStoreOp::eDontCare);
    }

    vk::AttachmentReference depth_ref{};
    if (has_depth) {
        depth_ref = add_attachment(depth, load_op, vk::AttachmentStoreOp::eStore);
    }

    const vk::SubpassDescription subpass = {
        .pipelineBindPoint = vk::PipelineBindPoint::eGraphics,
        .inputAttachmentCount = 0,
        .pInputAttachments = nullptr,
        .colorAttachmentCount = has_color ? 1u : 0u,
        .pColorAttachments = &color_ref,
        .pResolveAttachments = nullptr,
        .pDepthStencilAttachment = has_depth ? &depth_ref : nullptr,
    };

    const vk::RenderPassCreateInfo create_info = {
        .attachmentCount = used,
        .pAttachments = descriptions.data(),
        .subpassCount = 1,
        .pSubpasses = &subpass,
        .dependencyCount = 0,
        .pDependencies = nullptr,
    };

    return instance.GetDevice().createRenderPassUnique(create_info);
}
|
||||
|
||||
} // namespace Vulkan
|
73
src/video_core/renderer_vulkan/vk_renderpass_cache.h
Normal file
73
src/video_core/renderer_vulkan/vk_renderpass_cache.h
Normal file
|
@ -0,0 +1,73 @@
|
|||
// Copyright 2023 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
|
||||
#include "common/math_util.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace VideoCore {
|
||||
enum class PixelFormat : u32;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Instance;
|
||||
class Scheduler;
|
||||
class Framebuffer;
|
||||
|
||||
struct RenderPass {
|
||||
vk::Framebuffer framebuffer;
|
||||
vk::RenderPass render_pass;
|
||||
vk::Rect2D render_area;
|
||||
vk::ClearValue clear;
|
||||
bool do_clear;
|
||||
|
||||
bool operator==(const RenderPass& other) const noexcept {
|
||||
return std::tie(framebuffer, render_pass, render_area, do_clear) ==
|
||||
std::tie(other.framebuffer, other.render_pass, other.render_area,
|
||||
other.do_clear) &&
|
||||
std::memcmp(&clear, &other.clear, sizeof(vk::ClearValue)) == 0;
|
||||
}
|
||||
};
|
||||
|
||||
class RenderpassCache {
|
||||
static constexpr size_t MAX_COLOR_FORMATS = 5;
|
||||
static constexpr size_t MAX_DEPTH_FORMATS = 4;
|
||||
|
||||
public:
|
||||
explicit RenderpassCache(const Instance& instance, Scheduler& scheduler);
|
||||
~RenderpassCache();
|
||||
|
||||
/// Begins a new renderpass with the provided framebuffer as render target.
|
||||
void BeginRendering(const Framebuffer* framebuffer, Common::Rectangle<u32> draw_rect);
|
||||
|
||||
/// Begins a new renderpass with the provided render state.
|
||||
void BeginRendering(const RenderPass& new_pass);
|
||||
|
||||
/// Exits from any currently active renderpass instance
|
||||
void EndRendering();
|
||||
|
||||
/// Returns the renderpass associated with the color-depth format pair
|
||||
vk::RenderPass GetRenderpass(VideoCore::PixelFormat color, VideoCore::PixelFormat depth,
|
||||
bool is_clear);
|
||||
|
||||
private:
|
||||
/// Creates a renderpass configured appropriately and stores it in cached_renderpasses
|
||||
vk::UniqueRenderPass CreateRenderPass(vk::Format color, vk::Format depth,
|
||||
vk::AttachmentLoadOp load_op) const;
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
Scheduler& scheduler;
|
||||
vk::UniqueRenderPass cached_renderpasses[MAX_COLOR_FORMATS + 1][MAX_DEPTH_FORMATS + 1][2];
|
||||
std::mutex cache_mutex;
|
||||
std::array<vk::Image, 2> images;
|
||||
std::array<vk::ImageAspectFlags, 2> aspects;
|
||||
RenderPass pass{};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
113
src/video_core/renderer_vulkan/vk_resource_pool.cpp
Normal file
113
src/video_core/renderer_vulkan/vk_resource_pool.cpp
Normal file
|
@ -0,0 +1,113 @@
|
|||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cstddef>
|
||||
#include <optional>
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
||||
#include "video_core/renderer_vulkan/vk_resource_pool.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Remembers the semaphore used to track GPU progress and the number of resources to add
/// whenever the pool has to grow.
ResourcePool::ResourcePool(MasterSemaphore* master_semaphore_, std::size_t grow_step_)
    : master_semaphore{master_semaphore_}, grow_step{grow_step_} {}
|
||||
|
||||
std::size_t ResourcePool::CommitResource() {
|
||||
// Refresh semaphore to query updated results
|
||||
master_semaphore->Refresh();
|
||||
const u64 gpu_tick = master_semaphore->KnownGpuTick();
|
||||
const auto search = [this, gpu_tick](std::size_t begin,
|
||||
std::size_t end) -> std::optional<std::size_t> {
|
||||
for (std::size_t iterator = begin; iterator < end; ++iterator) {
|
||||
if (gpu_tick >= ticks[iterator]) {
|
||||
ticks[iterator] = master_semaphore->CurrentTick();
|
||||
return iterator;
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
|
||||
// Try to find a free resource from the hinted position to the end.
|
||||
std::optional<std::size_t> found = search(hint_iterator, ticks.size());
|
||||
if (!found) {
|
||||
// Search from beginning to the hinted position.
|
||||
found = search(0, hint_iterator);
|
||||
if (!found) {
|
||||
// Both searches failed, the pool is full; handle it.
|
||||
const std::size_t free_resource = ManageOverflow();
|
||||
|
||||
ticks[free_resource] = master_semaphore->CurrentTick();
|
||||
found = free_resource;
|
||||
}
|
||||
}
|
||||
|
||||
// Free iterator is hinted to the resource after the one that's been commited.
|
||||
hint_iterator = (*found + 1) % ticks.size();
|
||||
return *found;
|
||||
}
|
||||
|
||||
std::size_t ResourcePool::ManageOverflow() {
|
||||
const std::size_t old_capacity = ticks.size();
|
||||
Grow();
|
||||
|
||||
// The last entry is guaranted to be free, since it's the first element of the freshly
|
||||
// allocated resources.
|
||||
return old_capacity;
|
||||
}
|
||||
|
||||
/// Extends the tick table by grow_step entries and asks the derived class to allocate the
/// matching resources for the new index range.
void ResourcePool::Grow() {
    const std::size_t previous_size = ticks.size();
    const std::size_t enlarged_size = previous_size + grow_step;
    ticks.resize(enlarged_size);
    Allocate(previous_size, enlarged_size);
}
|
||||
|
||||
constexpr size_t COMMAND_BUFFER_POOL_SIZE = 4;
|
||||
|
||||
struct CommandPool::Pool {
|
||||
vk::CommandPool handle;
|
||||
std::array<vk::CommandBuffer, COMMAND_BUFFER_POOL_SIZE> cmdbufs;
|
||||
};
|
||||
|
||||
/// The pool grows by COMMAND_BUFFER_POOL_SIZE command buffers at a time.
CommandPool::CommandPool(const Instance& instance_, MasterSemaphore* master_semaphore)
    : ResourcePool{master_semaphore, COMMAND_BUFFER_POOL_SIZE}, instance{instance_} {}

/// Destroys every Vulkan command pool created by Allocate.
CommandPool::~CommandPool() {
    const vk::Device device = instance.GetDevice();
    for (const Pool& entry : pools) {
        device.destroyCommandPool(entry.handle);
    }
}
|
||||
|
||||
void CommandPool::Allocate(std::size_t begin, std::size_t end) {
|
||||
// Command buffers are going to be commited, recorded, executed every single usage cycle.
|
||||
// They are also going to be reseted when commited.
|
||||
Pool& pool = pools.emplace_back();
|
||||
|
||||
const vk::CommandPoolCreateInfo pool_create_info = {
|
||||
.flags = vk::CommandPoolCreateFlagBits::eTransient |
|
||||
vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
|
||||
.queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex(),
|
||||
};
|
||||
|
||||
vk::Device device = instance.GetDevice();
|
||||
pool.handle = device.createCommandPool(pool_create_info);
|
||||
|
||||
const vk::CommandBufferAllocateInfo buffer_alloc_info = {
|
||||
.commandPool = pool.handle,
|
||||
.level = vk::CommandBufferLevel::ePrimary,
|
||||
.commandBufferCount = COMMAND_BUFFER_POOL_SIZE,
|
||||
};
|
||||
|
||||
auto buffers = device.allocateCommandBuffers(buffer_alloc_info);
|
||||
std::copy(buffers.begin(), buffers.end(), pool.cmdbufs.begin());
|
||||
}
|
||||
|
||||
/// Commits a resource and returns the command buffer stored at its index.
vk::CommandBuffer CommandPool::Commit() {
    // Resources are laid out as consecutive groups of COMMAND_BUFFER_POOL_SIZE per pool.
    const std::size_t resource = CommitResource();
    const Pool& owner = pools[resource / COMMAND_BUFFER_POOL_SIZE];
    return owner.cmdbufs[resource % COMMAND_BUFFER_POOL_SIZE];
}
|
||||
|
||||
} // namespace Vulkan
|
67
src/video_core/renderer_vulkan/vk_resource_pool.h
Normal file
67
src/video_core/renderer_vulkan/vk_resource_pool.h
Normal file
|
@ -0,0 +1,67 @@
|
|||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Instance;
|
||||
class MasterSemaphore;
|
||||
|
||||
/**
|
||||
* Handles a pool of resources protected by fences. Manages resource overflow allocating more
|
||||
* resources.
|
||||
*/
|
||||
class ResourcePool {
|
||||
public:
|
||||
explicit ResourcePool() = default;
|
||||
explicit ResourcePool(MasterSemaphore* master_semaphore, std::size_t grow_step);
|
||||
virtual ~ResourcePool() = default;
|
||||
|
||||
ResourcePool& operator=(ResourcePool&&) noexcept = default;
|
||||
ResourcePool(ResourcePool&&) noexcept = default;
|
||||
|
||||
ResourcePool& operator=(const ResourcePool&) = default;
|
||||
ResourcePool(const ResourcePool&) = default;
|
||||
|
||||
protected:
|
||||
std::size_t CommitResource();
|
||||
|
||||
/// Called when a chunk of resources have to be allocated.
|
||||
virtual void Allocate(std::size_t begin, std::size_t end) = 0;
|
||||
|
||||
private:
|
||||
/// Manages pool overflow allocating new resources.
|
||||
std::size_t ManageOverflow();
|
||||
|
||||
/// Allocates a new page of resources.
|
||||
void Grow();
|
||||
|
||||
protected:
|
||||
MasterSemaphore* master_semaphore{nullptr};
|
||||
std::size_t grow_step = 0; ///< Number of new resources created after an overflow
|
||||
std::size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found
|
||||
std::vector<u64> ticks; ///< Ticks for each resource
|
||||
};
|
||||
|
||||
class CommandPool final : public ResourcePool {
|
||||
public:
|
||||
explicit CommandPool(const Instance& instance, MasterSemaphore* master_semaphore);
|
||||
~CommandPool() override;
|
||||
|
||||
void Allocate(std::size_t begin, std::size_t end) override;
|
||||
|
||||
vk::CommandBuffer Commit();
|
||||
|
||||
private:
|
||||
struct Pool;
|
||||
const Instance& instance;
|
||||
std::vector<Pool> pools;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
203
src/video_core/renderer_vulkan/vk_scheduler.cpp
Normal file
203
src/video_core/renderer_vulkan/vk_scheduler.cpp
Normal file
|
@ -0,0 +1,203 @@
|
|||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <mutex>
|
||||
#include <utility>
|
||||
#include "common/microprofile.h"
|
||||
#include "common/settings.h"
|
||||
#include "common/thread.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
|
||||
MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192));
|
||||
MICROPROFILE_DEFINE(Vulkan_Submit, "Vulkan", "Submit Exectution", MP_RGB(255, 192, 255));
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
|
||||
std::unique_ptr<MasterSemaphore> MakeMasterSemaphore(const Instance& instance) {
|
||||
if (instance.IsTimelineSemaphoreSupported()) {
|
||||
return std::make_unique<MasterSemaphoreTimeline>(instance);
|
||||
} else {
|
||||
return std::make_unique<MasterSemaphoreFence>(instance);
|
||||
}
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Runs every recorded command in order against cmdbuf, destroying each one afterwards,
/// then resets the chunk so it can be recorded into again.
void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
    for (auto cursor = first; cursor != nullptr;) {
        // Fetch the successor first: the command is destroyed right after it executes.
        auto following = cursor->GetNext();
        cursor->Execute(cmdbuf);
        cursor->~Command();
        cursor = following;
    }

    // Back to the empty state
    submit = false;
    command_offset = 0;
    first = nullptr;
    last = nullptr;
}
|
||||
|
||||
/// Sets up the master semaphore and command pool, begins the first command buffer and,
/// unless renderer debugging is enabled, starts the worker thread.
Scheduler::Scheduler(const Instance& instance, RenderpassCache& renderpass_cache)
    : renderpass_cache{renderpass_cache}, master_semaphore{MakeMasterSemaphore(instance)},
      command_pool{instance, master_semaphore.get()},
      use_worker_thread{!Settings::values.renderer_debug} {
    AllocateWorkerCommandBuffers();
    if (!use_worker_thread) {
        return;
    }

    // A chunk must be available before the worker starts consuming work.
    AcquireNewChunk();
    worker_thread = std::jthread([this](std::stop_token token) { WorkerThread(token); });
}

Scheduler::~Scheduler() = default;
|
||||
|
||||
/// Submits the recorded work without waiting for it to complete.
void Scheduler::Flush(vk::Semaphore signal, vk::Semaphore wait) {
    // Flushing only hands the data over for submission; nothing is waited on here.
    SubmitExecution(signal, wait);
}
|
||||
|
||||
/// Submits the recorded work and blocks until the device has executed it.
void Scheduler::Finish(vk::Semaphore signal, vk::Semaphore wait) {
    // The tick has to be sampled before submitting; that is the tick to wait for.
    const u64 tick_before_submit = CurrentTick();
    SubmitExecution(signal, wait);
    Wait(tick_before_submit);
}
|
||||
|
||||
void Scheduler::WaitWorker() {
|
||||
if (!use_worker_thread) {
|
||||
return;
|
||||
}
|
||||
|
||||
MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
|
||||
DispatchWork();
|
||||
|
||||
// Ensure the queue is drained.
|
||||
{
|
||||
std::unique_lock ql{queue_mutex};
|
||||
event_cv.wait(ql, [this] { return work_queue.empty(); });
|
||||
}
|
||||
|
||||
// Now wait for execution to finish.
|
||||
// This needs to be done in the same order as WorkerThread.
|
||||
std::scoped_lock el{execution_mutex};
|
||||
}
|
||||
|
||||
/// Waits until the given tick has been reached, flushing first when necessary.
void Scheduler::Wait(u64 tick) {
    // A tick that is not behind the current one has not been submitted yet; flush so that
    // we never wait on the current tick without it ever being signalled.
    const bool not_yet_submitted = tick >= master_semaphore->CurrentTick();
    if (not_yet_submitted) {
        Flush();
    }
    master_semaphore->Wait(tick);
}
|
||||
|
||||
void Scheduler::DispatchWork() {
|
||||
if (!use_worker_thread || chunk->Empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
std::scoped_lock ql{queue_mutex};
|
||||
work_queue.push(std::move(chunk));
|
||||
}
|
||||
|
||||
event_cv.notify_all();
|
||||
AcquireNewChunk();
|
||||
}
|
||||
|
||||
/// Worker thread entry point: pops chunks from the queue, executes them on the current
/// command buffer and recycles them, until a stop is requested.
void Scheduler::WorkerThread(std::stop_token stop_token) {
    Common::SetCurrentThreadName("VulkanWorker");

    while (!stop_token.stop_requested()) {
        std::unique_ptr<CommandChunk> work;

        {
            std::unique_lock lk{queue_mutex};

            // Sleep until a chunk could be taken from the queue. Taking one also notifies
            // the condition variable so threads waiting on queue changes can re-check.
            Common::CondvarWait(event_cv, lk, stop_token, [this, &work] {
                if (work_queue.empty()) {
                    return false;
                }
                work = std::move(work_queue.front());
                work_queue.pop();
                event_cv.notify_all();
                return true;
            });

            // A stop request ends the thread right away.
            if (stop_token.stop_requested()) {
                return;
            }

            // Swap lock ownership: the execution lock is taken before the queue lock is
            // released, which lets other threads force execution to complete by locking
            // the execution mutex after observing an empty queue.
            std::exchange(lk, std::unique_lock{execution_mutex});

            // Executing resets the chunk, so the submit flag must be read beforehand.
            const bool was_submission = work->HasSubmit();
            work->ExecuteAll(current_cmdbuf);

            // A submission consumed the command buffer; begin a new one.
            if (was_submission) {
                AllocateWorkerCommandBuffers();
            }
        }

        {
            // Return the chunk to the reserve so it can be reused.
            std::scoped_lock rl{reserve_mutex};
            chunk_reserve.emplace_back(std::move(work));
        }
    }
}
|
||||
|
||||
void Scheduler::AllocateWorkerCommandBuffers() {
|
||||
const vk::CommandBufferBeginInfo begin_info = {
|
||||
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
|
||||
};
|
||||
|
||||
current_cmdbuf = command_pool.Commit();
|
||||
current_cmdbuf.begin(begin_info);
|
||||
}
|
||||
|
||||
/// Ends the active renderpass, records the submission of the current command buffer and
/// moves on to the next tick. All tracked state becomes dirty.
void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) {
    state = StateFlags::AllDirty;
    const u64 signal_value = master_semaphore->NextTick();

    renderpass_cache.EndRendering();
    Record([this, signal_semaphore, wait_semaphore, signal_value](vk::CommandBuffer cmdbuf) {
        MICROPROFILE_SCOPE(Vulkan_Submit);
        std::scoped_lock lock{submit_mutex};
        master_semaphore->SubmitWork(cmdbuf, wait_semaphore, signal_semaphore, signal_value);
    });

    if (use_worker_thread) {
        // Flag the chunk so the worker begins a new command buffer after executing it.
        chunk->MarkSubmit();
        DispatchWork();
    } else {
        // The submission already ran inline; begin the next command buffer right away.
        AllocateWorkerCommandBuffers();
    }
}
|
||||
|
||||
void Scheduler::AcquireNewChunk() {
|
||||
std::scoped_lock lock{reserve_mutex};
|
||||
if (chunk_reserve.empty()) {
|
||||
chunk = std::make_unique<CommandChunk>();
|
||||
return;
|
||||
}
|
||||
|
||||
chunk = std::move(chunk_reserve.back());
|
||||
chunk_reserve.pop_back();
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
210
src/video_core/renderer_vulkan/vk_scheduler.h
Normal file
210
src/video_core/renderer_vulkan/vk_scheduler.h
Normal file
|
@ -0,0 +1,210 @@
|
|||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include "common/alignment.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/polyfill_thread.h"
|
||||
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
||||
#include "video_core/renderer_vulkan/vk_resource_pool.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
enum class StateFlags {
|
||||
AllDirty = 0,
|
||||
Renderpass = 1 << 0,
|
||||
Pipeline = 1 << 1,
|
||||
DescriptorSets = 1 << 2
|
||||
};
|
||||
|
||||
DECLARE_ENUM_FLAG_OPERATORS(StateFlags)
|
||||
|
||||
class Instance;
|
||||
class RenderpassCache;
|
||||
|
||||
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
|
||||
/// OpenGL-like operations on Vulkan command buffers.
|
||||
class Scheduler {
|
||||
public:
|
||||
explicit Scheduler(const Instance& instance, RenderpassCache& renderpass_cache);
|
||||
~Scheduler();
|
||||
|
||||
/// Sends the current execution context to the GPU.
|
||||
void Flush(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr);
|
||||
|
||||
/// Sends the current execution context to the GPU and waits for it to complete.
|
||||
void Finish(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr);
|
||||
|
||||
/// Waits for the worker thread to finish executing everything. After this function returns it's
|
||||
/// safe to touch worker resources.
|
||||
void WaitWorker();
|
||||
|
||||
/// Waits for the given tick to trigger on the GPU.
|
||||
void Wait(u64 tick);
|
||||
|
||||
/// Sends currently recorded work to the worker thread.
|
||||
void DispatchWork();
|
||||
|
||||
/// Records the command to the current chunk.
|
||||
template <typename T>
|
||||
void Record(T&& command) {
|
||||
if (!use_worker_thread) {
|
||||
command(current_cmdbuf);
|
||||
return;
|
||||
}
|
||||
|
||||
if (chunk->Record(command)) {
|
||||
return;
|
||||
}
|
||||
DispatchWork();
|
||||
(void)chunk->Record(command);
|
||||
}
|
||||
|
||||
/// Marks the provided state as non dirty
|
||||
void MarkStateNonDirty(StateFlags flag) noexcept {
|
||||
state |= flag;
|
||||
}
|
||||
|
||||
/// Marks the provided state as dirty
|
||||
void MakeDirty(StateFlags flag) noexcept {
|
||||
state &= ~flag;
|
||||
}
|
||||
|
||||
/// Returns true if the state is dirty
|
||||
[[nodiscard]] bool IsStateDirty(StateFlags flag) const noexcept {
|
||||
return False(state & flag);
|
||||
}
|
||||
|
||||
/// Returns the current command buffer tick.
|
||||
[[nodiscard]] u64 CurrentTick() const noexcept {
|
||||
return master_semaphore->CurrentTick();
|
||||
}
|
||||
|
||||
/// Returns true when a tick has been triggered by the GPU.
|
||||
[[nodiscard]] bool IsFree(u64 tick) const noexcept {
|
||||
return master_semaphore->IsFree(tick);
|
||||
}
|
||||
|
||||
/// Returns the master timeline semaphore.
|
||||
[[nodiscard]] MasterSemaphore* GetMasterSemaphore() noexcept {
|
||||
return master_semaphore.get();
|
||||
}
|
||||
|
||||
std::mutex submit_mutex;
|
||||
|
||||
private:
|
||||
class Command {
|
||||
public:
|
||||
virtual ~Command() = default;
|
||||
|
||||
virtual void Execute(vk::CommandBuffer cmdbuf) const = 0;
|
||||
|
||||
Command* GetNext() const {
|
||||
return next;
|
||||
}
|
||||
|
||||
void SetNext(Command* next_) {
|
||||
next = next_;
|
||||
}
|
||||
|
||||
private:
|
||||
Command* next = nullptr;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class TypedCommand final : public Command {
|
||||
public:
|
||||
explicit TypedCommand(T&& command_) : command{std::move(command_)} {}
|
||||
~TypedCommand() override = default;
|
||||
|
||||
TypedCommand(TypedCommand&&) = delete;
|
||||
TypedCommand& operator=(TypedCommand&&) = delete;
|
||||
|
||||
void Execute(vk::CommandBuffer cmdbuf) const override {
|
||||
command(cmdbuf);
|
||||
}
|
||||
|
||||
private:
|
||||
T command;
|
||||
};
|
||||
|
||||
class CommandChunk final {
|
||||
public:
|
||||
void ExecuteAll(vk::CommandBuffer cmdbuf);
|
||||
|
||||
template <typename T>
|
||||
bool Record(T& command) {
|
||||
using FuncType = TypedCommand<T>;
|
||||
static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large");
|
||||
|
||||
recorded_counts++;
|
||||
command_offset = Common::AlignUp(command_offset, alignof(FuncType));
|
||||
if (command_offset > sizeof(data) - sizeof(FuncType)) {
|
||||
return false;
|
||||
}
|
||||
Command* const current_last = last;
|
||||
last = new (data.data() + command_offset) FuncType(std::move(command));
|
||||
|
||||
if (current_last) {
|
||||
current_last->SetNext(last);
|
||||
} else {
|
||||
first = last;
|
||||
}
|
||||
command_offset += sizeof(FuncType);
|
||||
return true;
|
||||
}
|
||||
|
||||
void MarkSubmit() {
|
||||
submit = true;
|
||||
}
|
||||
|
||||
bool Empty() const {
|
||||
return recorded_counts == 0;
|
||||
}
|
||||
|
||||
bool HasSubmit() const {
|
||||
return submit;
|
||||
}
|
||||
|
||||
private:
|
||||
Command* first = nullptr;
|
||||
Command* last = nullptr;
|
||||
|
||||
std::size_t recorded_counts = 0;
|
||||
std::size_t command_offset = 0;
|
||||
bool submit = false;
|
||||
alignas(std::max_align_t) std::array<u8, 0x8000> data{};
|
||||
};
|
||||
|
||||
private:
|
||||
void WorkerThread(std::stop_token stop_token);
|
||||
|
||||
void AllocateWorkerCommandBuffers();
|
||||
|
||||
void SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore);
|
||||
|
||||
void AcquireNewChunk();
|
||||
|
||||
private:
|
||||
RenderpassCache& renderpass_cache;
|
||||
std::unique_ptr<MasterSemaphore> master_semaphore;
|
||||
CommandPool command_pool;
|
||||
std::unique_ptr<CommandChunk> chunk;
|
||||
std::queue<std::unique_ptr<CommandChunk>> work_queue;
|
||||
std::vector<std::unique_ptr<CommandChunk>> chunk_reserve;
|
||||
vk::CommandBuffer current_cmdbuf;
|
||||
StateFlags state{};
|
||||
std::mutex execution_mutex;
|
||||
std::mutex reserve_mutex;
|
||||
std::mutex queue_mutex;
|
||||
std::condition_variable_any event_cv;
|
||||
std::jthread worker_thread;
|
||||
bool use_worker_thread;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
|
@ -1511,10 +1511,11 @@ vec4 secondary_fragment_color = vec4(0.0);
|
|||
"gl_FragCoord.y < float(scissor_y2))) discard;\n";
|
||||
}
|
||||
|
||||
// After perspective divide, OpenGL transform z_over_w from [-1, 1] to [near, far]. Here we use
|
||||
// default near = 0 and far = 1, and undo the transformation to get the original z_over_w, then
|
||||
// do our own transformation according to PICA specification.
|
||||
out += "float z_over_w = 2.0 * gl_FragCoord.z - 1.0;\n"
|
||||
// The PICA depth range is [-1, 0] while in Vulkan that range is [0, 1].
|
||||
// Thus in the vertex shader we flip the sign of the z component to place
|
||||
// it in the correct range. Here we undo the transformation to get the original z_over_w,
|
||||
// then do our own transformation according to PICA specification.
|
||||
out += "float z_over_w = -gl_FragCoord.z;\n"
|
||||
"float depth = z_over_w * depth_scale + depth_offset;\n";
|
||||
if (state.depthmap_enable == RasterizerRegs::DepthBuffering::WBuffering) {
|
||||
out += "depth /= gl_FragCoord.w;\n";
|
||||
|
@ -1661,8 +1662,7 @@ void main() {
|
|||
texcoord0_w = vert_texcoord0_w;
|
||||
normquat = vert_normquat;
|
||||
view = vert_view;
|
||||
gl_Position = vert_position;
|
||||
gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;
|
||||
gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w);
|
||||
)";
|
||||
if (use_clip_planes) {
|
||||
out += R"(
|
||||
|
@ -1780,8 +1780,7 @@ layout (set = 0, binding = 0, std140) uniform vs_config {
|
|||
semantic(VSOutputAttributes::POSITION_Y) + ", " +
|
||||
semantic(VSOutputAttributes::POSITION_Z) + ", " +
|
||||
semantic(VSOutputAttributes::POSITION_W) + ");\n";
|
||||
out += " gl_Position = vtx_pos;\n";
|
||||
out += " gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;\n";
|
||||
out += " gl_Position = vec4(vtx_pos.x, vtx_pos.y, -vtx_pos.z, vtx_pos.w);\n";
|
||||
if (config.use_clip_planes) {
|
||||
out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0
|
||||
out += " if (enable_clip1) {\n";
|
||||
|
@ -1867,8 +1866,7 @@ struct Vertex {
|
|||
semantic(VSOutputAttributes::POSITION_Y) + ", " +
|
||||
semantic(VSOutputAttributes::POSITION_Z) + ", " +
|
||||
semantic(VSOutputAttributes::POSITION_W) + ");\n";
|
||||
out += " gl_Position = vtx_pos;\n";
|
||||
out += " gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;\n";
|
||||
out += " gl_Position = vec4(vtx_pos.x, vtx_pos.y, -vtx_pos.z, vtx_pos.w);\n";
|
||||
if (use_clip_planes) {
|
||||
out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0
|
||||
out += " if (enable_clip1) {\n";
|
||||
|
|
|
@ -115,7 +115,7 @@ void FragmentModule::WriteDepth() {
|
|||
const Id input_pointer_id{TypePointer(spv::StorageClass::Input, f32_id)};
|
||||
const Id gl_frag_coord_z{
|
||||
OpLoad(f32_id, OpAccessChain(input_pointer_id, gl_frag_coord_id, ConstU32(2u)))};
|
||||
const Id z_over_w{OpFma(f32_id, ConstF32(2.f), gl_frag_coord_z, ConstF32(-1.f))};
|
||||
const Id z_over_w{OpFNegate(f32_id, gl_frag_coord_z)};
|
||||
const Id depth_scale{GetShaderDataMember(f32_id, ConstS32(2))};
|
||||
const Id depth_offset{GetShaderDataMember(f32_id, ConstS32(3))};
|
||||
depth = OpFma(f32_id, z_over_w, depth_scale, depth_offset);
|
||||
|
|
|
@ -160,7 +160,7 @@ bool InitializeCompiler() {
|
|||
|
||||
vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, vk::Device device) {
|
||||
if (!InitializeCompiler()) {
|
||||
return VK_NULL_HANDLE;
|
||||
return {};
|
||||
}
|
||||
|
||||
EProfile profile = ECoreProfile;
|
||||
|
@ -182,7 +182,7 @@ vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, v
|
|||
includer)) [[unlikely]] {
|
||||
LOG_INFO(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(),
|
||||
shader->getInfoDebugLog());
|
||||
return VK_NULL_HANDLE;
|
||||
return {};
|
||||
}
|
||||
|
||||
// Even though there's only a single shader, we still need to link it to generate SPV
|
||||
|
@ -191,7 +191,7 @@ vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, v
|
|||
if (!program->link(messages)) {
|
||||
LOG_INFO(Render_Vulkan, "Program Info Log:\n{}\n{}", program->getInfoLog(),
|
||||
program->getInfoDebugLog());
|
||||
return VK_NULL_HANDLE;
|
||||
return {};
|
||||
}
|
||||
|
||||
glslang::TIntermediate* intermediate = program->getIntermediate(lang);
|
||||
|
@ -227,7 +227,7 @@ vk::ShaderModule CompileSPV(std::span<const u32> code, vk::Device device) {
|
|||
UNREACHABLE_MSG("{}", err.what());
|
||||
}
|
||||
|
||||
return VK_NULL_HANDLE;
|
||||
return {};
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
201
src/video_core/renderer_vulkan/vk_stream_buffer.cpp
Normal file
201
src/video_core/renderer_vulkan/vk_stream_buffer.cpp
Normal file
|
@ -0,0 +1,201 @@
|
|||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
|
||||
/// Returns a printable name for the buffer type, or "Invalid" for unknown values.
std::string_view BufferTypeName(BufferType type) {
    if (type == BufferType::Upload) {
        return "Upload";
    }
    if (type == BufferType::Download) {
        return "Download";
    }
    if (type == BufferType::Stream) {
        return "Stream";
    }
    return "Invalid";
}
|
||||
|
||||
/// Returns the memory property flags requested for the given buffer type.
vk::MemoryPropertyFlags MakePropertyFlags(BufferType type) {
    using Property = vk::MemoryPropertyFlagBits;

    // Every known buffer type asks for host visible, host coherent memory.
    const vk::MemoryPropertyFlags host_coherent =
        Property::eHostVisible | Property::eHostCoherent;

    switch (type) {
    case BufferType::Upload:
        return host_coherent;
    case BufferType::Download:
        return host_coherent | Property::eHostCached;
    case BufferType::Stream:
        return host_coherent | Property::eDeviceLocal;
    default:
        UNREACHABLE_MSG("Unknown buffer type {}", type);
        return Property::eHostVisible;
    }
}
|
||||
|
||||
/// Find a memory type with the passed requirements
|
||||
std::optional<u32> FindMemoryType(const vk::PhysicalDeviceMemoryProperties& properties,
|
||||
vk::MemoryPropertyFlags wanted) {
|
||||
for (u32 i = 0; i < properties.memoryTypeCount; ++i) {
|
||||
const auto flags = properties.memoryTypes[i].propertyFlags;
|
||||
if ((flags & wanted) == wanted) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
/// Get the preferred host visible memory type.
|
||||
u32 GetMemoryType(const vk::PhysicalDeviceMemoryProperties& properties, BufferType type) {
|
||||
vk::MemoryPropertyFlags flags = MakePropertyFlags(type);
|
||||
std::optional preferred_type = FindMemoryType(properties, flags);
|
||||
|
||||
constexpr std::array remove_flags = {
|
||||
vk::MemoryPropertyFlagBits::eHostCached,
|
||||
vk::MemoryPropertyFlagBits::eHostCoherent,
|
||||
};
|
||||
|
||||
for (u32 i = 0; i < remove_flags.size() && !preferred_type; i++) {
|
||||
flags &= ~remove_flags[i];
|
||||
preferred_type = FindMemoryType(properties, flags);
|
||||
}
|
||||
ASSERT_MSG(preferred_type, "No suitable memory type found");
|
||||
return preferred_type.value();
|
||||
}
|
||||
|
||||
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
|
||||
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Creates the buffer and its backing memory, then pre-sizes both watch lists.
StreamBuffer::StreamBuffer(const Instance& instance_, Scheduler& scheduler_,
                           vk::BufferUsageFlags usage_, u64 size, BufferType type_)
    : instance{instance_}, scheduler{scheduler_}, device{instance.GetDevice()},
      stream_buffer_size{size}, usage{usage_}, type{type_} {
    CreateBuffers(size);

    for (std::vector<Watch>* const watches : {&current_watches, &previous_watches}) {
        ReserveWatches(*watches, WATCHES_INITIAL_RESERVE);
    }
}
|
||||
|
||||
/// Releases the mapping, the buffer handle and the memory allocation, in that order.
StreamBuffer::~StreamBuffer() {
    // Drop the persistent host mapping before the objects go away.
    device.unmapMemory(memory);
    device.destroyBuffer(buffer);
    device.freeMemory(memory);
}
|
||||
|
||||
/// Reserves `size` bytes at the current offset, aligned to `alignment` when it is non-zero.
/// Returns the mapped pointer for the region, its buffer offset, and whether the buffer
/// wrapped around to offset zero to satisfy the request.
std::tuple<u8*, u64, bool> StreamBuffer::Map(u64 size, u64 alignment) {
    ASSERT(size <= stream_buffer_size);
    mapped_size = size;

    if (alignment > 0) {
        offset = Common::AlignUp(offset, alignment);
    }

    const bool wraps_around = offset + size > stream_buffer_size;
    if (wraps_around) {
        // The request does not fit in the remaining space: remember how many watches this
        // pass used and start over from the beginning of the buffer.
        invalidation_mark = current_watch_cursor;
        current_watch_cursor = 0;
        offset = 0;

        // The watches of the finished pass become the ones that are waited on.
        std::swap(previous_watches, current_watches);
        wait_cursor = 0;
        wait_bound = 0;
    }

    // Wait for pending work that still covers the region about to be handed out.
    WaitPendingOperations(offset + size);

    return std::make_tuple(mapped + offset, offset, wraps_around);
}
|
||||
|
||||
/// Finishes the reservation made by Map: synchronizes non-coherent memory, advances the
/// buffer offset by `size` and records a watch tagged with the scheduler's current tick.
void StreamBuffer::Commit(u64 size) {
    ASSERT_MSG(size <= mapped_size, "Reserved size {} is too small compared to {}", mapped_size,
               size);

    if (!is_coherent) {
        // NOTE(review): Vulkan requires the offset and size of a mapped range to respect
        // nonCoherentAtomSize; nothing here aligns them - confirm callers guarantee it.
        const vk::MappedMemoryRange range = {
            .memory = memory,
            .offset = offset,
            .size = size,
        };
        if (type == BufferType::Download) {
            device.invalidateMappedMemoryRanges(range);
        } else {
            device.flushMappedMemoryRanges(range);
        }
    }

    offset += size;

    // Grow the watch list before it runs out of slots.
    const bool out_of_watches = current_watch_cursor + 1 >= current_watches.size();
    if (out_of_watches) {
        ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK);
    }

    Watch& watch = current_watches[current_watch_cursor];
    watch.upper_bound = offset;
    watch.tick = scheduler.CurrentTick();
    ++current_watch_cursor;
}
|
||||
|
||||
/// Creates the buffer, allocates memory of the preferred type for it, binds the two and keeps
/// the whole allocation mapped. Also updates is_coherent and stream_buffer_size.
void StreamBuffer::CreateBuffers(u64 prefered_size) {
    const auto memory_properties = instance.GetPhysicalDevice().getMemoryProperties();
    const u32 type_index = GetMemoryType(memory_properties, type);
    const vk::MemoryType& memory_type = memory_properties.memoryTypes[type_index];
    is_coherent =
        static_cast<bool>(memory_type.propertyFlags & vk::MemoryPropertyFlagBits::eHostCoherent);

    // Never ask for more than half of the heap, to avoid running out of memory.
    // As per DXVK's example, using `heap_size / 2`
    const VkDeviceSize heap_size = memory_properties.memoryHeaps[memory_type.heapIndex].size;
    const VkDeviceSize allocable_size = heap_size / 2;

    const vk::BufferCreateInfo buffer_info = {
        .size = std::min(prefered_size, allocable_size),
        .usage = usage,
    };
    buffer = device.createBuffer(buffer_info);

    // The size reported by the driver becomes the effective size of the buffer.
    const auto requirements = device.getBufferMemoryRequirements(buffer);
    stream_buffer_size = static_cast<u64>(requirements.size);

    LOG_INFO(Render_Vulkan, "Creating {} buffer with size {} KB with flags {}",
             BufferTypeName(type), stream_buffer_size / 1024,
             vk::to_string(memory_type.propertyFlags));

    const vk::MemoryAllocateInfo alloc_info = {
        .allocationSize = requirements.size,
        .memoryTypeIndex = type_index,
    };
    memory = device.allocateMemory(alloc_info);

    device.bindBufferMemory(buffer, memory, 0);
    mapped = reinterpret_cast<u8*>(device.mapMemory(memory, 0, VK_WHOLE_SIZE));
}
|
||||
|
||||
/// Appends `grow_size` default-constructed watches to the list.
void StreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
    const std::size_t grown_size = watches.size() + grow_size;
    watches.resize(grown_size);
}
|
||||
|
||||
/// Waits on watches of the previous pass, in order, until the waited-for bound reaches
/// `requested_upper_bound` or every watch of that pass has been waited on.
void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
    // Nothing to wait for until the buffer has wrapped around at least once.
    if (!invalidation_mark.has_value()) {
        return;
    }

    for (; requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark;
         ++wait_cursor) {
        const Watch& pending = previous_watches[wait_cursor];
        wait_bound = pending.upper_bound;
        scheduler.Wait(pending.tick);
    }
}
|
||||
|
||||
} // namespace Vulkan
|
86
src/video_core/renderer_vulkan/vk_stream_buffer.h
Normal file
86
src/video_core/renderer_vulkan/vk_stream_buffer.h
Normal file
|
@ -0,0 +1,86 @@
|
|||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Kind of stream buffer; selects the memory properties requested for its allocation.
enum class BufferType : u32 {
    Upload = 0,   ///< Requests host visible, host coherent memory.
    Download = 1, ///< Requests host visible, host coherent, host cached memory.
    Stream = 2,   ///< Requests device local, host visible, host coherent memory.
};
|
||||
|
||||
class Instance;
|
||||
class Scheduler;
|
||||
|
||||
class StreamBuffer final {
|
||||
static constexpr std::size_t MAX_BUFFER_VIEWS = 3;
|
||||
|
||||
public:
|
||||
explicit StreamBuffer(const Instance& instance, Scheduler& scheduler,
|
||||
vk::BufferUsageFlags usage, u64 size,
|
||||
BufferType type = BufferType::Stream);
|
||||
~StreamBuffer();
|
||||
|
||||
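/**
 * Reserves a region of memory from the stream buffer.
 * @param size Size to reserve.
 * @param alignment Alignment applied to the buffer offset before reserving; 0 skips it.
 * @returns A tuple of a raw memory pointer (with offset added), the buffer offset, and a
 * flag that is true when the buffer wrapped around to offset zero for this reservation.
 */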
|
||||
std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment);
|
||||
|
||||
/// Ensures that "size" bytes of the mapped region are synchronized (flushed, or invalidated for
/// download buffers, when the memory is not coherent) and advances the buffer offset past them.
|
||||
void Commit(u64 size);
|
||||
|
||||
vk::Buffer Handle() const noexcept {
|
||||
return buffer;
|
||||
}
|
||||
|
||||
private:
|
||||
struct Watch {
|
||||
u64 tick{};
|
||||
u64 upper_bound{};
|
||||
};
|
||||
|
||||
/// Creates the Vulkan buffer and allocates, binds and maps the memory that backs it.
|
||||
void CreateBuffers(u64 prefered_size);
|
||||
|
||||
/// Increases the amount of watches available.
|
||||
void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
|
||||
|
||||
void WaitPendingOperations(u64 requested_upper_bound);
|
||||
|
||||
private:
|
||||
const Instance& instance; ///< Vulkan instance.
|
||||
Scheduler& scheduler; ///< Command scheduler.
|
||||
|
||||
vk::Device device;
|
||||
vk::Buffer buffer; ///< Mapped buffer.
|
||||
vk::DeviceMemory memory; ///< Memory allocation.
|
||||
u8* mapped{}; ///< Pointer to the mapped memory
|
||||
u64 stream_buffer_size{}; ///< Stream buffer size.
|
||||
vk::BufferUsageFlags usage{};
|
||||
BufferType type;
|
||||
|
||||
u64 offset{}; ///< Buffer iterator.
|
||||
u64 mapped_size{}; ///< Size reserved for the current copy.
|
||||
bool is_coherent{}; ///< True if the buffer is coherent
|
||||
|
||||
std::vector<Watch> current_watches; ///< Watches recorded in the current iteration.
|
||||
std::size_t current_watch_cursor{}; ///< Count of watches, reset on invalidation.
|
||||
std::optional<std::size_t> invalidation_mark; ///< Number of watches used in the previous cycle.
|
||||
|
||||
std::vector<Watch> previous_watches; ///< Watches used in the previous iteration.
|
||||
std::size_t wait_cursor{}; ///< Last watch being waited for completion.
|
||||
u64 wait_bound{}; ///< Highest offset being watched for completion.
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
236
src/video_core/renderer_vulkan/vk_swapchain.cpp
Normal file
236
src/video_core/renderer_vulkan/vk_swapchain.cpp
Normal file
|
@ -0,0 +1,236 @@
|
|||
// Copyright 2023 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include "common/logging/log.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "common/settings.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
||||
|
||||
MICROPROFILE_DEFINE(Vulkan_Acquire, "Vulkan", "Swapchain Acquire", MP_RGB(185, 66, 245));
|
||||
MICROPROFILE_DEFINE(Vulkan_Present, "Vulkan", "Swapchain Present", MP_RGB(66, 185, 245));
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Selects the surface format and present mode, then creates the swapchain.
Swapchain::Swapchain(const Instance& instance_, u32 width, u32 height, vk::SurfaceKHR surface_)
    : instance{instance_}, surface{surface_} {
    FindPresentFormat();
    // NOTE(review): Create() calls SetPresentMode() again, so this call looks redundant.
    SetPresentMode();
    Create(width, height, surface);
}
|
||||
|
||||
/// Destroys the swapchain resources first and the surface afterwards.
Swapchain::~Swapchain() {
    Destroy();
    instance.GetInstance().destroySurfaceKHR(surface);
}
|
||||
|
||||
/// Creates (or recreates) the swapchain for the given size and surface.
/// Existing swapchain resources are destroyed first; the present mode and surface properties
/// are queried again before the new swapchain, its images and semaphores are created.
void Swapchain::Create(u32 width_, u32 height_, vk::SurfaceKHR surface_) {
    width = width_;
    height = height_;
    surface = surface_;
    needs_recreation = false;

    Destroy();

    SetPresentMode();
    SetSurfaceProperties();

    // Images can stay exclusive to one family when graphics and present share it.
    const std::array family_indices = {
        instance.GetGraphicsQueueFamilyIndex(),
        instance.GetPresentQueueFamilyIndex(),
    };
    const bool same_family = family_indices[0] == family_indices[1];

    const vk::SwapchainCreateInfoKHR swapchain_info = {
        .surface = surface,
        .minImageCount = image_count,
        .imageFormat = surface_format.format,
        .imageColorSpace = surface_format.colorSpace,
        .imageExtent = extent,
        .imageArrayLayers = 1,
        .imageUsage = vk::ImageUsageFlagBits::eColorAttachment |
                      vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst,
        .imageSharingMode = same_family ? vk::SharingMode::eExclusive : vk::SharingMode::eConcurrent,
        .queueFamilyIndexCount = same_family ? 1u : 2u,
        .pQueueFamilyIndices = family_indices.data(),
        .preTransform = transform,
        .compositeAlpha = composite_alpha,
        .presentMode = present_mode,
        .clipped = true,
        .oldSwapchain = nullptr,
    };

    try {
        swapchain = instance.GetDevice().createSwapchainKHR(swapchain_info);
    } catch (const vk::SystemError& err) {
        LOG_CRITICAL(Render_Vulkan, "{}", err.what());
        UNREACHABLE();
    }

    SetupImages();
    RefreshSemaphores();
}
|
||||
|
||||
bool Swapchain::AcquireNextImage() {
|
||||
MICROPROFILE_SCOPE(Vulkan_Acquire);
|
||||
vk::Device device = instance.GetDevice();
|
||||
vk::Result result =
|
||||
device.acquireNextImageKHR(swapchain, std::numeric_limits<u64>::max(),
|
||||
image_acquired[frame_index], VK_NULL_HANDLE, &image_index);
|
||||
|
||||
switch (result) {
|
||||
case vk::Result::eSuccess:
|
||||
break;
|
||||
case vk::Result::eSuboptimalKHR:
|
||||
case vk::Result::eErrorOutOfDateKHR:
|
||||
needs_recreation = true;
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(Render_Vulkan, "Swapchain acquire returned unknown result {}", result);
|
||||
UNREACHABLE();
|
||||
break;
|
||||
}
|
||||
|
||||
return !needs_recreation;
|
||||
}
|
||||
|
||||
void Swapchain::Present() {
|
||||
if (needs_recreation) {
|
||||
return;
|
||||
}
|
||||
|
||||
const vk::PresentInfoKHR present_info = {
|
||||
.waitSemaphoreCount = 1,
|
||||
.pWaitSemaphores = &present_ready[image_index],
|
||||
.swapchainCount = 1,
|
||||
.pSwapchains = &swapchain,
|
||||
.pImageIndices = &image_index,
|
||||
};
|
||||
|
||||
MICROPROFILE_SCOPE(Vulkan_Present);
|
||||
try {
|
||||
[[maybe_unused]] vk::Result result = instance.GetPresentQueue().presentKHR(present_info);
|
||||
} catch (vk::OutOfDateKHRError&) {
|
||||
needs_recreation = true;
|
||||
} catch (const vk::SystemError& err) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Swapchain presentation failed {}", err.what());
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
frame_index = (frame_index + 1) % image_count;
|
||||
}
|
||||
|
||||
void Swapchain::FindPresentFormat() {
|
||||
const auto formats = instance.GetPhysicalDevice().getSurfaceFormatsKHR(surface);
|
||||
|
||||
// If there is a single undefined surface format, the device doesn't care, so we'll just use
|
||||
// RGBA.
|
||||
if (formats[0].format == vk::Format::eUndefined) {
|
||||
surface_format.format = vk::Format::eR8G8B8A8Unorm;
|
||||
surface_format.colorSpace = vk::ColorSpaceKHR::eSrgbNonlinear;
|
||||
return;
|
||||
}
|
||||
|
||||
// Try to find a suitable format.
|
||||
for (const vk::SurfaceFormatKHR& sformat : formats) {
|
||||
vk::Format format = sformat.format;
|
||||
if (format != vk::Format::eR8G8B8A8Unorm && format != vk::Format::eB8G8R8A8Unorm) {
|
||||
continue;
|
||||
}
|
||||
|
||||
surface_format.format = format;
|
||||
surface_format.colorSpace = sformat.colorSpace;
|
||||
return;
|
||||
}
|
||||
|
||||
LOG_CRITICAL(Render_Vulkan, "Unable to find required swapchain format!");
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
void Swapchain::SetPresentMode() {
|
||||
present_mode = vk::PresentModeKHR::eFifo;
|
||||
if (!Settings::values.use_vsync_new) {
|
||||
const auto modes = instance.GetPhysicalDevice().getSurfacePresentModesKHR(surface);
|
||||
const auto find_mode = [&modes](vk::PresentModeKHR requested) {
|
||||
auto it =
|
||||
std::find_if(modes.begin(), modes.end(),
|
||||
[&requested](vk::PresentModeKHR mode) { return mode == requested; });
|
||||
|
||||
return it != modes.end();
|
||||
};
|
||||
|
||||
const bool has_mailbox = find_mode(vk::PresentModeKHR::eMailbox);
|
||||
present_mode = has_mailbox ? vk::PresentModeKHR::eMailbox : vk::PresentModeKHR::eImmediate;
|
||||
}
|
||||
}
|
||||
|
||||
void Swapchain::SetSurfaceProperties() {
|
||||
const vk::SurfaceCapabilitiesKHR capabilities =
|
||||
instance.GetPhysicalDevice().getSurfaceCapabilitiesKHR(surface);
|
||||
|
||||
extent = capabilities.currentExtent;
|
||||
if (capabilities.currentExtent.width == std::numeric_limits<u32>::max()) {
|
||||
extent.width = std::max(capabilities.minImageExtent.width,
|
||||
std::min(capabilities.maxImageExtent.width, width));
|
||||
extent.height = std::max(capabilities.minImageExtent.height,
|
||||
std::min(capabilities.maxImageExtent.height, height));
|
||||
}
|
||||
|
||||
// Select number of images in swap chain, we prefer one buffer in the background to work on
|
||||
image_count = capabilities.minImageCount + 1;
|
||||
if (capabilities.maxImageCount > 0) {
|
||||
image_count = std::min(image_count, capabilities.maxImageCount);
|
||||
}
|
||||
|
||||
// Prefer identity transform if possible
|
||||
transform = vk::SurfaceTransformFlagBitsKHR::eIdentity;
|
||||
if (!(capabilities.supportedTransforms & transform)) {
|
||||
transform = capabilities.currentTransform;
|
||||
}
|
||||
|
||||
// Opaque is not supported everywhere.
|
||||
composite_alpha = vk::CompositeAlphaFlagBitsKHR::eOpaque;
|
||||
if (!(capabilities.supportedCompositeAlpha & vk::CompositeAlphaFlagBitsKHR::eOpaque)) {
|
||||
composite_alpha = vk::CompositeAlphaFlagBitsKHR::eInherit;
|
||||
}
|
||||
}
|
||||
|
||||
void Swapchain::Destroy() {
|
||||
vk::Device device = instance.GetDevice();
|
||||
if (swapchain) {
|
||||
device.destroySwapchainKHR(swapchain);
|
||||
}
|
||||
for (u32 i = 0; i < image_count; i++) {
|
||||
device.destroySemaphore(image_acquired[i]);
|
||||
device.destroySemaphore(present_ready[i]);
|
||||
}
|
||||
image_acquired.clear();
|
||||
present_ready.clear();
|
||||
}
|
||||
|
||||
void Swapchain::RefreshSemaphores() {
|
||||
const vk::Device device = instance.GetDevice();
|
||||
image_acquired.resize(image_count);
|
||||
present_ready.resize(image_count);
|
||||
|
||||
for (vk::Semaphore& semaphore : image_acquired) {
|
||||
semaphore = device.createSemaphore({});
|
||||
}
|
||||
for (vk::Semaphore& semaphore : present_ready) {
|
||||
semaphore = device.createSemaphore({});
|
||||
}
|
||||
}
|
||||
|
||||
void Swapchain::SetupImages() {
|
||||
vk::Device device = instance.GetDevice();
|
||||
images = device.getSwapchainImagesKHR(swapchain);
|
||||
image_count = static_cast<u32>(images.size());
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
110
src/video_core/renderer_vulkan/vk_swapchain.h
Normal file
110
src/video_core/renderer_vulkan/vk_swapchain.h
Normal file
|
@ -0,0 +1,110 @@
|
|||
// Copyright 2023 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Instance;
|
||||
class Scheduler;
|
||||
|
||||
class Swapchain {
|
||||
public:
|
||||
explicit Swapchain(const Instance& instance, u32 width, u32 height, vk::SurfaceKHR surface);
|
||||
~Swapchain();
|
||||
|
||||
/// Creates (or recreates) the swapchain with a given size.
|
||||
void Create(u32 width, u32 height, vk::SurfaceKHR surface);
|
||||
|
||||
/// Acquires the next image in the swapchain.
|
||||
bool AcquireNextImage();
|
||||
|
||||
/// Presents the current image and moves to the next one
|
||||
void Present();
|
||||
|
||||
vk::SurfaceKHR GetSurface() const {
|
||||
return surface;
|
||||
}
|
||||
|
||||
vk::Image Image() const {
|
||||
return images[image_index];
|
||||
}
|
||||
|
||||
vk::SurfaceFormatKHR GetSurfaceFormat() const {
|
||||
return surface_format;
|
||||
}
|
||||
|
||||
vk::SwapchainKHR GetHandle() const {
|
||||
return swapchain;
|
||||
}
|
||||
|
||||
u32 GetWidth() const {
|
||||
return width;
|
||||
}
|
||||
|
||||
u32 GetHeight() const {
|
||||
return height;
|
||||
}
|
||||
|
||||
u32 GetImageCount() const {
|
||||
return image_count;
|
||||
}
|
||||
|
||||
vk::Extent2D GetExtent() const {
|
||||
return extent;
|
||||
}
|
||||
|
||||
[[nodiscard]] vk::Semaphore GetImageAcquiredSemaphore() const {
|
||||
return image_acquired[frame_index];
|
||||
}
|
||||
|
||||
[[nodiscard]] vk::Semaphore GetPresentReadySemaphore() const {
|
||||
return present_ready[image_index];
|
||||
}
|
||||
|
||||
private:
|
||||
/// Selects the best available swapchain image format
|
||||
void FindPresentFormat();
|
||||
|
||||
/// Sets the best available present mode
|
||||
void SetPresentMode();
|
||||
|
||||
/// Sets the surface properties according to device capabilities
|
||||
void SetSurfaceProperties();
|
||||
|
||||
/// Destroys current swapchain resources
|
||||
void Destroy();
|
||||
|
||||
/// Retrieves the swapchain images and updates the image count accordingly
|
||||
void SetupImages();
|
||||
|
||||
/// Creates the image acquired and present ready semaphores
|
||||
void RefreshSemaphores();
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
vk::SwapchainKHR swapchain{};
|
||||
vk::SurfaceKHR surface{};
|
||||
vk::SurfaceFormatKHR surface_format;
|
||||
vk::PresentModeKHR present_mode;
|
||||
vk::Extent2D extent;
|
||||
vk::SurfaceTransformFlagBitsKHR transform;
|
||||
vk::CompositeAlphaFlagBitsKHR composite_alpha;
|
||||
std::vector<vk::Image> images;
|
||||
std::vector<vk::Semaphore> image_acquired;
|
||||
std::vector<vk::Semaphore> present_ready;
|
||||
u32 width = 0;
|
||||
u32 height = 0;
|
||||
u32 image_count = 0;
|
||||
u32 image_index = 0;
|
||||
u32 frame_index = 0;
|
||||
bool needs_recreation = true;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
1570
src/video_core/renderer_vulkan/vk_texture_runtime.cpp
Normal file
1570
src/video_core/renderer_vulkan/vk_texture_runtime.cpp
Normal file
File diff suppressed because it is too large
Load diff
295
src/video_core/renderer_vulkan/vk_texture_runtime.h
Normal file
295
src/video_core/renderer_vulkan/vk_texture_runtime.h
Normal file
|
@ -0,0 +1,295 @@
|
|||
// Copyright 2023 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <deque>
|
||||
#include <span>
|
||||
#include "video_core/rasterizer_cache/framebuffer_base.h"
|
||||
#include "video_core/rasterizer_cache/rasterizer_cache_base.h"
|
||||
#include "video_core/rasterizer_cache/surface_base.h"
|
||||
#include "video_core/renderer_vulkan/vk_blit_helper.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
|
||||
VK_DEFINE_HANDLE(VmaAllocation)
|
||||
|
||||
namespace VideoCore {
|
||||
struct Material;
|
||||
}
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Instance;
|
||||
class RenderpassCache;
|
||||
class DescriptorPool;
|
||||
class DescriptorSetProvider;
|
||||
class Surface;
|
||||
|
||||
struct Handle {
|
||||
VmaAllocation alloc;
|
||||
vk::Image image;
|
||||
vk::UniqueImageView image_view;
|
||||
};
|
||||
|
||||
/**
|
||||
* Provides texture manipulation functions to the rasterizer cache
|
||||
* Separating this into a class makes it easier to abstract graphics API code
|
||||
*/
|
||||
class TextureRuntime {
|
||||
friend class Surface;
|
||||
|
||||
public:
|
||||
explicit TextureRuntime(const Instance& instance, Scheduler& scheduler,
|
||||
RenderpassCache& renderpass_cache, DescriptorPool& pool,
|
||||
DescriptorSetProvider& texture_provider, u32 num_swapchain_images);
|
||||
~TextureRuntime();
|
||||
|
||||
const Instance& GetInstance() const {
|
||||
return instance;
|
||||
}
|
||||
|
||||
Scheduler& GetScheduler() const {
|
||||
return scheduler;
|
||||
}
|
||||
|
||||
RenderpassCache& GetRenderpassCache() {
|
||||
return renderpass_cache;
|
||||
}
|
||||
|
||||
/// Returns the removal threshold ticks for the garbage collector
|
||||
u32 RemoveThreshold();
|
||||
|
||||
/// Maps an internal staging buffer of the provided size for pixel uploads/downloads
|
||||
VideoCore::StagingData FindStaging(u32 size, bool upload);
|
||||
|
||||
/// Attempts to reinterpret a rectangle of source to another rectangle of dest
|
||||
bool Reinterpret(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit);
|
||||
|
||||
/// Fills the rectangle of the texture with the clear value provided
|
||||
bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear);
|
||||
|
||||
/// Copies a rectangle of src_tex to another rectange of dst_rect
|
||||
bool CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy);
|
||||
|
||||
/// Blits a rectangle of src_tex to another rectange of dst_rect
|
||||
bool BlitTextures(Surface& surface, Surface& dest, const VideoCore::TextureBlit& blit);
|
||||
|
||||
/// Generates mipmaps for all the available levels of the texture
|
||||
void GenerateMipmaps(Surface& surface);
|
||||
|
||||
/// Returns true if the provided pixel format needs convertion
|
||||
bool NeedsConversion(VideoCore::PixelFormat format) const;
|
||||
|
||||
/// Removes any descriptor sets that contain the provided image view.
|
||||
void FreeDescriptorSetsWithImage(vk::ImageView image_view);
|
||||
|
||||
private:
|
||||
/// Clears a partial texture rect using a clear rectangle
|
||||
void ClearTextureWithRenderpass(Surface& surface, const VideoCore::TextureClear& clear);
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
Scheduler& scheduler;
|
||||
RenderpassCache& renderpass_cache;
|
||||
DescriptorSetProvider& texture_provider;
|
||||
BlitHelper blit_helper;
|
||||
StreamBuffer upload_buffer;
|
||||
StreamBuffer download_buffer;
|
||||
u32 num_swapchain_images;
|
||||
};
|
||||
|
||||
class Surface : public VideoCore::SurfaceBase {
|
||||
friend class TextureRuntime;
|
||||
|
||||
public:
|
||||
explicit Surface(TextureRuntime& runtime, const VideoCore::SurfaceParams& params);
|
||||
explicit Surface(TextureRuntime& runtime, const VideoCore::SurfaceBase& surface,
|
||||
const VideoCore::Material* materal);
|
||||
~Surface();
|
||||
|
||||
Surface(const Surface&) = delete;
|
||||
Surface& operator=(const Surface&) = delete;
|
||||
|
||||
Surface(Surface&& o) noexcept = default;
|
||||
Surface& operator=(Surface&& o) noexcept = default;
|
||||
|
||||
vk::ImageAspectFlags Aspect() const noexcept {
|
||||
return traits.aspect;
|
||||
}
|
||||
|
||||
/// Returns the image at index, otherwise the base image
|
||||
vk::Image Image(u32 index = 1) const noexcept;
|
||||
|
||||
/// Returns the image view at index, otherwise the base view
|
||||
vk::ImageView ImageView(u32 index = 1) const noexcept;
|
||||
|
||||
/// Returns a copy of the upscaled image handle, used for feedback loops.
|
||||
vk::ImageView CopyImageView() noexcept;
|
||||
|
||||
/// Returns the framebuffer view of the surface image
|
||||
vk::ImageView FramebufferView() noexcept;
|
||||
|
||||
/// Returns the depth view of the surface image
|
||||
vk::ImageView DepthView() noexcept;
|
||||
|
||||
/// Returns the stencil view of the surface image
|
||||
vk::ImageView StencilView() noexcept;
|
||||
|
||||
/// Returns the R32 image view used for atomic load/store
|
||||
vk::ImageView StorageView() noexcept;
|
||||
|
||||
/// Returns a framebuffer handle for rendering to this surface
|
||||
vk::Framebuffer Framebuffer() noexcept;
|
||||
|
||||
/// Uploads pixel data in staging to a rectangle region of the surface texture
|
||||
void Upload(const VideoCore::BufferTextureCopy& upload, const VideoCore::StagingData& staging);
|
||||
|
||||
/// Uploads the custom material to the surface allocation.
|
||||
void UploadCustom(const VideoCore::Material* material, u32 level);
|
||||
|
||||
/// Downloads pixel data to staging from a rectangle region of the surface texture
|
||||
void Download(const VideoCore::BufferTextureCopy& download,
|
||||
const VideoCore::StagingData& staging);
|
||||
|
||||
/// Scales up the surface to match the new resolution scale.
|
||||
void ScaleUp(u32 new_scale);
|
||||
|
||||
/// Returns the bpp of the internal surface format
|
||||
u32 GetInternalBytesPerPixel() const;
|
||||
|
||||
/// Returns the access flags indicative of the surface
|
||||
vk::AccessFlags AccessFlags() const noexcept;
|
||||
|
||||
/// Returns the pipeline stage flags indicative of the surface
|
||||
vk::PipelineStageFlags PipelineStageFlags() const noexcept;
|
||||
|
||||
private:
|
||||
/// Performs blit between the scaled/unscaled images
|
||||
void BlitScale(const VideoCore::TextureBlit& blit, bool up_scale);
|
||||
|
||||
/// Downloads scaled depth stencil data
|
||||
void DepthStencilDownload(const VideoCore::BufferTextureCopy& download,
|
||||
const VideoCore::StagingData& staging);
|
||||
|
||||
public:
|
||||
TextureRuntime* runtime;
|
||||
const Instance* instance;
|
||||
Scheduler* scheduler;
|
||||
FormatTraits traits;
|
||||
std::array<Handle, 3> handles{};
|
||||
std::array<vk::UniqueFramebuffer, 2> framebuffers{};
|
||||
Handle copy_handle;
|
||||
vk::UniqueImageView depth_view;
|
||||
vk::UniqueImageView stencil_view;
|
||||
vk::UniqueImageView storage_view;
|
||||
bool is_framebuffer{};
|
||||
bool is_storage{};
|
||||
};
|
||||
|
||||
class Framebuffer : public VideoCore::FramebufferParams {
|
||||
public:
|
||||
explicit Framebuffer(TextureRuntime& runtime, const VideoCore::FramebufferParams& params,
|
||||
Surface* color, Surface* depth_stencil);
|
||||
~Framebuffer();
|
||||
|
||||
Framebuffer(const Framebuffer&) = delete;
|
||||
Framebuffer& operator=(const Framebuffer&) = delete;
|
||||
|
||||
Framebuffer(Framebuffer&& o) noexcept = default;
|
||||
Framebuffer& operator=(Framebuffer&& o) noexcept = default;
|
||||
|
||||
VideoCore::PixelFormat Format(VideoCore::SurfaceType type) const noexcept {
|
||||
return formats[Index(type)];
|
||||
}
|
||||
|
||||
[[nodiscard]] vk::ImageView ImageView(VideoCore::SurfaceType type) const noexcept {
|
||||
return image_views[Index(type)];
|
||||
}
|
||||
|
||||
[[nodiscard]] vk::Framebuffer Handle() const noexcept {
|
||||
return framebuffer.get();
|
||||
}
|
||||
|
||||
[[nodiscard]] std::array<vk::Image, 2> Images() const noexcept {
|
||||
return images;
|
||||
}
|
||||
|
||||
[[nodiscard]] std::array<vk::ImageAspectFlags, 2> Aspects() const noexcept {
|
||||
return aspects;
|
||||
}
|
||||
|
||||
[[nodiscard]] vk::RenderPass RenderPass() const noexcept {
|
||||
return render_pass;
|
||||
}
|
||||
|
||||
u32 Scale() const noexcept {
|
||||
return res_scale;
|
||||
}
|
||||
|
||||
u32 Width() const noexcept {
|
||||
return width;
|
||||
}
|
||||
|
||||
u32 Height() const noexcept {
|
||||
return height;
|
||||
}
|
||||
|
||||
private:
|
||||
std::array<vk::Image, 2> images{};
|
||||
std::array<vk::ImageView, 2> image_views{};
|
||||
vk::UniqueFramebuffer framebuffer;
|
||||
vk::RenderPass render_pass;
|
||||
std::array<vk::ImageAspectFlags, 2> aspects{};
|
||||
std::array<VideoCore::PixelFormat, 2> formats{VideoCore::PixelFormat::Invalid,
|
||||
VideoCore::PixelFormat::Invalid};
|
||||
u32 width{};
|
||||
u32 height{};
|
||||
u32 res_scale{1};
|
||||
};
|
||||
|
||||
class Sampler {
|
||||
public:
|
||||
Sampler(TextureRuntime& runtime, const VideoCore::SamplerParams& params);
|
||||
~Sampler();
|
||||
|
||||
Sampler(const Sampler&) = delete;
|
||||
Sampler& operator=(const Sampler&) = delete;
|
||||
|
||||
Sampler(Sampler&& o) noexcept = default;
|
||||
Sampler& operator=(Sampler&& o) noexcept = default;
|
||||
|
||||
[[nodiscard]] vk::Sampler Handle() const noexcept {
|
||||
return sampler.get();
|
||||
}
|
||||
|
||||
private:
|
||||
vk::UniqueSampler sampler;
|
||||
};
|
||||
|
||||
class DebugScope {
|
||||
public:
|
||||
template <typename... T>
|
||||
explicit DebugScope(TextureRuntime& runtime, Common::Vec4f color,
|
||||
fmt::format_string<T...> format, T... args)
|
||||
: DebugScope{runtime, color, fmt::format(format, std::forward<T>(args)...)} {}
|
||||
explicit DebugScope(TextureRuntime& runtime, Common::Vec4f color, std::string_view label);
|
||||
~DebugScope();
|
||||
|
||||
private:
|
||||
Scheduler& scheduler;
|
||||
bool has_debug_tool;
|
||||
};
|
||||
|
||||
struct Traits {
|
||||
using Runtime = Vulkan::TextureRuntime;
|
||||
using Surface = Vulkan::Surface;
|
||||
using Sampler = Vulkan::Sampler;
|
||||
using Framebuffer = Vulkan::Framebuffer;
|
||||
using DebugScope = Vulkan::DebugScope;
|
||||
};
|
||||
|
||||
using RasterizerCache = VideoCore::RasterizerCache<Traits>;
|
||||
|
||||
} // namespace Vulkan
|
|
@ -14,6 +14,7 @@
|
|||
#include "video_core/renderer_opengl/gl_vars.h"
|
||||
#include "video_core/renderer_opengl/renderer_opengl.h"
|
||||
#include "video_core/renderer_software/renderer_software.h"
|
||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
@ -39,6 +40,9 @@ void Init(Frontend::EmuWindow& emu_window, Frontend::EmuWindow* secondary_window
|
|||
case Settings::GraphicsAPI::Software:
|
||||
g_renderer = std::make_unique<SwRenderer::RendererSoftware>(system, emu_window);
|
||||
break;
|
||||
case Settings::GraphicsAPI::Vulkan:
|
||||
g_renderer = std::make_unique<Vulkan::RendererVulkan>(system, emu_window, secondary_window);
|
||||
break;
|
||||
case Settings::GraphicsAPI::OpenGL:
|
||||
g_renderer = std::make_unique<OpenGL::RendererOpenGL>(system, emu_window, secondary_window);
|
||||
break;
|
||||
|
|
Loading…
Reference in a new issue