renderer_vulkan: Add vulkan backend

This commit is contained in:
GPUCode 2023-07-25 22:16:41 +03:00
parent a8d590ae80
commit 59549a2eb6
45 changed files with 8914 additions and 21 deletions

View file

@ -242,6 +242,9 @@ void Java_org_citra_citra_1emu_NativeLibrary_SurfaceChanged(JNIEnv* env,
if (window) {
window->OnSurfaceChanged(s_surf);
}
if (VideoCore::g_renderer) {
VideoCore::g_renderer->NotifySurfaceChanged();
}
LOG_INFO(Frontend, "Surface changed");
}

View file

@ -7,6 +7,7 @@
#include "citra_qt/configuration/configure_graphics.h"
#include "common/settings.h"
#include "ui_configure_graphics.h"
#include "video_core/renderer_vulkan/vk_instance.h"
ConfigureGraphics::ConfigureGraphics(std::span<const QString> physical_devices, bool is_powered_on,
QWidget* parent)

View file

@ -31,6 +31,8 @@ std::string_view GetGraphicsAPIName(GraphicsAPI api) {
return "Software";
case GraphicsAPI::OpenGL:
return "OpenGL";
case GraphicsAPI::Vulkan:
return "Vulkan";
default:
return "Invalid";
}

View file

@ -101,18 +101,47 @@ add_library(video_core STATIC
renderer_software/sw_texturing.cpp
renderer_software/sw_texturing.h
renderer_vulkan/pica_to_vk.h
renderer_vulkan/renderer_vulkan.cpp
renderer_vulkan/renderer_vulkan.h
renderer_vulkan/vk_blit_helper.cpp
renderer_vulkan/vk_blit_helper.h
renderer_vulkan/vk_common.cpp
renderer_vulkan/vk_common.h
renderer_vulkan/vk_descriptor_pool.cpp
renderer_vulkan/vk_descriptor_pool.h
renderer_vulkan/vk_graphics_pipeline.cpp
renderer_vulkan/vk_graphics_pipeline.h
renderer_vulkan/vk_master_semaphore.cpp
renderer_vulkan/vk_master_semaphore.h
renderer_vulkan/vk_rasterizer.cpp
renderer_vulkan/vk_rasterizer.h
renderer_vulkan/vk_rasterizer_cache.cpp
renderer_vulkan/vk_scheduler.cpp
renderer_vulkan/vk_scheduler.h
renderer_vulkan/vk_resource_pool.cpp
renderer_vulkan/vk_resource_pool.h
renderer_vulkan/vk_instance.cpp
renderer_vulkan/vk_instance.h
renderer_vulkan/vk_pipeline_cache.cpp
renderer_vulkan/vk_pipeline_cache.h
renderer_vulkan/vk_platform.cpp
renderer_vulkan/vk_platform.h
renderer_vulkan/vk_present_window.cpp
renderer_vulkan/vk_present_window.h
renderer_vulkan/vk_renderpass_cache.cpp
renderer_vulkan/vk_renderpass_cache.h
renderer_vulkan/vk_shader_gen.cpp
renderer_vulkan/vk_shader_gen.h
renderer_vulkan/vk_shader_gen_spv.cpp
renderer_vulkan/vk_shader_gen_spv.h
renderer_vulkan/vk_shader_util.cpp
renderer_vulkan/vk_shader_util.h
renderer_vulkan/vk_stream_buffer.cpp
renderer_vulkan/vk_stream_buffer.h
renderer_vulkan/vk_swapchain.cpp
renderer_vulkan/vk_swapchain.h
renderer_vulkan/vk_texture_runtime.cpp
renderer_vulkan/vk_texture_runtime.h
shader/debug_data.h
shader/shader.cpp
shader/shader.h

View file

@ -708,8 +708,8 @@ FramebufferHelper<T> RasterizerCache<T>::GetFramebufferSurfaces(bool using_color
fb_rect = depth_rect;
}
const Surface* color_surface = color_id ? &slot_surfaces[color_id] : nullptr;
const Surface* depth_surface = depth_id ? &slot_surfaces[depth_id] : nullptr;
Surface* color_surface = color_id ? &slot_surfaces[color_id] : nullptr;
Surface* depth_surface = depth_id ? &slot_surfaces[depth_id] : nullptr;
if (color_id) {
color_level = color_surface->LevelOf(color_params.addr);
@ -722,7 +722,7 @@ FramebufferHelper<T> RasterizerCache<T>::GetFramebufferSurfaces(bool using_color
boost::icl::length(depth_vp_interval));
}
fb_params = FramebufferParams{
const FramebufferParams fb_params = {
.color_id = color_id,
.depth_id = depth_id,
.color_level = color_level,

View file

@ -26,7 +26,7 @@ using DiskResourceLoadCallback = std::function<void(LoadCallbackStage, std::size
class RasterizerInterface {
public:
virtual ~RasterizerInterface() {}
virtual ~RasterizerInterface() = default;
/// Queues the primitive formed by the given vertices for rendering
virtual void AddTriangle(const Pica::Shader::OutputVertex& v0,

View file

@ -159,6 +159,7 @@ struct FramebufferRegs {
} stencil_test;
union {
u32 depth_color_mask;
BitField<0, 1, u32> depth_test_enable;
BitField<4, 3, CompareFunc> depth_test_func;
BitField<8, 1, u32> red_enable;

View file

@ -63,6 +63,9 @@ public:
/// Synchronizes fixed function renderer state
virtual void Sync() {}
/// This is called to notify the rendering backend of a surface change
virtual void NotifySurfaceChanged() {}
/// Returns the resolution scale factor relative to the native 3DS screen resolution
u32 GetResolutionScaleFactor();

View file

@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/logging/log.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_texture_mailbox.h"

View file

@ -6,6 +6,7 @@
#include "common/logging/log.h"
#include "core/core.h"
#include "core/telemetry_session.h"
#include "video_core/regs.h"
#include "video_core/renderer_vulkan/vk_common.h"
@ -172,7 +173,10 @@ inline vk::PrimitiveTopology PrimitiveTopology(Pica::PipelineRegs::TriangleTopol
return vk::PrimitiveTopology::eTriangleList;
case Pica::PipelineRegs::TriangleTopology::Strip:
return vk::PrimitiveTopology::eTriangleStrip;
default:
UNREACHABLE_MSG("Unknown triangle topology {}", topology);
}
return vk::PrimitiveTopology::eTriangleList;
}
inline vk::CullModeFlags CullMode(Pica::RasterizerRegs::CullMode mode) {
@ -182,7 +186,10 @@ inline vk::CullModeFlags CullMode(Pica::RasterizerRegs::CullMode mode) {
case Pica::RasterizerRegs::CullMode::KeepClockWise:
case Pica::RasterizerRegs::CullMode::KeepCounterClockWise:
return vk::CullModeFlagBits::eBack;
default:
UNREACHABLE_MSG("Unknown cull mode {}", mode);
}
return vk::CullModeFlagBits::eNone;
}
inline vk::FrontFace FrontFace(Pica::RasterizerRegs::CullMode mode) {
@ -192,7 +199,10 @@ inline vk::FrontFace FrontFace(Pica::RasterizerRegs::CullMode mode) {
return vk::FrontFace::eCounterClockwise;
case Pica::RasterizerRegs::CullMode::KeepCounterClockWise:
return vk::FrontFace::eClockwise;
default:
UNREACHABLE_MSG("Unknown cull mode {}", mode);
}
return vk::FrontFace::eClockwise;
}
inline Common::Vec4f ColorRGBA8(const u32 color) {

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,139 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <condition_variable>
#include <mutex>
#include "common/common_types.h"
#include "common/math_util.h"
#include "core/hw/gpu.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_present_window.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
namespace Core {
class System;
class TelemetrySession;
} // namespace Core
namespace Memory {
class MemorySystem;
}
namespace Layout {
struct FramebufferLayout;
}
namespace Vulkan {
/// Bundles a Vulkan image with its view, its VMA allocation handle and basic metadata.
struct TextureInfo {
// Image dimensions — presumably in texels; TODO confirm against ConfigureFramebufferTexture
u32 width;
u32 height;
// Guest (GPU register) pixel format associated with this texture
GPU::Regs::PixelFormat format;
vk::Image image;
vk::ImageView image_view;
// VMA allocation handle — presumably the memory backing `image`; TODO confirm
VmaAllocation allocation;
};
/// Per-screen presentation state: a texture, a texture-coordinate rectangle and an image view.
struct ScreenInfo {
TextureInfo texture;
Common::Rectangle<f32> texcoords;
// NOTE(review): separate from texture.image_view — presumably the view that is actually
// sampled at present time; confirm against LoadFBToScreenInfo.
vk::ImageView image_view;
};
struct PresentUniformData {
std::array<f32, 4 * 4> modelview;
Common::Vec4f i_resolution;
Common::Vec4f o_resolution;
int screen_id_l = 0;
int screen_id_r = 0;
int layer = 0;
int reverse_interlaced = 0;
};
static_assert(sizeof(PresentUniformData) == 112,
"PresentUniformData does not structure in shader!");
/// Vulkan implementation of VideoCore::RendererBase.
/// NOTE(review): this header uses std::unique_ptr (second_window) but does not include <memory>
/// directly; it presumably arrives through another include — consider adding it explicitly.
class RendererVulkan : public VideoCore::RendererBase {
// Size of the present_pipelines and present_shaders arrays declared below
static constexpr std::size_t PRESENT_PIPELINES = 3;
public:
explicit RendererVulkan(Core::System& system, Frontend::EmuWindow& window,
Frontend::EmuWindow* secondary_window);
~RendererVulkan() override;
/// Returns a pointer to the rasterizer owned by this renderer
[[nodiscard]] VideoCore::RasterizerInterface* Rasterizer() override {
return &rasterizer;
}
/// Forwards the surface-changed notification to the main present window
void NotifySurfaceChanged() override {
main_window.NotifySurfaceChanged();
}
void SwapBuffers() override;
/// Intentionally empty in this backend
void TryPresent(int timeout_ms, bool is_secondary) override {}
void Sync() override;
private:
// Implementations live in renderer_vulkan.cpp, which is not visible here; the names below are
// the only documentation available from this header.
void ReportDriver() const;
void ReloadPipeline();
void CompileShaders();
void BuildLayouts();
void BuildPipelines();
void ConfigureFramebufferTexture(TextureInfo& texture,
const GPU::Regs::FramebufferConfig& framebuffer);
void ConfigureRenderPipeline();
void PrepareRendertarget();
void RenderScreenshot();
void PrepareDraw(Frame* frame, const Layout::FramebufferLayout& layout);
void RenderToWindow(PresentWindow& window, const Layout::FramebufferLayout& layout,
bool flipped);
void DrawScreens(Frame* frame, const Layout::FramebufferLayout& layout, bool flipped);
void DrawBottomScreen(const Layout::FramebufferLayout& layout,
const Common::Rectangle<u32>& bottom_screen);
void DrawTopScreen(const Layout::FramebufferLayout& layout,
const Common::Rectangle<u32>& top_screen);
void DrawSingleScreen(u32 screen_id, float x, float y, float w, float h,
Layout::DisplayOrientation orientation);
void DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r, float x, float y, float w,
float h, Layout::DisplayOrientation orientation);
void LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer,
ScreenInfo& screen_info, bool right_eye);
void LoadColorToActiveVkTexture(u8 color_r, u8 color_g, u8 color_b, const TextureInfo& texture);
private:
Memory::MemorySystem& memory;
Core::TelemetrySession& telemetry_session;
// NOTE(review): members are constructed in declaration order; the order below presumably
// reflects construction dependencies (e.g. scheduler needing instance) — keep it when editing.
Instance instance;
Scheduler scheduler;
RenderpassCache renderpass_cache;
DescriptorPool pool;
PresentWindow main_window;
StreamBuffer vertex_buffer;
RasterizerVulkan rasterizer;
// Optional second present window — presumably only created when a secondary EmuWindow is
// supplied to the constructor; confirm in the .cpp
std::unique_ptr<PresentWindow> second_window;
vk::UniquePipelineLayout present_pipeline_layout;
DescriptorSetProvider present_set_provider;
// Raw (non-Unique) handles: presumably destroyed manually in the destructor — confirm
std::array<vk::Pipeline, PRESENT_PIPELINES> present_pipelines;
std::array<vk::ShaderModule, PRESENT_PIPELINES> present_shaders;
std::array<vk::Sampler, 2> present_samplers;
vk::ShaderModule present_vertex_shader;
// Presumably an index into present_pipelines; confirm in the .cpp
u32 current_pipeline = 0;
std::array<ScreenInfo, 3> screen_infos{};
std::array<DescriptorData, 3> present_textures{};
PresentUniformData draw_info{};
vk::ClearColorValue clear_color{};
};
} // namespace Vulkan

View file

@ -0,0 +1,548 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/vector_math.h"
#include "video_core/renderer_vulkan/vk_blit_helper.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
#include "video_core/host_shaders/format_reinterpreter/vulkan_d24s8_to_rgba8_comp_spv.h"
#include "video_core/host_shaders/full_screen_triangle_vert_spv.h"
#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h"
#include "video_core/host_shaders/vulkan_depth_to_buffer_comp_spv.h"
namespace Vulkan {
using VideoCore::PixelFormat;
namespace {
/// Push constant block for the graphics blit path (vertex stage, see PUSH_CONSTANT_RANGE).
/// BindBlitState fills tex_scale with the source rectangle's width/height and tex_offset with
/// its left/bottom coordinates.
struct PushConstants {
std::array<float, 2> tex_scale;
std::array<float, 2> tex_offset;
};
/// Push constant block for the depth-to-buffer compute path.
/// DepthToBuffer fills src_offset with the copy rectangle's left/bottom and src_extent with the
/// source surface's extent.
struct ComputeInfo {
Common::Vec2i src_offset;
Common::Vec2i src_extent;
};
// Push constant range shared by both compute pipeline layouts: room for two Vec2i values
// (the members of ComputeInfo).
inline constexpr vk::PushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{
.stageFlags = vk::ShaderStageFlagBits::eCompute,
.offset = 0,
.size = 2 * sizeof(Common::Vec2i),
};
// Image-to-image compute path (ConvertDS24S8ToRGBA8): bindings 0/1 receive the source depth and
// stencil views, binding 2 the destination storage image.
constexpr std::array<vk::DescriptorSetLayoutBinding, 3> COMPUTE_BINDINGS = {{
{0, vk::DescriptorType::eSampledImage, 1, vk::ShaderStageFlagBits::eCompute},
{1, vk::DescriptorType::eSampledImage, 1, vk::ShaderStageFlagBits::eCompute},
{2, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eCompute},
}};
// Image-to-buffer compute path (DepthToBuffer): bindings 0/1 receive the sampled depth and
// stencil views, binding 2 the destination storage buffer.
constexpr std::array<vk::DescriptorSetLayoutBinding, 3> COMPUTE_BUFFER_BINDINGS = {{
{0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eCompute},
{1, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eCompute},
{2, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eCompute},
}};
// Fragment-stage bindings for the depth-stencil blit (BlitDepthStencil): depth and stencil views.
constexpr std::array<vk::DescriptorSetLayoutBinding, 2> TWO_TEXTURES_BINDINGS = {{
{0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment},
{1, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment},
}};
// Push constant range for the graphics blit pipeline layout: one PushConstants block, vertex stage.
inline constexpr vk::PushConstantRange PUSH_CONSTANT_RANGE{
.stageFlags = vk::ShaderStageFlagBits::eVertex,
.offset = 0,
.size = sizeof(PushConstants),
};
// Fixed-function state for the depth-stencil blit pipeline (see MakeDepthStencilBlitPipeline).

// No vertex bindings or attributes are declared; the draw call supplies no vertex buffers.
constexpr vk::PipelineVertexInputStateCreateInfo PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO{
.vertexBindingDescriptionCount = 0,
.pVertexBindingDescriptions = nullptr,
.vertexAttributeDescriptionCount = 0,
.pVertexAttributeDescriptions = nullptr,
};
constexpr vk::PipelineInputAssemblyStateCreateInfo PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO{
.topology = vk::PrimitiveTopology::eTriangleList,
.primitiveRestartEnable = VK_FALSE,
};
// One viewport and one scissor; the pointers are null because both are listed in DYNAMIC_STATES
// and are set at record time by BindBlitState.
constexpr vk::PipelineViewportStateCreateInfo PIPELINE_VIEWPORT_STATE_CREATE_INFO{
.viewportCount = 1,
.pViewports = nullptr,
.scissorCount = 1,
.pScissors = nullptr,
};
constexpr vk::PipelineRasterizationStateCreateInfo PIPELINE_RASTERIZATION_STATE_CREATE_INFO{
.depthClampEnable = VK_FALSE,
.rasterizerDiscardEnable = VK_FALSE,
.polygonMode = vk::PolygonMode::eFill,
.cullMode = vk::CullModeFlagBits::eBack,
.frontFace = vk::FrontFace::eClockwise,
.depthBiasEnable = VK_FALSE,
.depthBiasConstantFactor = 0.0f,
.depthBiasClamp = 0.0f,
.depthBiasSlopeFactor = 0.0f,
.lineWidth = 1.0f,
};
// Single-sampled, no sample shading
constexpr vk::PipelineMultisampleStateCreateInfo PIPELINE_MULTISAMPLE_STATE_CREATE_INFO{
.rasterizationSamples = vk::SampleCountFlagBits::e1,
.sampleShadingEnable = VK_FALSE,
.minSampleShading = 0.0f,
.pSampleMask = nullptr,
.alphaToCoverageEnable = VK_FALSE,
.alphaToOneEnable = VK_FALSE,
};
// State that is supplied at command-recording time instead of being baked into the pipeline
constexpr std::array DYNAMIC_STATES{
vk::DynamicState::eViewport,
vk::DynamicState::eScissor,
};
constexpr vk::PipelineDynamicStateCreateInfo PIPELINE_DYNAMIC_STATE_CREATE_INFO{
.dynamicStateCount = static_cast<u32>(DYNAMIC_STATES.size()),
.pDynamicStates = DYNAMIC_STATES.data(),
};
// Zero color attachments: the blit pipeline renders to a depth-stencil-only render pass
constexpr vk::PipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO{
.logicOpEnable = VK_FALSE,
.logicOp = vk::LogicOp::eClear,
.attachmentCount = 0,
.pAttachments = nullptr,
.blendConstants = std::array{0.0f, 0.0f, 0.0f, 0.0f},
};
// Depth test enabled with compare op "always" and depth writes on, so every fragment's depth is
// written. The stencil test is disabled here.
// NOTE(review): confirm that stencil values exported by the fragment shader are still written
// with stencilTestEnable = VK_FALSE on the targeted drivers.
constexpr vk::PipelineDepthStencilStateCreateInfo PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO{
.depthTestEnable = VK_TRUE,
.depthWriteEnable = VK_TRUE,
.depthCompareOp = vk::CompareOp::eAlways,
.depthBoundsTestEnable = VK_FALSE,
.stencilTestEnable = VK_FALSE,
.front = vk::StencilOpState{},
.back = vk::StencilOpState{},
.minDepthBounds = 0.0f,
.maxDepthBounds = 0.0f,
};
// Sampler description parameterized on the min/mag filter. The constructor instantiates it with
// vk::Filter::eLinear and vk::Filter::eNearest. Addressing clamps to an opaque white border,
// anisotropy and comparison are disabled, and the LOD range is fixed at [0, 0].
template <vk::Filter filter>
inline constexpr vk::SamplerCreateInfo SAMPLER_CREATE_INFO{
.magFilter = filter,
.minFilter = filter,
.mipmapMode = vk::SamplerMipmapMode::eNearest,
.addressModeU = vk::SamplerAddressMode::eClampToBorder,
.addressModeV = vk::SamplerAddressMode::eClampToBorder,
.addressModeW = vk::SamplerAddressMode::eClampToBorder,
.mipLodBias = 0.0f,
.anisotropyEnable = VK_FALSE,
.maxAnisotropy = 0.0f,
.compareEnable = VK_FALSE,
.compareOp = vk::CompareOp::eNever,
.minLod = 0.0f,
.maxLod = 0.0f,
.borderColor = vk::BorderColor::eFloatOpaqueWhite,
.unnormalizedCoordinates = VK_FALSE,
};
/// Builds the create-info for a pipeline layout with a single descriptor set layout and a
/// single push constant range.
/// @param set_layout Pointer to the descriptor set layout; must outlive the returned struct's use
/// @param compute    Selects COMPUTE_PUSH_CONSTANT_RANGE when true, PUSH_CONSTANT_RANGE otherwise
constexpr vk::PipelineLayoutCreateInfo PipelineLayoutCreateInfo(
    const vk::DescriptorSetLayout* set_layout, bool compute = false) {
    // Both ranges are namespace-scope constants, so the stored pointer stays valid.
    const vk::PushConstantRange* const push_range =
        compute ? &COMPUTE_PUSH_CONSTANT_RANGE : &PUSH_CONSTANT_RANGE;

    return vk::PipelineLayoutCreateInfo{
        .setLayoutCount = 1,
        .pSetLayouts = set_layout,
        .pushConstantRangeCount = 1,
        .pPushConstantRanges = push_range,
    };
}
/// Builds the vertex + fragment shader stage descriptions for a graphics pipeline.
/// Both stages use "main" as their entry point.
constexpr std::array<vk::PipelineShaderStageCreateInfo, 2> MakeStages(
    vk::ShaderModule vertex_shader, vk::ShaderModule fragment_shader) {
    const vk::PipelineShaderStageCreateInfo vertex_stage{
        .stage = vk::ShaderStageFlagBits::eVertex,
        .module = vertex_shader,
        .pName = "main",
    };
    const vk::PipelineShaderStageCreateInfo fragment_stage{
        .stage = vk::ShaderStageFlagBits::eFragment,
        .module = fragment_shader,
        .pName = "main",
    };
    return std::array{vertex_stage, fragment_stage};
}
/// Builds the single compute shader stage description, using "main" as the entry point.
constexpr vk::PipelineShaderStageCreateInfo MakeStages(vk::ShaderModule compute_shader) {
    const vk::PipelineShaderStageCreateInfo compute_stage{
        .stage = vk::ShaderStageFlagBits::eCompute,
        .module = compute_shader,
        .pName = "main",
    };
    return compute_stage;
}
} // Anonymous namespace
/// Creates every Vulkan object the helper needs: descriptor set providers, pipeline layouts,
/// shader modules, the two compute pipelines, the depth-stencil blit pipeline and two samplers.
///
/// The initializer order matters: members are initialized in their declaration order in the
/// header, and later initializers read earlier members (the pipeline layouts take the providers'
/// layouts, MakeComputePipeline takes the shader modules and layouts, and
/// MakeDepthStencilBlitPipeline reads full_screen_vert, blit_depth_stencil_frag and
/// two_textures_pipeline_layout). Keep this list and the header declarations in sync.
BlitHelper::BlitHelper(const Instance& instance_, Scheduler& scheduler_, DescriptorPool& pool,
RenderpassCache& renderpass_cache_)
: instance{instance_}, scheduler{scheduler_}, renderpass_cache{renderpass_cache_},
device{instance.GetDevice()}, compute_provider{instance, pool, COMPUTE_BINDINGS},
compute_buffer_provider{instance, pool, COMPUTE_BUFFER_BINDINGS},
two_textures_provider{instance, pool, TWO_TEXTURES_BINDINGS},
compute_pipeline_layout{
device.createPipelineLayout(PipelineLayoutCreateInfo(&compute_provider.Layout(), true))},
compute_buffer_pipeline_layout{device.createPipelineLayout(
PipelineLayoutCreateInfo(&compute_buffer_provider.Layout(), true))},
two_textures_pipeline_layout{
device.createPipelineLayout(PipelineLayoutCreateInfo(&two_textures_provider.Layout()))},
full_screen_vert{CompileSPV(FULL_SCREEN_TRIANGLE_VERT_SPV, device)},
d24s8_to_rgba8_comp{CompileSPV(VULKAN_D24S8_TO_RGBA8_COMP_SPV, device)},
depth_to_buffer_comp{CompileSPV(VULKAN_DEPTH_TO_BUFFER_COMP_SPV, device)},
blit_depth_stencil_frag{CompileSPV(VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV, device)},
d24s8_to_rgba8_pipeline{MakeComputePipeline(d24s8_to_rgba8_comp, compute_pipeline_layout)},
depth_to_buffer_pipeline{
MakeComputePipeline(depth_to_buffer_comp, compute_buffer_pipeline_layout)},
depth_blit_pipeline{MakeDepthStencilBlitPipeline()},
linear_sampler{device.createSampler(SAMPLER_CREATE_INFO<vk::Filter::eLinear>)},
nearest_sampler{device.createSampler(SAMPLER_CREATE_INFO<vk::Filter::eNearest>)} {}
/// Destroys the raw Vulkan handles created by the constructor, grouped by object type:
/// pipeline layouts, shader modules, pipelines, then samplers.
BlitHelper::~BlitHelper() {
    for (const vk::PipelineLayout layout :
         {compute_pipeline_layout, compute_buffer_pipeline_layout,
          two_textures_pipeline_layout}) {
        device.destroyPipelineLayout(layout);
    }

    for (const vk::ShaderModule shader_module :
         {full_screen_vert, d24s8_to_rgba8_comp, depth_to_buffer_comp,
          blit_depth_stencil_frag}) {
        device.destroyShaderModule(shader_module);
    }

    // depth_blit_pipeline may be a null handle when shader stencil export is unsupported
    // (see MakeDepthStencilBlitPipeline).
    for (const vk::Pipeline pipeline :
         {depth_to_buffer_pipeline, d24s8_to_rgba8_pipeline, depth_blit_pipeline}) {
        device.destroyPipeline(pipeline);
    }

    for (const vk::Sampler sampler : {linear_sampler, nearest_sampler}) {
        device.destroySampler(sampler);
    }
}
/// Records the dynamic state needed by a blit draw: viewport and scissor covering the
/// destination rectangle, plus vertex-stage push constants describing the source rectangle.
/// @param cmdbuf Command buffer being recorded
/// @param layout Pipeline layout the push constants are bound against
/// @param blit   Source/destination rectangles of the blit
void BindBlitState(vk::CommandBuffer cmdbuf, vk::PipelineLayout layout,
                   const VideoCore::TextureBlit& blit) {
    const auto& dst = blit.dst_rect;
    const auto& src = blit.src_rect;

    // Use the smaller coordinate on each axis as the origin, whichever way the rect is oriented.
    const vk::Offset2D origin{
        .x = std::min<s32>(dst.left, dst.right),
        .y = std::min<s32>(dst.bottom, dst.top),
    };
    const vk::Extent2D size{
        .width = dst.GetWidth(),
        .height = dst.GetHeight(),
    };

    const vk::Viewport viewport{
        .x = static_cast<float>(origin.x),
        .y = static_cast<float>(origin.y),
        .width = static_cast<float>(size.width),
        .height = static_cast<float>(size.height),
        .minDepth = 0.0f,
        .maxDepth = 1.0f,
    };
    const vk::Rect2D scissor{
        .offset = origin,
        .extent = size,
    };

    const PushConstants push_constants{
        .tex_scale = {static_cast<float>(src.GetWidth()), static_cast<float>(src.GetHeight())},
        .tex_offset = {static_cast<float>(src.left), static_cast<float>(src.bottom)},
    };

    cmdbuf.setViewport(0, viewport);
    cmdbuf.setScissor(0, scissor);
    cmdbuf.pushConstants(layout, vk::ShaderStageFlagBits::eVertex, 0, sizeof(push_constants),
                         &push_constants);
}
/// Blits depth and stencil from `source` into `dest` by drawing three vertices with the
/// depth-stencil blit pipeline inside dest's render pass.
/// @param source Surface whose depth and stencil views are sampled
/// @param dest   Surface whose framebuffer is rendered into
/// @param blit   Source/destination rectangles, forwarded to BindBlitState
/// @return false when the device does not support shader stencil export, true otherwise
bool BlitHelper::BlitDepthStencil(Surface& source, Surface& dest,
const VideoCore::TextureBlit& blit) {
// Without stencil export the blit pipeline was never created (MakeDepthStencilBlitPipeline
// returns a null handle in that case), so bail out early.
if (!instance.IsShaderStencilExportSupported()) {
LOG_ERROR(Render_Vulkan, "Unable to emulate depth stencil images");
return false;
}
// The render area spans the whole scaled destination; the actual blit region is restricted by
// the viewport/scissor set in BindBlitState.
const vk::Rect2D dst_render_area = {
.offset = {0, 0},
.extent = {dest.GetScaledWidth(), dest.GetScaledHeight()},
};
// Binding 0 = depth view, binding 1 = stencil view (matches TWO_TEXTURES_BINDINGS)
std::array<DescriptorData, 2> textures{};
textures[0].image_info = vk::DescriptorImageInfo{
.sampler = nearest_sampler,
.imageView = source.DepthView(),
.imageLayout = vk::ImageLayout::eGeneral,
};
textures[1].image_info = vk::DescriptorImageInfo{
.sampler = nearest_sampler,
.imageView = source.StencilView(),
.imageLayout = vk::ImageLayout::eGeneral,
};
const auto descriptor_set = two_textures_provider.Acquire(textures);
// Depth-only render pass: the color format is PixelFormat::Invalid
const RenderPass depth_pass = {
.framebuffer = dest.Framebuffer(),
.render_pass =
renderpass_cache.GetRenderpass(PixelFormat::Invalid, dest.pixel_format, false),
.render_area = dst_render_area,
};
renderpass_cache.BeginRendering(depth_pass);
// `blit` and `descriptor_set` are captured by value because the recorded lambda may run after
// this function returns — presumably on the scheduler's worker; confirm against Scheduler.
scheduler.Record([blit, descriptor_set, this](vk::CommandBuffer cmdbuf) {
const vk::PipelineLayout layout = two_textures_pipeline_layout;
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, depth_blit_pipeline);
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, layout, 0, descriptor_set, {});
BindBlitState(cmdbuf, layout, blit);
cmdbuf.draw(3, 1, 0, 0);
});
// A different graphics pipeline was bound above, so mark the scheduler's pipeline state dirty.
scheduler.MakeDirty(StateFlags::Pipeline);
return true;
}
/// Runs the d24s8_to_rgba8 compute pipeline, reading the depth and stencil views of `source`
/// and writing to the image view of `dest`.
/// @param source Depth-stencil surface that is read
/// @param dest   Color surface that is written as a storage image
/// @param blit   Only blit.src_rect is used: its left/bottom is pushed as the source offset and
///               its width/height determine the dispatch size
/// @return Always true
bool BlitHelper::ConvertDS24S8ToRGBA8(Surface& source, Surface& dest,
const VideoCore::TextureBlit& blit) {
// Binding order matches COMPUTE_BINDINGS: depth, stencil, destination storage image
std::array<DescriptorData, 3> textures{};
textures[0].image_info = vk::DescriptorImageInfo{
.imageView = source.DepthView(),
.imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
};
textures[1].image_info = vk::DescriptorImageInfo{
.imageView = source.StencilView(),
.imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
};
textures[2].image_info = vk::DescriptorImageInfo{
.imageView = dest.ImageView(),
.imageLayout = vk::ImageLayout::eGeneral,
};
const auto descriptor_set = compute_provider.Acquire(textures);
// Compute dispatches are recorded outside of a render pass
renderpass_cache.EndRendering();
scheduler.Record([this, descriptor_set, blit, src_image = source.Image(),
dst_image = dest.Image()](vk::CommandBuffer cmdbuf) {
// Before the dispatch: move the source to a read-only depth-stencil layout and make prior
// depth-stencil writes visible to shader reads; move the destination to eGeneral for writes.
const std::array pre_barriers = {
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite,
.dstAccessMask = vk::AccessFlagBits::eShaderRead,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = src_image,
.subresourceRange{
.aspectMask =
vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
},
// NOTE(review): oldLayout = eUndefined permits the driver to discard the destination's
// existing contents; this is only safe if the dispatch overwrites everything that is
// read afterwards — confirm for blits that cover part of dest.
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eNone,
.dstAccessMask = vk::AccessFlagBits::eShaderWrite,
.oldLayout = vk::ImageLayout::eUndefined,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = dst_image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
},
};
// After the dispatch: return the source to eGeneral for depth-stencil attachment use and
// make the destination's shader writes visible to transfer reads.
const std::array post_barriers = {
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eShaderRead,
.dstAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite |
vk::AccessFlagBits::eDepthStencilAttachmentRead,
.oldLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = src_image,
.subresourceRange{
.aspectMask =
vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
},
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eShaderWrite,
.dstAccessMask = vk::AccessFlagBits::eTransferRead,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = dst_image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
}};
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eEarlyFragmentTests |
vk::PipelineStageFlagBits::eLateFragmentTests,
vk::PipelineStageFlagBits::eComputeShader,
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers);
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0,
descriptor_set, {});
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, d24s8_to_rgba8_pipeline);
// Only the first Vec2i of the compute push constant range (the source offset) is pushed here
const auto src_offset = Common::MakeVec(blit.src_rect.left, blit.src_rect.bottom);
cmdbuf.pushConstants(compute_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0,
sizeof(Common::Vec2i), src_offset.AsArray());
// Integer division: a width/height that is not a multiple of 8 leaves the remainder without
// a workgroup — presumably the shader's local size is 8x8; TODO confirm.
cmdbuf.dispatch(blit.src_rect.GetWidth() / 8, blit.src_rect.GetHeight() / 8, 1);
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
vk::PipelineStageFlagBits::eEarlyFragmentTests |
vk::PipelineStageFlagBits::eLateFragmentTests |
vk::PipelineStageFlagBits::eTransfer,
vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers);
});
return true;
}
/// Runs the depth_to_buffer compute pipeline, reading the depth and stencil views of `source`
/// and writing into `buffer`.
/// @param source Depth-stencil surface that is read
/// @param buffer Destination buffer, bound as a storage buffer
/// @param copy   Supplies the bound buffer range (buffer_offset/buffer_size) and the texture
///               rectangle whose left/bottom and width/height drive the dispatch
/// @return Always true
bool BlitHelper::DepthToBuffer(Surface& source, vk::Buffer buffer,
const VideoCore::BufferTextureCopy& copy) {
// Binding order matches COMPUTE_BUFFER_BINDINGS: depth, stencil, destination storage buffer
std::array<DescriptorData, 3> textures{};
textures[0].image_info = vk::DescriptorImageInfo{
.sampler = nearest_sampler,
.imageView = source.DepthView(),
.imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
};
textures[1].image_info = vk::DescriptorImageInfo{
.sampler = nearest_sampler,
.imageView = source.StencilView(),
.imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
};
textures[2].buffer_info = vk::DescriptorBufferInfo{
.buffer = buffer,
.offset = copy.buffer_offset,
.range = copy.buffer_size,
};
const auto descriptor_set = compute_buffer_provider.Acquire(textures);
// Compute dispatches are recorded outside of a render pass
renderpass_cache.EndRendering();
scheduler.Record([this, descriptor_set, copy, src_image = source.Image(),
extent = source.RealExtent(false)](vk::CommandBuffer cmdbuf) {
// Before the dispatch: move the source to a read-only depth-stencil layout and make prior
// depth-stencil attachment writes visible to shader reads.
const vk::ImageMemoryBarrier pre_barrier = {
.srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite,
.dstAccessMask = vk::AccessFlagBits::eShaderRead,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = src_image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
// After the dispatch: return the source to eGeneral for depth-stencil attachment use.
// NOTE(review): only the image is covered by a barrier here; the shader's writes to `buffer`
// are presumably synchronized by the caller — confirm.
const vk::ImageMemoryBarrier post_barrier = {
.srcAccessMask = vk::AccessFlagBits::eShaderRead,
.dstAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite |
vk::AccessFlagBits::eDepthStencilAttachmentRead,
.oldLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = src_image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eEarlyFragmentTests |
vk::PipelineStageFlagBits::eLateFragmentTests,
vk::PipelineStageFlagBits::eComputeShader,
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier);
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_buffer_pipeline_layout,
0, descriptor_set, {});
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, depth_to_buffer_pipeline);
// The full ComputeInfo block (source offset + source extent) is pushed for this shader
const ComputeInfo info = {
.src_offset = Common::Vec2i{static_cast<int>(copy.texture_rect.left),
static_cast<int>(copy.texture_rect.bottom)},
.src_extent =
Common::Vec2i{static_cast<int>(extent.width), static_cast<int>(extent.height)},
};
cmdbuf.pushConstants(compute_buffer_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0,
sizeof(ComputeInfo), &info);
// Integer division: a width/height that is not a multiple of 8 leaves the remainder without
// a workgroup — presumably the shader's local size is 8x8; TODO confirm.
cmdbuf.dispatch(copy.texture_rect.GetWidth() / 8, copy.texture_rect.GetHeight() / 8, 1);
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
vk::PipelineStageFlagBits::eEarlyFragmentTests |
vk::PipelineStageFlagBits::eLateFragmentTests |
vk::PipelineStageFlagBits::eTransfer,
vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier);
});
return true;
}
/// Creates a compute pipeline from a single shader module.
/// @param shader Compute shader module; its entry point is "main" (see MakeStages)
/// @param layout Pipeline layout the pipeline is created against
/// @return The created pipeline. On failure a critical message is logged and UNREACHABLE() is
///         hit; a null handle is returned if execution continues past it.
vk::Pipeline BlitHelper::MakeComputePipeline(vk::ShaderModule shader, vk::PipelineLayout layout) {
    const vk::ComputePipelineCreateInfo compute_info = {
        .stage = MakeStages(shader),
        .layout = layout,
    };

    const auto result = device.createComputePipeline({}, compute_info);
    if (result.result != vk::Result::eSuccess) {
        LOG_CRITICAL(Render_Vulkan, "Compute pipeline creation failed!");
        UNREACHABLE();
        // Every path must return a value, matching MakeDepthStencilBlitPipeline; otherwise
        // control would flow off the end of a non-void function if UNREACHABLE() returns.
        return VK_NULL_HANDLE;
    }
    return result.value;
}
/// Creates the graphics pipeline used by BlitDepthStencil.
/// @return The created pipeline, or a null handle when the device does not support shader
///         stencil export (or if execution continues after a creation failure).
vk::Pipeline BlitHelper::MakeDepthStencilBlitPipeline() {
    if (!instance.IsShaderStencilExportSupported()) {
        return VK_NULL_HANDLE;
    }

    const auto shader_stages = MakeStages(full_screen_vert, blit_depth_stencil_frag);
    // Depth-only render pass: no color attachment, D24S8 depth-stencil attachment.
    const auto depth_renderpass = renderpass_cache.GetRenderpass(
        VideoCore::PixelFormat::Invalid, VideoCore::PixelFormat::D24S8, false);

    vk::GraphicsPipelineCreateInfo pipeline_info = {
        .stageCount = static_cast<u32>(shader_stages.size()),
        .pStages = shader_stages.data(),
        .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
        .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
        .pTessellationState = nullptr,
        .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
        .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
        .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
        .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
        .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO,
        .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
        .layout = two_textures_pipeline_layout,
        .renderPass = depth_renderpass,
    };

    const auto creation = device.createGraphicsPipeline({}, pipeline_info);
    if (creation.result != vk::Result::eSuccess) {
        LOG_CRITICAL(Render_Vulkan, "Depth stencil blit pipeline creation failed!");
        UNREACHABLE();
        return VK_NULL_HANDLE;
    }
    return creation.value;
}
} // namespace Vulkan

View file

@ -0,0 +1,71 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/rasterizer_cache/pixel_format.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
namespace VideoCore {
struct TextureBlit;
struct BufferTextureCopy;
} // namespace VideoCore
namespace Vulkan {
class Instance;
class RenderpassCache;
class Scheduler;
class Surface;
/// Owns the pipelines, shader modules, layouts and samplers used for depth-stencil blits and
/// depth-stencil conversions that are implemented with shaders.
class BlitHelper {
// TextureRuntime is granted access to the private members below
friend class TextureRuntime;
public:
BlitHelper(const Instance& instance, Scheduler& scheduler, DescriptorPool& pool,
RenderpassCache& renderpass_cache);
~BlitHelper();
/// Blits depth-stencil data between surfaces with a graphics pipeline.
/// Returns false when shader stencil export is unsupported.
bool BlitDepthStencil(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit);
/// Converts a depth-stencil surface into a color surface with a compute shader
bool ConvertDS24S8ToRGBA8(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit);
/// Writes depth-stencil data of a surface into a buffer with a compute shader
bool DepthToBuffer(Surface& source, vk::Buffer buffer,
const VideoCore::BufferTextureCopy& copy);
private:
/// Creates compute pipelines used for blit
vk::Pipeline MakeComputePipeline(vk::ShaderModule shader, vk::PipelineLayout layout);
/// Creates graphics pipelines used for blit
vk::Pipeline MakeDepthStencilBlitPipeline();
private:
// The constructor's initializer list relies on this declaration order (later members are built
// from earlier ones); do not reorder without updating it.
const Instance& instance;
Scheduler& scheduler;
RenderpassCache& renderpass_cache;
vk::Device device;
// NOTE(review): never initialized or used in vk_blit_helper.cpp — possibly a leftover; check
// TextureRuntime (a friend) before removing.
vk::RenderPass r32_renderpass;
DescriptorSetProvider compute_provider;
DescriptorSetProvider compute_buffer_provider;
DescriptorSetProvider two_textures_provider;
vk::PipelineLayout compute_pipeline_layout;
vk::PipelineLayout compute_buffer_pipeline_layout;
vk::PipelineLayout two_textures_pipeline_layout;
vk::ShaderModule full_screen_vert;
vk::ShaderModule d24s8_to_rgba8_comp;
vk::ShaderModule depth_to_buffer_comp;
vk::ShaderModule blit_depth_stencil_frag;
vk::Pipeline d24s8_to_rgba8_pipeline;
vk::Pipeline depth_to_buffer_pipeline;
// Null handle when shader stencil export is unsupported
vk::Pipeline depth_blit_pipeline;
vk::Sampler linear_sampler;
vk::Sampler nearest_sampler;
};
} // namespace Vulkan

View file

@ -9,6 +9,7 @@
#define VK_NO_PROTOTYPES
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
#define VULKAN_HPP_NO_CONSTRUCTORS
#define VULKAN_HPP_NO_UNION_CONSTRUCTORS
#define VULKAN_HPP_NO_STRUCT_SETTERS
#include <vulkan/vulkan.hpp>

View file

@ -0,0 +1,141 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/microprofile.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_instance.h"
namespace Vulkan {
// Profiler token used by DescriptorSetProvider::Acquire. Colour channels are 8-bit, so the blue
// channel is 255; a value of 256 does not fit in the channel.
MICROPROFILE_DEFINE(Vulkan_DescriptorSetAcquire, "Vulkan", "Descriptor Set Acquire",
                    MP_RGB(64, 128, 255));

/// Number of descriptor sets a DescriptorSetProvider requests from the pool in one go.
constexpr u32 MAX_BATCH_SIZE = 8;
/// Creates the allocator with one descriptor pool ready for use.
DescriptorPool::DescriptorPool(const Instance& instance_) : instance{instance_} {
    vk::UniqueDescriptorPool& first_pool = pools.emplace_back();
    first_pool = CreatePool();
}

/// The unique handles stored in `pools` release the pools.
DescriptorPool::~DescriptorPool() = default;
/**
 * Allocates `num_sets` descriptor sets that all use `layout`.
 *
 * Pools are tried in creation order. When a pool reports that it is out of memory the next
 * pool is tried, and a fresh pool is created once every existing pool has been exhausted.
 *
 * @param layout   Layout shared by every returned set.
 * @param num_sets Number of sets to allocate; zero yields an empty vector.
 * @return The allocated descriptor sets.
 */
std::vector<vk::DescriptorSet> DescriptorPool::Allocate(vk::DescriptorSetLayout layout,
                                                        u32 num_sets) {
    // Nothing to allocate; avoid submitting a zero-sized request to the driver.
    if (num_sets == 0) {
        return {};
    }

    // pSetLayouts must reference descriptorSetCount entries, so the array is sized by the
    // request rather than by a fixed batch size.
    const std::vector<vk::DescriptorSetLayout> layouts(num_sets, layout);

    u32 current_pool = 0;
    vk::DescriptorSetAllocateInfo alloc_info = {
        .descriptorPool = *pools[current_pool],
        .descriptorSetCount = num_sets,
        .pSetLayouts = layouts.data(),
    };

    while (true) {
        try {
            return instance.GetDevice().allocateDescriptorSets(alloc_info);
        } catch (const vk::OutOfPoolMemoryError&) {
            // The current pool is full: move on, growing the pool list when needed.
            current_pool++;
            if (current_pool == pools.size()) {
                LOG_INFO(Render_Vulkan, "Run out of pools, creating new one!");
                auto& pool = pools.emplace_back();
                pool = CreatePool();
            }
            alloc_info.descriptorPool = *pools[current_pool];
        }
    }
}
/// Convenience overload that allocates exactly one descriptor set using `layout`.
vk::DescriptorSet DescriptorPool::Allocate(vk::DescriptorSetLayout layout) {
    const std::vector<vk::DescriptorSet> single = Allocate(layout, 1);
    return single.front();
}
/// Creates one descriptor pool with a fixed capacity per descriptor type.
vk::UniqueDescriptorPool DescriptorPool::CreatePool() {
    // Per-type capacities intended to be large enough for most games.
    static constexpr std::array<vk::DescriptorPoolSize, 6> capacities = {{
        {vk::DescriptorType::eUniformBufferDynamic, 64},
        {vk::DescriptorType::eUniformTexelBuffer, 64},
        {vk::DescriptorType::eCombinedImageSampler, 4096},
        {vk::DescriptorType::eSampledImage, 256},
        {vk::DescriptorType::eStorageImage, 256},
        {vk::DescriptorType::eStorageBuffer, 32},
    }};

    const vk::Device device = instance.GetDevice();
    const vk::DescriptorPoolCreateInfo create_info = {
        .maxSets = 4098,
        .poolSizeCount = static_cast<u32>(capacities.size()),
        .pPoolSizes = capacities.data(),
    };
    return device.createDescriptorPoolUnique(create_info);
}
/**
 * Builds the descriptor set layout and the matching update template for `bindings`.
 *
 * Template entry N reads its data from the N-th DescriptorData slot of the array handed to
 * Acquire. NOTE(review): `bindings` is expected to hold at most MAX_DESCRIPTORS elements;
 * nothing here checks it.
 */
DescriptorSetProvider::DescriptorSetProvider(
    const Instance& instance, DescriptorPool& pool_,
    std::span<const vk::DescriptorSetLayoutBinding> bindings)
    : pool{pool_}, device{instance.GetDevice()} {
    const u32 binding_count = static_cast<u32>(bindings.size());

    std::array<vk::DescriptorUpdateTemplateEntry, MAX_DESCRIPTORS> entries;
    u32 slot = 0;
    for (const vk::DescriptorSetLayoutBinding& binding : bindings) {
        entries[slot] = vk::DescriptorUpdateTemplateEntry{
            .dstBinding = binding.binding,
            .dstArrayElement = 0,
            .descriptorCount = binding.descriptorCount,
            .descriptorType = binding.descriptorType,
            .offset = slot * sizeof(DescriptorData),
            .stride = sizeof(DescriptorData),
        };
        ++slot;
    }

    const vk::DescriptorSetLayoutCreateInfo layout_ci = {
        .bindingCount = binding_count,
        .pBindings = bindings.data(),
    };
    layout = device.createDescriptorSetLayoutUnique(layout_ci);

    // The template targets the layout created just above.
    const vk::DescriptorUpdateTemplateCreateInfo template_ci = {
        .descriptorUpdateEntryCount = binding_count,
        .pDescriptorUpdateEntries = entries.data(),
        .templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet,
        .descriptorSetLayout = *layout,
    };
    update_template = device.createDescriptorUpdateTemplateUnique(template_ci);
}

/// The unique handles release the layout and the update template.
DescriptorSetProvider::~DescriptorSetProvider() = default;
/**
 * Returns a descriptor set whose contents match `data`, reusing a cached set when the same
 * data was requested before.
 *
 * NOTE(review): `data` must be non-empty and hold at most MAX_DESCRIPTORS elements; neither
 * is checked here.
 */
vk::DescriptorSet DescriptorSetProvider::Acquire(std::span<const DescriptorData> data) {
    MICROPROFILE_SCOPE(Vulkan_DescriptorSetAcquire);

    // The cache key is a fixed-size array; slots beyond `data` keep their initial state.
    DescriptorSetData key{};
    std::memcpy(key.data(), data.data(), data.size_bytes());

    const auto [entry, inserted] = descriptor_set_map.try_emplace(key);
    if (inserted) {
        // Refill the free list with a whole batch once it runs dry.
        if (free_sets.empty()) {
            free_sets = pool.Allocate(*layout, MAX_BATCH_SIZE);
        }
        entry.value() = free_sets.back();
        free_sets.pop_back();
        device.updateDescriptorSetWithTemplate(entry->second, *update_template, data[0]);
    }
    return entry->second;
}
/**
 * Drops every cached descriptor set whose key mentions `image_view` and returns the set to
 * the free list for reuse.
 *
 * NOTE(review): every slot of the key is inspected through `image_info`, whatever kind of
 * descriptor was actually stored in it.
 */
void DescriptorSetProvider::FreeWithImage(vk::ImageView image_view) {
    const auto mentions_view = [image_view](auto& info) {
        return info.image_info.imageView == image_view;
    };

    auto it = descriptor_set_map.begin();
    while (it != descriptor_set_map.end()) {
        const auto& [data, set] = *it;
        if (!std::any_of(data.begin(), data.end(), mentions_view)) {
            ++it;
            continue;
        }
        free_sets.push_back(set);
        // erase() hands back the iterator following the removed entry.
        it = descriptor_set_map.erase(it);
    }
}
} // namespace Vulkan

View file

@ -0,0 +1,92 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <span>
#include <vector>
#include <tsl/robin_map.h>
#include "common/hash.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Instance;
/// Number of DescriptorData slots in a DescriptorSetData key, and therefore the largest number
/// of bindings a DescriptorSetProvider can describe.
constexpr u32 MAX_DESCRIPTORS = 7;
/// Storage for the data of one descriptor: image info, buffer info or a texel buffer view.
/// NOTE(review): which member is meaningful presumably follows the descriptor type of the
/// binding the slot belongs to - confirm against callers.
union DescriptorData {
vk::DescriptorImageInfo image_info;
vk::DescriptorBufferInfo buffer_info;
vk::BufferView buffer_view;
/// Bytewise comparison over the whole union storage, regardless of the member last written.
bool operator==(const DescriptorData& other) const noexcept {
return std::memcmp(this, &other, sizeof(DescriptorData)) == 0;
}
};
/// Fixed-size key describing the contents of one descriptor set.
using DescriptorSetData = std::array<DescriptorData, MAX_DESCRIPTORS>;

/// Hash functor for DescriptorSetData that hashes the raw bytes of the whole array.
struct DataHasher {
    u64 operator()(const DescriptorSetData& data) const noexcept {
        constexpr std::size_t key_bytes = sizeof(DescriptorSetData);
        return Common::ComputeHash64(data.data(), key_bytes);
    }
};
/**
 * An interface for allocating descriptor sets that manages a collection of descriptor pools.
 * Additional pools are created on demand when the existing ones run out of memory.
 */
class DescriptorPool {
public:
explicit DescriptorPool(const Instance& instance);
~DescriptorPool();
/// Allocates `num_sets` descriptor sets that all use `layout`.
std::vector<vk::DescriptorSet> Allocate(vk::DescriptorSetLayout layout, u32 num_sets);
/// Allocates a single descriptor set that uses `layout`.
vk::DescriptorSet Allocate(vk::DescriptorSetLayout layout);
private:
/// Creates one more Vulkan descriptor pool.
vk::UniqueDescriptorPool CreatePool();
private:
const Instance& instance;
/// Pools in creation order; allocation starts with the first one.
std::vector<vk::UniqueDescriptorPool> pools;
};
/**
 * Allocates and caches descriptor sets of a specific layout.
 * Sets are keyed by the descriptor data they were written with, so asking for the same data
 * again returns the cached set.
 */
class DescriptorSetProvider {
public:
explicit DescriptorSetProvider(const Instance& instance, DescriptorPool& pool,
std::span<const vk::DescriptorSetLayoutBinding> bindings);
~DescriptorSetProvider();
/// Returns a descriptor set written with `data`, creating and updating one if none is cached.
vk::DescriptorSet Acquire(std::span<const DescriptorData> data);
/// Removes cached sets that reference `image_view` and makes them available for reuse.
void FreeWithImage(vk::ImageView image_view);
/// Layout handle, returned by value.
[[nodiscard]] vk::DescriptorSetLayout Layout() const noexcept {
return *layout;
}
/// Layout handle, returned by reference to the handle stored in this object.
[[nodiscard]] vk::DescriptorSetLayout& Layout() noexcept {
return layout.get();
}
/// Update template created for this provider's bindings.
[[nodiscard]] vk::DescriptorUpdateTemplate UpdateTemplate() const noexcept {
return *update_template;
}
private:
DescriptorPool& pool;
vk::Device device;
vk::UniqueDescriptorSetLayout layout;
vk::UniqueDescriptorUpdateTemplate update_template;
/// Allocated sets that are not currently associated with a cache key.
std::vector<vk::DescriptorSet> free_sets;
/// Cache from descriptor data to the set that was written with it.
tsl::robin_map<DescriptorSetData, vk::DescriptorSet, DataHasher> descriptor_set_map;
};
} // namespace Vulkan

View file

@ -0,0 +1,290 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <boost/container/static_vector.hpp>
#include "common/hash.h"
#include "common/microprofile.h"
#include "video_core/renderer_vulkan/pica_to_vk.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
namespace Vulkan {
// Profiler token used by GraphicsPipeline::Build.
MICROPROFILE_DEFINE(Vulkan_Pipeline, "Vulkan", "Pipeline Building", MP_RGB(0, 192, 32));
/// Maps a slot of the shader stage array to its Vulkan stage flag:
/// 0 is vertex, 1 is fragment and 2 is geometry. Any other index is reported as an error.
vk::ShaderStageFlagBits MakeShaderStage(std::size_t index) {
    if (index == 0) {
        return vk::ShaderStageFlagBits::eVertex;
    }
    if (index == 1) {
        return vk::ShaderStageFlagBits::eFragment;
    }
    if (index == 2) {
        return vk::ShaderStageFlagBits::eGeometry;
    }

    LOG_CRITICAL(Render_Vulkan, "Invalid shader stage index!");
    UNREACHABLE();
    return vk::ShaderStageFlagBits::eVertex;
}
/**
 * Computes the lookup hash of this pipeline description.
 *
 * The vertex layout, attachments and blending state always take part. Rasterization and
 * depth/stencil state only take part when the device lacks extended dynamic state.
 */
u64 PipelineInfo::Hash(const Instance& instance) const {
    u64 combined = 0;
    const auto mix = [&combined](const auto& part) {
        combined = Common::HashCombine(combined, Common::ComputeStructHash64(part));
    };

    mix(vertex_layout);
    mix(attachments);
    mix(blending);

    const bool state_is_static = !instance.IsExtendedDynamicStateSupported();
    if (state_is_static) {
        mix(rasterization);
        mix(depth_stencil);
    }
    return combined;
}
/// Creates a shader without a module; the handle is not marked as done.
Shader::Shader(const Instance& instance) : device{instance.GetDevice()} {}

/// Compiles `code` for `stage` right away and marks the handle as done.
Shader::Shader(const Instance& instance, vk::ShaderStageFlagBits stage, std::string code)
    : Shader{instance} {
    const vk::Device compile_device = instance.GetDevice();
    module = Compile(code, stage, compile_device);
    MarkDone();
}

/// Destroys the shader module when both the device and the module handle are set.
Shader::~Shader() {
    if (!device || !module) {
        return;
    }
    device.destroyShaderModule(module);
}
/// Stores everything needed to build the pipeline later; no Vulkan object is created here.
/// `worker_` may be null, in which case TryBuild builds on the calling thread.
GraphicsPipeline::GraphicsPipeline(const Instance& instance_, RenderpassCache& renderpass_cache_,
const PipelineInfo& info_, vk::PipelineCache pipeline_cache_,
vk::PipelineLayout layout_, std::array<Shader*, 3> stages_,
Common::ThreadWorker* worker_)
: instance{instance_}, renderpass_cache{renderpass_cache_}, worker{worker_},
pipeline_layout{layout_}, pipeline_cache{pipeline_cache_}, info{info_}, stages{stages_} {}
/// The unique pipeline handle releases the pipeline.
GraphicsPipeline::~GraphicsPipeline() = default;
/**
 * Starts building the pipeline if that has not happened yet.
 *
 * @param wait_built When false, the call gives up if a shader is still compiling and first
 *                   tries a cache-only build where the device supports it.
 * @return false only when `wait_built` is false and a shader is not ready yet.
 */
bool GraphicsPipeline::TryBuild(bool wait_built) {
    // A build was already handed to the worker.
    if (is_pending) {
        return true;
    }

    if (!wait_built) {
        // If the shaders haven't been compiled yet, we cannot proceed
        const auto still_compiling = [](Shader* shader) { return shader && !shader->IsDone(); };
        if (std::any_of(stages.begin(), stages.end(), still_compiling)) {
            return false;
        }

        // Ask the driver if it can give us the pipeline quickly
        if (instance.IsPipelineCreationCacheControlSupported() && Build(true)) {
            return true;
        }
    }

    // Fallback to (a)synchronous compilation
    if (!worker) {
        Build();
        return true;
    }
    worker->QueueWork([this] { Build(); });
    is_pending = true;
    return true;
}
/**
 * Translates the stored PipelineInfo into Vulkan create-info structures and creates the
 * graphics pipeline.
 *
 * @param fail_on_compile_required When true the driver is asked to fail instead of compiling,
 *        and false is returned if it reports that compilation would be required.
 * @return true once the pipeline was created and this handle was marked as done.
 */
bool GraphicsPipeline::Build(bool fail_on_compile_required) {
MICROPROFILE_SCOPE(Vulkan_Pipeline);
const vk::Device device = instance.GetDevice();
// Vertex buffer bindings; bindings flagged as fixed use the per-instance input rate.
std::array<vk::VertexInputBindingDescription, MAX_VERTEX_BINDINGS> bindings;
for (u32 i = 0; i < info.vertex_layout.binding_count; i++) {
const auto& binding = info.vertex_layout.bindings[i];
bindings[i] = vk::VertexInputBindingDescription{
.binding = binding.binding,
.stride = binding.stride,
.inputRate = binding.fixed.Value() ? vk::VertexInputRate::eInstance
: vk::VertexInputRate::eVertex,
};
}
// Vertex attributes; formats come from the instance's format traits.
std::array<vk::VertexInputAttributeDescription, MAX_VERTEX_ATTRIBUTES> attributes;
for (u32 i = 0; i < info.vertex_layout.attribute_count; i++) {
const auto& attr = info.vertex_layout.attributes[i];
const FormatTraits& traits = instance.GetTraits(attr.type, attr.size);
attributes[i] = vk::VertexInputAttributeDescription{
.location = attr.location,
.binding = attr.binding,
.format = traits.native,
.offset = attr.offset,
};
// At the end there's always the fixed binding which takes up
// at least 16 bytes so we should always be able to alias.
if (traits.needs_emulation) {
const FormatTraits& comp_four_traits = instance.GetTraits(attr.type, 4);
attributes[i].format = comp_four_traits.native;
}
}
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
.vertexBindingDescriptionCount = info.vertex_layout.binding_count,
.pVertexBindingDescriptions = bindings.data(),
.vertexAttributeDescriptionCount = info.vertex_layout.attribute_count,
.pVertexAttributeDescriptions = attributes.data(),
};
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
.topology = PicaToVK::PrimitiveTopology(info.rasterization.topology),
.primitiveRestartEnable = false,
};
// Both cull mode and front face are derived from the PICA cull mode.
const vk::PipelineRasterizationStateCreateInfo raster_state = {
.depthClampEnable = false,
.rasterizerDiscardEnable = false,
.cullMode = PicaToVK::CullMode(info.rasterization.cull_mode),
.frontFace = PicaToVK::FrontFace(info.rasterization.cull_mode),
.depthBiasEnable = false,
.lineWidth = 1.0f,
};
const vk::PipelineMultisampleStateCreateInfo multisampling = {
.rasterizationSamples = vk::SampleCountFlagBits::e1,
.sampleShadingEnable = false,
};
const vk::PipelineColorBlendAttachmentState colorblend_attachment = {
.blendEnable = info.blending.blend_enable,
.srcColorBlendFactor = PicaToVK::BlendFunc(info.blending.src_color_blend_factor),
.dstColorBlendFactor = PicaToVK::BlendFunc(info.blending.dst_color_blend_factor),
.colorBlendOp = PicaToVK::BlendEquation(info.blending.color_blend_eq),
.srcAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.src_alpha_blend_factor),
.dstAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.dst_alpha_blend_factor),
.alphaBlendOp = PicaToVK::BlendEquation(info.blending.alpha_blend_eq),
.colorWriteMask = static_cast<vk::ColorComponentFlags>(info.blending.color_write_mask),
};
// The hardware logic op is only enabled when blending is off and no emulation is needed.
const vk::PipelineColorBlendStateCreateInfo color_blending = {
.logicOpEnable = !info.blending.blend_enable && !instance.NeedsLogicOpEmulation(),
.logicOp = PicaToVK::LogicOp(info.blending.logic_op),
.attachmentCount = 1,
.pAttachments = &colorblend_attachment,
.blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f},
};
// Viewport and scissor are listed as dynamic states below, so these are placeholder values.
const vk::Viewport viewport = {
.x = 0.0f,
.y = 0.0f,
.width = 1.0f,
.height = 1.0f,
.minDepth = 0.0f,
.maxDepth = 1.0f,
};
const vk::Rect2D scissor = {
.offset = {0, 0},
.extent = {1, 1},
};
const vk::PipelineViewportStateCreateInfo viewport_info = {
.viewportCount = 1,
.pViewports = &viewport,
.scissorCount = 1,
.pScissors = &scissor,
};
// Dynamic states that are always used.
boost::container::static_vector<vk::DynamicState, 20> dynamic_states = {
vk::DynamicState::eViewport, vk::DynamicState::eScissor,
vk::DynamicState::eStencilCompareMask, vk::DynamicState::eStencilWriteMask,
vk::DynamicState::eStencilReference, vk::DynamicState::eBlendConstants,
};
// Additional dynamic states when the device supports extended dynamic state.
if (instance.IsExtendedDynamicStateSupported()) {
constexpr std::array extended = {
vk::DynamicState::eCullModeEXT, vk::DynamicState::eDepthCompareOpEXT,
vk::DynamicState::eDepthTestEnableEXT, vk::DynamicState::eDepthWriteEnableEXT,
vk::DynamicState::eFrontFaceEXT, vk::DynamicState::ePrimitiveTopologyEXT,
vk::DynamicState::eStencilOpEXT, vk::DynamicState::eStencilTestEnableEXT,
};
dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end());
}
const vk::PipelineDynamicStateCreateInfo dynamic_info = {
.dynamicStateCount = static_cast<u32>(dynamic_states.size()),
.pDynamicStates = dynamic_states.data(),
};
// The same stencil operations are used for front and back faces.
const vk::StencilOpState stencil_op_state = {
.failOp = PicaToVK::StencilOp(info.depth_stencil.stencil_fail_op),
.passOp = PicaToVK::StencilOp(info.depth_stencil.stencil_pass_op),
.depthFailOp = PicaToVK::StencilOp(info.depth_stencil.stencil_depth_fail_op),
.compareOp = PicaToVK::CompareFunc(info.depth_stencil.stencil_compare_op),
};
const vk::PipelineDepthStencilStateCreateInfo depth_info = {
.depthTestEnable = static_cast<u32>(info.depth_stencil.depth_test_enable.Value()),
.depthWriteEnable = static_cast<u32>(info.depth_stencil.depth_write_enable.Value()),
.depthCompareOp = PicaToVK::CompareFunc(info.depth_stencil.depth_compare_op),
.depthBoundsTestEnable = false,
.stencilTestEnable = static_cast<u32>(info.depth_stencil.stencil_test_enable.Value()),
.front = stencil_op_state,
.back = stencil_op_state,
};
// Collect the attached shader stages, skipping empty slots and waiting for each module.
u32 shader_count = 0;
std::array<vk::PipelineShaderStageCreateInfo, MAX_SHADER_STAGES> shader_stages;
for (std::size_t i = 0; i < stages.size(); i++) {
Shader* shader = stages[i];
if (!shader) {
continue;
}
shader->WaitDone();
shader_stages[shader_count++] = vk::PipelineShaderStageCreateInfo{
.stage = MakeShaderStage(i),
.module = shader->Handle(),
.pName = "main",
};
}
vk::GraphicsPipelineCreateInfo pipeline_info = {
.stageCount = shader_count,
.pStages = shader_stages.data(),
.pVertexInputState = &vertex_input_info,
.pInputAssemblyState = &input_assembly,
.pViewportState = &viewport_info,
.pRasterizationState = &raster_state,
.pMultisampleState = &multisampling,
.pDepthStencilState = &depth_info,
.pColorBlendState = &color_blending,
.pDynamicState = &dynamic_info,
.layout = pipeline_layout,
.renderPass =
renderpass_cache.GetRenderpass(info.attachments.color, info.attachments.depth, false),
};
if (fail_on_compile_required) {
pipeline_info.flags |= vk::PipelineCreateFlagBits::eFailOnPipelineCompileRequiredEXT;
}
// "Compile required" is an expected outcome of the cache-only attempt, not an error.
auto result = device.createGraphicsPipelineUnique(pipeline_cache, pipeline_info);
if (result.result == vk::Result::eSuccess) {
pipeline = std::move(result.value);
} else if (result.result == vk::Result::eErrorPipelineCompileRequiredEXT) {
return false;
} else {
UNREACHABLE_MSG("Graphics pipeline creation failed!");
}
MarkDone();
return true;
}
} // namespace Vulkan

View file

@ -0,0 +1,192 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once

#include <array>
#include <atomic>
#include <condition_variable>
#include <cstring>
#include <mutex>
#include <string>

#include "common/thread_worker.h"
#include "video_core/rasterizer_cache/pixel_format.h"
#include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/renderer_vulkan/vk_shader_gen.h"
namespace Common {

/**
 * Completion flag for work that may finish on another thread.
 * The producer calls MarkDone(); consumers either poll IsDone() or block in WaitDone().
 */
struct AsyncHandle {
public:
    AsyncHandle(bool is_done_ = false) : is_done{is_done_} {}

    /// Returns whether the work was marked as done.
    /// The load uses acquire ordering so that a caller observing `true` also observes the
    /// writes the producer made before MarkDone().
    [[nodiscard]] bool IsDone() const noexcept {
        return is_done.load(std::memory_order_acquire);
    }

    /// Blocks the calling thread until the work is marked as done.
    void WaitDone() noexcept {
        std::unique_lock lock{mutex};
        // The mutex already orders this load against MarkDone(), so relaxed is sufficient.
        condvar.wait(lock, [this] { return is_done.load(std::memory_order_relaxed); });
    }

    /// Sets the completion state and wakes every thread blocked in WaitDone().
    void MarkDone(bool done = true) noexcept {
        std::scoped_lock lock{mutex};
        is_done = done;
        condvar.notify_all();
    }

private:
    std::condition_variable condvar;
    std::mutex mutex;
    std::atomic_bool is_done{false};
};

} // namespace Common
namespace Vulkan {
// Backend classes that are only referenced through pointers or references in this file.
class Instance;
class RenderpassCache;
/// Number of shader stage slots of a graphics pipeline.
constexpr u32 MAX_SHADER_STAGES = 3;
/// Capacity of the attribute array in VertexLayout.
constexpr u32 MAX_VERTEX_ATTRIBUTES = 16;
/// Capacity of the binding array in VertexLayout.
constexpr u32 MAX_VERTEX_BINDINGS = 16;
/**
 * The pipeline state is tightly packed with bitfields to reduce
 * the overhead of hashing as much as possible
 *
 * Rasterization state packed into one byte: topology in bits 0-1, cull mode in bits 4-5.
 */
union RasterizationState {
u8 value = 0;
BitField<0, 2, Pica::PipelineRegs::TriangleTopology> topology;
BitField<4, 2, Pica::RasterizerRegs::CullMode> cull_mode;
};
/// Depth and stencil state packed into 32 bits: three enable flags in bits 0-2 followed by
/// 3-bit compare functions and stencil actions up to bit 17.
union DepthStencilState {
u32 value = 0;
BitField<0, 1, u32> depth_test_enable;
BitField<1, 1, u32> depth_write_enable;
BitField<2, 1, u32> stencil_test_enable;
BitField<3, 3, Pica::FramebufferRegs::CompareFunc> depth_compare_op;
BitField<6, 3, Pica::FramebufferRegs::StencilAction> stencil_fail_op;
BitField<9, 3, Pica::FramebufferRegs::StencilAction> stencil_pass_op;
BitField<12, 3, Pica::FramebufferRegs::StencilAction> stencil_depth_fail_op;
BitField<15, 3, Pica::FramebufferRegs::CompareFunc> stencil_compare_op;
};
/// Colour blending state: enable flag, write mask and logic op, plus the blend factors and
/// equations packed into the 32-bit `value`.
struct BlendingState {
// Only `value` below has a default initializer; these three members do not.
u16 blend_enable;
u16 color_write_mask;
Pica::FramebufferRegs::LogicOp logic_op;
union {
u32 value = 0;
BitField<0, 4, Pica::FramebufferRegs::BlendFactor> src_color_blend_factor;
BitField<4, 4, Pica::FramebufferRegs::BlendFactor> dst_color_blend_factor;
BitField<8, 3, Pica::FramebufferRegs::BlendEquation> color_blend_eq;
BitField<11, 4, Pica::FramebufferRegs::BlendFactor> src_alpha_blend_factor;
BitField<15, 4, Pica::FramebufferRegs::BlendFactor> dst_alpha_blend_factor;
BitField<19, 3, Pica::FramebufferRegs::BlendEquation> alpha_blend_eq;
};
};
struct DynamicState {
u32 blend_color = 0;
u8 stencil_reference;
u8 stencil_compare_mask;
u8 stencil_write_mask;
bool operator==(const DynamicState& other) const noexcept {
return std::memcmp(this, &other, sizeof(DynamicState)) == 0;
}
};
/// One vertex buffer binding packed into 16 bits: binding index (bits 0-3), fixed flag (bit 4)
/// and stride (bits 5-15).
union VertexBinding {
u16 value = 0;
BitField<0, 4, u16> binding;
BitField<4, 1, u16> fixed;
BitField<5, 11, u16> stride;
};
/// One vertex attribute packed into 32 bits: binding, shader location, component format,
/// component count (`size`) and offset.
union VertexAttribute {
u32 value = 0;
BitField<0, 4, u32> binding;
BitField<4, 4, u32> location;
BitField<8, 3, Pica::PipelineRegs::VertexAttributeFormat> type;
BitField<11, 3, u32> size;
BitField<14, 11, u32> offset;
};
/// Vertex input description: the first `binding_count` / `attribute_count` array entries are
/// the ones in use.
struct VertexLayout {
u8 binding_count;
u8 attribute_count;
std::array<VertexBinding, MAX_VERTEX_BINDINGS> bindings;
std::array<VertexAttribute, MAX_VERTEX_ATTRIBUTES> attributes;
};
/// Pixel formats of the colour and depth attachments the pipeline renders to.
struct AttachmentInfo {
VideoCore::PixelFormat color;
VideoCore::PixelFormat depth;
};
/**
 * Information about a graphics/compute pipeline
 */
struct PipelineInfo {
VertexLayout vertex_layout;
BlendingState blending;
AttachmentInfo attachments;
RasterizationState rasterization;
DepthStencilState depth_stencil;
DynamicState dynamic;
/// Hash used to look the pipeline up; see the definition for which members take part.
[[nodiscard]] u64 Hash(const Instance& instance) const;
/// Returns true when this state can write to the depth/stencil attachment: either depth test
/// and depth write are both enabled, or the attachment is D24S8 with the stencil test enabled
/// and a non-zero stencil write mask.
[[nodiscard]] bool IsDepthWriteEnabled() const noexcept {
const bool has_stencil = attachments.depth == VideoCore::PixelFormat::D24S8;
const bool depth_write =
depth_stencil.depth_test_enable && depth_stencil.depth_write_enable;
const bool stencil_write =
has_stencil && depth_stencil.stencil_test_enable && dynamic.stencil_write_mask != 0;
return depth_write || stencil_write;
}
};
/// A shader module together with a completion flag (inherited from AsyncHandle) that tells
/// whether the module is ready to be used.
struct Shader : public Common::AsyncHandle {
/// Creates a shader without a module.
explicit Shader(const Instance& instance);
/// Compiles `code` for `stage` immediately.
explicit Shader(const Instance& instance, vk::ShaderStageFlagBits stage, std::string code);
/// Destroys the module if one was created.
~Shader();
[[nodiscard]] vk::ShaderModule Handle() const noexcept {
return module;
}
vk::ShaderModule module;
vk::Device device;
std::string program;
};
/// A graphics pipeline that can be built on the calling thread or on a worker thread. The
/// inherited AsyncHandle is marked as done when the Vulkan pipeline has been created.
class GraphicsPipeline : public Common::AsyncHandle {
public:
explicit GraphicsPipeline(const Instance& instance, RenderpassCache& renderpass_cache,
const PipelineInfo& info, vk::PipelineCache pipeline_cache,
vk::PipelineLayout layout, std::array<Shader*, 3> stages,
Common::ThreadWorker* worker);
~GraphicsPipeline();
/// Starts a build if needed; returns false when `wait_built` is false and the shaders are
/// not ready yet.
bool TryBuild(bool wait_built);
/// Creates the pipeline; returns false when `fail_on_compile_required` is set and the driver
/// would have to compile.
bool Build(bool fail_on_compile_required = false);
/// Pipeline handle; only meaningful after the build has completed.
[[nodiscard]] vk::Pipeline Handle() const noexcept {
return *pipeline;
}
private:
const Instance& instance;
RenderpassCache& renderpass_cache;
/// Optional worker used for asynchronous builds; may be null.
Common::ThreadWorker* worker;
vk::UniquePipeline pipeline;
vk::PipelineLayout pipeline_layout;
vk::PipelineCache pipeline_cache;
PipelineInfo info;
/// Shader stage slots; a null entry means the stage is not used.
std::array<Shader*, 3> stages;
/// Set once a build has been queued on the worker.
bool is_pending{};
};
} // namespace Vulkan

View file

@ -210,12 +210,16 @@ FormatTraits Instance::DetermineTraits(VideoCore::PixelFormat pixel_format, vk::
best_usage |= vk::ImageUsageFlagBits::eSampled | vk::ImageUsageFlagBits::eTransferDst |
vk::ImageUsageFlagBits::eTransferSrc;
}
if (supports_attachment) {
// Attachment flag is only needed for color and depth formats.
if (supports_attachment &&
VideoCore::GetFormatType(pixel_format) != VideoCore::SurfaceType::Texture) {
best_usage |= (format_aspect & vk::ImageAspectFlagBits::eDepth)
? vk::ImageUsageFlagBits::eDepthStencilAttachment
: vk::ImageUsageFlagBits::eColorAttachment;
}
if (supports_storage) {
// Storage flag is only needed for shadow rendering with RGBA8 texture.
// Keeping it disabled can boost performance on mobile drivers.
if (supports_storage && pixel_format == VideoCore::PixelFormat::RGBA8) {
best_usage |= vk::ImageUsageFlagBits::eStorage;
}

View file

@ -0,0 +1,207 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <limits>
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
namespace Vulkan {
/// Timeout passed to the semaphore and fence waits below: the largest representable value.
constexpr u64 WAIT_TIMEOUT = std::numeric_limits<u64>::max();
/// Creates the timeline semaphore with an initial counter value of zero.
MasterSemaphoreTimeline::MasterSemaphoreTimeline(const Instance& instance_) : instance{instance_} {
    const vk::Device device = instance.GetDevice();
    const vk::StructureChain create_chain = {
        vk::SemaphoreCreateInfo{},
        vk::SemaphoreTypeCreateInfoKHR{
            .semaphoreType = vk::SemaphoreType::eTimeline,
            .initialValue = 0,
        },
    };
    semaphore = device.createSemaphoreUnique(create_chain.get());
}

/// The unique handle releases the semaphore.
MasterSemaphoreTimeline::~MasterSemaphoreTimeline() = default;
/// Reads the timeline counter from the device and publishes it through `gpu_tick`.
/// Nothing is stored when the value read is lower than the cached tick; otherwise the
/// compare-exchange is retried until it succeeds.
void MasterSemaphoreTimeline::Refresh() {
u64 this_tick{};
u64 counter{};
do {
this_tick = gpu_tick.load(std::memory_order_acquire);
counter = instance.GetDevice().getSemaphoreCounterValueKHR(*semaphore);
// Never move the cached tick backwards.
if (counter < this_tick) {
return;
}
} while (!gpu_tick.compare_exchange_weak(this_tick, counter, std::memory_order_release,
std::memory_order_relaxed));
}
/// Blocks until the timeline semaphore has reached `tick`.
void MasterSemaphoreTimeline::Wait(u64 tick) {
    // Cheap path: the cached GPU tick may already be far enough.
    bool reached = IsFree(tick);
    if (!reached) {
        // Update the GPU tick and try again
        Refresh();
        reached = IsFree(tick);
    }
    if (reached) {
        return;
    }

    // If none of the above is hit, fallback to a regular wait
    const vk::SemaphoreWaitInfoKHR wait_info = {
        .semaphoreCount = 1,
        .pSemaphores = &semaphore.get(),
        .pValues = &tick,
    };
    vk::Result status;
    do {
        status = instance.GetDevice().waitSemaphoresKHR(&wait_info, WAIT_TIMEOUT);
    } while (status != vk::Result::eSuccess);

    Refresh();
}
/**
 * Ends `cmdbuf` and submits it to the graphics queue.
 *
 * The submission always waits for the timeline semaphore to reach `signal_value - 1` and
 * signals it with `signal_value`. When `wait` / `signal` are non-null they are appended as a
 * second wait / signal semaphore.
 */
void MasterSemaphoreTimeline::SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait,
vk::Semaphore signal, u64 signal_value) {
cmdbuf.end();
// Entry 0 is the timeline semaphore; entry 1 is only counted when the optional semaphore is set.
const u32 num_signal_semaphores = signal ? 2U : 1U;
const std::array signal_values{signal_value, u64(0)};
const std::array signal_semaphores{Handle(), signal};
const u32 num_wait_semaphores = wait ? 2U : 1U;
const std::array wait_values{signal_value - 1, u64(1)};
const std::array wait_semaphores{Handle(), wait};
static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks = {
vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eColorAttachmentOutput,
};
const vk::TimelineSemaphoreSubmitInfoKHR timeline_si = {
.waitSemaphoreValueCount = num_wait_semaphores,
.pWaitSemaphoreValues = wait_values.data(),
.signalSemaphoreValueCount = num_signal_semaphores,
.pSignalSemaphoreValues = signal_values.data(),
};
const vk::SubmitInfo submit_info = {
.pNext = &timeline_si,
.waitSemaphoreCount = num_wait_semaphores,
.pWaitSemaphores = wait_semaphores.data(),
.pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1u,
.pCommandBuffers = &cmdbuf,
.signalSemaphoreCount = num_signal_semaphores,
.pSignalSemaphores = signal_semaphores.data(),
};
// A lost device is logged and treated as unrecoverable.
try {
instance.GetGraphicsQueue().submit(submit_info);
} catch (vk::DeviceLostError& err) {
LOG_CRITICAL(Render_Vulkan, "Device lost during submit: {}", err.what());
UNREACHABLE();
}
}
/// Number of fences created up front.
constexpr u64 FENCE_RESERVE = 8;

/// Pre-creates FENCE_RESERVE fences and starts the thread that waits on submitted fences.
MasterSemaphoreFence::MasterSemaphoreFence(const Instance& instance_) : instance{instance_} {
    const vk::Device device{instance.GetDevice()};
    u64 created = 0;
    while (created < FENCE_RESERVE) {
        free_queue.push(device.createFenceUnique({}));
        ++created;
    }
    wait_thread = std::jthread([this](std::stop_token token) { WaitThread(token); });
}

/// Members are destroyed in reverse declaration order.
MasterSemaphoreFence::~MasterSemaphoreFence() = default;
/// No-op: in this implementation `gpu_tick` is advanced by WaitThread.
void MasterSemaphoreFence::Refresh() {}
/// Blocks until `gpu_tick` has reached `tick`, sleeping on the atomic between changes.
void MasterSemaphoreFence::Wait(u64 tick) {
    u64 observed = gpu_tick.load(std::memory_order_relaxed);
    while (observed < tick) {
        // Sleeps until gpu_tick no longer holds `observed`.
        gpu_tick.wait(observed);
        observed = gpu_tick.load(std::memory_order_relaxed);
    }
}
/**
 * Ends `cmdbuf`, submits it to the graphics queue together with a fence, and hands the fence
 * to the wait thread along with `signal_value`.
 *
 * `wait` and `signal` are optional; a null handle results in a semaphore count of zero.
 */
void MasterSemaphoreFence::SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait,
vk::Semaphore signal, u64 signal_value) {
cmdbuf.end();
const u32 num_signal_semaphores = signal ? 1U : 0U;
const u32 num_wait_semaphores = wait ? 1U : 0U;
static constexpr std::array<vk::PipelineStageFlags, 1> wait_stage_masks = {
vk::PipelineStageFlagBits::eColorAttachmentOutput,
};
const vk::SubmitInfo submit_info = {
.waitSemaphoreCount = num_wait_semaphores,
.pWaitSemaphores = &wait,
.pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1u,
.pCommandBuffers = &cmdbuf,
.signalSemaphoreCount = num_signal_semaphores,
.pSignalSemaphores = &signal,
};
// Reuse a pooled fence when one is available.
vk::UniqueFence fence{GetFreeFence()};
try {
instance.GetGraphicsQueue().submit(submit_info, *fence);
} catch (vk::DeviceLostError& err) {
LOG_CRITICAL(Render_Vulkan, "Device lost during submit: {}", err.what());
UNREACHABLE();
}
// Queue the fence for the wait thread and wake it up.
std::scoped_lock lock{wait_mutex};
wait_queue.push({
.handle = std::move(fence),
.signal_value = signal_value,
});
wait_cv.notify_one();
}
/**
 * Body of the wait thread. Takes submitted fences from `wait_queue` in order, waits for each
 * one, publishes its signal value through `gpu_tick` and returns the fence to `free_queue`.
 * The loop ends when a stop is requested through `token`.
 */
void MasterSemaphoreFence::WaitThread(std::stop_token token) {
const vk::Device device{instance.GetDevice()};
while (!token.stop_requested()) {
Fence fence;
{
// Sleep until a fence is queued or a stop is requested.
std::unique_lock lock{wait_mutex};
Common::CondvarWait(wait_cv, lock, token, [this] { return !wait_queue.empty(); });
if (token.stop_requested()) {
return;
}
fence = std::move(wait_queue.front());
wait_queue.pop();
}
// The fence wait itself happens outside the queue lock.
const vk::Result result = device.waitForFences(*fence.handle, true, WAIT_TIMEOUT);
if (result != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Fence wait failed with error {}", vk::to_string(result));
UNREACHABLE();
}
device.resetFences(*fence.handle);
// Publish the tick and wake threads blocked in Wait().
gpu_tick.store(fence.signal_value);
gpu_tick.notify_all();
std::scoped_lock lock{free_mutex};
free_queue.push(std::move(fence.handle));
}
}
/// Returns a fence from the free queue, or creates a new one when the queue is empty.
vk::UniqueFence MasterSemaphoreFence::GetFreeFence() {
    std::scoped_lock lock{free_mutex};
    if (!free_queue.empty()) {
        vk::UniqueFence recycled{std::move(free_queue.front())};
        free_queue.pop();
        return recycled;
    }
    return instance.GetDevice().createFenceUnique({});
}
} // namespace Vulkan

View file

@ -0,0 +1,107 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <atomic>
#include <condition_variable>
#include <queue>
#include "common/common_types.h"
#include "common/polyfill_thread.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Instance;
class Scheduler;
/// Common interface for tracking GPU progress with a monotonically increasing tick.
/// `current_tick` is the logical (CPU side) tick, `gpu_tick` the last tick known to be
/// reached by the GPU.
class MasterSemaphore {
public:
virtual ~MasterSemaphore() = default;
/// Returns the current logical tick.
[[nodiscard]] u64 CurrentTick() const noexcept {
return current_tick.load(std::memory_order_acquire);
}
/// Returns the last GPU tick known to this object.
[[nodiscard]] u64 KnownGpuTick() const noexcept {
return gpu_tick.load(std::memory_order_acquire);
}
/// Returns true when the known GPU tick has reached `tick`.
[[nodiscard]] bool IsFree(u64 tick) const noexcept {
return KnownGpuTick() >= tick;
}
/// Advances the logical tick by one and returns the value it had before the increment.
[[nodiscard]] u64 NextTick() noexcept {
return current_tick.fetch_add(1, std::memory_order_release);
}
/// Refresh the known GPU tick
virtual void Refresh() = 0;
/// Waits for a tick to be hit on the GPU
virtual void Wait(u64 tick) = 0;
/// Submits the provided command buffer for execution
virtual void SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, vk::Semaphore signal,
u64 signal_value) = 0;
protected:
std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick.
std::atomic<u64> current_tick{1}; ///< Current logical tick.
};
/// MasterSemaphore implementation backed by a Vulkan timeline semaphore.
class MasterSemaphoreTimeline : public MasterSemaphore {
public:
explicit MasterSemaphoreTimeline(const Instance& instance);
~MasterSemaphoreTimeline() override;
/// Returns the timeline semaphore handle.
[[nodiscard]] vk::Semaphore Handle() const noexcept {
return semaphore.get();
}
/// Queries the semaphore counter and updates the known GPU tick.
void Refresh() override;
/// Blocks until the semaphore has reached `tick`.
void Wait(u64 tick) override;
/// Submits `cmdbuf`, signalling the timeline semaphore with `signal_value`.
void SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, vk::Semaphore signal,
u64 signal_value) override;
private:
const Instance& instance;
vk::UniqueSemaphore semaphore; ///< Timeline semaphore.
};
/// MasterSemaphore implementation backed by fences: every submission carries a fence, and a
/// dedicated thread waits on the fences in order and advances the known GPU tick.
class MasterSemaphoreFence : public MasterSemaphore {
public:
explicit MasterSemaphoreFence(const Instance& instance);
~MasterSemaphoreFence() override;
/// Does nothing in this implementation.
void Refresh() override;
/// Blocks until the known GPU tick has reached `tick`.
void Wait(u64 tick) override;
/// Submits `cmdbuf` with a fence and queues that fence for the wait thread.
void SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, vk::Semaphore signal,
u64 signal_value) override;
private:
/// Body of `wait_thread`.
void WaitThread(std::stop_token token);
/// Returns a reusable fence, creating one when none is free.
vk::UniqueFence GetFreeFence();
private:
const Instance& instance;
/// A submitted fence and the tick to publish once it has signalled.
struct Fence {
vk::UniqueFence handle;
u64 signal_value;
};
std::queue<vk::UniqueFence> free_queue; ///< Fences ready for reuse; guarded by free_mutex.
std::queue<Fence> wait_queue; ///< Submitted fences in submission order; guarded by wait_mutex.
std::mutex free_mutex;
std::mutex wait_mutex;
std::condition_variable_any wait_cv;
// Declared last, so it is the first member to be destroyed.
std::jthread wait_thread;
};
} // namespace Vulkan

View file

@ -0,0 +1,519 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <boost/container/static_vector.hpp>
#include "common/common_paths.h"
#include "common/file_util.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/settings.h"
#include "video_core/renderer_vulkan/pica_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_gen_spv.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
// Profiler token named "Pipeline Bind"; NOTE(review): its users lie outside the visible part
// of this file.
MICROPROFILE_DEFINE(Vulkan_Bind, "Vulkan", "Pipeline Bind", MP_RGB(192, 32, 32));
namespace Vulkan {
/// Slot of each shader stage inside the per-stage arrays (current_shaders, shader_hashes).
enum ProgramType : u32 {
    VS = 0,
    FS = 1,
    GS = 2,
};
/// Returns the byte size of a vertex attribute made of `size` components of the given format.
/// Formats not handled by the switch yield 0.
u32 AttribBytes(Pica::PipelineRegs::VertexAttributeFormat format, u32 size) {
    using Format = Pica::PipelineRegs::VertexAttributeFormat;

    // Size of a single component; stays 0 for unrecognised formats
    u32 component_bytes = 0;
    switch (format) {
    case Format::BYTE:
    case Format::UBYTE:
        component_bytes = sizeof(u8);
        break;
    case Format::SHORT:
        component_bytes = sizeof(u16);
        break;
    case Format::FLOAT:
        component_bytes = sizeof(float);
        break;
    }
    return component_bytes * size;
}
/// Maps a PICA vertex attribute format to the load flag describing how it is read:
/// BYTE and SHORT map to Sint, UBYTE maps to Uint, anything else maps to Float.
AttribLoadFlags MakeAttribLoadFlag(Pica::PipelineRegs::VertexAttributeFormat format) {
    using Format = Pica::PipelineRegs::VertexAttributeFormat;

    if (format == Format::BYTE || format == Format::SHORT) {
        return AttribLoadFlags::Sint;
    }
    if (format == Format::UBYTE) {
        return AttribLoadFlags::Uint;
    }
    return AttribLoadFlags::Float;
}
// Layout bindings of the buffer descriptor set (the first DescriptorSetProvider built by the
// PipelineCache constructor): two dynamic uniform buffers followed by three uniform texel buffers.
// Each entry is {binding, descriptor type, descriptor count, shader stages}.
constexpr std::array<vk::DescriptorSetLayoutBinding, 5> BUFFER_BINDINGS = {{
{0, vk::DescriptorType::eUniformBufferDynamic, 1, vk::ShaderStageFlagBits::eVertex},
{1, vk::DescriptorType::eUniformBufferDynamic, 1,
vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eGeometry |
vk::ShaderStageFlagBits::eFragment},
{2, vk::DescriptorType::eUniformTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment},
{3, vk::DescriptorType::eUniformTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment},
{4, vk::DescriptorType::eUniformTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment},
}};
// Layout bindings of the texture descriptor set (the second DescriptorSetProvider built by the
// PipelineCache constructor): four combined image samplers visible to the fragment stage.
constexpr std::array<vk::DescriptorSetLayoutBinding, 4> TEXTURE_BINDINGS = {{
{0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment},
{1, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment},
{2, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment},
{3, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment},
}};
// Layout bindings of the storage image descriptor set (the third DescriptorSetProvider built by
// the PipelineCache constructor): seven storage images visible to the fragment stage.
// TODO: Use descriptor array for shadow cube
constexpr std::array<vk::DescriptorSetLayoutBinding, 7> SHADOW_BINDINGS = {{
{0, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
{1, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
{2, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
{3, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
{4, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
{5, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
{6, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
}};
// Stores the references it is given, sizes the worker pool, creates one descriptor set provider
// per rasterizer set and the trivial vertex shader, then builds the pipeline layout.
// The disk cache is not loaded here; that is done by LoadDiskCache().
PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
RenderpassCache& renderpass_cache_, DescriptorPool& pool_)
: instance{instance_}, scheduler{scheduler_}, renderpass_cache{renderpass_cache_}, pool{pool_},
// Use at least two workers even if hardware_concurrency() reports fewer (it may return 0)
num_worker_threads{std::max(std::thread::hardware_concurrency(), 2U)},
workers{num_worker_threads, "Pipeline workers"},
// Provider order defines the set index: 0 = buffers, 1 = textures, 2 = storage images
descriptor_set_providers{DescriptorSetProvider{instance, pool, BUFFER_BINDINGS},
DescriptorSetProvider{instance, pool, TEXTURE_BINDINGS},
DescriptorSetProvider{instance, pool, SHADOW_BINDINGS}},
trivial_vertex_shader{instance, vk::ShaderStageFlagBits::eVertex,
GenerateTrivialVertexShader(instance.IsShaderClipDistanceSupported())} {
BuildLayout();
}
void PipelineCache::BuildLayout() {
std::array<vk::DescriptorSetLayout, NUM_RASTERIZER_SETS> descriptor_set_layouts;
std::transform(descriptor_set_providers.begin(), descriptor_set_providers.end(),
descriptor_set_layouts.begin(),
[](const auto& provider) { return provider.Layout(); });
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = NUM_RASTERIZER_SETS,
.pSetLayouts = descriptor_set_layouts.data(),
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
};
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
}
// Writes the pipeline cache back to disk (subject to the checks in SaveDiskCache) on teardown.
PipelineCache::~PipelineCache() {
SaveDiskCache();
}
/// Creates the Vulkan pipeline cache object, seeding it with the blob stored on disk when one
/// exists and passes IsCacheValid. The file name is derived from the vendor and device IDs.
/// Does nothing when the disk shader cache setting is disabled or the cache directories cannot
/// be created; in that case no pipeline cache object is created.
void PipelineCache::LoadDiskCache() {
    if (!Settings::values.use_disk_shader_cache || !EnsureDirectories()) {
        return;
    }

    const std::string cache_file_path = fmt::format("{}{:x}{:x}.bin", GetPipelineCacheDir(),
                                                    instance.GetVendorID(), instance.GetDeviceID());
    // Start with an empty cache; the initial data is only filled in for a valid file
    vk::PipelineCacheCreateInfo cache_info = {
        .initialDataSize = 0,
        .pInitialData = nullptr,
    };

    // Must outlive createPipelineCacheUnique below, cache_info points into it
    std::vector<u8> cache_data;
    // The blob is binary data written by SaveDiskCache with "wb". It has to be read in binary
    // mode as well, text mode may translate line endings or stop early on some platforms.
    FileUtil::IOFile cache_file{cache_file_path, "rb"};
    if (cache_file.IsOpen()) {
        LOG_INFO(Render_Vulkan, "Loading pipeline cache");

        const u64 cache_file_size = cache_file.GetSize();
        cache_data.resize(cache_file_size);
        if (cache_file.ReadBytes(cache_data.data(), cache_file_size)) {
            if (!IsCacheValid(cache_data)) {
                LOG_WARNING(Render_Vulkan, "Pipeline cache provided invalid, ignoring");
            } else {
                cache_info.initialDataSize = cache_file_size;
                cache_info.pInitialData = cache_data.data();
            }
        }

        cache_file.Close();
    }

    vk::Device device = instance.GetDevice();
    pipeline_cache = device.createPipelineCacheUnique(cache_info);
}
void PipelineCache::SaveDiskCache() {
if (!Settings::values.use_disk_shader_cache || !EnsureDirectories()) {
return;
}
const std::string cache_file_path = fmt::format("{}{:x}{:x}.bin", GetPipelineCacheDir(),
instance.GetVendorID(), instance.GetDeviceID());
FileUtil::IOFile cache_file{cache_file_path, "wb"};
if (!cache_file.IsOpen()) {
LOG_ERROR(Render_Vulkan, "Unable to open pipeline cache for writing");
return;
}
vk::Device device = instance.GetDevice();
auto cache_data = device.getPipelineCacheData(*pipeline_cache);
if (!cache_file.WriteBytes(cache_data.data(), cache_data.size())) {
LOG_ERROR(Render_Vulkan, "Error during pipeline cache write");
return;
}
cache_file.Close();
}
// Binds the graphics pipeline described by `info` together with the currently selected shaders,
// creating the pipeline on first use. Also records the dynamic state that differs from the
// previously bound PipelineInfo and binds the rasterizer descriptor sets.
// Returns false when the pipeline is not built yet and TryBuild(wait_built) fails; nothing is
// recorded in that case. Returns true otherwise.
bool PipelineCache::BindPipeline(const PipelineInfo& info, bool wait_built) {
MICROPROFILE_SCOPE(Vulkan_Bind);
// The pipeline key combines the hashes of all bound shader stages with the fixed state hash
u64 shader_hash = 0;
for (u32 i = 0; i < MAX_SHADER_STAGES; i++) {
shader_hash = Common::HashCombine(shader_hash, shader_hashes[i]);
}
const u64 info_hash = info.Hash(instance);
const u64 pipeline_hash = Common::HashCombine(shader_hash, info_hash);
auto [it, new_pipeline] = graphics_pipelines.try_emplace(pipeline_hash);
if (new_pipeline) {
// No worker pool is handed over when the caller asked to wait for the build
it.value() = std::make_unique<GraphicsPipeline>(
instance, renderpass_cache, info, *pipeline_cache, *pipeline_layout, current_shaders,
wait_built ? nullptr : &workers);
}
GraphicsPipeline* const pipeline{it->second.get()};
if (!pipeline->IsDone() && !pipeline->TryBuild(wait_built)) {
return false;
}
// Acquire a descriptor set for every set whose bindings changed since the last bind
for (u32 i = 0; i < NUM_RASTERIZER_SETS; i++) {
if (!set_dirty[i]) {
continue;
}
bound_descriptor_sets[i] = descriptor_set_providers[i].Acquire(update_data[i]);
set_dirty[i] = false;
}
// When the scheduler reports the pipeline state as dirty, everything is set again
const bool is_dirty = scheduler.IsStateDirty(StateFlags::Pipeline);
const bool pipeline_dirty = (current_pipeline != pipeline) || is_dirty;
// All state the recorded callback needs is captured by value; `this` is only used to reach
// instance and pipeline_layout.
scheduler.Record([this, is_dirty, pipeline_dirty, pipeline,
current_dynamic = current_info.dynamic, dynamic = info.dynamic,
descriptor_sets = bound_descriptor_sets, offsets = offsets,
current_rasterization = current_info.rasterization,
current_depth_stencil = current_info.depth_stencil,
rasterization = info.rasterization,
depth_stencil = info.depth_stencil](vk::CommandBuffer cmdbuf) {
// Dynamic state that is always set through commands
if (dynamic.stencil_compare_mask != current_dynamic.stencil_compare_mask || is_dirty) {
cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack,
dynamic.stencil_compare_mask);
}
if (dynamic.stencil_write_mask != current_dynamic.stencil_write_mask || is_dirty) {
cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack,
dynamic.stencil_write_mask);
}
if (dynamic.stencil_reference != current_dynamic.stencil_reference || is_dirty) {
cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack,
dynamic.stencil_reference);
}
if (dynamic.blend_color != current_dynamic.blend_color || is_dirty) {
const Common::Vec4f color = PicaToVK::ColorRGBA8(dynamic.blend_color);
cmdbuf.setBlendConstants(color.AsArray());
}
// State that is only set through commands when the instance reports extended dynamic state
if (instance.IsExtendedDynamicStateSupported()) {
if (rasterization.cull_mode != current_rasterization.cull_mode || is_dirty) {
// Both the cull mode and the front face are derived from the PICA cull mode
cmdbuf.setCullModeEXT(PicaToVK::CullMode(rasterization.cull_mode));
cmdbuf.setFrontFaceEXT(PicaToVK::FrontFace(rasterization.cull_mode));
}
if (depth_stencil.depth_compare_op != current_depth_stencil.depth_compare_op ||
is_dirty) {
cmdbuf.setDepthCompareOpEXT(PicaToVK::CompareFunc(depth_stencil.depth_compare_op));
}
if (depth_stencil.depth_test_enable != current_depth_stencil.depth_test_enable ||
is_dirty) {
cmdbuf.setDepthTestEnableEXT(depth_stencil.depth_test_enable);
}
if (depth_stencil.depth_write_enable != current_depth_stencil.depth_write_enable ||
is_dirty) {
cmdbuf.setDepthWriteEnableEXT(depth_stencil.depth_write_enable);
}
if (rasterization.topology != current_rasterization.topology || is_dirty) {
cmdbuf.setPrimitiveTopologyEXT(PicaToVK::PrimitiveTopology(rasterization.topology));
}
if (depth_stencil.stencil_test_enable != current_depth_stencil.stencil_test_enable ||
is_dirty) {
cmdbuf.setStencilTestEnableEXT(depth_stencil.stencil_test_enable);
}
// The stencil ops are set with a single command, so any difference updates all of them
if (depth_stencil.stencil_fail_op != current_depth_stencil.stencil_fail_op ||
depth_stencil.stencil_pass_op != current_depth_stencil.stencil_pass_op ||
depth_stencil.stencil_depth_fail_op !=
current_depth_stencil.stencil_depth_fail_op ||
depth_stencil.stencil_compare_op != current_depth_stencil.stencil_compare_op ||
is_dirty) {
cmdbuf.setStencilOpEXT(vk::StencilFaceFlagBits::eFrontAndBack,
PicaToVK::StencilOp(depth_stencil.stencil_fail_op),
PicaToVK::StencilOp(depth_stencil.stencil_pass_op),
PicaToVK::StencilOp(depth_stencil.stencil_depth_fail_op),
PicaToVK::CompareFunc(depth_stencil.stencil_compare_op));
}
}
if (pipeline_dirty) {
// The build may still be in progress when this callback runs; wait before using the handle
if (!pipeline->IsDone()) {
pipeline->WaitDone();
}
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
}
// Descriptor sets and dynamic offsets are bound on every call
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0,
descriptor_sets, offsets);
});
// Remember what was bound so the next call only records the differences
current_info = info;
current_pipeline = pipeline;
return true;
}
/// Selects the vertex shader generated from the PICA vertex shader setup as the current vertex
/// stage. Shaders are looked up by configuration first and by generated source second; new
/// sources are compiled on the worker pool.
/// Returns false when shader generation failed for this configuration (now or previously).
bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs,
                                                Pica::Shader::ShaderSetup& setup,
                                                const VertexLayout& layout) {
    PicaVSConfig config{regs.rasterizer, regs.vs, setup, instance};
    config.state.use_geometry_shader = instance.UseGeometryShaders();

    // Record how each vertex attribute has to be loaded, based on the format traits
    for (u32 index = 0; index < layout.attribute_count; index++) {
        const VertexAttribute& attribute = layout.attributes[index];
        const FormatTraits& traits = instance.GetTraits(attribute.type, attribute.size);
        const u32 location = attribute.location.Value();
        AttribLoadFlags& load_flags = config.state.load_flags[location];

        if (traits.needs_conversion) {
            load_flags = MakeAttribLoadFlag(attribute.type);
        }
        if (traits.needs_emulation) {
            load_flags |= AttribLoadFlags::ZeroW;
        }
    }

    auto [config_entry, is_new_config] = programmable_vertex_map.try_emplace(config);
    if (is_new_config) {
        auto generated = GenerateVertexShader(setup, config);
        if (!generated) {
            LOG_ERROR(Render_Vulkan, "Failed to retrieve programmable vertex shader");
            // Keep a null entry so later calls with this configuration fail fast
            programmable_vertex_map[config] = nullptr;
            return false;
        }

        // Different configurations may generate the same source; share the compiled shader
        std::string& source = generated.value();
        auto [cache_entry, is_new_program] =
            programmable_vertex_cache.try_emplace(source, instance);
        auto& cached_shader = cache_entry->second;
        if (is_new_program) {
            cached_shader.program = std::move(source);
            const vk::Device device = instance.GetDevice();
            workers.QueueWork([device, &cached_shader] {
                cached_shader.module =
                    Compile(cached_shader.program, vk::ShaderStageFlagBits::eVertex, device);
                cached_shader.MarkDone();
            });
        }

        config_entry->second = &cached_shader;
    }

    Shader* const selected = config_entry->second;
    if (selected == nullptr) {
        LOG_ERROR(Render_Vulkan, "Failed to retrieve programmable vertex shader");
        return false;
    }

    current_shaders[ProgramType::VS] = selected;
    shader_hashes[ProgramType::VS] = config.Hash();
    return true;
}
// Selects the trivial vertex shader created in the constructor as the current vertex stage.
// Its hash contribution is fixed to 0.
void PipelineCache::UseTrivialVertexShader() {
current_shaders[ProgramType::VS] = &trivial_vertex_shader;
shader_hashes[ProgramType::VS] = 0;
}
/// Selects the fixed-function geometry shader matching the PICA registers as the current
/// geometry stage, generating and compiling it on the worker pool on first use.
/// Falls back to no geometry shader when the instance does not use geometry shaders.
/// Always returns true.
bool PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) {
    if (!instance.UseGeometryShaders()) {
        UseTrivialGeometryShader();
        return true;
    }

    const PicaFixedGSConfig gs_config{regs, instance};
    auto [entry, inserted] = fixed_geometry_shaders.try_emplace(gs_config, instance);
    auto& geometry_shader = entry->second;
    if (inserted) {
        // The configuration is copied into the job; the shader lives in the map
        workers.QueueWork([gs_config, device = instance.GetDevice(), &geometry_shader]() {
            const std::string source = GenerateFixedGeometryShader(gs_config);
            geometry_shader.module =
                Compile(source, vk::ShaderStageFlagBits::eGeometry, device);
            geometry_shader.MarkDone();
        });
    }

    current_shaders[ProgramType::GS] = &geometry_shader;
    shader_hashes[ProgramType::GS] = gs_config.Hash();
    return true;
}
// Clears the geometry stage: no shader is bound for it and its hash contribution is 0.
void PipelineCache::UseTrivialGeometryShader() {
current_shaders[ProgramType::GS] = nullptr;
shader_hashes[ProgramType::GS] = 0;
}
/// Selects the fragment shader matching the PICA registers as the current fragment stage.
/// On first use the shader is either emitted as SPIR-V and compiled right away, or generated as
/// source text and compiled on the worker pool.
void PipelineCache::UseFragmentShader(const Pica::Regs& regs) {
    const PicaFSConfig config{regs, instance};

    const auto [entry, inserted] = fragment_shaders.try_emplace(config, instance);
    auto& fragment_shader = entry->second;
    if (inserted) {
        const bool use_spirv = Settings::values.spirv_shader_gen.GetValue();
        // The SPIR-V path is skipped for configurations with shadow rendering enabled
        const bool emit_spirv = use_spirv && !config.state.shadow_rendering.Value();
        if (!emit_spirv) {
            workers.QueueWork([config, device = instance.GetDevice(), &fragment_shader]() {
                const std::string source = GenerateFragmentShader(config);
                fragment_shader.module =
                    Compile(source, vk::ShaderStageFlagBits::eFragment, device);
                fragment_shader.MarkDone();
            });
        } else {
            const std::vector spirv = GenerateFragmentShaderSPV(config);
            fragment_shader.module = CompileSPV(spirv, instance.GetDevice());
            fragment_shader.MarkDone();
        }
    }

    current_shaders[ProgramType::FS] = &fragment_shader;
    shader_hashes[ProgramType::FS] = config.Hash();
}
/// Stores the image view and sampler for `binding` of the texture set (set 1) and marks the set
/// dirty, unless the same pair is already stored.
void PipelineCache::BindTexture(u32 binding, vk::ImageView image_view, vk::Sampler sampler) {
    auto& slot = update_data[1][binding].image_info;

    const bool unchanged = slot.imageView == image_view && slot.sampler == sampler;
    if (!unchanged) {
        set_dirty[1] = true;
        slot = vk::DescriptorImageInfo{
            .sampler = sampler,
            .imageView = image_view,
            .imageLayout = vk::ImageLayout::eGeneral,
        };
    }
}
/// Stores the image view for `binding` of the storage image set (set 2) and marks the set
/// dirty, unless the same view is already stored.
void PipelineCache::BindStorageImage(u32 binding, vk::ImageView image_view) {
    auto& slot = update_data[2][binding].image_info;

    if (slot.imageView != image_view) {
        set_dirty[2] = true;
        slot = vk::DescriptorImageInfo{
            .imageView = image_view,
            .imageLayout = vk::ImageLayout::eGeneral,
        };
    }
}
/// Stores the buffer range for `binding` of the buffer set (set 0) and marks the set dirty,
/// unless the same buffer, offset and size are already stored.
void PipelineCache::BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size) {
    auto& slot = update_data[0][binding].buffer_info;

    const bool unchanged = slot.buffer == buffer && slot.offset == offset && slot.range == size;
    if (!unchanged) {
        set_dirty[0] = true;
        slot = vk::DescriptorBufferInfo{
            .buffer = buffer,
            .offset = offset,
            .range = size,
        };
    }
}
/// Stores the buffer view for `binding` of the buffer set (set 0) and marks the set dirty,
/// unless the same view is already stored.
void PipelineCache::BindTexelBuffer(u32 binding, vk::BufferView buffer_view) {
    auto& slot = update_data[0][binding].buffer_view;
    if (slot == buffer_view) {
        return;
    }

    set_dirty[0] = true;
    slot = buffer_view;
}
// Stores the dynamic offset passed to bindDescriptorSets by BindPipeline for `binding`.
// The offset is narrowed to 32 bits. `binding` is used as an index into `offsets` unchecked.
void PipelineCache::SetBufferOffset(u32 binding, size_t offset) {
offsets[binding] = static_cast<u32>(offset);
}
/// Checks whether a pipeline cache blob can be handed to the current device: it must be large
/// enough to hold a version one header, and the header length, version, vendor ID, device ID
/// and pipeline cache UUID must match what the instance reports. Logs the reason on failure.
bool PipelineCache::IsCacheValid(std::span<const u8> data) const {
    using Header = vk::PipelineCacheHeaderVersionOne;

    if (data.size() < sizeof(Header)) {
        LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header");
        return false;
    }

    // Copy the header out instead of casting, the data has no alignment guarantee
    Header header;
    std::memcpy(&header, data.data(), sizeof(header));

    if (header.headerSize < sizeof(header)) {
        LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header length");
        return false;
    }

    if (header.headerVersion != vk::PipelineCacheHeaderVersion::eOne) {
        LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header version");
        return false;
    }

    const u32 vendor_id = instance.GetVendorID();
    if (header.vendorID != vendor_id) {
        LOG_ERROR(
            Render_Vulkan,
            "Pipeline cache failed validation: Incorrect vendor ID (file: {:#X}, device: {:#X})",
            header.vendorID, vendor_id);
        return false;
    }

    const u32 device_id = instance.GetDeviceID();
    if (header.deviceID != device_id) {
        LOG_ERROR(
            Render_Vulkan,
            "Pipeline cache failed validation: Incorrect device ID (file: {:#X}, device: {:#X})",
            header.deviceID, device_id);
        return false;
    }

    if (header.pipelineCacheUUID != instance.GetPipelineCacheUUID()) {
        LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Incorrect UUID");
        return false;
    }

    return true;
}
/// Creates the shader directory and then the pipeline cache directory inside it.
/// Returns false, after logging, as soon as one of them cannot be created.
bool PipelineCache::EnsureDirectories() const {
    const auto make_directory = [](const std::string& path) -> bool {
        const bool created = FileUtil::CreateDir(path);
        if (!created) {
            LOG_ERROR(Render_Vulkan, "Failed to create directory={}", path);
        }
        return created;
    };

    // The second directory is only attempted when the first one succeeded
    if (!make_directory(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir))) {
        return false;
    }
    return make_directory(GetPipelineCacheDir());
}
// Returns the "vulkan" subdirectory of the user shader directory, with a trailing separator.
std::string PipelineCache::GetPipelineCacheDir() const {
return FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + "vulkan" + DIR_SEP;
}
} // namespace Vulkan

View file

@ -0,0 +1,123 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <bitset>
#include <tsl/robin_map.h>
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
namespace Pica {
struct Regs;
}
namespace Vulkan {
class Instance;
class Scheduler;
class RenderpassCache;
class DescriptorPool;
/// Number of descriptor sets in the rasterizer pipeline layout
constexpr u32 NUM_RASTERIZER_SETS = 3;
/// Number of dynamic offsets supplied when the rasterizer descriptor sets are bound
constexpr u32 NUM_DYNAMIC_OFFSETS = 2;
/**
 * Stores a collection of rasterizer pipelines used during rendering.
 * Also tracks the currently selected shader of each stage and the descriptor data of every
 * rasterizer set, which are consumed when a pipeline is bound.
 */
class PipelineCache {
public:
    explicit PipelineCache(const Instance& instance, Scheduler& scheduler,
                           RenderpassCache& renderpass_cache, DescriptorPool& pool);
    ~PipelineCache();

    /// Returns the descriptor set provider of the texture set (set index 1)
    [[nodiscard]] DescriptorSetProvider& TextureProvider() noexcept {
        return descriptor_set_providers[1];
    }

    /// Loads the pipeline cache stored to disk
    void LoadDiskCache();

    /// Stores the generated pipeline cache to disk
    void SaveDiskCache();

    /// Binds a pipeline using the provided information
    bool BindPipeline(const PipelineInfo& info, bool wait_built = false);

    /// Binds a PICA decompiled vertex shader
    bool UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup,
                                     const VertexLayout& layout);

    /// Binds a passthrough vertex shader
    void UseTrivialVertexShader();

    /// Binds a PICA decompiled geometry shader
    bool UseFixedGeometryShader(const Pica::Regs& regs);

    /// Binds a passthrough geometry shader
    void UseTrivialGeometryShader();

    /// Binds a fragment shader generated from PICA state
    void UseFragmentShader(const Pica::Regs& regs);

    /// Binds a texture to the specified binding
    void BindTexture(u32 binding, vk::ImageView image_view, vk::Sampler sampler);

    /// Binds a storage image to the specified binding
    void BindStorageImage(u32 binding, vk::ImageView image_view);

    /// Binds a buffer to the specified binding
    void BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size);

    /// Binds a texel buffer view to the specified binding
    void BindTexelBuffer(u32 binding, vk::BufferView buffer_view);

    /// Sets the dynamic offset for the uniform buffer at binding
    void SetBufferOffset(u32 binding, size_t offset);

private:
    /// Builds the rasterizer pipeline layout
    void BuildLayout();

    /// Returns true when the disk data can be used by the current driver
    bool IsCacheValid(std::span<const u8> cache_data) const;

    /// Create shader disk cache directories. Returns true on success.
    bool EnsureDirectories() const;

    /// Returns the pipeline cache storage dir
    std::string GetPipelineCacheDir() const;

private:
    const Instance& instance;
    Scheduler& scheduler;
    RenderpassCache& renderpass_cache;
    DescriptorPool& pool;
    vk::UniquePipelineCache pipeline_cache;
    vk::UniquePipelineLayout pipeline_layout;
    // num_worker_threads must stay declared before workers, which is constructed from it
    std::size_t num_worker_threads;
    Common::ThreadWorker workers;
    PipelineInfo current_info{};
    GraphicsPipeline* current_pipeline{};
    tsl::robin_map<u64, std::unique_ptr<GraphicsPipeline>, Common::IdentityHash<u64>>
        graphics_pipelines;

    std::array<DescriptorSetProvider, NUM_RASTERIZER_SETS> descriptor_set_providers;
    std::array<DescriptorSetData, NUM_RASTERIZER_SETS> update_data{};
    std::array<vk::DescriptorSet, NUM_RASTERIZER_SETS> bound_descriptor_sets{};
    std::array<u32, NUM_DYNAMIC_OFFSETS> offsets{};
    std::bitset<NUM_RASTERIZER_SETS> set_dirty{};

    // Value-initialized: BindPipeline hashes every entry of shader_hashes and hands the whole
    // current_shaders array to the pipeline, including stages that were never selected.
    std::array<u64, MAX_SHADER_STAGES> shader_hashes{};
    std::array<Shader*, MAX_SHADER_STAGES> current_shaders{};
    std::unordered_map<PicaVSConfig, Shader*> programmable_vertex_map;
    std::unordered_map<std::string, Shader> programmable_vertex_cache;
    std::unordered_map<PicaFixedGSConfig, Shader> fixed_geometry_shaders;
    std::unordered_map<PicaFSConfig, Shader> fragment_shaders;
    Shader trivial_vertex_shader;
};
} // namespace Vulkan

View file

@ -0,0 +1,514 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/microprofile.h"
#include "common/settings.h"
#include "common/thread.h"
#include "core/frontend/emu_window.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_platform.h"
#include "video_core/renderer_vulkan/vk_present_window.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include <vk_mem_alloc.h>
// Profiling timer entered by PresentWindow::GetRenderFrame through MICROPROFILE_SCOPE.
MICROPROFILE_DEFINE(Vulkan_WaitPresent, "Vulkan", "Wait For Present", MP_RGB(128, 128, 128));
namespace Vulkan {
namespace {
/// Returns true when images of `format` with optimal tiling can be the destination of a blit
/// on the given physical device.
bool CanBlitToSwapchain(const vk::PhysicalDevice& physical_device, vk::Format format) {
    const vk::FormatProperties properties{physical_device.getFormatProperties(format)};
    const auto blit_dst_bits =
        properties.optimalTilingFeatures & vk::FormatFeatureFlagBits::eBlitDst;
    return static_cast<bool>(blit_dst_bits);
}
/// Returns the subresource description used by the present copies: the color aspect of mip
/// level 0, array layer 0.
[[nodiscard]] vk::ImageSubresourceLayers MakeImageSubresourceLayers() {
    vk::ImageSubresourceLayers layers{};
    layers.aspectMask = vk::ImageAspectFlagBits::eColor;
    layers.mipLevel = 0;
    layers.baseArrayLayer = 0;
    layers.layerCount = 1;
    return layers;
}
/// Describes a blit of the whole frame (0,0)-(frame_width, frame_height) onto the whole
/// swapchain image (0,0)-(swapchain_width, swapchain_height).
[[nodiscard]] vk::ImageBlit MakeImageBlit(s32 frame_width, s32 frame_height, s32 swapchain_width,
                                          s32 swapchain_height) {
    const vk::Offset3D origin{.x = 0, .y = 0, .z = 0};
    // The end offsets use z = 1 for these single-slice images
    const vk::Offset3D frame_end{.x = frame_width, .y = frame_height, .z = 1};
    const vk::Offset3D swapchain_end{.x = swapchain_width, .y = swapchain_height, .z = 1};

    return vk::ImageBlit{
        .srcSubresource = MakeImageSubresourceLayers(),
        .srcOffsets = std::array{origin, frame_end},
        .dstSubresource = MakeImageSubresourceLayers(),
        .dstOffsets = std::array{origin, swapchain_end},
    };
}
/// Describes a copy from the frame to the swapchain image starting at the origin of both.
/// The copied area is clamped to the smaller of the two images in each dimension.
[[nodiscard]] vk::ImageCopy MakeImageCopy(u32 frame_width, u32 frame_height, u32 swapchain_width,
                                          u32 swapchain_height) {
    const vk::Offset3D origin{.x = 0, .y = 0, .z = 0};
    const vk::Extent3D copy_extent{
        .width = std::min(frame_width, swapchain_width),
        .height = std::min(frame_height, swapchain_height),
        .depth = 1,
    };

    return vk::ImageCopy{
        .srcSubresource = MakeImageSubresourceLayers(),
        .srcOffset = origin,
        .dstSubresource = MakeImageSubresourceLayers(),
        .dstOffset = origin,
        .extent = copy_extent,
    };
}
} // Anonymous namespace
// Creates the window surface and swapchain, then the command pool, command buffers and
// per-frame synchronization objects of the presentation frames. Every frame starts in the free
// queue. When async presentation is enabled, the presentation thread is started last.
PresentWindow::PresentWindow(Frontend::EmuWindow& emu_window_, const Instance& instance_,
Scheduler& scheduler_)
: emu_window{emu_window_}, instance{instance_}, scheduler{scheduler_},
surface{CreateSurface(instance.GetInstance(), emu_window)},
// The swapchain starts out with the size of the current framebuffer layout
swapchain{instance, emu_window.GetFramebufferLayout().width,
emu_window.GetFramebufferLayout().height, surface},
graphics_queue{instance.GetGraphicsQueue()}, present_renderpass{CreateRenderpass()},
vsync_enabled{Settings::values.use_vsync_new.GetValue()},
blit_supported{
CanBlitToSwapchain(instance.GetPhysicalDevice(), swapchain.GetSurfaceFormat().format)},
use_present_thread{Settings::values.async_presentation.GetValue()},
last_render_surface{emu_window.GetWindowInfo().render_surface} {
const vk::Device device = instance.GetDevice();
const vk::CommandPoolCreateInfo pool_info = {
.flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer |
vk::CommandPoolCreateFlagBits::eTransient,
.queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex(),
};
command_pool = device.createCommandPool(pool_info);
// One primary command buffer per presentation frame
const vk::CommandBufferAllocateInfo alloc_info = {
.commandPool = command_pool,
.level = vk::CommandBufferLevel::ePrimary,
.commandBufferCount = SWAP_CHAIN_SIZE,
};
const std::vector command_buffers = device.allocateCommandBuffers(alloc_info);
for (u32 i = 0; i < SWAP_CHAIN_SIZE; i++) {
Frame& frame = swap_chain[i];
frame.cmdbuf = command_buffers[i];
frame.render_ready = device.createSemaphore({});
// Created signaled so the first wait in GetRenderFrame returns immediately
frame.present_done = device.createFence({.flags = vk::FenceCreateFlagBits::eSignaled});
free_queue.push(&frame);
}
if (use_present_thread) {
present_thread = std::jthread([this](std::stop_token token) { PresentThread(token); });
}
}
/// Stops presentation and releases the command pool, the present renderpass and all per-frame
/// Vulkan objects created by this class.
PresentWindow::~PresentWindow() {
    scheduler.Finish();

    // The presentation thread records into the frame command buffers and reads the frame
    // images in CopyToSwapchain. As a member it would only be joined after this body has run,
    // so stop and join it here, before any of those resources are destroyed.
    if (present_thread.joinable()) {
        present_thread.request_stop();
        present_thread.join();
    }

    const vk::Device device = instance.GetDevice();
    device.destroyCommandPool(command_pool);
    device.destroyRenderPass(present_renderpass);
    for (auto& frame : swap_chain) {
        device.destroyImageView(frame.image_view);
        device.destroyFramebuffer(frame.framebuffer);
        device.destroySemaphore(frame.render_ready);
        device.destroyFence(frame.present_done);
        vmaDestroyImage(instance.GetAllocator(), frame.image, frame.allocation);
    }
}
// Destroys the image, image view and framebuffer of `frame` (when present) and creates new ones
// with the requested size, using the swapchain surface format. Updates the stored frame size.
void PresentWindow::RecreateFrame(Frame* frame, u32 width, u32 height) {
vk::Device device = instance.GetDevice();
// Release the previous objects, each is skipped when its handle is null
if (frame->framebuffer) {
device.destroyFramebuffer(frame->framebuffer);
}
if (frame->image_view) {
device.destroyImageView(frame->image_view);
}
if (frame->image) {
vmaDestroyImage(instance.GetAllocator(), frame->image, frame->allocation);
}
const vk::Format format = swapchain.GetSurfaceFormat().format;
// The image is rendered to as a color attachment and later used as a transfer source
const vk::ImageCreateInfo image_info = {
.imageType = vk::ImageType::e2D,
.format = format,
.extent = {width, height, 1},
.mipLevels = 1,
.arrayLayers = 1,
.samples = vk::SampleCountFlagBits::e1,
.usage = vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eTransferSrc,
};
const VmaAllocationCreateInfo alloc_info = {
.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT,
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
.requiredFlags = 0,
.preferredFlags = 0,
.pool = VK_NULL_HANDLE,
.pUserData = nullptr,
};
// The allocator works with the C API types, so convert the create info and wrap the result
VkImage unsafe_image{};
VkImageCreateInfo unsafe_image_info = static_cast<VkImageCreateInfo>(image_info);
VkResult result = vmaCreateImage(instance.GetAllocator(), &unsafe_image_info, &alloc_info,
&unsafe_image, &frame->allocation, nullptr);
if (result != VK_SUCCESS) [[unlikely]] {
LOG_CRITICAL(Render_Vulkan, "Failed allocating texture with error {}", result);
UNREACHABLE();
}
frame->image = vk::Image{unsafe_image};
const vk::ImageViewCreateInfo view_info = {
.image = frame->image,
.viewType = vk::ImageViewType::e2D,
.format = format,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
};
frame->image_view = device.createImageView(view_info);
// The framebuffer has the new image view as its only attachment
const vk::FramebufferCreateInfo framebuffer_info = {
.renderPass = present_renderpass,
.attachmentCount = 1,
.pAttachments = &frame->image_view,
.width = width,
.height = height,
.layers = 1,
};
frame->framebuffer = instance.GetDevice().createFramebuffer(framebuffer_info);
frame->width = width;
frame->height = height;
}
/// Takes a frame out of the free queue, blocking until one is available, then waits for its
/// present_done fence and resets it. The returned frame is ready to be rendered to.
Frame* PresentWindow::GetRenderFrame() {
    MICROPROFILE_SCOPE(Vulkan_WaitPresent);

    // Block until a presentation frame has been handed back
    std::unique_lock lock{free_mutex};
    free_cv.wait(lock, [this] { return !free_queue.empty(); });

    Frame* const frame = free_queue.front();
    free_queue.pop();

    const vk::Device device = instance.GetDevice();

    // Wait for the presentation to be finished so all frame resources are free.
    // The wait is repeated until it reports success. This covers timeouts as well as
    // eErrorInitializationFailed, which occurs on Mali GPU drivers due to them using the
    // ppoll() syscall which isn't correctly restarted after a signal.
    vk::Result result{};
    do {
        result = device.waitForFences(frame->present_done, false, std::numeric_limits<u64>::max());
    } while (result != vk::Result::eSuccess);

    device.resetFences(frame->present_done);
    return frame;
}
/// Presents a rendered frame. With the presentation thread enabled, the frame is queued for it
/// from a scheduler callback; otherwise it is copied to the swapchain on the calling thread and
/// returned to the free queue.
void PresentWindow::Present(Frame* frame) {
    if (use_present_thread) {
        // The hand-over is recorded in the scheduler so it is ordered with the recorded work
        scheduler.Record([this, frame](vk::CommandBuffer) {
            std::unique_lock lock{queue_mutex};
            present_queue.push(frame);
            frame_cv.notify_one();
        });
        return;
    }

    scheduler.WaitWorker();
    CopyToSwapchain(frame);
    free_queue.push(frame);
}
// Blocks until the presentation thread has taken every queued frame and is no longer holding
// the swapchain mutex. Returns immediately when the presentation thread is not used.
void PresentWindow::WaitPresent() {
if (!use_present_thread) {
return;
}
// Wait for the present queue to be empty
{
std::unique_lock queue_lock{queue_mutex};
frame_cv.wait(queue_lock, [this] { return present_queue.empty(); });
}
// The above condition will be satisfied when the last frame is taken from the queue.
// To ensure that frame has been presented as well take hold of the swapchain
// mutex.
// The lock is released again as soon as this function returns.
std::scoped_lock swapchain_lock{swapchain_mutex};
}
// Entry point of the presentation thread. Until a stop is requested, it takes frames from the
// present queue, copies them to the swapchain and returns them to the free queue.
void PresentWindow::PresentThread(std::stop_token token) {
Common::SetCurrentThreadName("VulkanPresent");
while (!token.stop_requested()) {
std::unique_lock lock{queue_mutex};
// Wait for presentation frames
Common::CondvarWait(frame_cv, lock, token, [this] { return !present_queue.empty(); });
// The wait also ends when a stop is requested, in which case the queue may be empty
if (token.stop_requested()) {
return;
}
// Take the frame and notify anyone waiting
Frame* frame = present_queue.front();
present_queue.pop();
frame_cv.notify_one();
// By exchanging the lock ownership we take the swapchain lock
// before the queue lock goes out of scope. This way the swapchain
// lock in WaitPresent is guaranteed to occur after here.
// The queue lock is returned by std::exchange as a temporary and released when that
// temporary is destroyed at the end of the statement.
std::exchange(lock, std::unique_lock{swapchain_mutex});
CopyToSwapchain(frame);
// Free the frame for reuse
std::scoped_lock fl{free_mutex};
free_queue.push(frame);
free_cv.notify_one();
}
}
// Wakes up one waiter on recreate_surface_cv. Only has an effect on Android builds; on other
// platforms this function is empty.
void PresentWindow::NotifySurfaceChanged() {
#ifdef ANDROID
std::scoped_lock lock{recreate_surface_mutex};
recreate_surface_cv.notify_one();
#endif
}
void PresentWindow::CopyToSwapchain(Frame* frame) {
const auto recreate_swapchain = [&] { swapchain.Create(frame->width, frame->height, surface); };
#ifdef ANDROID
std::unique_lock lock{recreate_surface_mutex};
recreate_surface_cv.wait_for(lock, std::chrono::milliseconds(400), [&]() {
return last_render_surface == emu_window.GetWindowInfo().render_surface;
});
// If the frontend recreated the surface, recreate the renderer surface and swapchain.
void* const render_surface = emu_window.GetWindowInfo().render_surface;
if (last_render_surface != render_surface) {
last_render_surface = render_surface;
surface = CreateSurface(instance.GetInstance(), emu_window);
recreate_swapchain();
}
#else
const bool use_vsync = Settings::values.use_vsync_new.GetValue();
const bool size_changed =
swapchain.GetWidth() != frame->width || swapchain.GetHeight() != frame->height;
const bool vsync_changed = vsync_enabled != use_vsync;
if (vsync_changed || size_changed) [[unlikely]] {
vsync_enabled = use_vsync;
recreate_swapchain();
}
#endif
while (!swapchain.AcquireNextImage()) {
recreate_swapchain();
}
const vk::Image swapchain_image = swapchain.Image();
const vk::CommandBufferBeginInfo begin_info = {
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
};
const vk::CommandBuffer cmdbuf = frame->cmdbuf;
cmdbuf.begin(begin_info);
const vk::Extent2D extent = swapchain.GetExtent();
const std::array pre_barriers{
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eNone,
.dstAccessMask = vk::AccessFlagBits::eTransferWrite,
.oldLayout = vk::ImageLayout::eUndefined,
.newLayout = vk::ImageLayout::eTransferDstOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = swapchain_image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
},
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite,
.dstAccessMask = vk::AccessFlagBits::eTransferRead,
.oldLayout = vk::ImageLayout::eTransferSrcOptimal,
.newLayout = vk::ImageLayout::eTransferSrcOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = frame->image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
},
};
const vk::ImageMemoryBarrier post_barrier{
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eMemoryRead,
.oldLayout = vk::ImageLayout::eTransferDstOptimal,
.newLayout = vk::ImageLayout::ePresentSrcKHR,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = swapchain_image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput,
vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,
{}, {}, pre_barriers);
if (blit_supported) {
cmdbuf.blitImage(frame->image, vk::ImageLayout::eTransferSrcOptimal, swapchain_image,
vk::ImageLayout::eTransferDstOptimal,
MakeImageBlit(frame->width, frame->height, extent.width, extent.height),
vk::Filter::eLinear);
} else {
cmdbuf.copyImage(frame->image, vk::ImageLayout::eTransferSrcOptimal, swapchain_image,
vk::ImageLayout::eTransferDstOptimal,
MakeImageCopy(frame->width, frame->height, extent.width, extent.height));
}
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eAllCommands,
vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier);
cmdbuf.end();
static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks = {
vk::PipelineStageFlagBits::eColorAttachmentOutput,
vk::PipelineStageFlagBits::eAllGraphics,
};
const vk::Semaphore present_ready = swapchain.GetPresentReadySemaphore();
const vk::Semaphore image_acquired = swapchain.GetImageAcquiredSemaphore();
const std::array wait_semaphores = {image_acquired, frame->render_ready};
vk::SubmitInfo submit_info = {
.waitSemaphoreCount = static_cast<u32>(wait_semaphores.size()),
.pWaitSemaphores = wait_semaphores.data(),
.pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1u,
.pCommandBuffers = &cmdbuf,
.signalSemaphoreCount = 1,
.pSignalSemaphores = &present_ready,
};
std::scoped_lock submit_lock{scheduler.submit_mutex};
try {
graphics_queue.submit(submit_info, frame->present_done);
} catch (vk::DeviceLostError& err) {
LOG_CRITICAL(Render_Vulkan, "Device lost during present submit: {}", err.what());
UNREACHABLE();
}
swapchain.Present();
}
/// Creates a render pass with one subpass and a single color attachment.
/// The attachment uses the swapchain's surface format, is cleared on load, stored on
/// completion, referenced in eGeneral layout by the subpass and left in
/// eTransferSrcOptimal when the pass ends. No subpass dependencies are declared.
vk::RenderPass PresentWindow::CreateRenderpass() {
    const vk::AttachmentDescription attachment = {
        .format = swapchain.GetSurfaceFormat().format,
        .loadOp = vk::AttachmentLoadOp::eClear,
        .storeOp = vk::AttachmentStoreOp::eStore,
        .stencilLoadOp = vk::AttachmentLoadOp::eDontCare,
        .stencilStoreOp = vk::AttachmentStoreOp::eDontCare,
        .initialLayout = vk::ImageLayout::eUndefined,
        .finalLayout = vk::ImageLayout::eTransferSrcOptimal,
    };

    // Attachment 0 is the only attachment and is bound as the color target.
    const vk::AttachmentReference attachment_ref = {
        .attachment = 0,
        .layout = vk::ImageLayout::eGeneral,
    };

    const vk::SubpassDescription subpass_desc = {
        .pipelineBindPoint = vk::PipelineBindPoint::eGraphics,
        .inputAttachmentCount = 0,
        .pInputAttachments = nullptr,
        .colorAttachmentCount = 1u,
        .pColorAttachments = &attachment_ref,
        .pResolveAttachments = nullptr,
        .pDepthStencilAttachment = nullptr,
    };

    const vk::RenderPassCreateInfo create_info = {
        .attachmentCount = 1,
        .pAttachments = &attachment,
        .subpassCount = 1,
        .pSubpasses = &subpass_desc,
        .dependencyCount = 0,
        .pDependencies = nullptr,
    };

    return instance.GetDevice().createRenderPass(create_info);
}
} // namespace Vulkan

View file

@ -0,0 +1,101 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <atomic>
#include <condition_variable>
#include <mutex>
#include <queue>
#include "common/polyfill_thread.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
VK_DEFINE_HANDLE(VmaAllocation)
namespace Frontend {
class EmuWindow;
}
namespace Vulkan {
class Instance;
class Swapchain;
class Scheduler;
class RenderpassCache;
/// A frame handed to the presentation code, bundled with the Vulkan objects that
/// CopyToSwapchain uses to transfer it to a swapchain image.
struct Frame {
/// Source width passed to the blit/copy; also compared against the swapchain width.
u32 width;
/// Source height passed to the blit/copy; also compared against the swapchain height.
u32 height;
/// Presumably the VMA allocation backing `image` -- TODO confirm against RecreateFrame.
VmaAllocation allocation;
/// Presumably the framebuffer that targets `image` -- TODO confirm.
vk::Framebuffer framebuffer;
/// Image used as the transfer source when copying to the swapchain.
vk::Image image;
/// Presumably a view of `image` -- TODO confirm.
vk::ImageView image_view;
/// Semaphore the present submission waits on before executing the copy.
vk::Semaphore render_ready;
/// Fence handed to the queue submission that performs the swapchain copy.
vk::Fence present_done;
/// Command buffer into which the swapchain copy is recorded.
vk::CommandBuffer cmdbuf;
};
/// Owns the window surface, the swapchain and a fixed set of Frame objects, and copies
/// queued frames to the swapchain for presentation.
class PresentWindow final {
/// Number of Frame objects kept in `swap_chain`.
static constexpr std::size_t SWAP_CHAIN_SIZE = 6;
public:
explicit PresentWindow(Frontend::EmuWindow& emu_window, const Instance& instance,
Scheduler& scheduler);
~PresentWindow();
/// Waits for all queued frames to finish presenting.
void WaitPresent();
/// Returns the last used render frame.
Frame* GetRenderFrame();
/// Recreates the render frame to match provided parameters.
void RecreateFrame(Frame* frame, u32 width, u32 height);
/// Queues the provided frame for presentation.
void Present(Frame* frame);
/// This is called to notify the rendering backend of a surface change
void NotifySurfaceChanged();
/// Returns the render pass handle stored in `present_renderpass`.
[[nodiscard]] vk::RenderPass Renderpass() const noexcept {
return present_renderpass;
}
/// Returns the number of images reported by the swapchain.
u32 ImageCount() const noexcept {
return swapchain.GetImageCount();
}
private:
/// Entry point of `present_thread`; receives the thread's stop token.
void PresentThread(std::stop_token token);
/// Acquires a swapchain image, blits or copies the frame's image into it, submits the
/// work on the graphics queue and presents the result.
void CopyToSwapchain(Frame* frame);
/// Creates the single-color-attachment render pass in the swapchain's surface format.
vk::RenderPass CreateRenderpass();
private:
Frontend::EmuWindow& emu_window;
const Instance& instance;
Scheduler& scheduler;
vk::SurfaceKHR surface;
Swapchain swapchain;
vk::CommandPool command_pool;
vk::Queue graphics_queue;
vk::RenderPass present_renderpass;
/// Backing storage for all frames; value-initialized.
std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{};
// NOTE(review): the queues, condition variables and mutexes below are used by code outside
// this header; presumably free_queue/present_queue hand frames between the render and
// present threads -- confirm against the implementation file.
std::queue<Frame*> free_queue;
std::queue<Frame*> present_queue;
std::condition_variable free_cv;
std::condition_variable recreate_surface_cv;
std::condition_variable_any frame_cv;
std::mutex swapchain_mutex;
std::mutex recreate_surface_mutex;
std::mutex queue_mutex;
std::mutex free_mutex;
std::jthread present_thread;
/// Last vsync setting applied to the swapchain; compared against the current setting.
bool vsync_enabled{};
/// Selects blitImage (true) or copyImage (false) in CopyToSwapchain. Has no default
/// initializer here -- presumably assigned by the constructor; confirm.
bool blit_supported;
bool use_present_thread{true};
/// Render surface pointer seen on the previous copy; a change triggers surface recreation.
void* last_render_surface{};
};
} // namespace Vulkan

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,171 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "core/hw/gpu.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
namespace Frontend {
class EmuWindow;
}
namespace VideoCore {
class CustomTexManager;
class RendererBase;
} // namespace VideoCore
namespace Vulkan {
struct ScreenInfo;
class Instance;
class Scheduler;
class RenderpassCache;
class DescriptorPool;
/// Rasterizer implementation derived from VideoCore::RasterizerAccelerated that keeps PICA
/// state in sync with Vulkan pipeline state and issues draws through the Vulkan scheduler.
class RasterizerVulkan : public VideoCore::RasterizerAccelerated {
public:
explicit RasterizerVulkan(Memory::MemorySystem& memory,
VideoCore::CustomTexManager& custom_tex_manager,
VideoCore::RendererBase& renderer, Frontend::EmuWindow& emu_window,
const Instance& instance, Scheduler& scheduler, DescriptorPool& pool,
RenderpassCache& renderpass_cache, u32 image_count);
~RasterizerVulkan() override;
// NOTE(review): not an override; presumably called once per presented frame -- confirm.
void TickFrame();
void LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
void DrawTriangles() override;
void FlushAll() override;
void FlushRegion(PAddr addr, u32 size) override;
void InvalidateRegion(PAddr addr, u32 size) override;
void FlushAndInvalidateRegion(PAddr addr, u32 size) override;
void ClearAll(bool flush) override;
bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override;
bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override;
bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override;
// Not an override: takes the Vulkan-specific ScreenInfo type as an output parameter.
bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr,
u32 pixel_stride, ScreenInfo& screen_info);
bool AccelerateDrawBatch(bool is_indexed) override;
void SyncFixedState() override;
private:
void NotifyFixedFunctionPicaRegisterChanged(u32 id) override;
/// Syncs the clip enabled status to match the PICA register
void SyncClipEnabled();
/// Syncs the cull mode to match the PICA register
void SyncCullMode();
/// Syncs the blend enabled status to match the PICA register
void SyncBlendEnabled();
/// Syncs the blend functions to match the PICA register
void SyncBlendFuncs();
/// Syncs the blend color to match the PICA register
void SyncBlendColor();
/// Syncs the logic op states to match the PICA register
void SyncLogicOp();
/// Syncs the color write mask to match the PICA register state
void SyncColorWriteMask();
/// Syncs the stencil write mask to match the PICA register state
void SyncStencilWriteMask();
/// Syncs the depth write mask to match the PICA register state
void SyncDepthWriteMask();
/// Syncs the stencil test states to match the PICA register
void SyncStencilTest();
/// Syncs the depth test states to match the PICA register
void SyncDepthTest();
/// Syncs and uploads the lighting, fog and proctex LUTs
void SyncAndUploadLUTs();
// NOTE(review): undocumented in the original; presumably the light/fog half of the LUT
// upload (see texture_lf_buffer below) -- confirm.
void SyncAndUploadLUTsLF();
/// Syncs all enabled PICA texture units
void SyncTextureUnits(const Framebuffer* framebuffer);
/// Binds the PICA shadow cube required for shadow mapping
void BindShadowCube(const Pica::TexturingRegs::FullTextureConfig& texture);
/// Binds a texture cube to texture unit 0
void BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture);
/// Makes a temporary copy of the framebuffer if a feedback loop is detected
bool IsFeedbackLoop(u32 texture_index, const Framebuffer* framebuffer, Surface& surface,
Sampler& sampler);
/// Unbinds all special texture unit 0 texture configurations
void UnbindSpecial();
/// Upload the uniform blocks to the uniform buffer object
void UploadUniforms(bool accelerate_draw);
/// Generic draw function for DrawTriangles and AccelerateDrawBatch
bool Draw(bool accelerate, bool is_indexed);
/// Internal implementation for AccelerateDrawBatch
bool AccelerateDrawBatchInternal(bool is_indexed);
/// Setup index array for AccelerateDrawBatch
void SetupIndexArray();
/// Setup vertex array for AccelerateDrawBatch
void SetupVertexArray();
/// Setup the fixed attribute emulation in vulkan
void SetupFixedAttribs();
/// Setup vertex shader for AccelerateDrawBatch
bool SetupVertexShader();
/// Setup geometry shader for AccelerateDrawBatch
bool SetupGeometryShader();
/// Creates the vertex layout struct used for software shader pipelines
void MakeSoftwareVertexLayout();
private:
const Instance& instance;
Scheduler& scheduler;
RenderpassCache& renderpass_cache;
PipelineCache pipeline_cache;
TextureRuntime runtime;
RasterizerCache res_cache;
/// Vertex layout used for software shader pipelines (see MakeSoftwareVertexLayout).
VertexLayout software_layout;
// Per-binding state for up to 16 vertex bindings; all arrays are indexed the same way.
std::array<u32, 16> binding_offsets{};
std::array<bool, 16> enable_attributes{};
std::array<vk::Buffer, 16> vertex_buffers;
VertexArrayInfo vertex_info;
PipelineInfo pipeline_info;
StreamBuffer stream_buffer; ///< Vertex+Index buffer
StreamBuffer uniform_buffer; ///< Uniform buffer
StreamBuffer texture_buffer; ///< Texture buffer
StreamBuffer texture_lf_buffer; ///< Texture Light-Fog buffer
// Buffer views; presumably typed views over the texture buffers above -- confirm in the
// constructor.
vk::UniqueBufferView texture_lf_view;
vk::UniqueBufferView texture_rg_view;
vk::UniqueBufferView texture_rgba_view;
// No default initializers; presumably computed in the constructor from device limits --
// confirm.
u64 uniform_buffer_alignment;
u64 uniform_size_aligned_vs;
u64 uniform_size_aligned_fs;
bool async_shaders{false};
};
} // namespace Vulkan

View file

@ -0,0 +1,10 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/rasterizer_cache/rasterizer_cache.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
namespace VideoCore {
// Explicit instantiation of the rasterizer cache template for the Vulkan traits type, so the
// template's member definitions are emitted in this translation unit.
template class RasterizerCache<Vulkan::Traits>;
} // namespace VideoCore

View file

@ -0,0 +1,211 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <limits>
#include "common/assert.h"
#include "video_core/rasterizer_cache/pixel_format.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
namespace Vulkan {
using VideoCore::PixelFormat;
using VideoCore::SurfaceType;
/// Stores references to the instance and scheduler. No Vulkan objects are created here;
/// GetRenderpass() creates render passes on first use.
RenderpassCache::RenderpassCache(const Instance& instance, Scheduler& scheduler)
: instance{instance}, scheduler{scheduler} {}
/// Defaulted: the cached vk::UniqueRenderPass handles release themselves.
RenderpassCache::~RenderpassCache() = default;
void RenderpassCache::BeginRendering(const Framebuffer* framebuffer,
Common::Rectangle<u32> draw_rect) {
const vk::Rect2D render_area = {
.offset{
.x = static_cast<s32>(draw_rect.left),
.y = static_cast<s32>(draw_rect.bottom),
},
.extent{
.width = draw_rect.GetWidth(),
.height = draw_rect.GetHeight(),
},
};
const RenderPass new_pass = {
.framebuffer = framebuffer->Handle(),
.render_pass = framebuffer->RenderPass(),
.render_area = render_area,
.clear = {},
.do_clear = false,
};
images = framebuffer->Images();
aspects = framebuffer->Aspects();
BeginRendering(new_pass);
}
void RenderpassCache::BeginRendering(const RenderPass& new_pass) {
if (pass == new_pass) [[likely]] {
return;
}
EndRendering();
scheduler.Record([info = new_pass](vk::CommandBuffer cmdbuf) {
const vk::RenderPassBeginInfo renderpass_begin_info = {
.renderPass = info.render_pass,
.framebuffer = info.framebuffer,
.renderArea = info.render_area,
.clearValueCount = info.do_clear ? 1u : 0u,
.pClearValues = &info.clear,
};
cmdbuf.beginRenderPass(renderpass_begin_info, vk::SubpassContents::eInline);
});
pass = new_pass;
}
void RenderpassCache::EndRendering() {
if (!pass.render_pass) {
return;
}
pass.render_pass = vk::RenderPass{};
scheduler.Record([images = images, aspects = aspects](vk::CommandBuffer cmdbuf) {
u32 num_barriers = 0;
vk::PipelineStageFlags pipeline_flags{};
std::array<vk::ImageMemoryBarrier, 2> barriers;
for (u32 i = 0; i < images.size(); i++) {
if (!images[i]) {
continue;
}
const bool is_color = static_cast<bool>(aspects[i] & vk::ImageAspectFlagBits::eColor);
if (is_color) {
pipeline_flags |= vk::PipelineStageFlagBits::eColorAttachmentOutput;
} else {
pipeline_flags |= vk::PipelineStageFlagBits::eEarlyFragmentTests |
vk::PipelineStageFlagBits::eLateFragmentTests;
}
barriers[num_barriers++] = vk::ImageMemoryBarrier{
.srcAccessMask = is_color ? vk::AccessFlagBits::eColorAttachmentWrite
: vk::AccessFlagBits::eDepthStencilAttachmentWrite,
.dstAccessMask =
vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = images[i],
.subresourceRange{
.aspectMask = aspects[i],
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
}
cmdbuf.endRenderPass();
cmdbuf.pipelineBarrier(pipeline_flags,
vk::PipelineStageFlagBits::eFragmentShader |
vk::PipelineStageFlagBits::eTransfer,
vk::DependencyFlagBits::eByRegion, 0, nullptr, 0, nullptr,
num_barriers, barriers.data());
});
}
/// Returns the cached render pass for the given color/depth format pair and load behavior,
/// creating it on first request. Access to the cache is serialized by `cache_mutex`.
vk::RenderPass RenderpassCache::GetRenderpass(VideoCore::PixelFormat color,
                                              VideoCore::PixelFormat depth, bool is_clear) {
    std::scoped_lock lock{cache_mutex};

    // An invalid format maps to the extra trailing slot of its table dimension.
    u32 color_index = static_cast<u32>(MAX_COLOR_FORMATS);
    if (color != VideoCore::PixelFormat::Invalid) {
        color_index = static_cast<u32>(color);
    }

    u32 depth_index = static_cast<u32>(MAX_DEPTH_FORMATS);
    if (depth != VideoCore::PixelFormat::Invalid) {
        // Presumably 14 is the numeric value of the first depth format -- TODO confirm
        // against the PixelFormat enum. Smaller values wrap around and trip the assert.
        depth_index = static_cast<u32>(depth) - 14;
    }

    ASSERT_MSG(color_index <= MAX_COLOR_FORMATS && depth_index <= MAX_DEPTH_FORMATS,
               "Invalid color index {} and/or depth_index {}", color_index, depth_index);

    vk::UniqueRenderPass& slot = cached_renderpasses[color_index][depth_index][is_clear];
    if (slot) {
        return *slot;
    }

    // Cache miss: build the render pass from the native formats of both attachments.
    const vk::Format native_color = instance.GetTraits(color).native;
    const vk::Format native_depth = instance.GetTraits(depth).native;
    const vk::AttachmentLoadOp load_op =
        is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad;
    slot = CreateRenderPass(native_color, native_depth, load_op);
    return *slot;
}
/// Creates a single-subpass render pass with an optional color and an optional
/// depth-stencil attachment. A format of vk::Format::eUndefined omits that attachment.
/// All attachments stay in eGeneral layout and use `load_op` when loaded.
vk::UniqueRenderPass RenderpassCache::CreateRenderPass(vk::Format color, vk::Format depth,
                                                       vk::AttachmentLoadOp load_op) const {
    const bool has_color = color != vk::Format::eUndefined;
    const bool has_depth = depth != vk::Format::eUndefined;

    std::array<vk::AttachmentDescription, 2> descriptions;
    u32 num_attachments = 0;

    // Color, when present, always takes the first attachment slot.
    vk::AttachmentReference color_ref{};
    if (has_color) {
        color_ref = vk::AttachmentReference{
            .attachment = num_attachments,
            .layout = vk::ImageLayout::eGeneral,
        };
        descriptions[num_attachments++] = vk::AttachmentDescription{
            .format = color,
            .loadOp = load_op,
            .storeOp = vk::AttachmentStoreOp::eStore,
            .stencilLoadOp = vk::AttachmentLoadOp::eDontCare,
            .stencilStoreOp = vk::AttachmentStoreOp::eDontCare,
            .initialLayout = vk::ImageLayout::eGeneral,
            .finalLayout = vk::ImageLayout::eGeneral,
        };
    }

    // Depth-stencil follows; its stencil aspect uses the same load op and is stored.
    vk::AttachmentReference depth_ref{};
    if (has_depth) {
        depth_ref = vk::AttachmentReference{
            .attachment = num_attachments,
            .layout = vk::ImageLayout::eGeneral,
        };
        descriptions[num_attachments++] = vk::AttachmentDescription{
            .format = depth,
            .loadOp = load_op,
            .storeOp = vk::AttachmentStoreOp::eStore,
            .stencilLoadOp = load_op,
            .stencilStoreOp = vk::AttachmentStoreOp::eStore,
            .initialLayout = vk::ImageLayout::eGeneral,
            .finalLayout = vk::ImageLayout::eGeneral,
        };
    }

    const vk::SubpassDescription subpass_desc = {
        .pipelineBindPoint = vk::PipelineBindPoint::eGraphics,
        .inputAttachmentCount = 0,
        .pInputAttachments = nullptr,
        .colorAttachmentCount = has_color ? 1u : 0u,
        .pColorAttachments = &color_ref,
        .pResolveAttachments = nullptr,
        .pDepthStencilAttachment = has_depth ? &depth_ref : nullptr,
    };

    const vk::RenderPassCreateInfo create_info = {
        .attachmentCount = num_attachments,
        .pAttachments = descriptions.data(),
        .subpassCount = 1,
        .pSubpasses = &subpass_desc,
        .dependencyCount = 0,
        .pDependencies = nullptr,
    };

    return instance.GetDevice().createRenderPassUnique(create_info);
}
} // namespace Vulkan

View file

@ -0,0 +1,73 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <mutex>
#include "common/math_util.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace VideoCore {
enum class PixelFormat : u32;
}
namespace Vulkan {
class Instance;
class Scheduler;
class Framebuffer;
/// Everything needed to begin a render pass instance: target framebuffer, render pass
/// handle, render area and an optional clear value.
struct RenderPass {
    vk::Framebuffer framebuffer;
    vk::RenderPass render_pass;
    vk::Rect2D render_area;
    vk::ClearValue clear;
    bool do_clear;

    /// Two descriptions are equal when every field matches. The clear value is compared
    /// bytewise and takes part in the comparison even when `do_clear` is false.
    bool operator==(const RenderPass& other) const noexcept {
        return framebuffer == other.framebuffer && render_pass == other.render_pass &&
               render_area == other.render_area && do_clear == other.do_clear &&
               std::memcmp(&clear, &other.clear, sizeof(vk::ClearValue)) == 0;
    }
};
/// Caches render passes per color/depth format pair and tracks the currently recording
/// render pass so that redundant begin/end commands are skipped.
class RenderpassCache {
// Table dimensions; each dimension has one extra slot used when the format is invalid.
static constexpr size_t MAX_COLOR_FORMATS = 5;
static constexpr size_t MAX_DEPTH_FORMATS = 4;
public:
explicit RenderpassCache(const Instance& instance, Scheduler& scheduler);
~RenderpassCache();
/// Begins a new renderpass with the provided framebuffer as render target.
void BeginRendering(const Framebuffer* framebuffer, Common::Rectangle<u32> draw_rect);
/// Begins a new renderpass with the provided render state.
void BeginRendering(const RenderPass& new_pass);
/// Exits from any currently active renderpass instance
void EndRendering();
/// Returns the renderpass associated with the color-depth format pair
vk::RenderPass GetRenderpass(VideoCore::PixelFormat color, VideoCore::PixelFormat depth,
bool is_clear);
private:
/// Creates a renderpass configured appropriately and stores it in cached_renderpasses
vk::UniqueRenderPass CreateRenderPass(vk::Format color, vk::Format depth,
vk::AttachmentLoadOp load_op) const;
private:
const Instance& instance;
Scheduler& scheduler;
/// Indexed as [color format][depth format][is_clear]; entries are created on first use.
vk::UniqueRenderPass cached_renderpasses[MAX_COLOR_FORMATS + 1][MAX_DEPTH_FORMATS + 1][2];
/// Guards cached_renderpasses inside GetRenderpass().
std::mutex cache_mutex;
/// Images of the framebuffer most recently passed to BeginRendering(); used by
/// EndRendering() to build barriers.
std::array<vk::Image, 2> images;
/// Aspect masks matching `images` element by element.
std::array<vk::ImageAspectFlags, 2> aspects;
/// Description of the active render pass; a null render_pass handle means none is active.
RenderPass pass{};
};
} // namespace Vulkan

View file

@ -0,0 +1,113 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstddef>
#include <optional>
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"
namespace Vulkan {
/// Stores the semaphore used to query GPU progress and the number of resources added on each
/// growth. The pool starts empty; the first CommitResource() call triggers an allocation.
ResourcePool::ResourcePool(MasterSemaphore* master_semaphore_, size_t grow_step_)
: master_semaphore{master_semaphore_}, grow_step{grow_step_} {}
std::size_t ResourcePool::CommitResource() {
// Refresh semaphore to query updated results
master_semaphore->Refresh();
const u64 gpu_tick = master_semaphore->KnownGpuTick();
const auto search = [this, gpu_tick](std::size_t begin,
std::size_t end) -> std::optional<std::size_t> {
for (std::size_t iterator = begin; iterator < end; ++iterator) {
if (gpu_tick >= ticks[iterator]) {
ticks[iterator] = master_semaphore->CurrentTick();
return iterator;
}
}
return std::nullopt;
};
// Try to find a free resource from the hinted position to the end.
std::optional<std::size_t> found = search(hint_iterator, ticks.size());
if (!found) {
// Search from beginning to the hinted position.
found = search(0, hint_iterator);
if (!found) {
// Both searches failed, the pool is full; handle it.
const std::size_t free_resource = ManageOverflow();
ticks[free_resource] = master_semaphore->CurrentTick();
found = free_resource;
}
}
// Free iterator is hinted to the resource after the one that's been commited.
hint_iterator = (*found + 1) % ticks.size();
return *found;
}
/// Grows the pool by one step and returns the index of the first newly added resource.
std::size_t ResourcePool::ManageOverflow() {
const std::size_t old_capacity = ticks.size();
Grow();
// The entry at the old capacity is guaranteed to be free, since it is the first element of
// the freshly allocated resources.
return old_capacity;
}
/// Extends the pool by `grow_step` resources and asks the derived class to allocate them.
/// The new tick entries are value-initialized to zero by the resize.
void ResourcePool::Grow() {
    const std::size_t first_new = ticks.size();
    const std::size_t new_capacity = first_new + grow_step;
    ticks.resize(new_capacity);
    Allocate(first_new, new_capacity);
}
/// Number of command buffers allocated per Vulkan command pool; also the pool's grow step.
constexpr size_t COMMAND_BUFFER_POOL_SIZE = 4;
/// One Vulkan command pool together with the command buffers allocated from it.
struct CommandPool::Pool {
vk::CommandPool handle;
std::array<vk::CommandBuffer, COMMAND_BUFFER_POOL_SIZE> cmdbufs;
};
/// Configures the base pool to grow by COMMAND_BUFFER_POOL_SIZE resources at a time, which
/// matches the number of command buffers created by each Allocate() call.
CommandPool::CommandPool(const Instance& instance, MasterSemaphore* master_semaphore)
: ResourcePool{master_semaphore, COMMAND_BUFFER_POOL_SIZE}, instance{instance} {}
/// Destroys every Vulkan command pool owned by this object. The command buffers are not
/// freed individually; per the Vulkan specification, destroying a command pool frees the
/// command buffers allocated from it.
CommandPool::~CommandPool() {
    const vk::Device device = instance.GetDevice();
    for (const Pool& entry : pools) {
        device.destroyCommandPool(entry.handle);
    }
}
void CommandPool::Allocate(std::size_t begin, std::size_t end) {
// Command buffers are going to be commited, recorded, executed every single usage cycle.
// They are also going to be reseted when commited.
Pool& pool = pools.emplace_back();
const vk::CommandPoolCreateInfo pool_create_info = {
.flags = vk::CommandPoolCreateFlagBits::eTransient |
vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
.queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex(),
};
vk::Device device = instance.GetDevice();
pool.handle = device.createCommandPool(pool_create_info);
const vk::CommandBufferAllocateInfo buffer_alloc_info = {
.commandPool = pool.handle,
.level = vk::CommandBufferLevel::ePrimary,
.commandBufferCount = COMMAND_BUFFER_POOL_SIZE,
};
auto buffers = device.allocateCommandBuffers(buffer_alloc_info);
std::copy(buffers.begin(), buffers.end(), pool.cmdbufs.begin());
}
/// Commits a free resource and returns the command buffer it maps to.
/// Resource indices are laid out pool after pool, COMMAND_BUFFER_POOL_SIZE per pool.
vk::CommandBuffer CommandPool::Commit() {
    const std::size_t resource = CommitResource();
    const Pool& owner = pools[resource / COMMAND_BUFFER_POOL_SIZE];
    return owner.cmdbufs[resource % COMMAND_BUFFER_POOL_SIZE];
}
} // namespace Vulkan

View file

@ -0,0 +1,67 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Instance;
class MasterSemaphore;
/**
 * Handles a pool of resources protected by master semaphore ticks. Each resource remembers
 * the tick it was last committed at and becomes reusable once the GPU has reached that tick.
 * Manages resource overflow by allocating more resources.
 */
class ResourcePool {
public:
explicit ResourcePool() = default;
explicit ResourcePool(MasterSemaphore* master_semaphore, std::size_t grow_step);
virtual ~ResourcePool() = default;
ResourcePool& operator=(ResourcePool&&) noexcept = default;
ResourcePool(ResourcePool&&) noexcept = default;
ResourcePool& operator=(const ResourcePool&) = default;
ResourcePool(const ResourcePool&) = default;
protected:
/// Reserves a free resource, growing the pool if needed, and returns its index.
// NOTE(review): a default-constructed pool has a null semaphore and grow_step == 0;
// calling this in that state looks invalid -- confirm it is never done.
std::size_t CommitResource();
/// Called when a chunk of resources have to be allocated.
virtual void Allocate(std::size_t begin, std::size_t end) = 0;
private:
/// Manages pool overflow allocating new resources.
std::size_t ManageOverflow();
/// Allocates a new page of resources.
void Grow();
protected:
MasterSemaphore* master_semaphore{nullptr};
std::size_t grow_step = 0; ///< Number of new resources created after an overflow
std::size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found
std::vector<u64> ticks; ///< Ticks for each resource
};
/// Resource pool of Vulkan command buffers, allocated in fixed-size batches with one Vulkan
/// command pool per batch.
class CommandPool final : public ResourcePool {
public:
explicit CommandPool(const Instance& instance, MasterSemaphore* master_semaphore);
~CommandPool() override;
/// Creates a new Vulkan command pool and a batch of command buffers.
void Allocate(std::size_t begin, std::size_t end) override;
/// Returns a command buffer that is free to be recorded again.
vk::CommandBuffer Commit();
private:
/// Defined in the implementation file.
struct Pool;
const Instance& instance;
std::vector<Pool> pools;
};
} // namespace Vulkan

View file

@ -0,0 +1,203 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <mutex>
#include <utility>
#include "common/microprofile.h"
#include "common/settings.h"
#include "common/thread.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
// Profiling scopes used by the scheduler: time spent waiting for the worker thread, and
// time spent submitting recorded work. The submit label's spelling was corrected from
// "Exectution".
MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192));
MICROPROFILE_DEFINE(Vulkan_Submit, "Vulkan", "Submit Execution", MP_RGB(255, 192, 255));
namespace Vulkan {
namespace {
/// Picks the master semaphore implementation: the timeline-semaphore variant when the
/// instance reports support for it, otherwise the fence-based variant.
std::unique_ptr<MasterSemaphore> MakeMasterSemaphore(const Instance& instance) {
    if (!instance.IsTimelineSemaphoreSupported()) {
        return std::make_unique<MasterSemaphoreFence>(instance);
    }
    return std::make_unique<MasterSemaphoreTimeline>(instance);
}
} // Anonymous namespace
/// Executes every recorded command in order against `cmdbuf`, destroying each command right
/// after it runs, then resets the chunk so it can be recorded into again.
void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
    for (auto current = first; current != nullptr;) {
        // Fetch the successor first: the command is destroyed below.
        const auto following = current->GetNext();
        current->Execute(cmdbuf);
        // Only the destructor is invoked here; no memory is released.
        current->~Command();
        current = following;
    }

    // Return the chunk to its empty state.
    first = nullptr;
    last = nullptr;
    command_offset = 0;
    submit = false;
}
/// Creates the master semaphore and command pool, begins the first command buffer and,
/// unless the renderer_debug setting is enabled, starts the worker thread that executes
/// recorded command chunks.
Scheduler::Scheduler(const Instance& instance, RenderpassCache& renderpass_cache)
    : renderpass_cache{renderpass_cache}, master_semaphore{MakeMasterSemaphore(instance)},
      command_pool{instance, master_semaphore.get()},
      use_worker_thread{!Settings::values.renderer_debug} {
    AllocateWorkerCommandBuffers();

    // Without a worker thread commands run immediately, so no chunk is needed.
    if (!use_worker_thread) {
        return;
    }

    AcquireNewChunk();
    worker_thread = std::jthread([this](std::stop_token token) { WorkerThread(token); });
}
/// Defaulted; members are destroyed in reverse declaration order.
Scheduler::~Scheduler() = default;
/// Submits the work recorded so far without waiting for it to complete. `signal` and `wait`
/// are forwarded to the submission.
void Scheduler::Flush(vk::Semaphore signal, vk::Semaphore wait) {
// When flushing, we only send data to the worker thread; no waiting is necessary.
SubmitExecution(signal, wait);
}
/// Submits the work recorded so far and blocks until the GPU has finished executing it.
void Scheduler::Finish(vk::Semaphore signal, vk::Semaphore wait) {
    // Sample the tick before submitting: this is the tick the submission is waited on.
    const u64 tick_to_wait = CurrentTick();
    SubmitExecution(signal, wait);
    Wait(tick_to_wait);
}
void Scheduler::WaitWorker() {
if (!use_worker_thread) {
return;
}
MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
DispatchWork();
// Ensure the queue is drained.
{
std::unique_lock ql{queue_mutex};
event_cv.wait(ql, [this] { return work_queue.empty(); });
}
// Now wait for execution to finish.
// This needs to be done in the same order as WorkerThread.
std::scoped_lock el{execution_mutex};
}
/// Blocks until the GPU has reached `tick`. If `tick` has not been submitted yet, the
/// pending work is flushed first so the wait can complete.
void Scheduler::Wait(u64 tick) {
    const bool not_submitted_yet = tick >= master_semaphore->CurrentTick();
    if (not_submitted_yet) {
        // Waiting on the current tick without submitting it would never be signalled.
        Flush();
    }
    master_semaphore->Wait(tick);
}
/// Hands the current chunk to the worker thread and starts a fresh one. Does nothing when
/// the worker thread is disabled or the chunk holds no commands.
void Scheduler::DispatchWork() {
    if (!use_worker_thread) {
        return;
    }
    if (chunk->Empty()) {
        return;
    }

    // Hold the queue lock only for the push itself.
    {
        std::scoped_lock queue_lock{queue_mutex};
        work_queue.push(std::move(chunk));
    }
    event_cv.notify_all();

    AcquireNewChunk();
}
/// Body of the worker thread: repeatedly pops a chunk from the work queue, executes its
/// commands on the current command buffer and recycles the chunk, until a stop is requested.
void Scheduler::WorkerThread(std::stop_token stop_token) {
Common::SetCurrentThreadName("VulkanWorker");
// Moves the front chunk into `work` if the queue is not empty. Notifies event_cv after the
// pop so that threads waiting for the queue to drain (see WaitWorker) re-check it.
// Must be called with queue_mutex held.
const auto TryPopQueue{[this](auto& work) -> bool {
if (work_queue.empty()) {
return false;
}
work = std::move(work_queue.front());
work_queue.pop();
event_cv.notify_all();
return true;
}};
while (!stop_token.stop_requested()) {
std::unique_ptr<CommandChunk> work;
{
std::unique_lock lk{queue_mutex};
// Wait for work.
Common::CondvarWait(event_cv, lk, stop_token, [&] { return TryPopQueue(work); });
// If we've been asked to stop, we're done.
if (stop_token.stop_requested()) {
return;
}
// Exchange lock ownership so that we take the execution lock before
// the queue lock goes out of scope. This allows us to force execution
// to complete in the next step.
// The queue lock returned by std::exchange is a discarded temporary, so it is released
// at the end of this statement, after the execution lock has been acquired.
std::exchange(lk, std::unique_lock{execution_mutex});
// Perform the work, tracking whether the chunk was a submission
// before executing.
// (ExecuteAll resets the chunk's submit flag, so it has to be read first.)
const bool has_submit = work->HasSubmit();
work->ExecuteAll(current_cmdbuf);
// If the chunk was a submission, reallocate the command buffer.
if (has_submit) {
AllocateWorkerCommandBuffers();
}
}
{
std::scoped_lock rl{reserve_mutex};
// Recycle the chunk back to the reserve.
chunk_reserve.emplace_back(std::move(work));
}
}
}
void Scheduler::AllocateWorkerCommandBuffers() {
const vk::CommandBufferBeginInfo begin_info = {
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
};
current_cmdbuf = command_pool.Commit();
current_cmdbuf.begin(begin_info);
}
/// Ends the active render pass, records a command that submits the current command buffer
/// through the master semaphore, and prepares a new command buffer for later work.
/// All tracked state is marked dirty because a new command buffer starts unbound.
void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) {
    state = StateFlags::AllDirty;
    // Reserve the tick that this submission signals.
    const u64 signal_value = master_semaphore->NextTick();

    renderpass_cache.EndRendering();
    Record([this, wait_semaphore, signal_semaphore, signal_value](vk::CommandBuffer cmdbuf) {
        MICROPROFILE_SCOPE(Vulkan_Submit);
        // submit_mutex is also taken by the presentation code around its queue submission.
        std::scoped_lock lock{submit_mutex};
        master_semaphore->SubmitWork(cmdbuf, wait_semaphore, signal_semaphore, signal_value);
    });

    if (use_worker_thread) {
        // The worker allocates the next command buffer after executing a submit chunk.
        chunk->MarkSubmit();
        DispatchWork();
        return;
    }
    // The submission already ran inline; start the next command buffer right away.
    AllocateWorkerCommandBuffers();
}
/// Installs an empty chunk as the current one, reusing a recycled chunk from the reserve
/// when one is available and allocating a new chunk otherwise.
void Scheduler::AcquireNewChunk() {
    std::scoped_lock lock{reserve_mutex};
    if (!chunk_reserve.empty()) {
        chunk = std::move(chunk_reserve.back());
        chunk_reserve.pop_back();
    } else {
        chunk = std::make_unique<CommandChunk>();
    }
}
} // namespace Vulkan

View file

@ -0,0 +1,210 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <utility>
#include "common/alignment.h"
#include "common/common_funcs.h"
#include "common/logging/log.h"
#include "common/polyfill_thread.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"
namespace Vulkan {
/// Bit flags tracking which pieces of command buffer state are up to date.
/// A set bit means the state is NOT dirty: Scheduler::MarkStateNonDirty sets the bit,
/// Scheduler::MakeDirty clears it, and Scheduler::IsStateDirty reports true while the bit is
/// clear. AllDirty therefore has no bits set.
enum class StateFlags {
AllDirty = 0,
Renderpass = 1 << 0,
Pipeline = 1 << 1,
DescriptorSets = 1 << 2
};
DECLARE_ENUM_FLAG_OPERATORS(StateFlags)
class Instance;
class RenderpassCache;
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
/// OpenGL-like operations on Vulkan command buffers.
/// Commands are either executed immediately on the current command buffer or, when the worker
/// thread is enabled, stored in fixed-size chunks that are handed over with DispatchWork.
class Scheduler {
public:
explicit Scheduler(const Instance& instance, RenderpassCache& renderpass_cache);
~Scheduler();
/// Sends the current execution context to the GPU.
void Flush(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr);
/// Sends the current execution context to the GPU and waits for it to complete.
void Finish(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr);
/// Waits for the worker thread to finish executing everything. After this function returns it's
/// safe to touch worker resources.
void WaitWorker();
/// Waits for the given tick to trigger on the GPU.
void Wait(u64 tick);
/// Sends currently recorded work to the worker thread.
void DispatchWork();
/// Records the command to the current chunk.
/// When the worker thread is disabled the command is invoked right away with the current
/// command buffer instead of being stored.
template <typename T>
void Record(T&& command) {
if (!use_worker_thread) {
command(current_cmdbuf);
return;
}
// A failed Record leaves the command untouched (nothing has been moved out of it yet), so it
// can be recorded again below.
if (chunk->Record(command)) {
return;
}
// The current chunk is full. NOTE(review): DispatchWork presumably installs an empty chunk
// (see AcquireNewChunk); the static_assert in CommandChunk::Record guarantees that a command
// fits into an empty chunk, which is why the result is discarded here.
DispatchWork();
(void)chunk->Record(command);
}
/// Marks the provided state as non dirty
void MarkStateNonDirty(StateFlags flag) noexcept {
state |= flag;
}
/// Marks the provided state as dirty
void MakeDirty(StateFlags flag) noexcept {
state &= ~flag;
}
/// Returns true if the state is dirty
[[nodiscard]] bool IsStateDirty(StateFlags flag) const noexcept {
return False(state & flag);
}
/// Returns the current command buffer tick.
[[nodiscard]] u64 CurrentTick() const noexcept {
return master_semaphore->CurrentTick();
}
/// Returns true when a tick has been triggered by the GPU.
[[nodiscard]] bool IsFree(u64 tick) const noexcept {
return master_semaphore->IsFree(tick);
}
/// Returns the master timeline semaphore.
[[nodiscard]] MasterSemaphore* GetMasterSemaphore() noexcept {
return master_semaphore.get();
}
/// Held while work is submitted through the master semaphore. Public so that other code can
/// take the same lock.
std::mutex submit_mutex;
private:
/// Type-erased base of a recorded command; commands are chained through an intrusive
/// singly linked list.
class Command {
public:
virtual ~Command() = default;
virtual void Execute(vk::CommandBuffer cmdbuf) const = 0;
Command* GetNext() const {
return next;
}
void SetNext(Command* next_) {
next = next_;
}
private:
Command* next = nullptr;
};
/// Stores a callable of type T by value and invokes it with the command buffer on Execute.
template <typename T>
class TypedCommand final : public Command {
public:
explicit TypedCommand(T&& command_) : command{std::move(command_)} {}
~TypedCommand() override = default;
TypedCommand(TypedCommand&&) = delete;
TypedCommand& operator=(TypedCommand&&) = delete;
void Execute(vk::CommandBuffer cmdbuf) const override {
command(cmdbuf);
}
private:
T command;
};
/// Fixed-size storage in which commands are constructed back to back.
class CommandChunk final {
public:
void ExecuteAll(vk::CommandBuffer cmdbuf);
/// Move-constructs the command into the chunk storage and appends it to the command list.
/// @returns false, leaving `command` untouched, when the remaining storage is too small.
template <typename T>
bool Record(T& command) {
using FuncType = TypedCommand<T>;
static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large");
// Note: the counter is bumped before the capacity check, so it also counts a failed attempt.
recorded_counts++;
command_offset = Common::AlignUp(command_offset, alignof(FuncType));
if (command_offset > sizeof(data) - sizeof(FuncType)) {
return false;
}
Command* const current_last = last;
// Placement-new into the inline storage; the command is moved out of `command` here.
last = new (data.data() + command_offset) FuncType(std::move(command));
if (current_last) {
current_last->SetNext(last);
} else {
first = last;
}
command_offset += sizeof(FuncType);
return true;
}
/// Flags the chunk as containing a submission.
void MarkSubmit() {
submit = true;
}
/// Returns true when no Record call has been made on this chunk.
bool Empty() const {
return recorded_counts == 0;
}
/// Returns true when MarkSubmit has been called on this chunk.
bool HasSubmit() const {
return submit;
}
private:
Command* first = nullptr; ///< Head of the recorded command list.
Command* last = nullptr; ///< Tail of the recorded command list.
std::size_t recorded_counts = 0; ///< Number of Record attempts.
std::size_t command_offset = 0; ///< Offset of the next free byte in data.
bool submit = false;
alignas(std::max_align_t) std::array<u8, 0x8000> data{}; ///< Inline command storage.
};
private:
void WorkerThread(std::stop_token stop_token);
void AllocateWorkerCommandBuffers();
void SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore);
/// Replaces the current chunk, reusing one from chunk_reserve when possible.
void AcquireNewChunk();
private:
RenderpassCache& renderpass_cache;
std::unique_ptr<MasterSemaphore> master_semaphore;
CommandPool command_pool;
std::unique_ptr<CommandChunk> chunk; ///< Chunk that Record currently writes to.
std::queue<std::unique_ptr<CommandChunk>> work_queue;
std::vector<std::unique_ptr<CommandChunk>> chunk_reserve; ///< Chunks available for reuse.
vk::CommandBuffer current_cmdbuf; ///< Command buffer used when recording without the worker.
StateFlags state{}; ///< Starts as AllDirty (no bit set).
std::mutex execution_mutex;
std::mutex reserve_mutex; ///< Guards chunk_reserve.
std::mutex queue_mutex;
std::condition_variable_any event_cv;
std::jthread worker_thread;
bool use_worker_thread; ///< When false, Record executes commands inline.
};
} // namespace Vulkan

View file

@ -1511,10 +1511,11 @@ vec4 secondary_fragment_color = vec4(0.0);
"gl_FragCoord.y < float(scissor_y2))) discard;\n";
}
// After perspective divide, OpenGL transform z_over_w from [-1, 1] to [near, far]. Here we use
// default near = 0 and far = 1, and undo the transformation to get the original z_over_w, then
// do our own transformation according to PICA specification.
out += "float z_over_w = 2.0 * gl_FragCoord.z - 1.0;\n"
// The PICA depth range is [-1, 0] while in Vulkan that range is [0, 1].
// Thus in the vertex shader we flip the sign of the z component to place
// it in the correct range. Here we undo the transformation to get the original z_over_w,
// then do our own transformation according to PICA specification.
out += "float z_over_w = -gl_FragCoord.z;\n"
"float depth = z_over_w * depth_scale + depth_offset;\n";
if (state.depthmap_enable == RasterizerRegs::DepthBuffering::WBuffering) {
out += "depth /= gl_FragCoord.w;\n";
@ -1661,8 +1662,7 @@ void main() {
texcoord0_w = vert_texcoord0_w;
normquat = vert_normquat;
view = vert_view;
gl_Position = vert_position;
gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;
gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w);
)";
if (use_clip_planes) {
out += R"(
@ -1780,8 +1780,7 @@ layout (set = 0, binding = 0, std140) uniform vs_config {
semantic(VSOutputAttributes::POSITION_Y) + ", " +
semantic(VSOutputAttributes::POSITION_Z) + ", " +
semantic(VSOutputAttributes::POSITION_W) + ");\n";
out += " gl_Position = vtx_pos;\n";
out += " gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;\n";
out += " gl_Position = vec4(vtx_pos.x, vtx_pos.y, -vtx_pos.z, vtx_pos.w);\n";
if (config.use_clip_planes) {
out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0
out += " if (enable_clip1) {\n";
@ -1867,8 +1866,7 @@ struct Vertex {
semantic(VSOutputAttributes::POSITION_Y) + ", " +
semantic(VSOutputAttributes::POSITION_Z) + ", " +
semantic(VSOutputAttributes::POSITION_W) + ");\n";
out += " gl_Position = vtx_pos;\n";
out += " gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;\n";
out += " gl_Position = vec4(vtx_pos.x, vtx_pos.y, -vtx_pos.z, vtx_pos.w);\n";
if (use_clip_planes) {
out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0
out += " if (enable_clip1) {\n";

View file

@ -115,7 +115,7 @@ void FragmentModule::WriteDepth() {
const Id input_pointer_id{TypePointer(spv::StorageClass::Input, f32_id)};
const Id gl_frag_coord_z{
OpLoad(f32_id, OpAccessChain(input_pointer_id, gl_frag_coord_id, ConstU32(2u)))};
const Id z_over_w{OpFma(f32_id, ConstF32(2.f), gl_frag_coord_z, ConstF32(-1.f))};
const Id z_over_w{OpFNegate(f32_id, gl_frag_coord_z)};
const Id depth_scale{GetShaderDataMember(f32_id, ConstS32(2))};
const Id depth_offset{GetShaderDataMember(f32_id, ConstS32(3))};
depth = OpFma(f32_id, z_over_w, depth_scale, depth_offset);

View file

@ -160,7 +160,7 @@ bool InitializeCompiler() {
vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, vk::Device device) {
if (!InitializeCompiler()) {
return VK_NULL_HANDLE;
return {};
}
EProfile profile = ECoreProfile;
@ -182,7 +182,7 @@ vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, v
includer)) [[unlikely]] {
LOG_INFO(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(),
shader->getInfoDebugLog());
return VK_NULL_HANDLE;
return {};
}
// Even though there's only a single shader, we still need to link it to generate SPV
@ -191,7 +191,7 @@ vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, v
if (!program->link(messages)) {
LOG_INFO(Render_Vulkan, "Program Info Log:\n{}\n{}", program->getInfoLog(),
program->getInfoDebugLog());
return VK_NULL_HANDLE;
return {};
}
glslang::TIntermediate* intermediate = program->getIntermediate(lang);
@ -227,7 +227,7 @@ vk::ShaderModule CompileSPV(std::span<const u32> code, vk::Device device) {
UNREACHABLE_MSG("{}", err.what());
}
return VK_NULL_HANDLE;
return {};
}
} // namespace Vulkan

View file

@ -0,0 +1,201 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <limits>
#include "common/alignment.h"
#include "common/assert.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
namespace Vulkan {
namespace {
/// Returns a printable name for the buffer type, or "Invalid" for values outside the enum.
std::string_view BufferTypeName(BufferType type) {
    if (type == BufferType::Upload) {
        return "Upload";
    }
    if (type == BufferType::Download) {
        return "Download";
    }
    if (type == BufferType::Stream) {
        return "Stream";
    }
    return "Invalid";
}
/// Returns the memory property flags that are requested for the given buffer type.
vk::MemoryPropertyFlags MakePropertyFlags(BufferType type) {
    using Bits = vk::MemoryPropertyFlagBits;
    // Every known buffer type asks for host visible and coherent memory.
    const vk::MemoryPropertyFlags host_coherent = Bits::eHostVisible | Bits::eHostCoherent;

    if (type == BufferType::Upload) {
        return host_coherent;
    }
    if (type == BufferType::Download) {
        // Downloads additionally request host cached memory.
        return host_coherent | Bits::eHostCached;
    }
    if (type == BufferType::Stream) {
        // Stream buffers additionally request device local memory.
        return host_coherent | Bits::eDeviceLocal;
    }

    UNREACHABLE_MSG("Unknown buffer type {}", type);
    return Bits::eHostVisible;
}
/// Returns the index of the first memory type whose property flags contain every bit of
/// `wanted`, or std::nullopt when no memory type qualifies.
std::optional<u32> FindMemoryType(const vk::PhysicalDeviceMemoryProperties& properties,
                                  vk::MemoryPropertyFlags wanted) {
    u32 index = 0;
    while (index < properties.memoryTypeCount) {
        const vk::MemoryPropertyFlags available = properties.memoryTypes[index].propertyFlags;
        const bool satisfies_all = (available & wanted) == wanted;
        if (satisfies_all) {
            return index;
        }
        ++index;
    }
    return std::nullopt;
}
/// Picks the memory type index for a buffer type. The ideal flags are tried first; if no memory
/// type matches, the optional flags are dropped one after another (cached first, then coherent)
/// until a match is found. Asserts when nothing matches.
u32 GetMemoryType(const vk::PhysicalDeviceMemoryProperties& properties, BufferType type) {
    vk::MemoryPropertyFlags flags = MakePropertyFlags(type);
    std::optional preferred_type = FindMemoryType(properties, flags);

    // Flags that may be given up, in the order in which they are removed.
    constexpr std::array remove_flags = {
        vk::MemoryPropertyFlagBits::eHostCached,
        vk::MemoryPropertyFlagBits::eHostCoherent,
    };
    for (const vk::MemoryPropertyFlagBits dropped : remove_flags) {
        if (preferred_type) {
            break;
        }
        // Removals accumulate: a flag dropped in an earlier iteration stays dropped.
        flags &= ~dropped;
        preferred_type = FindMemoryType(properties, flags);
    }

    ASSERT_MSG(preferred_type, "No suitable memory type found");
    return preferred_type.value();
}
/// Number of watch entries each watch list is given when a StreamBuffer is constructed.
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
/// Number of watch entries added by Commit when the current watch list is about to run out.
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
} // Anonymous namespace
/// Creates the backing buffer and gives both watch lists their initial capacity.
StreamBuffer::StreamBuffer(const Instance& instance_, Scheduler& scheduler_,
vk::BufferUsageFlags usage_, u64 size, BufferType type_)
: instance{instance_}, scheduler{scheduler_}, device{instance.GetDevice()},
stream_buffer_size{size}, usage{usage_}, type{type_} {
// CreateBuffers may adjust stream_buffer_size to the size that was actually obtained.
CreateBuffers(size);
ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
}
/// Unmaps the memory, destroys the buffer and finally frees the memory that was bound to it.
StreamBuffer::~StreamBuffer() {
device.unmapMemory(memory);
device.destroyBuffer(buffer);
device.freeMemory(memory);
}
/// Reserves `size` bytes at the current (optionally aligned) offset of the stream buffer.
/// When the request does not fit in the remaining space the buffer wraps around to offset zero.
/// Before returning, waits for previously recorded watches that cover the reserved range.
/// @param size Number of bytes to reserve, must not exceed the stream buffer size.
/// @param alignment Required alignment of the returned offset, zero for no requirement.
/// @returns The mapped pointer to the reserved region, its offset inside the buffer and whether
/// the buffer wrapped around during this call.
std::tuple<u8*, u64, bool> StreamBuffer::Map(u64 size, u64 alignment) {
    ASSERT(size <= stream_buffer_size);
    mapped_size = size;

    if (alignment != 0) {
        offset = Common::AlignUp(offset, alignment);
    }

    const bool wrapped = offset + size > stream_buffer_size;
    if (wrapped) {
        // Remember how many watches the finished cycle used and start over from the beginning.
        invalidation_mark = current_watch_cursor;
        current_watch_cursor = 0;
        offset = 0;

        // The watches of the finished cycle become the ones to wait on.
        std::swap(previous_watches, current_watches);
        wait_cursor = 0;
        wait_bound = 0;
    }

    WaitPendingOperations(offset + size);
    return {mapped + offset, offset, wrapped};
}
/// Finishes the region reserved by the last Map call. For non coherent memory the committed
/// range is invalidated (download buffers) or flushed (all other buffers). Afterwards the buffer
/// offset advances by `size` and a watch with the current scheduler tick is recorded.
/// @param size Number of bytes that were actually used, at most the size passed to Map.
void StreamBuffer::Commit(u64 size) {
    ASSERT_MSG(size <= mapped_size, "Reserved size {} is too small compared to {}", mapped_size,
               size);

    if (!is_coherent) {
        const vk::MappedMemoryRange range = {
            .memory = memory,
            .offset = offset,
            .size = size,
        };
        if (type == BufferType::Download) {
            device.invalidateMappedMemoryRanges(range);
        } else {
            device.flushMappedMemoryRanges(range);
        }
    }
    offset += size;

    // Grow the watch list before the cursor can run past its end.
    if (current_watch_cursor + 1 >= current_watches.size()) {
        ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK);
    }

    Watch& watch = current_watches[current_watch_cursor];
    ++current_watch_cursor;
    watch.upper_bound = offset;
    watch.tick = scheduler.CurrentTick();
}
/// Creates the buffer, allocates and binds its memory and maps it persistently.
/// @param prefered_size Requested buffer size in bytes. The buffer is never made larger than half
/// of the heap that backs the selected memory type.
void StreamBuffer::CreateBuffers(u64 prefered_size) {
    const auto memory_properties = instance.GetPhysicalDevice().getMemoryProperties();
    const u32 preferred_type = GetMemoryType(memory_properties, type);
    const vk::MemoryType mem_type = memory_properties.memoryTypes[preferred_type];
    const u32 preferred_heap = mem_type.heapIndex;
    is_coherent =
        static_cast<bool>(mem_type.propertyFlags & vk::MemoryPropertyFlagBits::eHostCoherent);

    // Only use part of the preferred heap to avoid running out of memory.
    // As per DXVK's example, using `heap_size / 2`
    const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size;
    const VkDeviceSize allocable_size = heap_size / 2;
    const VkDeviceSize buffer_size = std::min<VkDeviceSize>(prefered_size, allocable_size);
    buffer = device.createBuffer({
        .size = buffer_size,
        .usage = usage,
    });

    // The memory requirement may be larger than the buffer itself. Offsets handed out by Map must
    // stay inside the buffer, so the usable size is the size the buffer was created with while the
    // allocation uses the size the driver asks for.
    const auto requirements = device.getBufferMemoryRequirements(buffer);
    stream_buffer_size = static_cast<u64>(buffer_size);

    LOG_INFO(Render_Vulkan, "Creating {} buffer with size {} KB with flags {}",
             BufferTypeName(type), stream_buffer_size / 1024,
             vk::to_string(mem_type.propertyFlags));

    memory = device.allocateMemory({
        .allocationSize = requirements.size,
        .memoryTypeIndex = preferred_type,
    });
    device.bindBufferMemory(buffer, memory, 0);
    mapped = reinterpret_cast<u8*>(device.mapMemory(memory, 0, VK_WHOLE_SIZE));
}
/// Appends `grow_size` value-initialized entries to the given watch list.
void StreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
    const std::size_t grown_size = watches.size() + grow_size;
    watches.resize(grown_size);
}
/// Waits on watches of the previous cycle, in order, until the waited range covers
/// `requested_upper_bound` or all watches of that cycle have been waited on.
/// Does nothing before the buffer has wrapped around for the first time.
void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
    if (!invalidation_mark.has_value()) {
        return;
    }
    for (; wait_cursor < *invalidation_mark && requested_upper_bound > wait_bound;
         ++wait_cursor) {
        const Watch& pending = previous_watches[wait_cursor];
        wait_bound = pending.upper_bound;
        scheduler.Wait(pending.tick);
    }
}
} // namespace Vulkan

View file

@ -0,0 +1,86 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <optional>
#include <span>
#include <tuple>
#include <vector>
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
/// Kind of stream buffer; selects the memory properties that are requested for it
/// (see MakePropertyFlags in vk_stream_buffer.cpp).
enum class BufferType : u32 {
Upload = 0, ///< Host visible and coherent memory.
Download = 1, ///< Host visible, coherent and cached memory.
Stream = 2, ///< Device local memory that is also host visible and coherent.
};
class Instance;
class Scheduler;
/// Persistently mapped buffer that hands out consecutive regions and wraps around to the
/// beginning when it is full. Regions are tracked with "watches" (scheduler ticks) so that a
/// region is only handed out again after the work recorded for it has been waited on.
class StreamBuffer final {
// NOTE(review): not referenced by any code visible in this class or its implementation.
static constexpr std::size_t MAX_BUFFER_VIEWS = 3;
public:
explicit StreamBuffer(const Instance& instance, Scheduler& scheduler,
vk::BufferUsageFlags usage, u64 size,
BufferType type = BufferType::Stream);
~StreamBuffer();
/**
* Reserves a region of memory from the stream buffer.
* @param size Size to reserve.
* @param alignment Alignment of the reserved offset, zero for no alignment requirement.
* @returns A tuple of a raw memory pointer (with offset added), the buffer offset and a flag
* that is true when the buffer wrapped around to its beginning during this call.
*/
std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment);
/// Marks "size" bytes of the mapped region as written. Flushes or invalidates the range when the
/// memory is not coherent and records a watch for the region.
void Commit(u64 size);
/// Returns the Vulkan buffer handle.
vk::Buffer Handle() const noexcept {
return buffer;
}
private:
/// Associates the end offset of a committed region with the scheduler tick it was committed at.
struct Watch {
u64 tick{};
u64 upper_bound{};
};
/// Creates the Vulkan buffer, allocates and binds its memory and maps it.
void CreateBuffers(u64 prefered_size);
/// Increases the amount of watches available.
void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
/// Waits on watches of the previous cycle until the requested upper bound is covered.
void WaitPendingOperations(u64 requested_upper_bound);
private:
const Instance& instance; ///< Vulkan instance.
Scheduler& scheduler; ///< Command scheduler.
vk::Device device; ///< Logical device, taken from the instance.
vk::Buffer buffer; ///< Mapped buffer.
vk::DeviceMemory memory; ///< Memory allocation.
u8* mapped{}; ///< Pointer to the mapped memory
u64 stream_buffer_size{}; ///< Stream buffer size.
vk::BufferUsageFlags usage{}; ///< Usage flags the buffer is created with.
BufferType type; ///< Buffer kind, set by the constructor.
u64 offset{}; ///< Buffer iterator.
u64 mapped_size{}; ///< Size reserved for the current copy.
bool is_coherent{}; ///< True if the buffer is coherent
std::vector<Watch> current_watches; ///< Watches recorded in the current iteration.
std::size_t current_watch_cursor{}; ///< Count of watches, reset on invalidation.
std::optional<std::size_t> invalidation_mark; ///< Number of watches used in the previous cycle.
std::vector<Watch> previous_watches; ///< Watches used in the previous iteration.
std::size_t wait_cursor{}; ///< Last watch being waited for completion.
u64 wait_bound{}; ///< Highest offset being watched for completion.
};
} // namespace Vulkan

View file

@ -0,0 +1,236 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <limits>
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/settings.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
// Profiling scopes used by Swapchain::AcquireNextImage and Swapchain::Present respectively.
MICROPROFILE_DEFINE(Vulkan_Acquire, "Vulkan", "Swapchain Acquire", MP_RGB(185, 66, 245));
MICROPROFILE_DEFINE(Vulkan_Present, "Vulkan", "Swapchain Present", MP_RGB(66, 185, 245));
namespace Vulkan {
/// Selects the surface format and creates the initial swapchain for the given surface.
/// @param instance_ Vulkan instance wrapper, must outlive the swapchain.
/// @param width Requested width in pixels, used when the surface does not dictate an extent.
/// @param height Requested height in pixels, used when the surface does not dictate an extent.
/// @param surface_ Surface to present to. The destructor destroys this surface.
Swapchain::Swapchain(const Instance& instance_, u32 width, u32 height, vk::SurfaceKHR surface_)
    : instance{instance_}, surface{surface_} {
    FindPresentFormat();
    // The present mode is selected by Create() itself, so it is not queried separately here.
    Create(width, height, surface);
}
/// Releases the swapchain resources and then destroys the surface that is currently in use.
Swapchain::~Swapchain() {
Destroy();
instance.GetInstance().destroySurfaceKHR(surface);
}
/// Creates (or recreates) the swapchain and its semaphores for the given surface and size.
/// Any previously created swapchain resources are destroyed first.
/// @param width_ Requested width in pixels, used when the surface does not dictate an extent.
/// @param height_ Requested height in pixels, used when the surface does not dictate an extent.
/// @param surface_ Surface to present to.
void Swapchain::Create(u32 width_, u32 height_, vk::SurfaceKHR surface_) {
    width = width_;
    height = height_;
    surface = surface_;
    needs_recreation = false;

    Destroy();

    SetPresentMode();
    SetSurfaceProperties();

    const std::array queue_family_indices = {
        instance.GetGraphicsQueueFamilyIndex(),
        instance.GetPresentQueueFamilyIndex(),
    };

    // Images only need to be shared when graphics and present use different queue families.
    const bool exclusive = queue_family_indices[0] == queue_family_indices[1];
    const u32 queue_family_indices_count = exclusive ? 1u : 2u;
    const vk::SharingMode sharing_mode =
        exclusive ? vk::SharingMode::eExclusive : vk::SharingMode::eConcurrent;
    const vk::SwapchainCreateInfoKHR swapchain_info = {
        .surface = surface,
        .minImageCount = image_count,
        .imageFormat = surface_format.format,
        .imageColorSpace = surface_format.colorSpace,
        .imageExtent = extent,
        .imageArrayLayers = 1,
        .imageUsage = vk::ImageUsageFlagBits::eColorAttachment |
                      vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst,
        .imageSharingMode = sharing_mode,
        .queueFamilyIndexCount = queue_family_indices_count,
        .pQueueFamilyIndices = queue_family_indices.data(),
        .preTransform = transform,
        .compositeAlpha = composite_alpha,
        .presentMode = present_mode,
        .clipped = true,
        .oldSwapchain = nullptr,
    };

    try {
        swapchain = instance.GetDevice().createSwapchainKHR(swapchain_info);
    } catch (const vk::SystemError& err) {
        LOG_CRITICAL(Render_Vulkan, "{}", err.what());
        UNREACHABLE();
    }

    SetupImages();
    RefreshSemaphores();

    // The image and semaphore arrays were rebuilt and may be shorter than before. Restart both
    // cursors so that stale values cannot index past the end of the new arrays.
    frame_index = 0;
    image_index = 0;
}
bool Swapchain::AcquireNextImage() {
MICROPROFILE_SCOPE(Vulkan_Acquire);
vk::Device device = instance.GetDevice();
vk::Result result =
device.acquireNextImageKHR(swapchain, std::numeric_limits<u64>::max(),
image_acquired[frame_index], VK_NULL_HANDLE, &image_index);
switch (result) {
case vk::Result::eSuccess:
break;
case vk::Result::eSuboptimalKHR:
case vk::Result::eErrorOutOfDateKHR:
needs_recreation = true;
break;
default:
LOG_CRITICAL(Render_Vulkan, "Swapchain acquire returned unknown result {}", result);
UNREACHABLE();
break;
}
return !needs_recreation;
}
void Swapchain::Present() {
if (needs_recreation) {
return;
}
const vk::PresentInfoKHR present_info = {
.waitSemaphoreCount = 1,
.pWaitSemaphores = &present_ready[image_index],
.swapchainCount = 1,
.pSwapchains = &swapchain,
.pImageIndices = &image_index,
};
MICROPROFILE_SCOPE(Vulkan_Present);
try {
[[maybe_unused]] vk::Result result = instance.GetPresentQueue().presentKHR(present_info);
} catch (vk::OutOfDateKHRError&) {
needs_recreation = true;
} catch (const vk::SystemError& err) {
LOG_CRITICAL(Render_Vulkan, "Swapchain presentation failed {}", err.what());
UNREACHABLE();
}
frame_index = (frame_index + 1) % image_count;
}
/// Selects the surface format used for the swapchain images. Only 8-bit RGBA and BGRA unorm
/// formats are accepted; logs a critical error when the surface offers neither.
void Swapchain::FindPresentFormat() {
    const auto formats = instance.GetPhysicalDevice().getSurfaceFormatsKHR(surface);

    // If the first reported format is undefined, the device doesn't care, so we'll just use RGBA.
    // The emptiness check keeps an unexpected empty list from being indexed.
    if (!formats.empty() && formats.front().format == vk::Format::eUndefined) {
        surface_format.format = vk::Format::eR8G8B8A8Unorm;
        surface_format.colorSpace = vk::ColorSpaceKHR::eSrgbNonlinear;
        return;
    }

    // Try to find a suitable format, the first match wins.
    for (const vk::SurfaceFormatKHR& sformat : formats) {
        const vk::Format format = sformat.format;
        if (format != vk::Format::eR8G8B8A8Unorm && format != vk::Format::eB8G8R8A8Unorm) {
            continue;
        }

        surface_format.format = format;
        surface_format.colorSpace = sformat.colorSpace;
        return;
    }

    LOG_CRITICAL(Render_Vulkan, "Unable to find required swapchain format!");
    UNREACHABLE();
}
/// Selects the present mode. With vsync enabled FIFO is used. Without vsync mailbox is preferred,
/// then immediate; if the surface supports neither, FIFO is kept.
void Swapchain::SetPresentMode() {
    // FIFO is the only present mode that every surface is required to support, which makes it
    // the safe default and fallback.
    present_mode = vk::PresentModeKHR::eFifo;
    if (!Settings::values.use_vsync_new) {
        const auto modes = instance.GetPhysicalDevice().getSurfacePresentModesKHR(surface);
        const auto has_mode = [&modes](vk::PresentModeKHR requested) {
            return std::find(modes.begin(), modes.end(), requested) != modes.end();
        };

        // Never pick a mode the surface did not report, creating a swapchain with it is invalid.
        if (has_mode(vk::PresentModeKHR::eMailbox)) {
            present_mode = vk::PresentModeKHR::eMailbox;
        } else if (has_mode(vk::PresentModeKHR::eImmediate)) {
            present_mode = vk::PresentModeKHR::eImmediate;
        }
    }
}
void Swapchain::SetSurfaceProperties() {
const vk::SurfaceCapabilitiesKHR capabilities =
instance.GetPhysicalDevice().getSurfaceCapabilitiesKHR(surface);
extent = capabilities.currentExtent;
if (capabilities.currentExtent.width == std::numeric_limits<u32>::max()) {
extent.width = std::max(capabilities.minImageExtent.width,
std::min(capabilities.maxImageExtent.width, width));
extent.height = std::max(capabilities.minImageExtent.height,
std::min(capabilities.maxImageExtent.height, height));
}
// Select number of images in swap chain, we prefer one buffer in the background to work on
image_count = capabilities.minImageCount + 1;
if (capabilities.maxImageCount > 0) {
image_count = std::min(image_count, capabilities.maxImageCount);
}
// Prefer identity transform if possible
transform = vk::SurfaceTransformFlagBitsKHR::eIdentity;
if (!(capabilities.supportedTransforms & transform)) {
transform = capabilities.currentTransform;
}
// Opaque is not supported everywhere.
composite_alpha = vk::CompositeAlphaFlagBitsKHR::eOpaque;
if (!(capabilities.supportedCompositeAlpha & vk::CompositeAlphaFlagBitsKHR::eOpaque)) {
composite_alpha = vk::CompositeAlphaFlagBitsKHR::eInherit;
}
}
void Swapchain::Destroy() {
vk::Device device = instance.GetDevice();
if (swapchain) {
device.destroySwapchainKHR(swapchain);
}
for (u32 i = 0; i < image_count; i++) {
device.destroySemaphore(image_acquired[i]);
device.destroySemaphore(present_ready[i]);
}
image_acquired.clear();
present_ready.clear();
}
void Swapchain::RefreshSemaphores() {
const vk::Device device = instance.GetDevice();
image_acquired.resize(image_count);
present_ready.resize(image_count);
for (vk::Semaphore& semaphore : image_acquired) {
semaphore = device.createSemaphore({});
}
for (vk::Semaphore& semaphore : present_ready) {
semaphore = device.createSemaphore({});
}
}
void Swapchain::SetupImages() {
vk::Device device = instance.GetDevice();
images = device.getSwapchainImagesKHR(swapchain);
image_count = static_cast<u32>(images.size());
}
} // namespace Vulkan

View file

@ -0,0 +1,110 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <mutex>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Instance;
class Scheduler;
/// Wraps a Vulkan swapchain together with the semaphores used to synchronize image acquisition
/// and presentation.
class Swapchain {
public:
    explicit Swapchain(const Instance& instance, u32 width, u32 height, vk::SurfaceKHR surface);
    ~Swapchain();

    /// Creates (or recreates) the swapchain with a given size.
    void Create(u32 width, u32 height, vk::SurfaceKHR surface);

    /// Acquires the next image in the swapchain.
    /// @returns false when the swapchain has to be recreated before it can be used again.
    bool AcquireNextImage();

    /// Presents the current image and move to the next one
    void Present();

    /// Returns the surface the swapchain presents to.
    vk::SurfaceKHR GetSurface() const {
        return surface;
    }

    /// Returns the swapchain image that was acquired last.
    vk::Image Image() const {
        return images[image_index];
    }

    /// Returns the format and color space of the swapchain images.
    vk::SurfaceFormatKHR GetSurfaceFormat() const {
        return surface_format;
    }

    /// Returns the swapchain handle.
    vk::SwapchainKHR GetHandle() const {
        return swapchain;
    }

    /// Returns the width that was requested when the swapchain was created.
    u32 GetWidth() const {
        return width;
    }

    /// Returns the height that was requested when the swapchain was created.
    u32 GetHeight() const {
        return height;
    }

    /// Returns the number of images in the swapchain.
    u32 GetImageCount() const {
        return image_count;
    }

    /// Returns the extent the swapchain images were created with.
    vk::Extent2D GetExtent() const {
        return extent;
    }

    /// Returns the semaphore signalled when the image of the current frame has been acquired.
    [[nodiscard]] vk::Semaphore GetImageAcquiredSemaphore() const {
        return image_acquired[frame_index];
    }

    /// Returns the semaphore that presentation of the current image waits on.
    [[nodiscard]] vk::Semaphore GetPresentReadySemaphore() const {
        return present_ready[image_index];
    }

private:
    /// Selects the best available swapchain image format
    void FindPresentFormat();

    /// Sets the best available present mode
    void SetPresentMode();

    /// Sets the surface properties according to device capabilities
    void SetSurfaceProperties();

    /// Destroys current swapchain resources
    void Destroy();

    /// Retrieves the swapchain images and updates the image count
    void SetupImages();

    /// Creates the image acquired and present ready semaphores
    void RefreshSemaphores();

private:
    const Instance& instance;
    vk::SwapchainKHR swapchain{};
    vk::SurfaceKHR surface{};
    vk::SurfaceFormatKHR surface_format;
    // The enum members below get explicit defaults so that they never hold an indeterminate
    // value; the real values are selected when the swapchain is created.
    vk::PresentModeKHR present_mode = vk::PresentModeKHR::eFifo;
    vk::Extent2D extent;
    vk::SurfaceTransformFlagBitsKHR transform = vk::SurfaceTransformFlagBitsKHR::eIdentity;
    vk::CompositeAlphaFlagBitsKHR composite_alpha = vk::CompositeAlphaFlagBitsKHR::eOpaque;
    std::vector<vk::Image> images;
    std::vector<vk::Semaphore> image_acquired; ///< Indexed by frame_index.
    std::vector<vk::Semaphore> present_ready;  ///< Indexed by image_index.
    u32 width = 0;
    u32 height = 0;
    u32 image_count = 0;
    u32 image_index = 0; ///< Index of the image acquired last.
    u32 frame_index = 0; ///< Advanced by Present, wraps at image_count.
    bool needs_recreation = true;
};
} // namespace Vulkan

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,295 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <deque>
#include <span>
#include "video_core/rasterizer_cache/framebuffer_base.h"
#include "video_core/rasterizer_cache/rasterizer_cache_base.h"
#include "video_core/rasterizer_cache/surface_base.h"
#include "video_core/renderer_vulkan/vk_blit_helper.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
VK_DEFINE_HANDLE(VmaAllocation)
namespace VideoCore {
struct Material;
}
namespace Vulkan {
class Instance;
class RenderpassCache;
class DescriptorPool;
class DescriptorSetProvider;
class Surface;
/// Bundles an image with its memory allocation and an image view.
struct Handle {
    /// Value-initialized so that a default constructed Handle holds a null allocation instead of
    /// an indeterminate pointer.
    VmaAllocation alloc{};
    vk::Image image;
    vk::UniqueImageView image_view;
};
/**
 * Provides texture manipulation functions to the rasterizer cache
 * Separating this into a class makes it easier to abstract graphics API code
 */
class TextureRuntime {
friend class Surface;
public:
explicit TextureRuntime(const Instance& instance, Scheduler& scheduler,
RenderpassCache& renderpass_cache, DescriptorPool& pool,
DescriptorSetProvider& texture_provider, u32 num_swapchain_images);
~TextureRuntime();
/// Returns the Vulkan instance wrapper the runtime was created with
const Instance& GetInstance() const {
return instance;
}
/// Returns the scheduler the runtime was created with
Scheduler& GetScheduler() const {
return scheduler;
}
/// Returns the renderpass cache the runtime was created with
RenderpassCache& GetRenderpassCache() {
return renderpass_cache;
}
/// Returns the removal threshold ticks for the garbage collector
u32 RemoveThreshold();
/// Maps an internal staging buffer of the provided size for pixel uploads/downloads
VideoCore::StagingData FindStaging(u32 size, bool upload);
/// Attempts to reinterpret a rectangle of source to another rectangle of dest
bool Reinterpret(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit);
/// Fills the rectangle of the texture with the clear value provided
bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear);
/// Copies a rectangle of source to another rectangle of dest
bool CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy);
/// Blits a rectangle of surface to another rectangle of dest
bool BlitTextures(Surface& surface, Surface& dest, const VideoCore::TextureBlit& blit);
/// Generates mipmaps for all the available levels of the texture
void GenerateMipmaps(Surface& surface);
/// Returns true if the provided pixel format needs conversion
bool NeedsConversion(VideoCore::PixelFormat format) const;
/// Removes any descriptor sets that contain the provided image view.
void FreeDescriptorSetsWithImage(vk::ImageView image_view);
private:
/// Clears a partial texture rect using a clear rectangle
void ClearTextureWithRenderpass(Surface& surface, const VideoCore::TextureClear& clear);
private:
const Instance& instance;
Scheduler& scheduler;
RenderpassCache& renderpass_cache;
DescriptorSetProvider& texture_provider;
BlitHelper blit_helper;
StreamBuffer upload_buffer; ///< NOTE(review): presumably backs FindStaging uploads - confirm.
StreamBuffer download_buffer; ///< NOTE(review): presumably backs FindStaging downloads - confirm.
u32 num_swapchain_images;
};
/// Vulkan implementation of a rasterizer cache surface. Move-only.
class Surface : public VideoCore::SurfaceBase {
friend class TextureRuntime;
public:
explicit Surface(TextureRuntime& runtime, const VideoCore::SurfaceParams& params);
/// Creates a surface from an existing surface description and a custom material.
explicit Surface(TextureRuntime& runtime, const VideoCore::SurfaceBase& surface,
const VideoCore::Material* materal);
~Surface();
Surface(const Surface&) = delete;
Surface& operator=(const Surface&) = delete;
Surface(Surface&& o) noexcept = default;
Surface& operator=(Surface&& o) noexcept = default;
/// Returns the image aspect flags of the surface format
vk::ImageAspectFlags Aspect() const noexcept {
return traits.aspect;
}
/// Returns the image at index, otherwise the base image
vk::Image Image(u32 index = 1) const noexcept;
/// Returns the image view at index, otherwise the base view
vk::ImageView ImageView(u32 index = 1) const noexcept;
/// Returns a copy of the upscaled image handle, used for feedback loops.
vk::ImageView CopyImageView() noexcept;
/// Returns the framebuffer view of the surface image
vk::ImageView FramebufferView() noexcept;
/// Returns the depth view of the surface image
vk::ImageView DepthView() noexcept;
/// Returns the stencil view of the surface image
vk::ImageView StencilView() noexcept;
/// Returns the R32 image view used for atomic load/store
vk::ImageView StorageView() noexcept;
/// Returns a framebuffer handle for rendering to this surface
vk::Framebuffer Framebuffer() noexcept;
/// Uploads pixel data in staging to a rectangle region of the surface texture
void Upload(const VideoCore::BufferTextureCopy& upload, const VideoCore::StagingData& staging);
/// Uploads the custom material to the surface allocation.
void UploadCustom(const VideoCore::Material* material, u32 level);
/// Downloads pixel data to staging from a rectangle region of the surface texture
void Download(const VideoCore::BufferTextureCopy& download,
const VideoCore::StagingData& staging);
/// Scales up the surface to match the new resolution scale.
void ScaleUp(u32 new_scale);
/// Returns the bpp of the internal surface format
u32 GetInternalBytesPerPixel() const;
/// Returns the access flags indicative of the surface
vk::AccessFlags AccessFlags() const noexcept;
/// Returns the pipeline stage flags indicative of the surface
vk::PipelineStageFlags PipelineStageFlags() const noexcept;
private:
/// Performs blit between the scaled/unscaled images
void BlitScale(const VideoCore::TextureBlit& blit, bool up_scale);
/// Downloads scaled depth stencil data
void DepthStencilDownload(const VideoCore::BufferTextureCopy& download,
const VideoCore::StagingData& staging);
public:
TextureRuntime* runtime;
const Instance* instance;
Scheduler* scheduler;
FormatTraits traits;
// NOTE(review): the meaning of each slot is not visible in this header; Image()/ImageView()
// select a slot by index - confirm against the implementation.
std::array<Handle, 3> handles{};
std::array<vk::UniqueFramebuffer, 2> framebuffers{};
// Default-initialized, unlike `handles` which is value-initialized.
Handle copy_handle;
vk::UniqueImageView depth_view;
vk::UniqueImageView stencil_view;
vk::UniqueImageView storage_view;
bool is_framebuffer{};
bool is_storage{};
};
/// Move-only render target description layered on top of VideoCore::FramebufferParams.
/// Per-attachment data lives in two-element arrays addressed through Index(type), and the
/// Vulkan framebuffer handle is held in a vk::UniqueFramebuffer.
class Framebuffer : public VideoCore::FramebufferParams {
public:
    /// Builds the framebuffer described by `params` from the given color and depth-stencil
    /// surfaces. NOTE(review): whether either surface pointer may be null is decided by the
    /// out-of-line definition — confirm there.
    explicit Framebuffer(TextureRuntime& runtime, const VideoCore::FramebufferParams& params,
                         Surface* color, Surface* depth_stencil);
    ~Framebuffer();

    // Copying is disabled; moving uses the compiler-generated member-wise move.
    Framebuffer(const Framebuffer&) = delete;
    Framebuffer& operator=(const Framebuffer&) = delete;
    Framebuffer(Framebuffer&& o) noexcept = default;
    Framebuffer& operator=(Framebuffer&& o) noexcept = default;

    /// Returns the pixel format stored in the attachment slot selected by `type`.
    [[nodiscard]] VideoCore::PixelFormat Format(VideoCore::SurfaceType type) const noexcept {
        return formats[Index(type)];
    }

    /// Returns the image view stored in the attachment slot selected by `type`.
    [[nodiscard]] vk::ImageView ImageView(VideoCore::SurfaceType type) const noexcept {
        return image_views[Index(type)];
    }

    /// Returns the raw framebuffer handle held by this object.
    [[nodiscard]] vk::Framebuffer Handle() const noexcept {
        return framebuffer.get();
    }

    /// Returns a copy of the two attachment image handles.
    [[nodiscard]] std::array<vk::Image, 2> Images() const noexcept {
        return images;
    }

    /// Returns a copy of the aspect flags stored for the two attachment slots.
    [[nodiscard]] std::array<vk::ImageAspectFlags, 2> Aspects() const noexcept {
        return aspects;
    }

    /// Returns the render pass handle stored in this framebuffer.
    [[nodiscard]] vk::RenderPass RenderPass() const noexcept {
        return render_pass;
    }

    /// Returns the stored resolution scale (defaults to 1).
    [[nodiscard]] u32 Scale() const noexcept {
        return res_scale;
    }

    /// Returns the stored width.
    [[nodiscard]] u32 Width() const noexcept {
        return width;
    }

    /// Returns the stored height.
    [[nodiscard]] u32 Height() const noexcept {
        return height;
    }

private:
    std::array<vk::Image, 2> images{};
    std::array<vk::ImageView, 2> image_views{};
    vk::UniqueFramebuffer framebuffer;
    vk::RenderPass render_pass;
    std::array<vk::ImageAspectFlags, 2> aspects{};
    // Both slots start out as Invalid until a format is recorded for them.
    std::array<VideoCore::PixelFormat, 2> formats{VideoCore::PixelFormat::Invalid,
                                                  VideoCore::PixelFormat::Invalid};
    u32 width{};
    u32 height{};
    u32 res_scale{1};
};
/// Move-only holder of a vk::UniqueSampler, constructed from a TextureRuntime and a set of
/// VideoCore::SamplerParams.
class Sampler {
public:
    Sampler(TextureRuntime& runtime, const VideoCore::SamplerParams& params);
    ~Sampler();

    // Moves are compiler-generated; copies are disabled.
    Sampler(Sampler&& other) noexcept = default;
    Sampler& operator=(Sampler&& other) noexcept = default;
    Sampler(const Sampler&) = delete;
    Sampler& operator=(const Sampler&) = delete;

    /// Returns the raw sampler handle held by this object.
    [[nodiscard]] vk::Sampler Handle() const noexcept {
        return *sampler;
    }

private:
    vk::UniqueSampler sampler;
};
/// Scope object created from a runtime, a color and a text label; its non-template constructor
/// and destructor are defined out of line.
/// NOTE(review): presumably opens a labelled debug region on construction and closes it on
/// destruction — confirm in the implementation.
class DebugScope {
public:
/// Formats the label with fmt::format and delegates to the string_view constructor below.
/// The arguments are taken by value, so std::forward<T> hands each one to fmt::format as an
/// rvalue.
template <typename... T>
explicit DebugScope(TextureRuntime& runtime, Common::Vec4f color,
fmt::format_string<T...> format, T... args)
: DebugScope{runtime, color, fmt::format(format, std::forward<T>(args)...)} {}
/// Creates the scope from an already formatted label.
explicit DebugScope(TextureRuntime& runtime, Common::Vec4f color, std::string_view label);
~DebugScope();
private:
/// Scheduler reference kept for the lifetime of the scope.
Scheduler& scheduler;
/// NOTE(review): presumably records whether a debugging tool was detected so that work can be
/// skipped otherwise — confirm where it is initialized.
bool has_debug_tool;
};
/// Type bundle that names the Vulkan implementation of each class the generic rasterizer
/// cache is parameterized on.
struct Traits {
    // Backend runtime type.
    using Runtime = Vulkan::TextureRuntime;
    // Per-resource wrapper types.
    using Sampler = Vulkan::Sampler;
    using Surface = Vulkan::Surface;
    using Framebuffer = Vulkan::Framebuffer;
    // Debug scope helper type.
    using DebugScope = Vulkan::DebugScope;
};
/// Rasterizer cache type instantiated with the Vulkan type bundle.
using RasterizerCache = VideoCore::RasterizerCache<Traits>;
} // namespace Vulkan

View file

@ -14,6 +14,7 @@
#include "video_core/renderer_opengl/gl_vars.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/renderer_software/renderer_software.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/video_core.h"
namespace VideoCore {
@ -39,6 +40,9 @@ void Init(Frontend::EmuWindow& emu_window, Frontend::EmuWindow* secondary_window
case Settings::GraphicsAPI::Software:
g_renderer = std::make_unique<SwRenderer::RendererSoftware>(system, emu_window);
break;
case Settings::GraphicsAPI::Vulkan:
g_renderer = std::make_unique<Vulkan::RendererVulkan>(system, emu_window, secondary_window);
break;
case Settings::GraphicsAPI::OpenGL:
g_renderer = std::make_unique<OpenGL::RendererOpenGL>(system, emu_window, secondary_window);
break;