renderer_vulkan: Import host memory for screenshots (#7132)

Wunk authored 2023-11-12 13:02:55 -08:00, committed by GitHub
parent 23ca10472a
commit 831c9c4a38
10 changed files with 254 additions and 21 deletions

common/memory_detect.cpp

@@ -8,12 +8,11 @@
 #include <sysinfoapi.h>
 #else
 #include <sys/types.h>
+#include <unistd.h>
 #if defined(__APPLE__) || defined(__FreeBSD__)
 #include <sys/sysctl.h>
 #elif defined(__linux__)
 #include <sys/sysinfo.h>
-#else
-#include <unistd.h>
 #endif
 #endif
@@ -64,4 +63,14 @@ const MemoryInfo GetMemInfo() {
     return mem_info;
 }
 
+u64 GetPageSize() {
+#ifdef _WIN32
+    SYSTEM_INFO info;
+    ::GetSystemInfo(&info);
+    return static_cast<u64>(info.dwPageSize);
+#else
+    return static_cast<u64>(sysconf(_SC_PAGESIZE));
+#endif
+}
+
 } // namespace Common

common/memory_detect.h

@@ -19,4 +19,10 @@ struct MemoryInfo {
  */
 [[nodiscard]] const MemoryInfo GetMemInfo();
 
+/**
+ * Gets the page size of the host system
+ * @return Page size in bytes of the host system
+ */
+u64 GetPageSize();
+
 } // namespace Common
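Note: a minimal usage sketch for the new helper (the RoundUpToPages wrapper below is hypothetical and not part of this commit), showing how a byte count can be rounded up to whole host pages with Common::GetPageSize():

    #include "common/common_types.h"
    #include "common/memory_detect.h"

    // Hypothetical helper: round a byte count up to a whole number of host pages.
    // Common::GetPageSize() is typically 4096, or 16384 on Apple Silicon hosts.
    u64 RoundUpToPages(u64 bytes) {
        const u64 page_size = Common::GetPageSize();
        return (bytes + page_size - 1) / page_size * page_size;
    }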

video_core/CMakeLists.txt

@@ -109,6 +109,8 @@ add_library(video_core STATIC
     renderer_vulkan/vk_graphics_pipeline.h
     renderer_vulkan/vk_master_semaphore.cpp
     renderer_vulkan/vk_master_semaphore.h
+    renderer_vulkan/vk_memory_util.cpp
+    renderer_vulkan/vk_memory_util.h
     renderer_vulkan/vk_rasterizer.cpp
     renderer_vulkan/vk_rasterizer.h
     renderer_vulkan/vk_rasterizer_cache.cpp

video_core/renderer_vulkan/renderer_vulkan.cpp

@@ -4,6 +4,7 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "common/memory_detect.h"
 #include "common/microprofile.h"
 #include "common/settings.h"
 #include "common/texture.h"
@@ -13,6 +14,7 @@
 #include "core/hw/hw.h"
 #include "core/hw/lcd.h"
 #include "video_core/renderer_vulkan/renderer_vulkan.h"
+#include "video_core/renderer_vulkan/vk_memory_util.h"
 #include "video_core/renderer_vulkan/vk_shader_util.h"
 
 #include "video_core/host_shaders/vulkan_present_anaglyph_frag_spv.h"
@@ -865,6 +867,16 @@ void RendererVulkan::RenderScreenshot() {
         return;
     }
 
+    if (!TryRenderScreenshotWithHostMemory()) {
+        RenderScreenshotWithStagingCopy();
+    }
+
+    settings.screenshot_complete_callback(false);
+}
+
+void RendererVulkan::RenderScreenshotWithStagingCopy() {
+    const vk::Device device = instance.GetDevice();
+
     const Layout::FramebufferLayout layout{settings.screenshot_framebuffer_layout};
     const u32 width = layout.width;
     const u32 height = layout.height;
@@ -895,6 +907,7 @@
         LOG_CRITICAL(Render_Vulkan, "Failed allocating texture with error {}", result);
         UNREACHABLE();
     }
+
     vk::Buffer staging_buffer{unsafe_buffer};
 
     Frame frame{};
@@ -969,18 +982,169 @@
     // Ensure the copy is fully completed before saving the screenshot
     scheduler.Finish();
 
-    const vk::Device device = instance.GetDevice();
-
     // Copy backing image data to the QImage screenshot buffer
     std::memcpy(settings.screenshot_bits, alloc_info.pMappedData, staging_buffer_info.size);
 
     // Destroy allocated resources
-    vmaDestroyBuffer(instance.GetAllocator(), unsafe_buffer, allocation);
+    vmaDestroyBuffer(instance.GetAllocator(), staging_buffer, allocation);
     vmaDestroyImage(instance.GetAllocator(), frame.image, frame.allocation);
     device.destroyFramebuffer(frame.framebuffer);
     device.destroyImageView(frame.image_view);
+}
 
-    settings.screenshot_complete_callback(false);
+bool RendererVulkan::TryRenderScreenshotWithHostMemory() {
+    // If the host-memory import alignment matches the allocation granularity of the platform, then
+    // the entire span of memory can be trivially imported
+    const bool trivial_import =
+        instance.IsExternalMemoryHostSupported() &&
+        instance.GetMinImportedHostPointerAlignment() == Common::GetPageSize();
+    if (!trivial_import) {
+        return false;
+    }
+
+    const vk::Device device = instance.GetDevice();
+    const Layout::FramebufferLayout layout{settings.screenshot_framebuffer_layout};
+    const u32 width = layout.width;
+    const u32 height = layout.height;
+
+    // For a span of memory [x, x + s], import [AlignDown(x, alignment), AlignUp(x + s, alignment)]
+    // and maintain an offset to the start of the data
+    const u64 import_alignment = instance.GetMinImportedHostPointerAlignment();
+    const uintptr_t address = reinterpret_cast<uintptr_t>(settings.screenshot_bits);
+    void* aligned_pointer = reinterpret_cast<void*>(Common::AlignDown(address, import_alignment));
+    const u64 offset = address % import_alignment;
+    const u64 aligned_size = Common::AlignUp(offset + width * height * 4ull, import_alignment);
+
+    // Buffer<->Image mapping for the imported buffer
+    const vk::BufferImageCopy buffer_image_copy = {
+        .bufferOffset = offset,
+        .bufferRowLength = 0,
+        .bufferImageHeight = 0,
+        .imageSubresource =
+            {
+                .aspectMask = vk::ImageAspectFlagBits::eColor,
+                .mipLevel = 0,
+                .baseArrayLayer = 0,
+                .layerCount = 1,
+            },
+        .imageOffset = {0, 0, 0},
+        .imageExtent = {width, height, 1},
+    };
+
+    const vk::MemoryHostPointerPropertiesEXT import_properties =
+        device.getMemoryHostPointerPropertiesEXT(
+            vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, aligned_pointer);
+    if (!import_properties.memoryTypeBits) {
+        // Could not import memory
+        return false;
+    }
+
+    const std::optional<u32> memory_type_index = FindMemoryType(
+        instance.GetPhysicalDevice().getMemoryProperties(),
+        vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
+        import_properties.memoryTypeBits);
+    if (!memory_type_index.has_value()) {
+        // Could not find memory type index
+        return false;
+    }
+
+    const vk::StructureChain<vk::MemoryAllocateInfo, vk::ImportMemoryHostPointerInfoEXT>
+        allocation_chain = {
+            vk::MemoryAllocateInfo{
+                .allocationSize = aligned_size,
+                .memoryTypeIndex = memory_type_index.value(),
+            },
+            vk::ImportMemoryHostPointerInfoEXT{
+                .handleType = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT,
+                .pHostPointer = aligned_pointer,
+            },
+        };
+
+    // Import host memory
+    const vk::UniqueDeviceMemory imported_memory =
+        device.allocateMemoryUnique(allocation_chain.get());
+
+    const vk::StructureChain<vk::BufferCreateInfo, vk::ExternalMemoryBufferCreateInfo> buffer_info =
+        {
+            vk::BufferCreateInfo{
+                .size = aligned_size,
+                .usage = vk::BufferUsageFlagBits::eTransferDst,
+                .sharingMode = vk::SharingMode::eExclusive,
+            },
+            vk::ExternalMemoryBufferCreateInfo{
+                .handleTypes = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT,
+            },
+        };
+
+    // Bind imported memory to buffer
+    const vk::UniqueBuffer imported_buffer = device.createBufferUnique(buffer_info.get());
+    device.bindBufferMemory(imported_buffer.get(), imported_memory.get(), 0);
+
+    Frame frame{};
+    main_window.RecreateFrame(&frame, width, height);
+    DrawScreens(&frame, layout, false);
+
+    scheduler.Record([buffer_image_copy, source_image = frame.image,
+                      imported_buffer = imported_buffer.get()](vk::CommandBuffer cmdbuf) {
+        const vk::ImageMemoryBarrier read_barrier = {
+            .srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
+            .dstAccessMask = vk::AccessFlagBits::eTransferRead,
+            .oldLayout = vk::ImageLayout::eTransferSrcOptimal,
+            .newLayout = vk::ImageLayout::eTransferSrcOptimal,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .image = source_image,
+            .subresourceRange{
+                .aspectMask = vk::ImageAspectFlagBits::eColor,
+                .baseMipLevel = 0,
+                .levelCount = VK_REMAINING_MIP_LEVELS,
+                .baseArrayLayer = 0,
+                .layerCount = VK_REMAINING_ARRAY_LAYERS,
+            },
+        };
+        const vk::ImageMemoryBarrier write_barrier = {
+            .srcAccessMask = vk::AccessFlagBits::eTransferRead,
+            .dstAccessMask = vk::AccessFlagBits::eMemoryWrite,
+            .oldLayout = vk::ImageLayout::eTransferSrcOptimal,
+            .newLayout = vk::ImageLayout::eTransferSrcOptimal,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .image = source_image,
+            .subresourceRange{
+                .aspectMask = vk::ImageAspectFlagBits::eColor,
+                .baseMipLevel = 0,
+                .levelCount = VK_REMAINING_MIP_LEVELS,
+                .baseArrayLayer = 0,
+                .layerCount = VK_REMAINING_ARRAY_LAYERS,
+            },
+        };
+        static constexpr vk::MemoryBarrier memory_write_barrier = {
+            .srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
+            .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
+        };
+
+        cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
+                               vk::PipelineStageFlagBits::eTransfer,
+                               vk::DependencyFlagBits::eByRegion, {}, {}, read_barrier);
+
+        cmdbuf.copyImageToBuffer(source_image, vk::ImageLayout::eTransferSrcOptimal,
+                                 imported_buffer, buffer_image_copy);
+
+        cmdbuf.pipelineBarrier(
+            vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands,
+            vk::DependencyFlagBits::eByRegion, memory_write_barrier, {}, write_barrier);
+    });
+
+    // Ensure the copy is fully completed before saving the screenshot
+    scheduler.Finish();
+
+    // Image data has been copied directly to host memory
+    device.destroyFramebuffer(frame.framebuffer);
+    device.destroyImageView(frame.image_view);
+
+    return true;
 }
 
 } // namespace Vulkan
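Note: the pointer-alignment arithmetic in TryRenderScreenshotWithHostMemory can be illustrated with concrete numbers. The sketch below is standalone: align_down/align_up are local stand-ins for Common::AlignDown/Common::AlignUp, and the address and framebuffer size are made up for illustration.

    #include <cassert>
    #include <cstdint>

    // Local stand-ins for Common::AlignDown / Common::AlignUp (power-of-two alignments only).
    constexpr std::uint64_t align_down(std::uint64_t value, std::uint64_t alignment) {
        return value & ~(alignment - 1);
    }
    constexpr std::uint64_t align_up(std::uint64_t value, std::uint64_t alignment) {
        return (value + alignment - 1) & ~(alignment - 1);
    }

    int main() {
        // Hypothetical values: an RGBA8 screenshot buffer at an unaligned address, on a host
        // whose minImportedHostPointerAlignment equals the 4 KiB page size.
        const std::uint64_t import_alignment = 0x1000;
        const std::uint64_t address = 0x7f0012345678ull; // settings.screenshot_bits
        const std::uint64_t size = 400ull * 480ull * 4ull; // width * height * 4 bytes

        const std::uint64_t aligned_pointer = align_down(address, import_alignment); // 0x7f0012345000
        const std::uint64_t offset = address % import_alignment;                     // 0x678
        const std::uint64_t aligned_size = align_up(offset + size, import_alignment);

        // The imported range [aligned_pointer, aligned_pointer + aligned_size) covers the whole
        // screenshot, and bufferOffset = offset points the copy back at the first pixel.
        assert(aligned_pointer + offset == address);
        assert(aligned_pointer + aligned_size >= address + size);
        return 0;
    }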

video_core/renderer_vulkan/renderer_vulkan.h

@@ -90,6 +90,8 @@ private:
     void ConfigureRenderPipeline();
     void PrepareRendertarget();
     void RenderScreenshot();
+    void RenderScreenshotWithStagingCopy();
+    bool TryRenderScreenshotWithHostMemory();
     void PrepareDraw(Frame* frame, const Layout::FramebufferLayout& layout);
     void RenderToWindow(PresentWindow& window, const Layout::FramebufferLayout& layout,
                         bool flipped);

video_core/renderer_vulkan/vk_instance.cpp

@@ -407,7 +407,8 @@ bool Instance::CreateDevice() {
             vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR>();
     const vk::StructureChain properties_chain =
         physical_device.getProperties2<vk::PhysicalDeviceProperties2,
-                                       vk::PhysicalDevicePortabilitySubsetPropertiesKHR>();
+                                       vk::PhysicalDevicePortabilitySubsetPropertiesKHR,
+                                       vk::PhysicalDeviceExternalMemoryHostPropertiesEXT>();
     features = feature_chain.get().features;
 
     if (available_extensions.empty()) {
@@ -415,7 +416,7 @@
         return false;
     }
 
-    boost::container::static_vector<const char*, 12> enabled_extensions;
+    boost::container::static_vector<const char*, 13> enabled_extensions;
     const auto add_extension = [&](std::string_view extension, bool blacklist = false,
                                    std::string_view reason = "") -> bool {
         const auto result =
@@ -445,6 +446,7 @@ bool Instance::CreateDevice() {
     add_extension(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
     image_format_list = add_extension(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME);
     shader_stencil_export = add_extension(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
+    external_memory_host = add_extension(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
     tooling_info = add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME);
     const bool has_timeline_semaphores =
         add_extension(VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, is_qualcomm || is_turnip,
@@ -589,6 +591,11 @@
         device_chain.unlink<vk::PhysicalDevicePipelineCreationCacheControlFeaturesEXT>();
     }
 
+    if (external_memory_host) {
+        PROP_GET(vk::PhysicalDeviceExternalMemoryHostPropertiesEXT, minImportedHostPointerAlignment,
+                 min_imported_host_pointer_alignment);
+    }
+
     if (has_fragment_shader_barycentric) {
         FEAT_SET(vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR, fragmentShaderBarycentric,
                  fragment_shader_barycentric)

video_core/renderer_vulkan/vk_instance.h

@@ -168,6 +168,11 @@
         return shader_stencil_export;
     }
 
+    /// Returns true when VK_EXT_external_memory_host is supported
+    bool IsExternalMemoryHostSupported() const {
+        return external_memory_host;
+    }
+
     /// Returns true when VK_KHR_fragment_shader_barycentric is supported
     bool IsFragmentShaderBarycentricSupported() const {
         return fragment_shader_barycentric;
@@ -248,6 +253,11 @@
         return min_vertex_stride_alignment;
     }
 
+    /// Returns the minimum imported host pointer alignment
+    u64 GetMinImportedHostPointerAlignment() const {
+        return min_imported_host_pointer_alignment;
+    }
+
     /// Returns true if commands should be flushed at the end of each major renderpass
     bool ShouldFlush() const {
         return driver_id == vk::DriverIdKHR::eArmProprietary ||
@@ -314,6 +324,8 @@
     bool pipeline_creation_cache_control{};
     bool fragment_shader_barycentric{};
     bool shader_stencil_export{};
+    bool external_memory_host{};
+    u64 min_imported_host_pointer_alignment{};
     bool tooling_info{};
     bool debug_utils_supported{};
     bool has_nsight_graphics{};

video_core/renderer_vulkan/vk_memory_util.cpp (new file)

@@ -0,0 +1,23 @@
+// Copyright 2023 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/renderer_vulkan/vk_memory_util.h"
+
+namespace Vulkan {
+
+std::optional<u32> FindMemoryType(const vk::PhysicalDeviceMemoryProperties& properties,
+                                  vk::MemoryPropertyFlags wanted, std::bitset<32> memory_type_mask,
+                                  vk::MemoryPropertyFlags excluded) {
+    for (u32 i = 0; i < properties.memoryTypeCount; ++i) {
+        if (!memory_type_mask.test(i)) {
+            continue;
+        }
+        const auto flags = properties.memoryTypes[i].propertyFlags;
+        if (((flags & wanted) == wanted) && (!(flags & excluded))) {
+            return i;
+        }
+    }
+    return std::nullopt;
+}
+
+} // namespace Vulkan

video_core/renderer_vulkan/vk_memory_util.h (new file)

@@ -0,0 +1,20 @@
+// Copyright 2023 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <bitset>
+#include <optional>
+
+#include "video_core/renderer_vulkan/vk_common.h"
+
+namespace Vulkan {
+
+/// Find a memory type with the passed requirements
+std::optional<u32> FindMemoryType(
+    const vk::PhysicalDeviceMemoryProperties& properties, vk::MemoryPropertyFlags wanted,
+    std::bitset<32> memory_type_mask = 0xFFFFFFFF,
+    vk::MemoryPropertyFlags excluded = vk::MemoryPropertyFlagBits::eProtected);
+
+} // namespace Vulkan
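Note: a minimal usage sketch for FindMemoryType (the PickImportMemoryType wrapper below is hypothetical, not part of the commit), mirroring the call made in the host-memory screenshot path: restrict candidates to the types reported valid for an imported host pointer, then require host-visible and host-coherent properties.

    #include <optional>

    #include "common/common_types.h" // u32
    #include "video_core/renderer_vulkan/vk_memory_util.h"

    // Hypothetical wrapper: pick a host-visible, host-coherent memory type that is also
    // allowed for an imported host pointer. import_properties is assumed to come from
    // vk::Device::getMemoryHostPointerPropertiesEXT on the pointer being imported.
    std::optional<u32> PickImportMemoryType(vk::PhysicalDevice physical_device,
                                            const vk::MemoryHostPointerPropertiesEXT& import_properties) {
        return Vulkan::FindMemoryType(
            physical_device.getMemoryProperties(),
            vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
            import_properties.memoryTypeBits); // mask of types usable with the imported pointer
    }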

video_core/renderer_vulkan/vk_stream_buffer.cpp

@@ -7,6 +7,7 @@
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
+#include "video_core/renderer_vulkan/vk_memory_util.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_stream_buffer.h"
 
@@ -43,19 +44,6 @@ vk::MemoryPropertyFlags MakePropertyFlags(BufferType type) {
     }
 }
 
-/// Find a memory type with the passed requirements
-std::optional<u32> FindMemoryType(
-    const vk::PhysicalDeviceMemoryProperties& properties, vk::MemoryPropertyFlags wanted,
-    vk::MemoryPropertyFlags excluded = vk::MemoryPropertyFlagBits::eProtected) {
-    for (u32 i = 0; i < properties.memoryTypeCount; ++i) {
-        const auto flags = properties.memoryTypes[i].propertyFlags;
-        if (((flags & wanted) == wanted) && (!(flags & excluded))) {
-            return i;
-        }
-    }
-    return std::nullopt;
-}
-
 /// Get the preferred host visible memory type.
 u32 GetMemoryType(const vk::PhysicalDeviceMemoryProperties& properties, BufferType type) {
     vk::MemoryPropertyFlags flags = MakePropertyFlags(type);