From 831c9c4a3814b24903c4edb719f22b4bfe0995ea Mon Sep 17 00:00:00 2001 From: Wunk Date: Sun, 12 Nov 2023 13:02:55 -0800 Subject: [PATCH] renderer_vulkan: Import host memory for screenshots (#7132) --- src/common/memory_detect.cpp | 13 +- src/common/memory_detect.h | 6 + src/video_core/CMakeLists.txt | 2 + .../renderer_vulkan/renderer_vulkan.cpp | 172 +++++++++++++++++- .../renderer_vulkan/renderer_vulkan.h | 2 + .../renderer_vulkan/vk_instance.cpp | 11 +- src/video_core/renderer_vulkan/vk_instance.h | 12 ++ .../renderer_vulkan/vk_memory_util.cpp | 23 +++ .../renderer_vulkan/vk_memory_util.h | 20 ++ .../renderer_vulkan/vk_stream_buffer.cpp | 14 +- 10 files changed, 254 insertions(+), 21 deletions(-) create mode 100644 src/video_core/renderer_vulkan/vk_memory_util.cpp create mode 100644 src/video_core/renderer_vulkan/vk_memory_util.h diff --git a/src/common/memory_detect.cpp b/src/common/memory_detect.cpp index e97ff9af2..dff3ae131 100644 --- a/src/common/memory_detect.cpp +++ b/src/common/memory_detect.cpp @@ -8,12 +8,11 @@ #include #else #include +#include #if defined(__APPLE__) || defined(__FreeBSD__) #include #elif defined(__linux__) #include -#else -#include #endif #endif @@ -64,4 +63,14 @@ const MemoryInfo GetMemInfo() { return mem_info; } +u64 GetPageSize() { +#ifdef _WIN32 + SYSTEM_INFO info; + ::GetSystemInfo(&info); + return static_cast(info.dwPageSize); +#else + return static_cast(sysconf(_SC_PAGESIZE)); +#endif +} + } // namespace Common diff --git a/src/common/memory_detect.h b/src/common/memory_detect.h index eb07d8ab0..dce3529d0 100644 --- a/src/common/memory_detect.h +++ b/src/common/memory_detect.h @@ -19,4 +19,10 @@ struct MemoryInfo { */ [[nodiscard]] const MemoryInfo GetMemInfo(); +/** + * Gets the page size of the host system + * @return Page size in bytes of the host system + */ +u64 GetPageSize(); + } // namespace Common diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 252a80c17..dc9be8228 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -109,6 +109,8 @@ add_library(video_core STATIC renderer_vulkan/vk_graphics_pipeline.h renderer_vulkan/vk_master_semaphore.cpp renderer_vulkan/vk_master_semaphore.h + renderer_vulkan/vk_memory_util.cpp + renderer_vulkan/vk_memory_util.h renderer_vulkan/vk_rasterizer.cpp renderer_vulkan/vk_rasterizer.h renderer_vulkan/vk_rasterizer_cache.cpp diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 5986b3b7e..2f63ecd2f 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -4,6 +4,7 @@ #include "common/assert.h" #include "common/logging/log.h" +#include "common/memory_detect.h" #include "common/microprofile.h" #include "common/settings.h" #include "common/texture.h" @@ -13,6 +14,7 @@ #include "core/hw/hw.h" #include "core/hw/lcd.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/renderer_vulkan/vk_memory_util.h" #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/host_shaders/vulkan_present_anaglyph_frag_spv.h" @@ -865,6 +867,16 @@ void RendererVulkan::RenderScreenshot() { return; } + if (!TryRenderScreenshotWithHostMemory()) { + RenderScreenshotWithStagingCopy(); + } + + settings.screenshot_complete_callback(false); +} + +void RendererVulkan::RenderScreenshotWithStagingCopy() { + const vk::Device device = instance.GetDevice(); + const Layout::FramebufferLayout layout{settings.screenshot_framebuffer_layout}; const u32 width = layout.width; const u32 height = layout.height; @@ -895,6 +907,7 @@ void RendererVulkan::RenderScreenshot() { LOG_CRITICAL(Render_Vulkan, "Failed allocating texture with error {}", result); UNREACHABLE(); } + vk::Buffer staging_buffer{unsafe_buffer}; Frame frame{}; @@ -969,18 +982,169 @@ void RendererVulkan::RenderScreenshot() { // Ensure the copy is fully completed before saving the screenshot scheduler.Finish(); - const vk::Device device = instance.GetDevice(); - // Copy backing image data to the QImage screenshot buffer std::memcpy(settings.screenshot_bits, alloc_info.pMappedData, staging_buffer_info.size); // Destroy allocated resources - vmaDestroyBuffer(instance.GetAllocator(), unsafe_buffer, allocation); + vmaDestroyBuffer(instance.GetAllocator(), staging_buffer, allocation); vmaDestroyImage(instance.GetAllocator(), frame.image, frame.allocation); device.destroyFramebuffer(frame.framebuffer); device.destroyImageView(frame.image_view); +} - settings.screenshot_complete_callback(false); +bool RendererVulkan::TryRenderScreenshotWithHostMemory() { + // If the host-memory import alignment matches the allocation granularity of the platform, then + // the entire span of memory can be trivially imported + const bool trivial_import = + instance.IsExternalMemoryHostSupported() && + instance.GetMinImportedHostPointerAlignment() == Common::GetPageSize(); + if (!trivial_import) { + return false; + } + + const vk::Device device = instance.GetDevice(); + + const Layout::FramebufferLayout layout{settings.screenshot_framebuffer_layout}; + const u32 width = layout.width; + const u32 height = layout.height; + + // For a span of memory [x, x + s], import [AlignDown(x, alignment), AlignUp(x + s, alignment)] + // and maintain an offset to the start of the data + const u64 import_alignment = instance.GetMinImportedHostPointerAlignment(); + const uintptr_t address = reinterpret_cast(settings.screenshot_bits); + void* aligned_pointer = reinterpret_cast(Common::AlignDown(address, import_alignment)); + const u64 offset = address % import_alignment; + const u64 aligned_size = Common::AlignUp(offset + width * height * 4ull, import_alignment); + + // Buffer<->Image mapping for the imported imported buffer + const vk::BufferImageCopy buffer_image_copy = { + .bufferOffset = offset, + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource = + { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .imageOffset = {0, 0, 0}, + .imageExtent = {width, height, 1}, + }; + + const vk::MemoryHostPointerPropertiesEXT import_properties = + device.getMemoryHostPointerPropertiesEXT( + vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, aligned_pointer); + + if (!import_properties.memoryTypeBits) { + // Could not import memory + return false; + } + + const std::optional memory_type_index = FindMemoryType( + instance.GetPhysicalDevice().getMemoryProperties(), + vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, + import_properties.memoryTypeBits); + + if (!memory_type_index.has_value()) { + // Could not find memory type index + return false; + } + + const vk::StructureChain + allocation_chain = { + vk::MemoryAllocateInfo{ + .allocationSize = aligned_size, + .memoryTypeIndex = memory_type_index.value(), + }, + vk::ImportMemoryHostPointerInfoEXT{ + .handleType = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, + .pHostPointer = aligned_pointer, + }, + }; + + // Import host memory + const vk::UniqueDeviceMemory imported_memory = + device.allocateMemoryUnique(allocation_chain.get()); + + const vk::StructureChain buffer_info = + { + vk::BufferCreateInfo{ + .size = aligned_size, + .usage = vk::BufferUsageFlagBits::eTransferDst, + .sharingMode = vk::SharingMode::eExclusive, + }, + vk::ExternalMemoryBufferCreateInfo{ + .handleTypes = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, + }, + }; + + // Bind imported memory to buffer + const vk::UniqueBuffer imported_buffer = device.createBufferUnique(buffer_info.get()); + device.bindBufferMemory(imported_buffer.get(), imported_memory.get(), 0); + + Frame frame{}; + main_window.RecreateFrame(&frame, width, height); + + DrawScreens(&frame, layout, false); + + scheduler.Record([buffer_image_copy, source_image = frame.image, + imported_buffer = imported_buffer.get()](vk::CommandBuffer cmdbuf) { + const vk::ImageMemoryBarrier read_barrier = { + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eTransferSrcOptimal, + .newLayout = vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = source_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + const vk::ImageMemoryBarrier write_barrier = { + .srcAccessMask = vk::AccessFlagBits::eTransferRead, + .dstAccessMask = vk::AccessFlagBits::eMemoryWrite, + .oldLayout = vk::ImageLayout::eTransferSrcOptimal, + .newLayout = vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = source_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + static constexpr vk::MemoryBarrier memory_write_barrier = { + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + }; + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, read_barrier); + cmdbuf.copyImageToBuffer(source_image, vk::ImageLayout::eTransferSrcOptimal, + imported_buffer, buffer_image_copy); + cmdbuf.pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, memory_write_barrier, {}, write_barrier); + }); + + // Ensure the copy is fully completed before saving the screenshot + scheduler.Finish(); + + // Image data has been copied directly to host memory + device.destroyFramebuffer(frame.framebuffer); + device.destroyImageView(frame.image_view); + + return true; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 103941a9e..f180a2644 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -90,6 +90,8 @@ private: void ConfigureRenderPipeline(); void PrepareRendertarget(); void RenderScreenshot(); + void RenderScreenshotWithStagingCopy(); + bool TryRenderScreenshotWithHostMemory(); void PrepareDraw(Frame* frame, const Layout::FramebufferLayout& layout); void RenderToWindow(PresentWindow& window, const Layout::FramebufferLayout& layout, bool flipped); diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 519138e2f..c541af6e5 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -407,7 +407,8 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR>(); const vk::StructureChain properties_chain = physical_device.getProperties2(); + vk::PhysicalDevicePortabilitySubsetPropertiesKHR, + vk::PhysicalDeviceExternalMemoryHostPropertiesEXT>(); features = feature_chain.get().features; if (available_extensions.empty()) { @@ -415,7 +416,7 @@ bool Instance::CreateDevice() { return false; } - boost::container::static_vector enabled_extensions; + boost::container::static_vector enabled_extensions; const auto add_extension = [&](std::string_view extension, bool blacklist = false, std::string_view reason = "") -> bool { const auto result = @@ -445,6 +446,7 @@ bool Instance::CreateDevice() { add_extension(VK_KHR_SWAPCHAIN_EXTENSION_NAME); image_format_list = add_extension(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME); shader_stencil_export = add_extension(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME); + external_memory_host = add_extension(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME); tooling_info = add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME); const bool has_timeline_semaphores = add_extension(VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, is_qualcomm || is_turnip, @@ -589,6 +591,11 @@ bool Instance::CreateDevice() { device_chain.unlink(); } + if (external_memory_host) { + PROP_GET(vk::PhysicalDeviceExternalMemoryHostPropertiesEXT, minImportedHostPointerAlignment, + min_imported_host_pointer_alignment); + } + if (has_fragment_shader_barycentric) { FEAT_SET(vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR, fragmentShaderBarycentric, fragment_shader_barycentric) diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index d5fe88a97..88f0a3fca 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -168,6 +168,11 @@ public: return shader_stencil_export; } + /// Returns true when VK_EXT_external_memory_host is supported + bool IsExternalMemoryHostSupported() const { + return external_memory_host; + } + /// Returns true when VK_KHR_fragment_shader_barycentric is supported bool IsFragmentShaderBarycentricSupported() const { return fragment_shader_barycentric; @@ -248,6 +253,11 @@ public: return min_vertex_stride_alignment; } + /// Returns the minimum imported host pointer alignment + u64 GetMinImportedHostPointerAlignment() const { + return min_imported_host_pointer_alignment; + } + /// Returns true if commands should be flushed at the end of each major renderpass bool ShouldFlush() const { return driver_id == vk::DriverIdKHR::eArmProprietary || @@ -314,6 +324,8 @@ private: bool pipeline_creation_cache_control{}; bool fragment_shader_barycentric{}; bool shader_stencil_export{}; + bool external_memory_host{}; + u64 min_imported_host_pointer_alignment{}; bool tooling_info{}; bool debug_utils_supported{}; bool has_nsight_graphics{}; diff --git a/src/video_core/renderer_vulkan/vk_memory_util.cpp b/src/video_core/renderer_vulkan/vk_memory_util.cpp new file mode 100644 index 000000000..423d10e91 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_memory_util.cpp @@ -0,0 +1,23 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/renderer_vulkan/vk_memory_util.h" + +namespace Vulkan { + +std::optional FindMemoryType(const vk::PhysicalDeviceMemoryProperties& properties, + vk::MemoryPropertyFlags wanted, std::bitset<32> memory_type_mask, + vk::MemoryPropertyFlags excluded) { + for (u32 i = 0; i < properties.memoryTypeCount; ++i) { + if (!memory_type_mask.test(i)) { + continue; + } + const auto flags = properties.memoryTypes[i].propertyFlags; + if (((flags & wanted) == wanted) && (!(flags & excluded))) { + return i; + } + } + return std::nullopt; +} +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_memory_util.h b/src/video_core/renderer_vulkan/vk_memory_util.h new file mode 100644 index 000000000..0eaea45de --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_memory_util.h @@ -0,0 +1,20 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +/// Find a memory type with the passed requirements +std::optional FindMemoryType( + const vk::PhysicalDeviceMemoryProperties& properties, vk::MemoryPropertyFlags wanted, + std::bitset<32> memory_type_mask = 0xFFFFFFFF, + vk::MemoryPropertyFlags excluded = vk::MemoryPropertyFlagBits::eProtected); + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index fd557bdd8..4690e69bf 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -7,6 +7,7 @@ #include "common/alignment.h" #include "common/assert.h" #include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_memory_util.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" @@ -43,19 +44,6 @@ vk::MemoryPropertyFlags MakePropertyFlags(BufferType type) { } } -/// Find a memory type with the passed requirements -std::optional FindMemoryType( - const vk::PhysicalDeviceMemoryProperties& properties, vk::MemoryPropertyFlags wanted, - vk::MemoryPropertyFlags excluded = vk::MemoryPropertyFlagBits::eProtected) { - for (u32 i = 0; i < properties.memoryTypeCount; ++i) { - const auto flags = properties.memoryTypes[i].propertyFlags; - if (((flags & wanted) == wanted) && (!(flags & excluded))) { - return i; - } - } - return std::nullopt; -} - /// Get the preferred host visible memory type. u32 GetMemoryType(const vk::PhysicalDeviceMemoryProperties& properties, BufferType type) { vk::MemoryPropertyFlags flags = MakePropertyFlags(type);