diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 398f16181a..56e5709943 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -88,10 +88,6 @@ public: map->MarkAsWritten(true); MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); } - } else { - if (map->IsWritten()) { - WriteBarrier(); - } } return {ToHandle(block), static_cast(block->GetOffset(cpu_addr))}; @@ -253,8 +249,6 @@ protected: virtual BufferType ToHandle(const OwnerBuffer& storage) = 0; - virtual void WriteBarrier() = 0; - virtual OwnerBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0; virtual void UploadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size, diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 39e3b66a23..7db055ea0d 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -184,6 +184,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { } switch (method) { + case MAXWELL3D_REG_INDEX(wait_for_idle): { + rasterizer.WaitForIdle(); + break; + } case MAXWELL3D_REG_INDEX(shadow_ram_control): { shadow_state.shadow_ram_control = static_cast(method_call.argument); break; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 5e522e0d28..864924ff33 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -709,7 +709,9 @@ public: union { struct { - INSERT_UNION_PADDING_WORDS(0x45); + INSERT_UNION_PADDING_WORDS(0x44); + + u32 wait_for_idle; struct { u32 upload_address; @@ -1536,6 +1538,7 @@ private: static_assert(offsetof(Maxwell3D::Regs, field_name) == position * 4, \ "Field " #field_name " has invalid position") +ASSERT_REG_POSITION(wait_for_idle, 0x44); ASSERT_REG_POSITION(macros, 0x45); ASSERT_REG_POSITION(shadow_ram_control, 0x49); ASSERT_REG_POSITION(upload, 0x60); diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 603f619523..3cbdac8e70 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -80,6 +80,9 @@ public: /// and invalidated virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; + /// Notify the host renderer to wait for previous primitive and compute operations. + virtual void WaitForIdle() = 0; + /// Notify the rasterizer to send all written commands to the host GPU. virtual void FlushCommands() = 0; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 4efce0de77..d2cab50bd9 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -51,10 +51,6 @@ Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { return std::make_shared(cpu_addr, size); } -void OGLBufferCache::WriteBarrier() { - glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); -} - GLuint OGLBufferCache::ToHandle(const Buffer& buffer) { return buffer->GetHandle(); } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index a748178575..a9e86cfc7c 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -59,8 +59,6 @@ protected: GLuint ToHandle(const Buffer& buffer) override; - void WriteBarrier() override; - void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, const u8* data) override; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 725b4c32d0..8b3b3ce92f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -746,6 +746,17 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { InvalidateRegion(addr, size); } +void RasterizerOpenGL::WaitForIdle() { + // Place a barrier on everything that is not framebuffer related. + // This is related to another flag that is not currently implemented. + glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT | GL_ELEMENT_ARRAY_BARRIER_BIT | + GL_UNIFORM_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT | + GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_COMMAND_BARRIER_BIT | + GL_PIXEL_BUFFER_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT | + GL_BUFFER_UPDATE_BARRIER_BIT | GL_TRANSFORM_FEEDBACK_BARRIER_BIT | + GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT); +} + void RasterizerOpenGL::FlushCommands() { // Only flush when we have commands queued to OpenGL. if (num_queued_commands == 0) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 87249fb6f3..b94c65907d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -75,6 +75,7 @@ public: void SignalSyncPoint(u32 value) override; void ReleaseFences() override; void FlushAndInvalidateRegion(VAddr addr, u64 size) override; + void WaitForIdle() override; void FlushCommands() override; void TickFrame() override; bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index a2d0b42b13..a54583e7d0 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -52,8 +52,6 @@ public: protected: VkBuffer ToHandle(const Buffer& buffer) override; - void WriteBarrier() override {} - Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 722fde3848..8b009fc22b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -299,7 +299,7 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), sampler_cache(device), fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache), - query_cache(system, *this, device, scheduler) { + query_cache(system, *this, device, scheduler), wfi_event{device.GetLogical().CreateEvent()} { scheduler.SetQueryCache(query_cache); } @@ -573,6 +573,26 @@ void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) { InvalidateRegion(addr, size); } +void RasterizerVulkan::WaitForIdle() { + // Everything but wait pixel operations. This intentionally includes FRAGMENT_SHADER_BIT because + // fragment shaders can still write storage buffers. + VkPipelineStageFlags flags = + VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | + VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | + VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT; + if (device.IsExtTransformFeedbackSupported()) { + flags |= VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT; + } + + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([event = *wfi_event, flags](vk::CommandBuffer cmdbuf) { + cmdbuf.SetEvent(event, flags); + cmdbuf.WaitEvents(event, flags, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, {}, {}, {}); + }); +} + void RasterizerVulkan::FlushCommands() { if (draw_counter > 0) { draw_counter = 0; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 703a094c31..0ed0e48c67 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -126,6 +126,7 @@ public: void SignalSyncPoint(u32 value) override; void ReleaseFences() override; void FlushAndInvalidateRegion(VAddr addr, u64 size) override; + void WaitForIdle() override; void FlushCommands() override; void TickFrame() override; bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, @@ -275,6 +276,7 @@ private: vk::Buffer default_buffer; VKMemoryCommit default_buffer_commit; + vk::Event wfi_event; std::array color_attachments; View zeta_attachment; diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 7f5bc1404b..2ce9b06264 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -87,6 +87,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdSetStencilReference); X(vkCmdSetStencilWriteMask); X(vkCmdSetViewport); + X(vkCmdWaitEvents); X(vkCreateBuffer); X(vkCreateBufferView); X(vkCreateCommandPool); diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h index bda16a2cb4..98937a77a6 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/renderer_vulkan/wrapper.h @@ -205,6 +205,7 @@ struct DeviceDispatch : public InstanceDispatch { PFN_vkCmdSetStencilReference vkCmdSetStencilReference; PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask; PFN_vkCmdSetViewport vkCmdSetViewport; + PFN_vkCmdWaitEvents vkCmdWaitEvents; PFN_vkCreateBuffer vkCreateBuffer; PFN_vkCreateBufferView vkCreateBufferView; PFN_vkCreateCommandPool vkCreateCommandPool; @@ -958,6 +959,15 @@ public: dld->vkCmdSetEvent(handle, event, stage_flags); } + void WaitEvents(Span events, VkPipelineStageFlags src_stage_mask, + VkPipelineStageFlags dst_stage_mask, Span memory_barriers, + Span buffer_barriers, + Span image_barriers) const noexcept { + dld->vkCmdWaitEvents(handle, events.size(), events.data(), src_stage_mask, dst_stage_mask, + memory_barriers.size(), memory_barriers.data(), buffer_barriers.size(), + buffer_barriers.data(), image_barriers.size(), image_barriers.data()); + } + void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, const VkDeviceSize* offsets, const VkDeviceSize* sizes) const noexcept {