From 60a96c49e59e600685b9a79d80b2685318b4fb64 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Thu, 10 Jun 2021 02:24:12 -0300
Subject: [PATCH] buffer_cache: Fix copy based uniform bindings tracking

---
 src/video_core/buffer_cache/buffer_cache.h    | 19 +++++++++++++++----
 .../renderer_opengl/gl_buffer_cache.h         | 12 +++++++-----
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index ec64f22930..47cb0a47df 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -680,6 +680,9 @@ void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& m
                                             const UniformBufferSizes* sizes) {
     if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
         if (enabled_uniform_buffer_masks != mask) {
+            if constexpr (IS_OPENGL) {
+                fast_bound_uniform_buffers.fill(0); // Mask changed: treat no fast buffer as bound
+            }
             dirty_uniform_buffers.fill(~u32{0});
         }
     }
@@ -1020,6 +1023,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
                 // Fast path for Nvidia
                 if (!HasFastUniformBufferBound(stage, binding_index)) {
                     // We only have to bind when the currently bound buffer is not the fast version
+                    fast_bound_uniform_buffers[stage] |= 1U << binding_index; // Mark as fast bound
                     runtime.BindFastUniformBuffer(stage, binding_index, size);
                 }
                 const auto span = ImmediateBufferWithData(cpu_addr, size);
@@ -1027,8 +1031,9 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
                 return;
             }
         }
-        fast_bound_uniform_buffers[stage] |= 1U << binding_index;
-
+        if constexpr (IS_OPENGL) { // Stream-buffer binds are tracked as fast bound too
+            fast_bound_uniform_buffers[stage] |= 1U << binding_index;
+        }
         // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
         const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
         cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size);
@@ -1046,9 +1051,15 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
         // This exists to avoid instances where the fast buffer is bound and a GPU write happens
         return;
     }
-    fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
-
     const u32 offset = buffer.Offset(cpu_addr);
+    if constexpr (IS_OPENGL) {
+        // A regular buffer bind follows, so this binding no longer holds the fast buffer
+        fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
+
+        // Mark the index dirty on copy-based binds: non-zero offset without runtime support for it
+        const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
+        dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index;
+    }
     if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
         runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
     } else {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index bc16abafb2..060d364278 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -92,16 +92,14 @@ public:
                          VideoCore::Surface::PixelFormat format);
 
     void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
+        const GLuint handle = fast_uniforms[stage][binding_index].handle; // Used by both branches
+        const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
         if (use_assembly_shaders) {
-            const GLuint handle = fast_uniforms[stage][binding_index].handle;
-            const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
             glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
         } else {
             const GLuint base_binding = graphics_base_uniform_bindings[stage];
             const GLuint binding = base_binding + binding_index;
-            glBindBufferRange(GL_UNIFORM_BUFFER, binding,
-                              fast_uniforms[stage][binding_index].handle, 0,
-                              static_cast<GLsizeiptr>(size));
+            glBindBufferRange(GL_UNIFORM_BUFFER, binding, handle, 0, gl_size);
         }
     }
 
@@ -134,6 +132,10 @@ public:
         return has_fast_buffer_sub_data;
     }
 
+    [[nodiscard]] bool SupportsNonZeroUniformOffset() const noexcept {
+        return !use_assembly_shaders; // Reported as unsupported only when assembly shaders are in use
+    }
+
     void SetBaseUniformBindings(const std::array<GLuint, 5>& bindings) {
         graphics_base_uniform_bindings = bindings;
     }