From a595ed499d3a21a251c3376c0c34a589c32088a9 Mon Sep 17 00:00:00 2001 From: Ameer J <52414509+ameerj@users.noreply.github.com> Date: Sun, 19 Nov 2023 17:17:16 -0500 Subject: [PATCH] gl_buffer_cache: Batch vertex/tfb buffer binding --- .../renderer_opengl/gl_buffer_cache.cpp | 76 +++++++++---------- .../renderer_opengl/gl_buffer_cache.h | 1 - src/video_core/renderer_opengl/gl_device.cpp | 1 - src/video_core/renderer_opengl/gl_device.h | 5 -- .../renderer_opengl/renderer_opengl.cpp | 17 +---- 5 files changed, 36 insertions(+), 64 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 38d553d3c2..9d5209e97e 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -53,13 +53,11 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rast VAddr cpu_addr_, u64 size_bytes_) : VideoCommon::BufferBase(rasterizer_, cpu_addr_, size_bytes_) { buffer.Create(); - const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr()); - glObjectLabel(GL_BUFFER, buffer.handle, static_cast(name.size()), name.data()); - glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW); - - if (runtime.has_unified_vertex_buffers) { - glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address); + if (runtime.device.HasDebuggingToolAttached()) { + const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr()); + glObjectLabel(GL_BUFFER, buffer.handle, static_cast(name.size()), name.data()); } + glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW); } void Buffer::ImmediateUpload(size_t offset, std::span data) noexcept { @@ -111,7 +109,6 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, : device{device_}, staging_buffer_pool{staging_buffer_pool_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, use_assembly_shaders{device.UseAssemblyShaders()}, - has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()}, stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional()} { GLint gl_max_attributes; glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes); @@ -123,16 +120,18 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, GL_STREAM_DRAW); } } - for (auto& stage_uniforms : copy_uniforms) { - for (OGLBuffer& buffer : stage_uniforms) { + if (use_assembly_shaders) { + for (auto& stage_uniforms : copy_uniforms) { + for (OGLBuffer& buffer : stage_uniforms) { + buffer.Create(); + glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY); + } + } + for (OGLBuffer& buffer : copy_compute_uniforms) { buffer.Create(); glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY); } } - for (OGLBuffer& buffer : copy_compute_uniforms) { - buffer.Create(); - glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY); - } device_access_memory = [this]() -> u64 { if (device.CanReportMemoryUsage()) { @@ -206,14 +205,8 @@ void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t siz } void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) { - if (has_unified_vertex_buffers) { - buffer.MakeResident(GL_READ_ONLY); - glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset, - static_cast(Common::AlignUp(size, 4))); - } else { - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle()); - index_buffer_offset = offset; - } + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle()); + index_buffer_offset = offset; } void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, @@ -221,24 +214,23 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, if (index >= max_attributes) { return; } - if (has_unified_vertex_buffers) { - buffer.MakeResident(GL_READ_ONLY); - glBindVertexBuffer(index, 0, 0, static_cast(stride)); - glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index, - buffer.HostGpuAddr() + offset, static_cast(size)); - } else { - glBindVertexBuffer(index, buffer.Handle(), static_cast(offset), - static_cast(stride)); - } + glBindVertexBuffer(index, buffer.Handle(), static_cast(offset), + static_cast(stride)); } void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings& bindings) { - for (u32 index = 0; index < bindings.buffers.size(); ++index) { - BindVertexBuffer(bindings.min_index + index, *bindings.buffers[index], - static_cast(bindings.offsets[index]), - static_cast(bindings.sizes[index]), - static_cast(bindings.strides[index])); - } + // TODO: Should HostBindings provide the correct runtime types to avoid these transforms? + std::array buffer_handles; + std::array buffer_strides; + std::ranges::transform(bindings.buffers, buffer_handles.begin(), + [](const Buffer* const buffer) { return buffer->Handle(); }); + std::ranges::transform(bindings.strides, buffer_strides.begin(), + [](u64 stride) { return static_cast(stride); }); + const u32 count = + std::min(static_cast(bindings.buffers.size()), max_attributes - bindings.min_index); + glBindVertexBuffers(bindings.min_index, static_cast(count), buffer_handles.data(), + reinterpret_cast(bindings.offsets.data()), + buffer_strides.data()); } void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, @@ -330,11 +322,13 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, } void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings& bindings) { - for (u32 index = 0; index < bindings.buffers.size(); ++index) { - glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, bindings.buffers[index]->Handle(), - static_cast(bindings.offsets[index]), - static_cast(bindings.sizes[index])); - } + std::array buffer_handles; + std::ranges::transform(bindings.buffers, buffer_handles.begin(), + [](const Buffer* const buffer) { return buffer->Handle(); }); + glBindBuffersRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, + static_cast(bindings.buffers.size()), buffer_handles.data(), + reinterpret_cast(bindings.offsets.data()), + reinterpret_cast(bindings.strides.data())); } void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 41b746f3bf..8613037ebe 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -194,7 +194,6 @@ private: bool has_fast_buffer_sub_data = false; bool use_assembly_shaders = false; - bool has_unified_vertex_buffers = false; bool use_storage_buffers = false; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 94258ccd0e..46d88c6644 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -200,7 +200,6 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) { has_broken_texture_view_formats = is_amd || (!is_linux && is_intel); has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; has_derivative_control = GLAD_GL_ARB_derivative_control; - has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; has_debugging_tool_attached = IsDebugToolAttached(extensions); has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index a5a6bbbba7..96034ea4a1 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -72,10 +72,6 @@ public: return has_texture_shadow_lod; } - bool HasVertexBufferUnifiedMemory() const { - return has_vertex_buffer_unified_memory; - } - bool HasASTC() const { return has_astc; } @@ -215,7 +211,6 @@ private: bool has_vertex_viewport_layer{}; bool has_image_load_formatted{}; bool has_texture_shadow_lod{}; - bool has_vertex_buffer_unified_memory{}; bool has_astc{}; bool has_variable_aoffi{}; bool has_component_indexing_bug{}; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 6b8d4e554d..6bfed08a1f 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -168,15 +168,6 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); } - // Enable unified vertex attributes and query vertex buffer address when the driver supports it - if (device.HasVertexBufferUnifiedMemory()) { - glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); - glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); - - glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); - glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, - &vertex_buffer_address); - } } RendererOpenGL::~RendererOpenGL() = default; @@ -680,13 +671,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { offsetof(ScreenRectVertex, tex_coord)); glVertexAttribBinding(PositionLocation, 0); glVertexAttribBinding(TexCoordLocation, 0); - if (device.HasVertexBufferUnifiedMemory()) { - glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex)); - glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address, - sizeof(vertices)); - } else { - glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); - } + glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::NearestNeighbor) { glBindSampler(0, present_sampler.handle);