diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index a71866b75b..b787b69948 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -58,6 +58,9 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rast glObjectLabel(GL_BUFFER, buffer.handle, static_cast(name.size()), name.data()); } glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW); + if (runtime.has_unified_vertex_buffers) { + glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address); + } } void Buffer::ImmediateUpload(size_t offset, std::span data) noexcept { @@ -109,6 +112,7 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, : device{device_}, staging_buffer_pool{staging_buffer_pool_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, use_assembly_shaders{device.UseAssemblyShaders()}, + has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()}, stream_buffer{has_fast_buffer_sub_data ? 
std::nullopt : std::make_optional()} { GLint gl_max_attributes; glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes); @@ -210,8 +214,14 @@ void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t siz } void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) { - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle()); - index_buffer_offset = offset; + if (has_unified_vertex_buffers) { + buffer.MakeResident(GL_READ_ONLY); + glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset, + static_cast(Common::AlignUp(size, 4))); + } else { + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle()); + index_buffer_offset = offset; + } } void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, @@ -219,8 +229,15 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, if (index >= max_attributes) { return; } - glBindVertexBuffer(index, buffer.Handle(), static_cast(offset), - static_cast(stride)); + if (has_unified_vertex_buffers) { + buffer.MakeResident(GL_READ_ONLY); + glBindVertexBuffer(index, 0, 0, static_cast(stride)); + glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index, + buffer.HostGpuAddr() + offset, static_cast(size)); + } else { + glBindVertexBuffer(index, buffer.Handle(), static_cast(offset), + static_cast(stride)); + } } void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings& bindings) { @@ -233,9 +250,23 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings& bi [](u64 stride) { return static_cast(stride); }); const u32 count = std::min(static_cast(bindings.buffers.size()), max_attributes - bindings.min_index); - glBindVertexBuffers(bindings.min_index, static_cast(count), buffer_handles.data(), - reinterpret_cast(bindings.offsets.data()), - buffer_strides.data()); + if (has_unified_vertex_buffers) { + for (u32 index = 0; index < count; ++index) { + Buffer& buffer = *bindings.buffers[index]; 
+ buffer.MakeResident(GL_READ_ONLY); + glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, bindings.min_index + index, + buffer.HostGpuAddr() + bindings.offsets[index], + static_cast(bindings.sizes[index])); + } + static constexpr std::array ZEROS{}; + glBindVertexBuffers(bindings.min_index, static_cast(count), + reinterpret_cast(ZEROS.data()), + reinterpret_cast(ZEROS.data()), buffer_strides.data()); + } else { + glBindVertexBuffers(bindings.min_index, static_cast(count), buffer_handles.data(), + reinterpret_cast(bindings.offsets.data()), + buffer_strides.data()); + } } void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 71cd45d35c..1e8708f59e 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -209,6 +209,7 @@ private: bool has_fast_buffer_sub_data = false; bool use_assembly_shaders = false; + bool has_unified_vertex_buffers = false; bool use_storage_buffers = false; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index a6c93068fc..993438a27b 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -200,6 +200,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) { has_broken_texture_view_formats = is_amd || (!is_linux && is_intel); has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; has_derivative_control = GLAD_GL_ARB_derivative_control; + has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; has_debugging_tool_attached = IsDebugToolAttached(extensions); has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough; diff --git a/src/video_core/renderer_opengl/gl_device.h 
b/src/video_core/renderer_opengl/gl_device.h index 96034ea4a1..a5a6bbbba7 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -72,6 +72,10 @@ public: return has_texture_shadow_lod; } + bool HasVertexBufferUnifiedMemory() const { + return has_vertex_buffer_unified_memory; + } + bool HasASTC() const { return has_astc; } @@ -211,6 +215,7 @@ private: bool has_vertex_viewport_layer{}; bool has_image_load_formatted{}; bool has_texture_shadow_lod{}; + bool has_vertex_buffer_unified_memory{}; bool has_astc{}; bool has_variable_aoffi{}; bool has_component_indexing_bug{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 279e5a4e08..4832c03c57 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -162,14 +162,18 @@ void RasterizerOpenGL::Clear(u32 layer_count) { SyncFramebufferSRGB(); } if (regs.clear_surface.Z) { - ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!"); + if (regs.zeta_enable == 0) { + LOG_DEBUG(Render_OpenGL, "Tried to clear Z but buffer is not enabled!"); + } use_depth = true; state_tracker.NotifyDepthMask(); glDepthMask(GL_TRUE); } if (regs.clear_surface.S) { - ASSERT_MSG(regs.zeta_enable, "Tried to clear stencil but buffer is not enabled!"); + if (!regs.zeta_enable) { + LOG_DEBUG(Render_OpenGL, "Tried to clear stencil but buffer is not enabled!"); + } use_stencil = true; } @@ -1294,15 +1298,13 @@ void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum program->ConfigureTransformFeedback(); UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) || - regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation) || - regs.IsShaderConfigEnabled(Maxwell::ShaderType::Geometry)); - UNIMPLEMENTED_IF(primitive_mode != GL_POINTS); + regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation)); // 
We may have to call BeginTransformFeedbackNV here since they seem to call different // implementations on Nvidia's driver (the pointer is different) but we are using // ARB_transform_feedback3 features with NV_transform_feedback interactions and the ARB // extension doesn't define BeginTransformFeedback (without NV) interactions. It just works. - glBeginTransformFeedback(GL_POINTS); + glBeginTransformFeedback(primitive_mode); } void RasterizerOpenGL::EndTransformFeedback() { diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 7a4f0c5c18..2933718b68 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -168,6 +168,14 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); } + // Enable unified vertex attributes and query vertex buffer address when the driver supports it + if (device.HasVertexBufferUnifiedMemory()) { + glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); + glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); + glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); + glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, + &vertex_buffer_address); + } } RendererOpenGL::~RendererOpenGL() = default; @@ -667,7 +675,13 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { offsetof(ScreenRectVertex, tex_coord)); glVertexAttribBinding(PositionLocation, 0); glVertexAttribBinding(TexCoordLocation, 0); - glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); + if (device.HasVertexBufferUnifiedMemory()) { + glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex)); + glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address, + sizeof(vertices)); + } else { 
+ glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); + } if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::NearestNeighbor) { glBindSampler(0, present_sampler.handle);