From 5960282303e3b5883b14a0174dcd0af3646e2f16 Mon Sep 17 00:00:00 2001 From: Markus Wick Date: Wed, 2 May 2018 09:34:36 +0200 Subject: [PATCH] gl_rasterizer: Use buffer_storage for uniform data. This replaces the glBufferData logic with the shared stream buffer code. The new code doesn't need a temporary staging buffer any more, so the performance should improve quite a bit. --- .../renderer_opengl/gl_rasterizer.cpp | 35 +++++++++++++------ .../renderer_opengl/gl_rasterizer.h | 12 +++++-- 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7c44a77cb..2e738694e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -31,7 +31,8 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); RasterizerOpenGL::RasterizerOpenGL() - : shader_dirty(true), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE) { + : shader_dirty(true), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE), + uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE) { // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0 state.clip_distance[0] = true; @@ -48,16 +49,12 @@ RasterizerOpenGL::RasterizerOpenGL() // Generate VBO, VAO and UBO vertex_array.Create(); - uniform_buffer.Create(); state.draw.vertex_array = vertex_array.handle; state.draw.vertex_buffer = vertex_buffer.GetHandle(); - state.draw.uniform_buffer = uniform_buffer.handle; + state.draw.uniform_buffer = uniform_buffer.GetHandle(); state.Apply(); - // Bind the UBO to binding point 0 - glBindBufferBase(GL_UNIFORM_BUFFER, 0, uniform_buffer.handle); - uniform_block_data.dirty = true; uniform_block_data.lut_dirty.fill(true); @@ -70,6 +67,10 @@ RasterizerOpenGL::RasterizerOpenGL() uniform_block_data.proctex_lut_dirty = true; 
uniform_block_data.proctex_diff_lut_dirty = true; + glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); + uniform_size_aligned_fs = + Common::AlignUp(sizeof(UniformData), uniform_buffer_alignment); + // Set vertex attributes glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position)); @@ -475,11 +476,7 @@ void RasterizerOpenGL::DrawTriangles() { } // Sync the uniform data - if (uniform_block_data.dirty) { - glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, - GL_STATIC_DRAW); - uniform_block_data.dirty = false; - } + UploadUniforms(); // Viewport can have negative offsets or larger // dimensions than our framebuffer sub-rect. @@ -1652,3 +1649,19 @@ void RasterizerOpenGL::SyncLightDistanceAttenuationScale(int light_index) { uniform_block_data.dirty = true; } } + +void RasterizerOpenGL::UploadUniforms() { + if (!uniform_block_data.dirty) + return; + + size_t uniform_size = uniform_size_aligned_fs; + u8* uniforms; + GLintptr offset; + std::tie(uniforms, offset, std::ignore) = + uniform_buffer.Map(uniform_size, uniform_buffer_alignment); + std::memcpy(uniforms, &uniform_block_data.data, sizeof(UniformData)); + uniform_buffer.Unmap(uniform_size); + glBindBufferRange(GL_UNIFORM_BUFFER, 0, uniform_buffer.GetHandle(), offset, + sizeof(UniformData)); + uniform_block_data.dirty = false; +} diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 02771a189..c02d3ece7 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -215,6 +215,9 @@ private: /// Syncs the specified light's distance attenuation scale to match the PICA register void SyncLightDistanceAttenuationScale(int light_index); + /// Upload the uniform blocks to the uniform buffer object + void UploadUniforms(); + OpenGLState state; RasterizerCacheOpenGL res_cache; 
@@ -237,12 +240,17 @@ private: std::unique_ptr shader_program_manager; + // They shall be big enough for about one frame. + static constexpr size_t VERTEX_BUFFER_SIZE = 32 * 1024 * 1024; + static constexpr size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024; + std::array texture_samplers; OGLVertexArray vertex_array; - static constexpr size_t VERTEX_BUFFER_SIZE = 32 * 1024 * 1024; OGLStreamBuffer vertex_buffer; - OGLBuffer uniform_buffer; + OGLStreamBuffer uniform_buffer; OGLFramebuffer framebuffer; + GLint uniform_buffer_alignment; + size_t uniform_size_aligned_fs; // TODO (wwylele): consider caching texture cube in the rasterizer cache OGLTexture texture_cube;