gl_rasterizer: Use buffer_storage for uniform data.

This replaces the glBufferData logic with the shared stream buffer code.
The new code doesn't need a temporary staging buffer any more, so the
performance should improve quite a bit.
This commit is contained in:
Markus Wick 2018-05-02 09:34:36 +02:00
parent 7bffd7c1b0
commit 5960282303
2 changed files with 34 additions and 13 deletions

View file

@ -31,7 +31,8 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
RasterizerOpenGL::RasterizerOpenGL()
: shader_dirty(true), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE) {
: shader_dirty(true), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE),
uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE) {
// Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
state.clip_distance[0] = true;
@ -48,16 +49,12 @@ RasterizerOpenGL::RasterizerOpenGL()
// Generate VBO, VAO and UBO
vertex_array.Create();
uniform_buffer.Create();
state.draw.vertex_array = vertex_array.handle;
state.draw.vertex_buffer = vertex_buffer.GetHandle();
state.draw.uniform_buffer = uniform_buffer.handle;
state.draw.uniform_buffer = uniform_buffer.GetHandle();
state.Apply();
// Bind the UBO to binding point 0
glBindBufferBase(GL_UNIFORM_BUFFER, 0, uniform_buffer.handle);
uniform_block_data.dirty = true;
uniform_block_data.lut_dirty.fill(true);
@ -70,6 +67,10 @@ RasterizerOpenGL::RasterizerOpenGL()
uniform_block_data.proctex_lut_dirty = true;
uniform_block_data.proctex_diff_lut_dirty = true;
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
uniform_size_aligned_fs =
Common::AlignUp<size_t>(sizeof(UniformData), uniform_buffer_alignment);
// Set vertex attributes
glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE,
sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position));
@ -475,11 +476,7 @@ void RasterizerOpenGL::DrawTriangles() {
}
// Sync the uniform data
if (uniform_block_data.dirty) {
glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data,
GL_STATIC_DRAW);
uniform_block_data.dirty = false;
}
UploadUniforms();
// Viewport can have negative offsets or larger
// dimensions than our framebuffer sub-rect.
@ -1652,3 +1649,19 @@ void RasterizerOpenGL::SyncLightDistanceAttenuationScale(int light_index) {
uniform_block_data.dirty = true;
}
}
void RasterizerOpenGL::UploadUniforms() {
if (!uniform_block_data.dirty)
return;
size_t uniform_size = uniform_size_aligned_fs;
u8* uniforms;
GLintptr offset;
std::tie(uniforms, offset, std::ignore) =
uniform_buffer.Map(uniform_size, uniform_buffer_alignment);
std::memcpy(uniforms, &uniform_block_data.data, sizeof(UniformData));
uniform_buffer.Unmap(uniform_size);
glBindBufferRange(GL_UNIFORM_BUFFER, 0, uniform_buffer.GetHandle(), offset,
sizeof(UniformData));
uniform_block_data.dirty = false;
}

View file

@ -215,6 +215,9 @@ private:
/// Syncs the specified light's distance attenuation scale to match the PICA register
void SyncLightDistanceAttenuationScale(int light_index);
/// Upload the uniform blocks to the uniform buffer object
void UploadUniforms();
OpenGLState state;
RasterizerCacheOpenGL res_cache;
@ -237,12 +240,17 @@ private:
std::unique_ptr<ShaderProgramManager> shader_program_manager;
// They shall be big enough for about one frame.
static constexpr size_t VERTEX_BUFFER_SIZE = 32 * 1024 * 1024;
static constexpr size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
std::array<SamplerInfo, 3> texture_samplers;
OGLVertexArray vertex_array;
static constexpr size_t VERTEX_BUFFER_SIZE = 32 * 1024 * 1024;
OGLStreamBuffer vertex_buffer;
OGLBuffer uniform_buffer;
OGLStreamBuffer uniform_buffer;
OGLFramebuffer framebuffer;
GLint uniform_buffer_alignment;
size_t uniform_size_aligned_fs;
// TODO (wwylele): consider caching texture cube in the rasterizer cache
OGLTexture texture_cube;