From 10fba63b94313a7228ef494634742786b4a03caf Mon Sep 17 00:00:00 2001 From: Markus Wick Date: Sat, 19 May 2018 09:48:41 +0200 Subject: [PATCH 01/12] gl_rasterizer: Provide texture buffer offsets. This allows us to move all data into one TBO. --- .../renderer_opengl/gl_rasterizer.cpp | 3 ++ .../renderer_opengl/gl_shader_gen.cpp | 38 ++++++++++++------- .../renderer_opengl/gl_shader_manager.h | 10 ++++- src/video_core/renderer_opengl/pica_to_gl.h | 4 ++ 4 files changed, 41 insertions(+), 14 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 1318a332e..d8e1cd5fc 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -76,6 +76,9 @@ RasterizerOpenGL::RasterizerOpenGL() uniform_block_data.proctex_lut_dirty = true; uniform_block_data.proctex_diff_lut_dirty = true; + for (int i = 0; i < 24; i++) + uniform_block_data.data.lighting_lut_offset[i / 4][i % 4] = 256 * i; + glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); uniform_size_aligned_vs = Common::AlignUp(sizeof(VSUniformData), uniform_buffer_alignment); diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 0ae9794de..6a8f1df44 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -32,6 +32,7 @@ namespace GLShader { static const std::string UniformBlockDef = R"( #define NUM_TEV_STAGES 6 #define NUM_LIGHTS 8 +#define NUM_LIGHTING_SAMPLERS 24 struct LightSrc { vec3 specular_0; @@ -55,6 +56,13 @@ layout (std140) uniform shader_data { int scissor_y1; int scissor_x2; int scissor_y2; + int fog_lut_offset; + int proctex_noise_lut_offset; + int proctex_color_map_offset; + int proctex_alpha_map_offset; + int proctex_lut_offset; + int proctex_diff_lut_offset; + ivec4 lighting_lut_offset[NUM_LIGHTING_SAMPLERS / 4]; vec3 fog_color; 
vec2 proctex_noise_f; vec2 proctex_noise_a; @@ -1017,7 +1025,7 @@ void AppendProcTexClamp(std::string& out, const std::string& var, ProcTexClamp m } void AppendProcTexCombineAndMap(std::string& out, ProcTexCombiner combiner, - const std::string& map_lut) { + const std::string& map_lut, const std::string& offset) { std::string combined; switch (combiner) { case ProcTexCombiner::U: @@ -1055,7 +1063,7 @@ void AppendProcTexCombineAndMap(std::string& out, ProcTexCombiner combiner, combined = "0.0"; break; } - out += "ProcTexLookupLUT(" + map_lut + ", " + combined + ")"; + out += "ProcTexLookupLUT(" + map_lut + ", " + offset + ", " + combined + ")"; } void AppendProcTexSampler(std::string& out, const PicaFSConfig& config) { @@ -1064,12 +1072,12 @@ void AppendProcTexSampler(std::string& out, const PicaFSConfig& config) { // coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using // value entries and difference entries. out += R"( -float ProcTexLookupLUT(samplerBuffer lut, float coord) { +float ProcTexLookupLUT(samplerBuffer lut, int offset, float coord) { coord *= 128; float index_i = clamp(floor(coord), 0.0, 127.0); float index_f = coord - index_i; // fract() cannot be used here because 128.0 needs to be // extracted as index_i = 127.0 and index_f = 1.0 - vec2 entry = texelFetch(lut, int(index_i)).rg; + vec2 entry = texelFetch(lut, int(index_i) + offset).rg; return clamp(entry.r + entry.g * index_f, 0.0, 1.0); } )"; @@ -1105,8 +1113,8 @@ float ProcTexNoiseCoef(vec2 x) { float g2 = ProcTexNoiseRand2D(point + vec2(0.0, 1.0)) * (frac.x + frac.y - 1.0); float g3 = ProcTexNoiseRand2D(point + vec2(1.0, 1.0)) * (frac.x + frac.y - 2.0); - float x_noise = ProcTexLookupLUT(proctex_noise_lut, frac.x); - float y_noise = ProcTexLookupLUT(proctex_noise_lut, frac.y); + float x_noise = ProcTexLookupLUT(proctex_noise_lut, proctex_noise_lut_offset, frac.x); + float y_noise = ProcTexLookupLUT(proctex_noise_lut, proctex_noise_lut_offset, frac.y); float x0 = 
mix(g0, g1, x_noise); float x1 = mix(g2, g3, x_noise); return mix(x0, x1, y_noise); @@ -1148,7 +1156,8 @@ float ProcTexNoiseCoef(vec2 x) { // Combine and map out += "float lut_coord = "; - AppendProcTexCombineAndMap(out, config.state.proctex.color_combiner, "proctex_color_map"); + AppendProcTexCombineAndMap(out, config.state.proctex.color_combiner, "proctex_color_map", + "proctex_color_map_offset"); out += ";\n"; // Look up color @@ -1162,14 +1171,16 @@ float ProcTexNoiseCoef(vec2 x) { out += "int lut_index_i = int(lut_coord) + " + std::to_string(config.state.proctex.lut_offset) + ";\n"; out += "float lut_index_f = fract(lut_coord);\n"; - out += "vec4 final_color = texelFetch(proctex_lut, lut_index_i) + lut_index_f * " - "texelFetch(proctex_diff_lut, lut_index_i);\n"; + out += "vec4 final_color = texelFetch(proctex_lut, lut_index_i + proctex_lut_offset) + " + "lut_index_f * " + "texelFetch(proctex_diff_lut, lut_index_i + proctex_diff_lut_offset);\n"; break; case ProcTexFilter::Nearest: case ProcTexFilter::NearestMipmapLinear: case ProcTexFilter::NearestMipmapNearest: out += "lut_coord += " + std::to_string(config.state.proctex.lut_offset) + ";\n"; - out += "vec4 final_color = texelFetch(proctex_lut, int(round(lut_coord)));\n"; + out += "vec4 final_color = texelFetch(proctex_lut, int(round(lut_coord)) + " + "proctex_lut_offset);\n"; break; } @@ -1177,7 +1188,8 @@ float ProcTexNoiseCoef(vec2 x) { // Note: in separate alpha mode, the alpha channel skips the color LUT look up stage. It // uses the output of CombineAndMap directly instead. 
out += "float final_alpha = "; - AppendProcTexCombineAndMap(out, config.state.proctex.alpha_combiner, "proctex_alpha_map"); + AppendProcTexCombineAndMap(out, config.state.proctex.alpha_combiner, "proctex_alpha_map", + "proctex_alpha_map_offset"); out += ";\n"; out += "return vec4(final_color.xyz, final_alpha);\n}\n"; } else { @@ -1238,7 +1250,7 @@ vec3 quaternion_rotate(vec4 q, vec3 v) { } float LookupLightingLUT(int lut_index, int index, float delta) { - vec2 entry = texelFetch(lighting_lut, lut_index * 256 + index).rg; + vec2 entry = texelFetch(lighting_lut, lighting_lut_offset[lut_index >> 2][lut_index & 3] + index).rg; return entry.r + entry.g * delta; } @@ -1481,7 +1493,7 @@ vec4 secondary_fragment_color = vec4(0.0); // Generate clamped fog factor from LUT for given fog index out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n"; out += "float fog_f = fog_index - fog_i;\n"; - out += "vec2 fog_lut_entry = texelFetch(fog_lut, int(fog_i)).rg;\n"; + out += "vec2 fog_lut_entry = texelFetch(fog_lut, int(fog_i) + fog_lut_offset).rg;\n"; out += "float fog_factor = fog_lut_entry.r + fog_lut_entry.g * fog_f;\n"; out += "fog_factor = clamp(fog_factor, 0.0, 1.0);\n"; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 73acc3297..3233f99e7 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -6,6 +6,7 @@ #include #include +#include "video_core/regs_lighting.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/renderer_opengl/pica_to_gl.h" @@ -38,6 +39,13 @@ struct UniformData { GLint scissor_y1; GLint scissor_x2; GLint scissor_y2; + GLint fog_lut_offset; + GLint proctex_noise_lut_offset; + GLint proctex_color_map_offset; + GLint proctex_alpha_map_offset; + GLint proctex_lut_offset; + GLint proctex_diff_lut_offset; + alignas(16) GLivec4 
lighting_lut_offset[Pica::LightingRegs::NumLightingSampler / 4]; alignas(16) GLvec3 fog_color; alignas(8) GLvec2 proctex_noise_f; alignas(8) GLvec2 proctex_noise_a; @@ -50,7 +58,7 @@ struct UniformData { }; static_assert( - sizeof(UniformData) == 0x470, + sizeof(UniformData) == 0x4e0, "The size of the UniformData structure has changed, update the structure in the shader"); static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index 45d4bc4bb..faada1556 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h @@ -23,6 +23,10 @@ using GLuvec2 = std::array; using GLuvec3 = std::array; using GLuvec4 = std::array; +using GLivec2 = std::array; +using GLivec3 = std::array; +using GLivec4 = std::array; + namespace PicaToGL { inline GLenum TextureFilterMode(Pica::TexturingRegs::TextureConfig::TextureFilter mode) { From 8a8c6f059fb92f5cdbec531acd4329959c4915c6 Mon Sep 17 00:00:00 2001 From: Markus Wick Date: Sat, 19 May 2018 11:12:51 +0200 Subject: [PATCH 02/12] gl_rasterizer: Move TBO syncing helper to one function. 
--- .../renderer_opengl/gl_rasterizer.cpp | 91 ++++++++++--------- .../renderer_opengl/gl_rasterizer.h | 3 + 2 files changed, 51 insertions(+), 43 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d8e1cd5fc..01d139aa6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -806,49 +806,8 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { shader_dirty = false; } - // Sync the lighting luts - for (unsigned index = 0; index < uniform_block_data.lut_dirty.size(); index++) { - if (uniform_block_data.lut_dirty[index]) { - SyncLightingLUT(index); - uniform_block_data.lut_dirty[index] = false; - } - } - - // Sync the fog lut - if (uniform_block_data.fog_lut_dirty) { - SyncFogLUT(); - uniform_block_data.fog_lut_dirty = false; - } - - // Sync the proctex noise lut - if (uniform_block_data.proctex_noise_lut_dirty) { - SyncProcTexNoiseLUT(); - uniform_block_data.proctex_noise_lut_dirty = false; - } - - // Sync the proctex color map - if (uniform_block_data.proctex_color_map_dirty) { - SyncProcTexColorMap(); - uniform_block_data.proctex_color_map_dirty = false; - } - - // Sync the proctex alpha map - if (uniform_block_data.proctex_alpha_map_dirty) { - SyncProcTexAlphaMap(); - uniform_block_data.proctex_alpha_map_dirty = false; - } - - // Sync the proctex lut - if (uniform_block_data.proctex_lut_dirty) { - SyncProcTexLUT(); - uniform_block_data.proctex_lut_dirty = false; - } - - // Sync the proctex difference lut - if (uniform_block_data.proctex_diff_lut_dirty) { - SyncProcTexDiffLUT(); - uniform_block_data.proctex_diff_lut_dirty = false; - } + // Sync the LUTs within the texture buffer + SyncAndUploadLUTs(); // Sync the uniform data const bool use_gs = regs.pipeline.use_gs == Pica::PipelineRegs::UseGS::Yes; @@ -2065,6 +2024,52 @@ void RasterizerOpenGL::SyncShadowBias() { } } +void RasterizerOpenGL::SyncAndUploadLUTs() 
{ + // Sync the lighting luts + for (unsigned index = 0; index < uniform_block_data.lut_dirty.size(); index++) { + if (uniform_block_data.lut_dirty[index]) { + SyncLightingLUT(index); + uniform_block_data.lut_dirty[index] = false; + } + } + + // Sync the fog lut + if (uniform_block_data.fog_lut_dirty) { + SyncFogLUT(); + uniform_block_data.fog_lut_dirty = false; + } + + // Sync the proctex noise lut + if (uniform_block_data.proctex_noise_lut_dirty) { + SyncProcTexNoiseLUT(); + uniform_block_data.proctex_noise_lut_dirty = false; + } + + // Sync the proctex color map + if (uniform_block_data.proctex_color_map_dirty) { + SyncProcTexColorMap(); + uniform_block_data.proctex_color_map_dirty = false; + } + + // Sync the proctex alpha map + if (uniform_block_data.proctex_alpha_map_dirty) { + SyncProcTexAlphaMap(); + uniform_block_data.proctex_alpha_map_dirty = false; + } + + // Sync the proctex lut + if (uniform_block_data.proctex_lut_dirty) { + SyncProcTexLUT(); + uniform_block_data.proctex_lut_dirty = false; + } + + // Sync the proctex difference lut + if (uniform_block_data.proctex_diff_lut_dirty) { + SyncProcTexDiffLUT(); + uniform_block_data.proctex_diff_lut_dirty = false; + } +} + void RasterizerOpenGL::UploadUniforms(bool accelerate_draw, bool use_gs) { // glBindBufferRange below also changes the generic buffer binding point, so we sync the state // first diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 34058796b..1d2c27ddd 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -220,6 +220,9 @@ private: /// Syncs the shadow rendering bias to match the PICA register void SyncShadowBias(); + /// Syncs and uploads the lighting, fog and proctex LUTs + void SyncAndUploadLUTs(); + /// Upload the uniform blocks to the uniform buffer object void UploadUniforms(bool accelerate_draw, bool use_gs); From 298ebe3752725cf8a19cf14c7beed5e3e5fab394 Mon Sep 
17 00:00:00 2001 From: Markus Wick Date: Sat, 19 May 2018 12:00:14 +0200 Subject: [PATCH 03/12] gl_rasterizer: Inline texture buffer uploads. --- .../renderer_opengl/gl_rasterizer.cpp | 179 ++++++++---------- .../renderer_opengl/gl_rasterizer.h | 11 -- 2 files changed, 78 insertions(+), 112 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 01d139aa6..26e66640b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -1725,21 +1725,6 @@ void RasterizerOpenGL::SyncFogColor() { uniform_block_data.dirty = true; } -void RasterizerOpenGL::SyncFogLUT() { - std::array new_data; - - std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(), - [](const auto& entry) { - return GLvec2{entry.ToFloat(), entry.DiffToFloat()}; - }); - - if (new_data != fog_lut_data) { - fog_lut_data = new_data; - glBindBuffer(GL_TEXTURE_BUFFER, fog_lut_buffer.handle); - glBufferSubData(GL_TEXTURE_BUFFER, 0, new_data.size() * sizeof(GLvec2), new_data.data()); - } -} - void RasterizerOpenGL::SyncProcTexNoise() { const auto& regs = Pica::g_state.regs.texturing; uniform_block_data.data.proctex_noise_f = { @@ -1758,70 +1743,6 @@ void RasterizerOpenGL::SyncProcTexNoise() { uniform_block_data.dirty = true; } -// helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap -static void SyncProcTexValueLUT(const std::array& lut, - std::array& lut_data, GLuint buffer) { - std::array new_data; - std::transform(lut.begin(), lut.end(), new_data.begin(), [](const auto& entry) { - return GLvec2{entry.ToFloat(), entry.DiffToFloat()}; - }); - - if (new_data != lut_data) { - lut_data = new_data; - glBindBuffer(GL_TEXTURE_BUFFER, buffer); - glBufferSubData(GL_TEXTURE_BUFFER, 0, new_data.size() * sizeof(GLvec2), new_data.data()); - } -} - -void RasterizerOpenGL::SyncProcTexNoiseLUT() { - SyncProcTexValueLUT(Pica::g_state.proctex.noise_table, 
proctex_noise_lut_data, - proctex_noise_lut_buffer.handle); -} - -void RasterizerOpenGL::SyncProcTexColorMap() { - SyncProcTexValueLUT(Pica::g_state.proctex.color_map_table, proctex_color_map_data, - proctex_color_map_buffer.handle); -} - -void RasterizerOpenGL::SyncProcTexAlphaMap() { - SyncProcTexValueLUT(Pica::g_state.proctex.alpha_map_table, proctex_alpha_map_data, - proctex_alpha_map_buffer.handle); -} - -void RasterizerOpenGL::SyncProcTexLUT() { - std::array new_data; - - std::transform(Pica::g_state.proctex.color_table.begin(), - Pica::g_state.proctex.color_table.end(), new_data.begin(), - [](const auto& entry) { - auto rgba = entry.ToVector() / 255.0f; - return GLvec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; - }); - - if (new_data != proctex_lut_data) { - proctex_lut_data = new_data; - glBindBuffer(GL_TEXTURE_BUFFER, proctex_lut_buffer.handle); - glBufferSubData(GL_TEXTURE_BUFFER, 0, new_data.size() * sizeof(GLvec4), new_data.data()); - } -} - -void RasterizerOpenGL::SyncProcTexDiffLUT() { - std::array new_data; - - std::transform(Pica::g_state.proctex.color_diff_table.begin(), - Pica::g_state.proctex.color_diff_table.end(), new_data.begin(), - [](const auto& entry) { - auto rgba = entry.ToVector() / 255.0f; - return GLvec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; - }); - - if (new_data != proctex_diff_lut_data) { - proctex_diff_lut_data = new_data; - glBindBuffer(GL_TEXTURE_BUFFER, proctex_diff_lut_buffer.handle); - glBufferSubData(GL_TEXTURE_BUFFER, 0, new_data.size() * sizeof(GLvec4), new_data.data()); - } -} - void RasterizerOpenGL::SyncAlphaTest() { const auto& regs = Pica::g_state.regs; if (regs.framebuffer.output_merger.alpha_test.ref != uniform_block_data.data.alphatest_ref) { @@ -1919,21 +1840,6 @@ void RasterizerOpenGL::SyncGlobalAmbient() { } } -void RasterizerOpenGL::SyncLightingLUT(unsigned lut_index) { - std::array new_data; - const auto& source_lut = Pica::g_state.lighting.luts[lut_index]; - std::transform(source_lut.begin(), 
source_lut.end(), new_data.begin(), [](const auto& entry) { - return GLvec2{entry.ToFloat(), entry.DiffToFloat()}; - }); - - if (new_data != lighting_lut_data[lut_index]) { - lighting_lut_data[lut_index] = new_data; - glBindBuffer(GL_TEXTURE_BUFFER, lighting_lut_buffer.handle); - glBufferSubData(GL_TEXTURE_BUFFER, lut_index * new_data.size() * sizeof(GLvec2), - new_data.size() * sizeof(GLvec2), new_data.data()); - } -} - void RasterizerOpenGL::SyncLightSpecular0(int light_index) { auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_0); if (color != uniform_block_data.data.light_src[light_index].specular_0) { @@ -2028,44 +1934,115 @@ void RasterizerOpenGL::SyncAndUploadLUTs() { // Sync the lighting luts for (unsigned index = 0; index < uniform_block_data.lut_dirty.size(); index++) { if (uniform_block_data.lut_dirty[index]) { - SyncLightingLUT(index); + std::array new_data; + const auto& source_lut = Pica::g_state.lighting.luts[index]; + std::transform(source_lut.begin(), source_lut.end(), new_data.begin(), + [](const auto& entry) { + return GLvec2{entry.ToFloat(), entry.DiffToFloat()}; + }); + + if (new_data != lighting_lut_data[index]) { + lighting_lut_data[index] = new_data; + glBindBuffer(GL_TEXTURE_BUFFER, lighting_lut_buffer.handle); + glBufferSubData(GL_TEXTURE_BUFFER, index * new_data.size() * sizeof(GLvec2), + new_data.size() * sizeof(GLvec2), new_data.data()); + } uniform_block_data.lut_dirty[index] = false; } } // Sync the fog lut if (uniform_block_data.fog_lut_dirty) { - SyncFogLUT(); + std::array new_data; + + std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(), + [](const auto& entry) { + return GLvec2{entry.ToFloat(), entry.DiffToFloat()}; + }); + + if (new_data != fog_lut_data) { + fog_lut_data = new_data; + glBindBuffer(GL_TEXTURE_BUFFER, fog_lut_buffer.handle); + glBufferSubData(GL_TEXTURE_BUFFER, 0, new_data.size() * sizeof(GLvec2), + new_data.data()); + } 
uniform_block_data.fog_lut_dirty = false; } + // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap + auto SyncProcTexValueLUT = [](const std::array& lut, + std::array& lut_data, GLuint buffer) { + std::array new_data; + std::transform(lut.begin(), lut.end(), new_data.begin(), [](const auto& entry) { + return GLvec2{entry.ToFloat(), entry.DiffToFloat()}; + }); + + if (new_data != lut_data) { + lut_data = new_data; + glBindBuffer(GL_TEXTURE_BUFFER, buffer); + glBufferSubData(GL_TEXTURE_BUFFER, 0, new_data.size() * sizeof(GLvec2), + new_data.data()); + } + }; + // Sync the proctex noise lut if (uniform_block_data.proctex_noise_lut_dirty) { - SyncProcTexNoiseLUT(); + SyncProcTexValueLUT(Pica::g_state.proctex.noise_table, proctex_noise_lut_data, + proctex_noise_lut_buffer.handle); uniform_block_data.proctex_noise_lut_dirty = false; } // Sync the proctex color map if (uniform_block_data.proctex_color_map_dirty) { - SyncProcTexColorMap(); + SyncProcTexValueLUT(Pica::g_state.proctex.color_map_table, proctex_color_map_data, + proctex_color_map_buffer.handle); uniform_block_data.proctex_color_map_dirty = false; } // Sync the proctex alpha map if (uniform_block_data.proctex_alpha_map_dirty) { - SyncProcTexAlphaMap(); + SyncProcTexValueLUT(Pica::g_state.proctex.alpha_map_table, proctex_alpha_map_data, + proctex_alpha_map_buffer.handle); uniform_block_data.proctex_alpha_map_dirty = false; } // Sync the proctex lut if (uniform_block_data.proctex_lut_dirty) { - SyncProcTexLUT(); + std::array new_data; + + std::transform(Pica::g_state.proctex.color_table.begin(), + Pica::g_state.proctex.color_table.end(), new_data.begin(), + [](const auto& entry) { + auto rgba = entry.ToVector() / 255.0f; + return GLvec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; + }); + + if (new_data != proctex_lut_data) { + proctex_lut_data = new_data; + glBindBuffer(GL_TEXTURE_BUFFER, proctex_lut_buffer.handle); + glBufferSubData(GL_TEXTURE_BUFFER, 0, new_data.size() * sizeof(GLvec4), + new_data.data()); 
+ } uniform_block_data.proctex_lut_dirty = false; } // Sync the proctex difference lut if (uniform_block_data.proctex_diff_lut_dirty) { - SyncProcTexDiffLUT(); + std::array new_data; + + std::transform(Pica::g_state.proctex.color_diff_table.begin(), + Pica::g_state.proctex.color_diff_table.end(), new_data.begin(), + [](const auto& entry) { + auto rgba = entry.ToVector() / 255.0f; + return GLvec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; + }); + + if (new_data != proctex_diff_lut_data) { + proctex_diff_lut_data = new_data; + glBindBuffer(GL_TEXTURE_BUFFER, proctex_diff_lut_buffer.handle); + glBufferSubData(GL_TEXTURE_BUFFER, 0, new_data.size() * sizeof(GLvec4), + new_data.data()); + } uniform_block_data.proctex_diff_lut_dirty = false; } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 1d2c27ddd..08816cc69 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -148,18 +148,10 @@ private: /// Syncs the fog states to match the PICA register void SyncFogColor(); - void SyncFogLUT(); /// Sync the procedural texture noise configuration to match the PICA register void SyncProcTexNoise(); - /// Sync the procedural texture lookup tables - void SyncProcTexNoiseLUT(); - void SyncProcTexColorMap(); - void SyncProcTexAlphaMap(); - void SyncProcTexLUT(); - void SyncProcTexDiffLUT(); - /// Syncs the alpha test states to match the PICA register void SyncAlphaTest(); @@ -190,9 +182,6 @@ private: /// Syncs the lighting global ambient color to match the PICA register void SyncGlobalAmbient(); - /// Syncs the lighting lookup tables - void SyncLightingLUT(unsigned index); - /// Syncs the specified light's specular 0 color to match the PICA register void SyncLightSpecular0(int light_index); From a8396cdbedf611e8f9d4bb7af0ebceab9b06592b Mon Sep 17 00:00:00 2001 From: Markus Wick Date: Sat, 19 May 2018 12:54:50 +0200 Subject: [PATCH 04/12] renderer_opengl: Create 
shared texture buffer. --- .../renderer_opengl/gl_rasterizer.cpp | 20 ++++++++++++++++++- .../renderer_opengl/gl_rasterizer.h | 5 +++++ .../renderer_opengl/gl_shader_gen.cpp | 2 ++ .../renderer_opengl/gl_shader_manager.cpp | 2 ++ src/video_core/renderer_opengl/gl_state.cpp | 20 +++++++++++++++++++ src/video_core/renderer_opengl/gl_state.h | 10 ++++++++++ 6 files changed, 58 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 26e66640b..80626a939 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -37,7 +37,8 @@ MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE), uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE), - index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE) { + index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE), + texture_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE) { allow_shadow = GLAD_GL_ARB_shader_image_load_store && GLAD_GL_ARB_shader_image_size && GLAD_GL_ARB_framebuffer_no_attachments; @@ -125,6 +126,17 @@ RasterizerOpenGL::RasterizerOpenGL() // Create render framebuffer framebuffer.Create(); + // Allocate and bind texture buffer lut textures + texture_buffer_lut_rg.Create(); + texture_buffer_lut_rgba.Create(); + state.texture_buffer_lut_rg.texture_buffer = texture_buffer_lut_rg.handle; + state.texture_buffer_lut_rgba.texture_buffer = texture_buffer_lut_rgba.handle; + state.Apply(); + glActiveTexture(TextureUnits::TextureBufferLUT_RG.Enum()); + glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, texture_buffer.GetHandle()); + glActiveTexture(TextureUnits::TextureBufferLUT_RGBA.Enum()); + glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, texture_buffer.GetHandle()); + // Allocate and bind lighting lut textures lighting_lut.Create(); 
state.lighting_lut.texture_buffer = lighting_lut.handle; @@ -1931,6 +1943,12 @@ void RasterizerOpenGL::SyncShadowBias() { } void RasterizerOpenGL::SyncAndUploadLUTs() { + constexpr size_t max_size = sizeof(GLvec2) * 256 * Pica::LightingRegs::NumLightingSampler + + sizeof(GLvec2) * 128 + // fog + sizeof(GLvec2) * 128 * 3 + // proctex: noise + color + alpha + sizeof(GLvec4) * 256 + // proctex + sizeof(GLvec4) * 256; // proctex diff + // Sync the lighting luts for (unsigned index = 0; index < uniform_block_data.lut_dirty.size(); index++) { if (uniform_block_data.lut_dirty[index]) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 08816cc69..dd1fdae7f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -266,6 +266,7 @@ private: static constexpr size_t VERTEX_BUFFER_SIZE = 32 * 1024 * 1024; static constexpr size_t INDEX_BUFFER_SIZE = 1 * 1024 * 1024; static constexpr size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024; + static constexpr size_t TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024; OGLVertexArray sw_vao; // VAO for software shader draw OGLVertexArray hw_vao; // VAO for hardware shader / accelerate draw @@ -275,6 +276,7 @@ private: OGLStreamBuffer vertex_buffer; OGLStreamBuffer uniform_buffer; OGLStreamBuffer index_buffer; + OGLStreamBuffer texture_buffer; OGLFramebuffer framebuffer; GLint uniform_buffer_alignment; size_t uniform_size_aligned_vs; @@ -283,6 +285,9 @@ private: SamplerInfo texture_cube_sampler; + OGLTexture texture_buffer_lut_rg; + OGLTexture texture_buffer_lut_rgba; + OGLBuffer lighting_lut_buffer; OGLTexture lighting_lut; std::array, Pica::LightingRegs::NumLightingSampler> lighting_lut_data{}; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 6a8f1df44..0d3a955a4 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ 
b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -1222,6 +1222,8 @@ uniform sampler2D tex0; uniform sampler2D tex1; uniform sampler2D tex2; uniform samplerCube tex_cube; +uniform samplerBuffer texture_buffer_lut_rg; +uniform samplerBuffer texture_buffer_lut_rgba; uniform samplerBuffer lighting_lut; uniform samplerBuffer fog_lut; uniform samplerBuffer proctex_noise_lut; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index e70b5d87a..66b2db9ed 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -55,6 +55,8 @@ static void SetShaderSamplerBindings(GLuint shader) { SetShaderSamplerBinding(shader, "tex_cube", TextureUnits::TextureCube); // Set the texture samplers to correspond to different lookup table texture units + SetShaderSamplerBinding(shader, "texture_buffer_lut_rg", TextureUnits::TextureBufferLUT_RG); + SetShaderSamplerBinding(shader, "texture_buffer_lut_rgba", TextureUnits::TextureBufferLUT_RGBA); SetShaderSamplerBinding(shader, "lighting_lut", TextureUnits::LightingLUT); SetShaderSamplerBinding(shader, "fog_lut", TextureUnits::FogLUT); SetShaderSamplerBinding(shader, "proctex_noise_lut", TextureUnits::ProcTexNoiseLUT); diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 95dbd591b..217cdb316 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -55,6 +55,9 @@ OpenGLState::OpenGLState() { texture_cube_unit.texture_cube = 0; texture_cube_unit.sampler = 0; + texture_buffer_lut_rg.texture_buffer = 0; + texture_buffer_lut_rgba.texture_buffer = 0; + lighting_lut.texture_buffer = 0; fog_lut.texture_buffer = 0; @@ -221,6 +224,19 @@ void OpenGLState::Apply() const { glBindSampler(TextureUnits::TextureCube.id, texture_cube_unit.sampler); } + // Texture buffer LUTs + if (texture_buffer_lut_rg.texture_buffer 
!= cur_state.texture_buffer_lut_rg.texture_buffer) { + glActiveTexture(TextureUnits::TextureBufferLUT_RG.Enum()); + glBindTexture(GL_TEXTURE_BUFFER, texture_buffer_lut_rg.texture_buffer); + } + + // Texture buffer LUTs + if (texture_buffer_lut_rgba.texture_buffer != + cur_state.texture_buffer_lut_rgba.texture_buffer) { + glActiveTexture(TextureUnits::TextureBufferLUT_RGBA.Enum()); + glBindTexture(GL_TEXTURE_BUFFER, texture_buffer_lut_rgba.texture_buffer); + } + // Lighting LUTs if (lighting_lut.texture_buffer != cur_state.lighting_lut.texture_buffer) { glActiveTexture(TextureUnits::LightingLUT.Enum()); @@ -374,6 +390,10 @@ OpenGLState& OpenGLState::ResetTexture(GLuint handle) { } if (texture_cube_unit.texture_cube == handle) texture_cube_unit.texture_cube = 0; + if (texture_buffer_lut_rg.texture_buffer == handle) + texture_buffer_lut_rg.texture_buffer = 0; + if (texture_buffer_lut_rgba.texture_buffer == handle) + texture_buffer_lut_rgba.texture_buffer = 0; if (lighting_lut.texture_buffer == handle) lighting_lut.texture_buffer = 0; if (fog_lut.texture_buffer == handle) diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index ebc217349..64ded0b80 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -28,6 +28,8 @@ constexpr TextureUnit ProcTexAlphaMap{7}; constexpr TextureUnit ProcTexLUT{8}; constexpr TextureUnit ProcTexDiffLUT{9}; constexpr TextureUnit TextureCube{10}; +constexpr TextureUnit TextureBufferLUT_RG{11}; +constexpr TextureUnit TextureBufferLUT_RGBA{12}; } // namespace TextureUnits @@ -103,6 +105,14 @@ public: GLuint sampler; // GL_SAMPLER_BINDING } texture_cube_unit; + struct { + GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER + } texture_buffer_lut_rg; + + struct { + GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER + } texture_buffer_lut_rgba; + struct { GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER } lighting_lut; From 
5a9cde138dea9f40a0307890a7d53936dd4073b2 Mon Sep 17 00:00:00 2001 From: Markus Wick Date: Sat, 19 May 2018 14:47:25 +0200 Subject: [PATCH 05/12] gl_rasterizer: Add a new dirty flag for any lighting lut. --- .../renderer_opengl/gl_rasterizer.cpp | 37 +++++++++++-------- .../renderer_opengl/gl_rasterizer.h | 3 +- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 80626a939..84232e7f4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -67,7 +67,8 @@ RasterizerOpenGL::RasterizerOpenGL() uniform_block_data.dirty = true; - uniform_block_data.lut_dirty.fill(true); + uniform_block_data.lighting_lut_dirty.fill(true); + uniform_block_data.lighting_lut_dirty_any = true; uniform_block_data.fog_lut_dirty = true; @@ -1382,7 +1383,8 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce): case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): { auto& lut_config = regs.lighting.lut_config; - uniform_block_data.lut_dirty[lut_config.type] = true; + uniform_block_data.lighting_lut_dirty[lut_config.type] = true; + uniform_block_data.lighting_lut_dirty_any = true; break; } } @@ -1950,24 +1952,27 @@ void RasterizerOpenGL::SyncAndUploadLUTs() { sizeof(GLvec4) * 256; // proctex diff // Sync the lighting luts - for (unsigned index = 0; index < uniform_block_data.lut_dirty.size(); index++) { - if (uniform_block_data.lut_dirty[index]) { - std::array new_data; - const auto& source_lut = Pica::g_state.lighting.luts[index]; - std::transform(source_lut.begin(), source_lut.end(), new_data.begin(), - [](const auto& entry) { - return GLvec2{entry.ToFloat(), entry.DiffToFloat()}; - }); + if (uniform_block_data.lighting_lut_dirty_any) { + for (unsigned index = 0; index < uniform_block_data.lighting_lut_dirty.size(); index++) { + if 
(uniform_block_data.lighting_lut_dirty[index]) { + std::array new_data; + const auto& source_lut = Pica::g_state.lighting.luts[index]; + std::transform(source_lut.begin(), source_lut.end(), new_data.begin(), + [](const auto& entry) { + return GLvec2{entry.ToFloat(), entry.DiffToFloat()}; + }); - if (new_data != lighting_lut_data[index]) { - lighting_lut_data[index] = new_data; - glBindBuffer(GL_TEXTURE_BUFFER, lighting_lut_buffer.handle); - glBufferSubData(GL_TEXTURE_BUFFER, index * new_data.size() * sizeof(GLvec2), - new_data.size() * sizeof(GLvec2), new_data.data()); + if (new_data != lighting_lut_data[index]) { + lighting_lut_data[index] = new_data; + glBindBuffer(GL_TEXTURE_BUFFER, lighting_lut_buffer.handle); + glBufferSubData(GL_TEXTURE_BUFFER, index * new_data.size() * sizeof(GLvec2), + new_data.size() * sizeof(GLvec2), new_data.data()); + } + uniform_block_data.lighting_lut_dirty[index] = false; } - uniform_block_data.lut_dirty[index] = false; } } + uniform_block_data.lighting_lut_dirty_any = false; // Sync the fog lut if (uniform_block_data.fog_lut_dirty) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index dd1fdae7f..8b33c9573 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -250,7 +250,8 @@ private: struct { UniformData data; - std::array lut_dirty; + std::array lighting_lut_dirty; + bool lighting_lut_dirty_any; bool fog_lut_dirty; bool proctex_noise_lut_dirty; bool proctex_color_map_dirty; From 4679487640c5c80d9d6bee66370ebf89b53de700 Mon Sep 17 00:00:00 2001 From: Markus Wick Date: Sat, 19 May 2018 15:09:57 +0200 Subject: [PATCH 06/12] gl_rasterizer: Use the shared texture buffer for the lighting lut. 
--- .../renderer_opengl/gl_rasterizer.cpp | 47 ++++++++++--------- .../renderer_opengl/gl_rasterizer.h | 2 - .../renderer_opengl/gl_shader_gen.cpp | 3 +- .../renderer_opengl/gl_shader_manager.cpp | 1 - src/video_core/renderer_opengl/gl_state.cpp | 10 ---- src/video_core/renderer_opengl/gl_state.h | 5 -- 6 files changed, 27 insertions(+), 41 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 84232e7f4..a7e8126e3 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -78,9 +78,6 @@ RasterizerOpenGL::RasterizerOpenGL() uniform_block_data.proctex_lut_dirty = true; uniform_block_data.proctex_diff_lut_dirty = true; - for (int i = 0; i < 24; i++) - uniform_block_data.data.lighting_lut_offset[i / 4][i % 4] = 256 * i; - glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); uniform_size_aligned_vs = Common::AlignUp(sizeof(VSUniformData), uniform_buffer_alignment); @@ -138,18 +135,6 @@ RasterizerOpenGL::RasterizerOpenGL() glActiveTexture(TextureUnits::TextureBufferLUT_RGBA.Enum()); glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, texture_buffer.GetHandle()); - // Allocate and bind lighting lut textures - lighting_lut.Create(); - state.lighting_lut.texture_buffer = lighting_lut.handle; - state.Apply(); - lighting_lut_buffer.Create(); - glBindBuffer(GL_TEXTURE_BUFFER, lighting_lut_buffer.handle); - glBufferData(GL_TEXTURE_BUFFER, - sizeof(GLfloat) * 2 * 256 * Pica::LightingRegs::NumLightingSampler, nullptr, - GL_DYNAMIC_DRAW); - glActiveTexture(TextureUnits::LightingLUT.Enum()); - glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, lighting_lut_buffer.handle); - // Setup the LUT for the fog fog_lut.Create(); state.fog_lut.texture_buffer = fog_lut.handle; @@ -1951,10 +1936,25 @@ void RasterizerOpenGL::SyncAndUploadLUTs() { sizeof(GLvec4) * 256 + // proctex sizeof(GLvec4) * 256; // proctex diff + if 
(!uniform_block_data.lighting_lut_dirty_any && !uniform_block_data.fog_lut_dirty && + !uniform_block_data.proctex_noise_lut_dirty && + !uniform_block_data.proctex_color_map_dirty && + !uniform_block_data.proctex_alpha_map_dirty && !uniform_block_data.proctex_lut_dirty && + !uniform_block_data.proctex_diff_lut_dirty) { + return; + } + + u8* buffer; + GLintptr offset; + bool invalidate; + size_t bytes_used = 0; + glBindBuffer(GL_TEXTURE_BUFFER, texture_buffer.GetHandle()); + std::tie(buffer, offset, invalidate) = texture_buffer.Map(max_size, sizeof(GLvec4)); + // Sync the lighting luts - if (uniform_block_data.lighting_lut_dirty_any) { + if (uniform_block_data.lighting_lut_dirty_any || invalidate) { for (unsigned index = 0; index < uniform_block_data.lighting_lut_dirty.size(); index++) { - if (uniform_block_data.lighting_lut_dirty[index]) { + if (uniform_block_data.lighting_lut_dirty[index] || invalidate) { std::array new_data; const auto& source_lut = Pica::g_state.lighting.luts[index]; std::transform(source_lut.begin(), source_lut.end(), new_data.begin(), @@ -1962,11 +1962,14 @@ void RasterizerOpenGL::SyncAndUploadLUTs() { return GLvec2{entry.ToFloat(), entry.DiffToFloat()}; }); - if (new_data != lighting_lut_data[index]) { + if (new_data != lighting_lut_data[index] || invalidate) { lighting_lut_data[index] = new_data; - glBindBuffer(GL_TEXTURE_BUFFER, lighting_lut_buffer.handle); - glBufferSubData(GL_TEXTURE_BUFFER, index * new_data.size() * sizeof(GLvec2), - new_data.size() * sizeof(GLvec2), new_data.data()); + std::memcpy(buffer + bytes_used, new_data.data(), + new_data.size() * sizeof(GLvec2)); + uniform_block_data.data.lighting_lut_offset[index / 4][index % 4] = + (offset + bytes_used) / sizeof(GLvec2); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(GLvec2); } uniform_block_data.lighting_lut_dirty[index] = false; } @@ -2068,6 +2071,8 @@ void RasterizerOpenGL::SyncAndUploadLUTs() { } uniform_block_data.proctex_diff_lut_dirty = 
false; } + + texture_buffer.Unmap(bytes_used); } void RasterizerOpenGL::UploadUniforms(bool accelerate_draw, bool use_gs) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 8b33c9573..8eb78a4f9 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -289,8 +289,6 @@ private: OGLTexture texture_buffer_lut_rg; OGLTexture texture_buffer_lut_rgba; - OGLBuffer lighting_lut_buffer; - OGLTexture lighting_lut; std::array, Pica::LightingRegs::NumLightingSampler> lighting_lut_data{}; OGLBuffer fog_lut_buffer; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 0d3a955a4..312a41f00 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -1224,7 +1224,6 @@ uniform sampler2D tex2; uniform samplerCube tex_cube; uniform samplerBuffer texture_buffer_lut_rg; uniform samplerBuffer texture_buffer_lut_rgba; -uniform samplerBuffer lighting_lut; uniform samplerBuffer fog_lut; uniform samplerBuffer proctex_noise_lut; uniform samplerBuffer proctex_color_map; @@ -1252,7 +1251,7 @@ vec3 quaternion_rotate(vec4 q, vec3 v) { } float LookupLightingLUT(int lut_index, int index, float delta) { - vec2 entry = texelFetch(lighting_lut, lighting_lut_offset[lut_index >> 2][lut_index & 3] + index).rg; + vec2 entry = texelFetch(texture_buffer_lut_rg, lighting_lut_offset[lut_index >> 2][lut_index & 3] + index).rg; return entry.r + entry.g * delta; } diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 66b2db9ed..c14744e6b 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -57,7 +57,6 @@ static void SetShaderSamplerBindings(GLuint shader) { // Set the texture samplers to correspond to different lookup 
table texture units SetShaderSamplerBinding(shader, "texture_buffer_lut_rg", TextureUnits::TextureBufferLUT_RG); SetShaderSamplerBinding(shader, "texture_buffer_lut_rgba", TextureUnits::TextureBufferLUT_RGBA); - SetShaderSamplerBinding(shader, "lighting_lut", TextureUnits::LightingLUT); SetShaderSamplerBinding(shader, "fog_lut", TextureUnits::FogLUT); SetShaderSamplerBinding(shader, "proctex_noise_lut", TextureUnits::ProcTexNoiseLUT); SetShaderSamplerBinding(shader, "proctex_color_map", TextureUnits::ProcTexColorMap); diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 217cdb316..005523ce3 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -58,8 +58,6 @@ OpenGLState::OpenGLState() { texture_buffer_lut_rg.texture_buffer = 0; texture_buffer_lut_rgba.texture_buffer = 0; - lighting_lut.texture_buffer = 0; - fog_lut.texture_buffer = 0; proctex_lut.texture_buffer = 0; @@ -237,12 +235,6 @@ void OpenGLState::Apply() const { glBindTexture(GL_TEXTURE_BUFFER, texture_buffer_lut_rgba.texture_buffer); } - // Lighting LUTs - if (lighting_lut.texture_buffer != cur_state.lighting_lut.texture_buffer) { - glActiveTexture(TextureUnits::LightingLUT.Enum()); - glBindTexture(GL_TEXTURE_BUFFER, lighting_lut.texture_buffer); - } - // Fog LUT if (fog_lut.texture_buffer != cur_state.fog_lut.texture_buffer) { glActiveTexture(TextureUnits::FogLUT.Enum()); @@ -394,8 +386,6 @@ OpenGLState& OpenGLState::ResetTexture(GLuint handle) { texture_buffer_lut_rg.texture_buffer = 0; if (texture_buffer_lut_rgba.texture_buffer == handle) texture_buffer_lut_rgba.texture_buffer = 0; - if (lighting_lut.texture_buffer == handle) - lighting_lut.texture_buffer = 0; if (fog_lut.texture_buffer == handle) fog_lut.texture_buffer = 0; if (proctex_noise_lut.texture_buffer == handle) diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 
64ded0b80..4a0745720 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -20,7 +20,6 @@ constexpr TextureUnit PicaTexture(int unit) { return TextureUnit{unit}; } -constexpr TextureUnit LightingLUT{3}; constexpr TextureUnit FogLUT{4}; constexpr TextureUnit ProcTexNoiseLUT{5}; constexpr TextureUnit ProcTexColorMap{6}; @@ -113,10 +112,6 @@ public: GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER } texture_buffer_lut_rgba; - struct { - GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER - } lighting_lut; - struct { GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER } fog_lut; From 63fb7dcc1bd6afe32529f5891e28b6723caec662 Mon Sep 17 00:00:00 2001 From: Markus Wick Date: Sat, 19 May 2018 15:16:49 +0200 Subject: [PATCH 07/12] gl_rasterizer: Use the shared texture buffer for the fog lut. --- .../renderer_opengl/gl_rasterizer.cpp | 21 ++++++------------- .../renderer_opengl/gl_rasterizer.h | 3 --- .../renderer_opengl/gl_shader_gen.cpp | 4 ++-- .../renderer_opengl/gl_shader_manager.cpp | 1 - src/video_core/renderer_opengl/gl_state.cpp | 10 --------- src/video_core/renderer_opengl/gl_state.h | 5 ----- 6 files changed, 8 insertions(+), 36 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index a7e8126e3..d1e00efa2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -135,16 +135,6 @@ RasterizerOpenGL::RasterizerOpenGL() glActiveTexture(TextureUnits::TextureBufferLUT_RGBA.Enum()); glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, texture_buffer.GetHandle()); - // Setup the LUT for the fog - fog_lut.Create(); - state.fog_lut.texture_buffer = fog_lut.handle; - state.Apply(); - fog_lut_buffer.Create(); - glBindBuffer(GL_TEXTURE_BUFFER, fog_lut_buffer.handle); - glBufferData(GL_TEXTURE_BUFFER, sizeof(GLfloat) * 2 * 128, nullptr, GL_DYNAMIC_DRAW); - 
glActiveTexture(TextureUnits::FogLUT.Enum()); - glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, fog_lut_buffer.handle); - // Setup the noise LUT for proctex proctex_noise_lut.Create(); state.proctex_noise_lut.texture_buffer = proctex_noise_lut.handle; @@ -1978,7 +1968,7 @@ void RasterizerOpenGL::SyncAndUploadLUTs() { uniform_block_data.lighting_lut_dirty_any = false; // Sync the fog lut - if (uniform_block_data.fog_lut_dirty) { + if (uniform_block_data.fog_lut_dirty || invalidate) { std::array new_data; std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(), @@ -1986,11 +1976,12 @@ void RasterizerOpenGL::SyncAndUploadLUTs() { return GLvec2{entry.ToFloat(), entry.DiffToFloat()}; }); - if (new_data != fog_lut_data) { + if (new_data != fog_lut_data || invalidate) { fog_lut_data = new_data; - glBindBuffer(GL_TEXTURE_BUFFER, fog_lut_buffer.handle); - glBufferSubData(GL_TEXTURE_BUFFER, 0, new_data.size() * sizeof(GLvec2), - new_data.data()); + std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(GLvec2)); + uniform_block_data.data.fog_lut_offset = (offset + bytes_used) / sizeof(GLvec2); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(GLvec2); } uniform_block_data.fog_lut_dirty = false; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 8eb78a4f9..8e532d907 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -290,9 +290,6 @@ private: OGLTexture texture_buffer_lut_rgba; std::array, Pica::LightingRegs::NumLightingSampler> lighting_lut_data{}; - - OGLBuffer fog_lut_buffer; - OGLTexture fog_lut; std::array fog_lut_data{}; OGLBuffer proctex_noise_lut_buffer; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 312a41f00..58c83a775 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ 
b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -1224,7 +1224,6 @@ uniform sampler2D tex2; uniform samplerCube tex_cube; uniform samplerBuffer texture_buffer_lut_rg; uniform samplerBuffer texture_buffer_lut_rgba; -uniform samplerBuffer fog_lut; uniform samplerBuffer proctex_noise_lut; uniform samplerBuffer proctex_color_map; uniform samplerBuffer proctex_alpha_map; @@ -1494,7 +1493,8 @@ vec4 secondary_fragment_color = vec4(0.0); // Generate clamped fog factor from LUT for given fog index out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n"; out += "float fog_f = fog_index - fog_i;\n"; - out += "vec2 fog_lut_entry = texelFetch(fog_lut, int(fog_i) + fog_lut_offset).rg;\n"; + out += "vec2 fog_lut_entry = texelFetch(texture_buffer_lut_rg, int(fog_i) + " + "fog_lut_offset).rg;\n"; out += "float fog_factor = fog_lut_entry.r + fog_lut_entry.g * fog_f;\n"; out += "fog_factor = clamp(fog_factor, 0.0, 1.0);\n"; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index c14744e6b..318037267 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -57,7 +57,6 @@ static void SetShaderSamplerBindings(GLuint shader) { // Set the texture samplers to correspond to different lookup table texture units SetShaderSamplerBinding(shader, "texture_buffer_lut_rg", TextureUnits::TextureBufferLUT_RG); SetShaderSamplerBinding(shader, "texture_buffer_lut_rgba", TextureUnits::TextureBufferLUT_RGBA); - SetShaderSamplerBinding(shader, "fog_lut", TextureUnits::FogLUT); SetShaderSamplerBinding(shader, "proctex_noise_lut", TextureUnits::ProcTexNoiseLUT); SetShaderSamplerBinding(shader, "proctex_color_map", TextureUnits::ProcTexColorMap); SetShaderSamplerBinding(shader, "proctex_alpha_map", TextureUnits::ProcTexAlphaMap); diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 005523ce3..2b58f673e 
100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -58,8 +58,6 @@ OpenGLState::OpenGLState() { texture_buffer_lut_rg.texture_buffer = 0; texture_buffer_lut_rgba.texture_buffer = 0; - fog_lut.texture_buffer = 0; - proctex_lut.texture_buffer = 0; proctex_diff_lut.texture_buffer = 0; proctex_color_map.texture_buffer = 0; @@ -235,12 +233,6 @@ void OpenGLState::Apply() const { glBindTexture(GL_TEXTURE_BUFFER, texture_buffer_lut_rgba.texture_buffer); } - // Fog LUT - if (fog_lut.texture_buffer != cur_state.fog_lut.texture_buffer) { - glActiveTexture(TextureUnits::FogLUT.Enum()); - glBindTexture(GL_TEXTURE_BUFFER, fog_lut.texture_buffer); - } - // ProcTex Noise LUT if (proctex_noise_lut.texture_buffer != cur_state.proctex_noise_lut.texture_buffer) { glActiveTexture(TextureUnits::ProcTexNoiseLUT.Enum()); @@ -386,8 +378,6 @@ OpenGLState& OpenGLState::ResetTexture(GLuint handle) { texture_buffer_lut_rg.texture_buffer = 0; if (texture_buffer_lut_rgba.texture_buffer == handle) texture_buffer_lut_rgba.texture_buffer = 0; - if (fog_lut.texture_buffer == handle) - fog_lut.texture_buffer = 0; if (proctex_noise_lut.texture_buffer == handle) proctex_noise_lut.texture_buffer = 0; if (proctex_color_map.texture_buffer == handle) diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 4a0745720..759b94569 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -20,7 +20,6 @@ constexpr TextureUnit PicaTexture(int unit) { return TextureUnit{unit}; } -constexpr TextureUnit FogLUT{4}; constexpr TextureUnit ProcTexNoiseLUT{5}; constexpr TextureUnit ProcTexColorMap{6}; constexpr TextureUnit ProcTexAlphaMap{7}; @@ -112,10 +111,6 @@ public: GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER } texture_buffer_lut_rgba; - struct { - GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER - } fog_lut; - struct { GLuint texture_buffer; // 
GL_TEXTURE_BINDING_BUFFER } proctex_noise_lut; From 1ca6d2ea8d230754ace2e24c854efdf59c3dc795 Mon Sep 17 00:00:00 2001 From: Markus Wick Date: Sat, 19 May 2018 15:28:55 +0200 Subject: [PATCH 08/12] gl_rasterizer: Use the shared texture buffer for the noise, color and alpha map. --- .../renderer_opengl/gl_rasterizer.cpp | 56 +++++-------------- .../renderer_opengl/gl_rasterizer.h | 9 --- .../renderer_opengl/gl_shader_gen.cpp | 13 ++--- .../renderer_opengl/gl_shader_manager.cpp | 3 - src/video_core/renderer_opengl/gl_state.cpp | 27 --------- src/video_core/renderer_opengl/gl_state.h | 15 ----- 6 files changed, 19 insertions(+), 104 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d1e00efa2..82426dcf1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -135,36 +135,6 @@ RasterizerOpenGL::RasterizerOpenGL() glActiveTexture(TextureUnits::TextureBufferLUT_RGBA.Enum()); glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, texture_buffer.GetHandle()); - // Setup the noise LUT for proctex - proctex_noise_lut.Create(); - state.proctex_noise_lut.texture_buffer = proctex_noise_lut.handle; - state.Apply(); - proctex_noise_lut_buffer.Create(); - glBindBuffer(GL_TEXTURE_BUFFER, proctex_noise_lut_buffer.handle); - glBufferData(GL_TEXTURE_BUFFER, sizeof(GLfloat) * 2 * 128, nullptr, GL_DYNAMIC_DRAW); - glActiveTexture(TextureUnits::ProcTexNoiseLUT.Enum()); - glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, proctex_noise_lut_buffer.handle); - - // Setup the color map for proctex - proctex_color_map.Create(); - state.proctex_color_map.texture_buffer = proctex_color_map.handle; - state.Apply(); - proctex_color_map_buffer.Create(); - glBindBuffer(GL_TEXTURE_BUFFER, proctex_color_map_buffer.handle); - glBufferData(GL_TEXTURE_BUFFER, sizeof(GLfloat) * 2 * 128, nullptr, GL_DYNAMIC_DRAW); - glActiveTexture(TextureUnits::ProcTexColorMap.Enum()); - 
glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, proctex_color_map_buffer.handle); - - // Setup the alpha map for proctex - proctex_alpha_map.Create(); - state.proctex_alpha_map.texture_buffer = proctex_alpha_map.handle; - state.Apply(); - proctex_alpha_map_buffer.Create(); - glBindBuffer(GL_TEXTURE_BUFFER, proctex_alpha_map_buffer.handle); - glBufferData(GL_TEXTURE_BUFFER, sizeof(GLfloat) * 2 * 128, nullptr, GL_DYNAMIC_DRAW); - glActiveTexture(TextureUnits::ProcTexAlphaMap.Enum()); - glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, proctex_alpha_map_buffer.handle); - // Setup the LUT for proctex proctex_lut.Create(); state.proctex_lut.texture_buffer = proctex_lut.handle; @@ -1987,39 +1957,41 @@ void RasterizerOpenGL::SyncAndUploadLUTs() { } // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap - auto SyncProcTexValueLUT = [](const std::array& lut, - std::array& lut_data, GLuint buffer) { + auto SyncProcTexValueLUT = [this, buffer, offset, invalidate, &bytes_used]( + const std::array& lut, + std::array& lut_data, GLint& lut_offset) { std::array new_data; std::transform(lut.begin(), lut.end(), new_data.begin(), [](const auto& entry) { return GLvec2{entry.ToFloat(), entry.DiffToFloat()}; }); - if (new_data != lut_data) { + if (new_data != lut_data || invalidate) { lut_data = new_data; - glBindBuffer(GL_TEXTURE_BUFFER, buffer); - glBufferSubData(GL_TEXTURE_BUFFER, 0, new_data.size() * sizeof(GLvec2), - new_data.data()); + std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(GLvec2)); + lut_offset = (offset + bytes_used) / sizeof(GLvec2); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(GLvec2); } }; // Sync the proctex noise lut - if (uniform_block_data.proctex_noise_lut_dirty) { + if (uniform_block_data.proctex_noise_lut_dirty || invalidate) { SyncProcTexValueLUT(Pica::g_state.proctex.noise_table, proctex_noise_lut_data, - proctex_noise_lut_buffer.handle); + uniform_block_data.data.proctex_noise_lut_offset); 
uniform_block_data.proctex_noise_lut_dirty = false; } // Sync the proctex color map - if (uniform_block_data.proctex_color_map_dirty) { + if (uniform_block_data.proctex_color_map_dirty || invalidate) { SyncProcTexValueLUT(Pica::g_state.proctex.color_map_table, proctex_color_map_data, - proctex_color_map_buffer.handle); + uniform_block_data.data.proctex_color_map_offset); uniform_block_data.proctex_color_map_dirty = false; } // Sync the proctex alpha map - if (uniform_block_data.proctex_alpha_map_dirty) { + if (uniform_block_data.proctex_alpha_map_dirty || invalidate) { SyncProcTexValueLUT(Pica::g_state.proctex.alpha_map_table, proctex_alpha_map_data, - proctex_alpha_map_buffer.handle); + uniform_block_data.data.proctex_alpha_map_offset); uniform_block_data.proctex_alpha_map_dirty = false; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 8e532d907..d2fc8ead1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -291,17 +291,8 @@ private: std::array, Pica::LightingRegs::NumLightingSampler> lighting_lut_data{}; std::array fog_lut_data{}; - - OGLBuffer proctex_noise_lut_buffer; - OGLTexture proctex_noise_lut; std::array proctex_noise_lut_data{}; - - OGLBuffer proctex_color_map_buffer; - OGLTexture proctex_color_map; std::array proctex_color_map_data{}; - - OGLBuffer proctex_alpha_map_buffer; - OGLTexture proctex_alpha_map; std::array proctex_alpha_map_data{}; OGLBuffer proctex_lut_buffer; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 58c83a775..063bf9d70 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -1113,8 +1113,8 @@ float ProcTexNoiseCoef(vec2 x) { float g2 = ProcTexNoiseRand2D(point + vec2(0.0, 1.0)) * (frac.x + frac.y - 1.0); float g3 = ProcTexNoiseRand2D(point + vec2(1.0, 1.0)) * (frac.x + 
frac.y - 2.0); - float x_noise = ProcTexLookupLUT(proctex_noise_lut, proctex_noise_lut_offset, frac.x); - float y_noise = ProcTexLookupLUT(proctex_noise_lut, proctex_noise_lut_offset, frac.y); + float x_noise = ProcTexLookupLUT(texture_buffer_lut_rg, proctex_noise_lut_offset, frac.x); + float y_noise = ProcTexLookupLUT(texture_buffer_lut_rg, proctex_noise_lut_offset, frac.y); float x0 = mix(g0, g1, x_noise); float x1 = mix(g2, g3, x_noise); return mix(x0, x1, y_noise); @@ -1156,7 +1156,7 @@ float ProcTexNoiseCoef(vec2 x) { // Combine and map out += "float lut_coord = "; - AppendProcTexCombineAndMap(out, config.state.proctex.color_combiner, "proctex_color_map", + AppendProcTexCombineAndMap(out, config.state.proctex.color_combiner, "texture_buffer_lut_rg", "proctex_color_map_offset"); out += ";\n"; @@ -1188,8 +1188,8 @@ float ProcTexNoiseCoef(vec2 x) { // Note: in separate alpha mode, the alpha channel skips the color LUT look up stage. It // uses the output of CombineAndMap directly instead. 
out += "float final_alpha = "; - AppendProcTexCombineAndMap(out, config.state.proctex.alpha_combiner, "proctex_alpha_map", - "proctex_alpha_map_offset"); + AppendProcTexCombineAndMap(out, config.state.proctex.alpha_combiner, + "texture_buffer_lut_rg", "proctex_alpha_map_offset"); out += ";\n"; out += "return vec4(final_color.xyz, final_alpha);\n}\n"; } else { @@ -1224,9 +1224,6 @@ uniform sampler2D tex2; uniform samplerCube tex_cube; uniform samplerBuffer texture_buffer_lut_rg; uniform samplerBuffer texture_buffer_lut_rgba; -uniform samplerBuffer proctex_noise_lut; -uniform samplerBuffer proctex_color_map; -uniform samplerBuffer proctex_alpha_map; uniform samplerBuffer proctex_lut; uniform samplerBuffer proctex_diff_lut; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 318037267..19e163804 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -57,9 +57,6 @@ static void SetShaderSamplerBindings(GLuint shader) { // Set the texture samplers to correspond to different lookup table texture units SetShaderSamplerBinding(shader, "texture_buffer_lut_rg", TextureUnits::TextureBufferLUT_RG); SetShaderSamplerBinding(shader, "texture_buffer_lut_rgba", TextureUnits::TextureBufferLUT_RGBA); - SetShaderSamplerBinding(shader, "proctex_noise_lut", TextureUnits::ProcTexNoiseLUT); - SetShaderSamplerBinding(shader, "proctex_color_map", TextureUnits::ProcTexColorMap); - SetShaderSamplerBinding(shader, "proctex_alpha_map", TextureUnits::ProcTexAlphaMap); SetShaderSamplerBinding(shader, "proctex_lut", TextureUnits::ProcTexLUT); SetShaderSamplerBinding(shader, "proctex_diff_lut", TextureUnits::ProcTexDiffLUT); diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 2b58f673e..dd019b8f1 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ 
b/src/video_core/renderer_opengl/gl_state.cpp @@ -60,9 +60,6 @@ OpenGLState::OpenGLState() { proctex_lut.texture_buffer = 0; proctex_diff_lut.texture_buffer = 0; - proctex_color_map.texture_buffer = 0; - proctex_alpha_map.texture_buffer = 0; - proctex_noise_lut.texture_buffer = 0; image_shadow_buffer = 0; image_shadow_texture_px = 0; @@ -233,24 +230,6 @@ void OpenGLState::Apply() const { glBindTexture(GL_TEXTURE_BUFFER, texture_buffer_lut_rgba.texture_buffer); } - // ProcTex Noise LUT - if (proctex_noise_lut.texture_buffer != cur_state.proctex_noise_lut.texture_buffer) { - glActiveTexture(TextureUnits::ProcTexNoiseLUT.Enum()); - glBindTexture(GL_TEXTURE_BUFFER, proctex_noise_lut.texture_buffer); - } - - // ProcTex Color Map - if (proctex_color_map.texture_buffer != cur_state.proctex_color_map.texture_buffer) { - glActiveTexture(TextureUnits::ProcTexColorMap.Enum()); - glBindTexture(GL_TEXTURE_BUFFER, proctex_color_map.texture_buffer); - } - - // ProcTex Alpha Map - if (proctex_alpha_map.texture_buffer != cur_state.proctex_alpha_map.texture_buffer) { - glActiveTexture(TextureUnits::ProcTexAlphaMap.Enum()); - glBindTexture(GL_TEXTURE_BUFFER, proctex_alpha_map.texture_buffer); - } - // ProcTex LUT if (proctex_lut.texture_buffer != cur_state.proctex_lut.texture_buffer) { glActiveTexture(TextureUnits::ProcTexLUT.Enum()); @@ -378,12 +357,6 @@ OpenGLState& OpenGLState::ResetTexture(GLuint handle) { texture_buffer_lut_rg.texture_buffer = 0; if (texture_buffer_lut_rgba.texture_buffer == handle) texture_buffer_lut_rgba.texture_buffer = 0; - if (proctex_noise_lut.texture_buffer == handle) - proctex_noise_lut.texture_buffer = 0; - if (proctex_color_map.texture_buffer == handle) - proctex_color_map.texture_buffer = 0; - if (proctex_alpha_map.texture_buffer == handle) - proctex_alpha_map.texture_buffer = 0; if (proctex_lut.texture_buffer == handle) proctex_lut.texture_buffer = 0; if (proctex_diff_lut.texture_buffer == handle) diff --git 
a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 759b94569..4f939be31 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -20,9 +20,6 @@ constexpr TextureUnit PicaTexture(int unit) { return TextureUnit{unit}; } -constexpr TextureUnit ProcTexNoiseLUT{5}; -constexpr TextureUnit ProcTexColorMap{6}; -constexpr TextureUnit ProcTexAlphaMap{7}; constexpr TextureUnit ProcTexLUT{8}; constexpr TextureUnit ProcTexDiffLUT{9}; constexpr TextureUnit TextureCube{10}; @@ -111,18 +108,6 @@ public: GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER } texture_buffer_lut_rgba; - struct { - GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER - } proctex_noise_lut; - - struct { - GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER - } proctex_color_map; - - struct { - GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER - } proctex_alpha_map; - struct { GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER } proctex_lut; From 831d4f9aeb291b64f29b2198d3756af183ebf9d6 Mon Sep 17 00:00:00 2001 From: Markus Wick Date: Sat, 19 May 2018 15:38:48 +0200 Subject: [PATCH 09/12] gl_rasterizer: Use the shared texture buffer for the proctex lut. 
--- .../renderer_opengl/gl_rasterizer.cpp | 43 ++++++------------- .../renderer_opengl/gl_rasterizer.h | 6 --- .../renderer_opengl/gl_shader_gen.cpp | 9 ++-- .../renderer_opengl/gl_shader_manager.cpp | 2 - src/video_core/renderer_opengl/gl_state.cpp | 19 -------- src/video_core/renderer_opengl/gl_state.h | 10 ----- 6 files changed, 17 insertions(+), 72 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 82426dcf1..1c613df8e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -135,26 +135,6 @@ RasterizerOpenGL::RasterizerOpenGL() glActiveTexture(TextureUnits::TextureBufferLUT_RGBA.Enum()); glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, texture_buffer.GetHandle()); - // Setup the LUT for proctex - proctex_lut.Create(); - state.proctex_lut.texture_buffer = proctex_lut.handle; - state.Apply(); - proctex_lut_buffer.Create(); - glBindBuffer(GL_TEXTURE_BUFFER, proctex_lut_buffer.handle); - glBufferData(GL_TEXTURE_BUFFER, sizeof(GLfloat) * 4 * 256, nullptr, GL_DYNAMIC_DRAW); - glActiveTexture(TextureUnits::ProcTexLUT.Enum()); - glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, proctex_lut_buffer.handle); - - // Setup the difference LUT for proctex - proctex_diff_lut.Create(); - state.proctex_diff_lut.texture_buffer = proctex_diff_lut.handle; - state.Apply(); - proctex_diff_lut_buffer.Create(); - glBindBuffer(GL_TEXTURE_BUFFER, proctex_diff_lut_buffer.handle); - glBufferData(GL_TEXTURE_BUFFER, sizeof(GLfloat) * 4 * 256, nullptr, GL_DYNAMIC_DRAW); - glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum()); - glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, proctex_diff_lut_buffer.handle); - // Bind index buffer for hardware shader path state.draw.vertex_array = hw_vao.handle; state.Apply(); @@ -1996,7 +1976,7 @@ void RasterizerOpenGL::SyncAndUploadLUTs() { } // Sync the proctex lut - if (uniform_block_data.proctex_lut_dirty) { + if 
(uniform_block_data.proctex_lut_dirty || invalidate) { std::array new_data; std::transform(Pica::g_state.proctex.color_table.begin(), @@ -2006,17 +1986,18 @@ void RasterizerOpenGL::SyncAndUploadLUTs() { return GLvec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; }); - if (new_data != proctex_lut_data) { + if (new_data != proctex_lut_data || invalidate) { proctex_lut_data = new_data; - glBindBuffer(GL_TEXTURE_BUFFER, proctex_lut_buffer.handle); - glBufferSubData(GL_TEXTURE_BUFFER, 0, new_data.size() * sizeof(GLvec4), - new_data.data()); + std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(GLvec4)); + uniform_block_data.data.proctex_lut_offset = (offset + bytes_used) / sizeof(GLvec4); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(GLvec4); } uniform_block_data.proctex_lut_dirty = false; } // Sync the proctex difference lut - if (uniform_block_data.proctex_diff_lut_dirty) { + if (uniform_block_data.proctex_diff_lut_dirty || invalidate) { std::array new_data; std::transform(Pica::g_state.proctex.color_diff_table.begin(), @@ -2026,11 +2007,13 @@ void RasterizerOpenGL::SyncAndUploadLUTs() { return GLvec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; }); - if (new_data != proctex_diff_lut_data) { + if (new_data != proctex_diff_lut_data || invalidate) { proctex_diff_lut_data = new_data; - glBindBuffer(GL_TEXTURE_BUFFER, proctex_diff_lut_buffer.handle); - glBufferSubData(GL_TEXTURE_BUFFER, 0, new_data.size() * sizeof(GLvec4), - new_data.data()); + std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(GLvec4)); + uniform_block_data.data.proctex_diff_lut_offset = + (offset + bytes_used) / sizeof(GLvec4); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(GLvec4); } uniform_block_data.proctex_diff_lut_dirty = false; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d2fc8ead1..2753ddb79 100644 --- 
a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -294,13 +294,7 @@ private: std::array proctex_noise_lut_data{}; std::array proctex_color_map_data{}; std::array proctex_alpha_map_data{}; - - OGLBuffer proctex_lut_buffer; - OGLTexture proctex_lut; std::array proctex_lut_data{}; - - OGLBuffer proctex_diff_lut_buffer; - OGLTexture proctex_diff_lut; std::array proctex_diff_lut_data{}; bool allow_shadow; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 063bf9d70..1d1ce9758 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -1171,15 +1171,16 @@ float ProcTexNoiseCoef(vec2 x) { out += "int lut_index_i = int(lut_coord) + " + std::to_string(config.state.proctex.lut_offset) + ";\n"; out += "float lut_index_f = fract(lut_coord);\n"; - out += "vec4 final_color = texelFetch(proctex_lut, lut_index_i + proctex_lut_offset) + " + out += "vec4 final_color = texelFetch(texture_buffer_lut_rgba, lut_index_i + " + "proctex_lut_offset) + " "lut_index_f * " - "texelFetch(proctex_diff_lut, lut_index_i + proctex_diff_lut_offset);\n"; + "texelFetch(texture_buffer_lut_rgba, lut_index_i + proctex_diff_lut_offset);\n"; break; case ProcTexFilter::Nearest: case ProcTexFilter::NearestMipmapLinear: case ProcTexFilter::NearestMipmapNearest: out += "lut_coord += " + std::to_string(config.state.proctex.lut_offset) + ";\n"; - out += "vec4 final_color = texelFetch(proctex_lut, int(round(lut_coord)) + " + out += "vec4 final_color = texelFetch(texture_buffer_lut_rgba, int(round(lut_coord)) + " "proctex_lut_offset);\n"; break; } @@ -1224,8 +1225,6 @@ uniform sampler2D tex2; uniform samplerCube tex_cube; uniform samplerBuffer texture_buffer_lut_rg; uniform samplerBuffer texture_buffer_lut_rgba; -uniform samplerBuffer proctex_lut; -uniform samplerBuffer proctex_diff_lut; #if ALLOW_SHADOW layout(r32ui) uniform 
readonly uimage2D shadow_texture_px; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 19e163804..3b3faea9a 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -57,8 +57,6 @@ static void SetShaderSamplerBindings(GLuint shader) { // Set the texture samplers to correspond to different lookup table texture units SetShaderSamplerBinding(shader, "texture_buffer_lut_rg", TextureUnits::TextureBufferLUT_RG); SetShaderSamplerBinding(shader, "texture_buffer_lut_rgba", TextureUnits::TextureBufferLUT_RGBA); - SetShaderSamplerBinding(shader, "proctex_lut", TextureUnits::ProcTexLUT); - SetShaderSamplerBinding(shader, "proctex_diff_lut", TextureUnits::ProcTexDiffLUT); SetShaderImageBinding(shader, "shadow_buffer", ImageUnits::ShadowBuffer); SetShaderImageBinding(shader, "shadow_texture_px", ImageUnits::ShadowTexturePX); diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index dd019b8f1..0d41242ee 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -58,9 +58,6 @@ OpenGLState::OpenGLState() { texture_buffer_lut_rg.texture_buffer = 0; texture_buffer_lut_rgba.texture_buffer = 0; - proctex_lut.texture_buffer = 0; - proctex_diff_lut.texture_buffer = 0; - image_shadow_buffer = 0; image_shadow_texture_px = 0; image_shadow_texture_nx = 0; @@ -230,18 +227,6 @@ void OpenGLState::Apply() const { glBindTexture(GL_TEXTURE_BUFFER, texture_buffer_lut_rgba.texture_buffer); } - // ProcTex LUT - if (proctex_lut.texture_buffer != cur_state.proctex_lut.texture_buffer) { - glActiveTexture(TextureUnits::ProcTexLUT.Enum()); - glBindTexture(GL_TEXTURE_BUFFER, proctex_lut.texture_buffer); - } - - // ProcTex Diff LUT - if (proctex_diff_lut.texture_buffer != cur_state.proctex_diff_lut.texture_buffer) { - 
glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum()); - glBindTexture(GL_TEXTURE_BUFFER, proctex_diff_lut.texture_buffer); - } - // Shadow Images if (image_shadow_buffer != cur_state.image_shadow_buffer) { glBindImageTexture(ImageUnits::ShadowBuffer, image_shadow_buffer, 0, GL_FALSE, 0, @@ -357,10 +342,6 @@ OpenGLState& OpenGLState::ResetTexture(GLuint handle) { texture_buffer_lut_rg.texture_buffer = 0; if (texture_buffer_lut_rgba.texture_buffer == handle) texture_buffer_lut_rgba.texture_buffer = 0; - if (proctex_lut.texture_buffer == handle) - proctex_lut.texture_buffer = 0; - if (proctex_diff_lut.texture_buffer == handle) - proctex_diff_lut.texture_buffer = 0; if (image_shadow_buffer == handle) image_shadow_buffer = 0; if (image_shadow_texture_px == handle) diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 4f939be31..4408506c3 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -20,8 +20,6 @@ constexpr TextureUnit PicaTexture(int unit) { return TextureUnit{unit}; } -constexpr TextureUnit ProcTexLUT{8}; -constexpr TextureUnit ProcTexDiffLUT{9}; constexpr TextureUnit TextureCube{10}; constexpr TextureUnit TextureBufferLUT_RG{11}; constexpr TextureUnit TextureBufferLUT_RGBA{12}; @@ -108,14 +106,6 @@ public: GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER } texture_buffer_lut_rgba; - struct { - GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER - } proctex_lut; - - struct { - GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER - } proctex_diff_lut; - // GL_IMAGE_BINDING_NAME GLuint image_shadow_buffer; GLuint image_shadow_texture_px; From 0838c87dacdb8355ca9a2d7a812d18a58087aac8 Mon Sep 17 00:00:00 2001 From: Markus Wick Date: Sat, 19 May 2018 15:40:46 +0200 Subject: [PATCH 10/12] gl_stream_buffer: Only flush the host buffer if anything was written. This might happen in the new TBO upload path. 
--- src/video_core/renderer_opengl/gl_stream_buffer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 1957cfbcc..03a8ed8b7 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -87,7 +87,7 @@ std::tuple OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a void OGLStreamBuffer::Unmap(GLsizeiptr size) { ASSERT(size <= mapped_size); - if (!coherent) { + if (!coherent && size > 0) { glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size); } From 46f18d68001723e23b7ff27929a40bfbd70aec40 Mon Sep 17 00:00:00 2001 From: Markus Wick Date: Sat, 19 May 2018 15:43:52 +0200 Subject: [PATCH 11/12] gl_shader_gen: Inline now constant texture buffer. --- src/video_core/renderer_opengl/gl_shader_gen.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 1d1ce9758..db61dde21 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -1025,7 +1025,7 @@ void AppendProcTexClamp(std::string& out, const std::string& var, ProcTexClamp m } void AppendProcTexCombineAndMap(std::string& out, ProcTexCombiner combiner, - const std::string& map_lut, const std::string& offset) { + const std::string& offset) { std::string combined; switch (combiner) { case ProcTexCombiner::U: @@ -1063,7 +1063,7 @@ void AppendProcTexCombineAndMap(std::string& out, ProcTexCombiner combiner, combined = "0.0"; break; } - out += "ProcTexLookupLUT(" + map_lut + ", " + offset + ", " + combined + ")"; + out += "ProcTexLookupLUT(" + offset + ", " + combined + ")"; } void AppendProcTexSampler(std::string& out, const PicaFSConfig& config) { @@ -1072,12 +1072,12 @@ void AppendProcTexSampler(std::string& out, const 
PicaFSConfig& config) { // coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using // value entries and difference entries. out += R"( -float ProcTexLookupLUT(samplerBuffer lut, int offset, float coord) { +float ProcTexLookupLUT(int offset, float coord) { coord *= 128; float index_i = clamp(floor(coord), 0.0, 127.0); float index_f = coord - index_i; // fract() cannot be used here because 128.0 needs to be // extracted as index_i = 127.0 and index_f = 1.0 - vec2 entry = texelFetch(lut, int(index_i) + offset).rg; + vec2 entry = texelFetch(texture_buffer_lut_rg, int(index_i) + offset).rg; return clamp(entry.r + entry.g * index_f, 0.0, 1.0); } )"; @@ -1113,8 +1113,8 @@ float ProcTexNoiseCoef(vec2 x) { float g2 = ProcTexNoiseRand2D(point + vec2(0.0, 1.0)) * (frac.x + frac.y - 1.0); float g3 = ProcTexNoiseRand2D(point + vec2(1.0, 1.0)) * (frac.x + frac.y - 2.0); - float x_noise = ProcTexLookupLUT(texture_buffer_lut_rg, proctex_noise_lut_offset, frac.x); - float y_noise = ProcTexLookupLUT(texture_buffer_lut_rg, proctex_noise_lut_offset, frac.y); + float x_noise = ProcTexLookupLUT(proctex_noise_lut_offset, frac.x); + float y_noise = ProcTexLookupLUT(proctex_noise_lut_offset, frac.y); float x0 = mix(g0, g1, x_noise); float x1 = mix(g2, g3, x_noise); return mix(x0, x1, y_noise); @@ -1156,7 +1156,7 @@ float ProcTexNoiseCoef(vec2 x) { // Combine and map out += "float lut_coord = "; - AppendProcTexCombineAndMap(out, config.state.proctex.color_combiner, "texture_buffer_lut_rg", + AppendProcTexCombineAndMap(out, config.state.proctex.color_combiner, "proctex_color_map_offset"); out += ";\n"; @@ -1190,7 +1190,7 @@ float ProcTexNoiseCoef(vec2 x) { // uses the output of CombineAndMap directly instead. 
out += "float final_alpha = "; AppendProcTexCombineAndMap(out, config.state.proctex.alpha_combiner, - "texture_buffer_lut_rg", "proctex_alpha_map_offset"); + "proctex_alpha_map_offset"); out += ";\n"; out += "return vec4(final_color.xyz, final_alpha);\n}\n"; } else { From c4ff0ba137fdac10c9400ab1138fd1d71635664a Mon Sep 17 00:00:00 2001 From: Markus Wick Date: Fri, 22 Jun 2018 17:21:28 +0200 Subject: [PATCH 12/12] renderer_opengl: Renumber all texture units. We spend lots of texture units for our texture buffers. As they are now fed from one buffer, there is no need to have the big gap in the list of IDs. --- src/video_core/renderer_opengl/gl_state.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 4408506c3..1cf9b8d36 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -20,9 +20,9 @@ constexpr TextureUnit PicaTexture(int unit) { return TextureUnit{unit}; } -constexpr TextureUnit TextureCube{10}; -constexpr TextureUnit TextureBufferLUT_RG{11}; -constexpr TextureUnit TextureBufferLUT_RGBA{12}; +constexpr TextureUnit TextureCube{3}; +constexpr TextureUnit TextureBufferLUT_RG{4}; +constexpr TextureUnit TextureBufferLUT_RGBA{5}; } // namespace TextureUnits