From 10fba63b94313a7228ef494634742786b4a03caf Mon Sep 17 00:00:00 2001 From: Markus Wick Date: Sat, 19 May 2018 09:48:41 +0200 Subject: [PATCH] gl_rasterizer: Provide texture buffer offsets. This allows us to move all data into one TBO. --- .../renderer_opengl/gl_rasterizer.cpp | 3 ++ .../renderer_opengl/gl_shader_gen.cpp | 38 ++++++++++++------- .../renderer_opengl/gl_shader_manager.h | 10 ++++- src/video_core/renderer_opengl/pica_to_gl.h | 4 ++ 4 files changed, 41 insertions(+), 14 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 1318a332e..d8e1cd5fc 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -76,6 +76,9 @@ RasterizerOpenGL::RasterizerOpenGL() uniform_block_data.proctex_lut_dirty = true; uniform_block_data.proctex_diff_lut_dirty = true; + for (int i = 0; i < 24; i++) + uniform_block_data.data.lighting_lut_offset[i / 4][i % 4] = 256 * i; + glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); uniform_size_aligned_vs = Common::AlignUp(sizeof(VSUniformData), uniform_buffer_alignment); diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 0ae9794de..6a8f1df44 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -32,6 +32,7 @@ namespace GLShader { static const std::string UniformBlockDef = R"( #define NUM_TEV_STAGES 6 #define NUM_LIGHTS 8 +#define NUM_LIGHTING_SAMPLERS 24 struct LightSrc { vec3 specular_0; @@ -55,6 +56,13 @@ layout (std140) uniform shader_data { int scissor_y1; int scissor_x2; int scissor_y2; + int fog_lut_offset; + int proctex_noise_lut_offset; + int proctex_color_map_offset; + int proctex_alpha_map_offset; + int proctex_lut_offset; + int proctex_diff_lut_offset; + ivec4 lighting_lut_offset[NUM_LIGHTING_SAMPLERS / 4]; vec3 fog_color; vec2 
proctex_noise_f; vec2 proctex_noise_a; @@ -1017,7 +1025,7 @@ void AppendProcTexClamp(std::string& out, const std::string& var, ProcTexClamp m } void AppendProcTexCombineAndMap(std::string& out, ProcTexCombiner combiner, - const std::string& map_lut) { + const std::string& map_lut, const std::string& offset) { std::string combined; switch (combiner) { case ProcTexCombiner::U: @@ -1055,7 +1063,7 @@ void AppendProcTexCombineAndMap(std::string& out, ProcTexCombiner combiner, combined = "0.0"; break; } - out += "ProcTexLookupLUT(" + map_lut + ", " + combined + ")"; + out += "ProcTexLookupLUT(" + map_lut + ", " + offset + ", " + combined + ")"; } void AppendProcTexSampler(std::string& out, const PicaFSConfig& config) { @@ -1064,12 +1072,12 @@ void AppendProcTexSampler(std::string& out, const PicaFSConfig& config) { // coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using // value entries and difference entries. out += R"( -float ProcTexLookupLUT(samplerBuffer lut, float coord) { +float ProcTexLookupLUT(samplerBuffer lut, int offset, float coord) { coord *= 128; float index_i = clamp(floor(coord), 0.0, 127.0); float index_f = coord - index_i; // fract() cannot be used here because 128.0 needs to be // extracted as index_i = 127.0 and index_f = 1.0 - vec2 entry = texelFetch(lut, int(index_i)).rg; + vec2 entry = texelFetch(lut, int(index_i) + offset).rg; return clamp(entry.r + entry.g * index_f, 0.0, 1.0); } )"; @@ -1105,8 +1113,8 @@ float ProcTexNoiseCoef(vec2 x) { float g2 = ProcTexNoiseRand2D(point + vec2(0.0, 1.0)) * (frac.x + frac.y - 1.0); float g3 = ProcTexNoiseRand2D(point + vec2(1.0, 1.0)) * (frac.x + frac.y - 2.0); - float x_noise = ProcTexLookupLUT(proctex_noise_lut, frac.x); - float y_noise = ProcTexLookupLUT(proctex_noise_lut, frac.y); + float x_noise = ProcTexLookupLUT(proctex_noise_lut, proctex_noise_lut_offset, frac.x); + float y_noise = ProcTexLookupLUT(proctex_noise_lut, proctex_noise_lut_offset, frac.y); float x0 = 
mix(g0, g1, x_noise); float x1 = mix(g2, g3, x_noise); return mix(x0, x1, y_noise); @@ -1148,7 +1156,8 @@ float ProcTexNoiseCoef(vec2 x) { // Combine and map out += "float lut_coord = "; - AppendProcTexCombineAndMap(out, config.state.proctex.color_combiner, "proctex_color_map"); + AppendProcTexCombineAndMap(out, config.state.proctex.color_combiner, "proctex_color_map", + "proctex_color_map_offset"); out += ";\n"; // Look up color @@ -1162,14 +1171,16 @@ float ProcTexNoiseCoef(vec2 x) { out += "int lut_index_i = int(lut_coord) + " + std::to_string(config.state.proctex.lut_offset) + ";\n"; out += "float lut_index_f = fract(lut_coord);\n"; - out += "vec4 final_color = texelFetch(proctex_lut, lut_index_i) + lut_index_f * " - "texelFetch(proctex_diff_lut, lut_index_i);\n"; + out += "vec4 final_color = texelFetch(proctex_lut, lut_index_i + proctex_lut_offset) + " + "lut_index_f * " + "texelFetch(proctex_diff_lut, lut_index_i + proctex_diff_lut_offset);\n"; break; case ProcTexFilter::Nearest: case ProcTexFilter::NearestMipmapLinear: case ProcTexFilter::NearestMipmapNearest: out += "lut_coord += " + std::to_string(config.state.proctex.lut_offset) + ";\n"; - out += "vec4 final_color = texelFetch(proctex_lut, int(round(lut_coord)));\n"; + out += "vec4 final_color = texelFetch(proctex_lut, int(round(lut_coord)) + " + "proctex_lut_offset);\n"; break; } @@ -1177,7 +1188,8 @@ float ProcTexNoiseCoef(vec2 x) { // Note: in separate alpha mode, the alpha channel skips the color LUT look up stage. It // uses the output of CombineAndMap directly instead. 
out += "float final_alpha = "; - AppendProcTexCombineAndMap(out, config.state.proctex.alpha_combiner, "proctex_alpha_map"); + AppendProcTexCombineAndMap(out, config.state.proctex.alpha_combiner, "proctex_alpha_map", + "proctex_alpha_map_offset"); out += ";\n"; out += "return vec4(final_color.xyz, final_alpha);\n}\n"; } else { @@ -1238,7 +1250,7 @@ vec3 quaternion_rotate(vec4 q, vec3 v) { } float LookupLightingLUT(int lut_index, int index, float delta) { - vec2 entry = texelFetch(lighting_lut, lut_index * 256 + index).rg; + vec2 entry = texelFetch(lighting_lut, lighting_lut_offset[lut_index >> 2][lut_index & 3] + index).rg; return entry.r + entry.g * delta; } @@ -1481,7 +1493,7 @@ vec4 secondary_fragment_color = vec4(0.0); // Generate clamped fog factor from LUT for given fog index out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n"; out += "float fog_f = fog_index - fog_i;\n"; - out += "vec2 fog_lut_entry = texelFetch(fog_lut, int(fog_i)).rg;\n"; + out += "vec2 fog_lut_entry = texelFetch(fog_lut, int(fog_i) + fog_lut_offset).rg;\n"; out += "float fog_factor = fog_lut_entry.r + fog_lut_entry.g * fog_f;\n"; out += "fog_factor = clamp(fog_factor, 0.0, 1.0);\n"; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 73acc3297..3233f99e7 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -6,6 +6,7 @@ #include #include +#include "video_core/regs_lighting.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/renderer_opengl/pica_to_gl.h" @@ -38,6 +39,13 @@ struct UniformData { GLint scissor_y1; GLint scissor_x2; GLint scissor_y2; + GLint fog_lut_offset; + GLint proctex_noise_lut_offset; + GLint proctex_color_map_offset; + GLint proctex_alpha_map_offset; + GLint proctex_lut_offset; + GLint proctex_diff_lut_offset; + alignas(16) GLivec4 
lighting_lut_offset[Pica::LightingRegs::NumLightingSampler / 4]; alignas(16) GLvec3 fog_color; alignas(8) GLvec2 proctex_noise_f; alignas(8) GLvec2 proctex_noise_a; @@ -50,7 +58,7 @@ struct UniformData { }; static_assert( - sizeof(UniformData) == 0x470, + sizeof(UniformData) == 0x4e0, "The size of the UniformData structure has changed, update the structure in the shader"); static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index 45d4bc4bb..faada1556 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h @@ -23,6 +23,10 @@ using GLuvec2 = std::array<GLuint, 2>; using GLuvec3 = std::array<GLuint, 3>; using GLuvec4 = std::array<GLuint, 4>; +using GLivec2 = std::array<GLint, 2>; +using GLivec3 = std::array<GLint, 3>; +using GLivec4 = std::array<GLint, 4>; + namespace PicaToGL { inline GLenum TextureFilterMode(Pica::TexturingRegs::TextureConfig::TextureFilter mode) {