diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 6a5d5764b..1e12f4ac2 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -32,6 +32,8 @@ add_library(video_core STATIC renderer_opengl/gl_shader_decompiler.h renderer_opengl/gl_shader_gen.cpp renderer_opengl/gl_shader_gen.h + renderer_opengl/gl_shader_manager.cpp + renderer_opengl/gl_shader_manager.h renderer_opengl/gl_shader_util.cpp renderer_opengl/gl_shader_util.h renderer_opengl/gl_state.cpp diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index cacecbed2..8b3a4aa84 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -177,6 +177,9 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum()); glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, proctex_diff_lut_buffer.handle); + shader_program_manager = + std::make_unique(GLAD_GL_ARB_separate_shader_objects); + glEnable(GL_BLEND); SyncEntireState(); @@ -490,6 +493,11 @@ void RasterizerOpenGL::DrawTriangles() { state.scissor.height = draw_rect.GetHeight(); state.Apply(); + shader_program_manager->UseTrivialVertexShader(); + shader_program_manager->UseTrivialGeometryShader(); + shader_program_manager->ApplyTo(state); + state.Apply(); + // Draw the vertex batch size_t max_vertices = 3 * (VERTEX_BUFFER_SIZE / (3 * sizeof(HardwareVertex))); for (size_t base_vertex = 0; base_vertex < vertex_batch.size(); base_vertex += max_vertices) { @@ -1258,95 +1266,7 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig( void RasterizerOpenGL::SetShader() { auto config = GLShader::PicaShaderConfig::BuildFromRegs(Pica::g_state.regs); - std::unique_ptr shader = std::make_unique(); - - // Find (or generate) the GLSL shader for the current TEV state - auto cached_shader = shader_cache.find(config); - if (cached_shader != shader_cache.end()) { - current_shader = cached_shader->second.get(); - - state.draw.shader_program = current_shader->shader.handle; - state.Apply(); - } else { - LOG_DEBUG(Render_OpenGL, "Creating new shader"); - - shader->shader.Create(GLShader::GenerateVertexShader().c_str(), - GLShader::GenerateFragmentShader(config).c_str()); - - state.draw.shader_program = shader->shader.handle; - state.Apply(); - - // Set the texture samplers to correspond to different texture units - GLint uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[0]"); - if (uniform_tex != -1) { - glUniform1i(uniform_tex, TextureUnits::PicaTexture(0).id); - } - uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[1]"); - if (uniform_tex != -1) { - glUniform1i(uniform_tex, TextureUnits::PicaTexture(1).id); - } - uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[2]"); - if (uniform_tex != -1) { - glUniform1i(uniform_tex, TextureUnits::PicaTexture(2).id); - } - uniform_tex = glGetUniformLocation(shader->shader.handle, "tex_cube"); - if (uniform_tex != -1) { - glUniform1i(uniform_tex, TextureUnits::TextureCube.id); - } - - // Set the texture samplers to correspond to different lookup table texture units - GLint uniform_lut = glGetUniformLocation(shader->shader.handle, "lighting_lut"); - if (uniform_lut != -1) { - glUniform1i(uniform_lut, TextureUnits::LightingLUT.id); - } - - GLint uniform_fog_lut = glGetUniformLocation(shader->shader.handle, "fog_lut"); - if (uniform_fog_lut != -1) { - glUniform1i(uniform_fog_lut, TextureUnits::FogLUT.id); - } - - GLint uniform_proctex_noise_lut = - glGetUniformLocation(shader->shader.handle, "proctex_noise_lut"); - if (uniform_proctex_noise_lut != -1) { - glUniform1i(uniform_proctex_noise_lut, TextureUnits::ProcTexNoiseLUT.id); - } - - GLint uniform_proctex_color_map = - glGetUniformLocation(shader->shader.handle, "proctex_color_map"); - if (uniform_proctex_color_map != -1) { - glUniform1i(uniform_proctex_color_map, TextureUnits::ProcTexColorMap.id); - } - - GLint uniform_proctex_alpha_map = - glGetUniformLocation(shader->shader.handle, "proctex_alpha_map"); - if (uniform_proctex_alpha_map != -1) { - glUniform1i(uniform_proctex_alpha_map, TextureUnits::ProcTexAlphaMap.id); - } - - GLint uniform_proctex_lut = glGetUniformLocation(shader->shader.handle, "proctex_lut"); - if (uniform_proctex_lut != -1) { - glUniform1i(uniform_proctex_lut, TextureUnits::ProcTexLUT.id); - } - - GLint uniform_proctex_diff_lut = - glGetUniformLocation(shader->shader.handle, "proctex_diff_lut"); - if (uniform_proctex_diff_lut != -1) { - glUniform1i(uniform_proctex_diff_lut, TextureUnits::ProcTexDiffLUT.id); - } - - current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get(); - - GLuint block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data"); - if (block_index != GL_INVALID_INDEX) { - GLint block_size; - glGetActiveUniformBlockiv(current_shader->shader.handle, block_index, - GL_UNIFORM_BLOCK_DATA_SIZE, &block_size); - ASSERT_MSG(block_size == sizeof(UniformData), - "Uniform block size did not match! Got {}, expected {}", - static_cast(block_size), sizeof(UniformData)); - glUniformBlockBinding(current_shader->shader.handle, block_index, 0); - } - } + shader_program_manager->UseFragmentShader(config); } void RasterizerOpenGL::SyncClipEnabled() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 999550a23..fd72e9f1e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -8,12 +8,10 @@ #include #include #include -#include #include #include #include "common/bit_field.h" #include "common/common_types.h" -#include "common/hash.h" #include "common/vector_math.h" #include "core/hw/gpu.h" #include "video_core/pica_state.h" @@ -25,13 +23,14 @@ #include "video_core/regs_texturing.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_gen.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" #include "video_core/renderer_opengl/pica_to_gl.h" #include "video_core/shader/shader.h" struct ScreenInfo; +class ShaderProgramManager; class RasterizerOpenGL : public VideoCore::RasterizerInterface { public: @@ -52,12 +51,6 @@ public: bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) override; - /// OpenGL shader generated for a given Pica register state - struct PicaShader { - /// OpenGL shader resource - OGLShader shader; - }; - private: struct SamplerInfo { using TextureConfig = Pica::TexturingRegs::TextureConfig; @@ -121,47 +114,6 @@ private: GLfloat view[3]; }; - struct LightSrc { - alignas(16) GLvec3 specular_0; - alignas(16) GLvec3 specular_1; - alignas(16) GLvec3 diffuse; - alignas(16) GLvec3 ambient; - alignas(16) GLvec3 position; - alignas(16) GLvec3 spot_direction; // negated - GLfloat dist_atten_bias; - GLfloat dist_atten_scale; - }; - - /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned - // NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at - // the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. - // Not following that rule will cause problems on some AMD drivers. - struct UniformData { - GLint framebuffer_scale; - GLint alphatest_ref; - GLfloat depth_scale; - GLfloat depth_offset; - GLint scissor_x1; - GLint scissor_y1; - GLint scissor_x2; - GLint scissor_y2; - alignas(16) GLvec3 fog_color; - alignas(8) GLvec2 proctex_noise_f; - alignas(8) GLvec2 proctex_noise_a; - alignas(8) GLvec2 proctex_noise_p; - alignas(16) GLvec3 lighting_global_ambient; - LightSrc light_src[8]; - alignas(16) GLvec4 const_color[6]; // A vec4 color for each of the six tev stages - alignas(16) GLvec4 tev_combiner_buffer_color; - alignas(16) GLvec4 clip_coef; - }; - - static_assert( - sizeof(UniformData) == 0x460, - "The size of the UniformData structure has changed, update the structure in the shader"); - static_assert(sizeof(UniformData) < 16384, - "UniformData structure must be less than 16kb as per the OpenGL spec"); - /// Syncs entire status to match PICA registers void SyncEntireState(); @@ -269,8 +221,6 @@ private: std::vector vertex_batch; - std::unordered_map> shader_cache; - const PicaShader* current_shader = nullptr; bool shader_dirty; struct { @@ -285,6 +235,8 @@ private: bool dirty; } uniform_block_data = {}; + std::unique_ptr shader_program_manager; + std::array texture_samplers; OGLVertexArray vertex_array; static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 1b01d3ad5..2267af25c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -377,7 +377,7 @@ private: OGLVertexArray attributeless_vao; OGLBuffer d24s8_abgr_buffer; GLsizeiptr d24s8_abgr_buffer_size; - OGLShader d24s8_abgr_shader; + OGLProgram d24s8_abgr_shader; GLint d24s8_abgr_tbo_size_u_id; GLint d24s8_abgr_viewport_u_id; }; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 44f7b2c71..dcaa9a0f3 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include "common/common_types.h" #include "video_core/renderer_opengl/gl_shader_util.h" @@ -96,11 +97,53 @@ public: return *this; } - /// Creates a new internal OpenGL resource and stores the handle - void Create(const char* vert_shader, const char* frag_shader) { + void Create(const char* source, GLenum type) { if (handle != 0) return; - handle = GLShader::LoadProgram(vert_shader, frag_shader); + if (source == nullptr) + return; + handle = GLShader::LoadShader(source, type); + } + + void Release() { + if (handle == 0) + return; + glDeleteShader(handle); + handle = 0; + } + + GLuint handle = 0; +}; + +class OGLProgram : private NonCopyable { +public: + OGLProgram() = default; + + OGLProgram(OGLProgram&& o) : handle(std::exchange(o.handle, 0)) {} + + ~OGLProgram() { + Release(); + } + + OGLProgram& operator=(OGLProgram&& o) { + Release(); + handle = std::exchange(o.handle, 0); + return *this; + } + + /// Creates a new program from given shader objects + void Create(bool separable_program, const std::vector& shaders) { + if (handle != 0) + return; + handle = GLShader::LoadProgram(separable_program, shaders); + } + + /// Creates a new program from given shader soruce code + void Create(const char* vert_shader, const char* frag_shader) { + OGLShader vert, frag; + vert.Create(vert_shader, GL_VERTEX_SHADER); + frag.Create(frag_shader, GL_FRAGMENT_SHADER); + Create(false, {vert.handle, frag.handle}); } /// Deletes the internal OpenGL resource @@ -115,6 +158,38 @@ public: GLuint handle = 0; }; +class OGLPipeline : private NonCopyable { +public: + OGLPipeline() = default; + OGLPipeline(OGLPipeline&& o) { + handle = std::exchange(o.handle, 0); + } + ~OGLPipeline() { + Release(); + } + OGLPipeline& operator=(OGLPipeline&& o) { + Release(); + handle = std::exchange(o.handle, 0); + return *this; + } + + void Create() { + if (handle != 0) + return; + glGenProgramPipelines(1, &handle); + } + + void Release() { + if (handle == 0) + return; + glDeleteProgramPipelines(1, &handle); + OpenGLState::GetCurState().ResetPipeline(handle).Apply(); + handle = 0; + } + + GLuint handle = 0; +}; + class OGLBuffer : private NonCopyable { public: OGLBuffer() = default; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 68a74f3f2..79a034d6a 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -61,6 +61,37 @@ layout (std140) uniform shader_data { }; )"; +static std::string GetVertexInterfaceDeclaration(bool is_output, bool separable_shader) { + std::string out; + + auto append_variable = [&](const char* var, int location) { + if (separable_shader) { + out += "layout (location=" + std::to_string(location) + ") "; + } + out += std::string(is_output ? "out " : "in ") + var + ";\n"; + }; + + append_variable("vec4 primary_color", ATTRIBUTE_COLOR); + append_variable("vec2 texcoord0", ATTRIBUTE_TEXCOORD0); + append_variable("vec2 texcoord1", ATTRIBUTE_TEXCOORD1); + append_variable("vec2 texcoord2", ATTRIBUTE_TEXCOORD2); + append_variable("float texcoord0_w", ATTRIBUTE_TEXCOORD0_W); + append_variable("vec4 normquat", ATTRIBUTE_NORMQUAT); + append_variable("vec3 view", ATTRIBUTE_VIEW); + + if (is_output && separable_shader) { + // gl_PerVertex redeclaration is required for separate shader object + out += R"( +out gl_PerVertex { + vec4 gl_Position; + float gl_ClipDistance[2]; +}; +)"; + } + + return out; +} + PicaShaderConfig PicaShaderConfig::BuildFromRegs(const Pica::Regs& regs) { PicaShaderConfig res; @@ -206,11 +237,11 @@ static std::string SampleTexture(const PicaShaderConfig& config, unsigned textur // Only unit 0 respects the texturing type switch (state.texture0_type) { case TexturingRegs::TextureConfig::Texture2D: - return "texture(tex[0], texcoord[0])"; + return "texture(tex0, texcoord0)"; case TexturingRegs::TextureConfig::Projection2D: - return "textureProj(tex[0], vec3(texcoord[0], texcoord0_w))"; + return "textureProj(tex0, vec3(texcoord0, texcoord0_w))"; case TexturingRegs::TextureConfig::TextureCube: - return "texture(tex_cube, vec3(texcoord[0], texcoord0_w))"; + return "texture(tex_cube, vec3(texcoord0, texcoord0_w))"; case TexturingRegs::TextureConfig::Shadow2D: case TexturingRegs::TextureConfig::ShadowCube: NGLOG_CRITICAL(HW_GPU, "Unhandled shadow texture"); @@ -220,15 +251,15 @@ static std::string SampleTexture(const PicaShaderConfig& config, unsigned textur LOG_CRITICAL(HW_GPU, "Unhandled texture type %x", static_cast(state.texture0_type)); UNIMPLEMENTED(); - return "texture(tex[0], texcoord[0])"; + return "texture(tex0, texcoord0)"; } case 1: - return "texture(tex[1], texcoord[1])"; + return "texture(tex1, texcoord1)"; case 2: if (state.texture2_use_coord1) - return "texture(tex[2], texcoord[1])"; + return "texture(tex2, texcoord1)"; else - return "texture(tex[2], texcoord[2])"; + return "texture(tex2, texcoord2)"; case 3: if (state.proctex.enable) { return "ProcTex()"; @@ -1020,7 +1051,12 @@ float ProcTexNoiseCoef(vec2 x) { } out += "vec4 ProcTex() {\n"; - out += "vec2 uv = abs(texcoord[" + std::to_string(config.state.proctex.coord) + "]);\n"; + if (config.state.proctex.coord < 3) { + out += "vec2 uv = abs(texcoord" + std::to_string(config.state.proctex.coord) + ");\n"; + } else { + NGLOG_CRITICAL(Render_OpenGL, "Unexpected proctex.coord >= 3"); + out += "vec2 uv = abs(texcoord0);\n"; + } // Get shift offset before noise generation out += "float u_shift = "; @@ -1085,23 +1121,24 @@ float ProcTexNoiseCoef(vec2 x) { } } -std::string GenerateFragmentShader(const PicaShaderConfig& config) { +std::string GenerateFragmentShader(const PicaShaderConfig& config, bool separable_shader) { const auto& state = config.state; - std::string out = R"( -#version 330 core + std::string out = "#version 330 core\n"; + if (separable_shader) { + out += "#extension GL_ARB_separate_shader_objects : enable\n"; + } -in vec4 primary_color; -in vec2 texcoord[3]; -in float texcoord0_w; -in vec4 normquat; -in vec3 view; + out += GetVertexInterfaceDeclaration(false, separable_shader); + out += R"( in vec4 gl_FragCoord; out vec4 color; -uniform sampler2D tex[3]; +uniform sampler2D tex0; +uniform sampler2D tex1; +uniform sampler2D tex2; uniform samplerCube tex_cube; uniform samplerBuffer lighting_lut; uniform samplerBuffer fog_lut; @@ -1246,8 +1283,11 @@ vec4 secondary_fragment_color = vec4(0.0); return out; } -std::string GenerateVertexShader() { +std::string GenerateTrivialVertexShader(bool separable_shader) { std::string out = "#version 330 core\n"; + if (separable_shader) { + out += "#extension GL_ARB_separate_shader_objects : enable\n"; + } out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n"; @@ -1264,14 +1304,7 @@ std::string GenerateVertexShader() { ") in vec4 vert_normquat;\n"; out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n"; - out += R"( -out vec4 primary_color; -out vec2 texcoord[3]; -out float texcoord0_w; -out vec4 normquat; -out vec3 view; - -)"; + out += GetVertexInterfaceDeclaration(true, separable_shader); out += UniformBlockDef; @@ -1279,9 +1312,9 @@ out vec3 view; void main() { primary_color = vert_color; - texcoord[0] = vert_texcoord0; - texcoord[1] = vert_texcoord1; - texcoord[2] = vert_texcoord2; + texcoord0 = vert_texcoord0; + texcoord1 = vert_texcoord1; + texcoord2 = vert_texcoord2; texcoord0_w = vert_texcoord0_w; normquat = vert_normquat; view = vert_view; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 929c3c015..f900e3091 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -9,7 +9,9 @@ #include #include #include +#include "common/hash.h" #include "video_core/regs.h" +#include "video_core/shader/shader.h" namespace GLShader { @@ -132,18 +134,21 @@ struct PicaShaderConfig : Common::HashableStruct { }; /** - * Generates the GLSL vertex shader program source code for the current Pica state + * Generates the GLSL vertex shader program source code that accepts vertices from software shader + * and directly passes them to the fragment shader. + * @param separable_shader generates shader that can be used for separate shader object * @returns String of the shader source code */ -std::string GenerateVertexShader(); +std::string GenerateTrivialVertexShader(bool separable_shader); /** * Generates the GLSL fragment shader program source code for the current Pica state * @param config ShaderCacheKey object generated for the current Pica state, used for the shader * configuration (NOTE: Use state in this struct only, not the Pica registers!) + * @param separable_shader generates shader that can be used for separate shader object * @returns String of the shader source code */ -std::string GenerateFragmentShader(const PicaShaderConfig& config); +std::string GenerateFragmentShader(const PicaShaderConfig& config, bool separable_shader); } // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp new file mode 100644 index 000000000..30bc2d49d --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -0,0 +1,216 @@ +// Copyright 2018 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include "video_core/renderer_opengl/gl_shader_manager.h" + +static void SetShaderUniformBlockBinding(GLuint shader, const char* name, UniformBindings binding, + size_t expected_size) { + GLuint ub_index = glGetUniformBlockIndex(shader, name); + if (ub_index == GL_INVALID_INDEX) { + return; + } + GLint ub_size = 0; + glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); + ASSERT_MSG(ub_size == expected_size, "Uniform block size did not match! Got %d, expected %zu", + static_cast(ub_size), expected_size); + glUniformBlockBinding(shader, ub_index, static_cast(binding)); +} + +static void SetShaderUniformBlockBindings(GLuint shader) { + SetShaderUniformBlockBinding(shader, "shader_data", UniformBindings::Common, + sizeof(UniformData)); +} + +static void SetShaderSamplerBinding(GLuint shader, const char* name, + TextureUnits::TextureUnit binding) { + GLint uniform_tex = glGetUniformLocation(shader, name); + if (uniform_tex != -1) { + glUniform1i(uniform_tex, binding.id); + } +} + +static void SetShaderSamplerBindings(GLuint shader) { + OpenGLState cur_state = OpenGLState::GetCurState(); + GLuint old_program = std::exchange(cur_state.draw.shader_program, shader); + cur_state.Apply(); + + // Set the texture samplers to correspond to different texture units + SetShaderSamplerBinding(shader, "tex0", TextureUnits::PicaTexture(0)); + SetShaderSamplerBinding(shader, "tex1", TextureUnits::PicaTexture(1)); + SetShaderSamplerBinding(shader, "tex2", TextureUnits::PicaTexture(2)); + SetShaderSamplerBinding(shader, "tex_cube", TextureUnits::TextureCube); + + // Set the texture samplers to correspond to different lookup table texture units + SetShaderSamplerBinding(shader, "lighting_lut", TextureUnits::LightingLUT); + SetShaderSamplerBinding(shader, "fog_lut", TextureUnits::FogLUT); + SetShaderSamplerBinding(shader, "proctex_noise_lut", TextureUnits::ProcTexNoiseLUT); + SetShaderSamplerBinding(shader, "proctex_color_map", TextureUnits::ProcTexColorMap); + SetShaderSamplerBinding(shader, "proctex_alpha_map", TextureUnits::ProcTexAlphaMap); + SetShaderSamplerBinding(shader, "proctex_lut", TextureUnits::ProcTexLUT); + SetShaderSamplerBinding(shader, "proctex_diff_lut", TextureUnits::ProcTexDiffLUT); + + cur_state.draw.shader_program = old_program; + cur_state.Apply(); +} + +/** + * An object representing a shader program staging. It can be either a shader object or a program + * object, depending on whether separable program is used. + */ +class OGLShaderStage { +public: + explicit OGLShaderStage(bool separable) { + if (separable) { + shader_or_program = OGLProgram(); + } else { + shader_or_program = OGLShader(); + } + } + + void Create(const char* source, GLenum type) { + if (shader_or_program.which() == 0) { + boost::get(shader_or_program).Create(source, type); + } else { + OGLShader shader; + shader.Create(source, type); + OGLProgram& program = boost::get(shader_or_program); + program.Create(true, {shader.handle}); + SetShaderUniformBlockBindings(program.handle); + SetShaderSamplerBindings(program.handle); + } + } + + GLuint GetHandle() const { + if (shader_or_program.which() == 0) { + return boost::get(shader_or_program).handle; + } else { + return boost::get(shader_or_program).handle; + } + } + +private: + boost::variant shader_or_program; +}; + +class TrivialVertexShader { +public: + explicit TrivialVertexShader(bool separable) : program(separable) { + program.Create(GLShader::GenerateTrivialVertexShader(separable).c_str(), GL_VERTEX_SHADER); + } + GLuint Get() const { + return program.GetHandle(); + } + +private: + OGLShaderStage program; +}; + +template +class ShaderCache { +public: + explicit ShaderCache(bool separable) : separable(separable) {} + GLuint Get(const KeyConfigType& config) { + auto [iter, new_shader] = shaders.emplace(config, OGLShaderStage{separable}); + OGLShaderStage& cached_shader = iter->second; + if (new_shader) { + cached_shader.Create(CodeGenerator(config, separable).c_str(), ShaderType); + } + return cached_shader.GetHandle(); + } + +private: + bool separable; + std::unordered_map shaders; +}; + +using FragmentShaders = + ShaderCache; + +class ShaderProgramManager::Impl { +public: + explicit Impl(bool separable) + : separable(separable), trivial_vertex_shader(separable), fragment_shaders(separable) { + if (separable) + pipeline.Create(); + } + + struct ShaderTuple { + GLuint vs = 0; + GLuint gs = 0; + GLuint fs = 0; + + bool operator==(const ShaderTuple& rhs) const { + return std::tie(vs, gs, fs) == std::tie(rhs.vs, rhs.gs, rhs.fs); + } + + bool operator!=(const ShaderTuple& rhs) const { + return std::tie(vs, gs, fs) != std::tie(rhs.vs, rhs.gs, rhs.fs); + } + + struct Hash { + std::size_t operator()(const ShaderTuple& tuple) const { + std::size_t hash = 0; + boost::hash_combine(hash, tuple.vs); + boost::hash_combine(hash, tuple.gs); + boost::hash_combine(hash, tuple.fs); + return hash; + } + }; + }; + + ShaderTuple current; + + TrivialVertexShader trivial_vertex_shader; + + FragmentShaders fragment_shaders; + + bool separable; + std::unordered_map program_cache; + OGLPipeline pipeline; +}; + +ShaderProgramManager::ShaderProgramManager(bool separable) + : impl(std::make_unique(separable)) {} + +ShaderProgramManager::~ShaderProgramManager() = default; + +void ShaderProgramManager::UseTrivialVertexShader() { + impl->current.vs = impl->trivial_vertex_shader.Get(); +} + +void ShaderProgramManager::UseTrivialGeometryShader() { + impl->current.gs = 0; +} + +void ShaderProgramManager::UseFragmentShader(const GLShader::PicaShaderConfig& config) { + impl->current.fs = impl->fragment_shaders.Get(config); +} + +void ShaderProgramManager::ApplyTo(OpenGLState& state) { + if (impl->separable) { + // Without this reseting, AMD sometimes freezes when one stage is changed but not for the + // others + glUseProgramStages(impl->pipeline.handle, + GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, + 0); + + glUseProgramStages(impl->pipeline.handle, GL_VERTEX_SHADER_BIT, impl->current.vs); + glUseProgramStages(impl->pipeline.handle, GL_GEOMETRY_SHADER_BIT, impl->current.gs); + glUseProgramStages(impl->pipeline.handle, GL_FRAGMENT_SHADER_BIT, impl->current.fs); + state.draw.shader_program = 0; + state.draw.program_pipeline = impl->pipeline.handle; + } else { + OGLProgram& cached_program = impl->program_cache[impl->current]; + if (cached_program.handle == 0) { + cached_program.Create(false, {impl->current.vs, impl->current.gs, impl->current.fs}); + SetShaderUniformBlockBindings(cached_program.handle); + SetShaderSamplerBindings(cached_program.handle); + } + state.draw.shader_program = cached_program.handle; + } +} diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h new file mode 100644 index 000000000..1e59b74aa --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -0,0 +1,73 @@ +// Copyright 2018 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_shader_gen.h" +#include "video_core/renderer_opengl/pica_to_gl.h" + +enum class UniformBindings : GLuint { Common }; + +struct LightSrc { + alignas(16) GLvec3 specular_0; + alignas(16) GLvec3 specular_1; + alignas(16) GLvec3 diffuse; + alignas(16) GLvec3 ambient; + alignas(16) GLvec3 position; + alignas(16) GLvec3 spot_direction; // negated + GLfloat dist_atten_bias; + GLfloat dist_atten_scale; +}; + +/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned +// NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at +// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. +// Not following that rule will cause problems on some AMD drivers. +struct UniformData { + GLint framebuffer_scale; + GLint alphatest_ref; + GLfloat depth_scale; + GLfloat depth_offset; + GLint scissor_x1; + GLint scissor_y1; + GLint scissor_x2; + GLint scissor_y2; + alignas(16) GLvec3 fog_color; + alignas(8) GLvec2 proctex_noise_f; + alignas(8) GLvec2 proctex_noise_a; + alignas(8) GLvec2 proctex_noise_p; + alignas(16) GLvec3 lighting_global_ambient; + LightSrc light_src[8]; + alignas(16) GLvec4 const_color[6]; // A vec4 color for each of the six tev stages + alignas(16) GLvec4 tev_combiner_buffer_color; + alignas(16) GLvec4 clip_coef; +}; + +static_assert( + sizeof(UniformData) == 0x460, + "The size of the UniformData structure has changed, update the structure in the shader"); +static_assert(sizeof(UniformData) < 16384, + "UniformData structure must be less than 16kb as per the OpenGL spec"); + +/// A class that manage different shader stages and configures them with given config data. +class ShaderProgramManager { +public: + explicit ShaderProgramManager(bool separable); + ~ShaderProgramManager(); + + void UseTrivialVertexShader(); + + void UseTrivialGeometryShader(); + + void UseFragmentShader(const GLShader::PicaShaderConfig& config); + + void ApplyTo(OpenGLState& state); + +private: + class Impl; + std::unique_ptr impl; +}; diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 4da241d83..ae8cdf550 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -10,66 +10,67 @@ namespace GLShader { -GLuint LoadProgram(const char* vertex_shader, const char* fragment_shader) { +GLuint LoadShader(const char* source, GLenum type) { + const char* debug_type; + switch (type) { + case GL_VERTEX_SHADER: + debug_type = "vertex"; + break; + case GL_GEOMETRY_SHADER: + debug_type = "geometry"; + break; + case GL_FRAGMENT_SHADER: + debug_type = "fragment"; + break; + default: + UNREACHABLE(); + } - // Create the shaders - GLuint vertex_shader_id = glCreateShader(GL_VERTEX_SHADER); - GLuint fragment_shader_id = glCreateShader(GL_FRAGMENT_SHADER); + GLuint shader_id = glCreateShader(type); + glShaderSource(shader_id, 1, &source, nullptr); + NGLOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); + glCompileShader(shader_id); GLint result = GL_FALSE; - int info_log_length; - - // Compile Vertex Shader - LOG_DEBUG(Render_OpenGL, "Compiling vertex shader..."); - - glShaderSource(vertex_shader_id, 1, &vertex_shader, nullptr); - glCompileShader(vertex_shader_id); - - // Check Vertex Shader - glGetShaderiv(vertex_shader_id, GL_COMPILE_STATUS, &result); - glGetShaderiv(vertex_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); + GLint info_log_length; + glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result); + glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length); if (info_log_length > 1) { - std::vector vertex_shader_error(info_log_length); - glGetShaderInfoLog(vertex_shader_id, info_log_length, nullptr, &vertex_shader_error[0]); + std::vector shader_error(info_log_length); + glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]); if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "%s", &vertex_shader_error[0]); + NGLOG_DEBUG(Render_OpenGL, "{}", &shader_error[0]); } else { - LOG_ERROR(Render_OpenGL, "Error compiling vertex shader:\n%s", &vertex_shader_error[0]); - } - } - - // Compile Fragment Shader - LOG_DEBUG(Render_OpenGL, "Compiling fragment shader..."); - - glShaderSource(fragment_shader_id, 1, &fragment_shader, nullptr); - glCompileShader(fragment_shader_id); - - // Check Fragment Shader - glGetShaderiv(fragment_shader_id, GL_COMPILE_STATUS, &result); - glGetShaderiv(fragment_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); - - if (info_log_length > 1) { - std::vector fragment_shader_error(info_log_length); - glGetShaderInfoLog(fragment_shader_id, info_log_length, nullptr, &fragment_shader_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]); - } else { - LOG_ERROR(Render_OpenGL, "Error compiling fragment shader:\n%s", - &fragment_shader_error[0]); + NGLOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, + &shader_error[0]); + NGLOG_ERROR(Render_OpenGL, "Shader source code:\n{}", source); } } + return shader_id; +} +GLuint LoadProgram(bool separable_program, const std::vector& shaders) { // Link the program - LOG_DEBUG(Render_OpenGL, "Linking program..."); + NGLOG_DEBUG(Render_OpenGL, "Linking program..."); GLuint program_id = glCreateProgram(); - glAttachShader(program_id, vertex_shader_id); - glAttachShader(program_id, fragment_shader_id); + + for (GLuint shader : shaders) { + if (shader != 0) { + glAttachShader(program_id, shader); + } + } + + if (separable_program) { + glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); + } glLinkProgram(program_id); // Check the program + GLint result = GL_FALSE; + GLint info_log_length; glGetProgramiv(program_id, GL_LINK_STATUS, &result); glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length); @@ -77,21 +78,19 @@ GLuint LoadProgram(const char* vertex_shader, const char* fragment_shader) { std::vector program_error(info_log_length); glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]); if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "%s", &program_error[0]); + NGLOG_DEBUG(Render_OpenGL, "{}", &program_error[0]); } else { - LOG_ERROR(Render_OpenGL, "Error linking shader:\n%s", &program_error[0]); + NGLOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", &program_error[0]); } } - // If the program linking failed at least one of the shaders was probably bad - if (result == GL_FALSE) { - LOG_ERROR(Render_OpenGL, "Vertex shader:\n%s", vertex_shader); - LOG_ERROR(Render_OpenGL, "Fragment shader:\n%s", fragment_shader); - } ASSERT_MSG(result == GL_TRUE, "Shader not linked"); - glDeleteShader(vertex_shader_id); - glDeleteShader(fragment_shader_id); + for (GLuint shader : shaders) { + if (shader != 0) { + glDetachShader(program_id, shader); + } + } return program_id; } diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index a4bcffdfa..92d0fc534 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -4,16 +4,24 @@ #pragma once +#include #include namespace GLShader { /** - * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader) - * @param vertex_shader String of the GLSL vertex shader program - * @param fragment_shader String of the GLSL fragment shader program - * @returns Handle of the newly created OpenGL shader object + * Utility function to create and compile an OpenGL GLSL shader + * @param source String of the GLSL shader program + * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER) */ -GLuint LoadProgram(const char* vertex_shader, const char* fragment_shader); +GLuint LoadShader(const char* source, GLenum type); + +/** + * Utility function to create and link an OpenGL GLSL shader program + * @param separable_program whether to create a separable program + * @param shaders ID of shaders to attach to the program + * @returns Handle of the newly created OpenGL program object + */ +GLuint LoadProgram(bool separable_program, const std::vector& shaders); } // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index efb302da2..124a41cd9 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -71,6 +71,7 @@ OpenGLState::OpenGLState() { draw.vertex_buffer = 0; draw.uniform_buffer = 0; draw.shader_program = 0; + draw.program_pipeline = 0; scissor.enabled = false; scissor.x = 0; @@ -282,6 +283,11 @@ void OpenGLState::Apply() const { glUseProgram(draw.shader_program); } + // Program pipeline + if (draw.program_pipeline != cur_state.draw.program_pipeline) { + glBindProgramPipeline(draw.program_pipeline); + } + // Scissor test if (scissor.enabled != cur_state.scissor.enabled) { if (scissor.enabled) { @@ -360,6 +366,13 @@ OpenGLState& OpenGLState::ResetProgram(GLuint handle) { return *this; } +OpenGLState& OpenGLState::ResetPipeline(GLuint handle) { + if (draw.program_pipeline == handle) { + draw.program_pipeline = 0; + } + return *this; +} + OpenGLState& OpenGLState::ResetBuffer(GLuint handle) { if (draw.vertex_buffer == handle) { draw.vertex_buffer = 0; diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index a8a2f1e7d..29a0aabb5 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -128,6 +128,7 @@ public: GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING GLuint shader_program; // GL_CURRENT_PROGRAM + GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING } draw; struct { @@ -161,6 +162,7 @@ public: OpenGLState& ResetTexture(GLuint handle); OpenGLState& ResetSampler(GLuint handle); OpenGLState& ResetProgram(GLuint handle); + OpenGLState& ResetPipeline(GLuint handle); OpenGLState& ResetBuffer(GLuint handle); OpenGLState& ResetVertexArray(GLuint handle); OpenGLState& ResetFramebuffer(GLuint handle); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 0b4f69e8f..f827a26af 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -73,7 +73,7 @@ private: // OpenGL object IDs OGLVertexArray vertex_array; OGLBuffer vertex_buffer; - OGLShader shader; + OGLProgram shader; /// Display information for top and bottom screens respectively std::array screen_infos;