From 1159e4d928d1e1fa53bf83980bf700e97c2b2490 Mon Sep 17 00:00:00 2001 From: GPUCode <47210458+GPUCode@users.noreply.github.com> Date: Wed, 30 Aug 2023 21:26:28 +0300 Subject: [PATCH] video_core: Take factors into account with min/max blending functions (#6925) * sw_framebuffer: Take factors into account for min/max blending * renderer_gl: Take factors into account for min/max blending * Address review comments * gl_shader_gen: Fix framebuffer fetch on qcom and mali * renderer_opengl: Add fallback path for mesa * gl_shader_gen: Avoid emitting blend emulation if minmax_factor is present --- externals/glad/include/glad/glad.h | 41 ++++- externals/glad/src/glad.c | 22 ++- src/video_core/renderer_opengl/gl_driver.cpp | 3 + src/video_core/renderer_opengl/gl_driver.h | 12 ++ .../renderer_opengl/gl_rasterizer.cpp | 50 ++++-- .../renderer_opengl/gl_rasterizer.h | 1 + .../renderer_opengl/gl_shader_gen.cpp | 167 +++++++++++++++--- .../renderer_opengl/gl_shader_gen.h | 12 +- .../renderer_opengl/gl_shader_manager.cpp | 9 +- src/video_core/renderer_opengl/gl_state.cpp | 6 + src/video_core/renderer_opengl/gl_state.h | 13 ++ src/video_core/renderer_opengl/pica_to_gl.h | 11 +- .../renderer_software/sw_framebuffer.cpp | 18 +- src/video_core/shader/shader_uniforms.cpp | 1 + src/video_core/shader/shader_uniforms.h | 3 +- 15 files changed, 306 insertions(+), 63 deletions(-) diff --git a/externals/glad/include/glad/glad.h b/externals/glad/include/glad/glad.h index 6d4dc8092..84561395b 100644 --- a/externals/glad/include/glad/glad.h +++ b/externals/glad/include/glad/glad.h @@ -1,28 +1,32 @@ /* - OpenGL, OpenGL ES loader generated by glad 0.1.36 on Sat Apr 1 20:34:42 2023. + OpenGL, OpenGL ES loader generated by glad 0.1.34 on Sat Aug 26 18:38:43 2023. 
Language/Generator: C/C++ Specification: gl APIs: gl=4.3, gles2=3.2 Profile: core Extensions: + GL_AMD_blend_minmax_factor, GL_ARB_buffer_storage, GL_ARB_clear_texture, GL_ARB_get_texture_sub_image, GL_ARB_texture_compression_bptc, + GL_ARM_shader_framebuffer_fetch, GL_EXT_buffer_storage, GL_EXT_clip_cull_distance, - GL_EXT_texture_compression_s3tc + GL_EXT_shader_framebuffer_fetch, + GL_EXT_texture_compression_s3tc, + GL_NV_blend_minmax_factor Loader: True Local files: False Omit khrplatform: False Reproducible: False Commandline: - --profile="core" --api="gl=4.3,gles2=3.2" --generator="c" --spec="gl" --extensions="GL_ARB_buffer_storage,GL_ARB_clear_texture,GL_ARB_get_texture_sub_image,GL_ARB_texture_compression_bptc,GL_EXT_buffer_storage,GL_EXT_clip_cull_distance,GL_EXT_texture_compression_s3tc" + --profile="core" --api="gl=4.3,gles2=3.2" --generator="c" --spec="gl" --extensions="GL_AMD_blend_minmax_factor,GL_ARB_buffer_storage,GL_ARB_clear_texture,GL_ARB_get_texture_sub_image,GL_ARB_texture_compression_bptc,GL_ARM_shader_framebuffer_fetch,GL_EXT_buffer_storage,GL_EXT_clip_cull_distance,GL_EXT_shader_framebuffer_fetch,GL_EXT_texture_compression_s3tc,GL_NV_blend_minmax_factor" Online: - https://glad.dav1d.de/#profile=core&language=c&specification=gl&loader=on&api=gl%3D4.3&api=gles2%3D3.2&extensions=GL_ARB_buffer_storage&extensions=GL_ARB_clear_texture&extensions=GL_ARB_get_texture_sub_image&extensions=GL_ARB_texture_compression_bptc&extensions=GL_EXT_buffer_storage&extensions=GL_EXT_clip_cull_distance&extensions=GL_EXT_texture_compression_s3tc + 
https://glad.dav1d.de/#profile=core&language=c&specification=gl&loader=on&api=gl%3D4.3&api=gles2%3D3.2&extensions=GL_AMD_blend_minmax_factor&extensions=GL_ARB_buffer_storage&extensions=GL_ARB_clear_texture&extensions=GL_ARB_get_texture_sub_image&extensions=GL_ARB_texture_compression_bptc&extensions=GL_ARM_shader_framebuffer_fetch&extensions=GL_EXT_buffer_storage&extensions=GL_EXT_clip_cull_distance&extensions=GL_EXT_shader_framebuffer_fetch&extensions=GL_EXT_texture_compression_s3tc&extensions=GL_NV_blend_minmax_factor */ @@ -3320,6 +3324,8 @@ typedef void (APIENTRYP PFNGLGETNUNIFORMUIVPROC)(GLuint program, GLint location, GLAPI PFNGLGETNUNIFORMUIVPROC glad_glGetnUniformuiv; #define glGetnUniformuiv glad_glGetnUniformuiv #endif +#define GL_FACTOR_MIN_AMD 0x901C +#define GL_FACTOR_MAX_AMD 0x901D #define GL_MAP_PERSISTENT_BIT 0x0040 #define GL_MAP_COHERENT_BIT 0x0080 #define GL_DYNAMIC_STORAGE_BIT 0x0100 @@ -3332,10 +3338,13 @@ GLAPI PFNGLGETNUNIFORMUIVPROC glad_glGetnUniformuiv; #define GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM_ARB 0x8E8D #define GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT_ARB 0x8E8E #define GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT_ARB 0x8E8F +#define GL_FRAGMENT_SHADER_DISCARDS_SAMPLES_EXT 0x8A52 #define GL_COMPRESSED_RGB_S3TC_DXT1_EXT 0x83F0 #define GL_COMPRESSED_RGBA_S3TC_DXT1_EXT 0x83F1 #define GL_COMPRESSED_RGBA_S3TC_DXT3_EXT 0x83F2 #define GL_COMPRESSED_RGBA_S3TC_DXT5_EXT 0x83F3 +#define GL_FETCH_PER_SAMPLE_ARM 0x8F65 +#define GL_FRAGMENT_SHADER_FRAMEBUFFER_FETCH_MRT_ARM 0x8F66 #define GL_MAP_PERSISTENT_BIT_EXT 0x0040 #define GL_MAP_COHERENT_BIT_EXT 0x0080 #define GL_DYNAMIC_STORAGE_BIT_EXT 0x0100 @@ -3354,6 +3363,10 @@ GLAPI PFNGLGETNUNIFORMUIVPROC glad_glGetnUniformuiv; #define GL_CLIP_DISTANCE5_EXT 0x3005 #define GL_CLIP_DISTANCE6_EXT 0x3006 #define GL_CLIP_DISTANCE7_EXT 0x3007 +#ifndef GL_AMD_blend_minmax_factor +#define GL_AMD_blend_minmax_factor 1 +GLAPI int GLAD_GL_AMD_blend_minmax_factor; +#endif #ifndef GL_ARB_buffer_storage #define 
GL_ARB_buffer_storage 1 GLAPI int GLAD_GL_ARB_buffer_storage; @@ -3385,10 +3398,22 @@ GLAPI PFNGLGETCOMPRESSEDTEXTURESUBIMAGEPROC glad_glGetCompressedTextureSubImage; #define GL_ARB_texture_compression_bptc 1 GLAPI int GLAD_GL_ARB_texture_compression_bptc; #endif +#ifndef GL_EXT_shader_framebuffer_fetch +#define GL_EXT_shader_framebuffer_fetch 1 +GLAPI int GLAD_GL_EXT_shader_framebuffer_fetch; +#endif #ifndef GL_EXT_texture_compression_s3tc #define GL_EXT_texture_compression_s3tc 1 GLAPI int GLAD_GL_EXT_texture_compression_s3tc; #endif +#ifndef GL_NV_blend_minmax_factor +#define GL_NV_blend_minmax_factor 1 +GLAPI int GLAD_GL_NV_blend_minmax_factor; +#endif +#ifndef GL_ARM_shader_framebuffer_fetch +#define GL_ARM_shader_framebuffer_fetch 1 +GLAPI int GLAD_GL_ARM_shader_framebuffer_fetch; +#endif #ifndef GL_EXT_buffer_storage #define GL_EXT_buffer_storage 1 GLAPI int GLAD_GL_EXT_buffer_storage; @@ -3400,10 +3425,18 @@ GLAPI PFNGLBUFFERSTORAGEEXTPROC glad_glBufferStorageEXT; #define GL_EXT_clip_cull_distance 1 GLAPI int GLAD_GL_EXT_clip_cull_distance; #endif +#ifndef GL_EXT_shader_framebuffer_fetch +#define GL_EXT_shader_framebuffer_fetch 1 +GLAPI int GLAD_GL_EXT_shader_framebuffer_fetch; +#endif #ifndef GL_EXT_texture_compression_s3tc #define GL_EXT_texture_compression_s3tc 1 GLAPI int GLAD_GL_EXT_texture_compression_s3tc; #endif +#ifndef GL_NV_blend_minmax_factor +#define GL_NV_blend_minmax_factor 1 +GLAPI int GLAD_GL_NV_blend_minmax_factor; +#endif #ifdef __cplusplus } diff --git a/externals/glad/src/glad.c b/externals/glad/src/glad.c index 175287a97..925bcaf54 100644 --- a/externals/glad/src/glad.c +++ b/externals/glad/src/glad.c @@ -1,28 +1,32 @@ /* - OpenGL, OpenGL ES loader generated by glad 0.1.36 on Sat Apr 1 20:34:42 2023. + OpenGL, OpenGL ES loader generated by glad 0.1.34 on Sat Aug 26 18:38:43 2023. 
Language/Generator: C/C++ Specification: gl APIs: gl=4.3, gles2=3.2 Profile: core Extensions: + GL_AMD_blend_minmax_factor, GL_ARB_buffer_storage, GL_ARB_clear_texture, GL_ARB_get_texture_sub_image, GL_ARB_texture_compression_bptc, + GL_ARM_shader_framebuffer_fetch, GL_EXT_buffer_storage, GL_EXT_clip_cull_distance, - GL_EXT_texture_compression_s3tc + GL_EXT_shader_framebuffer_fetch, + GL_EXT_texture_compression_s3tc, + GL_NV_blend_minmax_factor Loader: True Local files: False Omit khrplatform: False Reproducible: False Commandline: - --profile="core" --api="gl=4.3,gles2=3.2" --generator="c" --spec="gl" --extensions="GL_ARB_buffer_storage,GL_ARB_clear_texture,GL_ARB_get_texture_sub_image,GL_ARB_texture_compression_bptc,GL_EXT_buffer_storage,GL_EXT_clip_cull_distance,GL_EXT_texture_compression_s3tc" + --profile="core" --api="gl=4.3,gles2=3.2" --generator="c" --spec="gl" --extensions="GL_AMD_blend_minmax_factor,GL_ARB_buffer_storage,GL_ARB_clear_texture,GL_ARB_get_texture_sub_image,GL_ARB_texture_compression_bptc,GL_ARM_shader_framebuffer_fetch,GL_EXT_buffer_storage,GL_EXT_clip_cull_distance,GL_EXT_shader_framebuffer_fetch,GL_EXT_texture_compression_s3tc,GL_NV_blend_minmax_factor" Online: - https://glad.dav1d.de/#profile=core&language=c&specification=gl&loader=on&api=gl%3D4.3&api=gles2%3D3.2&extensions=GL_ARB_buffer_storage&extensions=GL_ARB_clear_texture&extensions=GL_ARB_get_texture_sub_image&extensions=GL_ARB_texture_compression_bptc&extensions=GL_EXT_buffer_storage&extensions=GL_EXT_clip_cull_distance&extensions=GL_EXT_texture_compression_s3tc + 
https://glad.dav1d.de/#profile=core&language=c&specification=gl&loader=on&api=gl%3D4.3&api=gles2%3D3.2&extensions=GL_AMD_blend_minmax_factor&extensions=GL_ARB_buffer_storage&extensions=GL_ARB_clear_texture&extensions=GL_ARB_get_texture_sub_image&extensions=GL_ARB_texture_compression_bptc&extensions=GL_ARM_shader_framebuffer_fetch&extensions=GL_EXT_buffer_storage&extensions=GL_EXT_clip_cull_distance&extensions=GL_EXT_shader_framebuffer_fetch&extensions=GL_EXT_texture_compression_s3tc&extensions=GL_NV_blend_minmax_factor */ #include @@ -853,13 +857,17 @@ PFNGLVIEWPORTARRAYVPROC glad_glViewportArrayv = NULL; PFNGLVIEWPORTINDEXEDFPROC glad_glViewportIndexedf = NULL; PFNGLVIEWPORTINDEXEDFVPROC glad_glViewportIndexedfv = NULL; PFNGLWAITSYNCPROC glad_glWaitSync = NULL; +int GLAD_GL_AMD_blend_minmax_factor = 0; int GLAD_GL_ARB_buffer_storage = 0; int GLAD_GL_ARB_clear_texture = 0; int GLAD_GL_ARB_get_texture_sub_image = 0; int GLAD_GL_ARB_texture_compression_bptc = 0; +int GLAD_GL_ARM_shader_framebuffer_fetch = 0; int GLAD_GL_EXT_buffer_storage = 0; int GLAD_GL_EXT_clip_cull_distance = 0; +int GLAD_GL_EXT_shader_framebuffer_fetch = 0; int GLAD_GL_EXT_texture_compression_s3tc = 0; +int GLAD_GL_NV_blend_minmax_factor = 0; PFNGLBUFFERSTORAGEPROC glad_glBufferStorage = NULL; PFNGLCLEARTEXIMAGEPROC glad_glClearTexImage = NULL; PFNGLCLEARTEXSUBIMAGEPROC glad_glClearTexSubImage = NULL; @@ -1498,11 +1506,14 @@ static void load_GL_ARB_get_texture_sub_image(GLADloadproc load) { } static int find_extensionsGL(void) { if (!get_exts()) return 0; + GLAD_GL_AMD_blend_minmax_factor = has_ext("GL_AMD_blend_minmax_factor"); GLAD_GL_ARB_buffer_storage = has_ext("GL_ARB_buffer_storage"); GLAD_GL_ARB_clear_texture = has_ext("GL_ARB_clear_texture"); GLAD_GL_ARB_get_texture_sub_image = has_ext("GL_ARB_get_texture_sub_image"); GLAD_GL_ARB_texture_compression_bptc = has_ext("GL_ARB_texture_compression_bptc"); + GLAD_GL_EXT_shader_framebuffer_fetch = has_ext("GL_EXT_shader_framebuffer_fetch"); 
GLAD_GL_EXT_texture_compression_s3tc = has_ext("GL_EXT_texture_compression_s3tc"); + GLAD_GL_NV_blend_minmax_factor = has_ext("GL_NV_blend_minmax_factor"); free_exts(); return 1; } @@ -1971,9 +1982,12 @@ static void load_GL_EXT_buffer_storage(GLADloadproc load) { } static int find_extensionsGLES2(void) { if (!get_exts()) return 0; + GLAD_GL_ARM_shader_framebuffer_fetch = has_ext("GL_ARM_shader_framebuffer_fetch"); GLAD_GL_EXT_buffer_storage = has_ext("GL_EXT_buffer_storage"); GLAD_GL_EXT_clip_cull_distance = has_ext("GL_EXT_clip_cull_distance"); + GLAD_GL_EXT_shader_framebuffer_fetch = has_ext("GL_EXT_shader_framebuffer_fetch"); GLAD_GL_EXT_texture_compression_s3tc = has_ext("GL_EXT_texture_compression_s3tc"); + GLAD_GL_NV_blend_minmax_factor = has_ext("GL_NV_blend_minmax_factor"); free_exts(); return 1; } diff --git a/src/video_core/renderer_opengl/gl_driver.cpp b/src/video_core/renderer_opengl/gl_driver.cpp index d9778393a..25cc1aedf 100644 --- a/src/video_core/renderer_opengl/gl_driver.cpp +++ b/src/video_core/renderer_opengl/gl_driver.cpp @@ -170,6 +170,9 @@ void Driver::CheckExtensionSupport() { arb_texture_compression_bptc = GLAD_GL_ARB_texture_compression_bptc; ext_clip_cull_distance = GLAD_GL_EXT_clip_cull_distance; ext_texture_compression_s3tc = GLAD_GL_EXT_texture_compression_s3tc; + shader_framebuffer_fetch = + GLAD_GL_EXT_shader_framebuffer_fetch || GLAD_GL_ARM_shader_framebuffer_fetch; + blend_minmax_factor = GLAD_GL_AMD_blend_minmax_factor || GLAD_GL_NV_blend_minmax_factor; is_suitable = GLAD_GL_VERSION_4_3 || GLAD_GL_ES_VERSION_3_1; } diff --git a/src/video_core/renderer_opengl/gl_driver.h b/src/video_core/renderer_opengl/gl_driver.h index 1e1b5e246..fbb579f9a 100644 --- a/src/video_core/renderer_opengl/gl_driver.h +++ b/src/video_core/renderer_opengl/gl_driver.h @@ -105,6 +105,16 @@ public: return ext_clip_cull_distance; } + /// Returns true if the implementation supports (EXT/ARM)_shader_framebuffer_fetch + bool HasShaderFramebufferFetch() const { 
+ return shader_framebuffer_fetch; + } + + /// Returns true if the implementation supports (NV/AMD)_blend_minmax_factor + bool HasBlendMinMaxFactor() const { + return blend_minmax_factor; + } + private: void ReportDriverInfo(); void DeduceVendor(); @@ -125,6 +135,8 @@ private: bool ext_clip_cull_distance{}; bool ext_texture_compression_s3tc{}; bool arb_texture_compression_bptc{}; + bool shader_framebuffer_fetch{}; + bool blend_minmax_factor{}; std::string_view gl_version{}; std::string_view gpu_vendor{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 22e568aca..a06bc8606 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -468,14 +468,16 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { GL_TEXTURE_UPDATE_BARRIER_BIT | GL_FRAMEBUFFER_BARRIER_BIT); } - use_custom_normal = false; - return succeeded; } void RasterizerOpenGL::SyncTextureUnits(const Framebuffer* framebuffer) { using TextureType = Pica::TexturingRegs::TextureConfig::TextureType; + // Reset transient draw state + state.color_buffer.texture_2d = 0; + use_custom_normal = false; + const auto pica_textures = regs.texturing.GetTextures(); for (u32 texture_index = 0; texture_index < pica_textures.size(); ++texture_index) { const auto& texture = pica_textures[texture_index]; @@ -519,6 +521,10 @@ void RasterizerOpenGL::SyncTextureUnits(const Framebuffer* framebuffer) { state.texture_units[texture_index].texture_2d = surface.Handle(); } } + + if (emulate_minmax_blend && !driver.HasShaderFramebufferFetch()) { + state.color_buffer.texture_2d = framebuffer->Attachment(SurfaceType::Color); + } } void RasterizerOpenGL::BindShadowCube(const Pica::TexturingRegs::FullTextureConfig& texture) { @@ -760,17 +766,14 @@ void RasterizerOpenGL::SyncCullMode() { case Pica::RasterizerRegs::CullMode::KeepAll: state.cull.enabled = false; break; - case 
Pica::RasterizerRegs::CullMode::KeepClockWise: state.cull.enabled = true; state.cull.front_face = GL_CW; break; - case Pica::RasterizerRegs::CullMode::KeepCounterClockWise: state.cull.enabled = true; state.cull.front_face = GL_CCW; break; - default: LOG_CRITICAL(Render_OpenGL, "Unknown cull mode {}", static_cast(regs.rasterizer.cull_mode.Value())); @@ -784,10 +787,12 @@ void RasterizerOpenGL::SyncBlendEnabled() { } void RasterizerOpenGL::SyncBlendFuncs() { - state.blend.rgb_equation = - PicaToGL::BlendEquation(regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb); - state.blend.a_equation = - PicaToGL::BlendEquation(regs.framebuffer.output_merger.alpha_blending.blend_equation_a); + const bool has_minmax_factor = driver.HasBlendMinMaxFactor(); + + state.blend.rgb_equation = PicaToGL::BlendEquation( + regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb, has_minmax_factor); + state.blend.a_equation = PicaToGL::BlendEquation( + regs.framebuffer.output_merger.alpha_blending.blend_equation_a, has_minmax_factor); state.blend.src_rgb_func = PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_source_rgb); state.blend.dst_rgb_func = @@ -796,14 +801,39 @@ void RasterizerOpenGL::SyncBlendFuncs() { PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_source_a); state.blend.dst_a_func = PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_dest_a); + + if (has_minmax_factor) { + return; + } + + // Blending with min/max equations is emulated in the fragment shader so + // configure blending to not modify the incoming fragment color. 
+ emulate_minmax_blend = false; + if (state.EmulateColorBlend()) { + emulate_minmax_blend = true; + state.blend.rgb_equation = GL_FUNC_ADD; + state.blend.src_rgb_func = GL_ONE; + state.blend.dst_rgb_func = GL_ZERO; + } + if (state.EmulateAlphaBlend()) { + emulate_minmax_blend = true; + state.blend.a_equation = GL_FUNC_ADD; + state.blend.src_a_func = GL_ONE; + state.blend.dst_a_func = GL_ZERO; + } } void RasterizerOpenGL::SyncBlendColor() { - auto blend_color = PicaToGL::ColorRGBA8(regs.framebuffer.output_merger.blend_const.raw); + const auto blend_color = PicaToGL::ColorRGBA8(regs.framebuffer.output_merger.blend_const.raw); state.blend.color.red = blend_color[0]; state.blend.color.green = blend_color[1]; state.blend.color.blue = blend_color[2]; state.blend.color.alpha = blend_color[3]; + + if (blend_color != uniform_block_data.data.blend_color) { + uniform_block_data.data.blend_color = blend_color; + uniform_block_data.dirty = true; + } } void RasterizerOpenGL::SyncLogicOp() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 22f1b5bd2..4c4bc61f4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -155,6 +155,7 @@ private: OGLTexture texture_buffer_lut_rg; OGLTexture texture_buffer_lut_rgba; bool use_custom_normal{}; + bool emulate_minmax_blend{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 0b85f5bee..180097b21 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -9,6 +9,7 @@ #include "core/core.h" #include "core/telemetry_session.h" #include "video_core/pica_state.h" +#include "video_core/renderer_opengl/gl_driver.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_gen.h" #include 
"video_core/renderer_opengl/gl_shader_util.h" @@ -60,7 +61,8 @@ out gl_PerVertex { return out; } -PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs, bool use_normal) { +PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs, bool has_blend_minmax_factor, + bool use_normal) { PicaFSConfig res{}; auto& state = res.state; @@ -229,6 +231,29 @@ PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs, bool use_normal state.proctex.lut_filter = regs.texturing.proctex_lut.filter; } + const auto alpha_eq = regs.framebuffer.output_merger.alpha_blending.blend_equation_a.Value(); + const auto rgb_eq = regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb.Value(); + if (regs.framebuffer.output_merger.alphablend_enable && !has_blend_minmax_factor) { + if (rgb_eq == Pica::FramebufferRegs::BlendEquation::Max || + rgb_eq == Pica::FramebufferRegs::BlendEquation::Min) { + state.rgb_blend.emulate_blending = true; + state.rgb_blend.eq = rgb_eq; + state.rgb_blend.src_factor = + regs.framebuffer.output_merger.alpha_blending.factor_source_rgb; + state.rgb_blend.dst_factor = + regs.framebuffer.output_merger.alpha_blending.factor_dest_rgb; + } + if (alpha_eq == Pica::FramebufferRegs::BlendEquation::Max || + alpha_eq == Pica::FramebufferRegs::BlendEquation::Min) { + state.alpha_blend.emulate_blending = true; + state.alpha_blend.eq = alpha_eq; + state.alpha_blend.src_factor = + regs.framebuffer.output_merger.alpha_blending.factor_source_a; + state.alpha_blend.dst_factor = + regs.framebuffer.output_merger.alpha_blending.factor_dest_a; + } + } + state.shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode == FramebufferRegs::FragmentOperationMode::Shadow; if (state.shadow_rendering) { @@ -1222,6 +1247,103 @@ float ProcTexNoiseCoef(vec2 x) { } } +static void WriteLogicOp(std::string& out, const PicaFSConfig& config) { + if (!GLES || config.state.alphablend_enable) { + return; + } + switch (config.state.logic_op) { + case 
FramebufferRegs::LogicOp::Clear: + out += "color = vec4(0);\n"; + break; + case FramebufferRegs::LogicOp::Set: + out += "color = vec4(1);\n"; + break; + case FramebufferRegs::LogicOp::Copy: + // Take the color output as-is + break; + case FramebufferRegs::LogicOp::CopyInverted: + out += "color = ~color;\n"; + break; + case FramebufferRegs::LogicOp::NoOp: + // We need to discard the color, but not necessarily the depth. This is not possible + // with fragment shader alone, so we emulate this behavior on GLES with glColorMask. + break; + default: + LOG_CRITICAL(HW_GPU, "Unhandled logic_op {:x}", static_cast(config.state.logic_op)); + UNIMPLEMENTED(); + } +} + +static void WriteBlending(std::string& out, const PicaFSConfig& config) { + if (!config.state.rgb_blend.emulate_blending && !config.state.alpha_blend.emulate_blending) + [[likely]] { + return; + } + + using BlendFactor = Pica::FramebufferRegs::BlendFactor; + out += R"( +vec4 source_color = last_tex_env_out; +#if defined(GL_EXT_shader_framebuffer_fetch) +vec4 dest_color = color; +#elif defined(GL_ARM_shader_framebuffer_fetch) +vec4 dest_color = gl_LastFragColorARM; +#else +vec4 dest_color = texelFetch(colorBuffer, ivec2(gl_FragCoord.xy), 0); +#endif +)"; + const auto get_factor = [&](BlendFactor factor) -> std::string { + switch (factor) { + case BlendFactor::Zero: + return "vec4(0.f)"; + case BlendFactor::One: + return "vec4(1.f)"; + case BlendFactor::SourceColor: + return "source_color"; + case BlendFactor::OneMinusSourceColor: + return "vec4(1.f) - source_color"; + case BlendFactor::DestColor: + return "dest_color"; + case BlendFactor::OneMinusDestColor: + return "vec4(1.f) - dest_color"; + case BlendFactor::SourceAlpha: + return "source_color.aaaa"; + case BlendFactor::OneMinusSourceAlpha: + return "vec4(1.f) - source_color.aaaa"; + case BlendFactor::DestAlpha: + return "dest_color.aaaa"; + case BlendFactor::OneMinusDestAlpha: + return "vec4(1.f) - dest_color.aaaa"; + case BlendFactor::ConstantColor: + 
return "blend_color"; + case BlendFactor::OneMinusConstantColor: + return "vec4(1.f) - blend_color"; + case BlendFactor::ConstantAlpha: + return "blend_color.aaaa"; + case BlendFactor::OneMinusConstantAlpha: + return "vec4(1.f) - blend_color.aaaa"; + default: + LOG_CRITICAL(Render_OpenGL, "Unknown blend factor {}", factor); + return "vec4(1.f)"; + } + }; + const auto get_func = [](Pica::FramebufferRegs::BlendEquation eq) { + return eq == Pica::FramebufferRegs::BlendEquation::Min ? "min" : "max"; + }; + + if (config.state.rgb_blend.emulate_blending) { + out += fmt::format( + "last_tex_env_out.rgb = {}(source_color.rgb * ({}).rgb, dest_color.rgb * ({}).rgb);\n", + get_func(config.state.rgb_blend.eq), get_factor(config.state.rgb_blend.src_factor), + get_factor(config.state.rgb_blend.dst_factor)); + } + if (config.state.alpha_blend.emulate_blending) { + out += fmt::format( + "last_tex_env_out.a = {}(source_color.a * ({}).a, dest_color.a * ({}).a);\n", + get_func(config.state.alpha_blend.eq), get_factor(config.state.alpha_blend.src_factor), + get_factor(config.state.alpha_blend.dst_factor)); + } +} + ShaderDecompiler::ProgramResult GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader) { const auto& state = config.state; @@ -1235,6 +1357,17 @@ ShaderDecompiler::ProgramResult GenerateFragmentShader(const PicaFSConfig& confi out += fragment_shader_precision_OES; } + out += R"( +#if defined(GL_EXT_shader_framebuffer_fetch) +#extension GL_EXT_shader_framebuffer_fetch : enable +#elif defined(GL_ARM_shader_framebuffer_fetch) +#extension GL_ARM_shader_framebuffer_fetch : enable +#else +layout(location = 10) uniform sampler2D colorBuffer; +#endif + +)"; + out += GetVertexInterfaceDeclaration(false, separable_shader); out += R"( @@ -1242,7 +1375,7 @@ ShaderDecompiler::ProgramResult GenerateFragmentShader(const PicaFSConfig& confi in vec4 gl_FragCoord; #endif // CITRA_GLES -out vec4 color; +layout(location = 0) out vec4 color; uniform sampler2D tex0; uniform 
sampler2D tex1; @@ -1552,34 +1685,12 @@ do { } else { out += "gl_FragDepth = depth;\n"; // Round the final fragment color to maintain the PICA's 8 bits of precision - out += "color = byteround(last_tex_env_out);\n"; + out += "last_tex_env_out = byteround(last_tex_env_out);\n"; + WriteBlending(out, config); + out += "color = last_tex_env_out;\n"; } - if (GLES) { - if (!state.alphablend_enable) { - switch (state.logic_op) { - case FramebufferRegs::LogicOp::Clear: - out += "color = vec4(0);\n"; - break; - case FramebufferRegs::LogicOp::Set: - out += "color = vec4(1);\n"; - break; - case FramebufferRegs::LogicOp::Copy: - // Take the color output as-is - break; - case FramebufferRegs::LogicOp::CopyInverted: - out += "color = ~color;\n"; - break; - case FramebufferRegs::LogicOp::NoOp: - // We need to discard the color, but not necessarily the depth. This is not possible - // with fragment shader alone, so we emulate this behavior on GLES with glColorMask. - break; - default: - LOG_CRITICAL(HW_GPU, "Unhandled logic_op {:x}", static_cast(state.logic_op)); - UNIMPLEMENTED(); - } - } - } + WriteLogicOp(out, config); out += '}'; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 557e58407..9249e4b51 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -11,6 +11,8 @@ namespace OpenGL { +class Driver; + namespace ShaderDecompiler { struct ProgramResult; } @@ -115,6 +117,13 @@ struct PicaFSConfigState { Pica::TexturingRegs::ProcTexFilter lut_filter; } proctex; + struct { + bool emulate_blending; + Pica::FramebufferRegs::BlendEquation eq; + Pica::FramebufferRegs::BlendFactor src_factor; + Pica::FramebufferRegs::BlendFactor dst_factor; + } rgb_blend, alpha_blend; + bool shadow_rendering; bool shadow_texture_orthographic; bool use_custom_normal_map; @@ -131,7 +140,8 @@ struct PicaFSConfigState { struct PicaFSConfig : Common::HashableStruct { /// Construct a 
PicaFSConfig with the given Pica register configuration. - static PicaFSConfig BuildFromRegs(const Pica::Regs& regs, bool use_normal = false); + static PicaFSConfig BuildFromRegs(const Pica::Regs& regs, bool has_blend_minmax_factor, + bool use_normal = false); bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index)); diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index f656705dd..10904a5ed 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -418,7 +418,8 @@ void ShaderProgramManager::UseTrivialGeometryShader() { } void ShaderProgramManager::UseFragmentShader(const Pica::Regs& regs, bool use_normal) { - PicaFSConfig config = PicaFSConfig::BuildFromRegs(regs, use_normal); + PicaFSConfig config = + PicaFSConfig::BuildFromRegs(regs, driver.HasBlendMinMaxFactor(), use_normal); auto [handle, result] = impl->fragment_shaders.Get(config); impl->current.fs = handle; impl->current.fs_hash = config.Hash(); @@ -543,7 +544,8 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading, impl->programmable_vertex_shaders.Inject(conf, decomp->second.result.code, std::move(shader)); } else if (raw.GetProgramType() == ProgramType::FS) { - PicaFSConfig conf = PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig()); + PicaFSConfig conf = PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig(), + driver.HasBlendMinMaxFactor()); std::scoped_lock lock(mutex); impl->fragment_shaders.Inject(conf, std::move(shader)); } else { @@ -655,7 +657,8 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading, std::scoped_lock lock(mutex); impl->programmable_vertex_shaders.Inject(conf, result->code, std::move(stage)); } else if (raw.GetProgramType() == ProgramType::FS) { - PicaFSConfig conf = 
PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig()); + PicaFSConfig conf = PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig(), + driver.HasBlendMinMaxFactor()); result = GenerateFragmentShader(conf, impl->separable); OGLShaderStage stage{impl->separable}; stage.Create(result->code.c_str(), GL_FRAGMENT_SHADER); diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index dc95e2c24..0ab4fc96c 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -247,6 +247,12 @@ void OpenGLState::Apply() const { glBindTexture(GL_TEXTURE_BUFFER, texture_buffer_lut_rgba.texture_buffer); } + // Color buffer + if (color_buffer.texture_2d != cur_state.color_buffer.texture_2d) { + glActiveTexture(TextureUnits::TextureColorBuffer.Enum()); + glBindTexture(GL_TEXTURE_2D, color_buffer.texture_2d); + } + // Shadow Images if (image_shadow_buffer != cur_state.image_shadow_buffer) { glBindImageTexture(ImageUnits::ShadowBuffer, image_shadow_buffer, 0, GL_FALSE, 0, diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 2a5ef5edb..a9cccce34 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -27,6 +27,7 @@ constexpr TextureUnit TextureBufferLUT_LF{3}; constexpr TextureUnit TextureBufferLUT_RG{4}; constexpr TextureUnit TextureBufferLUT_RGBA{5}; constexpr TextureUnit TextureNormalMap{7}; +constexpr TextureUnit TextureColorBuffer{10}; } // namespace TextureUnits @@ -115,6 +116,10 @@ public: GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER } texture_buffer_lut_rgba; + struct { + GLuint texture_2d; // GL_TEXTURE_BINDING_2D + } color_buffer; + // GL_IMAGE_BINDING_NAME GLuint image_shadow_buffer; union { @@ -165,6 +170,14 @@ public: return cur_state; } + bool EmulateColorBlend() const { + return blend.rgb_equation == GL_MIN || blend.rgb_equation == GL_MAX; + } + + bool EmulateAlphaBlend() const 
{ + return blend.a_equation == GL_MIN || blend.a_equation == GL_MAX; + } + /// Apply this state as the current OpenGL state void Apply() const; diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index ef7578c49..94ed61a6e 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h @@ -95,7 +95,7 @@ inline GLenum WrapMode(Pica::TexturingRegs::TextureConfig::WrapMode mode) { return gl_mode; } -inline GLenum BlendEquation(Pica::FramebufferRegs::BlendEquation equation) { +inline GLenum BlendEquation(Pica::FramebufferRegs::BlendEquation equation, bool factor_minmax) { static constexpr std::array blend_equation_table{{ GL_FUNC_ADD, // BlendEquation::Add GL_FUNC_SUBTRACT, // BlendEquation::Subtract @@ -103,6 +103,13 @@ inline GLenum BlendEquation(Pica::FramebufferRegs::BlendEquation equation) { GL_MIN, // BlendEquation::Min GL_MAX, // BlendEquation::Max }}; + static constexpr std::array blend_equation_table_minmax{{ + GL_FUNC_ADD, // BlendEquation::Add + GL_FUNC_SUBTRACT, // BlendEquation::Subtract + GL_FUNC_REVERSE_SUBTRACT, // BlendEquation::ReverseSubtract + GL_FACTOR_MIN_AMD, // BlendEquation::Min + GL_FACTOR_MAX_AMD, // BlendEquation::Max + }}; const auto index = static_cast(equation); @@ -114,7 +121,7 @@ inline GLenum BlendEquation(Pica::FramebufferRegs::BlendEquation equation) { return GL_FUNC_ADD; } - return blend_equation_table[index]; + return (factor_minmax ? 
blend_equation_table_minmax : blend_equation_table)[index]; } inline GLenum BlendFunc(Pica::FramebufferRegs::BlendFactor factor) { diff --git a/src/video_core/renderer_software/sw_framebuffer.cpp b/src/video_core/renderer_software/sw_framebuffer.cpp index 0b0695ca5..2fbcc6844 100644 --- a/src/video_core/renderer_software/sw_framebuffer.cpp +++ b/src/video_core/renderer_software/sw_framebuffer.cpp @@ -311,19 +311,17 @@ Common::Vec4 EvaluateBlendEquation(const Common::Vec4& src, case FramebufferRegs::BlendEquation::ReverseSubtract: result = (dst_result - src_result) / 255; break; - // TODO: How do these two actually work? OpenGL doesn't include the blend factors in the - // min/max computations, but is this what the 3DS actually does? case FramebufferRegs::BlendEquation::Min: - result.r() = std::min(src.r(), dest.r()); - result.g() = std::min(src.g(), dest.g()); - result.b() = std::min(src.b(), dest.b()); - result.a() = std::min(src.a(), dest.a()); + result.r() = std::min(src_result.r(), dst_result.r()) / 255; + result.g() = std::min(src_result.g(), dst_result.g()) / 255; + result.b() = std::min(src_result.b(), dst_result.b()) / 255; + result.a() = std::min(src_result.a(), dst_result.a()) / 255; break; case FramebufferRegs::BlendEquation::Max: - result.r() = std::max(src.r(), dest.r()); - result.g() = std::max(src.g(), dest.g()); - result.b() = std::max(src.b(), dest.b()); - result.a() = std::max(src.a(), dest.a()); + result.r() = std::max(src_result.r(), dst_result.r()) / 255; + result.g() = std::max(src_result.g(), dst_result.g()) / 255; + result.b() = std::max(src_result.b(), dst_result.b()) / 255; + result.a() = std::max(src_result.a(), dst_result.a()) / 255; break; default: LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation 0x{:x}", equation); diff --git a/src/video_core/shader/shader_uniforms.cpp b/src/video_core/shader/shader_uniforms.cpp index baa496474..48354e5d5 100644 --- a/src/video_core/shader/shader_uniforms.cpp +++ 
b/src/video_core/shader/shader_uniforms.cpp @@ -69,6 +69,7 @@ layout ({}std140) uniform shader_data {{ vec3 tex_lod_bias; vec4 tex_border_color[3]; vec4 clip_coef; + vec4 blend_color; }}; )"; diff --git a/src/video_core/shader/shader_uniforms.h b/src/video_core/shader/shader_uniforms.h index 168f3f14c..75c69a39f 100644 --- a/src/video_core/shader/shader_uniforms.h +++ b/src/video_core/shader/shader_uniforms.h @@ -66,9 +66,10 @@ struct UniformData { alignas(16) Common::Vec3f tex_lod_bias; alignas(16) Common::Vec4f tex_border_color[3]; alignas(16) Common::Vec4f clip_coef; + alignas(16) Common::Vec4f blend_color; }; -static_assert(sizeof(UniformData) == 0x530, +static_assert(sizeof(UniformData) == 0x540, "The size of the UniformData does not match the structure in the shader"); static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec");