// Copyright 2022 Citra Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. #include #include #include "common/bit_set.h" #include "common/logging/log.h" #include "core/core.h" #include "video_core/pica_state.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_vars.h" #include "video_core/video_core.h" using Pica::FramebufferRegs; using Pica::LightingRegs; using Pica::RasterizerRegs; using Pica::TexturingRegs; using TevStageConfig = TexturingRegs::TevStageConfig; using VSOutputAttributes = RasterizerRegs::VSOutputAttributes; namespace OpenGL { constexpr std::string_view UniformBlockDef = R"( #define NUM_TEV_STAGES 6 #define NUM_LIGHTS 8 #define NUM_LIGHTING_SAMPLERS 24 struct LightSrc { vec3 specular_0; vec3 specular_1; vec3 diffuse; vec3 ambient; vec3 position; vec3 spot_direction; float dist_atten_bias; float dist_atten_scale; }; layout (std140) uniform shader_data { int framebuffer_scale; int alphatest_ref; float depth_scale; float depth_offset; float shadow_bias_constant; float shadow_bias_linear; int scissor_x1; int scissor_y1; int scissor_x2; int scissor_y2; int fog_lut_offset; int proctex_noise_lut_offset; int proctex_color_map_offset; int proctex_alpha_map_offset; int proctex_lut_offset; int proctex_diff_lut_offset; float proctex_bias; int shadow_texture_bias; ivec4 lighting_lut_offset[NUM_LIGHTING_SAMPLERS / 4]; vec3 fog_color; vec2 proctex_noise_f; vec2 proctex_noise_a; vec2 proctex_noise_p; vec3 lighting_global_ambient; LightSrc light_src[NUM_LIGHTS]; vec4 const_color[NUM_TEV_STAGES]; vec4 tev_combiner_buffer_color; vec4 clip_coef; }; )"; static std::string GetVertexInterfaceDeclaration(bool is_output, bool separable_shader) { std::string out; const auto append_variable = [&](std::string_view var, int location) { if (separable_shader) { out += fmt::format("layout (location={}) ", location); } out += fmt::format("{}{};\n", is_output ? "out " : "in ", var); }; append_variable("vec4 primary_color", ATTRIBUTE_COLOR); append_variable("vec2 texcoord0", ATTRIBUTE_TEXCOORD0); append_variable("vec2 texcoord1", ATTRIBUTE_TEXCOORD1); append_variable("vec2 texcoord2", ATTRIBUTE_TEXCOORD2); append_variable("float texcoord0_w", ATTRIBUTE_TEXCOORD0_W); append_variable("vec4 normquat", ATTRIBUTE_NORMQUAT); append_variable("vec3 view", ATTRIBUTE_VIEW); if (is_output && separable_shader) { // gl_PerVertex redeclaration is required for separate shader object out += R"( out gl_PerVertex { vec4 gl_Position; #if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance) float gl_ClipDistance[2]; #endif // !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance) }; )"; } return out; } PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs) { PicaFSConfig res{}; auto& state = res.state; state.scissor_test_mode = regs.rasterizer.scissor_test.mode; state.depthmap_enable = regs.rasterizer.depthmap_enable; state.alpha_test_func = regs.framebuffer.output_merger.alpha_test.enable ? regs.framebuffer.output_merger.alpha_test.func.Value() : FramebufferRegs::CompareFunc::Always; state.texture0_type = regs.texturing.texture0.type; state.texture2_use_coord1 = regs.texturing.main_config.texture2_use_coord1 != 0; if (GLES) { // With GLES, we need this in the fragment shader to emulate logic operations state.alphablend_enable = Pica::g_state.regs.framebuffer.output_merger.alphablend_enable == 1; state.logic_op = regs.framebuffer.output_merger.logic_op; } else { // We don't need these otherwise, reset them to avoid unnecessary shader generation state.alphablend_enable = {}; state.logic_op = {}; } // Copy relevant tev stages fields. // We don't sync const_color here because of the high variance, it is a // shader uniform instead. const auto& tev_stages = regs.texturing.GetTevStages(); DEBUG_ASSERT(state.tev_stages.size() == tev_stages.size()); for (std::size_t i = 0; i < tev_stages.size(); i++) { const auto& tev_stage = tev_stages[i]; state.tev_stages[i].sources_raw = tev_stage.sources_raw; state.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw; state.tev_stages[i].ops_raw = tev_stage.ops_raw; state.tev_stages[i].scales_raw = tev_stage.scales_raw; } state.fog_mode = regs.texturing.fog_mode; state.fog_flip = regs.texturing.fog_flip != 0; state.combiner_buffer_input = regs.texturing.tev_combiner_buffer_input.update_mask_rgb.Value() | regs.texturing.tev_combiner_buffer_input.update_mask_a.Value() << 4; // Fragment lighting state.lighting.enable = !regs.lighting.disable; state.lighting.src_num = regs.lighting.max_light_index + 1; for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) { unsigned num = regs.lighting.light_enable.GetNum(light_index); const auto& light = regs.lighting.light[num]; state.lighting.light[light_index].num = num; state.lighting.light[light_index].directional = light.config.directional != 0; state.lighting.light[light_index].two_sided_diffuse = light.config.two_sided_diffuse != 0; state.lighting.light[light_index].geometric_factor_0 = light.config.geometric_factor_0 != 0; state.lighting.light[light_index].geometric_factor_1 = light.config.geometric_factor_1 != 0; state.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num); state.lighting.light[light_index].spot_atten_enable = !regs.lighting.IsSpotAttenDisabled(num); state.lighting.light[light_index].shadow_enable = !regs.lighting.IsShadowDisabled(num); } state.lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0; state.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0; state.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value(); state.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); state.lighting.lut_d1.enable = regs.lighting.config1.disable_lut_d1 == 0; state.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0; state.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value(); state.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); // this is a dummy field due to lack of the corresponding register state.lighting.lut_sp.enable = true; state.lighting.lut_sp.abs_input = regs.lighting.abs_lut_input.disable_sp == 0; state.lighting.lut_sp.type = regs.lighting.lut_input.sp.Value(); state.lighting.lut_sp.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.sp); state.lighting.lut_fr.enable = regs.lighting.config1.disable_lut_fr == 0; state.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0; state.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value(); state.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); state.lighting.lut_rr.enable = regs.lighting.config1.disable_lut_rr == 0; state.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0; state.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value(); state.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); state.lighting.lut_rg.enable = regs.lighting.config1.disable_lut_rg == 0; state.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0; state.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value(); state.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); state.lighting.lut_rb.enable = regs.lighting.config1.disable_lut_rb == 0; state.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0; state.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value(); state.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); state.lighting.config = regs.lighting.config0.config; state.lighting.enable_primary_alpha = regs.lighting.config0.enable_primary_alpha; state.lighting.enable_secondary_alpha = regs.lighting.config0.enable_secondary_alpha; state.lighting.bump_mode = regs.lighting.config0.bump_mode; state.lighting.bump_selector = regs.lighting.config0.bump_selector; state.lighting.bump_renorm = regs.lighting.config0.disable_bump_renorm == 0; state.lighting.clamp_highlights = regs.lighting.config0.clamp_highlights != 0; state.lighting.enable_shadow = regs.lighting.config0.enable_shadow != 0; state.lighting.shadow_primary = regs.lighting.config0.shadow_primary != 0; state.lighting.shadow_secondary = regs.lighting.config0.shadow_secondary != 0; state.lighting.shadow_invert = regs.lighting.config0.shadow_invert != 0; state.lighting.shadow_alpha = regs.lighting.config0.shadow_alpha != 0; state.lighting.shadow_selector = regs.lighting.config0.shadow_selector; state.proctex.enable = regs.texturing.main_config.texture3_enable; if (state.proctex.enable) { state.proctex.coord = regs.texturing.main_config.texture3_coordinates; state.proctex.u_clamp = regs.texturing.proctex.u_clamp; state.proctex.v_clamp = regs.texturing.proctex.v_clamp; state.proctex.color_combiner = regs.texturing.proctex.color_combiner; state.proctex.alpha_combiner = regs.texturing.proctex.alpha_combiner; state.proctex.separate_alpha = regs.texturing.proctex.separate_alpha; state.proctex.noise_enable = regs.texturing.proctex.noise_enable; state.proctex.u_shift = regs.texturing.proctex.u_shift; state.proctex.v_shift = regs.texturing.proctex.v_shift; state.proctex.lut_width = regs.texturing.proctex_lut.width; state.proctex.lut_offset0 = regs.texturing.proctex_lut_offset.level0; state.proctex.lut_offset1 = regs.texturing.proctex_lut_offset.level1; state.proctex.lut_offset2 = regs.texturing.proctex_lut_offset.level2; state.proctex.lut_offset3 = regs.texturing.proctex_lut_offset.level3; state.proctex.lod_min = regs.texturing.proctex_lut.lod_min; state.proctex.lod_max = regs.texturing.proctex_lut.lod_max; state.proctex.lut_filter = regs.texturing.proctex_lut.filter; } state.shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode == FramebufferRegs::FragmentOperationMode::Shadow; state.shadow_texture_orthographic = regs.texturing.shadow.orthographic != 0; return res; } void PicaShaderConfigCommon::Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) { program_hash = setup.GetProgramCodeHash(); swizzle_hash = setup.GetSwizzleDataHash(); main_offset = regs.main_offset; sanitize_mul = VideoCore::g_hw_shader_accurate_mul; num_outputs = 0; output_map.fill(16); for (int reg : Common::BitSet(regs.output_mask)) { output_map[reg] = num_outputs++; } } void PicaGSConfigCommonRaw::Init(const Pica::Regs& regs) { vs_output_attributes = Common::BitSet(regs.vs.output_mask).Count(); gs_output_attributes = vs_output_attributes; semantic_maps.fill({16, 0}); for (u32 attrib = 0; attrib < regs.rasterizer.vs_output_total; ++attrib) { const std::array semantics{ regs.rasterizer.vs_output_attributes[attrib].map_x.Value(), regs.rasterizer.vs_output_attributes[attrib].map_y.Value(), regs.rasterizer.vs_output_attributes[attrib].map_z.Value(), regs.rasterizer.vs_output_attributes[attrib].map_w.Value(), }; for (u32 comp = 0; comp < 4; ++comp) { const auto semantic = semantics[comp]; if (static_cast(semantic) < 24) { semantic_maps[static_cast(semantic)] = {attrib, comp}; } else if (semantic != VSOutputAttributes::INVALID) { LOG_ERROR(Render_OpenGL, "Invalid/unknown semantic id: {}", semantic); } } } } /// Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code) static bool IsPassThroughTevStage(const TevStageConfig& stage) { return (stage.color_op == TevStageConfig::Operation::Replace && stage.alpha_op == TevStageConfig::Operation::Replace && stage.color_source1 == TevStageConfig::Source::Previous && stage.alpha_source1 == TevStageConfig::Source::Previous && stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor && stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha && stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1); } static std::string SampleTexture(const PicaFSConfig& config, unsigned texture_unit) { const auto& state = config.state; switch (texture_unit) { case 0: // Only unit 0 respects the texturing type switch (state.texture0_type) { case TexturingRegs::TextureConfig::Texture2D: return "textureLod(tex0, texcoord0, getLod(texcoord0 * vec2(textureSize(tex0, 0))))"; case TexturingRegs::TextureConfig::Projection2D: // TODO (wwylele): find the exact LOD formula for projection texture return "textureProj(tex0, vec3(texcoord0, texcoord0_w))"; case TexturingRegs::TextureConfig::TextureCube: return "texture(tex_cube, vec3(texcoord0, texcoord0_w))"; case TexturingRegs::TextureConfig::Shadow2D: return "shadowTexture(texcoord0, texcoord0_w)"; case TexturingRegs::TextureConfig::ShadowCube: return "shadowTextureCube(texcoord0, texcoord0_w)"; case TexturingRegs::TextureConfig::Disabled: return "vec4(0.0)"; default: LOG_CRITICAL(HW_GPU, "Unhandled texture type {:x}", state.texture0_type); UNIMPLEMENTED(); return "texture(tex0, texcoord0)"; } case 1: return "textureLod(tex1, texcoord1, getLod(texcoord1 * vec2(textureSize(tex1, 0))))"; case 2: if (state.texture2_use_coord1) return "textureLod(tex2, texcoord1, getLod(texcoord1 * vec2(textureSize(tex2, 0))))"; else return "textureLod(tex2, texcoord2, getLod(texcoord2 * vec2(textureSize(tex2, 0))))"; case 3: if (state.proctex.enable) { return "ProcTex()"; } else { LOG_DEBUG(Render_OpenGL, "Using Texture3 without enabling it"); return "vec4(0.0)"; } default: UNREACHABLE(); return ""; } } /// Writes the specified TEV stage source component(s) static void AppendSource(std::string& out, const PicaFSConfig& config, TevStageConfig::Source source, std::string_view index_name) { using Source = TevStageConfig::Source; switch (source) { case Source::PrimaryColor: out += "rounded_primary_color"; break; case Source::PrimaryFragmentColor: out += "primary_fragment_color"; break; case Source::SecondaryFragmentColor: out += "secondary_fragment_color"; break; case Source::Texture0: out += SampleTexture(config, 0); break; case Source::Texture1: out += SampleTexture(config, 1); break; case Source::Texture2: out += SampleTexture(config, 2); break; case Source::Texture3: out += SampleTexture(config, 3); break; case Source::PreviousBuffer: out += "combiner_buffer"; break; case Source::Constant: out += "const_color["; out += index_name; out += ']'; break; case Source::Previous: out += "last_tex_env_out"; break; default: out += "vec4(0.0)"; LOG_CRITICAL(Render_OpenGL, "Unknown source op {}", source); break; } } /// Writes the color components to use for the specified TEV stage color modifier static void AppendColorModifier(std::string& out, const PicaFSConfig& config, TevStageConfig::ColorModifier modifier, TevStageConfig::Source source, std::string_view index_name) { using ColorModifier = TevStageConfig::ColorModifier; switch (modifier) { case ColorModifier::SourceColor: AppendSource(out, config, source, index_name); out += ".rgb"; break; case ColorModifier::OneMinusSourceColor: out += "vec3(1.0) - "; AppendSource(out, config, source, index_name); out += ".rgb"; break; case ColorModifier::SourceAlpha: AppendSource(out, config, source, index_name); out += ".aaa"; break; case ColorModifier::OneMinusSourceAlpha: out += "vec3(1.0) - "; AppendSource(out, config, source, index_name); out += ".aaa"; break; case ColorModifier::SourceRed: AppendSource(out, config, source, index_name); out += ".rrr"; break; case ColorModifier::OneMinusSourceRed: out += "vec3(1.0) - "; AppendSource(out, config, source, index_name); out += ".rrr"; break; case ColorModifier::SourceGreen: AppendSource(out, config, source, index_name); out += ".ggg"; break; case ColorModifier::OneMinusSourceGreen: out += "vec3(1.0) - "; AppendSource(out, config, source, index_name); out += ".ggg"; break; case ColorModifier::SourceBlue: AppendSource(out, config, source, index_name); out += ".bbb"; break; case ColorModifier::OneMinusSourceBlue: out += "vec3(1.0) - "; AppendSource(out, config, source, index_name); out += ".bbb"; break; default: out += "vec3(0.0)"; LOG_CRITICAL(Render_OpenGL, "Unknown color modifier op {}", modifier); break; } } /// Writes the alpha component to use for the specified TEV stage alpha modifier static void AppendAlphaModifier(std::string& out, const PicaFSConfig& config, TevStageConfig::AlphaModifier modifier, TevStageConfig::Source source, const std::string& index_name) { using AlphaModifier = TevStageConfig::AlphaModifier; switch (modifier) { case AlphaModifier::SourceAlpha: AppendSource(out, config, source, index_name); out += ".a"; break; case AlphaModifier::OneMinusSourceAlpha: out += "1.0 - "; AppendSource(out, config, source, index_name); out += ".a"; break; case AlphaModifier::SourceRed: AppendSource(out, config, source, index_name); out += ".r"; break; case AlphaModifier::OneMinusSourceRed: out += "1.0 - "; AppendSource(out, config, source, index_name); out += ".r"; break; case AlphaModifier::SourceGreen: AppendSource(out, config, source, index_name); out += ".g"; break; case AlphaModifier::OneMinusSourceGreen: out += "1.0 - "; AppendSource(out, config, source, index_name); out += ".g"; break; case AlphaModifier::SourceBlue: AppendSource(out, config, source, index_name); out += ".b"; break; case AlphaModifier::OneMinusSourceBlue: out += "1.0 - "; AppendSource(out, config, source, index_name); out += ".b"; break; default: out += "0.0"; LOG_CRITICAL(Render_OpenGL, "Unknown alpha modifier op {}", modifier); break; } } /// Writes the combiner function for the color components for the specified TEV stage operation static void AppendColorCombiner(std::string& out, TevStageConfig::Operation operation, std::string_view variable_name) { out += "clamp("; using Operation = TevStageConfig::Operation; switch (operation) { case Operation::Replace: out += fmt::format("{}[0]", variable_name); break; case Operation::Modulate: out += fmt::format("{0}[0] * {0}[1]", variable_name); break; case Operation::Add: out += fmt::format("{0}[0] + {0}[1]", variable_name); break; case Operation::AddSigned: out += fmt::format("{0}[0] + {0}[1] - vec3(0.5)", variable_name); break; case Operation::Lerp: out += fmt::format("{0}[0] * {0}[2] + {0}[1] * (vec3(1.0) - {0}[2])", variable_name); break; case Operation::Subtract: out += fmt::format("{0}[0] - {0}[1]", variable_name); break; case Operation::MultiplyThenAdd: out += fmt::format("{0}[0] * {0}[1] + {0}[2]", variable_name); break; case Operation::AddThenMultiply: out += fmt::format("min({0}[0] + {0}[1], vec3(1.0)) * {0}[2]", variable_name); break; case Operation::Dot3_RGB: case Operation::Dot3_RGBA: out += fmt::format("vec3(dot({0}[0] - vec3(0.5), {0}[1] - vec3(0.5)) * 4.0)", variable_name); break; default: out += "vec3(0.0)"; LOG_CRITICAL(Render_OpenGL, "Unknown color combiner operation: {}", operation); break; } out += ", vec3(0.0), vec3(1.0))"; // Clamp result to 0.0, 1.0 } /// Writes the combiner function for the alpha component for the specified TEV stage operation static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation operation, std::string_view variable_name) { out += "clamp("; using Operation = TevStageConfig::Operation; switch (operation) { case Operation::Replace: out += fmt::format("{}[0]", variable_name); break; case Operation::Modulate: out += fmt::format("{0}[0] * {0}[1]", variable_name); break; case Operation::Add: out += fmt::format("{0}[0] + {0}[1]", variable_name); break; case Operation::AddSigned: out += fmt::format("{0}[0] + {0}[1] - 0.5", variable_name); break; case Operation::Lerp: out += fmt::format("{0}[0] * {0}[2] + {0}[1] * (1.0 - {0}[2])", variable_name); break; case Operation::Subtract: out += fmt::format("{0}[0] - {0}[1]", variable_name); break; case Operation::MultiplyThenAdd: out += fmt::format("{0}[0] * {0}[1] + {0}[2]", variable_name); break; case Operation::AddThenMultiply: out += fmt::format("min({0}[0] + {0}[1], 1.0) * {0}[2]", variable_name); break; default: out += "0.0"; LOG_CRITICAL(Render_OpenGL, "Unknown alpha combiner operation: {}", operation); break; } out += ", 0.0, 1.0)"; } /// Writes the if-statement condition used to evaluate alpha testing static void AppendAlphaTestCondition(std::string& out, FramebufferRegs::CompareFunc func) { using CompareFunc = FramebufferRegs::CompareFunc; switch (func) { case CompareFunc::Never: out += "true"; break; case CompareFunc::Always: out += "false"; break; case CompareFunc::Equal: case CompareFunc::NotEqual: case CompareFunc::LessThan: case CompareFunc::LessThanOrEqual: case CompareFunc::GreaterThan: case CompareFunc::GreaterThanOrEqual: { static constexpr std::array op{"!=", "==", ">=", ">", "<=", "<"}; const auto index = static_cast(func) - static_cast(CompareFunc::Equal); out += fmt::format("int(last_tex_env_out.a * 255.0) {} alphatest_ref", op[index]); break; } default: out += "false"; LOG_CRITICAL(Render_OpenGL, "Unknown alpha test condition {}", func); break; } } /// Writes the code to emulate the specified TEV stage static void WriteTevStage(std::string& out, const PicaFSConfig& config, unsigned index) { const auto stage = static_cast(config.state.tev_stages[index]); if (!IsPassThroughTevStage(stage)) { const std::string index_name = std::to_string(index); out += fmt::format("vec3 color_results_{}_1 = ", index_name); AppendColorModifier(out, config, stage.color_modifier1, stage.color_source1, index_name); out += fmt::format(";\nvec3 color_results_{}_2 = ", index_name); AppendColorModifier(out, config, stage.color_modifier2, stage.color_source2, index_name); out += fmt::format(";\nvec3 color_results_{}_3 = ", index_name); AppendColorModifier(out, config, stage.color_modifier3, stage.color_source3, index_name); out += fmt::format(";\nvec3 color_results_{}[3] = vec3[3](color_results_{}_1, " "color_results_{}_2, color_results_{}_3);\n", index_name, index_name, index_name, index_name); // Round the output of each TEV stage to maintain the PICA's 8 bits of precision out += fmt::format("vec3 color_output_{} = byteround(", index_name); AppendColorCombiner(out, stage.color_op, "color_results_" + index_name); out += ");\n"; if (stage.color_op == TevStageConfig::Operation::Dot3_RGBA) { // result of Dot3_RGBA operation is also placed to the alpha component out += fmt::format("float alpha_output_{0} = color_output_{0}[0];\n", index_name); } else { out += fmt::format("float alpha_results_{}[3] = float[3](", index_name); AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1, index_name); out += ", "; AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2, index_name); out += ", "; AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3, index_name); out += ");\n"; out += fmt::format("float alpha_output_{} = byteround(", index_name); AppendAlphaCombiner(out, stage.alpha_op, "alpha_results_" + index_name); out += ");\n"; } out += fmt::format("last_tex_env_out = vec4(" "clamp(color_output_{} * {}.0, vec3(0.0), vec3(1.0)), " "clamp(alpha_output_{} * {}.0, 0.0, 1.0));\n", index_name, stage.GetColorMultiplier(), index_name, stage.GetAlphaMultiplier()); } out += "combiner_buffer = next_combiner_buffer;\n"; if (config.TevStageUpdatesCombinerBufferColor(index)) out += "next_combiner_buffer.rgb = last_tex_env_out.rgb;\n"; if (config.TevStageUpdatesCombinerBufferAlpha(index)) out += "next_combiner_buffer.a = last_tex_env_out.a;\n"; } /// Writes the code to emulate fragment lighting static void WriteLighting(std::string& out, const PicaFSConfig& config) { const auto& lighting = config.state.lighting; // Define lighting globals out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" "vec3 light_vector = vec3(0.0);\n" "vec3 refl_value = vec3(0.0);\n" "vec3 spot_dir = vec3(0.0);\n" "vec3 half_vector = vec3(0.0);\n" "float dot_product = 0.0;\n" "float clamp_highlights = 1.0;\n" "float geo_factor = 1.0;\n"; // Compute fragment normals and tangents const auto Perturbation = [&] { return fmt::format("2.0 * ({}).rgb - 1.0", SampleTexture(config, lighting.bump_selector)); }; if (lighting.bump_mode == LightingRegs::LightingBumpMode::NormalMap) { // Bump mapping is enabled using a normal map out += fmt::format("vec3 surface_normal = {};\n", Perturbation()); // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher // precision result if (lighting.bump_renorm) { constexpr std::string_view val = "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))"; out += fmt::format("surface_normal.z = sqrt(max({}, 0.0));\n", val); } // The tangent vector is not perturbed by the normal map and is just a unit vector. out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; } else if (lighting.bump_mode == LightingRegs::LightingBumpMode::TangentMap) { // Bump mapping is enabled using a tangent map out += fmt::format("vec3 surface_tangent = {};\n", Perturbation()); // Mathematically, recomputing Z-component of the tangent vector won't affect the relevant // computation below, which is also confirmed on 3DS. So we don't bother recomputing here // even if 'renorm' is enabled. // The normal vector is not perturbed by the tangent map and is just a unit vector. out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n"; } else { // No bump mapping - surface local normal and tangent are just unit vectors out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n" "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; } // Rotate the surface-local normal by the interpolated normal quaternion to convert it to // eyespace. out += "vec4 normalized_normquat = normalize(normquat);\n" "vec3 normal = quaternion_rotate(normalized_normquat, surface_normal);\n" "vec3 tangent = quaternion_rotate(normalized_normquat, surface_tangent);\n"; if (lighting.enable_shadow) { std::string shadow_texture = SampleTexture(config, lighting.shadow_selector); if (lighting.shadow_invert) { out += fmt::format("vec4 shadow = vec4(1.0) - {};\n", shadow_texture); } else { out += fmt::format("vec4 shadow = {};\n", shadow_texture); } } else { out += "vec4 shadow = vec4(1.0);\n"; } // Samples the specified lookup table for specular lighting auto GetLutValue = [&lighting](LightingRegs::LightingSampler sampler, unsigned light_num, LightingRegs::LightingLutInput input, bool abs) { std::string index; switch (input) { case LightingRegs::LightingLutInput::NH: index = "dot(normal, normalize(half_vector))"; break; case LightingRegs::LightingLutInput::VH: index = "dot(normalize(view), normalize(half_vector))"; break; case LightingRegs::LightingLutInput::NV: index = "dot(normal, normalize(view))"; break; case LightingRegs::LightingLutInput::LN: index = "dot(light_vector, normal)"; break; case LightingRegs::LightingLutInput::SP: index = "dot(light_vector, spot_dir)"; break; case LightingRegs::LightingLutInput::CP: // CP input is only available with configuration 7 if (lighting.config == LightingRegs::LightingConfig::Config7) { // Note: even if the normal vector is modified by normal map, which is not the // normal of the tangent plane anymore, the half angle vector is still projected // using the modified normal vector. constexpr std::string_view half_angle_proj = "normalize(half_vector) - normal * dot(normal, normalize(half_vector))"; // Note: the half angle vector projection is confirmed not normalized before the dot // product. The result is in fact not cos(phi) as the name suggested. index = fmt::format("dot({}, tangent)", half_angle_proj); } else { index = "0.0"; } break; default: LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input {}", (int)input); UNIMPLEMENTED(); index = "0.0"; break; } const auto sampler_index = static_cast(sampler); if (abs) { // LUT index is in the range of (0.0, 1.0) index = lighting.light[light_num].two_sided_diffuse ? fmt::format("abs({})", index) : fmt::format("max({}, 0.0)", index); return fmt::format("LookupLightingLUTUnsigned({}, {})", sampler_index, index); } else { // LUT index is in the range of (-1.0, 1.0) return fmt::format("LookupLightingLUTSigned({}, {})", sampler_index, index); } }; // Write the code to emulate each enabled light for (unsigned light_index = 0; light_index < lighting.src_num; ++light_index) { const auto& light_config = lighting.light[light_index]; const std::string light_src = fmt::format("light_src[{}]", light_config.num); // Compute light vector (directional or positional) if (light_config.directional) { out += fmt::format("light_vector = normalize({}.position);\n", light_src); } else { out += fmt::format("light_vector = normalize({}.position + view);\n", light_src); } out += fmt::format("spot_dir = {}.spot_direction;\n", light_src); out += "half_vector = normalize(view) + light_vector;\n"; // Compute dot product of light_vector and normal, adjust if lighting is one-sided or // two-sided out += std::string("dot_product = ") + (light_config.two_sided_diffuse ? "abs(dot(light_vector, normal));\n" : "max(dot(light_vector, normal), 0.0);\n"); // If enabled, clamp specular component if lighting result is zero if (lighting.clamp_highlights) { out += "clamp_highlights = sign(dot_product);\n"; } // If enabled, compute spot light attenuation value std::string spot_atten = "1.0"; if (light_config.spot_atten_enable && LightingRegs::IsLightingSamplerSupported( lighting.config, LightingRegs::LightingSampler::SpotlightAttenuation)) { const std::string value = GetLutValue(LightingRegs::SpotlightAttenuationSampler(light_config.num), light_config.num, lighting.lut_sp.type, lighting.lut_sp.abs_input); spot_atten = fmt::format("({:#} * {})", lighting.lut_sp.scale, value); } // If enabled, compute distance attenuation value std::string dist_atten = "1.0"; if (light_config.dist_atten_enable) { const std::string index = fmt::format("clamp({}.dist_atten_scale * length(-view - " "{}.position) + {}.dist_atten_bias, 0.0, 1.0)", light_src, light_src, light_src); const auto sampler = LightingRegs::DistanceAttenuationSampler(light_config.num); dist_atten = fmt::format("LookupLightingLUTUnsigned({}, {})", sampler, index); } if (light_config.geometric_factor_0 || light_config.geometric_factor_1) { out += "geo_factor = dot(half_vector, half_vector);\n" "geo_factor = geo_factor == 0.0 ? 0.0 : min(" "dot_product / geo_factor, 1.0);\n"; } // Specular 0 component std::string d0_lut_value = "1.0"; if (lighting.lut_d0.enable && LightingRegs::IsLightingSamplerSupported( lighting.config, LightingRegs::LightingSampler::Distribution0)) { // Lookup specular "distribution 0" LUT value const std::string value = GetLutValue(LightingRegs::LightingSampler::Distribution0, light_config.num, lighting.lut_d0.type, lighting.lut_d0.abs_input); d0_lut_value = fmt::format("({:#} * {})", lighting.lut_d0.scale, value); } std::string specular_0 = fmt::format("({} * {}.specular_0)", d0_lut_value, light_src); if (light_config.geometric_factor_0) { specular_0 = fmt::format("({} * geo_factor)", specular_0); } // If enabled, lookup ReflectRed value, otherwise, 1.0 is used if (lighting.lut_rr.enable && LightingRegs::IsLightingSamplerSupported(lighting.config, LightingRegs::LightingSampler::ReflectRed)) { std::string value = GetLutValue(LightingRegs::LightingSampler::ReflectRed, light_config.num, lighting.lut_rr.type, lighting.lut_rr.abs_input); value = fmt::format("({:#} * {})", lighting.lut_rr.scale, value); out += fmt::format("refl_value.r = {};\n", value); } else { out += "refl_value.r = 1.0;\n"; } // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used if (lighting.lut_rg.enable && LightingRegs::IsLightingSamplerSupported(lighting.config, LightingRegs::LightingSampler::ReflectGreen)) { std::string value = GetLutValue(LightingRegs::LightingSampler::ReflectGreen, light_config.num, lighting.lut_rg.type, lighting.lut_rg.abs_input); value = fmt::format("({:#} * {})", lighting.lut_rg.scale, value); out += fmt::format("refl_value.g = {};\n", value); } else { out += "refl_value.g = refl_value.r;\n"; } // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used if (lighting.lut_rb.enable && LightingRegs::IsLightingSamplerSupported(lighting.config, LightingRegs::LightingSampler::ReflectBlue)) { std::string value = GetLutValue(LightingRegs::LightingSampler::ReflectBlue, light_config.num, lighting.lut_rb.type, lighting.lut_rb.abs_input); value = fmt::format("({:#} * {})", lighting.lut_rb.scale, value); out += fmt::format("refl_value.b = {};\n", value); } else { out += "refl_value.b = refl_value.r;\n"; } // Specular 1 component std::string d1_lut_value = "1.0"; if (lighting.lut_d1.enable && LightingRegs::IsLightingSamplerSupported( lighting.config, LightingRegs::LightingSampler::Distribution1)) { // Lookup specular "distribution 1" LUT value const std::string value = GetLutValue(LightingRegs::LightingSampler::Distribution1, light_config.num, lighting.lut_d1.type, lighting.lut_d1.abs_input); d1_lut_value = fmt::format("({:#} * {})", lighting.lut_d1.scale, value); } std::string specular_1 = fmt::format("({} * refl_value * {}.specular_1)", d1_lut_value, light_src); if (light_config.geometric_factor_1) { specular_1 = fmt::format("({} * geo_factor)", specular_1); } // Fresnel // Note: only the last entry in the light slots applies the Fresnel factor if (light_index == lighting.src_num - 1 && lighting.lut_fr.enable && LightingRegs::IsLightingSamplerSupported(lighting.config, LightingRegs::LightingSampler::Fresnel)) { // Lookup fresnel LUT value std::string value = GetLutValue(LightingRegs::LightingSampler::Fresnel, light_config.num, lighting.lut_fr.type, lighting.lut_fr.abs_input); value = fmt::format("({:#} * {})", lighting.lut_fr.scale, value); // Enabled for diffuse lighting alpha component if (lighting.enable_primary_alpha) { out += fmt::format("diffuse_sum.a = {};\n", value); } // Enabled for the specular lighting alpha component if (lighting.enable_secondary_alpha) { out += fmt::format("specular_sum.a = {};\n", value); } } bool shadow_primary_enable = lighting.shadow_primary && light_config.shadow_enable; bool shadow_secondary_enable = lighting.shadow_secondary && light_config.shadow_enable; std::string shadow_primary = shadow_primary_enable ? " * shadow.rgb" : ""; std::string shadow_secondary = shadow_secondary_enable ? " * shadow.rgb" : ""; // Compute primary fragment color (diffuse lighting) function out += fmt::format( "diffuse_sum.rgb += (({}.diffuse * dot_product) + {}.ambient) * {} * {}{};\n", light_src, light_src, dist_atten, spot_atten, shadow_primary); // Compute secondary fragment color (specular lighting) function out += fmt::format("specular_sum.rgb += ({} + {}) * clamp_highlights * {} * {}{};\n", specular_0, specular_1, dist_atten, spot_atten, shadow_secondary); } // Apply shadow attenuation to alpha components if enabled if (lighting.shadow_alpha) { if (lighting.enable_primary_alpha) { out += "diffuse_sum.a *= shadow.a;\n"; } if (lighting.enable_secondary_alpha) { out += "specular_sum.a *= shadow.a;\n"; } } // Sum final lighting result out += "diffuse_sum.rgb += lighting_global_ambient;\n" "primary_fragment_color = clamp(diffuse_sum, vec4(0.0), vec4(1.0));\n" "secondary_fragment_color = clamp(specular_sum, vec4(0.0), vec4(1.0));\n"; } using ProcTexClamp = TexturingRegs::ProcTexClamp; using ProcTexShift = TexturingRegs::ProcTexShift; using ProcTexCombiner = TexturingRegs::ProcTexCombiner; using ProcTexFilter = TexturingRegs::ProcTexFilter; static void AppendProcTexShiftOffset(std::string& out, std::string_view v, ProcTexShift mode, ProcTexClamp clamp_mode) { const std::string_view offset = (clamp_mode == ProcTexClamp::MirroredRepeat) ? "1.0" : "0.5"; switch (mode) { case ProcTexShift::None: out += "0.0"; break; case ProcTexShift::Odd: out += fmt::format("{} * float((int({}) / 2) % 2)", offset, v); break; case ProcTexShift::Even: out += fmt::format("{} * float(((int({}) + 1) / 2) % 2)", offset, v); break; default: LOG_CRITICAL(HW_GPU, "Unknown shift mode {}", mode); out += "0.0"; break; } } static void AppendProcTexClamp(std::string& out, std::string_view var, ProcTexClamp mode) { switch (mode) { case ProcTexClamp::ToZero: out += fmt::format("{0} = {0} > 1.0 ? 0 : {0};\n", var); break; case ProcTexClamp::ToEdge: out += fmt::format("{0} = min({0}, 1.0);\n", var); break; case ProcTexClamp::SymmetricalRepeat: out += fmt::format("{0} = fract({0});\n", var); break; case ProcTexClamp::MirroredRepeat: { out += fmt::format("{0} = int({0}) % 2 == 0 ? fract({0}) : 1.0 - fract({0});\n", var); break; } case ProcTexClamp::Pulse: out += fmt::format("{0} = {0} > 0.5 ? 1.0 : 0.0;\n", var); break; default: LOG_CRITICAL(HW_GPU, "Unknown clamp mode {}", mode); out += fmt::format("{0} = min({0}, 1.0);\n", var); break; } } static void AppendProcTexCombineAndMap(std::string& out, ProcTexCombiner combiner, std::string_view offset) { const auto combined = [combiner]() -> std::string_view { switch (combiner) { case ProcTexCombiner::U: return "u"; case ProcTexCombiner::U2: return "(u * u)"; case TexturingRegs::ProcTexCombiner::V: return "v"; case TexturingRegs::ProcTexCombiner::V2: return "(v * v)"; case TexturingRegs::ProcTexCombiner::Add: return "((u + v) * 0.5)"; case TexturingRegs::ProcTexCombiner::Add2: return "((u * u + v * v) * 0.5)"; case TexturingRegs::ProcTexCombiner::SqrtAdd2: return "min(sqrt(u * u + v * v), 1.0)"; case TexturingRegs::ProcTexCombiner::Min: return "min(u, v)"; case TexturingRegs::ProcTexCombiner::Max: return "max(u, v)"; case TexturingRegs::ProcTexCombiner::RMax: return "min(((u + v) * 0.5 + sqrt(u * u + v * v)) * 0.5, 1.0)"; default: LOG_CRITICAL(HW_GPU, "Unknown combiner {}", combiner); return "0.0"; } }(); out += fmt::format("ProcTexLookupLUT({}, {})", offset, combined); } static void AppendProcTexSampler(std::string& out, const PicaFSConfig& config) { // LUT sampling uitlity // For NoiseLUT/ColorMap/AlphaMap, coord=0.0 is lut[0], coord=127.0/128.0 is lut[127] and // coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using // value entries and difference entries. out += R"( float ProcTexLookupLUT(int offset, float coord) { coord *= 128.0; float index_i = clamp(floor(coord), 0.0, 127.0); float index_f = coord - index_i; // fract() cannot be used here because 128.0 needs to be // extracted as index_i = 127.0 and index_f = 1.0 vec2 entry = texelFetch(texture_buffer_lut_rg, int(index_i) + offset).rg; return clamp(entry.r + entry.g * index_f, 0.0, 1.0); } )"; // Noise utility if (config.state.proctex.noise_enable) { // See swrasterizer/proctex.cpp for more information about these functions out += R"( int ProcTexNoiseRand1D(int v) { const int table[] = int[](0,4,10,8,4,9,7,12,5,15,13,14,11,15,2,11); return ((v % 9 + 2) * 3 & 0xF) ^ table[(v / 9) & 0xF]; } float ProcTexNoiseRand2D(vec2 point) { const int table[] = int[](10,2,15,8,0,7,4,5,5,13,2,6,13,9,3,14); int u2 = ProcTexNoiseRand1D(int(point.x)); int v2 = ProcTexNoiseRand1D(int(point.y)); v2 += ((u2 & 3) == 1) ? 4 : 0; v2 ^= (u2 & 1) * 6; v2 += 10 + u2; v2 &= 0xF; v2 ^= table[u2]; return -1.0 + float(v2) * 2.0/ 15.0; } float ProcTexNoiseCoef(vec2 x) { vec2 grid = 9.0 * proctex_noise_f * abs(x + proctex_noise_p); vec2 point = floor(grid); vec2 frac = grid - point; float g0 = ProcTexNoiseRand2D(point) * (frac.x + frac.y); float g1 = ProcTexNoiseRand2D(point + vec2(1.0, 0.0)) * (frac.x + frac.y - 1.0); float g2 = ProcTexNoiseRand2D(point + vec2(0.0, 1.0)) * (frac.x + frac.y - 1.0); float g3 = ProcTexNoiseRand2D(point + vec2(1.0, 1.0)) * (frac.x + frac.y - 2.0); float x_noise = ProcTexLookupLUT(proctex_noise_lut_offset, frac.x); float y_noise = ProcTexLookupLUT(proctex_noise_lut_offset, frac.y); float x0 = mix(g0, g1, x_noise); float x1 = mix(g2, g3, x_noise); return mix(x0, x1, y_noise); } )"; } out += "vec4 SampleProcTexColor(float lut_coord, int level) {\n"; out += fmt::format("int lut_width = {} >> level;\n", config.state.proctex.lut_width); // Offsets for level 4-7 seem to be hardcoded out += fmt::format("int lut_offsets[8] = int[]({}, {}, {}, {}, 0xF0, 0xF8, 0xFC, 0xFE);\n", config.state.proctex.lut_offset0, config.state.proctex.lut_offset1, config.state.proctex.lut_offset2, config.state.proctex.lut_offset3); out += "int lut_offset = lut_offsets[level];\n"; // For the color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1] out += "lut_coord *= float(lut_width - 1);\n"; switch (config.state.proctex.lut_filter) { case ProcTexFilter::Linear: case ProcTexFilter::LinearMipmapLinear: case ProcTexFilter::LinearMipmapNearest: out += "int lut_index_i = int(lut_coord) + lut_offset;\n"; out += "float lut_index_f = fract(lut_coord);\n"; out += "return texelFetch(texture_buffer_lut_rgba, lut_index_i + " "proctex_lut_offset) + " "lut_index_f * " "texelFetch(texture_buffer_lut_rgba, lut_index_i + proctex_diff_lut_offset);\n"; break; case ProcTexFilter::Nearest: case ProcTexFilter::NearestMipmapLinear: case ProcTexFilter::NearestMipmapNearest: out += "lut_coord += float(lut_offset);\n"; out += "return texelFetch(texture_buffer_lut_rgba, int(round(lut_coord)) + " "proctex_lut_offset);\n"; break; } out += "}\n"; out += "vec4 ProcTex() {\n"; if (config.state.proctex.coord < 3) { out += fmt::format("vec2 uv = abs(texcoord{});\n", config.state.proctex.coord); } else { LOG_CRITICAL(Render_OpenGL, "Unexpected proctex.coord >= 3"); out += "vec2 uv = abs(texcoord0);\n"; } // This LOD formula is the same as the LOD upper limit defined in OpenGL. // f(x, y) <= m_u + m_v + m_w // (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail) // Note: this is different from the one normal 2D textures use. out += "vec2 duv = max(abs(dFdx(uv)), abs(dFdy(uv)));\n"; // unlike normal texture, the bias is inside the log2 out += fmt::format("float lod = log2(abs(float({}) * proctex_bias) * (duv.x + duv.y));\n", config.state.proctex.lut_width); out += "if (proctex_bias == 0.0) lod = 0.0;\n"; out += fmt::format("lod = clamp(lod, {:#}, {:#});\n", std::max(0.0f, static_cast(config.state.proctex.lod_min)), std::min(7.0f, static_cast(config.state.proctex.lod_max))); // Get shift offset before noise generation out += "float u_shift = "; AppendProcTexShiftOffset(out, "uv.y", config.state.proctex.u_shift, config.state.proctex.u_clamp); out += ";\n"; out += "float v_shift = "; AppendProcTexShiftOffset(out, "uv.x", config.state.proctex.v_shift, config.state.proctex.v_clamp); out += ";\n"; // Generate noise if (config.state.proctex.noise_enable) { out += "uv += proctex_noise_a * ProcTexNoiseCoef(uv);\n" "uv = abs(uv);\n"; } // Shift out += "float u = uv.x + u_shift;\n" "float v = uv.y + v_shift;\n"; // Clamp AppendProcTexClamp(out, "u", config.state.proctex.u_clamp); AppendProcTexClamp(out, "v", config.state.proctex.v_clamp); // Combine and map out += "float lut_coord = "; AppendProcTexCombineAndMap(out, config.state.proctex.color_combiner, "proctex_color_map_offset"); out += ";\n"; switch (config.state.proctex.lut_filter) { case ProcTexFilter::Linear: case ProcTexFilter::Nearest: out += "vec4 final_color = SampleProcTexColor(lut_coord, 0);\n"; break; case ProcTexFilter::NearestMipmapNearest: case ProcTexFilter::LinearMipmapNearest: out += "vec4 final_color = SampleProcTexColor(lut_coord, int(round(lod)));\n"; break; case ProcTexFilter::NearestMipmapLinear: case ProcTexFilter::LinearMipmapLinear: out += "int lod_i = int(lod);\n" "float lod_f = fract(lod);\n" "vec4 final_color = mix(SampleProcTexColor(lut_coord, lod_i), " "SampleProcTexColor(lut_coord, lod_i + 1), lod_f);\n"; break; } if (config.state.proctex.separate_alpha) { // Note: in separate alpha mode, the alpha channel skips the color LUT look up stage. It // uses the output of CombineAndMap directly instead. out += "float final_alpha = "; AppendProcTexCombineAndMap(out, config.state.proctex.alpha_combiner, "proctex_alpha_map_offset"); out += ";\n"; out += "return vec4(final_color.xyz, final_alpha);\n}\n"; } else { out += "return final_color;\n}\n"; } } ShaderDecompiler::ProgramResult GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader) { const auto& state = config.state; std::string out; if (separable_shader && !GLES) { out += "#extension GL_ARB_separate_shader_objects : enable\n"; } if (GLES) { out += fragment_shader_precision_OES; } out += GetVertexInterfaceDeclaration(false, separable_shader); out += R"( #ifndef CITRA_GLES in vec4 gl_FragCoord; #endif // CITRA_GLES out vec4 color; uniform sampler2D tex0; uniform sampler2D tex1; uniform sampler2D tex2; uniform samplerCube tex_cube; uniform samplerBuffer texture_buffer_lut_lf; uniform samplerBuffer texture_buffer_lut_rg; uniform samplerBuffer texture_buffer_lut_rgba; layout(r32ui) uniform readonly uimage2D shadow_texture_px; layout(r32ui) uniform readonly uimage2D shadow_texture_nx; layout(r32ui) uniform readonly uimage2D shadow_texture_py; layout(r32ui) uniform readonly uimage2D shadow_texture_ny; layout(r32ui) uniform readonly uimage2D shadow_texture_pz; layout(r32ui) uniform readonly uimage2D shadow_texture_nz; layout(r32ui) uniform uimage2D shadow_buffer; )"; out += UniformBlockDef; out += R"( // Rotate the vector v by the quaternion q vec3 quaternion_rotate(vec4 q, vec3 v) { return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v); } float LookupLightingLUT(int lut_index, int index, float delta) { vec2 entry = texelFetch(texture_buffer_lut_lf, lighting_lut_offset[lut_index >> 2][lut_index & 3] + index).rg; return entry.r + entry.g * delta; } float LookupLightingLUTUnsigned(int lut_index, float pos) { int index = clamp(int(pos * 256.0), 0, 255); float delta = pos * 256.0 - float(index); return LookupLightingLUT(lut_index, index, delta); } float LookupLightingLUTSigned(int lut_index, float pos) { int index = clamp(int(pos * 128.0), -128, 127); float delta = pos * 128.0 - float(index); if (index < 0) index += 256; return LookupLightingLUT(lut_index, index, delta); } float byteround(float x) { return round(x * 255.0) * (1.0 / 255.0); } vec2 byteround(vec2 x) { return round(x * 255.0) * (1.0 / 255.0); } vec3 byteround(vec3 x) { return round(x * 255.0) * (1.0 / 255.0); } vec4 byteround(vec4 x) { return round(x * 255.0) * (1.0 / 255.0); } // PICA's LOD formula for 2D textures. // This LOD formula is the same as the LOD lower limit defined in OpenGL. // f(x, y) >= max{m_u, m_v, m_w} // (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail) float getLod(vec2 coord) { vec2 d = max(abs(dFdx(coord)), abs(dFdy(coord))); return log2(max(d.x, d.y)); } uvec2 DecodeShadow(uint pixel) { return uvec2(pixel >> 8, pixel & 0xFFu); } uint EncodeShadow(uvec2 pixel) { return (pixel.x << 8) | pixel.y; } float CompareShadow(uint pixel, uint z) { uvec2 p = DecodeShadow(pixel); return mix(float(p.y) * (1.0 / 255.0), 0.0, p.x <= z); } float SampleShadow2D(ivec2 uv, uint z) { if (any(bvec4( lessThan(uv, ivec2(0)), greaterThanEqual(uv, imageSize(shadow_texture_px)) ))) return 1.0; return CompareShadow(imageLoad(shadow_texture_px, uv).x, z); } float mix2(vec4 s, vec2 a) { vec2 t = mix(s.xy, s.zw, a.yy); return mix(t.x, t.y, a.x); } vec4 shadowTexture(vec2 uv, float w) { )"; if (!config.state.shadow_texture_orthographic) { out += "uv /= w;"; } out += "uint z = uint(max(0, int(min(abs(w), 1.0) * float(0xFFFFFF)) - shadow_texture_bias));"; out += R"( vec2 coord = vec2(imageSize(shadow_texture_px)) * uv - vec2(0.5); vec2 coord_floor = floor(coord); vec2 f = coord - coord_floor; ivec2 i = ivec2(coord_floor); vec4 s = vec4( SampleShadow2D(i , z), SampleShadow2D(i + ivec2(1, 0), z), SampleShadow2D(i + ivec2(0, 1), z), SampleShadow2D(i + ivec2(1, 1), z)); return vec4(mix2(s, f)); } vec4 shadowTextureCube(vec2 uv, float w) { ivec2 size = imageSize(shadow_texture_px); vec3 c = vec3(uv, w); vec3 a = abs(c); if (a.x > a.y && a.x > a.z) { w = a.x; uv = -c.zy; if (c.x < 0.0) uv.x = -uv.x; } else if (a.y > a.z) { w = a.y; uv = c.xz; if (c.y < 0.0) uv.y = -uv.y; } else { w = a.z; uv = -c.xy; if (c.z > 0.0) uv.x = -uv.x; } )"; out += "uint z = uint(max(0, int(min(w, 1.0) * float(0xFFFFFF)) - shadow_texture_bias));"; out += R"( vec2 coord = vec2(size) * (uv / w * vec2(0.5) + vec2(0.5)) - vec2(0.5); vec2 coord_floor = floor(coord); vec2 f = coord - coord_floor; ivec2 i00 = ivec2(coord_floor); ivec2 i10 = i00 + ivec2(1, 0); ivec2 i01 = i00 + ivec2(0, 1); ivec2 i11 = i00 + ivec2(1, 1); ivec2 cmin = ivec2(0), cmax = size - ivec2(1, 1); i00 = clamp(i00, cmin, cmax); i10 = clamp(i10, cmin, cmax); i01 = clamp(i01, cmin, cmax); i11 = clamp(i11, cmin, cmax); uvec4 pixels; // This part should have been refactored into functions, // but many drivers don't like passing uimage2D as parameters if (a.x > a.y && a.x > a.z) { if (c.x > 0.0) pixels = uvec4( imageLoad(shadow_texture_px, i00).r, imageLoad(shadow_texture_px, i10).r, imageLoad(shadow_texture_px, i01).r, imageLoad(shadow_texture_px, i11).r); else pixels = uvec4( imageLoad(shadow_texture_nx, i00).r, imageLoad(shadow_texture_nx, i10).r, imageLoad(shadow_texture_nx, i01).r, imageLoad(shadow_texture_nx, i11).r); } else if (a.y > a.z) { if (c.y > 0.0) pixels = uvec4( imageLoad(shadow_texture_py, i00).r, imageLoad(shadow_texture_py, i10).r, imageLoad(shadow_texture_py, i01).r, imageLoad(shadow_texture_py, i11).r); else pixels = uvec4( imageLoad(shadow_texture_ny, i00).r, imageLoad(shadow_texture_ny, i10).r, imageLoad(shadow_texture_ny, i01).r, imageLoad(shadow_texture_ny, i11).r); } else { if (c.z > 0.0) pixels = uvec4( imageLoad(shadow_texture_pz, i00).r, imageLoad(shadow_texture_pz, i10).r, imageLoad(shadow_texture_pz, i01).r, imageLoad(shadow_texture_pz, i11).r); else pixels = uvec4( imageLoad(shadow_texture_nz, i00).r, imageLoad(shadow_texture_nz, i10).r, imageLoad(shadow_texture_nz, i01).r, imageLoad(shadow_texture_nz, i11).r); } vec4 s = vec4( CompareShadow(pixels.x, z), CompareShadow(pixels.y, z), CompareShadow(pixels.z, z), CompareShadow(pixels.w, z)); return vec4(mix2(s, f)); } )"; if (config.state.proctex.enable) AppendProcTexSampler(out, config); // We round the interpolated primary color to the nearest 1/255th // This maintains the PICA's 8 bits of precision out += R"( void main() { vec4 rounded_primary_color = byteround(primary_color); vec4 primary_fragment_color = vec4(0.0); vec4 secondary_fragment_color = vec4(0.0); )"; // Do not do any sort of processing if it's obvious we're not going to pass the alpha test if (state.alpha_test_func == FramebufferRegs::CompareFunc::Never) { out += "discard; }"; return {std::move(out)}; } // Append the scissor test if (state.scissor_test_mode != RasterizerRegs::ScissorMode::Disabled) { out += "if ("; // Negate the condition if we have to keep only the pixels outside the scissor box if (state.scissor_test_mode == RasterizerRegs::ScissorMode::Include) { out += '!'; } out += "(gl_FragCoord.x >= float(scissor_x1) && " "gl_FragCoord.y >= float(scissor_y1) && " "gl_FragCoord.x < float(scissor_x2) && " "gl_FragCoord.y < float(scissor_y2))) discard;\n"; } // After perspective divide, OpenGL transform z_over_w from [-1, 1] to [near, far]. Here we use // default near = 0 and far = 1, and undo the transformation to get the original z_over_w, then // do our own transformation according to PICA specification. out += "float z_over_w = 2.0 * gl_FragCoord.z - 1.0;\n" "float depth = z_over_w * depth_scale + depth_offset;\n"; if (state.depthmap_enable == RasterizerRegs::DepthBuffering::WBuffering) { out += "depth /= gl_FragCoord.w;\n"; } if (state.lighting.enable) WriteLighting(out, config); out += "vec4 combiner_buffer = vec4(0.0);\n" "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n" "vec4 last_tex_env_out = vec4(0.0);\n"; for (std::size_t index = 0; index < state.tev_stages.size(); ++index) { WriteTevStage(out, config, static_cast(index)); } if (state.alpha_test_func != FramebufferRegs::CompareFunc::Always) { out += "if ("; AppendAlphaTestCondition(out, state.alpha_test_func); out += ") discard;\n"; } // Append fog combiner if (state.fog_mode == TexturingRegs::FogMode::Fog) { // Get index into fog LUT if (state.fog_flip) { out += "float fog_index = (1.0 - float(depth)) * 128.0;\n"; } else { out += "float fog_index = depth * 128.0;\n"; } // Generate clamped fog factor from LUT for given fog index out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n" "float fog_f = fog_index - fog_i;\n" "vec2 fog_lut_entry = texelFetch(texture_buffer_lut_lf, int(fog_i) + " "fog_lut_offset).rg;\n" "float fog_factor = fog_lut_entry.r + fog_lut_entry.g * fog_f;\n" "fog_factor = clamp(fog_factor, 0.0, 1.0);\n"; // Blend the fog out += "last_tex_env_out.rgb = mix(fog_color.rgb, last_tex_env_out.rgb, fog_factor);\n"; } else if (state.fog_mode == TexturingRegs::FogMode::Gas) { Core::System::GetInstance().TelemetrySession().AddField( Common::Telemetry::FieldType::Session, "VideoCore_Pica_UseGasMode", true); LOG_CRITICAL(Render_OpenGL, "Unimplemented gas mode"); out += "discard; }"; return {std::move(out)}; } if (state.shadow_rendering) { out += R"( uint d = uint(clamp(depth, 0.0, 1.0) * float(0xFFFFFF)); uint s = uint(last_tex_env_out.g * float(0xFF)); ivec2 image_coord = ivec2(gl_FragCoord.xy); uint old = imageLoad(shadow_buffer, image_coord).x; uint new; uint old2; do { old2 = old; uvec2 ref = DecodeShadow(old); if (d < ref.x) { if (s == 0u) { ref.x = d; } else { s = uint(float(s) / (shadow_bias_constant + shadow_bias_linear * float(d) / float(ref.x))); ref.y = min(s, ref.y); } } new = EncodeShadow(ref); } while ((old = imageAtomicCompSwap(shadow_buffer, image_coord, old, new)) != old2); )"; } else { out += "gl_FragDepth = depth;\n"; // Round the final fragment color to maintain the PICA's 8 bits of precision out += "color = byteround(last_tex_env_out);\n"; } if (GLES) { if (!state.alphablend_enable) { switch (state.logic_op) { case FramebufferRegs::LogicOp::Clear: out += "color = vec4(0);\n"; break; case FramebufferRegs::LogicOp::Set: out += "color = vec4(1);\n"; break; case FramebufferRegs::LogicOp::Copy: // Take the color output as-is break; case FramebufferRegs::LogicOp::CopyInverted: out += "color = ~color;\n"; break; case FramebufferRegs::LogicOp::NoOp: // We need to discard the color, but not necessarily the depth. This is not possible // with fragment shader alone, so we emulate this behavior on GLES with glColorMask. break; default: LOG_CRITICAL(HW_GPU, "Unhandled logic_op {:x}", static_cast(state.logic_op)); UNIMPLEMENTED(); } } } out += '}'; return {std::move(out)}; } ShaderDecompiler::ProgramResult GenerateTrivialVertexShader(bool separable_shader) { std::string out; if (separable_shader && !GLES) { out += "#extension GL_ARB_separate_shader_objects : enable\n"; } out += fmt::format("layout(location = {}) in vec4 vert_position;\n" "layout(location = {}) in vec4 vert_color;\n" "layout(location = {}) in vec2 vert_texcoord0;\n" "layout(location = {}) in vec2 vert_texcoord1;\n" "layout(location = {}) in vec2 vert_texcoord2;\n" "layout(location = {}) in float vert_texcoord0_w;\n" "layout(location = {}) in vec4 vert_normquat;\n" "layout(location = {}) in vec3 vert_view;\n", ATTRIBUTE_POSITION, ATTRIBUTE_COLOR, ATTRIBUTE_TEXCOORD0, ATTRIBUTE_TEXCOORD1, ATTRIBUTE_TEXCOORD2, ATTRIBUTE_TEXCOORD0_W, ATTRIBUTE_NORMQUAT, ATTRIBUTE_VIEW); out += GetVertexInterfaceDeclaration(true, separable_shader); out += UniformBlockDef; out += R"( void main() { primary_color = vert_color; texcoord0 = vert_texcoord0; texcoord1 = vert_texcoord1; texcoord2 = vert_texcoord2; texcoord0_w = vert_texcoord0_w; normquat = vert_normquat; view = vert_view; gl_Position = vert_position; #if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance) gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0 gl_ClipDistance[1] = dot(clip_coef, vert_position); #endif // !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance) } )"; return {std::move(out)}; } std::optional GenerateVertexShader( const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config, bool separable_shader) { std::string out; if (separable_shader && !GLES) { out += "#extension GL_ARB_separate_shader_objects : enable\n"; } out += ShaderDecompiler::GetCommonDeclarations(); std::array used_regs{}; const auto get_input_reg = [&used_regs](u32 reg) { ASSERT(reg < 16); used_regs[reg] = true; return fmt::format("vs_in_reg{}", reg); }; const auto get_output_reg = [&](u32 reg) -> std::string { ASSERT(reg < 16); if (config.state.output_map[reg] < config.state.num_outputs) { return fmt::format("vs_out_attr{}", config.state.output_map[reg]); } return ""; }; auto program_source_opt = ShaderDecompiler::DecompileProgram( setup.program_code, setup.swizzle_data, config.state.main_offset, get_input_reg, get_output_reg, config.state.sanitize_mul); if (!program_source_opt) return std::nullopt; std::string& program_source = program_source_opt->code; out += R"( #define uniforms vs_uniforms layout (std140) uniform vs_config { pica_uniforms uniforms; }; )"; // input attributes declaration for (std::size_t i = 0; i < used_regs.size(); ++i) { if (used_regs[i]) { out += fmt::format("layout(location = {0}) in vec4 vs_in_reg{0};\n", i); } } out += '\n'; // output attributes declaration for (u32 i = 0; i < config.state.num_outputs; ++i) { out += (separable_shader ? "layout(location = " + std::to_string(i) + ")" : std::string{}) + " out vec4 vs_out_attr" + std::to_string(i) + ";\n"; } out += "\nvoid main() {\n"; for (u32 i = 0; i < config.state.num_outputs; ++i) { out += fmt::format(" vs_out_attr{} = vec4(0.0, 0.0, 0.0, 1.0);\n", i); } out += "\n exec_shader();\n}\n\n"; out += program_source; return {{std::move(out)}}; } static std::string GetGSCommonSource(const PicaGSConfigCommonRaw& config, bool separable_shader) { std::string out = GetVertexInterfaceDeclaration(true, separable_shader); out += UniformBlockDef; out += ShaderDecompiler::GetCommonDeclarations(); out += '\n'; for (u32 i = 0; i < config.vs_output_attributes; ++i) { out += (separable_shader ? "layout(location = " + std::to_string(i) + ")" : std::string{}) + " in vec4 vs_out_attr" + std::to_string(i) + "[];\n"; } out += R"( struct Vertex { )"; out += fmt::format(" vec4 attributes[{}];\n", config.gs_output_attributes); out += "};\n\n"; const auto semantic = [&config](VSOutputAttributes::Semantic slot_semantic) -> std::string { const u32 slot = static_cast(slot_semantic); const u32 attrib = config.semantic_maps[slot].attribute_index; const u32 comp = config.semantic_maps[slot].component_index; if (attrib < config.gs_output_attributes) { return fmt::format("vtx.attributes[{}].{}", attrib, "xyzw"[comp]); } return "0.0"; }; out += "vec4 GetVertexQuaternion(Vertex vtx) {\n"; out += " return vec4(" + semantic(VSOutputAttributes::QUATERNION_X) + ", " + semantic(VSOutputAttributes::QUATERNION_Y) + ", " + semantic(VSOutputAttributes::QUATERNION_Z) + ", " + semantic(VSOutputAttributes::QUATERNION_W) + ");\n"; out += "}\n\n"; out += "void EmitVtx(Vertex vtx, bool quats_opposite) {\n"; out += " vec4 vtx_pos = vec4(" + semantic(VSOutputAttributes::POSITION_X) + ", " + semantic(VSOutputAttributes::POSITION_Y) + ", " + semantic(VSOutputAttributes::POSITION_Z) + ", " + semantic(VSOutputAttributes::POSITION_W) + ");\n"; out += " gl_Position = vtx_pos;\n"; out += "#if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)\n"; out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0 out += " gl_ClipDistance[1] = dot(clip_coef, vtx_pos);\n"; out += "#endif // !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)\n\n"; out += " vec4 vtx_quat = GetVertexQuaternion(vtx);\n"; out += " normquat = mix(vtx_quat, -vtx_quat, bvec4(quats_opposite));\n\n"; out += " vec4 vtx_color = vec4(" + semantic(VSOutputAttributes::COLOR_R) + ", " + semantic(VSOutputAttributes::COLOR_G) + ", " + semantic(VSOutputAttributes::COLOR_B) + ", " + semantic(VSOutputAttributes::COLOR_A) + ");\n"; out += " primary_color = min(abs(vtx_color), vec4(1.0));\n\n"; out += " texcoord0 = vec2(" + semantic(VSOutputAttributes::TEXCOORD0_U) + ", " + semantic(VSOutputAttributes::TEXCOORD0_V) + ");\n"; out += " texcoord1 = vec2(" + semantic(VSOutputAttributes::TEXCOORD1_U) + ", " + semantic(VSOutputAttributes::TEXCOORD1_V) + ");\n\n"; out += " texcoord0_w = " + semantic(VSOutputAttributes::TEXCOORD0_W) + ";\n"; out += " view = vec3(" + semantic(VSOutputAttributes::VIEW_X) + ", " + semantic(VSOutputAttributes::VIEW_Y) + ", " + semantic(VSOutputAttributes::VIEW_Z) + ");\n\n"; out += " texcoord2 = vec2(" + semantic(VSOutputAttributes::TEXCOORD2_U) + ", " + semantic(VSOutputAttributes::TEXCOORD2_V) + ");\n\n"; out += " EmitVertex();\n"; out += "}\n"; out += R"( bool AreQuaternionsOpposite(vec4 qa, vec4 qb) { return (dot(qa, qb) < 0.0); } void EmitPrim(Vertex vtx0, Vertex vtx1, Vertex vtx2) { EmitVtx(vtx0, false); EmitVtx(vtx1, AreQuaternionsOpposite(GetVertexQuaternion(vtx0), GetVertexQuaternion(vtx1))); EmitVtx(vtx2, AreQuaternionsOpposite(GetVertexQuaternion(vtx0), GetVertexQuaternion(vtx2))); EndPrimitive(); } )"; return out; }; ShaderDecompiler::ProgramResult GenerateFixedGeometryShader(const PicaFixedGSConfig& config, bool separable_shader) { std::string out; if (separable_shader && !GLES) { out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; } out += R"( layout(triangles) in; layout(triangle_strip, max_vertices = 3) out; )"; out += GetGSCommonSource(config.state, separable_shader); out += R"( void main() { Vertex prim_buffer[3]; )"; for (u32 vtx = 0; vtx < 3; ++vtx) { out += fmt::format(" prim_buffer[{}].attributes = vec4[{}](", vtx, config.state.gs_output_attributes); for (u32 i = 0; i < config.state.vs_output_attributes; ++i) { out += fmt::format("{}vs_out_attr{}[{}]", i == 0 ? "" : ", ", i, vtx); } out += ");\n"; } out += " EmitPrim(prim_buffer[0], prim_buffer[1], prim_buffer[2]);\n"; out += "}\n"; return {std::move(out)}; } } // namespace OpenGL