citra/src/video_core/renderer_opengl/gl_shader_gen.cpp

1785 lines
72 KiB
C++
Raw Normal View History

// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <string_view>
#include <fmt/format.h>
#include "common/bit_set.h"
#include "common/logging/log.h"
#include "core/core.h"
2023-08-01 02:57:38 +02:00
#include "core/telemetry_session.h"
#include "video_core/pica_state.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_vars.h"
Prepare frontend for multiple graphics APIs (#6347) * externals: Update dynarmic * settings: Introduce GraphicsAPI enum * For now it's OpenGL only but will be expanded upon later * citra_qt: Introduce backend agnostic context management * Mostly a direct port from yuzu * core: Simplify context acquire * settings: Add option to create debug contexts * renderer_opengl: Abstract initialization to Driver * This commit also updates glad and adds some useful extensions which we will use in part 2 * Rasterizer construction is moved to the specific renderer instead of RendererBase. Software rendering has been disable to achieve this but will be brought back in the next commit. * video_core: Remove Init/Shutdown methods from renderer * The constructor and destructor can do the same job * In addition move opengl function loading to Qt since SDL already does this. Also remove ErrorVideoCore which is never reached * citra_qt: Decouple software renderer from opengl part 1 * citra: Decouple software renderer from opengl part 2 * android: Decouple software renderer from opengl part 3 * swrasterizer: Decouple software renderer from opengl part 4 * This commit simply enforces the renderer naming conventions in the software renderer * video_core: Move RendererBase to VideoCore * video_core: De-globalize screenshot state * video_core: Pass system to the renderers * video_core: Commonize shader uniform data * video_core: Abstract backend agnostic rasterizer operations * bootmanager: Remove references to OpenGL for macOS OpenGL macOS headers definitions clash heavily with each other * citra_qt: Proper title for api settings * video_core: Reduce boost usage * bootmanager: Fix hide mouse option Remove event handlers from RenderWidget for events that are already handled by the parent GRenderWindow. Also enable mouse tracking on the RenderWidget. * android: Remove software from graphics api list * code: Address review comments * citra: Port per-game settings read * Having to update the default value for all backends is a pain so lets centralize it * android: Rename to OpenGLES --------- Co-authored-by: MerryMage <MerryMage@users.noreply.github.com> Co-authored-by: Vitor Kiguchi <vitor-kiguchi@hotmail.com>
2023-03-27 13:29:17 +02:00
#include "video_core/shader/shader_uniforms.h"
#include "video_core/video_core.h"
2017-01-29 00:12:09 +01:00
using Pica::FramebufferRegs;
using Pica::LightingRegs;
2017-01-29 00:12:09 +01:00
using Pica::RasterizerRegs;
using Pica::TexturingRegs;
using TevStageConfig = TexturingRegs::TevStageConfig;
using VSOutputAttributes = RasterizerRegs::VSOutputAttributes;
namespace OpenGL {
Prepare frontend for multiple graphics APIs (#6347) * externals: Update dynarmic * settings: Introduce GraphicsAPI enum * For now it's OpenGL only but will be expanded upon later * citra_qt: Introduce backend agnostic context management * Mostly a direct port from yuzu * core: Simplify context acquire * settings: Add option to create debug contexts * renderer_opengl: Abstract initialization to Driver * This commit also updates glad and adds some useful extensions which we will use in part 2 * Rasterizer construction is moved to the specific renderer instead of RendererBase. Software rendering has been disable to achieve this but will be brought back in the next commit. * video_core: Remove Init/Shutdown methods from renderer * The constructor and destructor can do the same job * In addition move opengl function loading to Qt since SDL already does this. Also remove ErrorVideoCore which is never reached * citra_qt: Decouple software renderer from opengl part 1 * citra: Decouple software renderer from opengl part 2 * android: Decouple software renderer from opengl part 3 * swrasterizer: Decouple software renderer from opengl part 4 * This commit simply enforces the renderer naming conventions in the software renderer * video_core: Move RendererBase to VideoCore * video_core: De-globalize screenshot state * video_core: Pass system to the renderers * video_core: Commonize shader uniform data * video_core: Abstract backend agnostic rasterizer operations * bootmanager: Remove references to OpenGL for macOS OpenGL macOS headers definitions clash heavily with each other * citra_qt: Proper title for api settings * video_core: Reduce boost usage * bootmanager: Fix hide mouse option Remove event handlers from RenderWidget for events that are already handled by the parent GRenderWindow. Also enable mouse tracking on the RenderWidget. * android: Remove software from graphics api list * code: Address review comments * citra: Port per-game settings read * Having to update the default value for all backends is a pain so lets centralize it * android: Rename to OpenGLES --------- Co-authored-by: MerryMage <MerryMage@users.noreply.github.com> Co-authored-by: Vitor Kiguchi <vitor-kiguchi@hotmail.com>
2023-03-27 13:29:17 +02:00
const std::string UniformBlockDef = Pica::Shader::BuildShaderUniformDefinitions();
static std::string GetVertexInterfaceDeclaration(bool is_output, bool separable_shader) {
std::string out;
const auto append_variable = [&](std::string_view var, int location) {
if (separable_shader) {
out += fmt::format("layout (location={}) ", location);
}
out += fmt::format("{}{};\n", is_output ? "out " : "in ", var);
};
append_variable("vec4 primary_color", ATTRIBUTE_COLOR);
append_variable("vec2 texcoord0", ATTRIBUTE_TEXCOORD0);
append_variable("vec2 texcoord1", ATTRIBUTE_TEXCOORD1);
append_variable("vec2 texcoord2", ATTRIBUTE_TEXCOORD2);
append_variable("float texcoord0_w", ATTRIBUTE_TEXCOORD0_W);
append_variable("vec4 normquat", ATTRIBUTE_NORMQUAT);
append_variable("vec3 view", ATTRIBUTE_VIEW);
if (is_output && separable_shader) {
// gl_PerVertex redeclaration is required for separate shader object
out += R"(
out gl_PerVertex {
vec4 gl_Position;
#if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)
float gl_ClipDistance[2];
#endif // !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)
};
)";
}
return out;
}
Custom textures rewrite (#6452) * common: Add thread pool from yuzu * Is really useful for asynchronous operations like shader compilation and custom textures, will be used in following PRs * core: Improve ImageInterface * Provide a default implementation so frontends don't have to duplicate code registering the lodepng version * Add a dds version too which we will use in the next commit * rasterizer_cache: Rewrite custom textures * There's just too much to talk about here, look at the PR description for more details * rasterizer_cache: Implement basic pack configuration file * custom_tex_manager: Flip dumped textures * custom_tex_manager: Optimize custom texture hashing * If no convertions are needed then we can hash the decoded data directly removing the needed for duplicate decode * custom_tex_manager: Implement asynchronous texture loading * The file loading and decoding is offloaded into worker threads, while the upload itself still occurs in the main thread to avoid having to manage shared contexts * Address review comments * custom_tex_manager: Introduce custom material support * video_core: Move custom textures to separate directory * Also split the files to make the code cleaner * gl_texture_runtime: Generate mipmaps for material * custom_tex_manager: Prevent memory overflow when preloading * externals: Add dds-ktx as submodule * string_util: Return vector from SplitString * No code benefits from passing it as an argument * custom_textures: Use json config file * gl_rasterizer: Only bind material for unit 0 * Address review comments
2023-04-27 06:38:28 +02:00
PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs, bool use_normal) {
PicaFSConfig res{};
auto& state = res.state;
state.scissor_test_mode = regs.rasterizer.scissor_test.mode;
state.depthmap_enable = regs.rasterizer.depthmap_enable;
state.alpha_test_func = regs.framebuffer.output_merger.alpha_test.enable
? regs.framebuffer.output_merger.alpha_test.func.Value()
: FramebufferRegs::CompareFunc::Always;
state.texture0_type = regs.texturing.texture0.type;
state.texture2_use_coord1 = regs.texturing.main_config.texture2_use_coord1 != 0;
if (GLES) {
// With GLES, we need this in the fragment shader to emulate logic operations
state.alphablend_enable =
Pica::g_state.regs.framebuffer.output_merger.alphablend_enable == 1;
state.logic_op = regs.framebuffer.output_merger.logic_op;
} else {
// We don't need these otherwise, reset them to avoid unnecessary shader generation
state.alphablend_enable = {};
state.logic_op = {};
}
// Copy relevant tev stages fields.
// We don't sync const_color here because of the high variance, it is a
// shader uniform instead.
const auto& tev_stages = regs.texturing.GetTevStages();
DEBUG_ASSERT(state.tev_stages.size() == tev_stages.size());
for (std::size_t i = 0; i < tev_stages.size(); i++) {
const auto& tev_stage = tev_stages[i];
state.tev_stages[i].sources_raw = tev_stage.sources_raw;
state.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw;
state.tev_stages[i].ops_raw = tev_stage.ops_raw;
state.tev_stages[i].scales_raw = tev_stage.scales_raw;
}
state.fog_mode = regs.texturing.fog_mode;
state.fog_flip = regs.texturing.fog_flip != 0;
state.combiner_buffer_input = regs.texturing.tev_combiner_buffer_input.update_mask_rgb.Value() |
regs.texturing.tev_combiner_buffer_input.update_mask_a.Value()
<< 4;
// Fragment lighting
state.lighting.enable = !regs.lighting.disable;
state.lighting.src_num = regs.lighting.max_light_index + 1;
for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) {
unsigned num = regs.lighting.light_enable.GetNum(light_index);
const auto& light = regs.lighting.light[num];
state.lighting.light[light_index].num = num;
state.lighting.light[light_index].directional = light.config.directional != 0;
state.lighting.light[light_index].two_sided_diffuse = light.config.two_sided_diffuse != 0;
state.lighting.light[light_index].geometric_factor_0 = light.config.geometric_factor_0 != 0;
state.lighting.light[light_index].geometric_factor_1 = light.config.geometric_factor_1 != 0;
state.lighting.light[light_index].dist_atten_enable =
!regs.lighting.IsDistAttenDisabled(num);
2017-05-25 19:15:12 +02:00
state.lighting.light[light_index].spot_atten_enable =
!regs.lighting.IsSpotAttenDisabled(num);
state.lighting.light[light_index].shadow_enable = !regs.lighting.IsShadowDisabled(num);
}
state.lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0;
state.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0;
state.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value();
state.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0);
state.lighting.lut_d1.enable = regs.lighting.config1.disable_lut_d1 == 0;
state.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0;
state.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value();
state.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1);
2017-05-25 19:15:12 +02:00
// this is a dummy field due to lack of the corresponding register
state.lighting.lut_sp.enable = true;
state.lighting.lut_sp.abs_input = regs.lighting.abs_lut_input.disable_sp == 0;
state.lighting.lut_sp.type = regs.lighting.lut_input.sp.Value();
state.lighting.lut_sp.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.sp);
state.lighting.lut_fr.enable = regs.lighting.config1.disable_lut_fr == 0;
state.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0;
state.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value();
state.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr);
state.lighting.lut_rr.enable = regs.lighting.config1.disable_lut_rr == 0;
state.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0;
state.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value();
state.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr);
state.lighting.lut_rg.enable = regs.lighting.config1.disable_lut_rg == 0;
state.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0;
state.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value();
state.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg);
state.lighting.lut_rb.enable = regs.lighting.config1.disable_lut_rb == 0;
state.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0;
state.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value();
state.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb);
state.lighting.config = regs.lighting.config0.config;
state.lighting.enable_primary_alpha = regs.lighting.config0.enable_primary_alpha;
state.lighting.enable_secondary_alpha = regs.lighting.config0.enable_secondary_alpha;
state.lighting.bump_mode = regs.lighting.config0.bump_mode;
state.lighting.bump_selector = regs.lighting.config0.bump_selector;
state.lighting.bump_renorm = regs.lighting.config0.disable_bump_renorm == 0;
state.lighting.clamp_highlights = regs.lighting.config0.clamp_highlights != 0;
state.lighting.enable_shadow = regs.lighting.config0.enable_shadow != 0;
state.lighting.shadow_primary = regs.lighting.config0.shadow_primary != 0;
state.lighting.shadow_secondary = regs.lighting.config0.shadow_secondary != 0;
state.lighting.shadow_invert = regs.lighting.config0.shadow_invert != 0;
state.lighting.shadow_alpha = regs.lighting.config0.shadow_alpha != 0;
state.lighting.shadow_selector = regs.lighting.config0.shadow_selector;
state.proctex.enable = regs.texturing.main_config.texture3_enable;
if (state.proctex.enable) {
state.proctex.coord = regs.texturing.main_config.texture3_coordinates;
state.proctex.u_clamp = regs.texturing.proctex.u_clamp;
state.proctex.v_clamp = regs.texturing.proctex.v_clamp;
state.proctex.color_combiner = regs.texturing.proctex.color_combiner;
state.proctex.alpha_combiner = regs.texturing.proctex.alpha_combiner;
state.proctex.separate_alpha = regs.texturing.proctex.separate_alpha;
state.proctex.noise_enable = regs.texturing.proctex.noise_enable;
state.proctex.u_shift = regs.texturing.proctex.u_shift;
state.proctex.v_shift = regs.texturing.proctex.v_shift;
state.proctex.lut_width = regs.texturing.proctex_lut.width;
state.proctex.lut_offset0 = regs.texturing.proctex_lut_offset.level0;
state.proctex.lut_offset1 = regs.texturing.proctex_lut_offset.level1;
state.proctex.lut_offset2 = regs.texturing.proctex_lut_offset.level2;
state.proctex.lut_offset3 = regs.texturing.proctex_lut_offset.level3;
state.proctex.lod_min = regs.texturing.proctex_lut.lod_min;
state.proctex.lod_max = regs.texturing.proctex_lut.lod_max;
state.proctex.lut_filter = regs.texturing.proctex_lut.filter;
}
state.shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode ==
FramebufferRegs::FragmentOperationMode::Shadow;
state.shadow_texture_orthographic = regs.texturing.shadow.orthographic != 0;
Custom textures rewrite (#6452) * common: Add thread pool from yuzu * Is really useful for asynchronous operations like shader compilation and custom textures, will be used in following PRs * core: Improve ImageInterface * Provide a default implementation so frontends don't have to duplicate code registering the lodepng version * Add a dds version too which we will use in the next commit * rasterizer_cache: Rewrite custom textures * There's just too much to talk about here, look at the PR description for more details * rasterizer_cache: Implement basic pack configuration file * custom_tex_manager: Flip dumped textures * custom_tex_manager: Optimize custom texture hashing * If no convertions are needed then we can hash the decoded data directly removing the needed for duplicate decode * custom_tex_manager: Implement asynchronous texture loading * The file loading and decoding is offloaded into worker threads, while the upload itself still occurs in the main thread to avoid having to manage shared contexts * Address review comments * custom_tex_manager: Introduce custom material support * video_core: Move custom textures to separate directory * Also split the files to make the code cleaner * gl_texture_runtime: Generate mipmaps for material * custom_tex_manager: Prevent memory overflow when preloading * externals: Add dds-ktx as submodule * string_util: Return vector from SplitString * No code benefits from passing it as an argument * custom_textures: Use json config file * gl_rasterizer: Only bind material for unit 0 * Address review comments
2023-04-27 06:38:28 +02:00
state.use_custom_normal_map = use_normal;
return res;
}
void PicaShaderConfigCommon::Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
program_hash = setup.GetProgramCodeHash();
swizzle_hash = setup.GetSwizzleDataHash();
main_offset = regs.main_offset;
sanitize_mul = VideoCore::g_hw_shader_accurate_mul;
num_outputs = 0;
output_map.fill(16);
for (int reg : Common::BitSet<u32>(regs.output_mask)) {
output_map[reg] = num_outputs++;
}
}
void PicaGSConfigCommonRaw::Init(const Pica::Regs& regs) {
vs_output_attributes = Common::BitSet<u32>(regs.vs.output_mask).Count();
gs_output_attributes = vs_output_attributes;
semantic_maps.fill({16, 0});
for (u32 attrib = 0; attrib < regs.rasterizer.vs_output_total; ++attrib) {
const std::array semantics{
regs.rasterizer.vs_output_attributes[attrib].map_x.Value(),
regs.rasterizer.vs_output_attributes[attrib].map_y.Value(),
regs.rasterizer.vs_output_attributes[attrib].map_z.Value(),
regs.rasterizer.vs_output_attributes[attrib].map_w.Value(),
};
for (u32 comp = 0; comp < 4; ++comp) {
const auto semantic = semantics[comp];
if (static_cast<std::size_t>(semantic) < 24) {
semantic_maps[static_cast<std::size_t>(semantic)] = {attrib, comp};
} else if (semantic != VSOutputAttributes::INVALID) {
LOG_ERROR(Render_OpenGL, "Invalid/unknown semantic id: {}", semantic);
}
}
}
}
/// Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code)
static bool IsPassThroughTevStage(const TevStageConfig& stage) {
return (stage.color_op == TevStageConfig::Operation::Replace &&
stage.alpha_op == TevStageConfig::Operation::Replace &&
stage.color_source1 == TevStageConfig::Source::Previous &&
stage.alpha_source1 == TevStageConfig::Source::Previous &&
stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor &&
stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha &&
stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1);
}
static std::string SampleTexture(const PicaFSConfig& config, unsigned texture_unit) {
const auto& state = config.state;
switch (texture_unit) {
case 0:
// Only unit 0 respects the texturing type
switch (state.texture0_type) {
case TexturingRegs::TextureConfig::Texture2D:
Rasterizer cache refactor (#6375) * rasterizer_cache: Remove custom texture code * It's a hacky buggy mess, will be reimplemented later when the cache is in a better state * rasterizer_cache: Refactor surface upload/download * Switch to the texture_codec header which was written as part of the vulkan backend by steveice and me * Move most of the upload logic to the rasterizer cache and out of the surface object * Scaled uploads/downloads have been disabled for now since they require more runtime infrastructure * rasterizer_cache: Refactor runtime interface * Remove aspect enum which is the same as SurfaceType * Replace Subresource with specific structures for each operation (blit/copy/clear). This mimics moderns APIs vulkan much better * Pass the surface to the runtime instead of the texture * Implement CopyTextures with glCopyImageSubData which is available on 4.3 and gles. This function also has an overload for cubes which will be removed later. * rasterizer_cache: Move texture allocation to the runtime * renderer_opengl: Remove TextureDownloaderES * It's overly compilcated and unused at the moment. Will be replaced with a simple compute shader in a later commit * rasterizer_cache: Split CachedSurface * This commit splits CachedSurface into two classes, SurfaceBase which contains the backend agnostic functions and Surface which is the opengl specific part * For now the cache uses the opengl surface directly and there are a few ugly casts with watchers, those will be taken care of when the template convertion and watcher removal are added respectively * rasterizer_cache: Move reinterpreters to the runtime * rasterizer_cache: Move some pixel format function to the cpp file * rasterizer_cache: Common texture acceleration functions * They don't contain any backend specific code so they shouldn't be duplicated * rasterizer_cache: Remove BlitSurfaces * It's better to prefer copy/blit in the caller anyway * rasterizer_cache: Only allocate needed levels * rasterizer_cache: Move texture runtime out of common dir * Also shorten the util header filename * surface_params: Cleanup code * Add more comments, organize it a bit etc * rasterizer_cache: Move texture filtering to the runtime * rasterizer_cache: Move to VideoCore * renderer_opengl: Reimplement scaled uploads/downloads * Instead of looking up for temporary textures, each allocation now contains both a scaled and unscaled handle This allows the scale operations to be done inside the surface object itself and improves performance in general * In particular the scaled download code has been expanded to use ARB_get_texture_sub_image when possible which is faster and more convenient than glReadPixels. The latter is still relevant for OpenGLES though. * Finally allocations are now given a handy debug name that can be viewed from renderdoc. * rasterizer_cache: Remove global state * gl_rasterizer: Abstract common draw operations to Framebuffer * This also allows to cache framebuffer objects instead of always swapping the textures, something that particularly benefits mali gpus * rasterizer_cache: Implement multi-level surfaces * With this commit the cache can now directly upload and use mipmaps without needing to sync them with watchers. By using native mimaps directly this also adds support for mipmap for cube * Texture cubes have also been updated to drop the watcher requirement * host_shaders: Add CMake integration for string shaders * Improves build time shader generation making it much less prone to errors. Also moves the presentation shaders here to avoid embedding them to the cpp file. * Texture filter shaders now make explicit use of uniform bindings for better vulkan compatibility * renderer_opengl: Emulate lod bias in the shader * This way opengles can emulate it correctly * gl_rasterizer: Respect GL_MAX_TEXTURE_BUFFER_SIZE * Older Bifrost Mali GPUs only support up to 64kb texture buffers. Citra would try to allocate a much larger buffer the first 64kb of which would work fine but after that the driver starts misbehaving and showing various graphical glitches * rasterizer_cache: Cleanup CopySurface * renderer_opengl: Keep frames synchronized when using a GPU debugger * rasterizer_cache: Rename Surface to SurfaceRef * Makes it clear that surface is a shared_ptr and not an object * rasterizer_cache: Cleanup * Move constructor to the top of the file * Move FindMatch to the top as well and remove the Invalid flag which was redudant; all FindMatch calls used it expect from MatchFlags::Copy which ignores it anyway * gl_texture_runtime: Make driver const * gl_texture_runtime: Fix RGB8 format handling * The texture_codec header, being written with vulkan in mind converts RGB8 to RGBA8. The backend wasn't adjusted to account for this though and treated the data as RGB8. * Also remove D16 convertions, both opengl and vulkan are required to support this format so these are not needed * gl_texture_runtime: Reduce state switches during FBO blits * glBlitFramebuffer is only affected by the scissor rectangle so just disable scissor testing instead of resetting our entire state * surface_params: Prevent texcopy that spans multiple levels * It would have failed before as well, with multi-level surfaces it triggers the assert though * renderer_opengl: Centralize texture filters * A lot of code is shared between the filters thus is makes it sense to centralize them * Also fix an issue with partial texture uploads * Address review comments * rasterizer_cache: Use leading return types * rasterizer_cache: Cleanup null checks * renderer_opengl: Add additional logging * externals: Actually downgrade glad * For some reason I missed adding the files to git * surface_params: Do not check for levels in exact match * Some games will try to use the base level of a multi level surface. Checking for levels forces another surface to be created and a copy to be made which is both unncessary and breaks custom textures --------- Co-authored-by: bunnei <bunneidev@gmail.com>
2023-04-21 09:14:55 +02:00
return "textureLod(tex0, texcoord0, getLod(texcoord0 * vec2(textureSize(tex0, 0))) + "
"tex_lod_bias[0])";
case TexturingRegs::TextureConfig::Projection2D:
// TODO (wwylele): find the exact LOD formula for projection texture
return "textureProj(tex0, vec3(texcoord0, texcoord0_w))";
2018-03-09 15:46:34 +01:00
case TexturingRegs::TextureConfig::TextureCube:
return "texture(tex_cube, vec3(texcoord0, texcoord0_w))";
case TexturingRegs::TextureConfig::Shadow2D:
return "shadowTexture(texcoord0, texcoord0_w)";
case TexturingRegs::TextureConfig::ShadowCube:
return "shadowTextureCube(texcoord0, texcoord0_w)";
case TexturingRegs::TextureConfig::Disabled:
return "vec4(0.0)";
default:
LOG_CRITICAL(HW_GPU, "Unhandled texture type {:x}", state.texture0_type);
UNIMPLEMENTED();
return "texture(tex0, texcoord0)";
}
case 1:
Rasterizer cache refactor (#6375) * rasterizer_cache: Remove custom texture code * It's a hacky buggy mess, will be reimplemented later when the cache is in a better state * rasterizer_cache: Refactor surface upload/download * Switch to the texture_codec header which was written as part of the vulkan backend by steveice and me * Move most of the upload logic to the rasterizer cache and out of the surface object * Scaled uploads/downloads have been disabled for now since they require more runtime infrastructure * rasterizer_cache: Refactor runtime interface * Remove aspect enum which is the same as SurfaceType * Replace Subresource with specific structures for each operation (blit/copy/clear). This mimics moderns APIs vulkan much better * Pass the surface to the runtime instead of the texture * Implement CopyTextures with glCopyImageSubData which is available on 4.3 and gles. This function also has an overload for cubes which will be removed later. * rasterizer_cache: Move texture allocation to the runtime * renderer_opengl: Remove TextureDownloaderES * It's overly compilcated and unused at the moment. Will be replaced with a simple compute shader in a later commit * rasterizer_cache: Split CachedSurface * This commit splits CachedSurface into two classes, SurfaceBase which contains the backend agnostic functions and Surface which is the opengl specific part * For now the cache uses the opengl surface directly and there are a few ugly casts with watchers, those will be taken care of when the template convertion and watcher removal are added respectively * rasterizer_cache: Move reinterpreters to the runtime * rasterizer_cache: Move some pixel format function to the cpp file * rasterizer_cache: Common texture acceleration functions * They don't contain any backend specific code so they shouldn't be duplicated * rasterizer_cache: Remove BlitSurfaces * It's better to prefer copy/blit in the caller anyway * rasterizer_cache: Only allocate needed levels * rasterizer_cache: Move texture runtime out of common dir * Also shorten the util header filename * surface_params: Cleanup code * Add more comments, organize it a bit etc * rasterizer_cache: Move texture filtering to the runtime * rasterizer_cache: Move to VideoCore * renderer_opengl: Reimplement scaled uploads/downloads * Instead of looking up for temporary textures, each allocation now contains both a scaled and unscaled handle This allows the scale operations to be done inside the surface object itself and improves performance in general * In particular the scaled download code has been expanded to use ARB_get_texture_sub_image when possible which is faster and more convenient than glReadPixels. The latter is still relevant for OpenGLES though. * Finally allocations are now given a handy debug name that can be viewed from renderdoc. * rasterizer_cache: Remove global state * gl_rasterizer: Abstract common draw operations to Framebuffer * This also allows to cache framebuffer objects instead of always swapping the textures, something that particularly benefits mali gpus * rasterizer_cache: Implement multi-level surfaces * With this commit the cache can now directly upload and use mipmaps without needing to sync them with watchers. By using native mimaps directly this also adds support for mipmap for cube * Texture cubes have also been updated to drop the watcher requirement * host_shaders: Add CMake integration for string shaders * Improves build time shader generation making it much less prone to errors. Also moves the presentation shaders here to avoid embedding them to the cpp file. * Texture filter shaders now make explicit use of uniform bindings for better vulkan compatibility * renderer_opengl: Emulate lod bias in the shader * This way opengles can emulate it correctly * gl_rasterizer: Respect GL_MAX_TEXTURE_BUFFER_SIZE * Older Bifrost Mali GPUs only support up to 64kb texture buffers. Citra would try to allocate a much larger buffer the first 64kb of which would work fine but after that the driver starts misbehaving and showing various graphical glitches * rasterizer_cache: Cleanup CopySurface * renderer_opengl: Keep frames synchronized when using a GPU debugger * rasterizer_cache: Rename Surface to SurfaceRef * Makes it clear that surface is a shared_ptr and not an object * rasterizer_cache: Cleanup * Move constructor to the top of the file * Move FindMatch to the top as well and remove the Invalid flag which was redudant; all FindMatch calls used it expect from MatchFlags::Copy which ignores it anyway * gl_texture_runtime: Make driver const * gl_texture_runtime: Fix RGB8 format handling * The texture_codec header, being written with vulkan in mind converts RGB8 to RGBA8. The backend wasn't adjusted to account for this though and treated the data as RGB8. * Also remove D16 convertions, both opengl and vulkan are required to support this format so these are not needed * gl_texture_runtime: Reduce state switches during FBO blits * glBlitFramebuffer is only affected by the scissor rectangle so just disable scissor testing instead of resetting our entire state * surface_params: Prevent texcopy that spans multiple levels * It would have failed before as well, with multi-level surfaces it triggers the assert though * renderer_opengl: Centralize texture filters * A lot of code is shared between the filters thus is makes it sense to centralize them * Also fix an issue with partial texture uploads * Address review comments * rasterizer_cache: Use leading return types * rasterizer_cache: Cleanup null checks * renderer_opengl: Add additional logging * externals: Actually downgrade glad * For some reason I missed adding the files to git * surface_params: Do not check for levels in exact match * Some games will try to use the base level of a multi level surface. Checking for levels forces another surface to be created and a copy to be made which is both unncessary and breaks custom textures --------- Co-authored-by: bunnei <bunneidev@gmail.com>
2023-04-21 09:14:55 +02:00
return "textureLod(tex1, texcoord1, getLod(texcoord1 * vec2(textureSize(tex1, 0))) + "
"tex_lod_bias[1])";
case 2:
if (state.texture2_use_coord1)
Rasterizer cache refactor (#6375) * rasterizer_cache: Remove custom texture code * It's a hacky buggy mess, will be reimplemented later when the cache is in a better state * rasterizer_cache: Refactor surface upload/download * Switch to the texture_codec header which was written as part of the vulkan backend by steveice and me * Move most of the upload logic to the rasterizer cache and out of the surface object * Scaled uploads/downloads have been disabled for now since they require more runtime infrastructure * rasterizer_cache: Refactor runtime interface * Remove aspect enum which is the same as SurfaceType * Replace Subresource with specific structures for each operation (blit/copy/clear). This mimics moderns APIs vulkan much better * Pass the surface to the runtime instead of the texture * Implement CopyTextures with glCopyImageSubData which is available on 4.3 and gles. This function also has an overload for cubes which will be removed later. * rasterizer_cache: Move texture allocation to the runtime * renderer_opengl: Remove TextureDownloaderES * It's overly compilcated and unused at the moment. Will be replaced with a simple compute shader in a later commit * rasterizer_cache: Split CachedSurface * This commit splits CachedSurface into two classes, SurfaceBase which contains the backend agnostic functions and Surface which is the opengl specific part * For now the cache uses the opengl surface directly and there are a few ugly casts with watchers, those will be taken care of when the template convertion and watcher removal are added respectively * rasterizer_cache: Move reinterpreters to the runtime * rasterizer_cache: Move some pixel format function to the cpp file * rasterizer_cache: Common texture acceleration functions * They don't contain any backend specific code so they shouldn't be duplicated * rasterizer_cache: Remove BlitSurfaces * It's better to prefer copy/blit in the caller anyway * rasterizer_cache: Only allocate needed levels * rasterizer_cache: Move texture runtime out of common dir * Also shorten the util header filename * surface_params: Cleanup code * Add more comments, organize it a bit etc * rasterizer_cache: Move texture filtering to the runtime * rasterizer_cache: Move to VideoCore * renderer_opengl: Reimplement scaled uploads/downloads * Instead of looking up for temporary textures, each allocation now contains both a scaled and unscaled handle This allows the scale operations to be done inside the surface object itself and improves performance in general * In particular the scaled download code has been expanded to use ARB_get_texture_sub_image when possible which is faster and more convenient than glReadPixels. The latter is still relevant for OpenGLES though. * Finally allocations are now given a handy debug name that can be viewed from renderdoc. * rasterizer_cache: Remove global state * gl_rasterizer: Abstract common draw operations to Framebuffer * This also allows to cache framebuffer objects instead of always swapping the textures, something that particularly benefits mali gpus * rasterizer_cache: Implement multi-level surfaces * With this commit the cache can now directly upload and use mipmaps without needing to sync them with watchers. By using native mimaps directly this also adds support for mipmap for cube * Texture cubes have also been updated to drop the watcher requirement * host_shaders: Add CMake integration for string shaders * Improves build time shader generation making it much less prone to errors. Also moves the presentation shaders here to avoid embedding them to the cpp file. * Texture filter shaders now make explicit use of uniform bindings for better vulkan compatibility * renderer_opengl: Emulate lod bias in the shader * This way opengles can emulate it correctly * gl_rasterizer: Respect GL_MAX_TEXTURE_BUFFER_SIZE * Older Bifrost Mali GPUs only support up to 64kb texture buffers. Citra would try to allocate a much larger buffer the first 64kb of which would work fine but after that the driver starts misbehaving and showing various graphical glitches * rasterizer_cache: Cleanup CopySurface * renderer_opengl: Keep frames synchronized when using a GPU debugger * rasterizer_cache: Rename Surface to SurfaceRef * Makes it clear that surface is a shared_ptr and not an object * rasterizer_cache: Cleanup * Move constructor to the top of the file * Move FindMatch to the top as well and remove the Invalid flag which was redudant; all FindMatch calls used it expect from MatchFlags::Copy which ignores it anyway * gl_texture_runtime: Make driver const * gl_texture_runtime: Fix RGB8 format handling * The texture_codec header, being written with vulkan in mind converts RGB8 to RGBA8. The backend wasn't adjusted to account for this though and treated the data as RGB8. * Also remove D16 convertions, both opengl and vulkan are required to support this format so these are not needed * gl_texture_runtime: Reduce state switches during FBO blits * glBlitFramebuffer is only affected by the scissor rectangle so just disable scissor testing instead of resetting our entire state * surface_params: Prevent texcopy that spans multiple levels * It would have failed before as well, with multi-level surfaces it triggers the assert though * renderer_opengl: Centralize texture filters * A lot of code is shared between the filters thus is makes it sense to centralize them * Also fix an issue with partial texture uploads * Address review comments * rasterizer_cache: Use leading return types * rasterizer_cache: Cleanup null checks * renderer_opengl: Add additional logging * externals: Actually downgrade glad * For some reason I missed adding the files to git * surface_params: Do not check for levels in exact match * Some games will try to use the base level of a multi level surface. Checking for levels forces another surface to be created and a copy to be made which is both unncessary and breaks custom textures --------- Co-authored-by: bunnei <bunneidev@gmail.com>
2023-04-21 09:14:55 +02:00
return "textureLod(tex2, texcoord1, getLod(texcoord1 * vec2(textureSize(tex2, 0))) + "
"tex_lod_bias[2])";
else
Rasterizer cache refactor (#6375) * rasterizer_cache: Remove custom texture code * It's a hacky buggy mess, will be reimplemented later when the cache is in a better state * rasterizer_cache: Refactor surface upload/download * Switch to the texture_codec header which was written as part of the vulkan backend by steveice and me * Move most of the upload logic to the rasterizer cache and out of the surface object * Scaled uploads/downloads have been disabled for now since they require more runtime infrastructure * rasterizer_cache: Refactor runtime interface * Remove aspect enum which is the same as SurfaceType * Replace Subresource with specific structures for each operation (blit/copy/clear). This mimics moderns APIs vulkan much better * Pass the surface to the runtime instead of the texture * Implement CopyTextures with glCopyImageSubData which is available on 4.3 and gles. This function also has an overload for cubes which will be removed later. * rasterizer_cache: Move texture allocation to the runtime * renderer_opengl: Remove TextureDownloaderES * It's overly compilcated and unused at the moment. Will be replaced with a simple compute shader in a later commit * rasterizer_cache: Split CachedSurface * This commit splits CachedSurface into two classes, SurfaceBase which contains the backend agnostic functions and Surface which is the opengl specific part * For now the cache uses the opengl surface directly and there are a few ugly casts with watchers, those will be taken care of when the template convertion and watcher removal are added respectively * rasterizer_cache: Move reinterpreters to the runtime * rasterizer_cache: Move some pixel format function to the cpp file * rasterizer_cache: Common texture acceleration functions * They don't contain any backend specific code so they shouldn't be duplicated * rasterizer_cache: Remove BlitSurfaces * It's better to prefer copy/blit in the caller anyway * rasterizer_cache: Only allocate needed levels * rasterizer_cache: Move texture runtime out of common dir * Also shorten the util header filename * surface_params: Cleanup code * Add more comments, organize it a bit etc * rasterizer_cache: Move texture filtering to the runtime * rasterizer_cache: Move to VideoCore * renderer_opengl: Reimplement scaled uploads/downloads * Instead of looking up for temporary textures, each allocation now contains both a scaled and unscaled handle This allows the scale operations to be done inside the surface object itself and improves performance in general * In particular the scaled download code has been expanded to use ARB_get_texture_sub_image when possible which is faster and more convenient than glReadPixels. The latter is still relevant for OpenGLES though. * Finally allocations are now given a handy debug name that can be viewed from renderdoc. * rasterizer_cache: Remove global state * gl_rasterizer: Abstract common draw operations to Framebuffer * This also allows to cache framebuffer objects instead of always swapping the textures, something that particularly benefits mali gpus * rasterizer_cache: Implement multi-level surfaces * With this commit the cache can now directly upload and use mipmaps without needing to sync them with watchers. By using native mimaps directly this also adds support for mipmap for cube * Texture cubes have also been updated to drop the watcher requirement * host_shaders: Add CMake integration for string shaders * Improves build time shader generation making it much less prone to errors. Also moves the presentation shaders here to avoid embedding them to the cpp file. * Texture filter shaders now make explicit use of uniform bindings for better vulkan compatibility * renderer_opengl: Emulate lod bias in the shader * This way opengles can emulate it correctly * gl_rasterizer: Respect GL_MAX_TEXTURE_BUFFER_SIZE * Older Bifrost Mali GPUs only support up to 64kb texture buffers. Citra would try to allocate a much larger buffer the first 64kb of which would work fine but after that the driver starts misbehaving and showing various graphical glitches * rasterizer_cache: Cleanup CopySurface * renderer_opengl: Keep frames synchronized when using a GPU debugger * rasterizer_cache: Rename Surface to SurfaceRef * Makes it clear that surface is a shared_ptr and not an object * rasterizer_cache: Cleanup * Move constructor to the top of the file * Move FindMatch to the top as well and remove the Invalid flag which was redudant; all FindMatch calls used it expect from MatchFlags::Copy which ignores it anyway * gl_texture_runtime: Make driver const * gl_texture_runtime: Fix RGB8 format handling * The texture_codec header, being written with vulkan in mind converts RGB8 to RGBA8. The backend wasn't adjusted to account for this though and treated the data as RGB8. * Also remove D16 convertions, both opengl and vulkan are required to support this format so these are not needed * gl_texture_runtime: Reduce state switches during FBO blits * glBlitFramebuffer is only affected by the scissor rectangle so just disable scissor testing instead of resetting our entire state * surface_params: Prevent texcopy that spans multiple levels * It would have failed before as well, with multi-level surfaces it triggers the assert though * renderer_opengl: Centralize texture filters * A lot of code is shared between the filters thus is makes it sense to centralize them * Also fix an issue with partial texture uploads * Address review comments * rasterizer_cache: Use leading return types * rasterizer_cache: Cleanup null checks * renderer_opengl: Add additional logging * externals: Actually downgrade glad * For some reason I missed adding the files to git * surface_params: Do not check for levels in exact match * Some games will try to use the base level of a multi level surface. Checking for levels forces another surface to be created and a copy to be made which is both unncessary and breaks custom textures --------- Co-authored-by: bunnei <bunneidev@gmail.com>
2023-04-21 09:14:55 +02:00
return "textureLod(tex2, texcoord2, getLod(texcoord2 * vec2(textureSize(tex2, 0))) + "
"tex_lod_bias[2])";
case 3:
if (state.proctex.enable) {
return "ProcTex()";
} else {
2018-06-29 13:18:07 +02:00
LOG_DEBUG(Render_OpenGL, "Using Texture3 without enabling it");
return "vec4(0.0)";
}
Custom textures rewrite (#6452) * common: Add thread pool from yuzu * Is really useful for asynchronous operations like shader compilation and custom textures, will be used in following PRs * core: Improve ImageInterface * Provide a default implementation so frontends don't have to duplicate code registering the lodepng version * Add a dds version too which we will use in the next commit * rasterizer_cache: Rewrite custom textures * There's just too much to talk about here, look at the PR description for more details * rasterizer_cache: Implement basic pack configuration file * custom_tex_manager: Flip dumped textures * custom_tex_manager: Optimize custom texture hashing * If no convertions are needed then we can hash the decoded data directly removing the needed for duplicate decode * custom_tex_manager: Implement asynchronous texture loading * The file loading and decoding is offloaded into worker threads, while the upload itself still occurs in the main thread to avoid having to manage shared contexts * Address review comments * custom_tex_manager: Introduce custom material support * video_core: Move custom textures to separate directory * Also split the files to make the code cleaner * gl_texture_runtime: Generate mipmaps for material * custom_tex_manager: Prevent memory overflow when preloading * externals: Add dds-ktx as submodule * string_util: Return vector from SplitString * No code benefits from passing it as an argument * custom_textures: Use json config file * gl_rasterizer: Only bind material for unit 0 * Address review comments
2023-04-27 06:38:28 +02:00
case 4:
return "texture(tex_normal, texcoord0)";
default:
UNREACHABLE();
return "";
}
}
/// Writes the specified TEV stage source component(s)
static void AppendSource(std::string& out, const PicaFSConfig& config,
TevStageConfig::Source source, std::string_view index_name) {
using Source = TevStageConfig::Source;
switch (source) {
case Source::PrimaryColor:
out += "rounded_primary_color";
break;
case Source::PrimaryFragmentColor:
out += "primary_fragment_color";
break;
case Source::SecondaryFragmentColor:
out += "secondary_fragment_color";
break;
case Source::Texture0:
out += SampleTexture(config, 0);
break;
case Source::Texture1:
out += SampleTexture(config, 1);
break;
case Source::Texture2:
out += SampleTexture(config, 2);
break;
case Source::Texture3:
out += SampleTexture(config, 3);
break;
case Source::PreviousBuffer:
out += "combiner_buffer";
break;
case Source::Constant:
out += "const_color[";
out += index_name;
out += ']';
break;
case Source::Previous:
out += "last_tex_env_out";
break;
default:
out += "vec4(0.0)";
LOG_CRITICAL(Render_OpenGL, "Unknown source op {}", source);
break;
}
}
/// Writes the color components to use for the specified TEV stage color modifier
static void AppendColorModifier(std::string& out, const PicaFSConfig& config,
TevStageConfig::ColorModifier modifier,
TevStageConfig::Source source, std::string_view index_name) {
using ColorModifier = TevStageConfig::ColorModifier;
switch (modifier) {
case ColorModifier::SourceColor:
2016-04-19 00:53:42 +02:00
AppendSource(out, config, source, index_name);
out += ".rgb";
break;
case ColorModifier::OneMinusSourceColor:
out += "vec3(1.0) - ";
2016-04-19 00:53:42 +02:00
AppendSource(out, config, source, index_name);
out += ".rgb";
break;
case ColorModifier::SourceAlpha:
2016-04-19 00:53:42 +02:00
AppendSource(out, config, source, index_name);
out += ".aaa";
break;
case ColorModifier::OneMinusSourceAlpha:
out += "vec3(1.0) - ";
2016-04-19 00:53:42 +02:00
AppendSource(out, config, source, index_name);
out += ".aaa";
break;
case ColorModifier::SourceRed:
2016-04-19 00:53:42 +02:00
AppendSource(out, config, source, index_name);
out += ".rrr";
break;
case ColorModifier::OneMinusSourceRed:
out += "vec3(1.0) - ";
2016-04-19 00:53:42 +02:00
AppendSource(out, config, source, index_name);
out += ".rrr";
break;
case ColorModifier::SourceGreen:
2016-04-19 00:53:42 +02:00
AppendSource(out, config, source, index_name);
out += ".ggg";
break;
case ColorModifier::OneMinusSourceGreen:
out += "vec3(1.0) - ";
2016-04-19 00:53:42 +02:00
AppendSource(out, config, source, index_name);
out += ".ggg";
break;
case ColorModifier::SourceBlue:
2016-04-19 00:53:42 +02:00
AppendSource(out, config, source, index_name);
out += ".bbb";
break;
case ColorModifier::OneMinusSourceBlue:
out += "vec3(1.0) - ";
2016-04-19 00:53:42 +02:00
AppendSource(out, config, source, index_name);
out += ".bbb";
break;
default:
out += "vec3(0.0)";
LOG_CRITICAL(Render_OpenGL, "Unknown color modifier op {}", modifier);
break;
}
}
/// Writes the alpha component to use for the specified TEV stage alpha modifier
static void AppendAlphaModifier(std::string& out, const PicaFSConfig& config,
TevStageConfig::AlphaModifier modifier,
TevStageConfig::Source source, const std::string& index_name) {
using AlphaModifier = TevStageConfig::AlphaModifier;
switch (modifier) {
case AlphaModifier::SourceAlpha:
2016-04-19 00:53:42 +02:00
AppendSource(out, config, source, index_name);
out += ".a";
break;
case AlphaModifier::OneMinusSourceAlpha:
out += "1.0 - ";
2016-04-19 00:53:42 +02:00
AppendSource(out, config, source, index_name);
out += ".a";
break;
case AlphaModifier::SourceRed:
2016-04-19 00:53:42 +02:00
AppendSource(out, config, source, index_name);
out += ".r";
break;
case AlphaModifier::OneMinusSourceRed:
out += "1.0 - ";
2016-04-19 00:53:42 +02:00
AppendSource(out, config, source, index_name);
out += ".r";
break;
case AlphaModifier::SourceGreen:
2016-04-19 00:53:42 +02:00
AppendSource(out, config, source, index_name);
out += ".g";
break;
case AlphaModifier::OneMinusSourceGreen:
out += "1.0 - ";
2016-04-19 00:53:42 +02:00
AppendSource(out, config, source, index_name);
out += ".g";
break;
case AlphaModifier::SourceBlue:
2016-04-19 00:53:42 +02:00
AppendSource(out, config, source, index_name);
out += ".b";
break;
case AlphaModifier::OneMinusSourceBlue:
out += "1.0 - ";
2016-04-19 00:53:42 +02:00
AppendSource(out, config, source, index_name);
out += ".b";
break;
default:
out += "0.0";
LOG_CRITICAL(Render_OpenGL, "Unknown alpha modifier op {}", modifier);
break;
}
}
/// Writes the combiner function for the color components for the specified TEV stage operation
static void AppendColorCombiner(std::string& out, TevStageConfig::Operation operation,
std::string_view variable_name) {
out += "clamp(";
using Operation = TevStageConfig::Operation;
switch (operation) {
case Operation::Replace:
out += fmt::format("{}[0]", variable_name);
break;
case Operation::Modulate:
out += fmt::format("{0}[0] * {0}[1]", variable_name);
break;
case Operation::Add:
out += fmt::format("{0}[0] + {0}[1]", variable_name);
break;
case Operation::AddSigned:
out += fmt::format("{0}[0] + {0}[1] - vec3(0.5)", variable_name);
break;
case Operation::Lerp:
out += fmt::format("{0}[0] * {0}[2] + {0}[1] * (vec3(1.0) - {0}[2])", variable_name);
break;
case Operation::Subtract:
out += fmt::format("{0}[0] - {0}[1]", variable_name);
break;
case Operation::MultiplyThenAdd:
out += fmt::format("{0}[0] * {0}[1] + {0}[2]", variable_name);
break;
case Operation::AddThenMultiply:
out += fmt::format("min({0}[0] + {0}[1], vec3(1.0)) * {0}[2]", variable_name);
break;
case Operation::Dot3_RGB:
case Operation::Dot3_RGBA:
out +=
fmt::format("vec3(dot({0}[0] - vec3(0.5), {0}[1] - vec3(0.5)) * 4.0)", variable_name);
break;
default:
out += "vec3(0.0)";
LOG_CRITICAL(Render_OpenGL, "Unknown color combiner operation: {}", operation);
break;
}
out += ", vec3(0.0), vec3(1.0))"; // Clamp result to 0.0, 1.0
}
/// Writes the combiner function for the alpha component for the specified TEV stage operation
static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation operation,
std::string_view variable_name) {
out += "clamp(";
using Operation = TevStageConfig::Operation;
switch (operation) {
case Operation::Replace:
out += fmt::format("{}[0]", variable_name);
break;
case Operation::Modulate:
out += fmt::format("{0}[0] * {0}[1]", variable_name);
break;
case Operation::Add:
out += fmt::format("{0}[0] + {0}[1]", variable_name);
break;
case Operation::AddSigned:
out += fmt::format("{0}[0] + {0}[1] - 0.5", variable_name);
break;
case Operation::Lerp:
out += fmt::format("{0}[0] * {0}[2] + {0}[1] * (1.0 - {0}[2])", variable_name);
break;
case Operation::Subtract:
out += fmt::format("{0}[0] - {0}[1]", variable_name);
break;
case Operation::MultiplyThenAdd:
out += fmt::format("{0}[0] * {0}[1] + {0}[2]", variable_name);
break;
case Operation::AddThenMultiply:
out += fmt::format("min({0}[0] + {0}[1], 1.0) * {0}[2]", variable_name);
break;
default:
out += "0.0";
LOG_CRITICAL(Render_OpenGL, "Unknown alpha combiner operation: {}", operation);
break;
}
out += ", 0.0, 1.0)";
}
/// Writes the if-statement condition used to evaluate alpha testing
2017-01-29 00:12:09 +01:00
static void AppendAlphaTestCondition(std::string& out, FramebufferRegs::CompareFunc func) {
using CompareFunc = FramebufferRegs::CompareFunc;
switch (func) {
case CompareFunc::Never:
out += "true";
break;
case CompareFunc::Always:
out += "false";
break;
case CompareFunc::Equal:
case CompareFunc::NotEqual:
case CompareFunc::LessThan:
case CompareFunc::LessThanOrEqual:
case CompareFunc::GreaterThan:
case CompareFunc::GreaterThanOrEqual: {
static constexpr std::array op{"!=", "==", ">=", ">", "<=", "<"};
const auto index = static_cast<u32>(func) - static_cast<u32>(CompareFunc::Equal);
out += fmt::format("int(last_tex_env_out.a * 255.0) {} alphatest_ref", op[index]);
break;
}
default:
out += "false";
LOG_CRITICAL(Render_OpenGL, "Unknown alpha test condition {}", func);
break;
}
}
/// Writes the code to emulate the specified TEV stage
static void WriteTevStage(std::string& out, const PicaFSConfig& config, unsigned index) {
const auto stage =
2017-01-29 00:12:09 +01:00
static_cast<const TexturingRegs::TevStageConfig>(config.state.tev_stages[index]);
if (!IsPassThroughTevStage(stage)) {
const std::string index_name = std::to_string(index);
out += fmt::format("vec3 color_results_{}_1 = ", index_name);
2016-04-19 00:53:42 +02:00
AppendColorModifier(out, config, stage.color_modifier1, stage.color_source1, index_name);
out += fmt::format(";\nvec3 color_results_{}_2 = ", index_name);
2016-04-19 00:53:42 +02:00
AppendColorModifier(out, config, stage.color_modifier2, stage.color_source2, index_name);
out += fmt::format(";\nvec3 color_results_{}_3 = ", index_name);
2016-04-19 00:53:42 +02:00
AppendColorModifier(out, config, stage.color_modifier3, stage.color_source3, index_name);
out += fmt::format(";\nvec3 color_results_{}[3] = vec3[3](color_results_{}_1, "
"color_results_{}_2, color_results_{}_3);\n",
index_name, index_name, index_name, index_name);
// Round the output of each TEV stage to maintain the PICA's 8 bits of precision
out += fmt::format("vec3 color_output_{} = byteround(", index_name);
AppendColorCombiner(out, stage.color_op, "color_results_" + index_name);
out += ");\n";
if (stage.color_op == TevStageConfig::Operation::Dot3_RGBA) {
// result of Dot3_RGBA operation is also placed to the alpha component
out += fmt::format("float alpha_output_{0} = color_output_{0}[0];\n", index_name);
} else {
out += fmt::format("float alpha_results_{}[3] = float[3](", index_name);
AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1,
index_name);
out += ", ";
AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2,
index_name);
out += ", ";
AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3,
index_name);
out += ");\n";
out += fmt::format("float alpha_output_{} = byteround(", index_name);
AppendAlphaCombiner(out, stage.alpha_op, "alpha_results_" + index_name);
out += ");\n";
}
out += fmt::format("last_tex_env_out = vec4("
"clamp(color_output_{} * {}.0, vec3(0.0), vec3(1.0)), "
"clamp(alpha_output_{} * {}.0, 0.0, 1.0));\n",
index_name, stage.GetColorMultiplier(), index_name,
stage.GetAlphaMultiplier());
}
out += "combiner_buffer = next_combiner_buffer;\n";
if (config.TevStageUpdatesCombinerBufferColor(index))
out += "next_combiner_buffer.rgb = last_tex_env_out.rgb;\n";
if (config.TevStageUpdatesCombinerBufferAlpha(index))
out += "next_combiner_buffer.a = last_tex_env_out.a;\n";
}
/// Writes the code to emulate fragment lighting
static void WriteLighting(std::string& out, const PicaFSConfig& config) {
2016-05-01 23:28:39 +02:00
const auto& lighting = config.state.lighting;
// Define lighting globals
out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
"vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
"vec3 light_vector = vec3(0.0);\n"
"float light_distance = 0.0;\n"
2017-05-26 15:23:33 +02:00
"vec3 refl_value = vec3(0.0);\n"
"vec3 spot_dir = vec3(0.0);\n"
"vec3 half_vector = vec3(0.0);\n"
"float dot_product = 0.0;\n"
"float clamp_highlights = 1.0;\n"
"float geo_factor = 1.0;\n";
// Compute fragment normals and tangents
const auto Perturbation = [&] {
return fmt::format("2.0 * ({}).rgb - 1.0", SampleTexture(config, lighting.bump_selector));
};
Custom textures rewrite (#6452) * common: Add thread pool from yuzu * Is really useful for asynchronous operations like shader compilation and custom textures, will be used in following PRs * core: Improve ImageInterface * Provide a default implementation so frontends don't have to duplicate code registering the lodepng version * Add a dds version too which we will use in the next commit * rasterizer_cache: Rewrite custom textures * There's just too much to talk about here, look at the PR description for more details * rasterizer_cache: Implement basic pack configuration file * custom_tex_manager: Flip dumped textures * custom_tex_manager: Optimize custom texture hashing * If no convertions are needed then we can hash the decoded data directly removing the needed for duplicate decode * custom_tex_manager: Implement asynchronous texture loading * The file loading and decoding is offloaded into worker threads, while the upload itself still occurs in the main thread to avoid having to manage shared contexts * Address review comments * custom_tex_manager: Introduce custom material support * video_core: Move custom textures to separate directory * Also split the files to make the code cleaner * gl_texture_runtime: Generate mipmaps for material * custom_tex_manager: Prevent memory overflow when preloading * externals: Add dds-ktx as submodule * string_util: Return vector from SplitString * No code benefits from passing it as an argument * custom_textures: Use json config file * gl_rasterizer: Only bind material for unit 0 * Address review comments
2023-04-27 06:38:28 +02:00
if (config.state.use_custom_normal_map) {
const std::string normal_texel =
fmt::format("2.0 * ({}).rgb - 1.0", SampleTexture(config, 4));
out += fmt::format("vec3 surface_normal = {};\n", normal_texel);
out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n";
} else if (lighting.bump_mode == LightingRegs::LightingBumpMode::NormalMap) {
// Bump mapping is enabled using a normal map
out += fmt::format("vec3 surface_normal = {};\n", Perturbation());
// Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher
// precision result
2016-05-01 23:28:39 +02:00
if (lighting.bump_renorm) {
constexpr std::string_view val =
"(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))";
out += fmt::format("surface_normal.z = sqrt(max({}, 0.0));\n", val);
}
// The tangent vector is not perturbed by the normal map and is just a unit vector.
out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n";
} else if (lighting.bump_mode == LightingRegs::LightingBumpMode::TangentMap) {
// Bump mapping is enabled using a tangent map
out += fmt::format("vec3 surface_tangent = {};\n", Perturbation());
// Mathematically, recomputing Z-component of the tangent vector won't affect the relevant
// computation below, which is also confirmed on 3DS. So we don't bother recomputing here
// even if 'renorm' is enabled.
// The normal vector is not perturbed by the tangent map and is just a unit vector.
out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n";
} else {
// No bump mapping - surface local normal and tangent are just unit vectors
out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n"
"vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n";
}
// Rotate the surface-local normal by the interpolated normal quaternion to convert it to
// eyespace.
out += "vec4 normalized_normquat = normalize(normquat);\n"
"vec3 normal = quaternion_rotate(normalized_normquat, surface_normal);\n"
"vec3 tangent = quaternion_rotate(normalized_normquat, surface_tangent);\n";
if (lighting.enable_shadow) {
std::string shadow_texture = SampleTexture(config, lighting.shadow_selector);
if (lighting.shadow_invert) {
out += fmt::format("vec4 shadow = vec4(1.0) - {};\n", shadow_texture);
} else {
out += fmt::format("vec4 shadow = {};\n", shadow_texture);
}
} else {
out += "vec4 shadow = vec4(1.0);\n";
}
// Samples the specified lookup table for specular lighting
auto GetLutValue = [&lighting](LightingRegs::LightingSampler sampler, unsigned light_num,
LightingRegs::LightingLutInput input, bool abs) {
std::string index;
switch (input) {
case LightingRegs::LightingLutInput::NH:
index = "dot(normal, normalize(half_vector))";
break;
case LightingRegs::LightingLutInput::VH:
index = "dot(normalize(view), normalize(half_vector))";
break;
case LightingRegs::LightingLutInput::NV:
index = "dot(normal, normalize(view))";
break;
case LightingRegs::LightingLutInput::LN:
index = "dot(light_vector, normal)";
break;
2017-05-26 15:23:33 +02:00
case LightingRegs::LightingLutInput::SP:
index = "dot(light_vector, spot_dir)";
2017-05-26 15:23:33 +02:00
break;
case LightingRegs::LightingLutInput::CP:
// CP input is only available with configuration 7
if (lighting.config == LightingRegs::LightingConfig::Config7) {
// Note: even if the normal vector is modified by normal map, which is not the
// normal of the tangent plane anymore, the half angle vector is still projected
// using the modified normal vector.
constexpr std::string_view half_angle_proj =
"normalize(half_vector) - normal * dot(normal, normalize(half_vector))";
// Note: the half angle vector projection is confirmed not normalized before the dot
// product. The result is in fact not cos(phi) as the name suggested.
index = fmt::format("dot({}, tangent)", half_angle_proj);
} else {
index = "0.0";
}
break;
default:
LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input {}", (int)input);
UNIMPLEMENTED();
index = "0.0";
break;
}
const auto sampler_index = static_cast<u32>(sampler);
if (abs) {
// LUT index is in the range of (0.0, 1.0)
index = lighting.light[light_num].two_sided_diffuse
? fmt::format("abs({})", index)
: fmt::format("max({}, 0.0)", index);
return fmt::format("LookupLightingLUTUnsigned({}, {})", sampler_index, index);
} else {
// LUT index is in the range of (-1.0, 1.0)
return fmt::format("LookupLightingLUTSigned({}, {})", sampler_index, index);
}
};
// Write the code to emulate each enabled light
2016-05-01 23:28:39 +02:00
for (unsigned light_index = 0; light_index < lighting.src_num; ++light_index) {
const auto& light_config = lighting.light[light_index];
const std::string light_src = fmt::format("light_src[{}]", light_config.num);
// Compute light vector (directional or positional)
if (light_config.directional) {
out += fmt::format("light_vector = {}.position;\n", light_src);
} else {
out += fmt::format("light_vector = {}.position + view;\n", light_src);
}
out += fmt::format("light_distance = length(light_vector);\n", light_src);
out += fmt::format("light_vector = normalize(light_vector);\n", light_src);
out += fmt::format("spot_dir = {}.spot_direction;\n", light_src);
out += "half_vector = normalize(view) + light_vector;\n";
2017-05-26 15:23:33 +02:00
// Compute dot product of light_vector and normal, adjust if lighting is one-sided or
// two-sided
out += std::string("dot_product = ") + (light_config.two_sided_diffuse
? "abs(dot(light_vector, normal));\n"
: "max(dot(light_vector, normal), 0.0);\n");
// If enabled, clamp specular component if lighting result is zero
if (lighting.clamp_highlights) {
out += "clamp_highlights = sign(dot_product);\n";
}
2017-05-26 15:23:33 +02:00
// If enabled, compute spot light attenuation value
std::string spot_atten = "1.0";
if (light_config.spot_atten_enable &&
LightingRegs::IsLightingSamplerSupported(
lighting.config, LightingRegs::LightingSampler::SpotlightAttenuation)) {
const std::string value =
GetLutValue(LightingRegs::SpotlightAttenuationSampler(light_config.num),
light_config.num, lighting.lut_sp.type, lighting.lut_sp.abs_input);
spot_atten = fmt::format("({:#} * {})", lighting.lut_sp.scale, value);
2017-05-26 15:23:33 +02:00
}
// If enabled, compute distance attenuation value
std::string dist_atten = "1.0";
if (light_config.dist_atten_enable) {
const std::string index = fmt::format("clamp({}.dist_atten_scale * light_distance "
"+ {}.dist_atten_bias, 0.0, 1.0)",
light_src, light_src, light_src);
const auto sampler = LightingRegs::DistanceAttenuationSampler(light_config.num);
dist_atten = fmt::format("LookupLightingLUTUnsigned({}, {})", sampler, index);
}
if (light_config.geometric_factor_0 || light_config.geometric_factor_1) {
out += "geo_factor = dot(half_vector, half_vector);\n"
"geo_factor = geo_factor == 0.0 ? 0.0 : min("
"dot_product / geo_factor, 1.0);\n";
}
// Specular 0 component
std::string d0_lut_value = "1.0";
if (lighting.lut_d0.enable &&
LightingRegs::IsLightingSamplerSupported(
lighting.config, LightingRegs::LightingSampler::Distribution0)) {
// Lookup specular "distribution 0" LUT value
const std::string value =
GetLutValue(LightingRegs::LightingSampler::Distribution0, light_config.num,
lighting.lut_d0.type, lighting.lut_d0.abs_input);
d0_lut_value = fmt::format("({:#} * {})", lighting.lut_d0.scale, value);
}
std::string specular_0 = fmt::format("({} * {}.specular_0)", d0_lut_value, light_src);
if (light_config.geometric_factor_0) {
specular_0 = fmt::format("({} * geo_factor)", specular_0);
}
// If enabled, lookup ReflectRed value, otherwise, 1.0 is used
if (lighting.lut_rr.enable &&
LightingRegs::IsLightingSamplerSupported(lighting.config,
LightingRegs::LightingSampler::ReflectRed)) {
std::string value =
GetLutValue(LightingRegs::LightingSampler::ReflectRed, light_config.num,
lighting.lut_rr.type, lighting.lut_rr.abs_input);
value = fmt::format("({:#} * {})", lighting.lut_rr.scale, value);
out += fmt::format("refl_value.r = {};\n", value);
} else {
out += "refl_value.r = 1.0;\n";
}
// If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used
if (lighting.lut_rg.enable &&
LightingRegs::IsLightingSamplerSupported(lighting.config,
LightingRegs::LightingSampler::ReflectGreen)) {
std::string value =
GetLutValue(LightingRegs::LightingSampler::ReflectGreen, light_config.num,
lighting.lut_rg.type, lighting.lut_rg.abs_input);
value = fmt::format("({:#} * {})", lighting.lut_rg.scale, value);
out += fmt::format("refl_value.g = {};\n", value);
} else {
out += "refl_value.g = refl_value.r;\n";
}
// If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used
if (lighting.lut_rb.enable &&
LightingRegs::IsLightingSamplerSupported(lighting.config,
LightingRegs::LightingSampler::ReflectBlue)) {
std::string value =
GetLutValue(LightingRegs::LightingSampler::ReflectBlue, light_config.num,
lighting.lut_rb.type, lighting.lut_rb.abs_input);
value = fmt::format("({:#} * {})", lighting.lut_rb.scale, value);
out += fmt::format("refl_value.b = {};\n", value);
} else {
out += "refl_value.b = refl_value.r;\n";
}
// Specular 1 component
std::string d1_lut_value = "1.0";
if (lighting.lut_d1.enable &&
LightingRegs::IsLightingSamplerSupported(
lighting.config, LightingRegs::LightingSampler::Distribution1)) {
// Lookup specular "distribution 1" LUT value
const std::string value =
GetLutValue(LightingRegs::LightingSampler::Distribution1, light_config.num,
lighting.lut_d1.type, lighting.lut_d1.abs_input);
d1_lut_value = fmt::format("({:#} * {})", lighting.lut_d1.scale, value);
}
std::string specular_1 =
fmt::format("({} * refl_value * {}.specular_1)", d1_lut_value, light_src);
if (light_config.geometric_factor_1) {
specular_1 = fmt::format("({} * geo_factor)", specular_1);
}
// Fresnel
// Note: only the last entry in the light slots applies the Fresnel factor
if (light_index == lighting.src_num - 1 && lighting.lut_fr.enable &&
LightingRegs::IsLightingSamplerSupported(lighting.config,
LightingRegs::LightingSampler::Fresnel)) {
// Lookup fresnel LUT value
std::string value =
GetLutValue(LightingRegs::LightingSampler::Fresnel, light_config.num,
lighting.lut_fr.type, lighting.lut_fr.abs_input);
value = fmt::format("({:#} * {})", lighting.lut_fr.scale, value);
// Enabled for diffuse lighting alpha component
if (lighting.enable_primary_alpha) {
out += fmt::format("diffuse_sum.a = {};\n", value);
}
// Enabled for the specular lighting alpha component
if (lighting.enable_secondary_alpha) {
out += fmt::format("specular_sum.a = {};\n", value);
}
}
bool shadow_primary_enable = lighting.shadow_primary && light_config.shadow_enable;
bool shadow_secondary_enable = lighting.shadow_secondary && light_config.shadow_enable;
std::string shadow_primary = shadow_primary_enable ? " * shadow.rgb" : "";
std::string shadow_secondary = shadow_secondary_enable ? " * shadow.rgb" : "";
// Compute primary fragment color (diffuse lighting) function
out += fmt::format(
"diffuse_sum.rgb += (({}.diffuse * dot_product) + {}.ambient) * {} * {}{};\n",
light_src, light_src, dist_atten, spot_atten, shadow_primary);
// Compute secondary fragment color (specular lighting) function
out += fmt::format("specular_sum.rgb += ({} + {}) * clamp_highlights * {} * {}{};\n",
specular_0, specular_1, dist_atten, spot_atten, shadow_secondary);
}
// Apply shadow attenuation to alpha components if enabled
if (lighting.shadow_alpha) {
if (lighting.enable_primary_alpha) {
out += "diffuse_sum.a *= shadow.a;\n";
}
if (lighting.enable_secondary_alpha) {
out += "specular_sum.a *= shadow.a;\n";
}
}
// Sum final lighting result
out += "diffuse_sum.rgb += lighting_global_ambient;\n"
"primary_fragment_color = clamp(diffuse_sum, vec4(0.0), vec4(1.0));\n"
"secondary_fragment_color = clamp(specular_sum, vec4(0.0), vec4(1.0));\n";
}
using ProcTexClamp = TexturingRegs::ProcTexClamp;
using ProcTexShift = TexturingRegs::ProcTexShift;
using ProcTexCombiner = TexturingRegs::ProcTexCombiner;
using ProcTexFilter = TexturingRegs::ProcTexFilter;
static void AppendProcTexShiftOffset(std::string& out, std::string_view v, ProcTexShift mode,
ProcTexClamp clamp_mode) {
const std::string_view offset = (clamp_mode == ProcTexClamp::MirroredRepeat) ? "1.0" : "0.5";
switch (mode) {
case ProcTexShift::None:
2019-07-28 15:18:38 +02:00
out += "0.0";
break;
case ProcTexShift::Odd:
out += fmt::format("{} * float((int({}) / 2) % 2)", offset, v);
break;
case ProcTexShift::Even:
out += fmt::format("{} * float(((int({}) + 1) / 2) % 2)", offset, v);
break;
default:
LOG_CRITICAL(HW_GPU, "Unknown shift mode {}", mode);
2019-07-28 15:18:38 +02:00
out += "0.0";
break;
}
}
static void AppendProcTexClamp(std::string& out, std::string_view var, ProcTexClamp mode) {
switch (mode) {
case ProcTexClamp::ToZero:
out += fmt::format("{0} = {0} > 1.0 ? 0 : {0};\n", var);
break;
case ProcTexClamp::ToEdge:
out += fmt::format("{0} = min({0}, 1.0);\n", var);
break;
case ProcTexClamp::SymmetricalRepeat:
out += fmt::format("{0} = fract({0});\n", var);
break;
case ProcTexClamp::MirroredRepeat: {
out += fmt::format("{0} = int({0}) % 2 == 0 ? fract({0}) : 1.0 - fract({0});\n", var);
break;
}
case ProcTexClamp::Pulse:
out += fmt::format("{0} = {0} > 0.5 ? 1.0 : 0.0;\n", var);
break;
default:
LOG_CRITICAL(HW_GPU, "Unknown clamp mode {}", mode);
out += fmt::format("{0} = min({0}, 1.0);\n", var);
break;
}
}
static void AppendProcTexCombineAndMap(std::string& out, ProcTexCombiner combiner,
std::string_view offset) {
const auto combined = [combiner]() -> std::string_view {
switch (combiner) {
case ProcTexCombiner::U:
return "u";
case ProcTexCombiner::U2:
return "(u * u)";
case TexturingRegs::ProcTexCombiner::V:
return "v";
case TexturingRegs::ProcTexCombiner::V2:
return "(v * v)";
case TexturingRegs::ProcTexCombiner::Add:
return "((u + v) * 0.5)";
case TexturingRegs::ProcTexCombiner::Add2:
return "((u * u + v * v) * 0.5)";
case TexturingRegs::ProcTexCombiner::SqrtAdd2:
return "min(sqrt(u * u + v * v), 1.0)";
case TexturingRegs::ProcTexCombiner::Min:
return "min(u, v)";
case TexturingRegs::ProcTexCombiner::Max:
return "max(u, v)";
case TexturingRegs::ProcTexCombiner::RMax:
return "min(((u + v) * 0.5 + sqrt(u * u + v * v)) * 0.5, 1.0)";
default:
LOG_CRITICAL(HW_GPU, "Unknown combiner {}", combiner);
return "0.0";
}
}();
out += fmt::format("ProcTexLookupLUT({}, {})", offset, combined);
}
static void AppendProcTexSampler(std::string& out, const PicaFSConfig& config) {
// LUT sampling uitlity
// For NoiseLUT/ColorMap/AlphaMap, coord=0.0 is lut[0], coord=127.0/128.0 is lut[127] and
// coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using
// value entries and difference entries.
out += R"(
float ProcTexLookupLUT(int offset, float coord) {
2019-07-28 15:18:38 +02:00
coord *= 128.0;
float index_i = clamp(floor(coord), 0.0, 127.0);
float index_f = coord - index_i; // fract() cannot be used here because 128.0 needs to be
// extracted as index_i = 127.0 and index_f = 1.0
vec2 entry = texelFetch(texture_buffer_lut_rg, int(index_i) + offset).rg;
return clamp(entry.r + entry.g * index_f, 0.0, 1.0);
}
)";
// Noise utility
if (config.state.proctex.noise_enable) {
// See swrasterizer/proctex.cpp for more information about these functions
out += R"(
int ProcTexNoiseRand1D(int v) {
const int table[] = int[](0,4,10,8,4,9,7,12,5,15,13,14,11,15,2,11);
return ((v % 9 + 2) * 3 & 0xF) ^ table[(v / 9) & 0xF];
}
float ProcTexNoiseRand2D(vec2 point) {
const int table[] = int[](10,2,15,8,0,7,4,5,5,13,2,6,13,9,3,14);
int u2 = ProcTexNoiseRand1D(int(point.x));
int v2 = ProcTexNoiseRand1D(int(point.y));
v2 += ((u2 & 3) == 1) ? 4 : 0;
v2 ^= (u2 & 1) * 6;
v2 += 10 + u2;
v2 &= 0xF;
v2 ^= table[u2];
return -1.0 + float(v2) * 2.0/ 15.0;
}
float ProcTexNoiseCoef(vec2 x) {
vec2 grid = 9.0 * proctex_noise_f * abs(x + proctex_noise_p);
vec2 point = floor(grid);
vec2 frac = grid - point;
float g0 = ProcTexNoiseRand2D(point) * (frac.x + frac.y);
float g1 = ProcTexNoiseRand2D(point + vec2(1.0, 0.0)) * (frac.x + frac.y - 1.0);
float g2 = ProcTexNoiseRand2D(point + vec2(0.0, 1.0)) * (frac.x + frac.y - 1.0);
float g3 = ProcTexNoiseRand2D(point + vec2(1.0, 1.0)) * (frac.x + frac.y - 2.0);
float x_noise = ProcTexLookupLUT(proctex_noise_lut_offset, frac.x);
float y_noise = ProcTexLookupLUT(proctex_noise_lut_offset, frac.y);
float x0 = mix(g0, g1, x_noise);
float x1 = mix(g2, g3, x_noise);
return mix(x0, x1, y_noise);
}
)";
}
out += "vec4 SampleProcTexColor(float lut_coord, int level) {\n";
out += fmt::format("int lut_width = {} >> level;\n", config.state.proctex.lut_width);
// Offsets for level 4-7 seem to be hardcoded
out += fmt::format("int lut_offsets[8] = int[]({}, {}, {}, {}, 0xF0, 0xF8, 0xFC, 0xFE);\n",
config.state.proctex.lut_offset0, config.state.proctex.lut_offset1,
config.state.proctex.lut_offset2, config.state.proctex.lut_offset3);
out += "int lut_offset = lut_offsets[level];\n";
// For the color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1]
2019-07-28 15:18:38 +02:00
out += "lut_coord *= float(lut_width - 1);\n";
switch (config.state.proctex.lut_filter) {
case ProcTexFilter::Linear:
case ProcTexFilter::LinearMipmapLinear:
case ProcTexFilter::LinearMipmapNearest:
out += "int lut_index_i = int(lut_coord) + lut_offset;\n";
out += "float lut_index_f = fract(lut_coord);\n";
out += "return texelFetch(texture_buffer_lut_rgba, lut_index_i + "
"proctex_lut_offset) + "
"lut_index_f * "
"texelFetch(texture_buffer_lut_rgba, lut_index_i + proctex_diff_lut_offset);\n";
break;
case ProcTexFilter::Nearest:
case ProcTexFilter::NearestMipmapLinear:
case ProcTexFilter::NearestMipmapNearest:
2019-07-28 15:18:38 +02:00
out += "lut_coord += float(lut_offset);\n";
out += "return texelFetch(texture_buffer_lut_rgba, int(round(lut_coord)) + "
"proctex_lut_offset);\n";
break;
}
out += "}\n";
out += "vec4 ProcTex() {\n";
if (config.state.proctex.coord < 3) {
out += fmt::format("vec2 uv = abs(texcoord{});\n", config.state.proctex.coord);
} else {
2018-06-29 13:18:07 +02:00
LOG_CRITICAL(Render_OpenGL, "Unexpected proctex.coord >= 3");
out += "vec2 uv = abs(texcoord0);\n";
}
// This LOD formula is the same as the LOD upper limit defined in OpenGL.
// f(x, y) <= m_u + m_v + m_w
// (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail)
// Note: this is different from the one normal 2D textures use.
out += "vec2 duv = max(abs(dFdx(uv)), abs(dFdy(uv)));\n";
// unlike normal texture, the bias is inside the log2
out += fmt::format("float lod = log2(abs(float({}) * proctex_bias) * (duv.x + duv.y));\n",
config.state.proctex.lut_width);
out += "if (proctex_bias == 0.0) lod = 0.0;\n";
out += fmt::format("lod = clamp(lod, {:#}, {:#});\n",
std::max(0.0f, static_cast<float>(config.state.proctex.lod_min)),
std::min(7.0f, static_cast<float>(config.state.proctex.lod_max)));
// Get shift offset before noise generation
out += "float u_shift = ";
AppendProcTexShiftOffset(out, "uv.y", config.state.proctex.u_shift,
config.state.proctex.u_clamp);
out += ";\n";
out += "float v_shift = ";
AppendProcTexShiftOffset(out, "uv.x", config.state.proctex.v_shift,
config.state.proctex.v_clamp);
out += ";\n";
// Generate noise
if (config.state.proctex.noise_enable) {
out += "uv += proctex_noise_a * ProcTexNoiseCoef(uv);\n"
"uv = abs(uv);\n";
}
// Shift
out += "float u = uv.x + u_shift;\n"
"float v = uv.y + v_shift;\n";
// Clamp
AppendProcTexClamp(out, "u", config.state.proctex.u_clamp);
AppendProcTexClamp(out, "v", config.state.proctex.v_clamp);
// Combine and map
out += "float lut_coord = ";
AppendProcTexCombineAndMap(out, config.state.proctex.color_combiner,
"proctex_color_map_offset");
out += ";\n";
switch (config.state.proctex.lut_filter) {
case ProcTexFilter::Linear:
case ProcTexFilter::Nearest:
out += "vec4 final_color = SampleProcTexColor(lut_coord, 0);\n";
break;
case ProcTexFilter::NearestMipmapNearest:
case ProcTexFilter::LinearMipmapNearest:
out += "vec4 final_color = SampleProcTexColor(lut_coord, int(round(lod)));\n";
break;
case ProcTexFilter::NearestMipmapLinear:
case ProcTexFilter::LinearMipmapLinear:
out += "int lod_i = int(lod);\n"
"float lod_f = fract(lod);\n"
"vec4 final_color = mix(SampleProcTexColor(lut_coord, lod_i), "
"SampleProcTexColor(lut_coord, lod_i + 1), lod_f);\n";
break;
}
if (config.state.proctex.separate_alpha) {
// Note: in separate alpha mode, the alpha channel skips the color LUT look up stage. It
// uses the output of CombineAndMap directly instead.
out += "float final_alpha = ";
AppendProcTexCombineAndMap(out, config.state.proctex.alpha_combiner,
"proctex_alpha_map_offset");
out += ";\n";
out += "return vec4(final_color.xyz, final_alpha);\n}\n";
} else {
out += "return final_color;\n}\n";
}
}
ShaderDecompiler::ProgramResult GenerateFragmentShader(const PicaFSConfig& config,
bool separable_shader) {
2016-05-01 23:28:39 +02:00
const auto& state = config.state;
std::string out;
2016-05-01 23:28:39 +02:00
if (separable_shader && !GLES) {
out += "#extension GL_ARB_separate_shader_objects : enable\n";
}
2016-12-25 20:55:24 +01:00
if (GLES) {
out += fragment_shader_precision_OES;
}
out += GetVertexInterfaceDeclaration(false, separable_shader);
out += R"(
#ifndef CITRA_GLES
2015-12-02 19:23:51 +01:00
in vec4 gl_FragCoord;
#endif // CITRA_GLES
2015-12-02 19:23:51 +01:00
out vec4 color;
uniform sampler2D tex0;
uniform sampler2D tex1;
uniform sampler2D tex2;
Custom textures rewrite (#6452) * common: Add thread pool from yuzu * Is really useful for asynchronous operations like shader compilation and custom textures, will be used in following PRs * core: Improve ImageInterface * Provide a default implementation so frontends don't have to duplicate code registering the lodepng version * Add a dds version too which we will use in the next commit * rasterizer_cache: Rewrite custom textures * There's just too much to talk about here, look at the PR description for more details * rasterizer_cache: Implement basic pack configuration file * custom_tex_manager: Flip dumped textures * custom_tex_manager: Optimize custom texture hashing * If no convertions are needed then we can hash the decoded data directly removing the needed for duplicate decode * custom_tex_manager: Implement asynchronous texture loading * The file loading and decoding is offloaded into worker threads, while the upload itself still occurs in the main thread to avoid having to manage shared contexts * Address review comments * custom_tex_manager: Introduce custom material support * video_core: Move custom textures to separate directory * Also split the files to make the code cleaner * gl_texture_runtime: Generate mipmaps for material * custom_tex_manager: Prevent memory overflow when preloading * externals: Add dds-ktx as submodule * string_util: Return vector from SplitString * No code benefits from passing it as an argument * custom_textures: Use json config file * gl_rasterizer: Only bind material for unit 0 * Address review comments
2023-04-27 06:38:28 +02:00
uniform sampler2D tex_normal; //< Used for custom normal maps
2018-03-09 15:46:34 +01:00
uniform samplerCube tex_cube;
uniform samplerBuffer texture_buffer_lut_lf;
uniform samplerBuffer texture_buffer_lut_rg;
uniform samplerBuffer texture_buffer_lut_rgba;
layout(r32ui) uniform readonly uimage2D shadow_texture_px;
layout(r32ui) uniform readonly uimage2D shadow_texture_nx;
layout(r32ui) uniform readonly uimage2D shadow_texture_py;
layout(r32ui) uniform readonly uimage2D shadow_texture_ny;
layout(r32ui) uniform readonly uimage2D shadow_texture_pz;
layout(r32ui) uniform readonly uimage2D shadow_texture_nz;
layout(r32ui) uniform uimage2D shadow_buffer;
)";
out += UniformBlockDef;
out += R"(
// Rotate the vector v by the quaternion q
vec3 quaternion_rotate(vec4 q, vec3 v) {
return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v);
}
float LookupLightingLUT(int lut_index, int index, float delta) {
vec2 entry = texelFetch(texture_buffer_lut_lf, lighting_lut_offset[lut_index >> 2][lut_index & 3] + index).rg;
return entry.r + entry.g * delta;
}
float LookupLightingLUTUnsigned(int lut_index, float pos) {
int index = clamp(int(pos * 256.0), 0, 255);
float delta = pos * 256.0 - float(index);
return LookupLightingLUT(lut_index, index, delta);
}
float LookupLightingLUTSigned(int lut_index, float pos) {
int index = clamp(int(pos * 128.0), -128, 127);
float delta = pos * 128.0 - float(index);
if (index < 0) index += 256;
return LookupLightingLUT(lut_index, index, delta);
}
float byteround(float x) {
return round(x * 255.0) * (1.0 / 255.0);
}
vec2 byteround(vec2 x) {
return round(x * 255.0) * (1.0 / 255.0);
}
vec3 byteround(vec3 x) {
return round(x * 255.0) * (1.0 / 255.0);
}
vec4 byteround(vec4 x) {
return round(x * 255.0) * (1.0 / 255.0);
}
// PICA's LOD formula for 2D textures.
// This LOD formula is the same as the LOD lower limit defined in OpenGL.
// f(x, y) >= max{m_u, m_v, m_w}
// (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail)
float getLod(vec2 coord) {
vec2 d = max(abs(dFdx(coord)), abs(dFdy(coord)));
return log2(max(d.x, d.y));
}
uvec2 DecodeShadow(uint pixel) {
return uvec2(pixel >> 8, pixel & 0xFFu);
}
uint EncodeShadow(uvec2 pixel) {
return (pixel.x << 8) | pixel.y;
}
float CompareShadow(uint pixel, uint z) {
uvec2 p = DecodeShadow(pixel);
return mix(float(p.y) * (1.0 / 255.0), 0.0, p.x <= z);
}
float SampleShadow2D(ivec2 uv, uint z) {
if (any(bvec4( lessThan(uv, ivec2(0)), greaterThanEqual(uv, imageSize(shadow_texture_px)) )))
return 1.0;
return CompareShadow(imageLoad(shadow_texture_px, uv).x, z);
}
float mix2(vec4 s, vec2 a) {
vec2 t = mix(s.xy, s.zw, a.yy);
return mix(t.x, t.y, a.x);
}
vec4 shadowTexture(vec2 uv, float w) {
)";
if (!config.state.shadow_texture_orthographic) {
out += "uv /= w;";
}
2019-07-28 15:18:38 +02:00
out += "uint z = uint(max(0, int(min(abs(w), 1.0) * float(0xFFFFFF)) - shadow_texture_bias));";
out += R"(
vec2 coord = vec2(imageSize(shadow_texture_px)) * uv - vec2(0.5);
vec2 coord_floor = floor(coord);
vec2 f = coord - coord_floor;
ivec2 i = ivec2(coord_floor);
vec4 s = vec4(
SampleShadow2D(i , z),
SampleShadow2D(i + ivec2(1, 0), z),
SampleShadow2D(i + ivec2(0, 1), z),
SampleShadow2D(i + ivec2(1, 1), z));
return vec4(mix2(s, f));
}
vec4 shadowTextureCube(vec2 uv, float w) {
ivec2 size = imageSize(shadow_texture_px);
vec3 c = vec3(uv, w);
vec3 a = abs(c);
if (a.x > a.y && a.x > a.z) {
w = a.x;
uv = -c.zy;
if (c.x < 0.0) uv.x = -uv.x;
} else if (a.y > a.z) {
w = a.y;
uv = c.xz;
if (c.y < 0.0) uv.y = -uv.y;
} else {
w = a.z;
uv = -c.xy;
if (c.z > 0.0) uv.x = -uv.x;
}
)";
2019-07-28 15:18:38 +02:00
out += "uint z = uint(max(0, int(min(w, 1.0) * float(0xFFFFFF)) - shadow_texture_bias));";
out += R"(
vec2 coord = vec2(size) * (uv / w * vec2(0.5) + vec2(0.5)) - vec2(0.5);
vec2 coord_floor = floor(coord);
vec2 f = coord - coord_floor;
ivec2 i00 = ivec2(coord_floor);
ivec2 i10 = i00 + ivec2(1, 0);
ivec2 i01 = i00 + ivec2(0, 1);
ivec2 i11 = i00 + ivec2(1, 1);
ivec2 cmin = ivec2(0), cmax = size - ivec2(1, 1);
i00 = clamp(i00, cmin, cmax);
i10 = clamp(i10, cmin, cmax);
i01 = clamp(i01, cmin, cmax);
i11 = clamp(i11, cmin, cmax);
uvec4 pixels;
// This part should have been refactored into functions,
// but many drivers don't like passing uimage2D as parameters
if (a.x > a.y && a.x > a.z) {
if (c.x > 0.0)
pixels = uvec4(
imageLoad(shadow_texture_px, i00).r,
imageLoad(shadow_texture_px, i10).r,
imageLoad(shadow_texture_px, i01).r,
imageLoad(shadow_texture_px, i11).r);
else
pixels = uvec4(
imageLoad(shadow_texture_nx, i00).r,
imageLoad(shadow_texture_nx, i10).r,
imageLoad(shadow_texture_nx, i01).r,
imageLoad(shadow_texture_nx, i11).r);
} else if (a.y > a.z) {
if (c.y > 0.0)
pixels = uvec4(
imageLoad(shadow_texture_py, i00).r,
imageLoad(shadow_texture_py, i10).r,
imageLoad(shadow_texture_py, i01).r,
imageLoad(shadow_texture_py, i11).r);
else
pixels = uvec4(
imageLoad(shadow_texture_ny, i00).r,
imageLoad(shadow_texture_ny, i10).r,
imageLoad(shadow_texture_ny, i01).r,
imageLoad(shadow_texture_ny, i11).r);
} else {
if (c.z > 0.0)
pixels = uvec4(
imageLoad(shadow_texture_pz, i00).r,
imageLoad(shadow_texture_pz, i10).r,
imageLoad(shadow_texture_pz, i01).r,
imageLoad(shadow_texture_pz, i11).r);
else
pixels = uvec4(
imageLoad(shadow_texture_nz, i00).r,
imageLoad(shadow_texture_nz, i10).r,
imageLoad(shadow_texture_nz, i01).r,
imageLoad(shadow_texture_nz, i11).r);
}
vec4 s = vec4(
CompareShadow(pixels.x, z),
CompareShadow(pixels.y, z),
CompareShadow(pixels.z, z),
CompareShadow(pixels.w, z));
return vec4(mix2(s, f));
}
)";
if (config.state.proctex.enable)
AppendProcTexSampler(out, config);
// We round the interpolated primary color to the nearest 1/255th
// This maintains the PICA's 8 bits of precision
out += R"(
void main() {
vec4 rounded_primary_color = byteround(primary_color);
vec4 primary_fragment_color = vec4(0.0);
vec4 secondary_fragment_color = vec4(0.0);
)";
// Do not do any sort of processing if it's obvious we're not going to pass the alpha test
2017-01-29 00:12:09 +01:00
if (state.alpha_test_func == FramebufferRegs::CompareFunc::Never) {
out += "discard; }";
return {std::move(out)};
}
2015-12-02 19:23:51 +01:00
// Append the scissor test
if (state.scissor_test_mode != RasterizerRegs::ScissorMode::Disabled) {
2015-12-02 19:23:51 +01:00
out += "if (";
// Negate the condition if we have to keep only the pixels outside the scissor box
if (state.scissor_test_mode == RasterizerRegs::ScissorMode::Include) {
out += '!';
}
out += "(gl_FragCoord.x >= float(scissor_x1) && "
"gl_FragCoord.y >= float(scissor_y1) && "
"gl_FragCoord.x < float(scissor_x2) && "
"gl_FragCoord.y < float(scissor_y2))) discard;\n";
2015-12-02 19:23:51 +01:00
}
// After perspective divide, OpenGL transform z_over_w from [-1, 1] to [near, far]. Here we use
// default near = 0 and far = 1, and undo the transformation to get the original z_over_w, then
// do our own transformation according to PICA specification.
out += "float z_over_w = 2.0 * gl_FragCoord.z - 1.0;\n"
"float depth = z_over_w * depth_scale + depth_offset;\n";
2017-01-29 00:12:09 +01:00
if (state.depthmap_enable == RasterizerRegs::DepthBuffering::WBuffering) {
2016-05-21 01:04:57 +02:00
out += "depth /= gl_FragCoord.w;\n";
}
2016-05-01 23:28:39 +02:00
if (state.lighting.enable)
WriteLighting(out, config);
out += "vec4 combiner_buffer = vec4(0.0);\n"
"vec4 next_combiner_buffer = tev_combiner_buffer_color;\n"
"vec4 last_tex_env_out = vec4(0.0);\n";
for (std::size_t index = 0; index < state.tev_stages.size(); ++index) {
WriteTevStage(out, config, static_cast<u32>(index));
}
2017-01-29 00:12:09 +01:00
if (state.alpha_test_func != FramebufferRegs::CompareFunc::Always) {
out += "if (";
2016-05-01 23:28:39 +02:00
AppendAlphaTestCondition(out, state.alpha_test_func);
out += ") discard;\n";
}
2016-05-21 01:04:57 +02:00
// Append fog combiner
2017-01-29 00:12:09 +01:00
if (state.fog_mode == TexturingRegs::FogMode::Fog) {
2016-05-21 01:04:57 +02:00
// Get index into fog LUT
if (state.fog_flip) {
out += "float fog_index = (1.0 - float(depth)) * 128.0;\n";
2016-05-21 01:04:57 +02:00
} else {
out += "float fog_index = depth * 128.0;\n";
}
2016-05-21 01:04:57 +02:00
// Generate clamped fog factor from LUT for given fog index
out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n"
"float fog_f = fog_index - fog_i;\n"
"vec2 fog_lut_entry = texelFetch(texture_buffer_lut_lf, int(fog_i) + "
"fog_lut_offset).rg;\n"
"float fog_factor = fog_lut_entry.r + fog_lut_entry.g * fog_f;\n"
"fog_factor = clamp(fog_factor, 0.0, 1.0);\n";
2016-05-21 01:04:57 +02:00
// Blend the fog
out += "last_tex_env_out.rgb = mix(fog_color.rgb, last_tex_env_out.rgb, fog_factor);\n";
} else if (state.fog_mode == TexturingRegs::FogMode::Gas) {
Core::System::GetInstance().TelemetrySession().AddField(
Common::Telemetry::FieldType::Session, "VideoCore_Pica_UseGasMode", true);
2018-06-29 13:18:07 +02:00
LOG_CRITICAL(Render_OpenGL, "Unimplemented gas mode");
out += "discard; }";
return {std::move(out)};
}
2016-05-21 01:04:57 +02:00
if (state.shadow_rendering) {
out += R"(
uint d = uint(clamp(depth, 0.0, 1.0) * float(0xFFFFFF));
uint s = uint(last_tex_env_out.g * float(0xFF));
ivec2 image_coord = ivec2(gl_FragCoord.xy);
uint old = imageLoad(shadow_buffer, image_coord).x;
uint new;
uint old2;
do {
old2 = old;
uvec2 ref = DecodeShadow(old);
if (d < ref.x) {
if (s == 0u) {
ref.x = d;
} else {
s = uint(float(s) / (shadow_bias_constant + shadow_bias_linear * float(d) / float(ref.x)));
ref.y = min(s, ref.y);
}
}
new = EncodeShadow(ref);
} while ((old = imageAtomicCompSwap(shadow_buffer, image_coord, old, new)) != old2);
)";
} else {
out += "gl_FragDepth = depth;\n";
// Round the final fragment color to maintain the PICA's 8 bits of precision
out += "color = byteround(last_tex_env_out);\n";
}
if (GLES) {
if (!state.alphablend_enable) {
switch (state.logic_op) {
case FramebufferRegs::LogicOp::Clear:
out += "color = vec4(0);\n";
break;
case FramebufferRegs::LogicOp::Set:
out += "color = vec4(1);\n";
break;
case FramebufferRegs::LogicOp::Copy:
// Take the color output as-is
break;
case FramebufferRegs::LogicOp::CopyInverted:
out += "color = ~color;\n";
break;
case FramebufferRegs::LogicOp::NoOp:
// We need to discard the color, but not necessarily the depth. This is not possible
// with fragment shader alone, so we emulate this behavior on GLES with glColorMask.
break;
default:
LOG_CRITICAL(HW_GPU, "Unhandled logic_op {:x}", static_cast<int>(state.logic_op));
UNIMPLEMENTED();
}
}
}
out += '}';
return {std::move(out)};
}
ShaderDecompiler::ProgramResult GenerateTrivialVertexShader(bool separable_shader) {
std::string out;
if (separable_shader && !GLES) {
out += "#extension GL_ARB_separate_shader_objects : enable\n";
}
out +=
fmt::format("layout(location = {}) in vec4 vert_position;\n"
"layout(location = {}) in vec4 vert_color;\n"
"layout(location = {}) in vec2 vert_texcoord0;\n"
"layout(location = {}) in vec2 vert_texcoord1;\n"
"layout(location = {}) in vec2 vert_texcoord2;\n"
"layout(location = {}) in float vert_texcoord0_w;\n"
"layout(location = {}) in vec4 vert_normquat;\n"
"layout(location = {}) in vec3 vert_view;\n",
ATTRIBUTE_POSITION, ATTRIBUTE_COLOR, ATTRIBUTE_TEXCOORD0, ATTRIBUTE_TEXCOORD1,
ATTRIBUTE_TEXCOORD2, ATTRIBUTE_TEXCOORD0_W, ATTRIBUTE_NORMQUAT, ATTRIBUTE_VIEW);
out += GetVertexInterfaceDeclaration(true, separable_shader);
out += UniformBlockDef;
out += R"(
void main() {
primary_color = vert_color;
texcoord0 = vert_texcoord0;
texcoord1 = vert_texcoord1;
texcoord2 = vert_texcoord2;
2016-04-19 00:53:42 +02:00
texcoord0_w = vert_texcoord0_w;
normquat = vert_normquat;
view = vert_view;
gl_Position = vert_position;
#if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)
gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0
gl_ClipDistance[1] = dot(clip_coef, vert_position);
#endif // !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)
}
)";
return {std::move(out)};
}
std::optional<ShaderDecompiler::ProgramResult> GenerateVertexShader(
const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config, bool separable_shader) {
std::string out;
if (separable_shader && !GLES) {
out += "#extension GL_ARB_separate_shader_objects : enable\n";
}
out += ShaderDecompiler::GetCommonDeclarations();
std::array<bool, 16> used_regs{};
const auto get_input_reg = [&used_regs](u32 reg) {
ASSERT(reg < 16);
used_regs[reg] = true;
return fmt::format("vs_in_reg{}", reg);
};
const auto get_output_reg = [&](u32 reg) -> std::string {
ASSERT(reg < 16);
if (config.state.output_map[reg] < config.state.num_outputs) {
return fmt::format("vs_out_attr{}", config.state.output_map[reg]);
}
return "";
};
auto program_source_opt = ShaderDecompiler::DecompileProgram(
setup.program_code, setup.swizzle_data, config.state.main_offset, get_input_reg,
get_output_reg, config.state.sanitize_mul);
if (!program_source_opt)
return std::nullopt;
std::string& program_source = program_source_opt->code;
out += R"(
#define uniforms vs_uniforms
layout (std140) uniform vs_config {
pica_uniforms uniforms;
};
)";
// input attributes declaration
for (std::size_t i = 0; i < used_regs.size(); ++i) {
if (used_regs[i]) {
out += fmt::format("layout(location = {0}) in vec4 vs_in_reg{0};\n", i);
}
}
out += '\n';
// output attributes declaration
for (u32 i = 0; i < config.state.num_outputs; ++i) {
out += (separable_shader ? "layout(location = " + std::to_string(i) + ")" : std::string{}) +
" out vec4 vs_out_attr" + std::to_string(i) + ";\n";
}
out += "\nvoid main() {\n";
for (u32 i = 0; i < config.state.num_outputs; ++i) {
out += fmt::format(" vs_out_attr{} = vec4(0.0, 0.0, 0.0, 1.0);\n", i);
}
out += "\n exec_shader();\n}\n\n";
out += program_source;
return {{std::move(out)}};
}
static std::string GetGSCommonSource(const PicaGSConfigCommonRaw& config, bool separable_shader) {
std::string out = GetVertexInterfaceDeclaration(true, separable_shader);
out += UniformBlockDef;
out += ShaderDecompiler::GetCommonDeclarations();
out += '\n';
for (u32 i = 0; i < config.vs_output_attributes; ++i) {
out += (separable_shader ? "layout(location = " + std::to_string(i) + ")" : std::string{}) +
" in vec4 vs_out_attr" + std::to_string(i) + "[];\n";
}
out += R"(
struct Vertex {
)";
out += fmt::format(" vec4 attributes[{}];\n", config.gs_output_attributes);
out += "};\n\n";
const auto semantic = [&config](VSOutputAttributes::Semantic slot_semantic) -> std::string {
const u32 slot = static_cast<u32>(slot_semantic);
const u32 attrib = config.semantic_maps[slot].attribute_index;
const u32 comp = config.semantic_maps[slot].component_index;
if (attrib < config.gs_output_attributes) {
return fmt::format("vtx.attributes[{}].{}", attrib, "xyzw"[comp]);
}
return "0.0";
};
out += "vec4 GetVertexQuaternion(Vertex vtx) {\n";
out += " return vec4(" + semantic(VSOutputAttributes::QUATERNION_X) + ", " +
semantic(VSOutputAttributes::QUATERNION_Y) + ", " +
semantic(VSOutputAttributes::QUATERNION_Z) + ", " +
semantic(VSOutputAttributes::QUATERNION_W) + ");\n";
out += "}\n\n";
out += "void EmitVtx(Vertex vtx, bool quats_opposite) {\n";
out += " vec4 vtx_pos = vec4(" + semantic(VSOutputAttributes::POSITION_X) + ", " +
semantic(VSOutputAttributes::POSITION_Y) + ", " +
semantic(VSOutputAttributes::POSITION_Z) + ", " +
semantic(VSOutputAttributes::POSITION_W) + ");\n";
out += " gl_Position = vtx_pos;\n";
out += "#if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)\n";
out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0
out += " gl_ClipDistance[1] = dot(clip_coef, vtx_pos);\n";
out += "#endif // !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)\n\n";
out += " vec4 vtx_quat = GetVertexQuaternion(vtx);\n";
out += " normquat = mix(vtx_quat, -vtx_quat, bvec4(quats_opposite));\n\n";
out += " vec4 vtx_color = vec4(" + semantic(VSOutputAttributes::COLOR_R) + ", " +
semantic(VSOutputAttributes::COLOR_G) + ", " + semantic(VSOutputAttributes::COLOR_B) +
", " + semantic(VSOutputAttributes::COLOR_A) + ");\n";
out += " primary_color = min(abs(vtx_color), vec4(1.0));\n\n";
out += " texcoord0 = vec2(" + semantic(VSOutputAttributes::TEXCOORD0_U) + ", " +
semantic(VSOutputAttributes::TEXCOORD0_V) + ");\n";
out += " texcoord1 = vec2(" + semantic(VSOutputAttributes::TEXCOORD1_U) + ", " +
semantic(VSOutputAttributes::TEXCOORD1_V) + ");\n\n";
out += " texcoord0_w = " + semantic(VSOutputAttributes::TEXCOORD0_W) + ";\n";
out += " view = vec3(" + semantic(VSOutputAttributes::VIEW_X) + ", " +
semantic(VSOutputAttributes::VIEW_Y) + ", " + semantic(VSOutputAttributes::VIEW_Z) +
");\n\n";
out += " texcoord2 = vec2(" + semantic(VSOutputAttributes::TEXCOORD2_U) + ", " +
semantic(VSOutputAttributes::TEXCOORD2_V) + ");\n\n";
out += " EmitVertex();\n";
out += "}\n";
out += R"(
bool AreQuaternionsOpposite(vec4 qa, vec4 qb) {
return (dot(qa, qb) < 0.0);
}
void EmitPrim(Vertex vtx0, Vertex vtx1, Vertex vtx2) {
EmitVtx(vtx0, false);
EmitVtx(vtx1, AreQuaternionsOpposite(GetVertexQuaternion(vtx0), GetVertexQuaternion(vtx1)));
EmitVtx(vtx2, AreQuaternionsOpposite(GetVertexQuaternion(vtx0), GetVertexQuaternion(vtx2)));
EndPrimitive();
}
)";
return out;
};
ShaderDecompiler::ProgramResult GenerateFixedGeometryShader(const PicaFixedGSConfig& config,
bool separable_shader) {
std::string out;
if (separable_shader && !GLES) {
out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
}
out += R"(
layout(triangles) in;
layout(triangle_strip, max_vertices = 3) out;
)";
out += GetGSCommonSource(config.state, separable_shader);
out += R"(
void main() {
Vertex prim_buffer[3];
)";
for (u32 vtx = 0; vtx < 3; ++vtx) {
out += fmt::format(" prim_buffer[{}].attributes = vec4[{}](", vtx,
config.state.gs_output_attributes);
for (u32 i = 0; i < config.state.vs_output_attributes; ++i) {
out += fmt::format("{}vs_out_attr{}[{}]", i == 0 ? "" : ", ", i, vtx);
}
out += ");\n";
}
out += " EmitPrim(prim_buffer[0], prim_buffer[1], prim_buffer[2]);\n";
out += "}\n";
return {std::move(out)};
}
} // namespace OpenGL