diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index ee077cbbd..ed261c1cf 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake @@ -4,24 +4,24 @@ include(GenerateBuildInfo) # The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR) set(VIDEO_CORE "${SRC_DIR}/src/video_core") set(HASH_FILES - "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h" "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp" "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h" - "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h" "${VIDEO_CORE}/renderer_opengl/gl_shader_util.cpp" "${VIDEO_CORE}/renderer_opengl/gl_shader_util.h" - "${VIDEO_CORE}/renderer_vulkan/vk_shader_gen.cpp" - "${VIDEO_CORE}/renderer_vulkan/vk_shader_gen.h" - "${VIDEO_CORE}/renderer_vulkan/vk_shader_gen_spv.cpp" - "${VIDEO_CORE}/renderer_vulkan/vk_shader_gen_spv.h" "${VIDEO_CORE}/renderer_vulkan/vk_shader_util.cpp" "${VIDEO_CORE}/renderer_vulkan/vk_shader_util.h" + "${VIDEO_CORE}/shader/generator/glsl_shader_decompiler.cpp" + "${VIDEO_CORE}/shader/generator/glsl_shader_decompiler.h" + "${VIDEO_CORE}/shader/generator/glsl_shader_gen.cpp" + "${VIDEO_CORE}/shader/generator/glsl_shader_gen.h" + "${VIDEO_CORE}/shader/generator/shader_gen.cpp" + "${VIDEO_CORE}/shader/generator/shader_gen.h" + "${VIDEO_CORE}/shader/generator/shader_uniforms.cpp" + "${VIDEO_CORE}/shader/generator/shader_uniforms.h" + "${VIDEO_CORE}/shader/generator/spv_shader_gen.cpp" + "${VIDEO_CORE}/shader/generator/spv_shader_gen.h" "${VIDEO_CORE}/shader/shader.cpp" "${VIDEO_CORE}/shader/shader.h" - "${VIDEO_CORE}/shader/shader_uniforms.cpp" - "${VIDEO_CORE}/shader/shader_uniforms.h" "${VIDEO_CORE}/pica.cpp" "${VIDEO_CORE}/pica.h" "${VIDEO_CORE}/regs_framebuffer.h" diff --git a/src/common/CMakeLists.txt 
b/src/common/CMakeLists.txt index 8bb515488..73270972a 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -15,14 +15,22 @@ add_custom_command(OUTPUT scm_rev.cpp DEPENDS # WARNING! It was too much work to try and make a common location for this list, # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well - "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h" "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp" "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h" - "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h" "${VIDEO_CORE}/renderer_opengl/gl_shader_util.cpp" "${VIDEO_CORE}/renderer_opengl/gl_shader_util.h" + "${VIDEO_CORE}/renderer_vulkan/vk_shader_util.cpp" + "${VIDEO_CORE}/renderer_vulkan/vk_shader_util.h" + "${VIDEO_CORE}/shader/generator/glsl_shader_decompiler.cpp" + "${VIDEO_CORE}/shader/generator/glsl_shader_decompiler.h" + "${VIDEO_CORE}/shader/generator/glsl_shader_gen.cpp" + "${VIDEO_CORE}/shader/generator/glsl_shader_gen.h" + "${VIDEO_CORE}/shader/generator/shader_gen.cpp" + "${VIDEO_CORE}/shader/generator/shader_gen.h" + "${VIDEO_CORE}/shader/generator/shader_uniforms.cpp" + "${VIDEO_CORE}/shader/generator/shader_uniforms.h" + "${VIDEO_CORE}/shader/generator/spv_shader_gen.cpp" + "${VIDEO_CORE}/shader/generator/spv_shader_gen.h" "${VIDEO_CORE}/shader/shader.cpp" "${VIDEO_CORE}/shader/shader.h" "${VIDEO_CORE}/pica.cpp" diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 00b58d5d4..7951002c8 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -61,12 +61,8 @@ add_library(video_core STATIC renderer_opengl/gl_rasterizer_cache.cpp renderer_opengl/gl_resource_manager.cpp renderer_opengl/gl_resource_manager.h - renderer_opengl/gl_shader_decompiler.cpp - renderer_opengl/gl_shader_decompiler.h renderer_opengl/gl_shader_disk_cache.cpp 
renderer_opengl/gl_shader_disk_cache.h - renderer_opengl/gl_shader_gen.cpp - renderer_opengl/gl_shader_gen.h renderer_opengl/gl_shader_manager.cpp renderer_opengl/gl_shader_manager.h renderer_opengl/gl_shader_util.cpp @@ -130,10 +126,6 @@ add_library(video_core STATIC renderer_vulkan/vk_present_window.h renderer_vulkan/vk_renderpass_cache.cpp renderer_vulkan/vk_renderpass_cache.h - renderer_vulkan/vk_shader_gen.cpp - renderer_vulkan/vk_shader_gen.h - renderer_vulkan/vk_shader_gen_spv.cpp - renderer_vulkan/vk_shader_gen_spv.h renderer_vulkan/vk_shader_util.cpp renderer_vulkan/vk_shader_util.h renderer_vulkan/vk_stream_buffer.cpp @@ -143,6 +135,16 @@ add_library(video_core STATIC renderer_vulkan/vk_texture_runtime.cpp renderer_vulkan/vk_texture_runtime.h shader/debug_data.h + shader/generator/glsl_shader_decompiler.cpp + shader/generator/glsl_shader_decompiler.h + shader/generator/glsl_shader_gen.cpp + shader/generator/glsl_shader_gen.h + shader/generator/shader_gen.cpp + shader/generator/shader_gen.h + shader/generator/shader_uniforms.cpp + shader/generator/shader_uniforms.h + shader/generator/spv_shader_gen.cpp + shader/generator/spv_shader_gen.h shader/shader.cpp shader/shader.h shader/shader_interpreter.cpp @@ -151,8 +153,6 @@ add_library(video_core STATIC shader/shader_jit_x64_compiler.cpp shader/shader_jit_x64.h shader/shader_jit_x64_compiler.h - shader/shader_uniforms.cpp - shader/shader_uniforms.h texture/etc1.cpp texture/etc1.h texture/texture_decode.cpp diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp index 18b93d602..b6e8bb4fa 100644 --- a/src/video_core/rasterizer_accelerated.cpp +++ b/src/video_core/rasterizer_accelerated.cpp @@ -54,7 +54,7 @@ RasterizerAccelerated::HardwareVertex::HardwareVertex(const Pica::Shader::Output RasterizerAccelerated::RasterizerAccelerated(Memory::MemorySystem& memory_) : memory{memory_}, regs{Pica::g_state.regs} { - uniform_block_data.lighting_lut_dirty.fill(true); + 
fs_uniform_block_data.lighting_lut_dirty.fill(true); } /** @@ -135,7 +135,7 @@ void RasterizerAccelerated::SyncEntireState() { SyncFixedState(); // Sync uniforms - SyncClipCoef(); + SyncClipPlane(); SyncDepthScale(); SyncDepthOffset(); SyncAlphaTest(); @@ -199,7 +199,7 @@ void RasterizerAccelerated::NotifyPicaRegisterChanged(u32 id) { case PICA_REG_INDEX(texturing.fog_lut_data[5]): case PICA_REG_INDEX(texturing.fog_lut_data[6]): case PICA_REG_INDEX(texturing.fog_lut_data[7]): - uniform_block_data.fog_lut_dirty = true; + fs_uniform_block_data.fog_lut_dirty = true; break; // ProcTex state @@ -227,19 +227,19 @@ void RasterizerAccelerated::NotifyPicaRegisterChanged(u32 id) { using Pica::TexturingRegs; switch (regs.texturing.proctex_lut_config.ref_table.Value()) { case TexturingRegs::ProcTexLutTable::Noise: - uniform_block_data.proctex_noise_lut_dirty = true; + fs_uniform_block_data.proctex_noise_lut_dirty = true; break; case TexturingRegs::ProcTexLutTable::ColorMap: - uniform_block_data.proctex_color_map_dirty = true; + fs_uniform_block_data.proctex_color_map_dirty = true; break; case TexturingRegs::ProcTexLutTable::AlphaMap: - uniform_block_data.proctex_alpha_map_dirty = true; + fs_uniform_block_data.proctex_alpha_map_dirty = true; break; case TexturingRegs::ProcTexLutTable::Color: - uniform_block_data.proctex_lut_dirty = true; + fs_uniform_block_data.proctex_lut_dirty = true; break; case TexturingRegs::ProcTexLutTable::ColorDiff: - uniform_block_data.proctex_diff_lut_dirty = true; + fs_uniform_block_data.proctex_diff_lut_dirty = true; break; } break; @@ -588,8 +588,8 @@ void RasterizerAccelerated::NotifyPicaRegisterChanged(u32 id) { case PICA_REG_INDEX(lighting.lut_data[6]): case PICA_REG_INDEX(lighting.lut_data[7]): { const auto& lut_config = regs.lighting.lut_config; - uniform_block_data.lighting_lut_dirty[lut_config.type] = true; - uniform_block_data.lighting_lut_dirty_any = true; + fs_uniform_block_data.lighting_lut_dirty[lut_config.type] = true; + 
fs_uniform_block_data.lighting_lut_dirty_any = true; break; } @@ -616,11 +616,12 @@ void RasterizerAccelerated::NotifyPicaRegisterChanged(u32 id) { break; // Clipping plane + case PICA_REG_INDEX(rasterizer.clip_enable): case PICA_REG_INDEX(rasterizer.clip_coef[0]): case PICA_REG_INDEX(rasterizer.clip_coef[1]): case PICA_REG_INDEX(rasterizer.clip_coef[2]): case PICA_REG_INDEX(rasterizer.clip_coef[3]): - SyncClipCoef(); + SyncClipPlane(); break; } @@ -631,18 +632,18 @@ void RasterizerAccelerated::NotifyPicaRegisterChanged(u32 id) { void RasterizerAccelerated::SyncDepthScale() { const f32 depth_scale = f24::FromRaw(regs.rasterizer.viewport_depth_range).ToFloat32(); - if (depth_scale != uniform_block_data.data.depth_scale) { - uniform_block_data.data.depth_scale = depth_scale; - uniform_block_data.dirty = true; + if (depth_scale != fs_uniform_block_data.data.depth_scale) { + fs_uniform_block_data.data.depth_scale = depth_scale; + fs_uniform_block_data.dirty = true; } } void RasterizerAccelerated::SyncDepthOffset() { const f32 depth_offset = f24::FromRaw(regs.rasterizer.viewport_depth_near_plane).ToFloat32(); - if (depth_offset != uniform_block_data.data.depth_offset) { - uniform_block_data.data.depth_offset = depth_offset; - uniform_block_data.dirty = true; + if (depth_offset != fs_uniform_block_data.data.depth_offset) { + fs_uniform_block_data.data.depth_offset = depth_offset; + fs_uniform_block_data.dirty = true; } } @@ -654,9 +655,9 @@ void RasterizerAccelerated::SyncFogColor() { fog_color_regs.b.Value() / 255.0f, }; - if (fog_color != uniform_block_data.data.fog_color) { - uniform_block_data.data.fog_color = fog_color; - uniform_block_data.dirty = true; + if (fog_color != fs_uniform_block_data.data.fog_color) { + fs_uniform_block_data.data.fog_color = fog_color; + fs_uniform_block_data.dirty = true; } } @@ -674,13 +675,13 @@ void RasterizerAccelerated::SyncProcTexNoise() { Pica::f16::FromRaw(regs.texturing.proctex_noise_v.phase).ToFloat32(), }; - if 
(proctex_noise_f != uniform_block_data.data.proctex_noise_f || - proctex_noise_a != uniform_block_data.data.proctex_noise_a || - proctex_noise_p != uniform_block_data.data.proctex_noise_p) { - uniform_block_data.data.proctex_noise_f = proctex_noise_f; - uniform_block_data.data.proctex_noise_a = proctex_noise_a; - uniform_block_data.data.proctex_noise_p = proctex_noise_p; - uniform_block_data.dirty = true; + if (proctex_noise_f != fs_uniform_block_data.data.proctex_noise_f || + proctex_noise_a != fs_uniform_block_data.data.proctex_noise_a || + proctex_noise_p != fs_uniform_block_data.data.proctex_noise_p) { + fs_uniform_block_data.data.proctex_noise_f = proctex_noise_f; + fs_uniform_block_data.data.proctex_noise_a = proctex_noise_a; + fs_uniform_block_data.data.proctex_noise_p = proctex_noise_p; + fs_uniform_block_data.dirty = true; } } @@ -688,25 +689,25 @@ void RasterizerAccelerated::SyncProcTexBias() { const auto proctex_bias = Pica::f16::FromRaw(regs.texturing.proctex.bias_low | (regs.texturing.proctex_lut.bias_high << 8)) .ToFloat32(); - if (proctex_bias != uniform_block_data.data.proctex_bias) { - uniform_block_data.data.proctex_bias = proctex_bias; - uniform_block_data.dirty = true; + if (proctex_bias != fs_uniform_block_data.data.proctex_bias) { + fs_uniform_block_data.data.proctex_bias = proctex_bias; + fs_uniform_block_data.dirty = true; } } void RasterizerAccelerated::SyncAlphaTest() { if (regs.framebuffer.output_merger.alpha_test.ref != - static_cast(uniform_block_data.data.alphatest_ref)) { - uniform_block_data.data.alphatest_ref = regs.framebuffer.output_merger.alpha_test.ref; - uniform_block_data.dirty = true; + static_cast(fs_uniform_block_data.data.alphatest_ref)) { + fs_uniform_block_data.data.alphatest_ref = regs.framebuffer.output_merger.alpha_test.ref; + fs_uniform_block_data.dirty = true; } } void RasterizerAccelerated::SyncCombinerColor() { const auto combiner_color = ColorRGBA8(regs.texturing.tev_combiner_buffer_color.raw); - if 
(combiner_color != uniform_block_data.data.tev_combiner_buffer_color) { - uniform_block_data.data.tev_combiner_buffer_color = combiner_color; - uniform_block_data.dirty = true; + if (combiner_color != fs_uniform_block_data.data.tev_combiner_buffer_color) { + fs_uniform_block_data.data.tev_combiner_buffer_color = combiner_color; + fs_uniform_block_data.dirty = true; } } @@ -714,51 +715,51 @@ void RasterizerAccelerated::SyncTevConstColor( const size_t stage_index, const Pica::TexturingRegs::TevStageConfig& tev_stage) { const auto const_color = ColorRGBA8(tev_stage.const_color); - if (const_color == uniform_block_data.data.const_color[stage_index]) { + if (const_color == fs_uniform_block_data.data.const_color[stage_index]) { return; } - uniform_block_data.data.const_color[stage_index] = const_color; - uniform_block_data.dirty = true; + fs_uniform_block_data.data.const_color[stage_index] = const_color; + fs_uniform_block_data.dirty = true; } void RasterizerAccelerated::SyncGlobalAmbient() { const auto color = LightColor(regs.lighting.global_ambient); - if (color != uniform_block_data.data.lighting_global_ambient) { - uniform_block_data.data.lighting_global_ambient = color; - uniform_block_data.dirty = true; + if (color != fs_uniform_block_data.data.lighting_global_ambient) { + fs_uniform_block_data.data.lighting_global_ambient = color; + fs_uniform_block_data.dirty = true; } } void RasterizerAccelerated::SyncLightSpecular0(int light_index) { const auto color = LightColor(regs.lighting.light[light_index].specular_0); - if (color != uniform_block_data.data.light_src[light_index].specular_0) { - uniform_block_data.data.light_src[light_index].specular_0 = color; - uniform_block_data.dirty = true; + if (color != fs_uniform_block_data.data.light_src[light_index].specular_0) { + fs_uniform_block_data.data.light_src[light_index].specular_0 = color; + fs_uniform_block_data.dirty = true; } } void RasterizerAccelerated::SyncLightSpecular1(int light_index) { const auto color = 
LightColor(regs.lighting.light[light_index].specular_1); - if (color != uniform_block_data.data.light_src[light_index].specular_1) { - uniform_block_data.data.light_src[light_index].specular_1 = color; - uniform_block_data.dirty = true; + if (color != fs_uniform_block_data.data.light_src[light_index].specular_1) { + fs_uniform_block_data.data.light_src[light_index].specular_1 = color; + fs_uniform_block_data.dirty = true; } } void RasterizerAccelerated::SyncLightDiffuse(int light_index) { const auto color = LightColor(regs.lighting.light[light_index].diffuse); - if (color != uniform_block_data.data.light_src[light_index].diffuse) { - uniform_block_data.data.light_src[light_index].diffuse = color; - uniform_block_data.dirty = true; + if (color != fs_uniform_block_data.data.light_src[light_index].diffuse) { + fs_uniform_block_data.data.light_src[light_index].diffuse = color; + fs_uniform_block_data.dirty = true; } } void RasterizerAccelerated::SyncLightAmbient(int light_index) { const auto color = LightColor(regs.lighting.light[light_index].ambient); - if (color != uniform_block_data.data.light_src[light_index].ambient) { - uniform_block_data.data.light_src[light_index].ambient = color; - uniform_block_data.dirty = true; + if (color != fs_uniform_block_data.data.light_src[light_index].ambient) { + fs_uniform_block_data.data.light_src[light_index].ambient = color; + fs_uniform_block_data.dirty = true; } } @@ -769,9 +770,9 @@ void RasterizerAccelerated::SyncLightPosition(int light_index) { Pica::f16::FromRaw(regs.lighting.light[light_index].z).ToFloat32(), }; - if (position != uniform_block_data.data.light_src[light_index].position) { - uniform_block_data.data.light_src[light_index].position = position; - uniform_block_data.dirty = true; + if (position != fs_uniform_block_data.data.light_src[light_index].position) { + fs_uniform_block_data.data.light_src[light_index].position = position; + fs_uniform_block_data.dirty = true; } } @@ -780,9 +781,9 @@ void 
RasterizerAccelerated::SyncLightSpotDirection(int light_index) { const auto spot_direction = Common::Vec3f{light.spot_x / 2047.0f, light.spot_y / 2047.0f, light.spot_z / 2047.0f}; - if (spot_direction != uniform_block_data.data.light_src[light_index].spot_direction) { - uniform_block_data.data.light_src[light_index].spot_direction = spot_direction; - uniform_block_data.dirty = true; + if (spot_direction != fs_uniform_block_data.data.light_src[light_index].spot_direction) { + fs_uniform_block_data.data.light_src[light_index].spot_direction = spot_direction; + fs_uniform_block_data.dirty = true; } } @@ -790,9 +791,9 @@ void RasterizerAccelerated::SyncLightDistanceAttenuationBias(int light_index) { const f32 dist_atten_bias = Pica::f20::FromRaw(regs.lighting.light[light_index].dist_atten_bias).ToFloat32(); - if (dist_atten_bias != uniform_block_data.data.light_src[light_index].dist_atten_bias) { - uniform_block_data.data.light_src[light_index].dist_atten_bias = dist_atten_bias; - uniform_block_data.dirty = true; + if (dist_atten_bias != fs_uniform_block_data.data.light_src[light_index].dist_atten_bias) { + fs_uniform_block_data.data.light_src[light_index].dist_atten_bias = dist_atten_bias; + fs_uniform_block_data.dirty = true; } } @@ -800,9 +801,9 @@ void RasterizerAccelerated::SyncLightDistanceAttenuationScale(int light_index) { const f32 dist_atten_scale = Pica::f20::FromRaw(regs.lighting.light[light_index].dist_atten_scale).ToFloat32(); - if (dist_atten_scale != uniform_block_data.data.light_src[light_index].dist_atten_scale) { - uniform_block_data.data.light_src[light_index].dist_atten_scale = dist_atten_scale; - uniform_block_data.dirty = true; + if (dist_atten_scale != fs_uniform_block_data.data.light_src[light_index].dist_atten_scale) { + fs_uniform_block_data.data.light_src[light_index].dist_atten_scale = dist_atten_scale; + fs_uniform_block_data.dirty = true; } } @@ -811,28 +812,28 @@ void RasterizerAccelerated::SyncShadowBias() { const f32 constant = 
Pica::f16::FromRaw(shadow.constant).ToFloat32(); const f32 linear = Pica::f16::FromRaw(shadow.linear).ToFloat32(); - if (constant != uniform_block_data.data.shadow_bias_constant || - linear != uniform_block_data.data.shadow_bias_linear) { - uniform_block_data.data.shadow_bias_constant = constant; - uniform_block_data.data.shadow_bias_linear = linear; - uniform_block_data.dirty = true; + if (constant != fs_uniform_block_data.data.shadow_bias_constant || + linear != fs_uniform_block_data.data.shadow_bias_linear) { + fs_uniform_block_data.data.shadow_bias_constant = constant; + fs_uniform_block_data.data.shadow_bias_linear = linear; + fs_uniform_block_data.dirty = true; } } void RasterizerAccelerated::SyncShadowTextureBias() { const s32 bias = regs.texturing.shadow.bias << 1; - if (bias != uniform_block_data.data.shadow_texture_bias) { - uniform_block_data.data.shadow_texture_bias = bias; - uniform_block_data.dirty = true; + if (bias != fs_uniform_block_data.data.shadow_texture_bias) { + fs_uniform_block_data.data.shadow_texture_bias = bias; + fs_uniform_block_data.dirty = true; } } void RasterizerAccelerated::SyncTextureLodBias(int tex_index) { const auto pica_textures = regs.texturing.GetTextures(); const f32 bias = pica_textures[tex_index].config.lod.bias / 256.0f; - if (bias != uniform_block_data.data.tex_lod_bias[tex_index]) { - uniform_block_data.data.tex_lod_bias[tex_index] = bias; - uniform_block_data.dirty = true; + if (bias != fs_uniform_block_data.data.tex_lod_bias[tex_index]) { + fs_uniform_block_data.data.tex_lod_bias[tex_index] = bias; + fs_uniform_block_data.dirty = true; } } @@ -840,19 +841,22 @@ void RasterizerAccelerated::SyncTextureBorderColor(int tex_index) { const auto pica_textures = regs.texturing.GetTextures(); const auto params = pica_textures[tex_index].config; const Common::Vec4f border_color = ColorRGBA8(params.border_color.raw); - if (border_color != uniform_block_data.data.tex_border_color[tex_index]) { - 
uniform_block_data.data.tex_border_color[tex_index] = border_color; - uniform_block_data.dirty = true; + if (border_color != fs_uniform_block_data.data.tex_border_color[tex_index]) { + fs_uniform_block_data.data.tex_border_color[tex_index] = border_color; + fs_uniform_block_data.dirty = true; } } -void RasterizerAccelerated::SyncClipCoef() { +void RasterizerAccelerated::SyncClipPlane() { + const bool enable_clip1 = regs.rasterizer.clip_enable != 0; const auto raw_clip_coef = regs.rasterizer.GetClipCoef(); const Common::Vec4f new_clip_coef = {raw_clip_coef.x.ToFloat32(), raw_clip_coef.y.ToFloat32(), raw_clip_coef.z.ToFloat32(), raw_clip_coef.w.ToFloat32()}; - if (new_clip_coef != uniform_block_data.data.clip_coef) { - uniform_block_data.data.clip_coef = new_clip_coef; - uniform_block_data.dirty = true; + if (enable_clip1 != vs_uniform_block_data.data.enable_clip1 || + new_clip_coef != vs_uniform_block_data.data.clip_coef) { + vs_uniform_block_data.data.enable_clip1 = enable_clip1; + vs_uniform_block_data.data.clip_coef = new_clip_coef; + vs_uniform_block_data.dirty = true; } } diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h index 2ad11ee32..0bef2c537 100644 --- a/src/video_core/rasterizer_accelerated.h +++ b/src/video_core/rasterizer_accelerated.h @@ -7,7 +7,7 @@ #include "common/vector_math.h" #include "video_core/rasterizer_interface.h" #include "video_core/regs_texturing.h" -#include "video_core/shader/shader_uniforms.h" +#include "video_core/shader/generator/shader_uniforms.h" namespace Memory { class MemorySystem; @@ -100,13 +100,19 @@ protected: /// Syncs the texture border color to match the PICA registers void SyncTextureBorderColor(int tex_index); - /// Syncs the clip coefficients to match the PICA register - void SyncClipCoef(); + /// Syncs the clip plane state to match the PICA register + void SyncClipPlane(); protected: - /// Structure that keeps tracks of the uniform state - struct UniformBlockData { - 
Pica::Shader::UniformData data{}; + /// Structure that keeps track of the vertex shader uniform state + struct VSUniformBlockData { + Pica::Shader::Generator::VSUniformData data{}; + bool dirty = true; + }; + + /// Structure that keeps track of the fragment shader uniform state + struct FSUniformBlockData { + Pica::Shader::Generator::FSUniformData data{}; std::array lighting_lut_dirty{}; bool lighting_lut_dirty_any = true; bool fog_lut_dirty = true; @@ -149,7 +155,8 @@ protected: std::vector vertex_batch; bool shader_dirty = true; - UniformBlockData uniform_block_data{}; + VSUniformBlockData vs_uniform_block_data{}; + FSUniformBlockData fs_uniform_block_data{}; std::array, Pica::LightingRegs::NumLightingSampler> lighting_lut_data{}; std::array fog_lut_data{}; diff --git a/src/video_core/renderer_opengl/gl_driver.cpp b/src/video_core/renderer_opengl/gl_driver.cpp index 25cc1aedf..1894d44ef 100644 --- a/src/video_core/renderer_opengl/gl_driver.cpp +++ b/src/video_core/renderer_opengl/gl_driver.cpp @@ -168,7 +168,7 @@ void Driver::CheckExtensionSupport() { arb_clear_texture = GLAD_GL_ARB_clear_texture; arb_get_texture_sub_image = GLAD_GL_ARB_get_texture_sub_image; arb_texture_compression_bptc = GLAD_GL_ARB_texture_compression_bptc; - ext_clip_cull_distance = GLAD_GL_EXT_clip_cull_distance; + clip_cull_distance = !is_gles || GLAD_GL_EXT_clip_cull_distance; ext_texture_compression_s3tc = GLAD_GL_EXT_texture_compression_s3tc; shader_framebuffer_fetch = GLAD_GL_EXT_shader_framebuffer_fetch || GLAD_GL_ARM_shader_framebuffer_fetch; diff --git a/src/video_core/renderer_opengl/gl_driver.h b/src/video_core/renderer_opengl/gl_driver.h index fbb579f9a..0a82b0af5 100644 --- a/src/video_core/renderer_opengl/gl_driver.h +++ b/src/video_core/renderer_opengl/gl_driver.h @@ -100,9 +100,9 @@ public: return arb_get_texture_sub_image; } - /// Returns true if the implementation supports EXT_clip_cull_distance - bool HasExtClipCullDistance() const { - return ext_clip_cull_distance; + /// 
Returns true if the implementation supports shader-defined clipping planes + bool HasClipCullDistance() const { + return clip_cull_distance; } /// Returns true if the implementation supports (EXT/ARM)_shader_framebuffer_fetch @@ -132,7 +132,7 @@ private: bool arb_buffer_storage{}; bool arb_clear_texture{}; bool arb_get_texture_sub_image{}; - bool ext_clip_cull_distance{}; + bool clip_cull_distance{}; bool ext_texture_compression_s3tc{}; bool arb_texture_compression_bptc{}; bool shader_framebuffer_fetch{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 15f3b1984..a5bed59ef 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -11,9 +11,9 @@ #include "video_core/regs_framebuffer.h" #include "video_core/regs_rasterizer.h" #include "video_core/renderer_opengl/gl_rasterizer.h" -#include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/renderer_opengl/pica_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" +#include "video_core/shader/generator/glsl_shader_gen.h" #include "video_core/texture/texture_decode.h" #include "video_core/video_core.h" @@ -28,6 +28,7 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_Display, "OpenGL", "Display", MP_RGB(128, 128, 192)); using VideoCore::SurfaceType; +using namespace Pica::Shader::Generator; constexpr std::size_t VERTEX_BUFFER_SIZE = 16 * 1024 * 1024; constexpr std::size_t INDEX_BUFFER_SIZE = 2 * 1024 * 1024; @@ -95,10 +96,12 @@ RasterizerOpenGL::RasterizerOpenGL(Memory::MemorySystem& memory, hw_vao.Create(); glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); + uniform_size_aligned_vs_pica = + Common::AlignUp(sizeof(VSPicaUniformData), uniform_buffer_alignment); uniform_size_aligned_vs = - Common::AlignUp(sizeof(Pica::Shader::VSUniformData), uniform_buffer_alignment); + 
Common::AlignUp(sizeof(VSUniformData), uniform_buffer_alignment); uniform_size_aligned_fs = - Common::AlignUp(sizeof(Pica::Shader::UniformData), uniform_buffer_alignment); + Common::AlignUp(sizeof(FSUniformData), uniform_buffer_alignment); // Set vertex attributes for software shader path state.draw.vertex_array = sw_vao.handle; @@ -405,16 +408,16 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { // Update scissor uniforms const auto [scissor_x1, scissor_y2, scissor_x2, scissor_y1] = fb_helper.Scissor(); - if (uniform_block_data.data.scissor_x1 != scissor_x1 || - uniform_block_data.data.scissor_x2 != scissor_x2 || - uniform_block_data.data.scissor_y1 != scissor_y1 || - uniform_block_data.data.scissor_y2 != scissor_y2) { + if (fs_uniform_block_data.data.scissor_x1 != scissor_x1 || + fs_uniform_block_data.data.scissor_x2 != scissor_x2 || + fs_uniform_block_data.data.scissor_y1 != scissor_y1 || + fs_uniform_block_data.data.scissor_y2 != scissor_y2) { - uniform_block_data.data.scissor_x1 = scissor_x1; - uniform_block_data.data.scissor_x2 = scissor_x2; - uniform_block_data.data.scissor_y1 = scissor_y1; - uniform_block_data.data.scissor_y2 = scissor_y2; - uniform_block_data.dirty = true; + fs_uniform_block_data.data.scissor_x1 = scissor_x1; + fs_uniform_block_data.data.scissor_x2 = scissor_x2; + fs_uniform_block_data.data.scissor_y1 = scissor_y1; + fs_uniform_block_data.data.scissor_y2 = scissor_y2; + fs_uniform_block_data.dirty = true; } // Sync and bind the texture surfaces @@ -831,9 +834,9 @@ void RasterizerOpenGL::SyncBlendColor() { state.blend.color.blue = blend_color[2]; state.blend.color.alpha = blend_color[3]; - if (blend_color != uniform_block_data.data.blend_color) { - uniform_block_data.data.blend_color = blend_color; - uniform_block_data.dirty = true; + if (blend_color != fs_uniform_block_data.data.blend_color) { + fs_uniform_block_data.data.blend_color = blend_color; + fs_uniform_block_data.dirty = true; } } @@ -921,7 +924,7 @@ void 
RasterizerOpenGL::SyncAndUploadLUTsLF() { sizeof(Common::Vec2f) * 256 * Pica::LightingRegs::NumLightingSampler + sizeof(Common::Vec2f) * 128; // fog - if (!uniform_block_data.lighting_lut_dirty_any && !uniform_block_data.fog_lut_dirty) { + if (!fs_uniform_block_data.lighting_lut_dirty_any && !fs_uniform_block_data.fog_lut_dirty) { return; } @@ -931,9 +934,9 @@ void RasterizerOpenGL::SyncAndUploadLUTsLF() { texture_lf_buffer.Map(max_size, sizeof(Common::Vec4f)); // Sync the lighting luts - if (uniform_block_data.lighting_lut_dirty_any || invalidate) { - for (unsigned index = 0; index < uniform_block_data.lighting_lut_dirty.size(); index++) { - if (uniform_block_data.lighting_lut_dirty[index] || invalidate) { + if (fs_uniform_block_data.lighting_lut_dirty_any || invalidate) { + for (unsigned index = 0; index < fs_uniform_block_data.lighting_lut_dirty.size(); index++) { + if (fs_uniform_block_data.lighting_lut_dirty[index] || invalidate) { std::array new_data; const auto& source_lut = Pica::g_state.lighting.luts[index]; std::transform(source_lut.begin(), source_lut.end(), new_data.begin(), @@ -945,19 +948,19 @@ void RasterizerOpenGL::SyncAndUploadLUTsLF() { lighting_lut_data[index] = new_data; std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(Common::Vec2f)); - uniform_block_data.data.lighting_lut_offset[index / 4][index % 4] = + fs_uniform_block_data.data.lighting_lut_offset[index / 4][index % 4] = static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); - uniform_block_data.dirty = true; + fs_uniform_block_data.dirty = true; bytes_used += new_data.size() * sizeof(Common::Vec2f); } - uniform_block_data.lighting_lut_dirty[index] = false; + fs_uniform_block_data.lighting_lut_dirty[index] = false; } } - uniform_block_data.lighting_lut_dirty_any = false; + fs_uniform_block_data.lighting_lut_dirty_any = false; } // Sync the fog lut - if (uniform_block_data.fog_lut_dirty || invalidate) { + if (fs_uniform_block_data.fog_lut_dirty || invalidate) { 
std::array new_data; std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(), @@ -969,12 +972,12 @@ void RasterizerOpenGL::SyncAndUploadLUTsLF() { fog_lut_data = new_data; std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(Common::Vec2f)); - uniform_block_data.data.fog_lut_offset = + fs_uniform_block_data.data.fog_lut_offset = static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); - uniform_block_data.dirty = true; + fs_uniform_block_data.dirty = true; bytes_used += new_data.size() * sizeof(Common::Vec2f); } - uniform_block_data.fog_lut_dirty = false; + fs_uniform_block_data.fog_lut_dirty = false; } texture_lf_buffer.Unmap(bytes_used); @@ -986,10 +989,10 @@ void RasterizerOpenGL::SyncAndUploadLUTs() { sizeof(Common::Vec4f) * 256 + // proctex sizeof(Common::Vec4f) * 256; // proctex diff - if (!uniform_block_data.proctex_noise_lut_dirty && - !uniform_block_data.proctex_color_map_dirty && - !uniform_block_data.proctex_alpha_map_dirty && !uniform_block_data.proctex_lut_dirty && - !uniform_block_data.proctex_diff_lut_dirty) { + if (!fs_uniform_block_data.proctex_noise_lut_dirty && + !fs_uniform_block_data.proctex_color_map_dirty && + !fs_uniform_block_data.proctex_alpha_map_dirty && + !fs_uniform_block_data.proctex_lut_dirty && !fs_uniform_block_data.proctex_diff_lut_dirty) { return; } @@ -1012,34 +1015,34 @@ void RasterizerOpenGL::SyncAndUploadLUTs() { std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(Common::Vec2f)); lut_offset = static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); - uniform_block_data.dirty = true; + fs_uniform_block_data.dirty = true; bytes_used += new_data.size() * sizeof(Common::Vec2f); } }; // Sync the proctex noise lut - if (uniform_block_data.proctex_noise_lut_dirty || invalidate) { + if (fs_uniform_block_data.proctex_noise_lut_dirty || invalidate) { sync_proc_tex_value_lut(Pica::g_state.proctex.noise_table, proctex_noise_lut_data, - 
uniform_block_data.data.proctex_noise_lut_offset); - uniform_block_data.proctex_noise_lut_dirty = false; + fs_uniform_block_data.data.proctex_noise_lut_offset); + fs_uniform_block_data.proctex_noise_lut_dirty = false; } // Sync the proctex color map - if (uniform_block_data.proctex_color_map_dirty || invalidate) { + if (fs_uniform_block_data.proctex_color_map_dirty || invalidate) { sync_proc_tex_value_lut(Pica::g_state.proctex.color_map_table, proctex_color_map_data, - uniform_block_data.data.proctex_color_map_offset); - uniform_block_data.proctex_color_map_dirty = false; + fs_uniform_block_data.data.proctex_color_map_offset); + fs_uniform_block_data.proctex_color_map_dirty = false; } // Sync the proctex alpha map - if (uniform_block_data.proctex_alpha_map_dirty || invalidate) { + if (fs_uniform_block_data.proctex_alpha_map_dirty || invalidate) { sync_proc_tex_value_lut(Pica::g_state.proctex.alpha_map_table, proctex_alpha_map_data, - uniform_block_data.data.proctex_alpha_map_offset); - uniform_block_data.proctex_alpha_map_dirty = false; + fs_uniform_block_data.data.proctex_alpha_map_offset); + fs_uniform_block_data.proctex_alpha_map_dirty = false; } // Sync the proctex lut - if (uniform_block_data.proctex_lut_dirty || invalidate) { + if (fs_uniform_block_data.proctex_lut_dirty || invalidate) { std::array new_data; std::transform(Pica::g_state.proctex.color_table.begin(), @@ -1053,16 +1056,16 @@ void RasterizerOpenGL::SyncAndUploadLUTs() { proctex_lut_data = new_data; std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(Common::Vec4f)); - uniform_block_data.data.proctex_lut_offset = + fs_uniform_block_data.data.proctex_lut_offset = static_cast((offset + bytes_used) / sizeof(Common::Vec4f)); - uniform_block_data.dirty = true; + fs_uniform_block_data.dirty = true; bytes_used += new_data.size() * sizeof(Common::Vec4f); } - uniform_block_data.proctex_lut_dirty = false; + fs_uniform_block_data.proctex_lut_dirty = false; } // Sync the proctex 
difference lut - if (uniform_block_data.proctex_diff_lut_dirty || invalidate) { + if (fs_uniform_block_data.proctex_diff_lut_dirty || invalidate) { std::array new_data; std::transform(Pica::g_state.proctex.color_diff_table.begin(), @@ -1076,12 +1079,12 @@ void RasterizerOpenGL::SyncAndUploadLUTs() { proctex_diff_lut_data = new_data; std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(Common::Vec4f)); - uniform_block_data.data.proctex_diff_lut_offset = + fs_uniform_block_data.data.proctex_diff_lut_offset = static_cast((offset + bytes_used) / sizeof(Common::Vec4f)); - uniform_block_data.dirty = true; + fs_uniform_block_data.dirty = true; bytes_used += new_data.size() * sizeof(Common::Vec4f); } - uniform_block_data.proctex_diff_lut_dirty = false; + fs_uniform_block_data.proctex_diff_lut_dirty = false; } texture_buffer.Unmap(bytes_used); @@ -1092,38 +1095,47 @@ void RasterizerOpenGL::UploadUniforms(bool accelerate_draw) { state.draw.uniform_buffer = uniform_buffer.GetHandle(); state.Apply(); - const bool sync_vs = accelerate_draw; - const bool sync_fs = uniform_block_data.dirty; - if (!sync_vs && !sync_fs) { + const bool sync_vs_pica = accelerate_draw; + const bool sync_vs = vs_uniform_block_data.dirty; + const bool sync_fs = fs_uniform_block_data.dirty; + if (!sync_vs_pica && !sync_vs && !sync_fs) { return; } - std::size_t uniform_size = uniform_size_aligned_vs + uniform_size_aligned_fs; + std::size_t uniform_size = + uniform_size_aligned_vs_pica + uniform_size_aligned_vs + uniform_size_aligned_fs; std::size_t used_bytes = 0; const auto [uniforms, offset, invalidate] = uniform_buffer.Map(uniform_size, uniform_buffer_alignment); - if (sync_vs) { - Pica::Shader::VSUniformData vs_uniforms; - vs_uniforms.uniforms.SetFromRegs(regs.vs, Pica::g_state.vs); - std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms)); - glBindBufferRange(GL_UNIFORM_BUFFER, static_cast(Pica::Shader::UniformBindings::VS), - uniform_buffer.GetHandle(), offset + 
used_bytes, - sizeof(Pica::Shader::VSUniformData)); + if (sync_vs || invalidate) { + std::memcpy(uniforms + used_bytes, &vs_uniform_block_data.data, + sizeof(vs_uniform_block_data.data)); + glBindBufferRange(GL_UNIFORM_BUFFER, UniformBindings::VSData, uniform_buffer.GetHandle(), + offset + used_bytes, sizeof(vs_uniform_block_data.data)); + vs_uniform_block_data.dirty = false; used_bytes += uniform_size_aligned_vs; } if (sync_fs || invalidate) { - std::memcpy(uniforms + used_bytes, &uniform_block_data.data, - sizeof(Pica::Shader::UniformData)); - glBindBufferRange( - GL_UNIFORM_BUFFER, static_cast(Pica::Shader::UniformBindings::Common), - uniform_buffer.GetHandle(), offset + used_bytes, sizeof(Pica::Shader::UniformData)); - uniform_block_data.dirty = false; + std::memcpy(uniforms + used_bytes, &fs_uniform_block_data.data, + sizeof(fs_uniform_block_data.data)); + glBindBufferRange(GL_UNIFORM_BUFFER, UniformBindings::FSData, uniform_buffer.GetHandle(), + offset + used_bytes, sizeof(fs_uniform_block_data.data)); + fs_uniform_block_data.dirty = false; used_bytes += uniform_size_aligned_fs; } + if (sync_vs_pica) { + VSPicaUniformData vs_uniforms; + vs_uniforms.uniforms.SetFromRegs(regs.vs, Pica::g_state.vs); + std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms)); + glBindBufferRange(GL_UNIFORM_BUFFER, UniformBindings::VSPicaData, + uniform_buffer.GetHandle(), offset + used_bytes, sizeof(vs_uniforms)); + used_bytes += uniform_size_aligned_vs_pica; + } + uniform_buffer.Unmap(used_bytes); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 4c4bc61f4..72efd78a2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -148,6 +148,7 @@ private: OGLStreamBuffer texture_buffer; OGLStreamBuffer texture_lf_buffer; GLint uniform_buffer_alignment; + std::size_t uniform_size_aligned_vs_pica; std::size_t uniform_size_aligned_vs; std::size_t 
uniform_size_aligned_fs; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h deleted file mode 100644 index 453edf956..000000000 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2017 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include "common/common_types.h" -#include "video_core/shader/shader.h" - -namespace OpenGL::ShaderDecompiler { - -using RegGetter = std::function; - -struct ProgramResult { - std::string code; -}; - -std::string GetCommonDeclarations(); - -std::optional DecompileProgram(const Pica::Shader::ProgramCode& program_code, - const Pica::Shader::SwizzleData& swizzle_data, - u32 main_offset, const RegGetter& inputreg_getter, - const RegGetter& outputreg_getter, bool sanitize_mul); - -} // namespace OpenGL::ShaderDecompiler diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 88b3c34bf..bbc9cf578 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -297,35 +297,33 @@ std::optional ShaderDiskCache::LoadDecompiledEntry() } ShaderDiskCacheDecompiled entry; - entry.result.code = std::move(code); + entry.code = std::move(code); entry.sanitize_mul = sanitize_mul; return entry; } void ShaderDiskCache::SaveDecompiledToFile(FileUtil::IOFile& file, u64 unique_identifier, - const ShaderDecompiler::ProgramResult& result, - bool sanitize_mul) { + const std::string& code, bool sanitize_mul) { if (!IsUsable()) return; if (file.WriteObject(static_cast(PrecompiledEntryKind::Decompiled)) != 1 || file.WriteObject(unique_identifier) != 1 || file.WriteObject(sanitize_mul) != 1 || - file.WriteObject(static_cast(result.code.size())) != 1 || - 
file.WriteArray(result.code.data(), result.code.size()) != result.code.size()) { + file.WriteObject(static_cast(code.size())) != 1 || + file.WriteArray(code.data(), code.size()) != code.size()) { LOG_ERROR(Render_OpenGL, "Failed to save decompiled cache entry - removing"); file.Close(); InvalidatePrecompiled(); } } -bool ShaderDiskCache::SaveDecompiledToCache(u64 unique_identifier, - const ShaderDecompiler::ProgramResult& result, +bool ShaderDiskCache::SaveDecompiledToCache(u64 unique_identifier, const std::string& code, bool sanitize_mul) { if (!SaveObjectToPrecompiled(static_cast(PrecompiledEntryKind::Decompiled)) || !SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(sanitize_mul) || - !SaveObjectToPrecompiled(static_cast(result.code.size())) || - !SaveArrayToPrecompiled(result.code.data(), result.code.size())) { + !SaveObjectToPrecompiled(static_cast(code.size())) || + !SaveArrayToPrecompiled(code.data(), code.size())) { return false; } @@ -374,8 +372,7 @@ void ShaderDiskCache::SaveRaw(const ShaderDiskCacheRaw& entry) { transferable_file.Flush(); } -void ShaderDiskCache::SaveDecompiled(u64 unique_identifier, - const ShaderDecompiler::ProgramResult& code, +void ShaderDiskCache::SaveDecompiled(u64 unique_identifier, const std::string& code, bool sanitize_mul) { if (!IsUsable()) return; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 3c3c76666..8c285b032 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -20,8 +20,7 @@ #include "common/common_types.h" #include "common/file_util.h" #include "video_core/regs.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_opengl/gl_shader_gen.h" +#include "video_core/shader/generator/glsl_shader_gen.h" namespace Core { class System; @@ -38,6 +37,7 @@ struct ShaderDiskCacheDump; using RawShaderConfig = 
Pica::Regs; using ProgramCode = std::vector; +using ProgramType = Pica::Shader::Generator::ProgramType; using ShaderDecompiledMap = std::unordered_map; using ShaderDumpsMap = std::unordered_map; @@ -78,7 +78,7 @@ private: /// Contains decompiled data from a shader struct ShaderDiskCacheDecompiled { - ShaderDecompiler::ProgramResult result; + std::string code; bool sanitize_mul; }; @@ -109,8 +109,7 @@ public: void SaveRaw(const ShaderDiskCacheRaw& entry); /// Saves a decompiled entry to the precompiled file. Does not check for collisions. - void SaveDecompiled(u64 unique_identifier, const ShaderDecompiler::ProgramResult& code, - bool sanitize_mul); + void SaveDecompiled(u64 unique_identifier, const std::string& code, bool sanitize_mul); /// Saves a dump entry to the precompiled file. Does not check for collisions. void SaveDump(u64 unique_identifier, GLuint program); @@ -132,11 +131,10 @@ private: /// Saves a decompiled entry to the passed file. Does not check for collisions. void SaveDecompiledToFile(FileUtil::IOFile& file, u64 unique_identifier, - const ShaderDecompiler::ProgramResult& code, bool sanitize_mul); + const std::string& code, bool sanitize_mul); /// Saves a decompiled entry to the virtual precompiled cache. Does not check for collisions. - bool SaveDecompiledToCache(u64 unique_identifier, const ShaderDecompiler::ProgramResult& code, - bool sanitize_mul); + bool SaveDecompiledToCache(u64 unique_identifier, const std::string& code, bool sanitize_mul); /// Returns if the cache can be used bool IsUsable() const; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp deleted file mode 100644 index 90553cc7f..000000000 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ /dev/null @@ -1,1941 +0,0 @@ -// Copyright 2022 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include "common/bit_set.h" -#include "common/logging/log.h" -#include "core/core.h" -#include "core/telemetry_session.h" -#include "video_core/pica_state.h" -#include "video_core/renderer_opengl/gl_driver.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_opengl/gl_shader_gen.h" -#include "video_core/renderer_opengl/gl_shader_util.h" -#include "video_core/renderer_opengl/gl_vars.h" -#include "video_core/shader/shader_uniforms.h" -#include "video_core/video_core.h" - -using Pica::FramebufferRegs; -using Pica::LightingRegs; -using Pica::RasterizerRegs; -using Pica::TexturingRegs; -using TevStageConfig = TexturingRegs::TevStageConfig; -using VSOutputAttributes = RasterizerRegs::VSOutputAttributes; - -namespace OpenGL { - -const std::string UniformBlockDef = Pica::Shader::BuildShaderUniformDefinitions("binding = 0,"); - -static std::string GetVertexInterfaceDeclaration(bool is_output, bool separable_shader) { - std::string out; - - const auto append_variable = [&](std::string_view var, int location) { - if (separable_shader) { - out += fmt::format("layout (location={}) ", location); - } - out += fmt::format("{}{};\n", is_output ? 
"out " : "in ", var); - }; - - append_variable("vec4 primary_color", ATTRIBUTE_COLOR); - append_variable("vec2 texcoord0", ATTRIBUTE_TEXCOORD0); - append_variable("vec2 texcoord1", ATTRIBUTE_TEXCOORD1); - append_variable("vec2 texcoord2", ATTRIBUTE_TEXCOORD2); - append_variable("float texcoord0_w", ATTRIBUTE_TEXCOORD0_W); - append_variable("vec4 normquat", ATTRIBUTE_NORMQUAT); - append_variable("vec3 view", ATTRIBUTE_VIEW); - - if (is_output && separable_shader) { - // gl_PerVertex redeclaration is required for separate shader object - out += R"( -out gl_PerVertex { - vec4 gl_Position; -#if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance) - float gl_ClipDistance[2]; -#endif // !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance) -}; -)"; - } - - return out; -} - -PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs, bool has_blend_minmax_factor, - bool use_normal) { - PicaFSConfig res{}; - - auto& state = res.state; - - state.scissor_test_mode = regs.rasterizer.scissor_test.mode; - - state.depthmap_enable = regs.rasterizer.depthmap_enable; - - state.alpha_test_func = regs.framebuffer.output_merger.alpha_test.enable - ? regs.framebuffer.output_merger.alpha_test.func.Value() - : FramebufferRegs::CompareFunc::Always; - - state.texture0_type = regs.texturing.texture0.type; - - state.texture2_use_coord1 = regs.texturing.main_config.texture2_use_coord1 != 0; - - if (GLES) { - // With GLES, we need this in the fragment shader to emulate logic operations - state.alphablend_enable = - Pica::g_state.regs.framebuffer.output_merger.alphablend_enable == 1; - state.logic_op = regs.framebuffer.output_merger.logic_op; - } else { - // We don't need these otherwise, reset them to avoid unnecessary shader generation - state.alphablend_enable = {}; - state.logic_op = {}; - } - - // Copy relevant tev stages fields. - // We don't sync const_color here because of the high variance, it is a - // shader uniform instead. 
- const auto& tev_stages = regs.texturing.GetTevStages(); - DEBUG_ASSERT(state.tev_stages.size() == tev_stages.size()); - for (std::size_t i = 0; i < tev_stages.size(); i++) { - const auto& tev_stage = tev_stages[i]; - state.tev_stages[i].sources_raw = tev_stage.sources_raw; - state.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw; - state.tev_stages[i].ops_raw = tev_stage.ops_raw; - state.tev_stages[i].scales_raw = tev_stage.scales_raw; - if (tev_stage.color_op == TevStageConfig::Operation::Dot3_RGBA) { - state.tev_stages[i].sources_raw &= 0xFFF; - state.tev_stages[i].modifiers_raw &= 0xFFF; - state.tev_stages[i].ops_raw &= 0xF; - } - } - - state.fog_mode = regs.texturing.fog_mode; - state.fog_flip = regs.texturing.fog_flip != 0; - - state.combiner_buffer_input = regs.texturing.tev_combiner_buffer_input.update_mask_rgb.Value() | - regs.texturing.tev_combiner_buffer_input.update_mask_a.Value() - << 4; - - // Fragment lighting - state.lighting.enable = !regs.lighting.disable; - if (state.lighting.enable) { - state.lighting.src_num = regs.lighting.max_light_index + 1; - - for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) { - unsigned num = regs.lighting.light_enable.GetNum(light_index); - const auto& light = regs.lighting.light[num]; - state.lighting.light[light_index].num = num; - state.lighting.light[light_index].directional = light.config.directional != 0; - state.lighting.light[light_index].two_sided_diffuse = - light.config.two_sided_diffuse != 0; - state.lighting.light[light_index].geometric_factor_0 = - light.config.geometric_factor_0 != 0; - state.lighting.light[light_index].geometric_factor_1 = - light.config.geometric_factor_1 != 0; - state.lighting.light[light_index].dist_atten_enable = - !regs.lighting.IsDistAttenDisabled(num); - state.lighting.light[light_index].spot_atten_enable = - !regs.lighting.IsSpotAttenDisabled(num); - state.lighting.light[light_index].shadow_enable = !regs.lighting.IsShadowDisabled(num); - } 
- - state.lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0; - if (state.lighting.lut_d0.enable) { - state.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0; - state.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value(); - state.lighting.lut_d0.scale = - regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); - } - - state.lighting.lut_d1.enable = regs.lighting.config1.disable_lut_d1 == 0; - if (state.lighting.lut_d1.enable) { - state.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0; - state.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value(); - state.lighting.lut_d1.scale = - regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); - } - - // this is a dummy field due to lack of the corresponding register - state.lighting.lut_sp.enable = true; - state.lighting.lut_sp.abs_input = regs.lighting.abs_lut_input.disable_sp == 0; - state.lighting.lut_sp.type = regs.lighting.lut_input.sp.Value(); - state.lighting.lut_sp.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.sp); - - state.lighting.lut_fr.enable = regs.lighting.config1.disable_lut_fr == 0; - if (state.lighting.lut_fr.enable) { - state.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0; - state.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value(); - state.lighting.lut_fr.scale = - regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); - } - - state.lighting.lut_rr.enable = regs.lighting.config1.disable_lut_rr == 0; - if (state.lighting.lut_rr.enable) { - state.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0; - state.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value(); - state.lighting.lut_rr.scale = - regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); - } - - state.lighting.lut_rg.enable = regs.lighting.config1.disable_lut_rg == 0; - if (state.lighting.lut_rg.enable) { - state.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg 
== 0; - state.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value(); - state.lighting.lut_rg.scale = - regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); - } - - state.lighting.lut_rb.enable = regs.lighting.config1.disable_lut_rb == 0; - if (state.lighting.lut_rb.enable) { - state.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0; - state.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value(); - state.lighting.lut_rb.scale = - regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); - } - - state.lighting.config = regs.lighting.config0.config; - state.lighting.enable_primary_alpha = regs.lighting.config0.enable_primary_alpha; - state.lighting.enable_secondary_alpha = regs.lighting.config0.enable_secondary_alpha; - state.lighting.bump_mode = regs.lighting.config0.bump_mode; - state.lighting.bump_selector = regs.lighting.config0.bump_selector; - state.lighting.bump_renorm = regs.lighting.config0.disable_bump_renorm == 0; - state.lighting.clamp_highlights = regs.lighting.config0.clamp_highlights != 0; - - state.lighting.enable_shadow = regs.lighting.config0.enable_shadow != 0; - if (state.lighting.enable_shadow) { - state.lighting.shadow_primary = regs.lighting.config0.shadow_primary != 0; - state.lighting.shadow_secondary = regs.lighting.config0.shadow_secondary != 0; - state.lighting.shadow_invert = regs.lighting.config0.shadow_invert != 0; - state.lighting.shadow_alpha = regs.lighting.config0.shadow_alpha != 0; - state.lighting.shadow_selector = regs.lighting.config0.shadow_selector; - } - } - - state.proctex.enable = regs.texturing.main_config.texture3_enable; - if (state.proctex.enable) { - state.proctex.coord = regs.texturing.main_config.texture3_coordinates; - state.proctex.u_clamp = regs.texturing.proctex.u_clamp; - state.proctex.v_clamp = regs.texturing.proctex.v_clamp; - state.proctex.color_combiner = regs.texturing.proctex.color_combiner; - state.proctex.alpha_combiner = regs.texturing.proctex.alpha_combiner; - 
state.proctex.separate_alpha = regs.texturing.proctex.separate_alpha; - state.proctex.noise_enable = regs.texturing.proctex.noise_enable; - state.proctex.u_shift = regs.texturing.proctex.u_shift; - state.proctex.v_shift = regs.texturing.proctex.v_shift; - state.proctex.lut_width = regs.texturing.proctex_lut.width; - state.proctex.lut_offset0 = regs.texturing.proctex_lut_offset.level0; - state.proctex.lut_offset1 = regs.texturing.proctex_lut_offset.level1; - state.proctex.lut_offset2 = regs.texturing.proctex_lut_offset.level2; - state.proctex.lut_offset3 = regs.texturing.proctex_lut_offset.level3; - state.proctex.lod_min = regs.texturing.proctex_lut.lod_min; - state.proctex.lod_max = regs.texturing.proctex_lut.lod_max; - state.proctex.lut_filter = regs.texturing.proctex_lut.filter; - } - - const auto alpha_eq = regs.framebuffer.output_merger.alpha_blending.blend_equation_a.Value(); - const auto rgb_eq = regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb.Value(); - if (regs.framebuffer.output_merger.alphablend_enable && !has_blend_minmax_factor) { - if (rgb_eq == Pica::FramebufferRegs::BlendEquation::Max || - rgb_eq == Pica::FramebufferRegs::BlendEquation::Min) { - state.rgb_blend.emulate_blending = true; - state.rgb_blend.eq = rgb_eq; - state.rgb_blend.src_factor = - regs.framebuffer.output_merger.alpha_blending.factor_source_rgb; - state.rgb_blend.dst_factor = - regs.framebuffer.output_merger.alpha_blending.factor_dest_rgb; - } - if (alpha_eq == Pica::FramebufferRegs::BlendEquation::Max || - alpha_eq == Pica::FramebufferRegs::BlendEquation::Min) { - state.alpha_blend.emulate_blending = true; - state.alpha_blend.eq = alpha_eq; - state.alpha_blend.src_factor = - regs.framebuffer.output_merger.alpha_blending.factor_source_a; - state.alpha_blend.dst_factor = - regs.framebuffer.output_merger.alpha_blending.factor_dest_a; - } - } - - state.shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode == - 
FramebufferRegs::FragmentOperationMode::Shadow; - state.shadow_texture_orthographic = regs.texturing.shadow.orthographic != 0; - - state.use_custom_normal_map = use_normal; - - return res; -} - -void PicaShaderConfigCommon::Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) { - program_hash = setup.GetProgramCodeHash(); - swizzle_hash = setup.GetSwizzleDataHash(); - main_offset = regs.main_offset; - sanitize_mul = VideoCore::g_hw_shader_accurate_mul; - - num_outputs = 0; - output_map.fill(16); - - for (int reg : Common::BitSet(regs.output_mask)) { - output_map[reg] = num_outputs++; - } -} - -void PicaGSConfigCommonRaw::Init(const Pica::Regs& regs) { - vs_output_attributes = Common::BitSet(regs.vs.output_mask).Count(); - gs_output_attributes = vs_output_attributes; - - semantic_maps.fill({16, 0}); - for (u32 attrib = 0; attrib < regs.rasterizer.vs_output_total; ++attrib) { - const std::array semantics{ - regs.rasterizer.vs_output_attributes[attrib].map_x.Value(), - regs.rasterizer.vs_output_attributes[attrib].map_y.Value(), - regs.rasterizer.vs_output_attributes[attrib].map_z.Value(), - regs.rasterizer.vs_output_attributes[attrib].map_w.Value(), - }; - for (u32 comp = 0; comp < 4; ++comp) { - const auto semantic = semantics[comp]; - if (static_cast(semantic) < 24) { - semantic_maps[static_cast(semantic)] = {attrib, comp}; - } else if (semantic != VSOutputAttributes::INVALID) { - LOG_ERROR(Render_OpenGL, "Invalid/unknown semantic id: {}", semantic); - } - } - } -} - -/// Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code) -static bool IsPassThroughTevStage(const TevStageConfig& stage) { - return (stage.color_op == TevStageConfig::Operation::Replace && - stage.alpha_op == TevStageConfig::Operation::Replace && - stage.color_source1 == TevStageConfig::Source::Previous && - stage.alpha_source1 == TevStageConfig::Source::Previous && - stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor && - 
stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha && - stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1); -} - -static std::string SampleTexture(const PicaFSConfig& config, unsigned texture_unit) { - const auto& state = config.state; - switch (texture_unit) { - case 0: - // Only unit 0 respects the texturing type - switch (state.texture0_type) { - case TexturingRegs::TextureConfig::Texture2D: - return "textureLod(tex0, texcoord0, getLod(texcoord0 * vec2(textureSize(tex0, 0))) + " - "tex_lod_bias[0])"; - case TexturingRegs::TextureConfig::Projection2D: - // TODO (wwylele): find the exact LOD formula for projection texture - return "textureProj(tex0, vec3(texcoord0, texcoord0_w))"; - case TexturingRegs::TextureConfig::TextureCube: - return "texture(tex_cube, vec3(texcoord0, texcoord0_w))"; - case TexturingRegs::TextureConfig::Shadow2D: - return "shadowTexture(texcoord0, texcoord0_w)"; - case TexturingRegs::TextureConfig::ShadowCube: - return "shadowTextureCube(texcoord0, texcoord0_w)"; - case TexturingRegs::TextureConfig::Disabled: - return "vec4(0.0)"; - default: - LOG_CRITICAL(HW_GPU, "Unhandled texture type {:x}", state.texture0_type); - UNIMPLEMENTED(); - return "texture(tex0, texcoord0)"; - } - case 1: - return "textureLod(tex1, texcoord1, getLod(texcoord1 * vec2(textureSize(tex1, 0))) + " - "tex_lod_bias[1])"; - case 2: - if (state.texture2_use_coord1) - return "textureLod(tex2, texcoord1, getLod(texcoord1 * vec2(textureSize(tex2, 0))) + " - "tex_lod_bias[2])"; - else - return "textureLod(tex2, texcoord2, getLod(texcoord2 * vec2(textureSize(tex2, 0))) + " - "tex_lod_bias[2])"; - case 3: - if (state.proctex.enable) { - return "ProcTex()"; - } else { - LOG_DEBUG(Render_OpenGL, "Using Texture3 without enabling it"); - return "vec4(0.0)"; - } - case 4: - return "texture(tex_normal, texcoord0)"; - default: - UNREACHABLE(); - return ""; - } -} - -/// Writes the specified TEV stage source component(s) -static void 
AppendSource(std::string& out, const PicaFSConfig& config, - TevStageConfig::Source source, std::string_view index_name) { - using Source = TevStageConfig::Source; - switch (source) { - case Source::PrimaryColor: - out += "rounded_primary_color"; - break; - case Source::PrimaryFragmentColor: - out += "primary_fragment_color"; - break; - case Source::SecondaryFragmentColor: - out += "secondary_fragment_color"; - break; - case Source::Texture0: - out += SampleTexture(config, 0); - break; - case Source::Texture1: - out += SampleTexture(config, 1); - break; - case Source::Texture2: - out += SampleTexture(config, 2); - break; - case Source::Texture3: - out += SampleTexture(config, 3); - break; - case Source::PreviousBuffer: - out += "combiner_buffer"; - break; - case Source::Constant: - out += "const_color["; - out += index_name; - out += ']'; - break; - case Source::Previous: - out += "last_tex_env_out"; - break; - default: - out += "vec4(0.0)"; - LOG_CRITICAL(Render_OpenGL, "Unknown source op {}", source); - break; - } -} - -/// Writes the color components to use for the specified TEV stage color modifier -static void AppendColorModifier(std::string& out, const PicaFSConfig& config, - TevStageConfig::ColorModifier modifier, - TevStageConfig::Source source, std::string_view index_name) { - using ColorModifier = TevStageConfig::ColorModifier; - switch (modifier) { - case ColorModifier::SourceColor: - AppendSource(out, config, source, index_name); - out += ".rgb"; - break; - case ColorModifier::OneMinusSourceColor: - out += "vec3(1.0) - "; - AppendSource(out, config, source, index_name); - out += ".rgb"; - break; - case ColorModifier::SourceAlpha: - AppendSource(out, config, source, index_name); - out += ".aaa"; - break; - case ColorModifier::OneMinusSourceAlpha: - out += "vec3(1.0) - "; - AppendSource(out, config, source, index_name); - out += ".aaa"; - break; - case ColorModifier::SourceRed: - AppendSource(out, config, source, index_name); - out += ".rrr"; - break; - 
case ColorModifier::OneMinusSourceRed: - out += "vec3(1.0) - "; - AppendSource(out, config, source, index_name); - out += ".rrr"; - break; - case ColorModifier::SourceGreen: - AppendSource(out, config, source, index_name); - out += ".ggg"; - break; - case ColorModifier::OneMinusSourceGreen: - out += "vec3(1.0) - "; - AppendSource(out, config, source, index_name); - out += ".ggg"; - break; - case ColorModifier::SourceBlue: - AppendSource(out, config, source, index_name); - out += ".bbb"; - break; - case ColorModifier::OneMinusSourceBlue: - out += "vec3(1.0) - "; - AppendSource(out, config, source, index_name); - out += ".bbb"; - break; - default: - out += "vec3(0.0)"; - LOG_CRITICAL(Render_OpenGL, "Unknown color modifier op {}", modifier); - break; - } -} - -/// Writes the alpha component to use for the specified TEV stage alpha modifier -static void AppendAlphaModifier(std::string& out, const PicaFSConfig& config, - TevStageConfig::AlphaModifier modifier, - TevStageConfig::Source source, const std::string& index_name) { - using AlphaModifier = TevStageConfig::AlphaModifier; - switch (modifier) { - case AlphaModifier::SourceAlpha: - AppendSource(out, config, source, index_name); - out += ".a"; - break; - case AlphaModifier::OneMinusSourceAlpha: - out += "1.0 - "; - AppendSource(out, config, source, index_name); - out += ".a"; - break; - case AlphaModifier::SourceRed: - AppendSource(out, config, source, index_name); - out += ".r"; - break; - case AlphaModifier::OneMinusSourceRed: - out += "1.0 - "; - AppendSource(out, config, source, index_name); - out += ".r"; - break; - case AlphaModifier::SourceGreen: - AppendSource(out, config, source, index_name); - out += ".g"; - break; - case AlphaModifier::OneMinusSourceGreen: - out += "1.0 - "; - AppendSource(out, config, source, index_name); - out += ".g"; - break; - case AlphaModifier::SourceBlue: - AppendSource(out, config, source, index_name); - out += ".b"; - break; - case AlphaModifier::OneMinusSourceBlue: - out += 
"1.0 - "; - AppendSource(out, config, source, index_name); - out += ".b"; - break; - default: - out += "0.0"; - LOG_CRITICAL(Render_OpenGL, "Unknown alpha modifier op {}", modifier); - break; - } -} - -/// Writes the combiner function for the color components for the specified TEV stage operation -static void AppendColorCombiner(std::string& out, TevStageConfig::Operation operation, - std::string_view variable_name) { - out += "clamp("; - using Operation = TevStageConfig::Operation; - switch (operation) { - case Operation::Replace: - out += fmt::format("{}[0]", variable_name); - break; - case Operation::Modulate: - out += fmt::format("{0}[0] * {0}[1]", variable_name); - break; - case Operation::Add: - out += fmt::format("{0}[0] + {0}[1]", variable_name); - break; - case Operation::AddSigned: - out += fmt::format("{0}[0] + {0}[1] - vec3(0.5)", variable_name); - break; - case Operation::Lerp: - out += fmt::format("{0}[0] * {0}[2] + {0}[1] * (vec3(1.0) - {0}[2])", variable_name); - break; - case Operation::Subtract: - out += fmt::format("{0}[0] - {0}[1]", variable_name); - break; - case Operation::MultiplyThenAdd: - out += fmt::format("{0}[0] * {0}[1] + {0}[2]", variable_name); - break; - case Operation::AddThenMultiply: - out += fmt::format("min({0}[0] + {0}[1], vec3(1.0)) * {0}[2]", variable_name); - break; - case Operation::Dot3_RGB: - case Operation::Dot3_RGBA: - out += - fmt::format("vec3(dot({0}[0] - vec3(0.5), {0}[1] - vec3(0.5)) * 4.0)", variable_name); - break; - default: - out += "vec3(0.0)"; - LOG_CRITICAL(Render_OpenGL, "Unknown color combiner operation: {}", operation); - break; - } - out += ", vec3(0.0), vec3(1.0))"; // Clamp result to 0.0, 1.0 -} - -/// Writes the combiner function for the alpha component for the specified TEV stage operation -static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation operation, - std::string_view variable_name) { - out += "clamp("; - using Operation = TevStageConfig::Operation; - switch (operation) { - 
case Operation::Replace: - out += fmt::format("{}[0]", variable_name); - break; - case Operation::Modulate: - out += fmt::format("{0}[0] * {0}[1]", variable_name); - break; - case Operation::Add: - out += fmt::format("{0}[0] + {0}[1]", variable_name); - break; - case Operation::AddSigned: - out += fmt::format("{0}[0] + {0}[1] - 0.5", variable_name); - break; - case Operation::Lerp: - out += fmt::format("{0}[0] * {0}[2] + {0}[1] * (1.0 - {0}[2])", variable_name); - break; - case Operation::Subtract: - out += fmt::format("{0}[0] - {0}[1]", variable_name); - break; - case Operation::MultiplyThenAdd: - out += fmt::format("{0}[0] * {0}[1] + {0}[2]", variable_name); - break; - case Operation::AddThenMultiply: - out += fmt::format("min({0}[0] + {0}[1], 1.0) * {0}[2]", variable_name); - break; - default: - out += "0.0"; - LOG_CRITICAL(Render_OpenGL, "Unknown alpha combiner operation: {}", operation); - break; - } - out += ", 0.0, 1.0)"; -} - -/// Writes the if-statement condition used to evaluate alpha testing -static void AppendAlphaTestCondition(std::string& out, FramebufferRegs::CompareFunc func) { - using CompareFunc = FramebufferRegs::CompareFunc; - switch (func) { - case CompareFunc::Never: - out += "true"; - break; - case CompareFunc::Always: - out += "false"; - break; - case CompareFunc::Equal: - case CompareFunc::NotEqual: - case CompareFunc::LessThan: - case CompareFunc::LessThanOrEqual: - case CompareFunc::GreaterThan: - case CompareFunc::GreaterThanOrEqual: { - static constexpr std::array op{"!=", "==", ">=", ">", "<=", "<"}; - const auto index = static_cast(func) - static_cast(CompareFunc::Equal); - out += fmt::format("int(last_tex_env_out.a * 255.0) {} alphatest_ref", op[index]); - break; - } - - default: - out += "false"; - LOG_CRITICAL(Render_OpenGL, "Unknown alpha test condition {}", func); - break; - } -} - -/// Writes the code to emulate the specified TEV stage -static void WriteTevStage(std::string& out, const PicaFSConfig& config, unsigned index) { - 
const auto stage = - static_cast(config.state.tev_stages[index]); - if (!IsPassThroughTevStage(stage)) { - const std::string index_name = std::to_string(index); - - out += fmt::format("vec3 color_results_{}_1 = ", index_name); - AppendColorModifier(out, config, stage.color_modifier1, stage.color_source1, index_name); - out += fmt::format(";\nvec3 color_results_{}_2 = ", index_name); - AppendColorModifier(out, config, stage.color_modifier2, stage.color_source2, index_name); - out += fmt::format(";\nvec3 color_results_{}_3 = ", index_name); - AppendColorModifier(out, config, stage.color_modifier3, stage.color_source3, index_name); - out += fmt::format(";\nvec3 color_results_{}[3] = vec3[3](color_results_{}_1, " - "color_results_{}_2, color_results_{}_3);\n", - index_name, index_name, index_name, index_name); - - // Round the output of each TEV stage to maintain the PICA's 8 bits of precision - out += fmt::format("vec3 color_output_{} = byteround(", index_name); - AppendColorCombiner(out, stage.color_op, "color_results_" + index_name); - out += ");\n"; - - if (stage.color_op == TevStageConfig::Operation::Dot3_RGBA) { - // result of Dot3_RGBA operation is also placed to the alpha component - out += fmt::format("float alpha_output_{0} = color_output_{0}[0];\n", index_name); - } else { - out += fmt::format("float alpha_results_{}[3] = float[3](", index_name); - AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1, - index_name); - out += ", "; - AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2, - index_name); - out += ", "; - AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3, - index_name); - out += ");\n"; - - out += fmt::format("float alpha_output_{} = byteround(", index_name); - AppendAlphaCombiner(out, stage.alpha_op, "alpha_results_" + index_name); - out += ");\n"; - } - - out += fmt::format("last_tex_env_out = vec4(" - "clamp(color_output_{} * {}.0, vec3(0.0), vec3(1.0)), " - 
"clamp(alpha_output_{} * {}.0, 0.0, 1.0));\n", - index_name, stage.GetColorMultiplier(), index_name, - stage.GetAlphaMultiplier()); - } - - out += "combiner_buffer = next_combiner_buffer;\n"; - - if (config.TevStageUpdatesCombinerBufferColor(index)) - out += "next_combiner_buffer.rgb = last_tex_env_out.rgb;\n"; - - if (config.TevStageUpdatesCombinerBufferAlpha(index)) - out += "next_combiner_buffer.a = last_tex_env_out.a;\n"; -} - -/// Writes the code to emulate fragment lighting -static void WriteLighting(std::string& out, const PicaFSConfig& config) { - const auto& lighting = config.state.lighting; - - // Define lighting globals - out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" - "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" - "vec3 light_vector = vec3(0.0);\n" - "float light_distance = 0.0;\n" - "vec3 refl_value = vec3(0.0);\n" - "vec3 spot_dir = vec3(0.0);\n" - "vec3 half_vector = vec3(0.0);\n" - "float dot_product = 0.0;\n" - "float clamp_highlights = 1.0;\n" - "float geo_factor = 1.0;\n"; - - // Compute fragment normals and tangents - const auto Perturbation = [&] { - return fmt::format("2.0 * ({}).rgb - 1.0", SampleTexture(config, lighting.bump_selector)); - }; - if (config.state.use_custom_normal_map) { - const std::string normal_texel = - fmt::format("2.0 * ({}).rgb - 1.0", SampleTexture(config, 4)); - out += fmt::format("vec3 surface_normal = {};\n", normal_texel); - out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; - } else if (lighting.bump_mode == LightingRegs::LightingBumpMode::NormalMap) { - // Bump mapping is enabled using a normal map - out += fmt::format("vec3 surface_normal = {};\n", Perturbation()); - - // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher - // precision result - if (lighting.bump_renorm) { - constexpr std::string_view val = - "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))"; - out += fmt::format("surface_normal.z = sqrt(max({}, 0.0));\n", val); 
- } - - // The tangent vector is not perturbed by the normal map and is just a unit vector. - out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; - } else if (lighting.bump_mode == LightingRegs::LightingBumpMode::TangentMap) { - // Bump mapping is enabled using a tangent map - out += fmt::format("vec3 surface_tangent = {};\n", Perturbation()); - // Mathematically, recomputing Z-component of the tangent vector won't affect the relevant - // computation below, which is also confirmed on 3DS. So we don't bother recomputing here - // even if 'renorm' is enabled. - - // The normal vector is not perturbed by the tangent map and is just a unit vector. - out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n"; - } else { - // No bump mapping - surface local normal and tangent are just unit vectors - out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n" - "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; - } - - // Rotate the surface-local normal by the interpolated normal quaternion to convert it to - // eyespace. 
- out += "vec4 normalized_normquat = normalize(normquat);\n" - "vec3 normal = quaternion_rotate(normalized_normquat, surface_normal);\n" - "vec3 tangent = quaternion_rotate(normalized_normquat, surface_tangent);\n"; - - if (lighting.enable_shadow) { - std::string shadow_texture = SampleTexture(config, lighting.shadow_selector); - if (lighting.shadow_invert) { - out += fmt::format("vec4 shadow = vec4(1.0) - {};\n", shadow_texture); - } else { - out += fmt::format("vec4 shadow = {};\n", shadow_texture); - } - } else { - out += "vec4 shadow = vec4(1.0);\n"; - } - - // Samples the specified lookup table for specular lighting - auto GetLutValue = [&lighting](LightingRegs::LightingSampler sampler, unsigned light_num, - LightingRegs::LightingLutInput input, bool abs) { - std::string index; - switch (input) { - case LightingRegs::LightingLutInput::NH: - index = "dot(normal, normalize(half_vector))"; - break; - - case LightingRegs::LightingLutInput::VH: - index = "dot(normalize(view), normalize(half_vector))"; - break; - - case LightingRegs::LightingLutInput::NV: - index = "dot(normal, normalize(view))"; - break; - - case LightingRegs::LightingLutInput::LN: - index = "dot(light_vector, normal)"; - break; - - case LightingRegs::LightingLutInput::SP: - index = "dot(light_vector, spot_dir)"; - break; - - case LightingRegs::LightingLutInput::CP: - // CP input is only available with configuration 7 - if (lighting.config == LightingRegs::LightingConfig::Config7) { - // Note: even if the normal vector is modified by normal map, which is not the - // normal of the tangent plane anymore, the half angle vector is still projected - // using the modified normal vector. - constexpr std::string_view half_angle_proj = - "normalize(half_vector) - normal * dot(normal, normalize(half_vector))"; - // Note: the half angle vector projection is confirmed not normalized before the dot - // product. The result is in fact not cos(phi) as the name suggested. 
- index = fmt::format("dot({}, tangent)", half_angle_proj); - } else { - index = "0.0"; - } - break; - - default: - LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input {}", (int)input); - UNIMPLEMENTED(); - index = "0.0"; - break; - } - - const auto sampler_index = static_cast(sampler); - - if (abs) { - // LUT index is in the range of (0.0, 1.0) - index = lighting.light[light_num].two_sided_diffuse - ? fmt::format("abs({})", index) - : fmt::format("max({}, 0.0)", index); - return fmt::format("LookupLightingLUTUnsigned({}, {})", sampler_index, index); - } else { - // LUT index is in the range of (-1.0, 1.0) - return fmt::format("LookupLightingLUTSigned({}, {})", sampler_index, index); - } - }; - - // Write the code to emulate each enabled light - for (unsigned light_index = 0; light_index < lighting.src_num; ++light_index) { - const auto& light_config = lighting.light[light_index]; - const std::string light_src = fmt::format("light_src[{}]", light_config.num); - - // Compute light vector (directional or positional) - if (light_config.directional) { - out += fmt::format("light_vector = {}.position;\n", light_src); - } else { - out += fmt::format("light_vector = {}.position + view;\n", light_src); - } - out += fmt::format("light_distance = length(light_vector);\n", light_src); - out += fmt::format("light_vector = normalize(light_vector);\n", light_src); - - out += fmt::format("spot_dir = {}.spot_direction;\n", light_src); - out += "half_vector = normalize(view) + light_vector;\n"; - - // Compute dot product of light_vector and normal, adjust if lighting is one-sided or - // two-sided - out += std::string("dot_product = ") + (light_config.two_sided_diffuse - ? 
"abs(dot(light_vector, normal));\n" - : "max(dot(light_vector, normal), 0.0);\n"); - - // If enabled, clamp specular component if lighting result is zero - if (lighting.clamp_highlights) { - out += "clamp_highlights = sign(dot_product);\n"; - } - - // If enabled, compute spot light attenuation value - std::string spot_atten = "1.0"; - if (light_config.spot_atten_enable && - LightingRegs::IsLightingSamplerSupported( - lighting.config, LightingRegs::LightingSampler::SpotlightAttenuation)) { - const std::string value = - GetLutValue(LightingRegs::SpotlightAttenuationSampler(light_config.num), - light_config.num, lighting.lut_sp.type, lighting.lut_sp.abs_input); - spot_atten = fmt::format("({:#} * {})", lighting.lut_sp.scale, value); - } - - // If enabled, compute distance attenuation value - std::string dist_atten = "1.0"; - if (light_config.dist_atten_enable) { - const std::string index = fmt::format("clamp({}.dist_atten_scale * light_distance " - "+ {}.dist_atten_bias, 0.0, 1.0)", - light_src, light_src, light_src); - const auto sampler = LightingRegs::DistanceAttenuationSampler(light_config.num); - dist_atten = fmt::format("LookupLightingLUTUnsigned({}, {})", sampler, index); - } - - if (light_config.geometric_factor_0 || light_config.geometric_factor_1) { - out += "geo_factor = dot(half_vector, half_vector);\n" - "geo_factor = geo_factor == 0.0 ? 
0.0 : min(" - "dot_product / geo_factor, 1.0);\n"; - } - - // Specular 0 component - std::string d0_lut_value = "1.0"; - if (lighting.lut_d0.enable && - LightingRegs::IsLightingSamplerSupported( - lighting.config, LightingRegs::LightingSampler::Distribution0)) { - // Lookup specular "distribution 0" LUT value - const std::string value = - GetLutValue(LightingRegs::LightingSampler::Distribution0, light_config.num, - lighting.lut_d0.type, lighting.lut_d0.abs_input); - d0_lut_value = fmt::format("({:#} * {})", lighting.lut_d0.scale, value); - } - std::string specular_0 = fmt::format("({} * {}.specular_0)", d0_lut_value, light_src); - if (light_config.geometric_factor_0) { - specular_0 = fmt::format("({} * geo_factor)", specular_0); - } - - // If enabled, lookup ReflectRed value, otherwise, 1.0 is used - if (lighting.lut_rr.enable && - LightingRegs::IsLightingSamplerSupported(lighting.config, - LightingRegs::LightingSampler::ReflectRed)) { - std::string value = - GetLutValue(LightingRegs::LightingSampler::ReflectRed, light_config.num, - lighting.lut_rr.type, lighting.lut_rr.abs_input); - value = fmt::format("({:#} * {})", lighting.lut_rr.scale, value); - out += fmt::format("refl_value.r = {};\n", value); - } else { - out += "refl_value.r = 1.0;\n"; - } - - // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used - if (lighting.lut_rg.enable && - LightingRegs::IsLightingSamplerSupported(lighting.config, - LightingRegs::LightingSampler::ReflectGreen)) { - std::string value = - GetLutValue(LightingRegs::LightingSampler::ReflectGreen, light_config.num, - lighting.lut_rg.type, lighting.lut_rg.abs_input); - value = fmt::format("({:#} * {})", lighting.lut_rg.scale, value); - out += fmt::format("refl_value.g = {};\n", value); - } else { - out += "refl_value.g = refl_value.r;\n"; - } - - // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used - if (lighting.lut_rb.enable && - LightingRegs::IsLightingSamplerSupported(lighting.config, - 
LightingRegs::LightingSampler::ReflectBlue)) { - std::string value = - GetLutValue(LightingRegs::LightingSampler::ReflectBlue, light_config.num, - lighting.lut_rb.type, lighting.lut_rb.abs_input); - value = fmt::format("({:#} * {})", lighting.lut_rb.scale, value); - out += fmt::format("refl_value.b = {};\n", value); - } else { - out += "refl_value.b = refl_value.r;\n"; - } - - // Specular 1 component - std::string d1_lut_value = "1.0"; - if (lighting.lut_d1.enable && - LightingRegs::IsLightingSamplerSupported( - lighting.config, LightingRegs::LightingSampler::Distribution1)) { - // Lookup specular "distribution 1" LUT value - const std::string value = - GetLutValue(LightingRegs::LightingSampler::Distribution1, light_config.num, - lighting.lut_d1.type, lighting.lut_d1.abs_input); - d1_lut_value = fmt::format("({:#} * {})", lighting.lut_d1.scale, value); - } - std::string specular_1 = - fmt::format("({} * refl_value * {}.specular_1)", d1_lut_value, light_src); - if (light_config.geometric_factor_1) { - specular_1 = fmt::format("({} * geo_factor)", specular_1); - } - - // Fresnel - // Note: only the last entry in the light slots applies the Fresnel factor - if (light_index == lighting.src_num - 1 && lighting.lut_fr.enable && - LightingRegs::IsLightingSamplerSupported(lighting.config, - LightingRegs::LightingSampler::Fresnel)) { - // Lookup fresnel LUT value - std::string value = - GetLutValue(LightingRegs::LightingSampler::Fresnel, light_config.num, - lighting.lut_fr.type, lighting.lut_fr.abs_input); - value = fmt::format("({:#} * {})", lighting.lut_fr.scale, value); - - // Enabled for diffuse lighting alpha component - if (lighting.enable_primary_alpha) { - out += fmt::format("diffuse_sum.a = {};\n", value); - } - - // Enabled for the specular lighting alpha component - if (lighting.enable_secondary_alpha) { - out += fmt::format("specular_sum.a = {};\n", value); - } - } - - bool shadow_primary_enable = lighting.shadow_primary && light_config.shadow_enable; - bool 
shadow_secondary_enable = lighting.shadow_secondary && light_config.shadow_enable; - std::string shadow_primary = shadow_primary_enable ? " * shadow.rgb" : ""; - std::string shadow_secondary = shadow_secondary_enable ? " * shadow.rgb" : ""; - - // Compute primary fragment color (diffuse lighting) function - out += fmt::format( - "diffuse_sum.rgb += (({}.diffuse * dot_product) + {}.ambient) * {} * {}{};\n", - light_src, light_src, dist_atten, spot_atten, shadow_primary); - - // Compute secondary fragment color (specular lighting) function - out += fmt::format("specular_sum.rgb += ({} + {}) * clamp_highlights * {} * {}{};\n", - specular_0, specular_1, dist_atten, spot_atten, shadow_secondary); - } - - // Apply shadow attenuation to alpha components if enabled - if (lighting.shadow_alpha) { - if (lighting.enable_primary_alpha) { - out += "diffuse_sum.a *= shadow.a;\n"; - } - if (lighting.enable_secondary_alpha) { - out += "specular_sum.a *= shadow.a;\n"; - } - } - - // Sum final lighting result - out += "diffuse_sum.rgb += lighting_global_ambient;\n" - "primary_fragment_color = clamp(diffuse_sum, vec4(0.0), vec4(1.0));\n" - "secondary_fragment_color = clamp(specular_sum, vec4(0.0), vec4(1.0));\n"; -} - -using ProcTexClamp = TexturingRegs::ProcTexClamp; -using ProcTexShift = TexturingRegs::ProcTexShift; -using ProcTexCombiner = TexturingRegs::ProcTexCombiner; -using ProcTexFilter = TexturingRegs::ProcTexFilter; - -static void AppendProcTexShiftOffset(std::string& out, std::string_view v, ProcTexShift mode, - ProcTexClamp clamp_mode) { - const std::string_view offset = (clamp_mode == ProcTexClamp::MirroredRepeat) ? 
"1.0" : "0.5"; - switch (mode) { - case ProcTexShift::None: - out += "0.0"; - break; - case ProcTexShift::Odd: - out += fmt::format("{} * float((int({}) / 2) % 2)", offset, v); - break; - case ProcTexShift::Even: - out += fmt::format("{} * float(((int({}) + 1) / 2) % 2)", offset, v); - break; - default: - LOG_CRITICAL(HW_GPU, "Unknown shift mode {}", mode); - out += "0.0"; - break; - } -} - -static void AppendProcTexClamp(std::string& out, std::string_view var, ProcTexClamp mode) { - switch (mode) { - case ProcTexClamp::ToZero: - out += fmt::format("{0} = {0} > 1.0 ? 0 : {0};\n", var); - break; - case ProcTexClamp::ToEdge: - out += fmt::format("{0} = min({0}, 1.0);\n", var); - break; - case ProcTexClamp::SymmetricalRepeat: - out += fmt::format("{0} = fract({0});\n", var); - break; - case ProcTexClamp::MirroredRepeat: { - out += fmt::format("{0} = int({0}) % 2 == 0 ? fract({0}) : 1.0 - fract({0});\n", var); - break; - } - case ProcTexClamp::Pulse: - out += fmt::format("{0} = {0} > 0.5 ? 1.0 : 0.0;\n", var); - break; - default: - LOG_CRITICAL(HW_GPU, "Unknown clamp mode {}", mode); - out += fmt::format("{0} = min({0}, 1.0);\n", var); - break; - } -} - -static void AppendProcTexCombineAndMap(std::string& out, ProcTexCombiner combiner, - std::string_view offset) { - const auto combined = [combiner]() -> std::string_view { - switch (combiner) { - case ProcTexCombiner::U: - return "u"; - case ProcTexCombiner::U2: - return "(u * u)"; - case TexturingRegs::ProcTexCombiner::V: - return "v"; - case TexturingRegs::ProcTexCombiner::V2: - return "(v * v)"; - case TexturingRegs::ProcTexCombiner::Add: - return "((u + v) * 0.5)"; - case TexturingRegs::ProcTexCombiner::Add2: - return "((u * u + v * v) * 0.5)"; - case TexturingRegs::ProcTexCombiner::SqrtAdd2: - return "min(sqrt(u * u + v * v), 1.0)"; - case TexturingRegs::ProcTexCombiner::Min: - return "min(u, v)"; - case TexturingRegs::ProcTexCombiner::Max: - return "max(u, v)"; - case TexturingRegs::ProcTexCombiner::RMax: - return 
"min(((u + v) * 0.5 + sqrt(u * u + v * v)) * 0.5, 1.0)"; - default: - LOG_CRITICAL(HW_GPU, "Unknown combiner {}", combiner); - return "0.0"; - } - }(); - - out += fmt::format("ProcTexLookupLUT({}, {})", offset, combined); -} - -static void AppendProcTexSampler(std::string& out, const PicaFSConfig& config) { - // LUT sampling uitlity - // For NoiseLUT/ColorMap/AlphaMap, coord=0.0 is lut[0], coord=127.0/128.0 is lut[127] and - // coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using - // value entries and difference entries. - out += R"( -float ProcTexLookupLUT(int offset, float coord) { - coord *= 128.0; - float index_i = clamp(floor(coord), 0.0, 127.0); - float index_f = coord - index_i; // fract() cannot be used here because 128.0 needs to be - // extracted as index_i = 127.0 and index_f = 1.0 - vec2 entry = texelFetch(texture_buffer_lut_rg, int(index_i) + offset).rg; - return clamp(entry.r + entry.g * index_f, 0.0, 1.0); -} - )"; - - // Noise utility - if (config.state.proctex.noise_enable) { - // See swrasterizer/proctex.cpp for more information about these functions - out += R"( -int ProcTexNoiseRand1D(int v) { - const int table[] = int[](0,4,10,8,4,9,7,12,5,15,13,14,11,15,2,11); - return ((v % 9 + 2) * 3 & 0xF) ^ table[(v / 9) & 0xF]; -} - -float ProcTexNoiseRand2D(vec2 point) { - const int table[] = int[](10,2,15,8,0,7,4,5,5,13,2,6,13,9,3,14); - int u2 = ProcTexNoiseRand1D(int(point.x)); - int v2 = ProcTexNoiseRand1D(int(point.y)); - v2 += ((u2 & 3) == 1) ? 
4 : 0; - v2 ^= (u2 & 1) * 6; - v2 += 10 + u2; - v2 &= 0xF; - v2 ^= table[u2]; - return -1.0 + float(v2) * 2.0/ 15.0; -} - -float ProcTexNoiseCoef(vec2 x) { - vec2 grid = 9.0 * proctex_noise_f * abs(x + proctex_noise_p); - vec2 point = floor(grid); - vec2 frac = grid - point; - - float g0 = ProcTexNoiseRand2D(point) * (frac.x + frac.y); - float g1 = ProcTexNoiseRand2D(point + vec2(1.0, 0.0)) * (frac.x + frac.y - 1.0); - float g2 = ProcTexNoiseRand2D(point + vec2(0.0, 1.0)) * (frac.x + frac.y - 1.0); - float g3 = ProcTexNoiseRand2D(point + vec2(1.0, 1.0)) * (frac.x + frac.y - 2.0); - - float x_noise = ProcTexLookupLUT(proctex_noise_lut_offset, frac.x); - float y_noise = ProcTexLookupLUT(proctex_noise_lut_offset, frac.y); - float x0 = mix(g0, g1, x_noise); - float x1 = mix(g2, g3, x_noise); - return mix(x0, x1, y_noise); -} - )"; - } - - out += "vec4 SampleProcTexColor(float lut_coord, int level) {\n"; - out += fmt::format("int lut_width = {} >> level;\n", config.state.proctex.lut_width); - // Offsets for level 4-7 seem to be hardcoded - out += fmt::format("int lut_offsets[8] = int[]({}, {}, {}, {}, 0xF0, 0xF8, 0xFC, 0xFE);\n", - config.state.proctex.lut_offset0, config.state.proctex.lut_offset1, - config.state.proctex.lut_offset2, config.state.proctex.lut_offset3); - out += "int lut_offset = lut_offsets[level];\n"; - // For the color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1] - out += "lut_coord *= float(lut_width - 1);\n"; - - switch (config.state.proctex.lut_filter) { - case ProcTexFilter::Linear: - case ProcTexFilter::LinearMipmapLinear: - case ProcTexFilter::LinearMipmapNearest: - out += "int lut_index_i = int(lut_coord) + lut_offset;\n"; - out += "float lut_index_f = fract(lut_coord);\n"; - out += "return texelFetch(texture_buffer_lut_rgba, lut_index_i + " - "proctex_lut_offset) + " - "lut_index_f * " - "texelFetch(texture_buffer_lut_rgba, lut_index_i + proctex_diff_lut_offset);\n"; - break; - case ProcTexFilter::Nearest: - case 
ProcTexFilter::NearestMipmapLinear: - case ProcTexFilter::NearestMipmapNearest: - out += "lut_coord += float(lut_offset);\n"; - out += "return texelFetch(texture_buffer_lut_rgba, int(round(lut_coord)) + " - "proctex_lut_offset);\n"; - break; - } - - out += "}\n"; - - out += "vec4 ProcTex() {\n"; - if (config.state.proctex.coord < 3) { - out += fmt::format("vec2 uv = abs(texcoord{});\n", config.state.proctex.coord); - } else { - LOG_CRITICAL(Render_OpenGL, "Unexpected proctex.coord >= 3"); - out += "vec2 uv = abs(texcoord0);\n"; - } - - // This LOD formula is the same as the LOD upper limit defined in OpenGL. - // f(x, y) <= m_u + m_v + m_w - // (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail) - // Note: this is different from the one normal 2D textures use. - out += "vec2 duv = max(abs(dFdx(uv)), abs(dFdy(uv)));\n"; - // unlike normal texture, the bias is inside the log2 - out += fmt::format("float lod = log2(abs(float({}) * proctex_bias) * (duv.x + duv.y));\n", - config.state.proctex.lut_width); - out += "if (proctex_bias == 0.0) lod = 0.0;\n"; - out += fmt::format("lod = clamp(lod, {:#}, {:#});\n", - std::max(0.0f, static_cast(config.state.proctex.lod_min)), - std::min(7.0f, static_cast(config.state.proctex.lod_max))); - // Get shift offset before noise generation - out += "float u_shift = "; - AppendProcTexShiftOffset(out, "uv.y", config.state.proctex.u_shift, - config.state.proctex.u_clamp); - out += ";\n"; - out += "float v_shift = "; - AppendProcTexShiftOffset(out, "uv.x", config.state.proctex.v_shift, - config.state.proctex.v_clamp); - out += ";\n"; - - // Generate noise - if (config.state.proctex.noise_enable) { - out += "uv += proctex_noise_a * ProcTexNoiseCoef(uv);\n" - "uv = abs(uv);\n"; - } - - // Shift - out += "float u = uv.x + u_shift;\n" - "float v = uv.y + v_shift;\n"; - - // Clamp - AppendProcTexClamp(out, "u", config.state.proctex.u_clamp); - AppendProcTexClamp(out, "v", config.state.proctex.v_clamp); - - // Combine and map - out 
+= "float lut_coord = "; - AppendProcTexCombineAndMap(out, config.state.proctex.color_combiner, - "proctex_color_map_offset"); - out += ";\n"; - - switch (config.state.proctex.lut_filter) { - case ProcTexFilter::Linear: - case ProcTexFilter::Nearest: - out += "vec4 final_color = SampleProcTexColor(lut_coord, 0);\n"; - break; - case ProcTexFilter::NearestMipmapNearest: - case ProcTexFilter::LinearMipmapNearest: - out += "vec4 final_color = SampleProcTexColor(lut_coord, int(round(lod)));\n"; - break; - case ProcTexFilter::NearestMipmapLinear: - case ProcTexFilter::LinearMipmapLinear: - out += "int lod_i = int(lod);\n" - "float lod_f = fract(lod);\n" - "vec4 final_color = mix(SampleProcTexColor(lut_coord, lod_i), " - "SampleProcTexColor(lut_coord, lod_i + 1), lod_f);\n"; - break; - } - - if (config.state.proctex.separate_alpha) { - // Note: in separate alpha mode, the alpha channel skips the color LUT look up stage. It - // uses the output of CombineAndMap directly instead. - out += "float final_alpha = "; - AppendProcTexCombineAndMap(out, config.state.proctex.alpha_combiner, - "proctex_alpha_map_offset"); - out += ";\n"; - out += "return vec4(final_color.xyz, final_alpha);\n}\n"; - } else { - out += "return final_color;\n}\n"; - } -} - -static void WriteLogicOp(std::string& out, const PicaFSConfig& config) { - if (!GLES || config.state.alphablend_enable) { - return; - } - switch (config.state.logic_op) { - case FramebufferRegs::LogicOp::Clear: - out += "color = vec4(0);\n"; - break; - case FramebufferRegs::LogicOp::Set: - out += "color = vec4(1);\n"; - break; - case FramebufferRegs::LogicOp::Copy: - // Take the color output as-is - break; - case FramebufferRegs::LogicOp::CopyInverted: - out += "color = ~color;\n"; - break; - case FramebufferRegs::LogicOp::NoOp: - // We need to discard the color, but not necessarily the depth. This is not possible - // with fragment shader alone, so we emulate this behavior on GLES with glColorMask. 
- break; - default: - LOG_CRITICAL(HW_GPU, "Unhandled logic_op {:x}", static_cast(config.state.logic_op)); - UNIMPLEMENTED(); - } -} - -static void WriteBlending(std::string& out, const PicaFSConfig& config) { - if (!config.state.rgb_blend.emulate_blending && !config.state.alpha_blend.emulate_blending) - [[likely]] { - return; - } - - using BlendFactor = Pica::FramebufferRegs::BlendFactor; - out += R"( -vec4 source_color = last_tex_env_out; -#if defined(GL_EXT_shader_framebuffer_fetch) -vec4 dest_color = color; -#elif defined(GL_ARM_shader_framebuffer_fetch) -vec4 dest_color = gl_LastFragColorARM; -#else -vec4 dest_color = texelFetch(colorBuffer, ivec2(gl_FragCoord.xy), 0); -#endif -)"; - const auto get_factor = [&](BlendFactor factor) -> std::string { - switch (factor) { - case BlendFactor::Zero: - return "vec4(0.f)"; - case BlendFactor::One: - return "vec4(1.f)"; - case BlendFactor::SourceColor: - return "source_color"; - case BlendFactor::OneMinusSourceColor: - return "vec4(1.f) - source_color"; - case BlendFactor::DestColor: - return "dest_color"; - case BlendFactor::OneMinusDestColor: - return "vec4(1.f) - dest_color"; - case BlendFactor::SourceAlpha: - return "source_color.aaaa"; - case BlendFactor::OneMinusSourceAlpha: - return "vec4(1.f) - source_color.aaaa"; - case BlendFactor::DestAlpha: - return "dest_color.aaaa"; - case BlendFactor::OneMinusDestAlpha: - return "vec4(1.f) - dest_color.aaaa"; - case BlendFactor::ConstantColor: - return "blend_color"; - case BlendFactor::OneMinusConstantColor: - return "vec4(1.f) - blend_color"; - case BlendFactor::ConstantAlpha: - return "blend_color.aaaa"; - case BlendFactor::OneMinusConstantAlpha: - return "vec4(1.f) - blend_color.aaaa"; - default: - LOG_CRITICAL(Render_OpenGL, "Unknown blend factor {}", factor); - return "vec4(1.f)"; - } - }; - const auto get_func = [](Pica::FramebufferRegs::BlendEquation eq) { - return eq == Pica::FramebufferRegs::BlendEquation::Min ? 
"min" : "max"; - }; - - if (config.state.rgb_blend.emulate_blending) { - out += fmt::format( - "last_tex_env_out.rgb = {}(source_color.rgb * ({}).rgb, dest_color.rgb * ({}).rgb);\n", - get_func(config.state.rgb_blend.eq), get_factor(config.state.rgb_blend.src_factor), - get_factor(config.state.rgb_blend.dst_factor)); - } - if (config.state.alpha_blend.emulate_blending) { - out += fmt::format( - "last_tex_env_out.a = {}(source_color.a * ({}).a, dest_color.a * ({}).a);\n", - get_func(config.state.alpha_blend.eq), get_factor(config.state.alpha_blend.src_factor), - get_factor(config.state.alpha_blend.dst_factor)); - } -} - -ShaderDecompiler::ProgramResult GenerateFragmentShader(const PicaFSConfig& config, - bool separable_shader) { - const auto& state = config.state; - std::string out; - - if (separable_shader && !GLES) { - out += "#extension GL_ARB_separate_shader_objects : enable\n"; - } - - // The extension directives need to come before non-preprocessor tokens - out += R"( -#if defined(GL_EXT_shader_framebuffer_fetch) -#extension GL_EXT_shader_framebuffer_fetch : enable -#elif defined(GL_ARM_shader_framebuffer_fetch) -#extension GL_ARM_shader_framebuffer_fetch : enable -#else -#define CITRA_NO_FRAMEBUFFER_FETCH 1 -#endif - -)"; - - if (GLES) { - out += fragment_shader_precision_OES; - } - - out += GetVertexInterfaceDeclaration(false, separable_shader); - - out += R"( -#ifndef CITRA_GLES -in vec4 gl_FragCoord; -#endif // CITRA_GLES - -layout(location = 0) out vec4 color; - -layout(binding = 0) uniform sampler2D tex0; -layout(binding = 1) uniform sampler2D tex1; -layout(binding = 2) uniform sampler2D tex2; -layout(binding = 3) uniform samplerBuffer texture_buffer_lut_lf; -layout(binding = 4) uniform samplerBuffer texture_buffer_lut_rg; -layout(binding = 5) uniform samplerBuffer texture_buffer_lut_rgba; -layout(binding = 6) uniform samplerCube tex_cube; -layout(binding = 7) uniform sampler2D tex_normal; - -layout(binding = 0, r32ui) uniform readonly uimage2D 
shadow_texture_px; -layout(binding = 1, r32ui) uniform readonly uimage2D shadow_texture_nx; -layout(binding = 2, r32ui) uniform readonly uimage2D shadow_texture_py; -layout(binding = 3, r32ui) uniform readonly uimage2D shadow_texture_ny; -layout(binding = 4, r32ui) uniform readonly uimage2D shadow_texture_pz; -layout(binding = 5, r32ui) uniform readonly uimage2D shadow_texture_nz; -layout(binding = 6, r32ui) uniform uimage2D shadow_buffer; - -#if defined(CITRA_NO_FRAMEBUFFER_FETCH) -layout(location = 10) uniform sampler2D colorBuffer; -#endif -)"; - - out += UniformBlockDef; - - out += R"( -// Rotate the vector v by the quaternion q -vec3 quaternion_rotate(vec4 q, vec3 v) { - return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v); -} - -float LookupLightingLUT(int lut_index, int index, float delta) { - vec2 entry = texelFetch(texture_buffer_lut_lf, lighting_lut_offset[lut_index >> 2][lut_index & 3] + index).rg; - return entry.r + entry.g * delta; -} - -float LookupLightingLUTUnsigned(int lut_index, float pos) { - int index = int(clamp(floor(pos * 256.0), 0.f, 255.f)); - float delta = pos * 256.0 - float(index); - return LookupLightingLUT(lut_index, index, delta); -} - -float LookupLightingLUTSigned(int lut_index, float pos) { - int index = int(clamp(floor(pos * 128.0), -128.f, 127.f)); - float delta = pos * 128.0 - float(index); - if (index < 0) index += 256; - return LookupLightingLUT(lut_index, index, delta); -} - -float byteround(float x) { - return round(x * 255.0) * (1.0 / 255.0); -} - -vec2 byteround(vec2 x) { - return round(x * 255.0) * (1.0 / 255.0); -} - -vec3 byteround(vec3 x) { - return round(x * 255.0) * (1.0 / 255.0); -} - -vec4 byteround(vec4 x) { - return round(x * 255.0) * (1.0 / 255.0); -} - -// PICA's LOD formula for 2D textures. -// This LOD formula is the same as the LOD lower limit defined in OpenGL. 
-// f(x, y) >= max{m_u, m_v, m_w} -// (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail) -float getLod(vec2 coord) { - vec2 d = max(abs(dFdx(coord)), abs(dFdy(coord))); - return log2(max(d.x, d.y)); -} - -uvec2 DecodeShadow(uint pixel) { - return uvec2(pixel >> 8, pixel & 0xFFu); -} - -uint EncodeShadow(uvec2 pixel) { - return (pixel.x << 8) | pixel.y; -} - -float CompareShadow(uint pixel, uint z) { - uvec2 p = DecodeShadow(pixel); - return mix(float(p.y) * (1.0 / 255.0), 0.0, p.x <= z); -} - -float SampleShadow2D(ivec2 uv, uint z) { - if (any(bvec4( lessThan(uv, ivec2(0)), greaterThanEqual(uv, imageSize(shadow_texture_px)) ))) - return 1.0; - return CompareShadow(imageLoad(shadow_texture_px, uv).x, z); -} - -float mix2(vec4 s, vec2 a) { - vec2 t = mix(s.xy, s.zw, a.yy); - return mix(t.x, t.y, a.x); -} - -vec4 shadowTexture(vec2 uv, float w) { -)"; - if (!config.state.shadow_texture_orthographic) { - out += "uv /= w;"; - } - out += "uint z = uint(max(0, int(min(abs(w), 1.0) * float(0xFFFFFF)) - shadow_texture_bias));"; - out += R"( - vec2 coord = vec2(imageSize(shadow_texture_px)) * uv - vec2(0.5); - vec2 coord_floor = floor(coord); - vec2 f = coord - coord_floor; - ivec2 i = ivec2(coord_floor); - vec4 s = vec4( - SampleShadow2D(i , z), - SampleShadow2D(i + ivec2(1, 0), z), - SampleShadow2D(i + ivec2(0, 1), z), - SampleShadow2D(i + ivec2(1, 1), z)); - return vec4(mix2(s, f)); -} - -vec4 shadowTextureCube(vec2 uv, float w) { - ivec2 size = imageSize(shadow_texture_px); - vec3 c = vec3(uv, w); - vec3 a = abs(c); - if (a.x > a.y && a.x > a.z) { - w = a.x; - uv = -c.zy; - if (c.x < 0.0) uv.x = -uv.x; - } else if (a.y > a.z) { - w = a.y; - uv = c.xz; - if (c.y < 0.0) uv.y = -uv.y; - } else { - w = a.z; - uv = -c.xy; - if (c.z > 0.0) uv.x = -uv.x; - } -)"; - out += "uint z = uint(max(0, int(min(w, 1.0) * float(0xFFFFFF)) - shadow_texture_bias));"; - out += R"( - vec2 coord = vec2(size) * (uv / w * vec2(0.5) + vec2(0.5)) - vec2(0.5); - vec2 coord_floor = 
floor(coord); - vec2 f = coord - coord_floor; - ivec2 i00 = ivec2(coord_floor); - ivec2 i10 = i00 + ivec2(1, 0); - ivec2 i01 = i00 + ivec2(0, 1); - ivec2 i11 = i00 + ivec2(1, 1); - ivec2 cmin = ivec2(0), cmax = size - ivec2(1, 1); - i00 = clamp(i00, cmin, cmax); - i10 = clamp(i10, cmin, cmax); - i01 = clamp(i01, cmin, cmax); - i11 = clamp(i11, cmin, cmax); - uvec4 pixels; - // This part should have been refactored into functions, - // but many drivers don't like passing uimage2D as parameters - if (a.x > a.y && a.x > a.z) { - if (c.x > 0.0) - pixels = uvec4( - imageLoad(shadow_texture_px, i00).r, - imageLoad(shadow_texture_px, i10).r, - imageLoad(shadow_texture_px, i01).r, - imageLoad(shadow_texture_px, i11).r); - else - pixels = uvec4( - imageLoad(shadow_texture_nx, i00).r, - imageLoad(shadow_texture_nx, i10).r, - imageLoad(shadow_texture_nx, i01).r, - imageLoad(shadow_texture_nx, i11).r); - } else if (a.y > a.z) { - if (c.y > 0.0) - pixels = uvec4( - imageLoad(shadow_texture_py, i00).r, - imageLoad(shadow_texture_py, i10).r, - imageLoad(shadow_texture_py, i01).r, - imageLoad(shadow_texture_py, i11).r); - else - pixels = uvec4( - imageLoad(shadow_texture_ny, i00).r, - imageLoad(shadow_texture_ny, i10).r, - imageLoad(shadow_texture_ny, i01).r, - imageLoad(shadow_texture_ny, i11).r); - } else { - if (c.z > 0.0) - pixels = uvec4( - imageLoad(shadow_texture_pz, i00).r, - imageLoad(shadow_texture_pz, i10).r, - imageLoad(shadow_texture_pz, i01).r, - imageLoad(shadow_texture_pz, i11).r); - else - pixels = uvec4( - imageLoad(shadow_texture_nz, i00).r, - imageLoad(shadow_texture_nz, i10).r, - imageLoad(shadow_texture_nz, i01).r, - imageLoad(shadow_texture_nz, i11).r); - } - vec4 s = vec4( - CompareShadow(pixels.x, z), - CompareShadow(pixels.y, z), - CompareShadow(pixels.z, z), - CompareShadow(pixels.w, z)); - return vec4(mix2(s, f)); -} -)"; - - if (config.state.proctex.enable) - AppendProcTexSampler(out, config); - - // We round the interpolated primary color to the 
nearest 1/255th - // This maintains the PICA's 8 bits of precision - out += R"( -void main() { -vec4 rounded_primary_color = byteround(primary_color); -vec4 primary_fragment_color = vec4(0.0); -vec4 secondary_fragment_color = vec4(0.0); -)"; - - // Do not do any sort of processing if it's obvious we're not going to pass the alpha test - if (state.alpha_test_func == FramebufferRegs::CompareFunc::Never) { - out += "discard; }"; - return {std::move(out)}; - } - - // Append the scissor test - if (state.scissor_test_mode != RasterizerRegs::ScissorMode::Disabled) { - out += "if ("; - // Negate the condition if we have to keep only the pixels outside the scissor box - if (state.scissor_test_mode == RasterizerRegs::ScissorMode::Include) { - out += '!'; - } - out += "(gl_FragCoord.x >= float(scissor_x1) && " - "gl_FragCoord.y >= float(scissor_y1) && " - "gl_FragCoord.x < float(scissor_x2) && " - "gl_FragCoord.y < float(scissor_y2))) discard;\n"; - } - - // After perspective divide, OpenGL transform z_over_w from [-1, 1] to [near, far]. Here we use - // default near = 0 and far = 1, and undo the transformation to get the original z_over_w, then - // do our own transformation according to PICA specification. 
- out += "float z_over_w = 2.0 * gl_FragCoord.z - 1.0;\n" - "float depth = z_over_w * depth_scale + depth_offset;\n"; - if (state.depthmap_enable == RasterizerRegs::DepthBuffering::WBuffering) { - out += "depth /= gl_FragCoord.w;\n"; - } - - if (state.lighting.enable) - WriteLighting(out, config); - - out += "vec4 combiner_buffer = vec4(0.0);\n" - "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n" - "vec4 last_tex_env_out = rounded_primary_color;\n"; - - for (std::size_t index = 0; index < state.tev_stages.size(); ++index) { - WriteTevStage(out, config, static_cast(index)); - } - - if (state.alpha_test_func != FramebufferRegs::CompareFunc::Always) { - out += "if ("; - AppendAlphaTestCondition(out, state.alpha_test_func); - out += ") discard;\n"; - } - - // Append fog combiner - if (state.fog_mode == TexturingRegs::FogMode::Fog) { - // Get index into fog LUT - if (state.fog_flip) { - out += "float fog_index = (1.0 - float(depth)) * 128.0;\n"; - } else { - out += "float fog_index = depth * 128.0;\n"; - } - - // Generate clamped fog factor from LUT for given fog index - out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n" - "float fog_f = fog_index - fog_i;\n" - "vec2 fog_lut_entry = texelFetch(texture_buffer_lut_lf, int(fog_i) + " - "fog_lut_offset).rg;\n" - "float fog_factor = fog_lut_entry.r + fog_lut_entry.g * fog_f;\n" - "fog_factor = clamp(fog_factor, 0.0, 1.0);\n"; - - // Blend the fog - out += "last_tex_env_out.rgb = mix(fog_color.rgb, last_tex_env_out.rgb, fog_factor);\n"; - } else if (state.fog_mode == TexturingRegs::FogMode::Gas) { - Core::System::GetInstance().TelemetrySession().AddField( - Common::Telemetry::FieldType::Session, "VideoCore_Pica_UseGasMode", true); - LOG_CRITICAL(Render_OpenGL, "Unimplemented gas mode"); - out += "discard; }"; - return {std::move(out)}; - } - - if (state.shadow_rendering) { - out += R"( -uint d = uint(clamp(depth, 0.0, 1.0) * float(0xFFFFFF)); -uint s = uint(last_tex_env_out.g * float(0xFF)); -ivec2 
image_coord = ivec2(gl_FragCoord.xy); - -uint old = imageLoad(shadow_buffer, image_coord).x; -uint new; -uint old2; -do { - old2 = old; - - uvec2 ref = DecodeShadow(old); - if (d < ref.x) { - if (s == 0u) { - ref.x = d; - } else { - s = uint(float(s) / (shadow_bias_constant + shadow_bias_linear * float(d) / float(ref.x))); - ref.y = min(s, ref.y); - } - } - new = EncodeShadow(ref); - -} while ((old = imageAtomicCompSwap(shadow_buffer, image_coord, old, new)) != old2); -)"; - } else { - out += "gl_FragDepth = depth;\n"; - // Round the final fragment color to maintain the PICA's 8 bits of precision - out += "last_tex_env_out = byteround(last_tex_env_out);\n"; - WriteBlending(out, config); - out += "color = last_tex_env_out;\n"; - } - - WriteLogicOp(out, config); - - out += '}'; - - return {std::move(out)}; -} - -ShaderDecompiler::ProgramResult GenerateTrivialVertexShader(bool separable_shader) { - std::string out; - if (separable_shader && !GLES) { - out += "#extension GL_ARB_separate_shader_objects : enable\n"; - } - - out += - fmt::format("layout(location = {}) in vec4 vert_position;\n" - "layout(location = {}) in vec4 vert_color;\n" - "layout(location = {}) in vec2 vert_texcoord0;\n" - "layout(location = {}) in vec2 vert_texcoord1;\n" - "layout(location = {}) in vec2 vert_texcoord2;\n" - "layout(location = {}) in float vert_texcoord0_w;\n" - "layout(location = {}) in vec4 vert_normquat;\n" - "layout(location = {}) in vec3 vert_view;\n", - ATTRIBUTE_POSITION, ATTRIBUTE_COLOR, ATTRIBUTE_TEXCOORD0, ATTRIBUTE_TEXCOORD1, - ATTRIBUTE_TEXCOORD2, ATTRIBUTE_TEXCOORD0_W, ATTRIBUTE_NORMQUAT, ATTRIBUTE_VIEW); - - out += GetVertexInterfaceDeclaration(true, separable_shader); - - out += UniformBlockDef; - - // Certain games render 2D elements very close to clip plane 0 resulting in very tiny - // negative/positive z values when computing with f32 precision, - // causing some vertices to get erroneously clipped. 
To workaround this problem, - // we can use a very small epsilon value for clip plane comparison. - out += R"( -const float EPSILON_Z = 0.00000001f; - -void main() { - primary_color = vert_color; - texcoord0 = vert_texcoord0; - texcoord1 = vert_texcoord1; - texcoord2 = vert_texcoord2; - texcoord0_w = vert_texcoord0_w; - normquat = vert_normquat; - view = vert_view; - vec4 vtx_pos = vert_position; - if (abs(vtx_pos.z) < EPSILON_Z) { - vtx_pos.z = 0.f; - } - gl_Position = vtx_pos; -#if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance) - gl_ClipDistance[0] = -vtx_pos.z; // fixed PICA clipping plane z <= 0 - gl_ClipDistance[1] = dot(clip_coef, vtx_pos); -#endif // !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance) -} -)"; - - return {std::move(out)}; -} - -std::optional GenerateVertexShader( - const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config, bool separable_shader) { - std::string out; - if (separable_shader && !GLES) { - out += "#extension GL_ARB_separate_shader_objects : enable\n"; - } - - out += ShaderDecompiler::GetCommonDeclarations(); - - std::array used_regs{}; - const auto get_input_reg = [&used_regs](u32 reg) { - ASSERT(reg < 16); - used_regs[reg] = true; - return fmt::format("vs_in_reg{}", reg); - }; - - const auto get_output_reg = [&](u32 reg) -> std::string { - ASSERT(reg < 16); - if (config.state.output_map[reg] < config.state.num_outputs) { - return fmt::format("vs_out_attr{}", config.state.output_map[reg]); - } - return ""; - }; - - auto program_source_opt = ShaderDecompiler::DecompileProgram( - setup.program_code, setup.swizzle_data, config.state.main_offset, get_input_reg, - get_output_reg, config.state.sanitize_mul); - - if (!program_source_opt) - return std::nullopt; - - std::string& program_source = program_source_opt->code; - - out += R"( -#define uniforms vs_uniforms -layout (binding = 1, std140) uniform vs_config { - pica_uniforms uniforms; -}; - -)"; - // input attributes declaration - for (std::size_t i = 0; i 
< used_regs.size(); ++i) { - if (used_regs[i]) { - out += fmt::format("layout(location = {0}) in vec4 vs_in_reg{0};\n", i); - } - } - out += '\n'; - - // output attributes declaration - for (u32 i = 0; i < config.state.num_outputs; ++i) { - out += (separable_shader ? "layout(location = " + std::to_string(i) + ")" : std::string{}) + - " out vec4 vs_out_attr" + std::to_string(i) + ";\n"; - } - - out += "\nvoid main() {\n"; - for (u32 i = 0; i < config.state.num_outputs; ++i) { - out += fmt::format(" vs_out_attr{} = vec4(0.0, 0.0, 0.0, 1.0);\n", i); - } - out += "\n exec_shader();\n}\n\n"; - - out += program_source; - - return {{std::move(out)}}; -} - -static std::string GetGSCommonSource(const PicaGSConfigCommonRaw& config, bool separable_shader) { - std::string out = GetVertexInterfaceDeclaration(true, separable_shader); - out += UniformBlockDef; - out += ShaderDecompiler::GetCommonDeclarations(); - - out += '\n'; - for (u32 i = 0; i < config.vs_output_attributes; ++i) { - out += (separable_shader ? 
"layout(location = " + std::to_string(i) + ")" : std::string{}) + - " in vec4 vs_out_attr" + std::to_string(i) + "[];\n"; - } - - out += R"( -struct Vertex { -)"; - out += fmt::format(" vec4 attributes[{}];\n", config.gs_output_attributes); - out += "};\n\n"; - - const auto semantic = [&config](VSOutputAttributes::Semantic slot_semantic) -> std::string { - const u32 slot = static_cast(slot_semantic); - const u32 attrib = config.semantic_maps[slot].attribute_index; - const u32 comp = config.semantic_maps[slot].component_index; - if (attrib < config.gs_output_attributes) { - return fmt::format("vtx.attributes[{}].{}", attrib, "xyzw"[comp]); - } - return "1.0"; - }; - - out += "const float EPSILON_Z = 0.00000001f;\n\n"; - out += "vec4 GetVertexQuaternion(Vertex vtx) {\n"; - out += " return vec4(" + semantic(VSOutputAttributes::QUATERNION_X) + ", " + - semantic(VSOutputAttributes::QUATERNION_Y) + ", " + - semantic(VSOutputAttributes::QUATERNION_Z) + ", " + - semantic(VSOutputAttributes::QUATERNION_W) + ");\n"; - out += "}\n\n"; - - out += "void EmitVtx(Vertex vtx, bool quats_opposite) {\n"; - out += " vec4 vtx_pos = vec4(" + semantic(VSOutputAttributes::POSITION_X) + ", " + - semantic(VSOutputAttributes::POSITION_Y) + ", " + - semantic(VSOutputAttributes::POSITION_Z) + ", " + - semantic(VSOutputAttributes::POSITION_W) + ");\n"; - out += " if (abs(vtx_pos.z) < EPSILON_Z) {\n"; - out += " vtx_pos.z = 0.f;\n"; - out += " }\n"; - out += " gl_Position = vtx_pos;\n"; - out += "#if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)\n"; - out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0 - out += " gl_ClipDistance[1] = dot(clip_coef, vtx_pos);\n"; - out += "#endif // !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)\n\n"; - - out += " vec4 vtx_quat = GetVertexQuaternion(vtx);\n"; - out += " normquat = mix(vtx_quat, -vtx_quat, bvec4(quats_opposite));\n\n"; - - out += " vec4 vtx_color = vec4(" + 
semantic(VSOutputAttributes::COLOR_R) + ", " + - semantic(VSOutputAttributes::COLOR_G) + ", " + semantic(VSOutputAttributes::COLOR_B) + - ", " + semantic(VSOutputAttributes::COLOR_A) + ");\n"; - out += " primary_color = min(abs(vtx_color), vec4(1.0));\n\n"; - - out += " texcoord0 = vec2(" + semantic(VSOutputAttributes::TEXCOORD0_U) + ", " + - semantic(VSOutputAttributes::TEXCOORD0_V) + ");\n"; - out += " texcoord1 = vec2(" + semantic(VSOutputAttributes::TEXCOORD1_U) + ", " + - semantic(VSOutputAttributes::TEXCOORD1_V) + ");\n\n"; - - out += " texcoord0_w = " + semantic(VSOutputAttributes::TEXCOORD0_W) + ";\n"; - out += " view = vec3(" + semantic(VSOutputAttributes::VIEW_X) + ", " + - semantic(VSOutputAttributes::VIEW_Y) + ", " + semantic(VSOutputAttributes::VIEW_Z) + - ");\n\n"; - - out += " texcoord2 = vec2(" + semantic(VSOutputAttributes::TEXCOORD2_U) + ", " + - semantic(VSOutputAttributes::TEXCOORD2_V) + ");\n\n"; - - out += " EmitVertex();\n"; - out += "}\n"; - - out += R"( -bool AreQuaternionsOpposite(vec4 qa, vec4 qb) { - return (dot(qa, qb) < 0.0); -} - -void EmitPrim(Vertex vtx0, Vertex vtx1, Vertex vtx2) { - EmitVtx(vtx0, false); - EmitVtx(vtx1, AreQuaternionsOpposite(GetVertexQuaternion(vtx0), GetVertexQuaternion(vtx1))); - EmitVtx(vtx2, AreQuaternionsOpposite(GetVertexQuaternion(vtx0), GetVertexQuaternion(vtx2))); - EndPrimitive(); -} -)"; - - return out; -}; - -ShaderDecompiler::ProgramResult GenerateFixedGeometryShader(const PicaFixedGSConfig& config, - bool separable_shader) { - std::string out; - if (separable_shader && !GLES) { - out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; - } - - out += R"( -layout(triangles) in; -layout(triangle_strip, max_vertices = 3) out; - -)"; - - out += GetGSCommonSource(config.state, separable_shader); - - out += R"( -void main() { - Vertex prim_buffer[3]; -)"; - for (u32 vtx = 0; vtx < 3; ++vtx) { - out += fmt::format(" prim_buffer[{}].attributes = vec4[{}](", vtx, - config.state.gs_output_attributes); 
- for (u32 i = 0; i < config.state.vs_output_attributes; ++i) { - out += fmt::format("{}vs_out_attr{}[{}]", i == 0 ? "" : ", ", i, vtx); - } - out += ");\n"; - } - out += " EmitPrim(prim_buffer[0], prim_buffer[1], prim_buffer[2]);\n"; - out += "}\n"; - - return {std::move(out)}; -} -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h deleted file mode 100644 index 9249e4b51..000000000 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ /dev/null @@ -1,266 +0,0 @@ -// Copyright 2015 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once -#include -#include -#include "common/hash.h" -#include "video_core/regs.h" -#include "video_core/shader/shader.h" - -namespace OpenGL { - -class Driver; - -namespace ShaderDecompiler { -struct ProgramResult; -} - -enum class ProgramType : u32 { VS, GS, FS }; - -enum Attributes { - ATTRIBUTE_POSITION, - ATTRIBUTE_COLOR, - ATTRIBUTE_TEXCOORD0, - ATTRIBUTE_TEXCOORD1, - ATTRIBUTE_TEXCOORD2, - ATTRIBUTE_TEXCOORD0_W, - ATTRIBUTE_NORMQUAT, - ATTRIBUTE_VIEW, -}; - -// Doesn't include const_color because we don't sync it, see comment in BuildFromRegs() -struct TevStageConfigRaw { - u32 sources_raw; - u32 modifiers_raw; - u32 ops_raw; - u32 scales_raw; - explicit operator Pica::TexturingRegs::TevStageConfig() const noexcept { - Pica::TexturingRegs::TevStageConfig stage; - stage.sources_raw = sources_raw; - stage.modifiers_raw = modifiers_raw; - stage.ops_raw = ops_raw; - stage.const_color = 0; - stage.scales_raw = scales_raw; - return stage; - } -}; - -struct PicaFSConfigState { - Pica::FramebufferRegs::CompareFunc alpha_test_func; - Pica::RasterizerRegs::ScissorMode scissor_test_mode; - Pica::TexturingRegs::TextureConfig::TextureType texture0_type; - bool texture2_use_coord1; - std::array tev_stages; - u8 combiner_buffer_input; - - Pica::RasterizerRegs::DepthBuffering depthmap_enable; - 
Pica::TexturingRegs::FogMode fog_mode; - bool fog_flip; - bool alphablend_enable; - Pica::FramebufferRegs::LogicOp logic_op; - - struct { - struct { - unsigned num; - bool directional; - bool two_sided_diffuse; - bool dist_atten_enable; - bool spot_atten_enable; - bool geometric_factor_0; - bool geometric_factor_1; - bool shadow_enable; - } light[8]; - - bool enable; - unsigned src_num; - Pica::LightingRegs::LightingBumpMode bump_mode; - unsigned bump_selector; - bool bump_renorm; - bool clamp_highlights; - - Pica::LightingRegs::LightingConfig config; - bool enable_primary_alpha; - bool enable_secondary_alpha; - - bool enable_shadow; - bool shadow_primary; - bool shadow_secondary; - bool shadow_invert; - bool shadow_alpha; - unsigned shadow_selector; - - struct { - bool enable; - bool abs_input; - Pica::LightingRegs::LightingLutInput type; - float scale; - } lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb; - } lighting; - - struct { - bool enable; - u32 coord; - Pica::TexturingRegs::ProcTexClamp u_clamp, v_clamp; - Pica::TexturingRegs::ProcTexCombiner color_combiner, alpha_combiner; - bool separate_alpha; - bool noise_enable; - Pica::TexturingRegs::ProcTexShift u_shift, v_shift; - u32 lut_width; - u32 lut_offset0; - u32 lut_offset1; - u32 lut_offset2; - u32 lut_offset3; - u32 lod_min; - u32 lod_max; - Pica::TexturingRegs::ProcTexFilter lut_filter; - } proctex; - - struct { - bool emulate_blending; - Pica::FramebufferRegs::BlendEquation eq; - Pica::FramebufferRegs::BlendFactor src_factor; - Pica::FramebufferRegs::BlendFactor dst_factor; - } rgb_blend, alpha_blend; - - bool shadow_rendering; - bool shadow_texture_orthographic; - bool use_custom_normal_map; -}; - -/** - * This struct contains all state used to generate the GLSL fragment shader that emulates the - * current Pica register configuration. This struct is used as a cache key for generated GLSL shader - * programs. 
The functions in gl_shader_gen.cpp should retrieve state from this struct only, not by - * directly accessing Pica registers. This should reduce the risk of bugs in shader generation where - * Pica state is not being captured in the shader cache key, thereby resulting in (what should be) - * two separate shaders sharing the same key. - */ -struct PicaFSConfig : Common::HashableStruct { - - /// Construct a PicaFSConfig with the given Pica register configuration. - static PicaFSConfig BuildFromRegs(const Pica::Regs& regs, bool has_blend_minmax_factor, - bool use_normal = false); - - bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { - return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index)); - } - - bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { - return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index)); - } -}; - -/** - * This struct contains common information to identify a GL vertex/geometry shader generated from - * PICA vertex/geometry shader. - */ -struct PicaShaderConfigCommon { - void Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup); - - u64 program_hash; - u64 swizzle_hash; - u32 main_offset; - bool sanitize_mul; - - u32 num_outputs; - - // output_map[output register index] -> output attribute index - std::array output_map; -}; - -/** - * This struct contains information to identify a GL vertex shader generated from PICA vertex - * shader. 
- */ -struct PicaVSConfig : Common::HashableStruct { - explicit PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) { - state.Init(regs, setup); - } - explicit PicaVSConfig(const PicaShaderConfigCommon& conf) { - state = conf; - } -}; - -struct PicaGSConfigCommonRaw { - void Init(const Pica::Regs& regs); - - u32 vs_output_attributes; - u32 gs_output_attributes; - - struct SemanticMap { - u32 attribute_index; - u32 component_index; - }; - - // semantic_maps[semantic name] -> GS output attribute index + component index - std::array semantic_maps; -}; - -/** - * This struct contains information to identify a GL geometry shader generated from PICA no-geometry - * shader pipeline - */ -struct PicaFixedGSConfig : Common::HashableStruct { - explicit PicaFixedGSConfig(const Pica::Regs& regs) { - state.Init(regs); - } -}; - -/** - * Generates the GLSL vertex shader program source code that accepts vertices from software shader - * and directly passes them to the fragment shader. 
- * @param separable_shader generates shader that can be used for separate shader object - * @returns String of the shader source code - */ -ShaderDecompiler::ProgramResult GenerateTrivialVertexShader(bool separable_shader); - -/** - * Generates the GLSL vertex shader program source code for the given VS program - * @returns String of the shader source code; boost::none on failure - */ -std::optional GenerateVertexShader( - const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config, bool separable_shader); - -/* - * Generates the GLSL fixed geometry shader program source code for non-GS PICA pipeline - * @returns String of the shader source code - */ -ShaderDecompiler::ProgramResult GenerateFixedGeometryShader(const PicaFixedGSConfig& config, - bool separable_shader); - -/** - * Generates the GLSL fragment shader program source code for the current Pica state - * @param config ShaderCacheKey object generated for the current Pica state, used for the shader - * configuration (NOTE: Use state in this struct only, not the Pica registers!) 
- * @param separable_shader generates shader that can be used for separate shader object - * @returns String of the shader source code - */ -ShaderDecompiler::ProgramResult GenerateFragmentShader(const PicaFSConfig& config, - bool separable_shader); - -} // namespace OpenGL - -namespace std { -template <> -struct hash { - std::size_t operator()(const OpenGL::PicaFSConfig& k) const noexcept { - return k.Hash(); - } -}; - -template <> -struct hash { - std::size_t operator()(const OpenGL::PicaVSConfig& k) const noexcept { - return k.Hash(); - } -}; - -template <> -struct hash { - std::size_t operator()(const OpenGL::PicaFixedGSConfig& k) const noexcept { - return k.Hash(); - } -}; -} // namespace std diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index ba0b992e0..e171820bf 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -14,9 +14,11 @@ #include "video_core/renderer_opengl/gl_shader_disk_cache.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" -#include "video_core/shader/shader_uniforms.h" +#include "video_core/shader/generator/shader_uniforms.h" #include "video_core/video_core.h" +using namespace Pica::Shader::Generator; + namespace OpenGL { static u64 GetUniqueIdentifier(const Pica::Regs& regs, const ProgramCode& code) { @@ -74,7 +76,7 @@ static std::set GetSupportedFormats() { } static std::tuple BuildVSConfigFromRaw( - const ShaderDiskCacheRaw& raw) { + const ShaderDiskCacheRaw& raw, const Driver& driver) { Pica::Shader::ProgramCode program_code{}; Pica::Shader::SwizzleData swizzle_data{}; std::copy_n(raw.GetProgramCode().begin(), Pica::Shader::MAX_PROGRAM_CODE_LENGTH, @@ -84,7 +86,8 @@ static std::tuple BuildVSConfigFromRaw( Pica::Shader::ShaderSetup setup; setup.program_code = program_code; setup.swizzle_data = swizzle_data; - return 
{PicaVSConfig{raw.GetRawShaderConfig().vs, setup}, setup}; + return {PicaVSConfig{raw.GetRawShaderConfig(), setup, driver.HasClipCullDistance(), true}, + setup}; } /** @@ -130,8 +133,10 @@ private: class TrivialVertexShader { public: - explicit TrivialVertexShader(bool separable) : program(separable) { - program.Create(GenerateTrivialVertexShader(separable).code.c_str(), GL_VERTEX_SHADER); + explicit TrivialVertexShader(const Driver& driver, bool separable) : program(separable) { + const auto code = + GLSL::GenerateTrivialVertexShader(driver.HasClipCullDistance(), separable); + program.Create(code.c_str(), GL_VERTEX_SHADER); } GLuint Get() const { return program.GetHandle(); @@ -141,20 +146,18 @@ private: OGLShaderStage program; }; -template class ShaderCache { public: explicit ShaderCache(bool separable) : separable(separable) {} - std::tuple> Get( - const KeyConfigType& config) { + std::tuple> Get(const KeyConfigType& config) { auto [iter, new_shader] = shaders.emplace(config, OGLShaderStage{separable}); OGLShaderStage& cached_shader = iter->second; - std::optional result{}; + std::optional result{}; if (new_shader) { result = CodeGenerator(config, separable); - cached_shader.Create(result->code.c_str(), ShaderType); + cached_shader.Create(result->c_str(), ShaderType); } return {cached_shader.GetHandle(), std::move(result)}; } @@ -180,29 +183,27 @@ private: // program buffer from the previous shader, which is hashed into the config, resulting several // different config values from the same shader program. 
template (*CodeGenerator)( - const Pica::Shader::ShaderSetup&, const KeyConfigType&, bool), + std::string (*CodeGenerator)(const Pica::Shader::ShaderSetup&, const KeyConfigType&, + bool), GLenum ShaderType> class ShaderDoubleCache { public: explicit ShaderDoubleCache(bool separable) : separable(separable) {} - std::tuple> Get( - const KeyConfigType& key, const Pica::Shader::ShaderSetup& setup) { - std::optional result{}; + std::tuple> Get(const KeyConfigType& key, + const Pica::Shader::ShaderSetup& setup) { + std::optional result{}; auto map_it = shader_map.find(key); if (map_it == shader_map.end()) { - auto program_opt = CodeGenerator(setup, key, separable); - if (!program_opt) { + auto program = CodeGenerator(setup, key, separable); + if (program.empty()) { shader_map[key] = nullptr; return {0, std::nullopt}; } - std::string& program = program_opt->code; auto [iter, new_shader] = shader_cache.emplace(program, OGLShaderStage{separable}); OGLShaderStage& cached_shader = iter->second; if (new_shader) { - result.emplace(); - result->code = program; + result = program; cached_shader.Create(program.c_str(), ShaderType); } shader_map[key] = &cached_shader; @@ -237,18 +238,19 @@ private: }; using ProgrammableVertexShaders = - ShaderDoubleCache; + ShaderDoubleCache; using FixedGeometryShaders = - ShaderCache; + ShaderCache; -using FragmentShaders = ShaderCache; +using FragmentShaders = + ShaderCache; class ShaderProgramManager::Impl { public: - explicit Impl(bool separable) + explicit Impl(const Driver& driver, bool separable) : separable(separable), programmable_vertex_shaders(separable), - trivial_vertex_shader(separable), fixed_geometry_shaders(separable), + trivial_vertex_shader(driver, separable), fixed_geometry_shaders(separable), fragment_shaders(separable), disk_cache(separable) { if (separable) pipeline.Create(); @@ -299,13 +301,13 @@ ShaderProgramManager::ShaderProgramManager(Frontend::EmuWindow& emu_window_, con bool separable) : emu_window{emu_window_}, 
driver{driver_}, strict_context_required{emu_window.StrictContextRequired()}, impl{std::make_unique( - separable)} {} + driver_, separable)} {} ShaderProgramManager::~ShaderProgramManager() = default; bool ShaderProgramManager::UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup) { - PicaVSConfig config{regs.vs, setup}; + PicaVSConfig config{regs, setup, driver.HasClipCullDistance(), true}; auto [handle, result] = impl->programmable_vertex_shaders.Get(config, setup); if (handle == 0) return false; @@ -333,7 +335,7 @@ void ShaderProgramManager::UseTrivialVertexShader() { } void ShaderProgramManager::UseFixedGeometryShader(const Pica::Regs& regs) { - PicaFixedGSConfig gs_config(regs); + PicaFixedGSConfig gs_config(regs, driver.HasClipCullDistance()); auto [handle, _] = impl->fixed_geometry_shaders.Get(gs_config); impl->current.gs = handle; impl->current.gs_hash = gs_config.Hash(); @@ -345,8 +347,8 @@ void ShaderProgramManager::UseTrivialGeometryShader() { } void ShaderProgramManager::UseFragmentShader(const Pica::Regs& regs, bool use_normal) { - PicaFSConfig config = - PicaFSConfig::BuildFromRegs(regs, driver.HasBlendMinMaxFactor(), use_normal); + PicaFSConfig config(regs, false, driver.IsOpenGLES(), false, driver.HasBlendMinMaxFactor(), + use_normal); auto [handle, result] = impl->fragment_shaders.Get(config); impl->current.fs = handle; impl->current.fs_hash = config.Hash(); @@ -463,13 +465,13 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading, // we have both the binary shader and the decompiled, so inject it into the // cache if (raw.GetProgramType() == ProgramType::VS) { - auto [conf, setup] = BuildVSConfigFromRaw(raw); + auto [conf, setup] = BuildVSConfigFromRaw(raw, driver); std::scoped_lock lock(mutex); - impl->programmable_vertex_shaders.Inject(conf, decomp->second.result.code, + impl->programmable_vertex_shaders.Inject(conf, decomp->second.code, std::move(shader)); } else if (raw.GetProgramType() 
== ProgramType::FS) { - PicaFSConfig conf = PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig(), - driver.HasBlendMinMaxFactor()); + PicaFSConfig conf(raw.GetRawShaderConfig(), false, driver.IsOpenGLES(), false, + driver.HasBlendMinMaxFactor()); std::scoped_lock lock(mutex); impl->fragment_shaders.Inject(conf, std::move(shader)); } else { @@ -566,24 +568,24 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading, bool sanitize_mul = false; GLuint handle{0}; - std::optional result; + std::string code; // Otherwise decompile and build the shader at boot and save the result to the // precompiled file if (raw.GetProgramType() == ProgramType::VS) { - auto [conf, setup] = BuildVSConfigFromRaw(raw); - result = GenerateVertexShader(setup, conf, impl->separable); + auto [conf, setup] = BuildVSConfigFromRaw(raw, driver); + code = GLSL::GenerateVertexShader(setup, conf, impl->separable); OGLShaderStage stage{impl->separable}; - stage.Create(result->code.c_str(), GL_VERTEX_SHADER); + stage.Create(code.c_str(), GL_VERTEX_SHADER); handle = stage.GetHandle(); sanitize_mul = conf.state.sanitize_mul; std::scoped_lock lock(mutex); - impl->programmable_vertex_shaders.Inject(conf, result->code, std::move(stage)); + impl->programmable_vertex_shaders.Inject(conf, code, std::move(stage)); } else if (raw.GetProgramType() == ProgramType::FS) { - PicaFSConfig conf = PicaFSConfig::BuildFromRegs(raw.GetRawShaderConfig(), - driver.HasBlendMinMaxFactor()); - result = GenerateFragmentShader(conf, impl->separable); + PicaFSConfig conf(raw.GetRawShaderConfig(), false, driver.IsOpenGLES(), false, + driver.HasBlendMinMaxFactor()); + code = GLSL::GenerateFragmentShader(conf, impl->separable); OGLShaderStage stage{impl->separable}; - stage.Create(result->code.c_str(), GL_FRAGMENT_SHADER); + stage.Create(code.c_str(), GL_FRAGMENT_SHADER); handle = stage.GetHandle(); std::scoped_lock lock(mutex); impl->fragment_shaders.Inject(conf, std::move(stage)); @@ -602,8 +604,8 @@ void 
ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading, std::scoped_lock lock(mutex); // If this is a new separable shader, add it the precompiled cache - if (result) { - disk_cache.SaveDecompiled(unique_identifier, *result, sanitize_mul); + if (!code.empty()) { + disk_cache.SaveDecompiled(unique_identifier, code, sanitize_mul); disk_cache.SaveDump(unique_identifier, handle); precompiled_cache_altered = true; } diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 4b7104be7..16c1b2142 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -24,6 +24,12 @@ namespace OpenGL { class Driver; class OpenGLState; +enum UniformBindings { + VSPicaData = 0, + VSData = 1, + FSData = 2, +}; + /// A class that manage different shader stages and configures them with given config data. class ShaderProgramManager { public: diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 294c0b67e..50eafb436 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -14,9 +14,9 @@ namespace OpenGL { GLuint LoadShader(std::string_view source, GLenum type) { - const std::string version = GLES ? 
R"(#version 320 es - -#define CITRA_GLES + std::string preamble; + if (GLES) { + preamble = R"(#version 320 es #if defined(GL_ANDROID_extension_pack_es31a) #extension GL_ANDROID_extension_pack_es31a : enable @@ -25,8 +25,10 @@ GLuint LoadShader(std::string_view source, GLenum type) { #if defined(GL_EXT_clip_cull_distance) #extension GL_EXT_clip_cull_distance : enable #endif // defined(GL_EXT_clip_cull_distance) -)" - : "#version 430 core\n"; +)"; + } else { + preamble = "#version 430 core\n"; + } std::string_view debug_type; switch (type) { @@ -43,8 +45,8 @@ GLuint LoadShader(std::string_view source, GLenum type) { UNREACHABLE(); } - std::array src_arr{version.data(), source.data()}; - std::array lengths{static_cast(version.size()), + std::array src_arr{preamble.data(), source.data()}; + std::array lengths{static_cast(preamble.size()), static_cast(source.size())}; GLuint shader_id = glCreateShader(type); glShaderSource(shader_id, static_cast(src_arr.size()), src_arr.data(), lengths.data()); diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index 78c6fb71e..3b52253b8 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -9,21 +9,6 @@ namespace OpenGL { -// High precision may or may not supported in GLES3. If it isn't, use medium precision instead. 
-static constexpr char fragment_shader_precision_OES[] = R"( -#ifdef GL_FRAGMENT_PRECISION_HIGH -precision highp int; -precision highp float; -precision highp samplerBuffer; -precision highp uimage2D; -#else -precision mediump int; -precision mediump float; -precision mediump samplerBuffer; -precision mediump uimage2D; -#endif // GL_FRAGMENT_PRECISION_HIGH -)"; - /** * Utility function to create and compile an OpenGL GLSL shader * @param source String of the GLSL shader program diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index be778c764..a4736c4cf 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -11,12 +11,12 @@ #include "core/hw/hw.h" #include "core/hw/lcd.h" #include "core/memory.h" -#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_texture_mailbox.h" #include "video_core/renderer_opengl/gl_vars.h" #include "video_core/renderer_opengl/post_processing_opengl.h" #include "video_core/renderer_opengl/renderer_opengl.h" +#include "video_core/shader/generator/glsl_shader_gen.h" #include "video_core/video_core.h" #include "video_core/host_shaders/opengl_present_anaglyph_frag.h" @@ -387,11 +387,7 @@ void RendererOpenGL::InitOpenGLObjects() { void RendererOpenGL::ReloadShader() { // Link shaders and get variable locations - std::string shader_data; - if (GLES) { - shader_data += fragment_shader_precision_OES; - } - + std::string shader_data = fragment_shader_precision_OES; if (Settings::values.render_3d.GetValue() == Settings::StereoRenderOption::Anaglyph) { if (Settings::values.anaglyph_shader_name.GetValue() == "dubois (builtin)") { shader_data += HostShaders::OPENGL_PRESENT_ANAGLYPH_FRAG; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 
13267b59b..8d27a45e5 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -5,7 +5,8 @@ #include "common/thread_worker.h" #include "video_core/rasterizer_cache/pixel_format.h" #include "video_core/renderer_vulkan/vk_common.h" -#include "video_core/renderer_vulkan/vk_shader_gen.h" +#include "video_core/shader/generator/glsl_shader_gen.h" +#include "video_core/shader/generator/spv_shader_gen.h" namespace Common { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 6d4da7127..bb8628f9f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -14,19 +14,14 @@ #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_shader_gen_spv.h" #include "video_core/renderer_vulkan/vk_shader_util.h" +using namespace Pica::Shader::Generator; + MICROPROFILE_DEFINE(Vulkan_Bind, "Vulkan", "Pipeline Bind", MP_RGB(192, 32, 32)); namespace Vulkan { -enum ProgramType : u32 { - VS = 0, - GS = 2, - FS = 1, -}; - u32 AttribBytes(Pica::PipelineRegs::VertexAttributeFormat format, u32 size) { switch (format) { case Pica::PipelineRegs::VertexAttributeFormat::FLOAT: @@ -52,14 +47,14 @@ AttribLoadFlags MakeAttribLoadFlag(Pica::PipelineRegs::VertexAttributeFormat for } } -constexpr std::array BUFFER_BINDINGS = {{ +constexpr std::array BUFFER_BINDINGS = {{ {0, vk::DescriptorType::eUniformBufferDynamic, 1, vk::ShaderStageFlagBits::eVertex}, {1, vk::DescriptorType::eUniformBufferDynamic, 1, - vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eGeometry | - vk::ShaderStageFlagBits::eFragment}, - {2, vk::DescriptorType::eUniformTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment}, + vk::ShaderStageFlagBits::eVertex | 
vk::ShaderStageFlagBits::eGeometry}, + {2, vk::DescriptorType::eUniformBufferDynamic, 1, vk::ShaderStageFlagBits::eFragment}, {3, vk::DescriptorType::eUniformTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment}, {4, vk::DescriptorType::eUniformTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment}, + {5, vk::DescriptorType::eUniformTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment}, }}; constexpr std::array TEXTURE_BINDINGS = {{ @@ -88,8 +83,9 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, descriptor_set_providers{DescriptorSetProvider{instance, pool, BUFFER_BINDINGS}, DescriptorSetProvider{instance, pool, TEXTURE_BINDINGS}, DescriptorSetProvider{instance, pool, SHADOW_BINDINGS}}, - trivial_vertex_shader{instance, vk::ShaderStageFlagBits::eVertex, - GenerateTrivialVertexShader(instance.IsShaderClipDistanceSupported())} { + trivial_vertex_shader{ + instance, vk::ShaderStageFlagBits::eVertex, + GLSL::GenerateTrivialVertexShader(instance.IsShaderClipDistanceSupported(), true)} { BuildLayout(); } @@ -294,8 +290,8 @@ bool PipelineCache::BindPipeline(const PipelineInfo& info, bool wait_built) { bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup, const VertexLayout& layout) { - PicaVSConfig config{regs.rasterizer, regs.vs, setup, instance}; - config.state.use_geometry_shader = instance.UseGeometryShaders(); + PicaVSConfig config{regs, setup, instance.IsShaderClipDistanceSupported(), + instance.UseGeometryShaders()}; for (u32 i = 0; i < layout.attribute_count; i++) { const VertexAttribute& attr = layout.attributes[i]; @@ -313,14 +309,13 @@ bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs, auto [it, new_config] = programmable_vertex_map.try_emplace(config); if (new_config) { - auto code = GenerateVertexShader(setup, config); - if (!code) { + auto program = GLSL::GenerateVertexShader(setup, config, true); + if (program.empty()) { LOG_ERROR(Render_Vulkan, "Failed to 
retrieve programmable vertex shader"); programmable_vertex_map[config] = nullptr; return false; } - std::string& program = code.value(); auto [iter, new_program] = programmable_vertex_cache.try_emplace(program, instance); auto& shader = iter->second; @@ -359,13 +354,13 @@ bool PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) { return true; } - const PicaFixedGSConfig gs_config{regs, instance}; + const PicaFixedGSConfig gs_config{regs, instance.IsShaderClipDistanceSupported()}; auto [it, new_shader] = fixed_geometry_shaders.try_emplace(gs_config, instance); auto& shader = it->second; if (new_shader) { workers.QueueWork([gs_config, device = instance.GetDevice(), &shader]() { - const std::string code = GenerateFixedGeometryShader(gs_config); + const auto code = GLSL::GenerateFixedGeometryShader(gs_config, true); shader.module = Compile(code, vk::ShaderStageFlagBits::eGeometry, device); shader.MarkDone(); }); @@ -383,7 +378,9 @@ void PipelineCache::UseTrivialGeometryShader() { } void PipelineCache::UseFragmentShader(const Pica::Regs& regs) { - const PicaFSConfig config{regs, instance}; + const PicaFSConfig config{regs, instance.IsFragmentShaderInterlockSupported(), + instance.NeedsLogicOpEmulation(), + !instance.IsCustomBorderColorSupported(), false}; const auto [it, new_shader] = fragment_shaders.try_emplace(config, instance); auto& shader = it->second; @@ -395,12 +392,12 @@ void PipelineCache::UseFragmentShader(const Pica::Regs& regs) { texture0_type == Pica::TexturingRegs::TextureConfig::ShadowCube || config.state.shadow_rendering.Value(); if (use_spirv && !is_shadow) { - const std::vector code = GenerateFragmentShaderSPV(config); + const std::vector code = SPIRV::GenerateFragmentShader(config); shader.module = CompileSPV(code, instance.GetDevice()); shader.MarkDone(); } else { workers.QueueWork([config, device = instance.GetDevice(), &shader]() { - const std::string code = GenerateFragmentShader(config); + const std::string code = 
GLSL::GenerateFragmentShader(config, true); shader.module = Compile(code, vk::ShaderStageFlagBits::eFragment, device); shader.MarkDone(); }); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 954f4b3e9..1a54a0f38 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -9,6 +9,8 @@ #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" +#include "video_core/shader/generator/glsl_shader_gen.h" +#include "video_core/shader/generator/spv_shader_gen.h" namespace Pica { struct Regs; @@ -22,7 +24,7 @@ class RenderpassCache; class DescriptorPool; constexpr u32 NUM_RASTERIZER_SETS = 3; -constexpr u32 NUM_DYNAMIC_OFFSETS = 2; +constexpr u32 NUM_DYNAMIC_OFFSETS = 3; /** * Stores a collection of rasterizer pipelines used during rendering. @@ -113,10 +115,10 @@ private: std::array shader_hashes; std::array current_shaders; - std::unordered_map programmable_vertex_map; + std::unordered_map programmable_vertex_map; std::unordered_map programmable_vertex_cache; - std::unordered_map fixed_geometry_shaders; - std::unordered_map fragment_shaders; + std::unordered_map fixed_geometry_shaders; + std::unordered_map fragment_shaders; Shader trivial_vertex_shader; }; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 5511f4335..68091a77f 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -28,6 +28,8 @@ MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Drawing", MP_RGB(128, 128, 192)); using TriangleTopology = Pica::PipelineRegs::TriangleTopology; using VideoCore::SurfaceType; +using namespace Pica::Shader::Generator; + constexpr u64 STREAM_BUFFER_SIZE = 64 * 1024 * 1024; constexpr u64 UNIFORM_BUFFER_SIZE = 4 * 1024 * 1024; constexpr u64 TEXTURE_BUFFER_SIZE 
= 2 * 1024 * 1024; @@ -76,10 +78,10 @@ RasterizerVulkan::RasterizerVulkan(Memory::MemorySystem& memory, vertex_buffers.fill(stream_buffer.Handle()); uniform_buffer_alignment = instance.UniformMinAlignment(); - uniform_size_aligned_vs = - Common::AlignUp(sizeof(Pica::Shader::VSUniformData), uniform_buffer_alignment); - uniform_size_aligned_fs = - Common::AlignUp(sizeof(Pica::Shader::UniformData), uniform_buffer_alignment); + uniform_size_aligned_vs_pica = + Common::AlignUp(sizeof(VSPicaUniformData), uniform_buffer_alignment); + uniform_size_aligned_vs = Common::AlignUp(sizeof(VSUniformData), uniform_buffer_alignment); + uniform_size_aligned_fs = Common::AlignUp(sizeof(FSUniformData), uniform_buffer_alignment); // Define vertex layout for software shaders MakeSoftwareVertexLayout(); @@ -107,11 +109,12 @@ RasterizerVulkan::RasterizerVulkan(Memory::MemorySystem& memory, // Since we don't have access to VK_EXT_descriptor_indexing we need to intiallize // all descriptor sets even the ones we don't use. 
- pipeline_cache.BindBuffer(0, uniform_buffer.Handle(), 0, sizeof(Pica::Shader::VSUniformData)); - pipeline_cache.BindBuffer(1, uniform_buffer.Handle(), 0, sizeof(Pica::Shader::UniformData)); - pipeline_cache.BindTexelBuffer(2, *texture_lf_view); - pipeline_cache.BindTexelBuffer(3, *texture_rg_view); - pipeline_cache.BindTexelBuffer(4, *texture_rgba_view); + pipeline_cache.BindBuffer(0, uniform_buffer.Handle(), 0, sizeof(VSPicaUniformData)); + pipeline_cache.BindBuffer(1, uniform_buffer.Handle(), 0, sizeof(VSUniformData)); + pipeline_cache.BindBuffer(2, uniform_buffer.Handle(), 0, sizeof(FSUniformData)); + pipeline_cache.BindTexelBuffer(3, *texture_lf_view); + pipeline_cache.BindTexelBuffer(4, *texture_rg_view); + pipeline_cache.BindTexelBuffer(5, *texture_rgba_view); Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID); Surface& null_cube_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_CUBE_ID); @@ -140,7 +143,6 @@ void RasterizerVulkan::LoadDiskResources(const std::atomic_bool& stop_loading, } void RasterizerVulkan::SyncFixedState() { - SyncClipEnabled(); SyncCullMode(); SyncBlendEnabled(); SyncBlendFuncs(); @@ -478,16 +480,16 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { // Update scissor uniforms const auto [scissor_x1, scissor_y2, scissor_x2, scissor_y1] = fb_helper.Scissor(); - if (uniform_block_data.data.scissor_x1 != scissor_x1 || - uniform_block_data.data.scissor_x2 != scissor_x2 || - uniform_block_data.data.scissor_y1 != scissor_y1 || - uniform_block_data.data.scissor_y2 != scissor_y2) { + if (fs_uniform_block_data.data.scissor_x1 != scissor_x1 || + fs_uniform_block_data.data.scissor_x2 != scissor_x2 || + fs_uniform_block_data.data.scissor_y1 != scissor_y1 || + fs_uniform_block_data.data.scissor_y2 != scissor_y2) { - uniform_block_data.data.scissor_x1 = scissor_x1; - uniform_block_data.data.scissor_x2 = scissor_x2; - uniform_block_data.data.scissor_y1 = scissor_y1; - uniform_block_data.data.scissor_y2 = 
scissor_y2; - uniform_block_data.dirty = true; + fs_uniform_block_data.data.scissor_x1 = scissor_x1; + fs_uniform_block_data.data.scissor_x2 = scissor_x2; + fs_uniform_block_data.data.scissor_y1 = scissor_y1; + fs_uniform_block_data.data.scissor_y2 = scissor_y2; + fs_uniform_block_data.dirty = true; } // Sync and bind the texture surfaces @@ -670,11 +672,6 @@ void RasterizerVulkan::UnbindSpecial() { void RasterizerVulkan::NotifyFixedFunctionPicaRegisterChanged(u32 id) { switch (id) { - // Clipping plane - case PICA_REG_INDEX(rasterizer.clip_enable): - SyncClipEnabled(); - break; - // Culling case PICA_REG_INDEX(rasterizer.cull_mode): SyncCullMode(); @@ -831,14 +828,6 @@ void RasterizerVulkan::MakeSoftwareVertexLayout() { } } -void RasterizerVulkan::SyncClipEnabled() { - bool clip_enabled = regs.rasterizer.clip_enable != 0; - if (clip_enabled != uniform_block_data.data.enable_clip1) { - uniform_block_data.data.enable_clip1 = clip_enabled; - uniform_block_data.dirty = true; - } -} - void RasterizerVulkan::SyncCullMode() { pipeline_info.rasterization.cull_mode.Assign(regs.rasterizer.cull_mode); } @@ -946,7 +935,7 @@ void RasterizerVulkan::SyncAndUploadLUTsLF() { sizeof(Common::Vec2f) * 256 * Pica::LightingRegs::NumLightingSampler + sizeof(Common::Vec2f) * 128; // fog - if (!uniform_block_data.lighting_lut_dirty_any && !uniform_block_data.fog_lut_dirty) { + if (!fs_uniform_block_data.lighting_lut_dirty_any && !fs_uniform_block_data.fog_lut_dirty) { return; } @@ -954,9 +943,9 @@ void RasterizerVulkan::SyncAndUploadLUTsLF() { auto [buffer, offset, invalidate] = texture_lf_buffer.Map(max_size, sizeof(Common::Vec4f)); // Sync the lighting luts - if (uniform_block_data.lighting_lut_dirty_any || invalidate) { - for (unsigned index = 0; index < uniform_block_data.lighting_lut_dirty.size(); index++) { - if (uniform_block_data.lighting_lut_dirty[index] || invalidate) { + if (fs_uniform_block_data.lighting_lut_dirty_any || invalidate) { + for (unsigned index = 0; index < 
fs_uniform_block_data.lighting_lut_dirty.size(); index++) { + if (fs_uniform_block_data.lighting_lut_dirty[index] || invalidate) { std::array new_data; const auto& source_lut = Pica::g_state.lighting.luts[index]; std::transform(source_lut.begin(), source_lut.end(), new_data.begin(), @@ -968,19 +957,19 @@ void RasterizerVulkan::SyncAndUploadLUTsLF() { lighting_lut_data[index] = new_data; std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(Common::Vec2f)); - uniform_block_data.data.lighting_lut_offset[index / 4][index % 4] = + fs_uniform_block_data.data.lighting_lut_offset[index / 4][index % 4] = static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); - uniform_block_data.dirty = true; + fs_uniform_block_data.dirty = true; bytes_used += new_data.size() * sizeof(Common::Vec2f); } - uniform_block_data.lighting_lut_dirty[index] = false; + fs_uniform_block_data.lighting_lut_dirty[index] = false; } } - uniform_block_data.lighting_lut_dirty_any = false; + fs_uniform_block_data.lighting_lut_dirty_any = false; } // Sync the fog lut - if (uniform_block_data.fog_lut_dirty || invalidate) { + if (fs_uniform_block_data.fog_lut_dirty || invalidate) { std::array new_data; std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(), @@ -992,12 +981,12 @@ void RasterizerVulkan::SyncAndUploadLUTsLF() { fog_lut_data = new_data; std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(Common::Vec2f)); - uniform_block_data.data.fog_lut_offset = + fs_uniform_block_data.data.fog_lut_offset = static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); - uniform_block_data.dirty = true; + fs_uniform_block_data.dirty = true; bytes_used += new_data.size() * sizeof(Common::Vec2f); } - uniform_block_data.fog_lut_dirty = false; + fs_uniform_block_data.fog_lut_dirty = false; } texture_lf_buffer.Commit(static_cast(bytes_used)); @@ -1010,10 +999,10 @@ void RasterizerVulkan::SyncAndUploadLUTs() { sizeof(Common::Vec4f) * 256 
+ // proctex sizeof(Common::Vec4f) * 256; // proctex diff - if (!uniform_block_data.proctex_noise_lut_dirty && - !uniform_block_data.proctex_color_map_dirty && - !uniform_block_data.proctex_alpha_map_dirty && !uniform_block_data.proctex_lut_dirty && - !uniform_block_data.proctex_diff_lut_dirty) { + if (!fs_uniform_block_data.proctex_noise_lut_dirty && + !fs_uniform_block_data.proctex_color_map_dirty && + !fs_uniform_block_data.proctex_alpha_map_dirty && + !fs_uniform_block_data.proctex_lut_dirty && !fs_uniform_block_data.proctex_diff_lut_dirty) { return; } @@ -1035,34 +1024,34 @@ void RasterizerVulkan::SyncAndUploadLUTs() { std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(Common::Vec2f)); lut_offset = static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); - uniform_block_data.dirty = true; + fs_uniform_block_data.dirty = true; bytes_used += new_data.size() * sizeof(Common::Vec2f); } }; // Sync the proctex noise lut - if (uniform_block_data.proctex_noise_lut_dirty || invalidate) { + if (fs_uniform_block_data.proctex_noise_lut_dirty || invalidate) { sync_proctex_value_lut(proctex.noise_table, proctex_noise_lut_data, - uniform_block_data.data.proctex_noise_lut_offset); - uniform_block_data.proctex_noise_lut_dirty = false; + fs_uniform_block_data.data.proctex_noise_lut_offset); + fs_uniform_block_data.proctex_noise_lut_dirty = false; } // Sync the proctex color map - if (uniform_block_data.proctex_color_map_dirty || invalidate) { + if (fs_uniform_block_data.proctex_color_map_dirty || invalidate) { sync_proctex_value_lut(proctex.color_map_table, proctex_color_map_data, - uniform_block_data.data.proctex_color_map_offset); - uniform_block_data.proctex_color_map_dirty = false; + fs_uniform_block_data.data.proctex_color_map_offset); + fs_uniform_block_data.proctex_color_map_dirty = false; } // Sync the proctex alpha map - if (uniform_block_data.proctex_alpha_map_dirty || invalidate) { + if (fs_uniform_block_data.proctex_alpha_map_dirty || 
invalidate) { sync_proctex_value_lut(proctex.alpha_map_table, proctex_alpha_map_data, - uniform_block_data.data.proctex_alpha_map_offset); - uniform_block_data.proctex_alpha_map_dirty = false; + fs_uniform_block_data.data.proctex_alpha_map_offset); + fs_uniform_block_data.proctex_alpha_map_dirty = false; } // Sync the proctex lut - if (uniform_block_data.proctex_lut_dirty || invalidate) { + if (fs_uniform_block_data.proctex_lut_dirty || invalidate) { std::array new_data; std::transform(proctex.color_table.begin(), proctex.color_table.end(), new_data.begin(), @@ -1075,16 +1064,16 @@ void RasterizerVulkan::SyncAndUploadLUTs() { proctex_lut_data = new_data; std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(Common::Vec4f)); - uniform_block_data.data.proctex_lut_offset = + fs_uniform_block_data.data.proctex_lut_offset = static_cast((offset + bytes_used) / sizeof(Common::Vec4f)); - uniform_block_data.dirty = true; + fs_uniform_block_data.dirty = true; bytes_used += new_data.size() * sizeof(Common::Vec4f); } - uniform_block_data.proctex_lut_dirty = false; + fs_uniform_block_data.proctex_lut_dirty = false; } // Sync the proctex difference lut - if (uniform_block_data.proctex_diff_lut_dirty || invalidate) { + if (fs_uniform_block_data.proctex_diff_lut_dirty || invalidate) { std::array new_data; std::transform(proctex.color_diff_table.begin(), proctex.color_diff_table.end(), @@ -1097,48 +1086,59 @@ void RasterizerVulkan::SyncAndUploadLUTs() { proctex_diff_lut_data = new_data; std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(Common::Vec4f)); - uniform_block_data.data.proctex_diff_lut_offset = + fs_uniform_block_data.data.proctex_diff_lut_offset = static_cast((offset + bytes_used) / sizeof(Common::Vec4f)); - uniform_block_data.dirty = true; + fs_uniform_block_data.dirty = true; bytes_used += new_data.size() * sizeof(Common::Vec4f); } - uniform_block_data.proctex_diff_lut_dirty = false; + 
fs_uniform_block_data.proctex_diff_lut_dirty = false; } texture_buffer.Commit(static_cast(bytes_used)); } void RasterizerVulkan::UploadUniforms(bool accelerate_draw) { - const bool sync_vs = accelerate_draw; - const bool sync_fs = uniform_block_data.dirty; - - if (!sync_vs && !sync_fs) { + const bool sync_vs_pica = accelerate_draw; + const bool sync_vs = vs_uniform_block_data.dirty; + const bool sync_fs = fs_uniform_block_data.dirty; + if (!sync_vs_pica && !sync_vs && !sync_fs) { return; } - const u64 uniform_size = uniform_size_aligned_vs + uniform_size_aligned_fs; + const u64 uniform_size = + uniform_size_aligned_vs_pica + uniform_size_aligned_vs + uniform_size_aligned_fs; auto [uniforms, offset, invalidate] = uniform_buffer.Map(uniform_size, uniform_buffer_alignment); u32 used_bytes = 0; - if (sync_vs) { - Pica::Shader::VSUniformData vs_uniforms; - vs_uniforms.uniforms.SetFromRegs(regs.vs, Pica::g_state.vs); - std::memcpy(uniforms, &vs_uniforms, sizeof(vs_uniforms)); - pipeline_cache.SetBufferOffset(0, offset); + if (sync_vs || invalidate) { + std::memcpy(uniforms + used_bytes, &vs_uniform_block_data.data, + sizeof(vs_uniform_block_data.data)); + + pipeline_cache.SetBufferOffset(1, offset + used_bytes); + vs_uniform_block_data.dirty = false; used_bytes += static_cast(uniform_size_aligned_vs); } if (sync_fs || invalidate) { - std::memcpy(uniforms + used_bytes, &uniform_block_data.data, - sizeof(Pica::Shader::UniformData)); + std::memcpy(uniforms + used_bytes, &fs_uniform_block_data.data, + sizeof(fs_uniform_block_data.data)); - pipeline_cache.SetBufferOffset(1, offset + used_bytes); - uniform_block_data.dirty = false; + pipeline_cache.SetBufferOffset(2, offset + used_bytes); + fs_uniform_block_data.dirty = false; used_bytes += static_cast(uniform_size_aligned_fs); } + if (sync_vs_pica) { + VSPicaUniformData vs_uniforms; + vs_uniforms.uniforms.SetFromRegs(regs.vs, Pica::g_state.vs); + std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms)); + + 
pipeline_cache.SetBufferOffset(0, offset + used_bytes); + used_bytes += static_cast(uniform_size_aligned_vs_pica); + } + uniform_buffer.Commit(used_bytes); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 4d5faee60..cd7620c49 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -60,9 +60,6 @@ public: private: void NotifyFixedFunctionPicaRegisterChanged(u32 id) override; - /// Syncs the clip enabled status to match the PICA register - void SyncClipEnabled(); - /// Syncs the cull mode to match the PICA register void SyncCullMode(); @@ -163,6 +160,7 @@ private: vk::UniqueBufferView texture_rg_view; vk::UniqueBufferView texture_rgba_view; u64 uniform_buffer_alignment; + u64 uniform_size_aligned_vs_pica; u64 uniform_size_aligned_vs; u64 uniform_size_aligned_fs; bool async_shaders{false}; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/shader/generator/glsl_shader_decompiler.cpp similarity index 97% rename from src/video_core/renderer_opengl/gl_shader_decompiler.cpp rename to src/video_core/shader/generator/glsl_shader_decompiler.cpp index 6c01bfc70..ee012589b 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/shader/generator/glsl_shader_decompiler.cpp @@ -12,9 +12,9 @@ #include #include "common/assert.h" #include "common/common_types.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" +#include "video_core/shader/generator/glsl_shader_decompiler.h" -namespace OpenGL::ShaderDecompiler { +namespace Pica::Shader::Generator::GLSL { using nihstro::DestRegister; using nihstro::Instruction; @@ -939,34 +939,20 @@ private: ShaderWriter shader; }; -std::string GetCommonDeclarations() { - return R"( -struct pica_uniforms { - bool b[16]; - uvec4 i[4]; - vec4 f[96]; -}; - -bool exec_shader(); - -)"; -} - -std::optional DecompileProgram(const 
Pica::Shader::ProgramCode& program_code, - const Pica::Shader::SwizzleData& swizzle_data, - u32 main_offset, const RegGetter& inputreg_getter, - const RegGetter& outputreg_getter, - bool sanitize_mul) { +std::string DecompileProgram(const Pica::Shader::ProgramCode& program_code, + const Pica::Shader::SwizzleData& swizzle_data, u32 main_offset, + const RegGetter& inputreg_getter, const RegGetter& outputreg_getter, + bool sanitize_mul) { try { auto subroutines = ControlFlowAnalyzer(program_code, main_offset).MoveSubroutines(); GLSLGenerator generator(subroutines, program_code, swizzle_data, main_offset, inputreg_getter, outputreg_getter, sanitize_mul); - return {ProgramResult{generator.MoveShaderCode()}}; + return generator.MoveShaderCode(); } catch (const DecompileFail& exception) { LOG_INFO(HW_GPU, "Shader decompilation failed: {}", exception.what()); - return std::nullopt; + return ""; } } -} // namespace OpenGL::ShaderDecompiler +} // namespace Pica::Shader::Generator::GLSL diff --git a/src/video_core/shader/generator/glsl_shader_decompiler.h b/src/video_core/shader/generator/glsl_shader_decompiler.h new file mode 100644 index 000000000..933bc4df8 --- /dev/null +++ b/src/video_core/shader/generator/glsl_shader_decompiler.h @@ -0,0 +1,21 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include "common/common_types.h" +#include "video_core/shader/shader.h" + +namespace Pica::Shader::Generator::GLSL { + +using RegGetter = std::function; + +std::string DecompileProgram(const Pica::Shader::ProgramCode& program_code, + const Pica::Shader::SwizzleData& swizzle_data, u32 main_offset, + const RegGetter& inputreg_getter, const RegGetter& outputreg_getter, + bool sanitize_mul); + +} // namespace Pica::Shader::Generator::GLSL diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.cpp b/src/video_core/shader/generator/glsl_shader_gen.cpp similarity index 74% rename from src/video_core/renderer_vulkan/vk_shader_gen.cpp rename to src/video_core/shader/generator/glsl_shader_gen.cpp index 885213b3c..33ebf5a58 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.cpp +++ b/src/video_core/shader/generator/glsl_shader_gen.cpp @@ -4,17 +4,12 @@ #include #include -#include "common/bit_set.h" #include "common/logging/log.h" #include "core/core.h" #include "core/telemetry_session.h" -#include "video_core/pica_state.h" -#include "video_core/regs_framebuffer.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_vulkan/vk_instance.h" -#include "video_core/renderer_vulkan/vk_shader_gen.h" -#include "video_core/shader/shader_uniforms.h" -#include "video_core/video_core.h" +#include "video_core/shader/generator/glsl_shader_decompiler.h" +#include "video_core/shader/generator/glsl_shader_gen.h" +#include "video_core/shader/generator/shader_uniforms.h" using Pica::FramebufferRegs; using Pica::LightingRegs; @@ -23,15 +18,95 @@ using Pica::TexturingRegs; using TevStageConfig = TexturingRegs::TevStageConfig; using VSOutputAttributes = RasterizerRegs::VSOutputAttributes; -namespace Vulkan { +namespace Pica::Shader::Generator::GLSL { -const std::string UniformBlockDef = Pica::Shader::BuildShaderUniformDefinitions("binding = 1,"); +constexpr std::string_view VSPicaUniformBlockDef = R"( +struct 
pica_uniforms { + bool b[16]; + uvec4 i[4]; + vec4 f[96]; +}; -static std::string GetVertexInterfaceDeclaration(bool is_output, bool use_clip_planes = false) { +#ifdef VULKAN +layout (set = 0, binding = 0, std140) uniform vs_pica_data { +#else +layout (binding = 0, std140) uniform vs_pica_data { +#endif + pica_uniforms uniforms; +}; +)"; + +constexpr std::string_view VSUniformBlockDef = R"( +#ifdef VULKAN +layout (set = 0, binding = 1, std140) uniform vs_data { +#else +layout (binding = 1, std140) uniform vs_data { +#endif + bool enable_clip1; + vec4 clip_coef; +}; +)"; + +constexpr std::string_view FSUniformBlockDef = R"( +#define NUM_TEV_STAGES 6 +#define NUM_LIGHTS 8 +#define NUM_LIGHTING_SAMPLERS 24 +struct LightSrc { + vec3 specular_0; + vec3 specular_1; + vec3 diffuse; + vec3 ambient; + vec3 position; + vec3 spot_direction; + float dist_atten_bias; + float dist_atten_scale; +}; +#ifdef VULKAN +layout (set = 0, binding = 2, std140) uniform fs_data { +#else +layout (binding = 2, std140) uniform fs_data { +#endif + int framebuffer_scale; + int alphatest_ref; + float depth_scale; + float depth_offset; + float shadow_bias_constant; + float shadow_bias_linear; + int scissor_x1; + int scissor_y1; + int scissor_x2; + int scissor_y2; + int fog_lut_offset; + int proctex_noise_lut_offset; + int proctex_color_map_offset; + int proctex_alpha_map_offset; + int proctex_lut_offset; + int proctex_diff_lut_offset; + float proctex_bias; + int shadow_texture_bias; + ivec4 lighting_lut_offset[NUM_LIGHTING_SAMPLERS / 4]; + vec3 fog_color; + vec2 proctex_noise_f; + vec2 proctex_noise_a; + vec2 proctex_noise_p; + vec3 lighting_global_ambient; + LightSrc light_src[NUM_LIGHTS]; + vec4 const_color[NUM_TEV_STAGES]; + vec4 tev_combiner_buffer_color; + vec3 tex_lod_bias; + vec4 tex_border_color[3]; + vec4 blend_color; +}; +)"; + +static std::string GetVertexInterfaceDeclaration(bool is_output, bool use_clip_planes, + bool separable_shader) { std::string out; const auto append_variable = 
[&](std::string_view var, int location) { - out += fmt::format("layout (location={}) ", location); + if (separable_shader) { + out += fmt::format("layout (location={}) ", location); + } out += fmt::format("{}{};\n", is_output ? "out " : "in ", var); }; @@ -43,7 +118,7 @@ static std::string GetVertexInterfaceDeclaration(bool is_output, bool use_clip_p append_variable("vec4 normquat", ATTRIBUTE_NORMQUAT); append_variable("vec3 view", ATTRIBUTE_VIEW); - if (is_output) { + if (is_output && separable_shader) { // gl_PerVertex redeclaration is required for separate shader object out += "out gl_PerVertex {\n"; out += " invariant vec4 gl_Position;\n"; @@ -56,263 +131,6 @@ static std::string GetVertexInterfaceDeclaration(bool is_output, bool use_clip_p return out; } -PicaFSConfig::PicaFSConfig(const Pica::Regs& regs, const Instance& instance) { - state.scissor_test_mode.Assign(regs.rasterizer.scissor_test.mode); - - state.depthmap_enable.Assign(regs.rasterizer.depthmap_enable); - - state.alpha_test_func.Assign(regs.framebuffer.output_merger.alpha_test.enable - ? regs.framebuffer.output_merger.alpha_test.func.Value() - : FramebufferRegs::CompareFunc::Always); - - state.texture0_type.Assign(regs.texturing.texture0.type); - - state.texture2_use_coord1.Assign(regs.texturing.main_config.texture2_use_coord1 != 0); - - const auto pica_textures = regs.texturing.GetTextures(); - for (u32 tex_index = 0; tex_index < 3; tex_index++) { - const auto config = pica_textures[tex_index].config; - state.texture_border_color[tex_index].enable_s.Assign( - !instance.IsCustomBorderColorSupported() && - config.wrap_s == TexturingRegs::TextureConfig::WrapMode::ClampToBorder); - state.texture_border_color[tex_index].enable_t.Assign( - !instance.IsCustomBorderColorSupported() && - config.wrap_t == TexturingRegs::TextureConfig::WrapMode::ClampToBorder); - } - - // Emulate logic op in the shader if not supported. 
This is mostly for mobile GPUs - const bool emulate_logic_op = instance.NeedsLogicOpEmulation() && - !Pica::g_state.regs.framebuffer.output_merger.alphablend_enable; - - state.emulate_logic_op.Assign(emulate_logic_op); - if (emulate_logic_op) { - state.logic_op.Assign(regs.framebuffer.output_merger.logic_op); - } else { - state.logic_op.Assign(Pica::FramebufferRegs::LogicOp::NoOp); - } - - // Copy relevant tev stages fields. - // We don't sync const_color here because of the high variance, it is a - // shader uniform instead. - const auto& tev_stages = regs.texturing.GetTevStages(); - DEBUG_ASSERT(state.tev_stages.size() == tev_stages.size()); - for (std::size_t i = 0; i < tev_stages.size(); i++) { - const auto& tev_stage = tev_stages[i]; - state.tev_stages[i].sources_raw = tev_stage.sources_raw; - state.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw; - state.tev_stages[i].ops_raw = tev_stage.ops_raw; - state.tev_stages[i].scales_raw = tev_stage.scales_raw; - if (tev_stage.color_op == TevStageConfig::Operation::Dot3_RGBA) { - state.tev_stages[i].sources_raw &= 0xFFF; - state.tev_stages[i].modifiers_raw &= 0xFFF; - state.tev_stages[i].ops_raw &= 0xF; - } - } - - state.fog_mode.Assign(regs.texturing.fog_mode); - state.fog_flip.Assign(regs.texturing.fog_flip != 0); - - state.combiner_buffer_input.Assign( - regs.texturing.tev_combiner_buffer_input.update_mask_rgb.Value() | - regs.texturing.tev_combiner_buffer_input.update_mask_a.Value() << 4); - - // Fragment lighting - state.lighting.enable.Assign(!regs.lighting.disable); - if (state.lighting.enable) { - state.lighting.src_num.Assign(regs.lighting.max_light_index + 1); - - for (u32 light_index = 0; light_index < state.lighting.src_num; ++light_index) { - const u32 num = regs.lighting.light_enable.GetNum(light_index); - const auto& light = regs.lighting.light[num]; - state.lighting.light[light_index].num.Assign(num); - state.lighting.light[light_index].directional.Assign(light.config.directional != 0); - 
state.lighting.light[light_index].two_sided_diffuse.Assign( - light.config.two_sided_diffuse != 0); - state.lighting.light[light_index].geometric_factor_0.Assign( - light.config.geometric_factor_0 != 0); - state.lighting.light[light_index].geometric_factor_1.Assign( - light.config.geometric_factor_1 != 0); - state.lighting.light[light_index].dist_atten_enable.Assign( - !regs.lighting.IsDistAttenDisabled(num)); - state.lighting.light[light_index].spot_atten_enable.Assign( - !regs.lighting.IsSpotAttenDisabled(num)); - state.lighting.light[light_index].shadow_enable.Assign( - !regs.lighting.IsShadowDisabled(num)); - } - - state.lighting.lut_d0.enable.Assign(regs.lighting.config1.disable_lut_d0 == 0); - if (state.lighting.lut_d0.enable) { - state.lighting.lut_d0.abs_input.Assign(regs.lighting.abs_lut_input.disable_d0 == 0); - state.lighting.lut_d0.type.Assign(regs.lighting.lut_input.d0.Value()); - state.lighting.lut_d0.scale = - regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); - } - - state.lighting.lut_d1.enable.Assign(regs.lighting.config1.disable_lut_d1 == 0); - if (state.lighting.lut_d1.enable) { - state.lighting.lut_d1.abs_input.Assign(regs.lighting.abs_lut_input.disable_d1 == 0); - state.lighting.lut_d1.type.Assign(regs.lighting.lut_input.d1.Value()); - state.lighting.lut_d1.scale = - regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); - } - - // this is a dummy field due to lack of the corresponding register - state.lighting.lut_sp.enable.Assign(1); - state.lighting.lut_sp.abs_input.Assign(regs.lighting.abs_lut_input.disable_sp == 0); - state.lighting.lut_sp.type.Assign(regs.lighting.lut_input.sp.Value()); - state.lighting.lut_sp.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.sp); - - state.lighting.lut_fr.enable.Assign(regs.lighting.config1.disable_lut_fr == 0); - if (state.lighting.lut_fr.enable) { - state.lighting.lut_fr.abs_input.Assign(regs.lighting.abs_lut_input.disable_fr == 0); - 
state.lighting.lut_fr.type.Assign(regs.lighting.lut_input.fr.Value()); - state.lighting.lut_fr.scale = - regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); - } - - state.lighting.lut_rr.enable.Assign(regs.lighting.config1.disable_lut_rr == 0); - if (state.lighting.lut_rr.enable) { - state.lighting.lut_rr.abs_input.Assign(regs.lighting.abs_lut_input.disable_rr == 0); - state.lighting.lut_rr.type.Assign(regs.lighting.lut_input.rr.Value()); - state.lighting.lut_rr.scale = - regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); - } - - state.lighting.lut_rg.enable.Assign(regs.lighting.config1.disable_lut_rg == 0); - if (state.lighting.lut_rg.enable) { - state.lighting.lut_rg.abs_input.Assign(regs.lighting.abs_lut_input.disable_rg == 0); - state.lighting.lut_rg.type.Assign(regs.lighting.lut_input.rg.Value()); - state.lighting.lut_rg.scale = - regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); - } - - state.lighting.lut_rb.enable.Assign(regs.lighting.config1.disable_lut_rb == 0); - if (state.lighting.lut_rb.enable) { - state.lighting.lut_rb.abs_input.Assign(regs.lighting.abs_lut_input.disable_rb == 0); - state.lighting.lut_rb.type.Assign(regs.lighting.lut_input.rb.Value()); - state.lighting.lut_rb.scale = - regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); - } - - state.lighting.config.Assign(regs.lighting.config0.config); - state.lighting.enable_primary_alpha.Assign(regs.lighting.config0.enable_primary_alpha); - state.lighting.enable_secondary_alpha.Assign(regs.lighting.config0.enable_secondary_alpha); - state.lighting.bump_mode.Assign(regs.lighting.config0.bump_mode); - state.lighting.bump_selector.Assign(regs.lighting.config0.bump_selector); - state.lighting.bump_renorm.Assign(regs.lighting.config0.disable_bump_renorm == 0); - state.lighting.clamp_highlights.Assign(regs.lighting.config0.clamp_highlights != 0); - - state.lighting.enable_shadow.Assign(regs.lighting.config0.enable_shadow != 0); - if (state.lighting.enable_shadow) 
{ - state.lighting.shadow_primary.Assign(regs.lighting.config0.shadow_primary != 0); - state.lighting.shadow_secondary.Assign(regs.lighting.config0.shadow_secondary != 0); - state.lighting.shadow_invert.Assign(regs.lighting.config0.shadow_invert != 0); - state.lighting.shadow_alpha.Assign(regs.lighting.config0.shadow_alpha != 0); - state.lighting.shadow_selector.Assign(regs.lighting.config0.shadow_selector); - } - } - - state.proctex.enable.Assign(regs.texturing.main_config.texture3_enable); - if (state.proctex.enable) { - state.proctex.coord.Assign(regs.texturing.main_config.texture3_coordinates); - state.proctex.u_clamp.Assign(regs.texturing.proctex.u_clamp); - state.proctex.v_clamp.Assign(regs.texturing.proctex.v_clamp); - state.proctex.color_combiner.Assign(regs.texturing.proctex.color_combiner); - state.proctex.alpha_combiner.Assign(regs.texturing.proctex.alpha_combiner); - state.proctex.separate_alpha.Assign(regs.texturing.proctex.separate_alpha); - state.proctex.noise_enable.Assign(regs.texturing.proctex.noise_enable); - state.proctex.u_shift.Assign(regs.texturing.proctex.u_shift); - state.proctex.v_shift.Assign(regs.texturing.proctex.v_shift); - state.proctex.lut_width = regs.texturing.proctex_lut.width; - state.proctex.lut_offset0 = regs.texturing.proctex_lut_offset.level0; - state.proctex.lut_offset1 = regs.texturing.proctex_lut_offset.level1; - state.proctex.lut_offset2 = regs.texturing.proctex_lut_offset.level2; - state.proctex.lut_offset3 = regs.texturing.proctex_lut_offset.level3; - state.proctex.lod_min = regs.texturing.proctex_lut.lod_min; - state.proctex.lod_max = regs.texturing.proctex_lut.lod_max; - state.proctex.lut_filter.Assign(regs.texturing.proctex_lut.filter); - } - - state.shadow_rendering.Assign(regs.framebuffer.output_merger.fragment_operation_mode == - FramebufferRegs::FragmentOperationMode::Shadow); - state.shadow_texture_orthographic.Assign(regs.texturing.shadow.orthographic != 0); - - // We only need fragment shader interlock when 
shadow rendering. - state.use_fragment_shader_interlock.Assign(state.shadow_rendering && - instance.IsFragmentShaderInterlockSupported()); -} - -void PicaShaderConfigCommon::Init(const Pica::RasterizerRegs& rasterizer, - const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) { - program_hash = setup.GetProgramCodeHash(); - swizzle_hash = setup.GetSwizzleDataHash(); - main_offset = regs.main_offset; - sanitize_mul = VideoCore::g_hw_shader_accurate_mul; - - num_outputs = 0; - load_flags.fill(AttribLoadFlags::Float); - output_map.fill(16); - - for (int reg : Common::BitSet(regs.output_mask)) { - output_map[reg] = num_outputs++; - } - - vs_output_attributes = Common::BitSet(regs.output_mask).Count(); - gs_output_attributes = vs_output_attributes; - - semantic_maps.fill({16, 0}); - for (u32 attrib = 0; attrib < rasterizer.vs_output_total; ++attrib) { - const std::array semantics{ - rasterizer.vs_output_attributes[attrib].map_x.Value(), - rasterizer.vs_output_attributes[attrib].map_y.Value(), - rasterizer.vs_output_attributes[attrib].map_z.Value(), - rasterizer.vs_output_attributes[attrib].map_w.Value(), - }; - for (u32 comp = 0; comp < 4; ++comp) { - const auto semantic = semantics[comp]; - if (static_cast(semantic) < 24) { - semantic_maps[static_cast(semantic)] = {attrib, comp}; - } else if (semantic != VSOutputAttributes::INVALID) { - LOG_ERROR(Render_OpenGL, "Invalid/unknown semantic id: {}", semantic); - } - } - } -} - -PicaVSConfig::PicaVSConfig(const Pica::RasterizerRegs& rasterizer, const Pica::ShaderRegs& regs, - Pica::Shader::ShaderSetup& setup, const Instance& instance) { - state.Init(rasterizer, regs, setup); - use_clip_planes = instance.IsShaderClipDistanceSupported(); -} - -void PicaGSConfigCommonRaw::Init(const Pica::Regs& regs) { - vs_output_attributes = Common::BitSet(regs.vs.output_mask).Count(); - gs_output_attributes = vs_output_attributes; - - semantic_maps.fill({16, 0}); - for (u32 attrib = 0; attrib < regs.rasterizer.vs_output_total; 
++attrib) { - const std::array semantics{ - regs.rasterizer.vs_output_attributes[attrib].map_x.Value(), - regs.rasterizer.vs_output_attributes[attrib].map_y.Value(), - regs.rasterizer.vs_output_attributes[attrib].map_z.Value(), - regs.rasterizer.vs_output_attributes[attrib].map_w.Value(), - }; - for (u32 comp = 0; comp < 4; ++comp) { - const auto semantic = semantics[comp]; - if (static_cast(semantic) < 24) { - semantic_maps[static_cast(semantic)] = {attrib, comp}; - } else if (semantic != VSOutputAttributes::INVALID) { - LOG_ERROR(Render_OpenGL, "Invalid/unknown semantic id: {}", semantic); - } - } - } -} - -PicaFixedGSConfig::PicaFixedGSConfig(const Pica::Regs& regs, const Instance& instance) { - state.Init(regs); - use_clip_planes = instance.IsShaderClipDistanceSupported(); -} - /// Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code) static bool IsPassThroughTevStage(const TevStageConfig& stage) { return (stage.color_op == TevStageConfig::Operation::Replace && @@ -361,7 +179,7 @@ static void AppendSource(std::string& out, const PicaFSConfig& config, break; default: out += "vec4(0.0)"; - LOG_CRITICAL(Render_OpenGL, "Unknown source op {}", source); + LOG_CRITICAL(Render, "Unknown source op {}", source); break; } } @@ -419,7 +237,7 @@ static void AppendColorModifier(std::string& out, const PicaFSConfig& config, break; default: out += "vec3(0.0)"; - LOG_CRITICAL(Render_OpenGL, "Unknown color modifier op {}", modifier); + LOG_CRITICAL(Render, "Unknown color modifier op {}", modifier); break; } } @@ -468,7 +286,7 @@ static void AppendAlphaModifier(std::string& out, const PicaFSConfig& config, break; default: out += "0.0"; - LOG_CRITICAL(Render_OpenGL, "Unknown alpha modifier op {}", modifier); + LOG_CRITICAL(Render, "Unknown alpha modifier op {}", modifier); break; } } @@ -500,7 +318,7 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper case Operation::Dot3_RGBA: return "vec3(dot(color_results_1 - 
vec3(0.5), color_results_2 - vec3(0.5)) * 4.0)"; default: - LOG_CRITICAL(Render_OpenGL, "Unknown color combiner operation: {}", operation); + LOG_CRITICAL(Render, "Unknown color combiner operation: {}", operation); return "vec3(0.0)"; } }; @@ -541,7 +359,7 @@ static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation oper break; default: out += "0.0"; - LOG_CRITICAL(Render_OpenGL, "Unknown alpha combiner operation: {}", operation); + LOG_CRITICAL(Render, "Unknown alpha combiner operation: {}", operation); break; } out += ", 0.0, 1.0)"; @@ -571,7 +389,7 @@ static void AppendAlphaTestCondition(std::string& out, FramebufferRegs::CompareF default: out += "false"; - LOG_CRITICAL(Render_OpenGL, "Unknown alpha test condition {}", func); + LOG_CRITICAL(Render, "Unknown alpha test condition {}", func); break; } } @@ -651,38 +469,45 @@ static void WriteLighting(std::string& out, const PicaFSConfig& config) { return fmt::format("2.0 * (sampleTexUnit{}()).rgb - 1.0", lighting.bump_selector.Value()); }; - switch (lighting.bump_mode) { - case LightingRegs::LightingBumpMode::NormalMap: { - // Bump mapping is enabled using a normal map - out += fmt::format("vec3 surface_normal = {};\n", perturbation()); - - // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher - // precision result - if (lighting.bump_renorm) { - constexpr std::string_view val = - "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))"; - out += fmt::format("surface_normal.z = sqrt(max({}, 0.0));\n", val); - } - - // The tangent vector is not perturbed by the normal map and is just a unit vector. 
+ if (config.state.use_custom_normal_map) { + const std::string normal_texel = + fmt::format("2.0 * (texture(tex_normal, texcoord0)).rgb - 1.0"); + out += fmt::format("vec3 surface_normal = {};\n", normal_texel); out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; - break; - } - case LightingRegs::LightingBumpMode::TangentMap: { - // Bump mapping is enabled using a tangent map - out += fmt::format("vec3 surface_tangent = {};\n", perturbation()); - // Mathematically, recomputing Z-component of the tangent vector won't affect the relevant - // computation below, which is also confirmed on 3DS. So we don't bother recomputing here - // even if 'renorm' is enabled. + } else { + switch (lighting.bump_mode) { + case LightingRegs::LightingBumpMode::NormalMap: { + // Bump mapping is enabled using a normal map + out += fmt::format("vec3 surface_normal = {};\n", perturbation()); - // The normal vector is not perturbed by the tangent map and is just a unit vector. - out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n"; - break; - } - default: - // No bump mapping - surface local normal and tangent are just unit vectors - out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n" - "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; + // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher + // precision result + if (lighting.bump_renorm) { + constexpr std::string_view val = "(1.0 - (surface_normal.x*surface_normal.x + " + "surface_normal.y*surface_normal.y))"; + out += fmt::format("surface_normal.z = sqrt(max({}, 0.0));\n", val); + } + + // The tangent vector is not perturbed by the normal map and is just a unit vector. 
+ out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; + break; + } + case LightingRegs::LightingBumpMode::TangentMap: { + // Bump mapping is enabled using a tangent map + out += fmt::format("vec3 surface_tangent = {};\n", perturbation()); + // Mathematically, recomputing Z-component of the tangent vector won't affect the + // relevant computation below, which is also confirmed on 3DS. So we don't bother + // recomputing here even if 'renorm' is enabled. + + // The normal vector is not perturbed by the tangent map and is just a unit vector. + out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n"; + break; + } + default: + // No bump mapping - surface local normal and tangent are just unit vectors + out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n" + "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; + } } // Rotate the surface-local normal by the interpolated normal quaternion to convert it to @@ -1120,7 +945,7 @@ float ProcTexNoiseCoef(vec2 x) { if (config.state.proctex.coord < 3) { out += fmt::format("vec2 uv = abs(texcoord{});\n", config.state.proctex.coord.Value()); } else { - LOG_CRITICAL(Render_OpenGL, "Unexpected proctex.coord >= 3"); + LOG_CRITICAL(Render, "Unexpected proctex.coord >= 3"); out += "vec2 uv = abs(texcoord0);\n"; } @@ -1197,12 +1022,110 @@ float ProcTexNoiseCoef(vec2 x) { } } -std::string GenerateFragmentShader(const PicaFSConfig& config) { - const auto& state = config.state; - std::string out = R"( -#version 450 core -#extension GL_ARB_separate_shader_objects : enable +static void WriteLogicOp(std::string& out, const PicaFSConfig& config) { + if (!config.state.emulate_logic_op) { + return; + } + switch (config.state.logic_op) { + case FramebufferRegs::LogicOp::Clear: + out += "color = vec4(0);\n"; + break; + case FramebufferRegs::LogicOp::Set: + out += "color = vec4(1);\n"; + break; + case FramebufferRegs::LogicOp::Copy: + // Take the color output as-is + break; + case FramebufferRegs::LogicOp::CopyInverted: + out += "color = ~color;\n"; 
+ break; + case FramebufferRegs::LogicOp::NoOp: + // We need to discard the color, but not necessarily the depth. This is not possible + // with fragment shader alone, so we emulate this behavior on GLES with glColorMask. + break; + default: + LOG_CRITICAL(HW_GPU, "Unhandled logic_op {:x}", config.state.logic_op.Value()); + UNIMPLEMENTED(); + } +} + +static void WriteBlending(std::string& out, const PicaFSConfig& config) { + if (!config.state.rgb_blend.emulate_blending && !config.state.alpha_blend.emulate_blending) + [[likely]] { + return; + } + + using BlendFactor = Pica::FramebufferRegs::BlendFactor; + out += R"( +vec4 source_color = last_tex_env_out; +#if defined(GL_EXT_shader_framebuffer_fetch) +vec4 dest_color = color; +#elif defined(GL_ARM_shader_framebuffer_fetch) +vec4 dest_color = gl_LastFragColorARM; +#else +vec4 dest_color = texelFetch(colorBuffer, ivec2(gl_FragCoord.xy), 0); +#endif )"; + const auto get_factor = [&](BlendFactor factor) -> std::string { + switch (factor) { + case BlendFactor::Zero: + return "vec4(0.f)"; + case BlendFactor::One: + return "vec4(1.f)"; + case BlendFactor::SourceColor: + return "source_color"; + case BlendFactor::OneMinusSourceColor: + return "vec4(1.f) - source_color"; + case BlendFactor::DestColor: + return "dest_color"; + case BlendFactor::OneMinusDestColor: + return "vec4(1.f) - dest_color"; + case BlendFactor::SourceAlpha: + return "source_color.aaaa"; + case BlendFactor::OneMinusSourceAlpha: + return "vec4(1.f) - source_color.aaaa"; + case BlendFactor::DestAlpha: + return "dest_color.aaaa"; + case BlendFactor::OneMinusDestAlpha: + return "vec4(1.f) - dest_color.aaaa"; + case BlendFactor::ConstantColor: + return "blend_color"; + case BlendFactor::OneMinusConstantColor: + return "vec4(1.f) - blend_color"; + case BlendFactor::ConstantAlpha: + return "blend_color.aaaa"; + case BlendFactor::OneMinusConstantAlpha: + return "vec4(1.f) - blend_color.aaaa"; + default: + LOG_CRITICAL(Render_OpenGL, "Unknown blend factor {}", 
factor); + return "vec4(1.f)"; + } + }; + const auto get_func = [](Pica::FramebufferRegs::BlendEquation eq) { + return eq == Pica::FramebufferRegs::BlendEquation::Min ? "min" : "max"; + }; + + if (config.state.rgb_blend.emulate_blending) { + out += fmt::format( + "last_tex_env_out.rgb = {}(source_color.rgb * ({}).rgb, dest_color.rgb * ({}).rgb);\n", + get_func(config.state.rgb_blend.eq), get_factor(config.state.rgb_blend.src_factor), + get_factor(config.state.rgb_blend.dst_factor)); + } + if (config.state.alpha_blend.emulate_blending) { + out += fmt::format( + "last_tex_env_out.a = {}(source_color.a * ({}).a, dest_color.a * ({}).a);\n", + get_func(config.state.alpha_blend.eq), get_factor(config.state.alpha_blend.src_factor), + get_factor(config.state.alpha_blend.dst_factor)); + } +} + +std::string GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader) { + const auto& state = config.state; + std::string out; + + if (separable_shader) { + out += "#extension GL_ARB_separate_shader_objects : enable\n"; + } if (state.use_fragment_shader_interlock) { out += R"( @@ -1224,21 +1147,34 @@ layout(pixel_interlock_ordered) in; )"; } - out += GetVertexInterfaceDeclaration(false); + if (config.state.rgb_blend.emulate_blending || config.state.alpha_blend.emulate_blending) { + out += R"( +#if defined(GL_EXT_shader_framebuffer_fetch) +#extension GL_EXT_shader_framebuffer_fetch : enable +#elif defined(GL_ARM_shader_framebuffer_fetch) +#extension GL_ARM_shader_framebuffer_fetch : enable +#else +#define CITRA_EMULATED_BLENDING_FALLBACK 1 +#endif +)"; + } + + out += fragment_shader_precision_OES; + out += GetVertexInterfaceDeclaration(false, false, separable_shader); out += R"( -in vec4 gl_FragCoord; - layout (location = 0) out vec4 color; -layout(set = 0, binding = 2) uniform samplerBuffer texture_buffer_lut_lf; -layout(set = 0, binding = 3) uniform samplerBuffer texture_buffer_lut_rg; -layout(set = 0, binding = 4) uniform samplerBuffer texture_buffer_lut_rgba; 
+#ifdef VULKAN +layout(set = 0, binding = 3) uniform samplerBuffer texture_buffer_lut_lf; +layout(set = 0, binding = 4) uniform samplerBuffer texture_buffer_lut_rg; +layout(set = 0, binding = 5) uniform samplerBuffer texture_buffer_lut_rgba; layout(set = 1, binding = 0) uniform sampler2D tex0; layout(set = 1, binding = 1) uniform sampler2D tex1; layout(set = 1, binding = 2) uniform sampler2D tex2; layout(set = 1, binding = 3) uniform samplerCube tex_cube; +// TODO: Binding for custom normal maps, when supported by Vulkan. layout(set = 2, binding = 0, r32ui) uniform readonly uimage2D shadow_texture_px; layout(set = 2, binding = 1, r32ui) uniform readonly uimage2D shadow_texture_nx; @@ -1247,9 +1183,31 @@ layout(set = 2, binding = 3, r32ui) uniform readonly uimage2D shadow_texture_ny; layout(set = 2, binding = 4, r32ui) uniform readonly uimage2D shadow_texture_pz; layout(set = 2, binding = 5, r32ui) uniform readonly uimage2D shadow_texture_nz; layout(set = 2, binding = 6, r32ui) uniform uimage2D shadow_buffer; +#else +layout(binding = 0) uniform sampler2D tex0; +layout(binding = 1) uniform sampler2D tex1; +layout(binding = 2) uniform sampler2D tex2; +layout(binding = 3) uniform samplerBuffer texture_buffer_lut_lf; +layout(binding = 4) uniform samplerBuffer texture_buffer_lut_rg; +layout(binding = 5) uniform samplerBuffer texture_buffer_lut_rgba; +layout(binding = 6) uniform samplerCube tex_cube; +layout(binding = 7) uniform sampler2D tex_normal; + +layout(binding = 0, r32ui) uniform readonly uimage2D shadow_texture_px; +layout(binding = 1, r32ui) uniform readonly uimage2D shadow_texture_nx; +layout(binding = 2, r32ui) uniform readonly uimage2D shadow_texture_py; +layout(binding = 3, r32ui) uniform readonly uimage2D shadow_texture_ny; +layout(binding = 4, r32ui) uniform readonly uimage2D shadow_texture_pz; +layout(binding = 5, r32ui) uniform readonly uimage2D shadow_texture_nz; +layout(binding = 6, r32ui) uniform uimage2D shadow_buffer; + +#if 
defined(CITRA_EMULATED_BLENDING_FALLBACK) +layout(location = 10) uniform sampler2D colorBuffer; +#endif +#endif )"; - out += UniformBlockDef; + out += FSUniformBlockDef; out += R"( // Rotate the vector v by the quaternion q @@ -1257,24 +1215,6 @@ vec3 quaternion_rotate(vec4 q, vec3 v) { return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v); } -float LookupLightingLUT(int lut_index, int index, float delta) { - vec2 entry = texelFetch(texture_buffer_lut_lf, lighting_lut_offset[lut_index >> 2][lut_index & 3] + index).rg; - return entry.r + entry.g * delta; -} - -float LookupLightingLUTUnsigned(int lut_index, float pos) { - int index = int(clamp(floor(pos * 256.0), 0.f, 255.f)); - float delta = pos * 256.0 - float(index); - return LookupLightingLUT(lut_index, index, delta); -} - -float LookupLightingLUTSigned(int lut_index, float pos) { - int index = int(clamp(floor(pos * 128.0), -128.f, 127.f)); - float delta = pos * 128.0 - float(index); - if (index < 0) index += 256; - return LookupLightingLUT(lut_index, index, delta); -} - float byteround(float x) { return round(x * 255.0) * (1.0 / 255.0); } @@ -1307,7 +1247,10 @@ uvec2 DecodeShadow(uint pixel) { uint EncodeShadow(uvec2 pixel) { return (pixel.x << 8) | pixel.y; } +)"; + if (state.shadow_rendering) { + out += R"( uint UpdateShadow(uint pixel, uint d, uint s) { uvec2 ref = DecodeShadow(pixel); if (d < ref.x) { @@ -1320,31 +1263,60 @@ uint UpdateShadow(uint pixel, uint d, uint s) { } return EncodeShadow(ref); } +)"; + } + if (state.lighting.enable) { + out += R"( +float LookupLightingLUT(int lut_index, int index, float delta) { + vec2 entry = texelFetch(texture_buffer_lut_lf, lighting_lut_offset[lut_index >> 2][lut_index & 3] + index).rg; + return entry.r + entry.g * delta; +} + +float LookupLightingLUTUnsigned(int lut_index, float pos) { + int index = int(clamp(floor(pos * 256.0), 0.f, 255.f)); + float delta = pos * 256.0 - float(index); + return LookupLightingLUT(lut_index, index, delta); +} + +float 
LookupLightingLUTSigned(int lut_index, float pos) { + int index = int(clamp(floor(pos * 128.0), -128.f, 127.f)); + float delta = pos * 128.0 - float(index); + if (index < 0) index += 256; + return LookupLightingLUT(lut_index, index, delta); +} +)"; + } + + if (state.texture0_type == TexturingRegs::TextureConfig::Shadow2D || + state.texture0_type == TexturingRegs::TextureConfig::ShadowCube) { + out += R"( float CompareShadow(uint pixel, uint z) { uvec2 p = DecodeShadow(pixel); return mix(float(p.y) * (1.0 / 255.0), 0.0, p.x <= z); } -float SampleShadow2D(ivec2 uv, uint z) { - if (any(bvec4( lessThan(uv, ivec2(0)), greaterThanEqual(uv, imageSize(shadow_texture_px)) ))) - return 1.0; - return CompareShadow(imageLoad(shadow_texture_px, uv).x, z); -} - float mix2(vec4 s, vec2 a) { vec2 t = mix(s.xy, s.zw, a.yy); return mix(t.x, t.y, a.x); } +)"; + + if (state.texture0_type == TexturingRegs::TextureConfig::Shadow2D) { + out += R"( +float SampleShadow2D(ivec2 uv, uint z) { + if (any(bvec4( lessThan(uv, ivec2(0)), greaterThanEqual(uv, imageSize(shadow_texture_px)) ))) + return 1.0; + return CompareShadow(imageLoad(shadow_texture_px, uv).x, z); +} vec4 shadowTexture(vec2 uv, float w) { )"; - - if (!config.state.shadow_texture_orthographic) { - out += "uv /= w;"; - } - out += "uint z = uint(max(0, int(min(abs(w), 1.0) * float(0xFFFFFF)) - shadow_texture_bias));"; - out += R"( + if (!config.state.shadow_texture_orthographic) { + out += "uv /= w;"; + } + out += R"( + uint z = uint(max(0, int(min(abs(w), 1.0) * float(0xFFFFFF)) - shadow_texture_bias)); vec2 coord = vec2(imageSize(shadow_texture_px)) * uv - vec2(0.5); vec2 coord_floor = floor(coord); vec2 f = coord - coord_floor; @@ -1356,7 +1328,9 @@ vec4 shadowTexture(vec2 uv, float w) { SampleShadow2D(i + ivec2(1, 1), z)); return vec4(mix2(s, f)); } - +)"; + } else if (state.texture0_type == TexturingRegs::TextureConfig::ShadowCube) { + out += R"( vec4 shadowTextureCube(vec2 uv, float w) { ivec2 size = 
imageSize(shadow_texture_px); vec3 c = vec3(uv, w); @@ -1438,43 +1412,39 @@ vec4 shadowTextureCube(vec2 uv, float w) { return vec4(mix2(s, f)); } )"; + } + } if (config.state.proctex.enable) { AppendProcTexSampler(out, config); } for (u32 texture_unit = 0; texture_unit < 4; texture_unit++) { - out += fmt::format("vec4 sampleTexUnit{}() {{", texture_unit); + out += fmt::format("vec4 sampleTexUnit{}() {{\n", texture_unit); if (texture_unit == 0 && state.texture0_type == TexturingRegs::TextureConfig::Disabled) { - out += "return vec4(0.0);}"; - continue; - } else if (texture_unit == 3) { - if (state.proctex.enable) { - out += "return ProcTex();}"; - } else { - out += "return vec4(0.0);}"; - } + out += "return vec4(0.0);\n}"; continue; } - u32 texcoord_num = texture_unit == 2 && state.texture2_use_coord1 ? 1 : texture_unit; - if (config.state.texture_border_color[texture_unit].enable_s) { - out += fmt::format(R"( - if (texcoord{}.x < 0 || texcoord{}.x > 1) {{ - return tex_border_color[{}]; - }} - )", - texcoord_num, texcoord_num, texture_unit); + if (texture_unit < 3) { + u32 texcoord_num = texture_unit == 2 && state.texture2_use_coord1 ? 1 : texture_unit; + if (config.state.texture_border_color[texture_unit].enable_s) { + out += fmt::format(R"( + if (texcoord{}.x < 0 || texcoord{}.x > 1) {{ + return tex_border_color[{}]; + }} + )", + texcoord_num, texcoord_num, texture_unit); + } + if (config.state.texture_border_color[texture_unit].enable_t) { + out += fmt::format(R"( + if (texcoord{}.y < 0 || texcoord{}.y > 1) {{ + return tex_border_color[{}]; + }} + )", + texcoord_num, texcoord_num, texture_unit); + } } - if (config.state.texture_border_color[texture_unit].enable_t) { - out += fmt::format(R"( - if (texcoord{}.y < 0 || texcoord{}.y > 1) {{ - return tex_border_color[{}]; - }} - )", - texcoord_num, texcoord_num, texture_unit); - } - // TODO: 3D border? 
switch (texture_unit) { case 0: @@ -1503,6 +1473,7 @@ vec4 shadowTextureCube(vec2 uv, float w) { out += "return texture(tex0, texcoord0);"; break; } + break; case 1: out += "return textureLod(tex1, texcoord1, getLod(texcoord1 * vec2(textureSize(tex1, " "0))) + tex_lod_bias[1]);"; @@ -1510,18 +1481,25 @@ vec4 shadowTextureCube(vec2 uv, float w) { case 2: if (state.texture2_use_coord1) { out += "return textureLod(tex2, texcoord1, getLod(texcoord1 * " - "vec2(textureSize(tex2, 0))) + tex_lod_bias[1]);"; + "vec2(textureSize(tex2, 0))) + tex_lod_bias[2]);"; } else { out += "return textureLod(tex2, texcoord2, getLod(texcoord2 * " "vec2(textureSize(tex2, 0))) + tex_lod_bias[2]);"; } break; + case 3: + if (state.proctex.enable) { + out += "return ProcTex();"; + } else { + out += "return vec4(0.0);"; + } + break; default: UNREACHABLE(); break; } - out += "}"; + out += "\n}\n"; } // We round the interpolated primary color to the nearest 1/255th @@ -1552,11 +1530,18 @@ vec4 secondary_fragment_color = vec4(0.0); "gl_FragCoord.y < float(scissor_y2))) discard;\n"; } - // The PICA depth range is [-1, 0] while in Vulkan that range is [0, 1]. - // Thus in the vertex shader we flip the sign of the z component to place - // it in the correct range. Here we undo the transformation to get the original z_over_w, - // then do our own transformation according to PICA specification. - out += "float z_over_w = -gl_FragCoord.z;\n" + // The PICA depth range is [-1, 0]. The vertex shader outputs the negated Z value, otherwise + // unmodified. The OpenGL depth range is [-1, 1], which is compressed into [near, far] = [0, 1]. + // This compresses our effective range into [0.5, 1]. To account for this we un-negate the value + // to range [-1, -0.5], multiply by 2 to the range [-2, -1], and add 1 to arrive back at the + // original range of [-1, 0]. The Vulkan depth range is [0, 1], so all we need to do is + // un-negate the value to range [-1, 0]. 
Once we have z_over_w, we can do our own transformation + // according to PICA specification. + out += "#ifdef VULKAN\n" + "float z_over_w = -gl_FragCoord.z;\n" + "#else\n" + "float z_over_w = -2.0 * gl_FragCoord.z + 1.0;\n" + "#endif\n" "float depth = z_over_w * depth_scale + depth_offset;\n"; if (state.depthmap_enable == RasterizerRegs::DepthBuffering::WBuffering) { out += "depth /= gl_FragCoord.w;\n"; @@ -1609,7 +1594,7 @@ vec4 secondary_fragment_color = vec4(0.0); } else if (state.fog_mode == TexturingRegs::FogMode::Gas) { Core::System::GetInstance().TelemetrySession().AddField( Common::Telemetry::FieldType::Session, "VideoCore_Pica_UseGasMode", true); - LOG_CRITICAL(Render_OpenGL, "Unimplemented gas mode"); + LOG_CRITICAL(Render, "Unimplemented gas mode"); out += "discard; }"; return out; } @@ -1642,41 +1627,23 @@ do { } else { out += "gl_FragDepth = depth;\n"; // Round the final fragment color to maintain the PICA's 8 bits of precision - out += "color = byteround(last_tex_env_out);\n"; + out += "last_tex_env_out = byteround(last_tex_env_out);\n"; + WriteBlending(out, config); + out += "color = last_tex_env_out;\n"; } - if (state.emulate_logic_op) { - switch (state.logic_op) { - case FramebufferRegs::LogicOp::Clear: - out += "color = vec4(0);\n"; - break; - case FramebufferRegs::LogicOp::Set: - out += "color = vec4(1);\n"; - break; - case FramebufferRegs::LogicOp::Copy: - // Take the color output as-is - break; - case FramebufferRegs::LogicOp::CopyInverted: - out += "color = ~color;\n"; - break; - case FramebufferRegs::LogicOp::NoOp: - // We need to discard the color, but not necessarily the depth. This is not possible - // with fragment shader alone, so we emulate this behavior with the color mask. 
- break; - default: - LOG_CRITICAL(HW_GPU, "Unhandled logic_op {:x}", - static_cast(state.logic_op.Value())); - UNIMPLEMENTED(); - } - } + WriteLogicOp(out, config); out += '}'; return out; } -std::string GenerateTrivialVertexShader(bool use_clip_planes) { - std::string out = "#version 450 core\n" - "#extension GL_ARB_separate_shader_objects : enable\n\n"; +std::string GenerateTrivialVertexShader(bool use_clip_planes, bool separable_shader) { + std::string out; + if (separable_shader) { + out += "#extension GL_ARB_separate_shader_objects : enable\n"; + } + out += fmt::format("layout(location = {}) in vec4 vert_position;\n" "layout(location = {}) in vec4 vert_color;\n" @@ -1689,10 +1656,13 @@ std::string GenerateTrivialVertexShader(bool use_clip_planes) { ATTRIBUTE_POSITION, ATTRIBUTE_COLOR, ATTRIBUTE_TEXCOORD0, ATTRIBUTE_TEXCOORD1, ATTRIBUTE_TEXCOORD2, ATTRIBUTE_TEXCOORD0_W, ATTRIBUTE_NORMQUAT, ATTRIBUTE_VIEW); - out += GetVertexInterfaceDeclaration(true, use_clip_planes); - - out += UniformBlockDef; + out += GetVertexInterfaceDeclaration(true, use_clip_planes, separable_shader); + out += VSUniformBlockDef; + // Certain games render 2D elements very close to clip plane 0 resulting in very tiny + // negative/positive z values when computing with f32 precision, + // causing some vertices to get erroneously clipped. To workaround this problem, + // we can use a very small epsilon value for clip plane comparison. 
out += R"( const float EPSILON_Z = 0.00000001f; @@ -1716,7 +1686,7 @@ void main() { if (enable_clip1) { gl_ClipDistance[1] = dot(clip_coef, vtx_pos); } else { - gl_ClipDistance[1] = 0; + gl_ClipDistance[1] = 0.0; } )"; } @@ -1737,11 +1707,15 @@ std::string_view MakeLoadPrefix(AttribLoadFlags flag) { return ""; } -std::optional GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, - const PicaVSConfig& config) { - std::string out = "#extension GL_ARB_separate_shader_objects : enable\n"; - out += UniformBlockDef; - out += OpenGL::ShaderDecompiler::GetCommonDeclarations(); +std::string GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config, + bool separable_shader) { + std::string out; + if (separable_shader) { + out += "#extension GL_ARB_separate_shader_objects : enable\n"; + } + + out += VSPicaUniformBlockDef; + out += VSUniformBlockDef; std::array used_regs{}; const auto get_input_reg = [&used_regs](u32 reg) { @@ -1758,25 +1732,12 @@ std::optional GenerateVertexShader(const Pica::Shader::ShaderSetup& return ""; }; - auto program_source_opt = OpenGL::ShaderDecompiler::DecompileProgram( - setup.program_code, setup.swizzle_data, config.state.main_offset, get_input_reg, - get_output_reg, config.state.sanitize_mul); + auto program_source = + DecompileProgram(setup.program_code, setup.swizzle_data, config.state.main_offset, + get_input_reg, get_output_reg, config.state.sanitize_mul); - if (!program_source_opt) { - return std::nullopt; - } - - std::string& program_source = program_source_opt->code; - - out += R"( -#define uniforms vs_uniforms -layout (set = 0, binding = 0, std140) uniform vs_config { - pica_uniforms uniforms; -}; - -)"; - if (!config.state.use_geometry_shader) { - out += GetVertexInterfaceDeclaration(true, config.use_clip_planes); + if (program_source.empty()) { + return ""; } // input attributes declaration @@ -1786,7 +1747,7 @@ layout (set = 0, binding = 0, std140) uniform vs_config { const std::string_view prefix 
= MakeLoadPrefix(flags); out += fmt::format("layout(location = {0}) in {1}vec4 vs_in_typed_reg{0};\n", i, prefix); - out += fmt::format("vec4 vs_in_reg{0} = vec4(vs_in_typed_reg{0});\n", i); + out += fmt::format("vec4 vs_in_reg{0};\n", i); } } out += '\n'; @@ -1794,21 +1755,26 @@ layout (set = 0, binding = 0, std140) uniform vs_config { if (config.state.use_geometry_shader) { // output attributes declaration for (u32 i = 0; i < config.state.num_outputs; ++i) { - out += fmt::format("layout(location = {0}) out vec4 vs_out_attr{0};\n", i); + if (separable_shader) { + out += fmt::format("layout(location = {}) ", i); + } + out += fmt::format("out vec4 vs_out_attr{};\n", i); } out += "void EmitVtx() {}\n"; } else { + out += GetVertexInterfaceDeclaration(true, config.state.use_clip_planes, separable_shader); + // output attributes declaration for (u32 i = 0; i < config.state.num_outputs; ++i) { out += fmt::format("vec4 vs_out_attr{};\n", i); } const auto semantic = - [&config = config.state](VSOutputAttributes::Semantic slot_semantic) -> std::string { + [&state = config.state](VSOutputAttributes::Semantic slot_semantic) -> std::string { const u32 slot = static_cast(slot_semantic); - const u32 attrib = config.semantic_maps[slot].attribute_index; - const u32 comp = config.semantic_maps[slot].component_index; - if (attrib < config.gs_output_attributes) { + const u32 attrib = state.gs_state.semantic_maps[slot].attribute_index; + const u32 comp = state.gs_state.semantic_maps[slot].component_index; + if (attrib < state.gs_state.gs_output_attributes) { return fmt::format("vs_out_attr{}.{}", attrib, "xyzw"[comp]); } return "1.0"; @@ -1831,12 +1797,12 @@ layout (set = 0, binding = 0, std140) uniform vs_config { out += " vtx_pos.z = 0.f;\n"; out += " }\n"; out += " gl_Position = vec4(vtx_pos.x, vtx_pos.y, -vtx_pos.z, vtx_pos.w);\n"; - if (config.use_clip_planes) { + if (config.state.use_clip_planes) { out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0 
out += " if (enable_clip1) {\n"; out += " gl_ClipDistance[1] = dot(clip_coef, vtx_pos);\n"; out += " } else {\n"; - out += " gl_ClipDistance[1] = 0;\n"; + out += " gl_ClipDistance[1] = 0.0;\n"; out += " }\n\n"; } @@ -1862,43 +1828,50 @@ layout (set = 0, binding = 0, std140) uniform vs_config { out += "}\n"; } + out += "bool exec_shader();\n\n"; + out += "\nvoid main() {\n"; + for (std::size_t i = 0; i < used_regs.size(); ++i) { + if (used_regs[i]) { + out += fmt::format("vs_in_reg{0} = vec4(vs_in_typed_reg{0});\n", i); + if (True(config.state.load_flags[i] & AttribLoadFlags::ZeroW)) { + out += fmt::format("vs_in_reg{0}.w = 0;\n", i); + } + } + } for (u32 i = 0; i < config.state.num_outputs; ++i) { out += fmt::format(" vs_out_attr{} = vec4(0.0, 0.0, 0.0, 1.0);\n", i); } - for (std::size_t i = 0; i < used_regs.size(); ++i) { - if (used_regs[i] && True(config.state.load_flags[i] & AttribLoadFlags::ZeroW)) { - out += fmt::format("vs_in_reg{0}.w = 0;\n", i); - } - } - out += "\n exec_shader();\nEmitVtx();\n}\n\n"; + out += "\n exec_shader();\n EmitVtx();\n}\n\n"; out += program_source; return out; } -static std::string GetGSCommonSource(const PicaGSConfigCommonRaw& config, bool use_clip_planes) { - std::string out = GetVertexInterfaceDeclaration(true, use_clip_planes); - out += UniformBlockDef; - out += OpenGL::ShaderDecompiler::GetCommonDeclarations(); +static std::string GetGSCommonSource(const PicaGSConfigState& state, bool separable_shader) { + std::string out = GetVertexInterfaceDeclaration(true, state.use_clip_planes, separable_shader); + out += VSUniformBlockDef; out += '\n'; - for (u32 i = 0; i < config.vs_output_attributes; ++i) { - out += fmt::format("layout(location = {}) in vec4 vs_out_attr{}[];\n", i, i); + for (u32 i = 0; i < state.vs_output_attributes; ++i) { + if (separable_shader) { + out += fmt::format("layout(location = {}) ", i); + } + out += fmt::format("in vec4 vs_out_attr{}[];\n", i); } out += R"( struct Vertex { )"; - out += fmt::format(" vec4 
attributes[{}];\n", config.gs_output_attributes); + out += fmt::format(" vec4 attributes[{}];\n", state.gs_output_attributes); out += "};\n\n"; - const auto semantic = [&config](VSOutputAttributes::Semantic slot_semantic) -> std::string { + const auto semantic = [&state](VSOutputAttributes::Semantic slot_semantic) -> std::string { const u32 slot = static_cast(slot_semantic); - const u32 attrib = config.semantic_maps[slot].attribute_index; - const u32 comp = config.semantic_maps[slot].component_index; - if (attrib < config.gs_output_attributes) { + const u32 attrib = state.semantic_maps[slot].attribute_index; + const u32 comp = state.semantic_maps[slot].component_index; + if (attrib < state.gs_output_attributes) { return fmt::format("vtx.attributes[{}].{}", attrib, "xyzw"[comp]); } return "1.0"; @@ -1921,12 +1894,12 @@ struct Vertex { out += " vtx_pos.z = 0.f;\n"; out += " }\n"; out += " gl_Position = vec4(vtx_pos.x, vtx_pos.y, -vtx_pos.z, vtx_pos.w);\n"; - if (use_clip_planes) { + if (state.use_clip_planes) { out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0 out += " if (enable_clip1) {\n"; out += " gl_ClipDistance[1] = dot(clip_coef, vtx_pos);\n"; out += " } else {\n"; - out += " gl_ClipDistance[1] = 0;\n"; + out += " gl_ClipDistance[1] = 0.0;\n"; out += " }\n\n"; } @@ -1970,9 +1943,11 @@ void EmitPrim(Vertex vtx0, Vertex vtx1, Vertex vtx2) { return out; }; -std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config) { - std::string out = "#version 450 core\n" - "#extension GL_ARB_separate_shader_objects : enable\n\n"; +std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config, bool separable_shader) { + std::string out; + if (separable_shader) { + out += "#extension GL_ARB_separate_shader_objects : enable\n"; + } out += R"( layout(triangles) in; @@ -1980,7 +1955,7 @@ layout(triangle_strip, max_vertices = 3) out; )"; - out += GetGSCommonSource(config.state, config.use_clip_planes); + out += 
GetGSCommonSource(config.state, separable_shader); out += R"( void main() { @@ -1999,4 +1974,4 @@ void main() { return out; } -} // namespace Vulkan +} // namespace Pica::Shader::Generator::GLSL diff --git a/src/video_core/shader/generator/glsl_shader_gen.h b/src/video_core/shader/generator/glsl_shader_gen.h new file mode 100644 index 000000000..6531f4780 --- /dev/null +++ b/src/video_core/shader/generator/glsl_shader_gen.h @@ -0,0 +1,57 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "video_core/shader/generator/shader_gen.h" +#include "video_core/shader/shader.h" + +// High precision may or may not be supported in GLES3. If it isn't, use medium precision instead. +static constexpr char fragment_shader_precision_OES[] = R"( +#if GL_ES +#ifdef GL_FRAGMENT_PRECISION_HIGH +precision highp int; +precision highp float; +precision highp samplerBuffer; +precision highp uimage2D; +#else +precision mediump int; +precision mediump float; +precision mediump samplerBuffer; +precision mediump uimage2D; +#endif // GL_FRAGMENT_PRECISION_HIGH +#endif +)"; + +namespace Pica::Shader::Generator::GLSL { + +/** + * Generates the GLSL vertex shader program source code that accepts vertices from software shader + * and directly passes them to the fragment shader. 
+ * @returns String of the shader source code + */ +std::string GenerateTrivialVertexShader(bool use_clip_planes, bool separable_shader); + +/** + * Generates the GLSL vertex shader program source code for the given VS program + * @returns String of the shader source code; empty on failure + */ +std::string GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config, + bool separable_shader); + +/** + * Generates the GLSL fixed geometry shader program source code for non-GS PICA pipeline + * @returns String of the shader source code + */ +std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config, bool separable_shader); + +/** + * Generates the GLSL fragment shader program source code for the current Pica state + * @param config ShaderCacheKey object generated for the current Pica state, used for the shader + * configuration (NOTE: Use state in this struct only, not the Pica registers!) + * @returns String of the shader source code + */ +std::string GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader); + +} // namespace Pica::Shader::Generator::GLSL diff --git a/src/video_core/shader/generator/shader_gen.cpp b/src/video_core/shader/generator/shader_gen.cpp new file mode 100644 index 000000000..2f6011666 --- /dev/null +++ b/src/video_core/shader/generator/shader_gen.cpp @@ -0,0 +1,281 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/bit_set.h" +#include "common/logging/log.h" +#include "video_core/shader/generator/shader_gen.h" +#include "video_core/video_core.h" + +namespace Pica::Shader::Generator { + +PicaFSConfig::PicaFSConfig(const Pica::Regs& regs, bool has_fragment_shader_interlock, + bool emulate_logic_op, bool emulate_custom_border_color, + bool emulate_blend_minmax_factor, bool use_custom_normal_map) { + state.scissor_test_mode.Assign(regs.rasterizer.scissor_test.mode); + + state.depthmap_enable.Assign(regs.rasterizer.depthmap_enable); + + state.alpha_test_func.Assign(regs.framebuffer.output_merger.alpha_test.enable + ? regs.framebuffer.output_merger.alpha_test.func.Value() + : Pica::FramebufferRegs::CompareFunc::Always); + + state.texture0_type.Assign(regs.texturing.texture0.type); + + state.texture2_use_coord1.Assign(regs.texturing.main_config.texture2_use_coord1 != 0); + + const auto pica_textures = regs.texturing.GetTextures(); + for (u32 tex_index = 0; tex_index < 3; tex_index++) { + const auto config = pica_textures[tex_index].config; + state.texture_border_color[tex_index].enable_s.Assign( + emulate_custom_border_color && + config.wrap_s == Pica::TexturingRegs::TextureConfig::WrapMode::ClampToBorder); + state.texture_border_color[tex_index].enable_t.Assign( + emulate_custom_border_color && + config.wrap_t == Pica::TexturingRegs::TextureConfig::WrapMode::ClampToBorder); + } + + // Emulate logic op in the shader if not supported. This is mostly for mobile GPUs + const bool needs_emulate_logic_op = + emulate_logic_op && !regs.framebuffer.output_merger.alphablend_enable; + + state.emulate_logic_op.Assign(needs_emulate_logic_op); + if (needs_emulate_logic_op) { + state.logic_op.Assign(regs.framebuffer.output_merger.logic_op); + } else { + state.logic_op.Assign(Pica::FramebufferRegs::LogicOp::NoOp); + } + + // Copy relevant tev stages fields. + // We don't sync const_color here because of the high variance, it is a + // shader uniform instead. 
+ const auto& tev_stages = regs.texturing.GetTevStages(); + DEBUG_ASSERT(state.tev_stages.size() == tev_stages.size()); + for (std::size_t i = 0; i < tev_stages.size(); i++) { + const auto& tev_stage = tev_stages[i]; + state.tev_stages[i].sources_raw = tev_stage.sources_raw; + state.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw; + state.tev_stages[i].ops_raw = tev_stage.ops_raw; + state.tev_stages[i].scales_raw = tev_stage.scales_raw; + if (tev_stage.color_op == Pica::TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) { + state.tev_stages[i].sources_raw &= 0xFFF; + state.tev_stages[i].modifiers_raw &= 0xFFF; + state.tev_stages[i].ops_raw &= 0xF; + } + } + + state.fog_mode.Assign(regs.texturing.fog_mode); + state.fog_flip.Assign(regs.texturing.fog_flip != 0); + + state.combiner_buffer_input.Assign( + regs.texturing.tev_combiner_buffer_input.update_mask_rgb.Value() | + regs.texturing.tev_combiner_buffer_input.update_mask_a.Value() << 4); + + // Fragment lighting + state.lighting.enable.Assign(!regs.lighting.disable); + if (state.lighting.enable) { + state.lighting.src_num.Assign(regs.lighting.max_light_index + 1); + + for (u32 light_index = 0; light_index < state.lighting.src_num; ++light_index) { + const u32 num = regs.lighting.light_enable.GetNum(light_index); + const auto& light = regs.lighting.light[num]; + state.lighting.light[light_index].num.Assign(num); + state.lighting.light[light_index].directional.Assign(light.config.directional != 0); + state.lighting.light[light_index].two_sided_diffuse.Assign( + light.config.two_sided_diffuse != 0); + state.lighting.light[light_index].geometric_factor_0.Assign( + light.config.geometric_factor_0 != 0); + state.lighting.light[light_index].geometric_factor_1.Assign( + light.config.geometric_factor_1 != 0); + state.lighting.light[light_index].dist_atten_enable.Assign( + !regs.lighting.IsDistAttenDisabled(num)); + state.lighting.light[light_index].spot_atten_enable.Assign( + 
!regs.lighting.IsSpotAttenDisabled(num)); + state.lighting.light[light_index].shadow_enable.Assign( + !regs.lighting.IsShadowDisabled(num)); + } + + state.lighting.lut_d0.enable.Assign(regs.lighting.config1.disable_lut_d0 == 0); + if (state.lighting.lut_d0.enable) { + state.lighting.lut_d0.abs_input.Assign(regs.lighting.abs_lut_input.disable_d0 == 0); + state.lighting.lut_d0.type.Assign(regs.lighting.lut_input.d0.Value()); + state.lighting.lut_d0.scale = + regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); + } + + state.lighting.lut_d1.enable.Assign(regs.lighting.config1.disable_lut_d1 == 0); + if (state.lighting.lut_d1.enable) { + state.lighting.lut_d1.abs_input.Assign(regs.lighting.abs_lut_input.disable_d1 == 0); + state.lighting.lut_d1.type.Assign(regs.lighting.lut_input.d1.Value()); + state.lighting.lut_d1.scale = + regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); + } + + // this is a dummy field due to lack of the corresponding register + state.lighting.lut_sp.enable.Assign(1); + state.lighting.lut_sp.abs_input.Assign(regs.lighting.abs_lut_input.disable_sp == 0); + state.lighting.lut_sp.type.Assign(regs.lighting.lut_input.sp.Value()); + state.lighting.lut_sp.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.sp); + + state.lighting.lut_fr.enable.Assign(regs.lighting.config1.disable_lut_fr == 0); + if (state.lighting.lut_fr.enable) { + state.lighting.lut_fr.abs_input.Assign(regs.lighting.abs_lut_input.disable_fr == 0); + state.lighting.lut_fr.type.Assign(regs.lighting.lut_input.fr.Value()); + state.lighting.lut_fr.scale = + regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); + } + + state.lighting.lut_rr.enable.Assign(regs.lighting.config1.disable_lut_rr == 0); + if (state.lighting.lut_rr.enable) { + state.lighting.lut_rr.abs_input.Assign(regs.lighting.abs_lut_input.disable_rr == 0); + state.lighting.lut_rr.type.Assign(regs.lighting.lut_input.rr.Value()); + state.lighting.lut_rr.scale = + 
regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); + } + + state.lighting.lut_rg.enable.Assign(regs.lighting.config1.disable_lut_rg == 0); + if (state.lighting.lut_rg.enable) { + state.lighting.lut_rg.abs_input.Assign(regs.lighting.abs_lut_input.disable_rg == 0); + state.lighting.lut_rg.type.Assign(regs.lighting.lut_input.rg.Value()); + state.lighting.lut_rg.scale = + regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); + } + + state.lighting.lut_rb.enable.Assign(regs.lighting.config1.disable_lut_rb == 0); + if (state.lighting.lut_rb.enable) { + state.lighting.lut_rb.abs_input.Assign(regs.lighting.abs_lut_input.disable_rb == 0); + state.lighting.lut_rb.type.Assign(regs.lighting.lut_input.rb.Value()); + state.lighting.lut_rb.scale = + regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); + } + + state.lighting.config.Assign(regs.lighting.config0.config); + state.lighting.enable_primary_alpha.Assign(regs.lighting.config0.enable_primary_alpha); + state.lighting.enable_secondary_alpha.Assign(regs.lighting.config0.enable_secondary_alpha); + state.lighting.bump_mode.Assign(regs.lighting.config0.bump_mode); + state.lighting.bump_selector.Assign(regs.lighting.config0.bump_selector); + state.lighting.bump_renorm.Assign(regs.lighting.config0.disable_bump_renorm == 0); + state.lighting.clamp_highlights.Assign(regs.lighting.config0.clamp_highlights != 0); + + state.lighting.enable_shadow.Assign(regs.lighting.config0.enable_shadow != 0); + if (state.lighting.enable_shadow) { + state.lighting.shadow_primary.Assign(regs.lighting.config0.shadow_primary != 0); + state.lighting.shadow_secondary.Assign(regs.lighting.config0.shadow_secondary != 0); + state.lighting.shadow_invert.Assign(regs.lighting.config0.shadow_invert != 0); + state.lighting.shadow_alpha.Assign(regs.lighting.config0.shadow_alpha != 0); + state.lighting.shadow_selector.Assign(regs.lighting.config0.shadow_selector); + } + } + + 
state.proctex.enable.Assign(regs.texturing.main_config.texture3_enable); + if (state.proctex.enable) { + state.proctex.coord.Assign(regs.texturing.main_config.texture3_coordinates); + state.proctex.u_clamp.Assign(regs.texturing.proctex.u_clamp); + state.proctex.v_clamp.Assign(regs.texturing.proctex.v_clamp); + state.proctex.color_combiner.Assign(regs.texturing.proctex.color_combiner); + state.proctex.alpha_combiner.Assign(regs.texturing.proctex.alpha_combiner); + state.proctex.separate_alpha.Assign(regs.texturing.proctex.separate_alpha); + state.proctex.noise_enable.Assign(regs.texturing.proctex.noise_enable); + state.proctex.u_shift.Assign(regs.texturing.proctex.u_shift); + state.proctex.v_shift.Assign(regs.texturing.proctex.v_shift); + state.proctex.lut_width = regs.texturing.proctex_lut.width; + state.proctex.lut_offset0 = regs.texturing.proctex_lut_offset.level0; + state.proctex.lut_offset1 = regs.texturing.proctex_lut_offset.level1; + state.proctex.lut_offset2 = regs.texturing.proctex_lut_offset.level2; + state.proctex.lut_offset3 = regs.texturing.proctex_lut_offset.level3; + state.proctex.lod_min = regs.texturing.proctex_lut.lod_min; + state.proctex.lod_max = regs.texturing.proctex_lut.lod_max; + state.proctex.lut_filter.Assign(regs.texturing.proctex_lut.filter); + } + + const auto alpha_eq = regs.framebuffer.output_merger.alpha_blending.blend_equation_a.Value(); + const auto rgb_eq = regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb.Value(); + if (emulate_blend_minmax_factor && regs.framebuffer.output_merger.alphablend_enable) { + if (rgb_eq == Pica::FramebufferRegs::BlendEquation::Max || + rgb_eq == Pica::FramebufferRegs::BlendEquation::Min) { + state.rgb_blend.emulate_blending = true; + state.rgb_blend.eq = rgb_eq; + state.rgb_blend.src_factor = + regs.framebuffer.output_merger.alpha_blending.factor_source_rgb; + state.rgb_blend.dst_factor = + regs.framebuffer.output_merger.alpha_blending.factor_dest_rgb; + } + if (alpha_eq == 
Pica::FramebufferRegs::BlendEquation::Max || + alpha_eq == Pica::FramebufferRegs::BlendEquation::Min) { + state.alpha_blend.emulate_blending = true; + state.alpha_blend.eq = alpha_eq; + state.alpha_blend.src_factor = + regs.framebuffer.output_merger.alpha_blending.factor_source_a; + state.alpha_blend.dst_factor = + regs.framebuffer.output_merger.alpha_blending.factor_dest_a; + } + } + + state.shadow_rendering.Assign(regs.framebuffer.output_merger.fragment_operation_mode == + Pica::FramebufferRegs::FragmentOperationMode::Shadow); + state.shadow_texture_orthographic.Assign(regs.texturing.shadow.orthographic != 0); + + // We only need fragment shader interlock when shadow rendering. + state.use_fragment_shader_interlock.Assign(state.shadow_rendering && + has_fragment_shader_interlock); + state.use_custom_normal_map.Assign(use_custom_normal_map); +} + +void PicaGSConfigState::Init(const Pica::Regs& regs, bool use_clip_planes_) { + use_clip_planes = use_clip_planes_; + + vs_output_attributes = Common::BitSet(regs.vs.output_mask).Count(); + gs_output_attributes = vs_output_attributes; + + semantic_maps.fill({16, 0}); + for (u32 attrib = 0; attrib < regs.rasterizer.vs_output_total; ++attrib) { + const std::array semantics{ + regs.rasterizer.vs_output_attributes[attrib].map_x.Value(), + regs.rasterizer.vs_output_attributes[attrib].map_y.Value(), + regs.rasterizer.vs_output_attributes[attrib].map_z.Value(), + regs.rasterizer.vs_output_attributes[attrib].map_w.Value(), + }; + for (u32 comp = 0; comp < 4; ++comp) { + const auto semantic = semantics[comp]; + if (static_cast(semantic) < 24) { + semantic_maps[static_cast(semantic)] = {attrib, comp}; + } else if (semantic != Pica::RasterizerRegs::VSOutputAttributes::INVALID) { + LOG_ERROR(Render, "Invalid/unknown semantic id: {}", semantic); + } + } + } +} + +void PicaVSConfigState::Init(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup, + bool use_clip_planes_, bool use_geometry_shader_) { + use_clip_planes = 
use_clip_planes_; + use_geometry_shader = use_geometry_shader_; + + program_hash = setup.GetProgramCodeHash(); + swizzle_hash = setup.GetSwizzleDataHash(); + main_offset = regs.vs.main_offset; + sanitize_mul = VideoCore::g_hw_shader_accurate_mul; + + num_outputs = 0; + load_flags.fill(AttribLoadFlags::Float); + output_map.fill(16); + + for (int reg : Common::BitSet(regs.vs.output_mask)) { + output_map[reg] = num_outputs++; + } + + if (!use_geometry_shader_) { + gs_state.Init(regs, use_clip_planes_); + } +} + +PicaVSConfig::PicaVSConfig(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup, + bool use_clip_planes_, bool use_geometry_shader_) { + state.Init(regs, setup, use_clip_planes_, use_geometry_shader_); +} + +PicaFixedGSConfig::PicaFixedGSConfig(const Pica::Regs& regs, bool use_clip_planes_) { + state.Init(regs, use_clip_planes_); +} + +} // namespace Pica::Shader::Generator diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.h b/src/video_core/shader/generator/shader_gen.h similarity index 62% rename from src/video_core/renderer_vulkan/vk_shader_gen.h rename to src/video_core/shader/generator/shader_gen.h index 7ccc01969..3dfbe09e2 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.h +++ b/src/video_core/shader/generator/shader_gen.h @@ -4,14 +4,17 @@ #pragma once -#include #include "common/hash.h" #include "video_core/regs.h" #include "video_core/shader/shader.h" -namespace Vulkan { +namespace Pica::Shader::Generator { -class Instance; +enum ProgramType : u32 { + VS = 0, + GS = 2, + FS = 1, +}; enum Attributes { ATTRIBUTE_POSITION, @@ -31,13 +34,13 @@ struct TevStageConfigRaw { u32 ops_raw; u32 scales_raw; explicit operator Pica::TexturingRegs::TevStageConfig() const noexcept { - Pica::TexturingRegs::TevStageConfig stage; - stage.sources_raw = sources_raw; - stage.modifiers_raw = modifiers_raw; - stage.ops_raw = ops_raw; - stage.const_color = 0; - stage.scales_raw = scales_raw; - return stage; + return { + .sources_raw = sources_raw, + 
.modifiers_raw = modifiers_raw, + .ops_raw = ops_raw, + .const_color = 0, + .scales_raw = scales_raw, + }; } }; @@ -56,6 +59,7 @@ struct PicaFSConfigState { BitField<27, 1, u32> shadow_rendering; BitField<28, 1, u32> shadow_texture_orthographic; BitField<29, 1, u32> use_fragment_shader_interlock; + BitField<30, 1, u32> use_custom_normal_map; }; union { @@ -127,24 +131,33 @@ struct PicaFSConfigState { u8 lod_min; u8 lod_max; } proctex; + + struct { + bool emulate_blending; + Pica::FramebufferRegs::BlendEquation eq; + Pica::FramebufferRegs::BlendFactor src_factor; + Pica::FramebufferRegs::BlendFactor dst_factor; + } rgb_blend, alpha_blend; }; /** * This struct contains all state used to generate the GLSL fragment shader that emulates the * current Pica register configuration. This struct is used as a cache key for generated GLSL shader - * programs. The functions in gl_shader_gen.cpp should retrieve state from this struct only, not by - * directly accessing Pica registers. This should reduce the risk of bugs in shader generation where - * Pica state is not being captured in the shader cache key, thereby resulting in (what should be) - * two separate shaders sharing the same key. + * programs. The functions in glsl_shader_gen.cpp should retrieve state from this struct only, not + * by directly accessing Pica registers. This should reduce the risk of bugs in shader generation + * where Pica state is not being captured in the shader cache key, thereby resulting in (what should + * be) two separate shaders sharing the same key. 
*/ struct PicaFSConfig : Common::HashableStruct { - PicaFSConfig(const Pica::Regs& regs, const Instance& instance); + PicaFSConfig(const Pica::Regs& regs, bool has_fragment_shader_interlock, bool emulate_logic_op, + bool emulate_custom_border_color, bool emulate_blend_minmax_factor, + bool use_custom_normal_map = false); - bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { + [[nodiscard]] bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index)); } - bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { + [[nodiscard]] bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index)); } }; @@ -158,12 +171,36 @@ enum class AttribLoadFlags { DECLARE_ENUM_FLAG_OPERATORS(AttribLoadFlags) /** - * This struct contains common information to identify a GL vertex/geometry shader generated from - * PICA vertex/geometry shader. + * This struct contains common information to identify a GLSL geometry shader generated from + * PICA geometry shader. */ -struct PicaShaderConfigCommon { - void Init(const Pica::RasterizerRegs& rasterizer, const Pica::ShaderRegs& regs, - Pica::Shader::ShaderSetup& setup); +struct PicaGSConfigState { + void Init(const Pica::Regs& regs, bool use_clip_planes_); + + bool use_clip_planes; + + u32 vs_output_attributes; + u32 gs_output_attributes; + + struct SemanticMap { + u32 attribute_index; + u32 component_index; + }; + + // semantic_maps[semantic name] -> GS output attribute index + component index + std::array semantic_maps; +}; + +/** + * This struct contains common information to identify a GLSL vertex shader generated from + * PICA vertex shader. 
+ */ +struct PicaVSConfigState { + void Init(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup, bool use_clip_planes_, + bool use_geometry_shader_); + + bool use_clip_planes; + bool use_geometry_shader; u64 program_hash; u64 swizzle_hash; @@ -177,103 +214,46 @@ struct PicaShaderConfigCommon { // output_map[output register index] -> output attribute index std::array output_map; - bool use_geometry_shader; - u32 vs_output_attributes; - u32 gs_output_attributes; - - struct SemanticMap { - u32 attribute_index; - u32 component_index; - }; - - // semantic_maps[semantic name] -> GS output attribute index + component index - std::array semantic_maps; + PicaGSConfigState gs_state; }; /** * This struct contains information to identify a GL vertex shader generated from PICA vertex * shader. */ -struct PicaVSConfig : Common::HashableStruct { - explicit PicaVSConfig(const Pica::RasterizerRegs& rasterizer, const Pica::ShaderRegs& regs, - Pica::Shader::ShaderSetup& setup, const Instance& instance); - bool use_clip_planes; -}; - -struct PicaGSConfigCommonRaw { - void Init(const Pica::Regs& regs); - - u32 vs_output_attributes; - u32 gs_output_attributes; - - struct SemanticMap { - u32 attribute_index; - u32 component_index; - }; - - // semantic_maps[semantic name] -> GS output attribute index + component index - std::array semantic_maps; +struct PicaVSConfig : Common::HashableStruct { + explicit PicaVSConfig(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup, + bool use_clip_planes_, bool use_geometry_shader_); }; /** * This struct contains information to identify a GL geometry shader generated from PICA no-geometry * shader pipeline */ -struct PicaFixedGSConfig : Common::HashableStruct { - explicit PicaFixedGSConfig(const Pica::Regs& regs, const Instance& instance); - bool use_clip_planes; +struct PicaFixedGSConfig : Common::HashableStruct { + explicit PicaFixedGSConfig(const Pica::Regs& regs, bool use_clip_planes_); }; -/** - * Generates the GLSL vertex shader 
program source code that accepts vertices from software shader - * and directly passes them to the fragment shader. - * @param separable_shader generates shader that can be used for separate shader object - * @returns String of the shader source code - */ -std::string GenerateTrivialVertexShader(bool use_clip_planes); - -/** - * Generates the GLSL vertex shader program source code for the given VS program - * @returns String of the shader source code; boost::none on failure - */ -std::optional GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, - const PicaVSConfig& config); - -/** - * Generates the GLSL fixed geometry shader program source code for non-GS PICA pipeline - * @returns String of the shader source code - */ -std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config); - -/** - * Generates the GLSL fragment shader program source code for the current Pica state - * @param config ShaderCacheKey object generated for the current Pica state, used for the shader - * configuration (NOTE: Use state in this struct only, not the Pica registers!) 
- * @param separable_shader generates shader that can be used for separate shader object - * @returns String of the shader source code - */ -std::string GenerateFragmentShader(const PicaFSConfig& config); - -} // namespace Vulkan +} // namespace Pica::Shader::Generator namespace std { template <> -struct hash { - std::size_t operator()(const Vulkan::PicaFSConfig& k) const noexcept { +struct hash { + std::size_t operator()(const Pica::Shader::Generator::PicaFSConfig& k) const noexcept { return k.Hash(); } }; template <> -struct hash { - std::size_t operator()(const Vulkan::PicaVSConfig& k) const noexcept { +struct hash { + std::size_t operator()(const Pica::Shader::Generator::PicaVSConfig& k) const noexcept { return k.Hash(); } }; template <> -struct hash { - std::size_t operator()(const Vulkan::PicaFixedGSConfig& k) const noexcept { +struct hash { + std::size_t operator()(const Pica::Shader::Generator::PicaFixedGSConfig& k) const noexcept { return k.Hash(); } }; diff --git a/src/video_core/shader/generator/shader_uniforms.cpp b/src/video_core/shader/generator/shader_uniforms.cpp new file mode 100644 index 000000000..2a3e6beee --- /dev/null +++ b/src/video_core/shader/generator/shader_uniforms.cpp @@ -0,0 +1,26 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include "video_core/shader/generator/shader_uniforms.h" +#include "video_core/shader/shader.h" + +namespace Pica::Shader::Generator { + +void PicaUniformsData::SetFromRegs(const Pica::ShaderRegs& regs, + const Pica::Shader::ShaderSetup& setup) { + std::transform(std::begin(setup.uniforms.b), std::end(setup.uniforms.b), std::begin(bools), + [](bool value) -> BoolAligned { return {value ? 
1 : 0}; }); + std::transform(std::begin(regs.int_uniforms), std::end(regs.int_uniforms), std::begin(i), + [](const auto& value) -> Common::Vec4u { + return {value.x.Value(), value.y.Value(), value.z.Value(), value.w.Value()}; + }); + std::transform(std::begin(setup.uniforms.f), std::end(setup.uniforms.f), std::begin(f), + [](const auto& value) -> Common::Vec4f { + return {value.x.ToFloat32(), value.y.ToFloat32(), value.z.ToFloat32(), + value.w.ToFloat32()}; + }); +} + +} // namespace Pica::Shader::Generator diff --git a/src/video_core/shader/shader_uniforms.h b/src/video_core/shader/generator/shader_uniforms.h similarity index 83% rename from src/video_core/shader/shader_uniforms.h rename to src/video_core/shader/generator/shader_uniforms.h index 75c69a39f..b37083687 100644 --- a/src/video_core/shader/shader_uniforms.h +++ b/src/video_core/shader/generator/shader_uniforms.h @@ -12,10 +12,10 @@ struct ShaderRegs; } namespace Pica::Shader { - struct ShaderSetup; +} -enum class UniformBindings : u32 { Common, VS, GS }; +namespace Pica::Shader::Generator { struct LightSrc { alignas(16) Common::Vec3f specular_0; @@ -34,7 +34,7 @@ struct LightSrc { * the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. * Not following that rule will cause problems on some AMD drivers. 
*/ -struct UniformData { +struct FSUniformData { int framebuffer_scale; int alphatest_ref; float depth_scale; @@ -53,7 +53,6 @@ struct UniformData { int proctex_diff_lut_offset; float proctex_bias; int shadow_texture_bias; - alignas(4) bool enable_clip1; alignas(16) Common::Vec4i lighting_lut_offset[LightingRegs::NumLightingSampler / 4]; alignas(16) Common::Vec3f fog_color; alignas(8) Common::Vec2f proctex_noise_f; @@ -65,13 +64,12 @@ struct UniformData { alignas(16) Common::Vec4f tev_combiner_buffer_color; alignas(16) Common::Vec3f tex_lod_bias; alignas(16) Common::Vec4f tex_border_color[3]; - alignas(16) Common::Vec4f clip_coef; alignas(16) Common::Vec4f blend_color; }; -static_assert(sizeof(UniformData) == 0x540, +static_assert(sizeof(FSUniformData) == 0x530, "The size of the UniformData does not match the structure in the shader"); -static_assert(sizeof(UniformData) < 16384, +static_assert(sizeof(FSUniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); /** @@ -91,13 +89,20 @@ struct PicaUniformsData { }; struct VSUniformData { - PicaUniformsData uniforms; + bool enable_clip1; + alignas(16) Common::Vec4f clip_coef; }; -static_assert(sizeof(VSUniformData) == 1856, +static_assert(sizeof(VSUniformData) == 32, "The size of the VSUniformData does not match the structure in the shader"); static_assert(sizeof(VSUniformData) < 16384, "VSUniformData structure must be less than 16kb as per the OpenGL spec"); -std::string BuildShaderUniformDefinitions(const std::string& extra_layout_parameters = ""); +struct VSPicaUniformData { + alignas(16) PicaUniformsData uniforms; +}; +static_assert(sizeof(VSPicaUniformData) == 1856, + "The size of the VSPicaUniformData does not match the structure in the shader"); +static_assert(sizeof(VSPicaUniformData) < 16384, + "VSPicaUniformData structure must be less than 16kb as per the OpenGL spec"); -} // namespace Pica::Shader +} // namespace Pica::Shader::Generator diff --git 
a/src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp b/src/video_core/shader/generator/spv_shader_gen.cpp similarity index 97% rename from src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp rename to src/video_core/shader/generator/spv_shader_gen.cpp index 89ac2d7f9..8c2743e18 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp +++ b/src/video_core/shader/generator/spv_shader_gen.cpp @@ -4,7 +4,7 @@ #include "core/core.h" #include "core/telemetry_session.h" -#include "video_core/renderer_vulkan/vk_shader_gen_spv.h" +#include "video_core/shader/generator/spv_shader_gen.h" using Pica::FramebufferRegs; using Pica::LightingRegs; @@ -12,7 +12,7 @@ using Pica::RasterizerRegs; using Pica::TexturingRegs; using TevStageConfig = TexturingRegs::TevStageConfig; -namespace Vulkan { +namespace Pica::Shader::Generator::SPIRV { constexpr u32 SPIRV_VERSION_1_3 = 0x00010300; @@ -52,7 +52,7 @@ void FragmentModule::Generate() { } combiner_buffer = ConstF32(0.f, 0.f, 0.f, 0.f); - next_combiner_buffer = GetShaderDataMember(vec_ids.Get(4), ConstS32(27)); + next_combiner_buffer = GetShaderDataMember(vec_ids.Get(4), ConstS32(26)); last_tex_env_out = rounded_primary_color; // Write shader bytecode to emulate PICA TEV stages @@ -192,7 +192,7 @@ void FragmentModule::WriteFog() { // Blend the fog const Id tex_env_rgb{ OpVectorShuffle(vec_ids.Get(3), last_tex_env_out, last_tex_env_out, 0, 1, 2)}; - const Id fog_color{GetShaderDataMember(vec_ids.Get(3), ConstS32(20))}; + const Id fog_color{GetShaderDataMember(vec_ids.Get(3), ConstS32(19))}; const Id fog_factor_rgb{ OpCompositeConstruct(vec_ids.Get(3), fog_factor, fog_factor, fog_factor)}; const Id fog_result{OpFMix(vec_ids.Get(3), fog_color, tex_env_rgb, fog_factor_rgb)}; @@ -202,7 +202,7 @@ void FragmentModule::WriteFog() { void FragmentModule::WriteGas() { // TODO: Implement me telemetry.AddField(Common::Telemetry::FieldType::Session, "VideoCore_Pica_UseGasMode", true); - LOG_CRITICAL(Render_Vulkan, "Unimplemented gas mode"); 
+ LOG_CRITICAL(Render, "Unimplemented gas mode"); OpKill(); OpFunctionEnd(); } @@ -380,7 +380,7 @@ void FragmentModule::WriteLighting() { const auto GetLightMember = [&](s32 member) -> Id { const Id member_type = member < 6 ? vec_ids.Get(3) : f32_id; const Id light_num{ConstS32(static_cast(lighting.light[light_index].num.Value()))}; - return GetShaderDataMember(member_type, ConstS32(25), light_num, ConstS32(member)); + return GetShaderDataMember(member_type, ConstS32(24), light_num, ConstS32(member)); }; // Compute light vector (directional or positional) @@ -583,7 +583,7 @@ void FragmentModule::WriteLighting() { } // Sum final lighting result - const Id lighting_global_ambient{GetShaderDataMember(vec_ids.Get(3), ConstS32(24))}; + const Id lighting_global_ambient{GetShaderDataMember(vec_ids.Get(3), ConstS32(23))}; const Id lighting_global_ambient_rgba{ PadVectorF32(lighting_global_ambient, vec_ids.Get(4), 0.f)}; const Id zero_vec{ConstF32(0.f, 0.f, 0.f, 0.f)}; @@ -706,7 +706,7 @@ void FragmentModule::WriteAlphaTestCondition(FramebufferRegs::CompareFunc func) break; } default: - LOG_CRITICAL(Render_Vulkan, "Unknown alpha test condition {}", func); + LOG_CRITICAL(Render, "Unknown alpha test condition {}", func); break; } } @@ -791,7 +791,7 @@ Id FragmentModule::AppendProcTexShiftOffset(Id v, ProcTexShift mode, ProcTexClam case ProcTexShift::Even: return shift(true); default: - LOG_CRITICAL(Render_Vulkan, "Unknown shift mode {}", mode); + LOG_CRITICAL(Render, "Unknown shift mode {}", mode); return ConstF32(0.f); } } @@ -819,7 +819,7 @@ Id FragmentModule::AppendProcTexClamp(Id var, ProcTexClamp mode) { case ProcTexClamp::Pulse: return OpSelect(f32_id, OpFOrdGreaterThan(bool_id, var, ConstF32(0.5f)), one, zero); default: - LOG_CRITICAL(Render_Vulkan, "Unknown clamp mode {}", mode); + LOG_CRITICAL(Render, "Unknown clamp mode {}", mode); return OpFMin(f32_id, var, one); } } @@ -851,7 +851,7 @@ Id FragmentModule::AppendProcTexCombineAndMap(ProcTexCombiner combiner, Id u, 
Id return OpFMin(f32_id, OpFMul(f32_id, r, ConstF32(0.5f)), ConstF32(1.f)); } default: - LOG_CRITICAL(Render_Vulkan, "Unknown combiner {}", combiner); + LOG_CRITICAL(Render, "Unknown combiner {}", combiner); return ConstF32(0.f); } }(); @@ -916,7 +916,7 @@ void FragmentModule::DefineTexSampler(u32 texture_unit) { AddLabel(border_label); const Id border_color{ - GetShaderDataMember(vec_ids.Get(4), ConstS32(29), ConstU32(texture_unit))}; + GetShaderDataMember(vec_ids.Get(4), ConstS32(28), ConstU32(texture_unit))}; OpReturnValue(border_color); AddLabel(not_border_label); @@ -937,7 +937,7 @@ void FragmentModule::DefineTexSampler(u32 texture_unit) { const Id dx_dy_max{ OpFMax(f32_id, OpCompositeExtract(f32_id, d, 0), OpCompositeExtract(f32_id, d, 1))}; const Id lod{OpLog2(f32_id, dx_dy_max)}; - const Id lod_bias{GetShaderDataMember(f32_id, ConstS32(28), ConstU32(texture_unit))}; + const Id lod_bias{GetShaderDataMember(f32_id, ConstS32(27), ConstU32(texture_unit))}; const Id biased_lod{OpFAdd(f32_id, lod, lod_bias)}; return OpImageSampleExplicitLod(vec_ids.Get(4), sampled_image, texcoord, spv::ImageOperandsMask::Lod, biased_lod); @@ -976,7 +976,7 @@ void FragmentModule::DefineTexSampler(u32 texture_unit) { // return "shadowTextureCube(texcoord0, texcoord0_w)"; break; default: - LOG_CRITICAL(Render_Vulkan, "Unhandled texture type {:x}", state.texture0_type.Value()); + LOG_CRITICAL(Render, "Unhandled texture type {:x}", state.texture0_type.Value()); UNIMPLEMENTED(); ret_val = zero_vec; break; @@ -1012,7 +1012,7 @@ Id FragmentModule::ProcTexSampler() { const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id[config.state.proctex.coord.Value()])}; uv = OpFAbs(vec_ids.Get(2), texcoord); } else { - LOG_CRITICAL(Render_Vulkan, "Unexpected proctex.coord >= 3"); + LOG_CRITICAL(Render, "Unexpected proctex.coord >= 3"); uv = OpFAbs(vec_ids.Get(2), OpLoad(vec_ids.Get(2), texcoord_id[0])); } @@ -1047,7 +1047,7 @@ Id FragmentModule::ProcTexSampler() { // Generate noise if 
(config.state.proctex.noise_enable) { - const Id proctex_noise_a{GetShaderDataMember(vec_ids.Get(2), ConstS32(22))}; + const Id proctex_noise_a{GetShaderDataMember(vec_ids.Get(2), ConstS32(21))}; const Id noise_coef{ProcTexNoiseCoef(uv)}; uv = OpFAdd(vec_ids.Get(2), uv, OpVectorTimesScalar(vec_ids.Get(2), proctex_noise_a, noise_coef)); @@ -1158,8 +1158,8 @@ Id FragmentModule::ProcTexNoiseCoef(Id x) { return OpFma(f32_id, OpConvertSToF(f32_id, v2), ConstF32(2.f / 15.f), ConstF32(-1.f)); }; - const Id proctex_noise_f{GetShaderDataMember(vec_ids.Get(2), ConstS32(21))}; - const Id proctex_noise_p{GetShaderDataMember(vec_ids.Get(2), ConstS32(23))}; + const Id proctex_noise_f{GetShaderDataMember(vec_ids.Get(2), ConstS32(20))}; + const Id proctex_noise_p{GetShaderDataMember(vec_ids.Get(2), ConstS32(22))}; const Id grid{OpFMul(vec_ids.Get(2), OpVectorTimesScalar(vec_ids.Get(2), proctex_noise_f, ConstF32(9.f)), OpFAbs(vec_ids.Get(2), OpFAdd(vec_ids.Get(2), x, proctex_noise_p)))}; @@ -1245,7 +1245,7 @@ Id FragmentModule::LookupLightingLUT(Id lut_index, Id index, Id delta) { const Id lut_index_x{OpShiftRightArithmetic(i32_id, lut_index, ConstS32(2))}; const Id lut_index_y{OpBitwiseAnd(i32_id, lut_index, ConstS32(3))}; - const Id lut_offset{GetShaderDataMember(i32_id, ConstS32(19), lut_index_x, lut_index_y)}; + const Id lut_offset{GetShaderDataMember(i32_id, ConstS32(18), lut_index_x, lut_index_y)}; const Id coord{OpIAdd(i32_id, lut_offset, index)}; const Id entry{ OpImageFetch(vec_ids.Get(4), OpImage(image_buffer_id, texture_buffer_lut_lf), coord)}; @@ -1274,11 +1274,11 @@ Id FragmentModule::AppendSource(TevStageConfig::Source source, s32 index) { case Source::PreviousBuffer: return combiner_buffer; case Source::Constant: - return GetShaderDataMember(vec_ids.Get(4), ConstS32(26), ConstS32(index)); + return GetShaderDataMember(vec_ids.Get(4), ConstS32(25), ConstS32(index)); case Source::Previous: return last_tex_env_out; default: - LOG_CRITICAL(Render_Vulkan, "Unknown source 
op {}", source); + LOG_CRITICAL(Render, "Unknown source op {}", source); return ConstF32(0.f, 0.f, 0.f, 0.f); } } @@ -1315,7 +1315,7 @@ Id FragmentModule::AppendColorModifier(TevStageConfig::ColorModifier modifier, case ColorModifier::OneMinusSourceAlpha: return OpFSub(vec_ids.Get(3), one_vec, shuffle(3, 3, 3)); default: - LOG_CRITICAL(Render_Vulkan, "Unknown color modifier op {}", modifier); + LOG_CRITICAL(Render, "Unknown color modifier op {}", modifier); return one_vec; } } @@ -1346,7 +1346,7 @@ Id FragmentModule::AppendAlphaModifier(TevStageConfig::AlphaModifier modifier, case AlphaModifier::OneMinusSourceBlue: return OpFSub(f32_id, one_f32, component(2)); default: - LOG_CRITICAL(Render_Vulkan, "Unknown alpha modifier op {}", modifier); + LOG_CRITICAL(Render, "Unknown alpha modifier op {}", modifier); return one_f32; } } @@ -1395,7 +1395,7 @@ Id FragmentModule::AppendColorCombiner(Pica::TexturingRegs::TevStageConfig::Oper break; default: color = zero_vec; - LOG_CRITICAL(Render_Vulkan, "Unknown color combiner operation: {}", operation); + LOG_CRITICAL(Render, "Unknown color combiner operation: {}", operation); break; } @@ -1435,7 +1435,7 @@ Id FragmentModule::AppendAlphaCombiner(TevStageConfig::Operation operation) { break; default: color = ConstF32(0.f); - LOG_CRITICAL(Render_Vulkan, "Unknown alpha combiner operation: {}", operation); + LOG_CRITICAL(Render, "Unknown alpha combiner operation: {}", operation); break; } @@ -1485,16 +1485,15 @@ void FragmentModule::DefineUniformStructs() { const Id shader_data_struct_id{ TypeStruct(i32_id, i32_id, f32_id, f32_id, f32_id, f32_id, i32_id, i32_id, i32_id, i32_id, - i32_id, i32_id, i32_id, i32_id, i32_id, i32_id, f32_id, i32_id, u32_id, + i32_id, i32_id, i32_id, i32_id, i32_id, i32_id, f32_id, i32_id, lighting_lut_array_id, vec_ids.Get(3), vec_ids.Get(2), vec_ids.Get(2), vec_ids.Get(2), vec_ids.Get(3), light_src_array_id, const_color_array_id, vec_ids.Get(4), vec_ids.Get(3), border_color_array_id, vec_ids.Get(4))}; 
constexpr std::array light_src_offsets{0u, 16u, 32u, 48u, 64u, 80u, 92u, 96u}; - constexpr std::array shader_data_offsets{0u, 4u, 8u, 12u, 16u, 20u, 24u, 28u, - 32u, 36u, 40u, 44u, 48u, 52u, 56u, 60u, - 64u, 68u, 72u, 80u, 176u, 192u, 200u, 208u, - 224u, 240u, 1136u, 1232u, 1248u, 1264u, 1312u}; + constexpr std::array shader_data_offsets{ + 0u, 4u, 8u, 12u, 16u, 20u, 24u, 28u, 32u, 36u, 40u, 44u, 48u, 52u, 56u, + 60u, 64u, 68u, 80u, 176u, 192u, 200u, 208u, 224u, 240u, 1136u, 1232u, 1248u, 1264u, 1312u}; Decorate(lighting_lut_array_id, spv::Decoration::ArrayStride, 16u); Decorate(light_src_array_id, spv::Decoration::ArrayStride, 112u); @@ -1511,7 +1510,7 @@ void FragmentModule::DefineUniformStructs() { shader_data_id = AddGlobalVariable( TypePointer(spv::StorageClass::Uniform, shader_data_struct_id), spv::StorageClass::Uniform); Decorate(shader_data_id, spv::Decoration::DescriptorSet, 0); - Decorate(shader_data_id, spv::Decoration::Binding, 1); + Decorate(shader_data_id, spv::Decoration::Binding, 2); } void FragmentModule::DefineInterface() { @@ -1532,9 +1531,9 @@ void FragmentModule::DefineInterface() { image_r32_id = TypeImage(u32_id, spv::Dim::Dim2D, 0, 0, 0, 2, spv::ImageFormat::R32ui); sampler_id = TypeSampler(); - texture_buffer_lut_lf_id = DefineUniformConst(TypeSampledImage(image_buffer_id), 0, 2); - texture_buffer_lut_rg_id = DefineUniformConst(TypeSampledImage(image_buffer_id), 0, 3); - texture_buffer_lut_rgba_id = DefineUniformConst(TypeSampledImage(image_buffer_id), 0, 4); + texture_buffer_lut_lf_id = DefineUniformConst(TypeSampledImage(image_buffer_id), 0, 3); + texture_buffer_lut_rg_id = DefineUniformConst(TypeSampledImage(image_buffer_id), 0, 4); + texture_buffer_lut_rgba_id = DefineUniformConst(TypeSampledImage(image_buffer_id), 0, 5); tex0_id = DefineUniformConst(TypeSampledImage(image2d_id), 1, 0); tex1_id = DefineUniformConst(TypeSampledImage(image2d_id), 1, 1); tex2_id = DefineUniformConst(TypeSampledImage(image2d_id), 1, 2); @@ -1550,11 +1549,11 
@@ void FragmentModule::DefineInterface() { Decorate(gl_frag_depth_id, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth); } -std::vector GenerateFragmentShaderSPV(const PicaFSConfig& config) { +std::vector GenerateFragmentShader(const PicaFSConfig& config) { auto& telemetry = Core::System::GetInstance().TelemetrySession(); FragmentModule module{telemetry, config}; module.Generate(); return module.Assemble(); } -} // namespace Vulkan +} // namespace Pica::Shader::Generator::SPIRV diff --git a/src/video_core/renderer_vulkan/vk_shader_gen_spv.h b/src/video_core/shader/generator/spv_shader_gen.h similarity index 97% rename from src/video_core/renderer_vulkan/vk_shader_gen_spv.h rename to src/video_core/shader/generator/spv_shader_gen.h index 32bac11c2..4030a7a8e 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen_spv.h +++ b/src/video_core/shader/generator/spv_shader_gen.h @@ -7,13 +7,13 @@ #include #include -#include "video_core/renderer_vulkan/vk_shader_gen.h" +#include "video_core/shader/generator/shader_gen.h" namespace Core { class TelemetrySession; } -namespace Vulkan { +namespace Pica::Shader::Generator::SPIRV { using Sirit::Id; @@ -41,7 +41,7 @@ public: void Generate(); private: - /// Undos the vulkan perspective transformation and applies the PICA one + /// Undos the host perspective transformation and applies the PICA one void WriteDepth(); /// Emits code to emulate the scissor rectangle @@ -289,6 +289,6 @@ private: * @param separable_shader generates shader that can be used for separate shader object * @returns String of the shader source code */ -std::vector GenerateFragmentShaderSPV(const PicaFSConfig& config); +std::vector GenerateFragmentShader(const PicaFSConfig& config); -} // namespace Vulkan +} // namespace Pica::Shader::Generator::SPIRV diff --git a/src/video_core/shader/shader_uniforms.cpp b/src/video_core/shader/shader_uniforms.cpp deleted file mode 100644 index 48354e5d5..000000000 --- a/src/video_core/shader/shader_uniforms.cpp +++ 
/dev/null @@ -1,80 +0,0 @@ -// Copyright 2023 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include "video_core/shader/shader.h" -#include "video_core/shader/shader_uniforms.h" - -namespace Pica::Shader { - -void PicaUniformsData::SetFromRegs(const Pica::ShaderRegs& regs, - const Pica::Shader::ShaderSetup& setup) { - std::transform(std::begin(setup.uniforms.b), std::end(setup.uniforms.b), std::begin(bools), - [](bool value) -> BoolAligned { return {value ? 1 : 0}; }); - std::transform(std::begin(regs.int_uniforms), std::end(regs.int_uniforms), std::begin(i), - [](const auto& value) -> Common::Vec4u { - return {value.x.Value(), value.y.Value(), value.z.Value(), value.w.Value()}; - }); - std::transform(std::begin(setup.uniforms.f), std::end(setup.uniforms.f), std::begin(f), - [](const auto& value) -> Common::Vec4f { - return {value.x.ToFloat32(), value.y.ToFloat32(), value.z.ToFloat32(), - value.w.ToFloat32()}; - }); -} - -constexpr std::string_view UniformBlockDefFormat = R"( -#define NUM_TEV_STAGES 6 -#define NUM_LIGHTS 8 -#define NUM_LIGHTING_SAMPLERS 24 -struct LightSrc {{ - vec3 specular_0; - vec3 specular_1; - vec3 diffuse; - vec3 ambient; - vec3 position; - vec3 spot_direction; - float dist_atten_bias; - float dist_atten_scale; -}}; -layout ({}std140) uniform shader_data {{ - int framebuffer_scale; - int alphatest_ref; - float depth_scale; - float depth_offset; - float shadow_bias_constant; - float shadow_bias_linear; - int scissor_x1; - int scissor_y1; - int scissor_x2; - int scissor_y2; - int fog_lut_offset; - int proctex_noise_lut_offset; - int proctex_color_map_offset; - int proctex_alpha_map_offset; - int proctex_lut_offset; - int proctex_diff_lut_offset; - float proctex_bias; - int shadow_texture_bias; - bool enable_clip1; - ivec4 lighting_lut_offset[NUM_LIGHTING_SAMPLERS / 4]; - vec3 fog_color; - vec2 proctex_noise_f; - vec2 proctex_noise_a; - vec2 proctex_noise_p; - vec3 
lighting_global_ambient; - LightSrc light_src[NUM_LIGHTS]; - vec4 const_color[NUM_TEV_STAGES]; - vec4 tev_combiner_buffer_color; - vec3 tex_lod_bias; - vec4 tex_border_color[3]; - vec4 clip_coef; - vec4 blend_color; -}}; -)"; - -std::string BuildShaderUniformDefinitions(const std::string& extra_layout_parameters) { - return fmt::format(UniformBlockDefFormat, extra_layout_parameters); -} - -} // namespace Pica::Shader