diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 640d8fbc5..d14939e7a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -280,7 +280,15 @@ bool RasterizerOpenGL::SetupGeometryShader() { return false; } - shader_manager.UseFixedGeometryShader(regs); + // Enable the quaternion fix-up geometry-shader only if we are actually doing per-fragment + // lighting and care about proper quaternions. Otherwise just use standard vertex+fragment + // shaders + if (regs.lighting.disable) { + shader_manager.UseTrivialGeometryShader(); + } else { + shader_manager.UseFixedGeometryShader(regs); + } + return true; } diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 44a753f6c..9f62a2f6f 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -88,7 +88,12 @@ static std::tuple BuildVSConfigFromRaw( Pica::Shader::ShaderSetup setup; setup.program_code = program_code; setup.swizzle_data = swizzle_data; - return {PicaVSConfig{raw.GetRawShaderConfig(), setup, driver.HasClipCullDistance(), true}, + + // Enable the geometry-shader only if we are actually doing per-fragment lighting + // and care about proper quaternions. Otherwise just use standard vertex+fragment shaders + const bool use_geometry_shader = !raw.GetRawShaderConfig().lighting.disable; + return {PicaVSConfig{raw.GetRawShaderConfig(), setup, driver.HasClipCullDistance(), + use_geometry_shader}, setup}; } @@ -265,6 +270,7 @@ public: .has_geometry_shader = true, .has_custom_border_color = true, .has_fragment_shader_interlock = false, + .has_fragment_shader_barycentric = false, .has_blend_minmax_factor = driver.HasBlendMinMaxFactor(), .has_minus_one_to_one_range = true, .has_logic_op = !driver.IsOpenGLES(), @@ -272,6 +278,8 @@ public: .has_gl_arm_framebuffer_fetch = driver.HasArmShaderFramebufferFetch(), .has_gl_nv_fragment_shader_interlock = driver.GetVendor() == Vendor::Nvidia, .has_gl_intel_fragment_shader_interlock = driver.GetVendor() == Vendor::Intel, + // TODO: This extension requires GLSL 450 / OpenGL 4.5 context. + .has_gl_nv_fragment_shader_barycentric = false, .is_vulkan = false, }; } @@ -327,7 +335,11 @@ ShaderProgramManager::~ShaderProgramManager() = default; bool ShaderProgramManager::UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup) { - PicaVSConfig config{regs, setup, driver.HasClipCullDistance(), true}; + // Enable the geometry-shader only if we are actually doing per-fragment lighting + // and care about proper quaternions. Otherwise just use standard vertex+fragment shaders + const bool use_geometry_shader = !regs.lighting.disable; + + PicaVSConfig config{regs, setup, driver.HasClipCullDistance(), use_geometry_shader}; auto [handle, result] = impl->programmable_vertex_shaders.Get(config, setup); if (handle == 0) return false; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index f03ca3ba9..985c547fe 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -403,7 +403,8 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR, vk::PhysicalDeviceCustomBorderColorFeaturesEXT, vk::PhysicalDeviceIndexTypeUint8FeaturesEXT, vk::PhysicalDeviceFragmentShaderInterlockFeaturesEXT, - vk::PhysicalDevicePipelineCreationCacheControlFeaturesEXT>(); + vk::PhysicalDevicePipelineCreationCacheControlFeaturesEXT, + vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR>(); const vk::StructureChain properties_chain = physical_device.getProperties2(); @@ -436,6 +437,7 @@ bool Instance::CreateDevice() { }; const bool is_nvidia = driver_id == vk::DriverIdKHR::eNvidiaProprietary; + const bool is_moltenvk = driver_id == vk::DriverIdKHR::eMoltenvk; const bool is_arm = driver_id == vk::DriverIdKHR::eArmProprietary; const bool is_qualcomm = driver_id == vk::DriverIdKHR::eQualcommProprietary; @@ -459,6 +461,9 @@ bool Instance::CreateDevice() { const bool has_pipeline_creation_cache_control = add_extension(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME, is_nvidia, "it is broken on Nvidia drivers"); + const bool has_fragment_shader_barycentric = + add_extension(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME, is_moltenvk, + "the PerVertexKHR attribute is not supported by MoltenVK"); const auto family_properties = physical_device.getQueueFamilyProperties(); if (family_properties.empty()) { @@ -514,6 +519,7 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceIndexTypeUint8FeaturesEXT{}, vk::PhysicalDeviceFragmentShaderInterlockFeaturesEXT{}, vk::PhysicalDevicePipelineCreationCacheControlFeaturesEXT{}, + vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR{}, }; #define PROP_GET(structName, prop, property) property = properties_chain.get().prop; @@ -581,6 +587,13 @@ bool Instance::CreateDevice() { device_chain.unlink(); } + if (has_fragment_shader_barycentric) { + FEAT_SET(vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR, fragmentShaderBarycentric, + fragment_shader_barycentric) + } else { + device_chain.unlink(); + } + #undef PROP_GET #undef FEAT_SET diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 07cface6f..d5fe88a97 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -168,6 +168,11 @@ public: return shader_stencil_export; } + /// Returns true when VK_KHR_fragment_shader_barycentric is supported + bool IsFragmentShaderBarycentricSupported() const { + return fragment_shader_barycentric; + } + /// Returns the vendor ID of the physical device u32 GetVendorID() const { return properties.vendorID; @@ -307,6 +312,7 @@ private: bool fragment_shader_interlock{}; bool image_format_list{}; bool pipeline_creation_cache_control{}; + bool fragment_shader_barycentric{}; bool shader_stencil_export{}; bool tooling_info{}; bool debug_utils_supported{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a041afb45..bb347e3bf 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -96,6 +96,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, .has_geometry_shader = instance.UseGeometryShaders(), .has_custom_border_color = instance.IsCustomBorderColorSupported(), .has_fragment_shader_interlock = instance.IsFragmentShaderInterlockSupported(), + .has_fragment_shader_barycentric = instance.IsFragmentShaderBarycentricSupported(), .has_blend_minmax_factor = false, .has_minus_one_to_one_range = false, .has_logic_op = !instance.NeedsLogicOpEmulation(), @@ -331,8 +332,13 @@ bool PipelineCache::BindPipeline(const PipelineInfo& info, bool wait_built) { bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup, const VertexLayout& layout) { - PicaVSConfig config{regs, setup, instance.IsShaderClipDistanceSupported(), - instance.UseGeometryShaders()}; + // Enable the geometry-shader only if we are actually doing per-fragment lighting + // and care about proper quaternions. Otherwise just use standard vertex+fragment shaders. + // We also don't need the geometry shader if we have the barycentric extension. + const bool use_geometry_shader = instance.UseGeometryShaders() && !regs.lighting.disable && + !instance.IsFragmentShaderBarycentricSupported(); + + PicaVSConfig config{regs, setup, instance.IsShaderClipDistanceSupported(), use_geometry_shader}; for (u32 i = 0; i < layout.attribute_count; i++) { const VertexAttribute& attr = layout.attributes[i]; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f6e652fca..37b4f2b09 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -337,6 +337,14 @@ bool RasterizerVulkan::SetupGeometryShader() { return false; } + // Enable the quaternion fix-up geometry-shader only if we are actually doing per-fragment + // lighting and care about proper quaternions. Otherwise just use standard vertex+fragment + // shaders. We also don't need a geometry shader if the barycentric extension is supported. + if (regs.lighting.disable || instance.IsFragmentShaderBarycentricSupported()) { + pipeline_cache.UseTrivialGeometryShader(); + return true; + } + return pipeline_cache.UseFixedGeometryShader(regs); } diff --git a/src/video_core/shader/generator/glsl_fs_shader_gen.cpp b/src/video_core/shader/generator/glsl_fs_shader_gen.cpp index 449ed62c1..50f57ed11 100644 --- a/src/video_core/shader/generator/glsl_fs_shader_gen.cpp +++ b/src/video_core/shader/generator/glsl_fs_shader_gen.cpp @@ -567,6 +567,17 @@ void FragmentModule::WriteLighting() { } } + // If the barycentric extension is enabled, perform quaternion correction here. + if (use_fragment_shader_barycentric) { + out += "vec4 normquat_0 = normquats[0];\n" + "vec4 normquat_1 = mix(normquats[1], -normquats[1], " + "bvec4(AreQuaternionsOpposite(normquats[0], normquats[1])));\n" + "vec4 normquat_2 = mix(normquats[2], -normquats[2], " + "bvec4(AreQuaternionsOpposite(normquats[0], normquats[2])));\n" + "vec4 normquat = gl_BaryCoord.x * normquat_0 + gl_BaryCoord.y * normquat_1 + " + "gl_BaryCoord.z * normquat_2;\n"; + } + // Rotate the surface-local normal by the interpolated normal quaternion to convert it to // eyespace. out += "vec4 normalized_normquat = normalize(normquat);\n" @@ -1231,6 +1242,20 @@ void FragmentModule::DefineExtensions() { use_fragment_shader_interlock = false; } } + if (config.lighting.enable) { + use_fragment_shader_barycentric = true; + if (profile.has_fragment_shader_barycentric) { + out += "#extension GL_EXT_fragment_shader_barycentric : enable\n"; + out += "#define pervertex pervertexEXT\n"; + out += "#define gl_BaryCoord gl_BaryCoordEXT\n"; + } else if (profile.has_gl_nv_fragment_shader_barycentric) { + out += "#extension GL_NV_fragment_shader_barycentric : enable\n"; + out += "#define pervertex pervertexNV\n"; + out += "#define gl_BaryCoord gl_BaryCoordNV\n"; + } else { + use_fragment_shader_barycentric = false; + } + } if (config.EmulateBlend()) { if (profile.has_gl_ext_framebuffer_fetch) { out += "#extension GL_EXT_shader_framebuffer_fetch : enable\n"; @@ -1263,7 +1288,11 @@ void FragmentModule::DefineInterface() { define_input("vec2 texcoord1", Semantic::Texcoord1); define_input("vec2 texcoord2", Semantic::Texcoord2); define_input("float texcoord0_w", Semantic::Texcoord0_W); - define_input("vec4 normquat", Semantic::Normquat); + if (use_fragment_shader_barycentric) { + define_input("pervertex vec4 normquats[]", Semantic::Normquat); + } else { + define_input("vec4 normquat", Semantic::Normquat); + } define_input("vec3 view", Semantic::View); // Output attributes @@ -1360,6 +1389,14 @@ float LookupLightingLUTSigned(int lut_index, float pos) { return LookupLightingLUT(lut_index, index, delta); } )"; + + if (use_fragment_shader_barycentric) { + out += R"( +bool AreQuaternionsOpposite(vec4 qa, vec4 qb) { + return (dot(qa, qb) < 0.0); +} +)"; + } } void FragmentModule::DefineShadowHelpers() { diff --git a/src/video_core/shader/generator/glsl_fs_shader_gen.h b/src/video_core/shader/generator/glsl_fs_shader_gen.h index 77836315c..f8541f1fe 100644 --- a/src/video_core/shader/generator/glsl_fs_shader_gen.h +++ b/src/video_core/shader/generator/glsl_fs_shader_gen.h @@ -87,6 +87,7 @@ private: std::string out; bool use_blend_fallback{}; bool use_fragment_shader_interlock{}; + bool use_fragment_shader_barycentric{}; }; /** diff --git a/src/video_core/shader/generator/profile.h b/src/video_core/shader/generator/profile.h index ba2a38085..1c16bb787 100644 --- a/src/video_core/shader/generator/profile.h +++ b/src/video_core/shader/generator/profile.h @@ -12,6 +12,7 @@ struct Profile { bool has_geometry_shader{}; bool has_custom_border_color{}; bool has_fragment_shader_interlock{}; + bool has_fragment_shader_barycentric{}; bool has_blend_minmax_factor{}; bool has_minus_one_to_one_range{}; bool has_logic_op{}; @@ -19,6 +20,7 @@ struct Profile { bool has_gl_arm_framebuffer_fetch{}; bool has_gl_nv_fragment_shader_interlock{}; bool has_gl_intel_fragment_shader_interlock{}; + bool has_gl_nv_fragment_shader_barycentric{}; bool is_vulkan{}; };