From 8d9d508dc78eb5225c99cb425fa484999f3c4305 Mon Sep 17 00:00:00 2001 From: riperiperi Date: Sat, 22 Apr 2023 22:02:39 +0100 Subject: [PATCH] Shader: Bias textureGather instructions on AMD/Intel (#4703) * Experimental (GLSL, forced) * SPIR-V attempt * Add capability * Fix pCount == 1 on glsl * Fix typo --- Ryujinx.Graphics.GAL/Capabilities.cs | 6 +++- .../Shader/DiskCache/DiskCacheHostStorage.cs | 2 +- .../Shader/GpuAccessorBase.cs | 2 ++ Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs | 10 +++++-- .../Glsl/Instructions/InstGenMemory.cs | 23 +++++++++++++- .../CodeGen/Spirv/Instructions.cs | 30 +++++++++++++++++++ Ryujinx.Graphics.Shader/IGpuAccessor.cs | 9 ++++++ .../HardwareCapabilities.cs | 5 +++- Ryujinx.Graphics.Vulkan/VulkanRenderer.cs | 6 ++-- 9 files changed, 84 insertions(+), 9 deletions(-) diff --git a/Ryujinx.Graphics.GAL/Capabilities.cs b/Ryujinx.Graphics.GAL/Capabilities.cs index 7822da211..bc4a02c97 100644 --- a/Ryujinx.Graphics.GAL/Capabilities.cs +++ b/Ryujinx.Graphics.GAL/Capabilities.cs @@ -48,6 +48,8 @@ namespace Ryujinx.Graphics.GAL public readonly float MaximumSupportedAnisotropy; public readonly int StorageBufferOffsetAlignment; + public readonly int GatherBiasPrecision; + public Capabilities( TargetApi api, string vendorName, @@ -87,7 +89,8 @@ namespace Ryujinx.Graphics.GAL uint maximumImagesPerStage, int maximumComputeSharedMemorySize, float maximumSupportedAnisotropy, - int storageBufferOffsetAlignment) + int storageBufferOffsetAlignment, + int gatherBiasPrecision) { Api = api; VendorName = vendorName; @@ -128,6 +131,7 @@ namespace Ryujinx.Graphics.GAL MaximumComputeSharedMemorySize = maximumComputeSharedMemorySize; MaximumSupportedAnisotropy = maximumSupportedAnisotropy; StorageBufferOffsetAlignment = storageBufferOffsetAlignment; + GatherBiasPrecision = gatherBiasPrecision; } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs index 0b87cc910..48464f832 100644 --- a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs @@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache private const ushort FileFormatVersionMajor = 1; private const ushort FileFormatVersionMinor = 2; private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor; - private const uint CodeGenVersion = 4404; + private const uint CodeGenVersion = 4703; private const string SharedTocFileName = "shared.toc"; private const string SharedDataFileName = "shared.data"; diff --git a/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs b/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs index 1402f146b..bbf2702e4 100644 --- a/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs +++ b/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs @@ -112,6 +112,8 @@ namespace Ryujinx.Graphics.Gpu.Shader }; } + public int QueryHostGatherBiasPrecision() => _context.Capabilities.GatherBiasPrecision; + public bool QueryHostReducedPrecision() => _context.Capabilities.ReduceShaderPrecision; public bool QueryHostHasFrontFacingBug() => _context.Capabilities.HasFrontFacingBug; diff --git a/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs b/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs index 91e52178f..5a2e3fe4e 100644 --- a/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs +++ b/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs @@ -103,11 +103,14 @@ namespace Ryujinx.Graphics.OpenGL public Capabilities GetCapabilities() { + bool intelWindows = HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelWindows; + bool amdWindows = HwCapabilities.Vendor == HwCapabilities.GpuVendor.AmdWindows; + return new Capabilities( api: TargetApi.OpenGL, vendorName: GpuVendor, - hasFrontFacingBug: HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelWindows, - hasVectorIndexingBug: HwCapabilities.Vendor == HwCapabilities.GpuVendor.AmdWindows, + hasFrontFacingBug: intelWindows, + hasVectorIndexingBug: amdWindows, needsFragmentOutputSpecialization: false, reduceShaderPrecision: false, supportsAstcCompression: HwCapabilities.SupportsAstcCompression, @@ -142,7 +145,8 @@ namespace Ryujinx.Graphics.OpenGL maximumImagesPerStage: 8, maximumComputeSharedMemorySize: HwCapabilities.MaximumComputeSharedMemorySize, maximumSupportedAnisotropy: HwCapabilities.MaximumSupportedAnisotropy, - storageBufferOffsetAlignment: HwCapabilities.StorageBufferOffsetAlignment); + storageBufferOffsetAlignment: HwCapabilities.StorageBufferOffsetAlignment, + gatherBiasPrecision: intelWindows || amdWindows ? 8 : 0); // Precision is 8 for these vendors on Vulkan. } public void SetBufferData(BufferHandle buffer, int offset, ReadOnlySpan data) diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs index 263eada6f..a5d2632ce 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs @@ -677,7 +677,28 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions return vector; } - Append(ApplyScaling(AssemblePVector(pCount))); + string ApplyBias(string vector) + { + int gatherBiasPrecision = context.Config.GpuAccessor.QueryHostGatherBiasPrecision(); + if (isGather && gatherBiasPrecision != 0) + { + // GPU requires texture gather to be slightly offset to match NVIDIA behaviour when point is exactly between two texels. + // Offset by the gather precision divided by 2 to correct for rounding. + + if (pCount == 1) + { + vector = $"{vector} + (1.0 / (float(textureSize({samplerName}, 0)) * float({1 << (gatherBiasPrecision + 1)})))"; + } + else + { + vector = $"{vector} + (1.0 / (vec{pCount}(textureSize({samplerName}, 0).{"xyz".Substring(0, pCount)}) * float({1 << (gatherBiasPrecision + 1)})))"; + } + } + + return vector; + } + + Append(ApplyBias(ApplyScaling(AssemblePVector(pCount)))); string AssembleDerivativesVector(int count) { diff --git a/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs b/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs index 14d6ab52a..b3db19051 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs @@ -4,6 +4,7 @@ using Ryujinx.Graphics.Shader.Translation; using System; using System.Collections.Generic; using System.Diagnostics; +using System.Linq; using System.Numerics; using static Spv.Specification; @@ -1556,6 +1557,33 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv } } + SpvInstruction ApplyBias(SpvInstruction vector, SpvInstruction image) + { + int gatherBiasPrecision = context.Config.GpuAccessor.QueryHostGatherBiasPrecision(); + if (isGather && gatherBiasPrecision != 0) + { + // GPU requires texture gather to be slightly offset to match NVIDIA behaviour when point is exactly between two texels. + // Offset by the gather precision divided by 2 to correct for rounding. + var sizeType = pCount == 1 ? context.TypeS32() : context.TypeVector(context.TypeS32(), pCount); + var pVectorType = pCount == 1 ? context.TypeFP32() : context.TypeVector(context.TypeFP32(), pCount); + + var bias = context.Constant(context.TypeFP32(), (float)(1 << (gatherBiasPrecision + 1))); + var biasVector = context.CompositeConstruct(pVectorType, Enumerable.Repeat(bias, pCount).ToArray()); + + var one = context.Constant(context.TypeFP32(), 1f); + var oneVector = context.CompositeConstruct(pVectorType, Enumerable.Repeat(one, pCount).ToArray()); + + var divisor = context.FMul( + pVectorType, + context.ConvertSToF(pVectorType, context.ImageQuerySize(sizeType, image)), + biasVector); + + vector = context.FAdd(pVectorType, vector, context.FDiv(pVectorType, oneVector, divisor)); + } + + return vector; + } + SpvInstruction pCoords = AssemblePVector(pCount); pCoords = ScalingHelpers.ApplyScaling(context, texOp, pCoords, intCoords, isBindless, isIndexed, isArray, pCount); @@ -1716,6 +1744,8 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv image = context.Image(imageType, image); } + pCoords = ApplyBias(pCoords, image); + var operands = operandsList.ToArray(); SpvInstruction result; diff --git a/Ryujinx.Graphics.Shader/IGpuAccessor.cs b/Ryujinx.Graphics.Shader/IGpuAccessor.cs index ba5f2a92f..bc5e67c35 100644 --- a/Ryujinx.Graphics.Shader/IGpuAccessor.cs +++ b/Ryujinx.Graphics.Shader/IGpuAccessor.cs @@ -196,6 +196,15 @@ namespace Ryujinx.Graphics.Shader return false; } + /// + /// Queries host's gather operation precision bits for biasing their coordinates. Zero means no bias. + /// + /// Bits of gather operation precision to use for coordinate bias + int QueryHostGatherBiasPrecision() + { + return 0; + } + /// /// Queries host about whether to reduce precision to improve performance. /// diff --git a/Ryujinx.Graphics.Vulkan/HardwareCapabilities.cs b/Ryujinx.Graphics.Vulkan/HardwareCapabilities.cs index a45c2409b..e206bb299 100644 --- a/Ryujinx.Graphics.Vulkan/HardwareCapabilities.cs +++ b/Ryujinx.Graphics.Vulkan/HardwareCapabilities.cs @@ -46,6 +46,7 @@ namespace Ryujinx.Graphics.Vulkan public readonly SampleCountFlags SupportedSampleCounts; public readonly PortabilitySubsetFlags PortabilitySubset; public readonly uint VertexBufferAlignment; + public readonly uint SubTexelPrecisionBits; public HardwareCapabilities( bool supportsIndexTypeUint8, @@ -77,7 +78,8 @@ namespace Ryujinx.Graphics.Vulkan ShaderStageFlags requiredSubgroupSizeStages, SampleCountFlags supportedSampleCounts, PortabilitySubsetFlags portabilitySubset, - uint vertexBufferAlignment) + uint vertexBufferAlignment, + uint subTexelPrecisionBits) { SupportsIndexTypeUint8 = supportsIndexTypeUint8; SupportsCustomBorderColor = supportsCustomBorderColor; @@ -109,6 +111,7 @@ namespace Ryujinx.Graphics.Vulkan SupportedSampleCounts = supportedSampleCounts; PortabilitySubset = portabilitySubset; VertexBufferAlignment = vertexBufferAlignment; + SubTexelPrecisionBits = subTexelPrecisionBits; } } } diff --git a/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs b/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs index 92b453fb1..1c295d6ff 100644 --- a/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs +++ b/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs @@ -311,7 +311,8 @@ namespace Ryujinx.Graphics.Vulkan propertiesSubgroupSizeControl.RequiredSubgroupSizeStages, supportedSampleCounts, portabilityFlags, - vertexBufferAlignment); + vertexBufferAlignment, + properties.Limits.SubTexelPrecisionBits); IsSharedMemory = MemoryAllocator.IsDeviceMemoryShared(_physicalDevice); @@ -576,7 +577,8 @@ namespace Ryujinx.Graphics.Vulkan maximumImagesPerStage: Constants.MaxImagesPerStage, maximumComputeSharedMemorySize: (int)limits.MaxComputeSharedMemorySize, maximumSupportedAnisotropy: (int)limits.MaxSamplerAnisotropy, - storageBufferOffsetAlignment: (int)limits.MinStorageBufferOffsetAlignment); + storageBufferOffsetAlignment: (int)limits.MinStorageBufferOffsetAlignment, + gatherBiasPrecision: IsIntelWindows || IsAmdWindows ? (int)Capabilities.SubTexelPrecisionBits : 0); } public HardwareInfo GetHardwareInfo()