GPU: Add fallback when textureGatherOffsets is not supported (#5792)

* GPU: Add fallback when textureGatherOffsets is not supported.

This PR adds a fallback for GPUs or APIs that don't support an equivalent to the method `textureGatherOffsets`, where each of the 4 gathered texels has an individual offset. This is done by reusing the existing code to handle non-const offsets for texture instructions, though it has also been corrected as there were a few implementation issues.

MoltenVK reports support for this capability, and it didn't error when we initially released the macOS build, but that has since changed. MVK still reports support, and spirv-cross has been fixed so that it _attempts_ to use this capability, but the Metal compiler then errors since the capability doesn't exist there.

Some other fixes:
- textureGatherOffsets emulation has been changed significantly. It now uses 4 texture sample instructions (not gather), calculates a base texel (i=0 j=0) and adds the offsets onto it before converting into a tex coord. The final result is offset into a texel center, so it shouldn't be subject to interpolation, though this isn't perfect and could have some error with floating point formats with linear sampling. It is subject to texture wrap mode as it should be, which is why texelFetch was not used.
  - Maybe gather should be used here with component `w` (i=0, j=0), though this multiplies the number of texels fetched by 4... The way it was doing this before _was_ wrong, but doing it right would avoid issues with texel center precision.
- textureGatherOffset (singular) now performs textureGather with the offset applied to the coords, rather than the slower fallback where each texel is fetched individually.

* Increment shader cache version, remove unused arg

* Use base texture size for gather coord offset.

Implicit LOD for gather is not supported.

* Use 4 texture gathers for offsets emulation

Avoids issues with interpolation at cost of performance

(not sure how bad this is)

* Address Feedback
This commit is contained in:
riperiperi 2023-10-20 14:05:09 +01:00 committed by GitHub
parent 28dd7d80af
commit 76b53e018a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 71 additions and 11 deletions

View file

@ -38,6 +38,7 @@ namespace Ryujinx.Graphics.GAL
public readonly bool SupportsShaderBallot;
public readonly bool SupportsShaderBarrierDivergence;
public readonly bool SupportsShaderFloat64;
public readonly bool SupportsTextureGatherOffsets;
public readonly bool SupportsTextureShadowLod;
public readonly bool SupportsVertexStoreAndAtomics;
public readonly bool SupportsViewportIndexVertexTessellation;
@ -92,6 +93,7 @@ namespace Ryujinx.Graphics.GAL
bool supportsShaderBallot,
bool supportsShaderBarrierDivergence,
bool supportsShaderFloat64,
bool supportsTextureGatherOffsets,
bool supportsTextureShadowLod,
bool supportsVertexStoreAndAtomics,
bool supportsViewportIndexVertexTessellation,
@ -142,6 +144,7 @@ namespace Ryujinx.Graphics.GAL
SupportsShaderBallot = supportsShaderBallot;
SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence;
SupportsShaderFloat64 = supportsShaderFloat64;
SupportsTextureGatherOffsets = supportsTextureGatherOffsets;
SupportsTextureShadowLod = supportsTextureShadowLod;
SupportsVertexStoreAndAtomics = supportsVertexStoreAndAtomics;
SupportsViewportIndexVertexTessellation = supportsViewportIndexVertexTessellation;

View file

@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
private const ushort FileFormatVersionMajor = 1;
private const ushort FileFormatVersionMinor = 2;
private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
private const uint CodeGenVersion = 5767;
private const uint CodeGenVersion = 5791;
private const string SharedTocFileName = "shared.toc";
private const string SharedDataFileName = "shared.data";

View file

@ -186,6 +186,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
// Host capability queries: each one returns the matching flag from _context.Capabilities.
public bool QueryHostSupportsSnormBufferTextureFormat() => _context.Capabilities.SupportsSnormBufferTextureFormat;
public bool QueryHostSupportsTextureGatherOffsets() => _context.Capabilities.SupportsTextureGatherOffsets;
public bool QueryHostSupportsTextureShadowLod() => _context.Capabilities.SupportsTextureShadowLod;
public bool QueryHostSupportsTransformFeedback() => _context.Capabilities.SupportsTransformFeedback;

View file

@ -163,6 +163,7 @@ namespace Ryujinx.Graphics.OpenGL
supportsShaderBallot: HwCapabilities.SupportsShaderBallot,
supportsShaderBarrierDivergence: !(intelWindows || intelUnix),
supportsShaderFloat64: true,
supportsTextureGatherOffsets: true,
supportsTextureShadowLod: HwCapabilities.SupportsTextureShadowLod,
supportsVertexStoreAndAtomics: true,
supportsViewportIndexVertexTessellation: HwCapabilities.SupportsShaderViewportLayerArray,

View file

@ -339,6 +339,15 @@ namespace Ryujinx.Graphics.Shader
return true;
}
/// <summary>
/// Queries host GPU support for texture gather with multiple offsets,
/// where each gathered texel has its own offset.
/// </summary>
/// <returns>
/// True if the GPU and driver support texture gather offsets, false otherwise.
/// This implementation always returns true.
/// </returns>
bool QueryHostSupportsTextureGatherOffsets()
{
return true;
}
/// <summary>
/// Queries host GPU texture shadow LOD support.
/// </summary>

View file

@ -303,7 +303,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;
bool hasInvalidOffset = (hasOffset || hasOffsets) && !gpuAccessor.QueryHostSupportsNonConstantTextureOffset();
bool needsOffsetsEmulation = hasOffsets && !gpuAccessor.QueryHostSupportsTextureGatherOffsets();
bool hasInvalidOffset = needsOffsetsEmulation || ((hasOffset || hasOffsets) && !gpuAccessor.QueryHostSupportsNonConstantTextureOffset());
bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
@ -402,12 +404,15 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
offsets[index] = offset;
}
if (!needsOffsetsEmulation)
{
hasInvalidOffset &= !areAllOffsetsConstant;
if (!hasInvalidOffset)
{
return node;
}
}
if (hasLodBias)
{
@ -434,13 +439,13 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
LinkedListNode<INode> oldNode = node;
if (isGather && !isShadow)
if (isGather && !isShadow && hasOffsets)
{
Operand[] newSources = new Operand[sources.Length];
sources.CopyTo(newSources, 0);
Operand[] texSizes = InsertTextureLod(node, texOp, lodSources, bindlessHandle, coordsCount, stage);
Operand[] texSizes = InsertTextureBaseSize(node, texOp, bindlessHandle, coordsCount);
int destIndex = 0;
@ -455,7 +460,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
{
Operand offset = Local();
Operand intOffset = offsets[index + (hasOffsets ? compIndex * coordsCount : 0)];
Operand intOffset = offsets[index + compIndex * coordsCount];
node.List.AddBefore(node, new Operation(
Instruction.FP32 | Instruction.Divide,
@ -478,7 +483,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
texOp.Format,
texOp.Flags & ~(TextureFlags.Offset | TextureFlags.Offsets),
texOp.Binding,
1,
1 << 3, // W component: i=0, j=0
new[] { dests[destIndex++] },
newSources);
@ -502,7 +507,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
}
else
{
Operand[] texSizes = InsertTextureLod(node, texOp, lodSources, bindlessHandle, coordsCount, stage);
Operand[] texSizes = isGather
? InsertTextureBaseSize(node, texOp, bindlessHandle, coordsCount)
: InsertTextureLod(node, texOp, lodSources, bindlessHandle, coordsCount, stage);
for (int index = 0; index < coordsCount; index++)
{
@ -549,6 +556,43 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
return node;
}
/// <summary>
/// Inserts one <see cref="Instruction.TextureQuerySize"/> operation per coordinate component
/// before <paramref name="node"/>, each using a constant 0 as its last source
/// (presumably the LOD, i.e. the base level, as the method name suggests).
/// </summary>
/// <param name="node">Node that the size queries are inserted before</param>
/// <param name="texOp">Texture operation whose type, format, flags and binding are reused by the queries</param>
/// <param name="bindlessHandle">Handle passed as the first query source, or null to omit it</param>
/// <param name="coordsCount">Number of size components to query</param>
/// <returns>The locals written by the inserted queries, one per component</returns>
private static Operand[] InsertTextureBaseSize(
    LinkedListNode<INode> node,
    TextureOperation texOp,
    Operand bindlessHandle,
    int coordsCount)
{
    Operand[] sizes = new Operand[coordsCount];

    for (int component = 0; component < coordsCount; component++)
    {
        Operand size = Local();
        sizes[component] = size;

        // Build a fresh source array for every query so no two operations share one.
        Operand[] querySources = bindlessHandle != null
            ? new Operand[] { bindlessHandle, Const(0) }
            : new Operand[] { Const(0) };

        // The loop index is forwarded as the operation's index argument.
        TextureOperation sizeQuery = new TextureOperation(
            Instruction.TextureQuerySize,
            texOp.Type,
            texOp.Format,
            texOp.Flags,
            texOp.Binding,
            component,
            new[] { size },
            querySources);

        node.List.AddBefore(node, sizeQuery);
    }

    return sizes;
}
private static Operand[] InsertTextureLod(
LinkedListNode<INode> node,
TextureOperation texOp,

View file

@ -605,6 +605,7 @@ namespace Ryujinx.Graphics.Vulkan
supportsShaderBallot: false,
supportsShaderBarrierDivergence: Vendor != Vendor.Intel,
supportsShaderFloat64: Capabilities.SupportsShaderFloat64,
supportsTextureGatherOffsets: features2.Features.ShaderImageGatherExtended && !IsMoltenVk,
supportsTextureShadowLod: false,
supportsVertexStoreAndAtomics: features2.Features.VertexPipelineStoresAndAtomics,
supportsViewportIndexVertexTessellation: featuresVk12.ShaderOutputViewportIndex,