Remove barrier on Intel if control flow is potentially divergent (#5044)

* Remove barrier on Intel if control flow is potentially divergent

* Shader cache version bump
This commit is contained in:
gdkchan 2023-06-08 17:43:16 -03:00 committed by GitHub
parent fe30c03cac
commit 2cdcfe46d8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 64 additions and 8 deletions

View file

@ -34,6 +34,7 @@ namespace Ryujinx.Graphics.GAL
public readonly bool SupportsCubemapView;
public readonly bool SupportsNonConstantTextureOffset;
public readonly bool SupportsShaderBallot;
public readonly bool SupportsShaderBarrierDivergence;
public readonly bool SupportsShaderFloat64;
public readonly bool SupportsTextureShadowLod;
public readonly bool SupportsViewportIndexVertexTessellation;
@ -82,6 +83,7 @@ namespace Ryujinx.Graphics.GAL
bool supportsCubemapView,
bool supportsNonConstantTextureOffset,
bool supportsShaderBallot,
bool supportsShaderBarrierDivergence,
bool supportsShaderFloat64,
bool supportsTextureShadowLod,
bool supportsViewportIndexVertexTessellation,
@ -126,6 +128,7 @@ namespace Ryujinx.Graphics.GAL
SupportsCubemapView = supportsCubemapView;
SupportsNonConstantTextureOffset = supportsNonConstantTextureOffset;
SupportsShaderBallot = supportsShaderBallot;
SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence;
SupportsShaderFloat64 = supportsShaderFloat64;
SupportsTextureShadowLod = supportsTextureShadowLod;
SupportsViewportIndexVertexTessellation = supportsViewportIndexVertexTessellation;

View file

@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
private const ushort FileFormatVersionMajor = 1;
private const ushort FileFormatVersionMinor = 2;
private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
private const uint CodeGenVersion = 5159;
private const uint CodeGenVersion = 5044;
private const string SharedTocFileName = "shared.toc";
private const string SharedDataFileName = "shared.data";

View file

@ -141,6 +141,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
public bool QueryHostSupportsShaderBallot() => _context.Capabilities.SupportsShaderBallot;
public bool QueryHostSupportsShaderBarrierDivergence() => _context.Capabilities.SupportsShaderBarrierDivergence;
public bool QueryHostSupportsShaderFloat64() => _context.Capabilities.SupportsShaderFloat64;
public bool QueryHostSupportsSnormBufferTextureFormat() => _context.Capabilities.SupportsSnormBufferTextureFormat;

View file

@ -127,6 +127,7 @@ namespace Ryujinx.Graphics.OpenGL
public Capabilities GetCapabilities()
{
bool intelWindows = HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelWindows;
bool intelUnix = HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelUnix;
bool amdWindows = HwCapabilities.Vendor == HwCapabilities.GpuVendor.AmdWindows;
return new Capabilities(
@ -158,6 +159,7 @@ namespace Ryujinx.Graphics.OpenGL
supportsCubemapView: true,
supportsNonConstantTextureOffset: HwCapabilities.SupportsNonConstantTextureOffset,
supportsShaderBallot: HwCapabilities.SupportsShaderBallot,
supportsShaderBarrierDivergence: !(intelWindows || intelUnix),
supportsShaderFloat64: true,
supportsTextureShadowLod: HwCapabilities.SupportsTextureShadowLod,
supportsViewportIndexVertexTessellation: HwCapabilities.SupportsShaderViewportLayerArray,

View file

@ -28,18 +28,18 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
for (int i = 1; i < info.Functions.Count; i++)
{
PrintFunction(context, info, info.Functions[i]);
PrintFunction(context, info.Functions[i]);
context.AppendLine();
}
}
PrintFunction(context, info, info.Functions[0], MainFunctionName);
PrintFunction(context, info.Functions[0], MainFunctionName);
return context.GetCode();
}
private static void PrintFunction(CodeGenContext context, StructuredProgramInfo info, StructuredFunction function, string funcName = null)
private static void PrintFunction(CodeGenContext context, StructuredFunction function, string funcName = null)
{
context.CurrentFunction = function;
@ -48,7 +48,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
Declarations.DeclareLocals(context, function);
PrintBlock(context, function.MainBlock);
PrintBlock(context, function.MainBlock, funcName == MainFunctionName);
context.LeaveScope();
}
@ -72,7 +72,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
return $"{Declarations.GetVarTypeName(context, function.ReturnType)} {funcName ?? function.Name}({string.Join(", ", args)})";
}
private static void PrintBlock(CodeGenContext context, AstBlock block)
private static void PrintBlock(CodeGenContext context, AstBlock block, bool isMainFunction)
{
AstBlockVisitor visitor = new AstBlockVisitor(block);
@ -112,10 +112,32 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
}
};
bool supportsBarrierDivergence = context.Config.GpuAccessor.QueryHostSupportsShaderBarrierDivergence();
bool mayHaveReturned = false;
foreach (IAstNode node in visitor.Visit())
{
if (node is AstOperation operation)
{
if (!supportsBarrierDivergence)
{
if (operation.Inst == IntermediateRepresentation.Instruction.Barrier)
{
// Barrier on divergent control flow paths may cause the GPU to hang,
// so skip emitting the barrier for those cases.
if (visitor.Block.Type != AstBlockType.Main || mayHaveReturned || !isMainFunction)
{
context.Config.GpuAccessor.Log($"Shader has barrier on potentially divergent block, the barrier will be removed.");
continue;
}
}
else if (operation.Inst == IntermediateRepresentation.Instruction.Return)
{
mayHaveReturned = true;
}
}
string expr = InstGen.GetExpression(context, operation);
if (expr != null)

View file

@ -76,6 +76,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
public SpirvDelegates Delegates { get; }
public bool IsMainFunction { get; private set; }
public bool MayHaveReturned { get; set; }
public CodeGenContext(
StructuredProgramInfo info,
ShaderConfig config,
@ -108,8 +111,10 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
Delegates = new SpirvDelegates(this);
}
public void StartFunction()
public void StartFunction(bool isMainFunction)
{
IsMainFunction = isMainFunction;
MayHaveReturned = false;
_locals.Clear();
_localForArgs.Clear();
_funcArgs.Clear();

View file

@ -242,6 +242,16 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
private static OperationResult GenerateBarrier(CodeGenContext context, AstOperation operation)
{
// Barrier on divergent control flow paths may cause the GPU to hang,
// so skip emitting the barrier for those cases.
if (!context.Config.GpuAccessor.QueryHostSupportsShaderBarrierDivergence() &&
(context.CurrentBlock.Type != AstBlockType.Main || context.MayHaveReturned || !context.IsMainFunction))
{
context.Config.GpuAccessor.Log($"Shader has barrier on potentially divergent block, the barrier will be removed.");
return OperationResult.Invalid;
}
context.ControlBarrier(
context.Constant(context.TypeU32(), Scope.Workgroup),
context.Constant(context.TypeU32(), Scope.Workgroup),
@ -1092,6 +1102,8 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
private static OperationResult GenerateReturn(CodeGenContext context, AstOperation operation)
{
context.MayHaveReturned = true;
if (operation.SourcesCount != 0)
{
context.ReturnValue(context.Get(context.CurrentFunction.ReturnType, operation.GetSource(0)));

View file

@ -148,7 +148,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
context.CurrentFunction = function;
context.AddFunction(spvFunc);
context.StartFunction();
context.StartFunction(isMainFunction: funcIndex == 0);
Declarations.DeclareParameters(context, function);

View file

@ -331,6 +331,15 @@ namespace Ryujinx.Graphics.Shader
return true;
}
/// <summary>
/// Queries host GPU shader support for barrier instructions on divergent control flow paths.
/// </summary>
/// <returns>True if the GPU supports barriers on divergent control flow paths, false otherwise</returns>
bool QueryHostSupportsShaderBarrierDivergence()
{
return true;
}
/// <summary>
/// Queries host GPU support for 64-bit floating point (double precision) operations on the shader.
/// </summary>

View file

@ -595,6 +595,7 @@ namespace Ryujinx.Graphics.Vulkan
supportsCubemapView: !IsAmdGcn,
supportsNonConstantTextureOffset: false,
supportsShaderBallot: false,
supportsShaderBarrierDivergence: Vendor != Vendor.Intel,
supportsShaderFloat64: Capabilities.SupportsShaderFloat64,
supportsTextureShadowLod: false,
supportsViewportIndexVertexTessellation: featuresVk12.ShaderOutputViewportIndex,