Improve shader global memory to storage pass (#2200)

* Improve shader global memory to storage pass

* Formatting and more comments

* Shader cache version bump
This commit is contained in:
gdkchan 2021-04-18 07:31:39 -03:00 committed by GitHub
parent 7719909397
commit 40e276c9b5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 158 additions and 129 deletions

View file

@ -35,7 +35,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// <summary> /// <summary>
/// Version of the codegen (to be changed when codegen or guest format change). /// Version of the codegen (to be changed when codegen or guest format change).
/// </summary> /// </summary>
private const ulong ShaderCodeGenVersion = 2163; private const ulong ShaderCodeGenVersion = 2200;
// Progress reporting helpers // Progress reporting helpers
private volatile int _shaderCount; private volatile int _shaderCount;

View file

@ -5,66 +5,6 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{ {
class BindlessElimination class BindlessElimination
{ {
/// <summary>
/// Searches the predecessors of a block for a conditional branch that ends a predecessor
/// and is associated with the block.
/// </summary>
/// <param name="block">Block whose predecessors are inspected</param>
/// <returns>The last operation of the first matching predecessor, or null if none matches</returns>
private static Operation FindBranchSource(BasicBlock block)
{
    foreach (BasicBlock predecessor in block.Predecessors)
    {
        // A predecessor without operations has no branch to inspect.
        if (predecessor.Operations.Count == 0)
        {
            continue;
        }

        // Accept the predecessor when its last node is an operation and either:
        //  - the block is its "Next" block and it ends with BranchIfFalse, or
        //  - the block is its "Branch" block and it ends with BranchIfTrue.
        if (predecessor.Operations.Last.Value is Operation branchOp &&
            ((predecessor.Next == block && branchOp.Inst == Instruction.BranchIfFalse) ||
             (predecessor.Branch == block && branchOp.Inst == Instruction.BranchIfTrue)))
        {
            return branchOp;
        }
    }

    return null;
}
/// <summary>
/// Checks whether the branch leading into the query block is the same kind of branch,
/// on the same condition operand, as the branch leading into the current block.
/// Only the top-most conditional of each block is compared.
/// </summary>
/// <param name="currentBlock">Block where the value is being used</param>
/// <param name="queryBlock">Block whose condition is compared against the current block</param>
/// <returns>True if both blocks have a branch source with equal instruction and condition</returns>
private static bool BlockConditionsMatch(BasicBlock currentBlock, BasicBlock queryBlock)
{
    Operation branchIntoCurrent = FindBranchSource(currentBlock);
    Operation branchIntoQuery = FindBranchSource(queryBlock);

    Operand conditionOfCurrent = branchIntoCurrent?.GetSource(0);
    Operand conditionOfQuery = branchIntoQuery?.GetSource(0);

    // Both blocks must be reached through a recognized conditional branch.
    if (branchIntoCurrent == null || branchIntoQuery == null)
    {
        return false;
    }

    // The branches must be of the same kind.
    if (branchIntoCurrent.Inst != branchIntoQuery.Inst)
    {
        return false;
    }

    // The condition should be the same operand instance.
    return conditionOfCurrent == conditionOfQuery;
}
/// <summary>
/// Resolves an operand assigned by a phi node to one of the phi sources, when the block
/// that source comes from has the same branch condition as the given block.
/// </summary>
/// <param name="source">Operand to resolve</param>
/// <param name="block">Block where the operand is used</param>
/// <returns>The selected phi source, or the operand itself if it can't be resolved</returns>
private static Operand FindLastOperation(Operand source, BasicBlock block)
{
    // Operands that are not assigned by a phi node are returned untouched.
    if (!(source.AsgOp is PhiNode phi))
    {
        return source;
    }

    // The value can differ depending on a previous branch, so only pick a phi source
    // whose block condition is also met for the current block.
    // Walk backwards so that the latest sources are preferred.
    int index = phi.SourcesCount;

    while (--index >= 0)
    {
        if (BlockConditionsMatch(block, phi.GetBlock(index)))
        {
            return phi.GetSource(index);
        }
    }

    return source;
}
public static void RunPass(BasicBlock block, ShaderConfig config) public static void RunPass(BasicBlock block, ShaderConfig config)
{ {
// We can turn a bindless into regular access by recognizing the pattern // We can turn a bindless into regular access by recognizing the pattern
@ -89,7 +29,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
texOp.Inst == Instruction.TextureSample || texOp.Inst == Instruction.TextureSample ||
texOp.Inst == Instruction.TextureSize) texOp.Inst == Instruction.TextureSize)
{ {
Operand bindlessHandle = FindLastOperation(texOp.GetSource(0), block); Operand bindlessHandle = Utils.FindLastOperation(texOp.GetSource(0), block);
if (bindlessHandle.Type == OperandType.ConstantBuffer) if (bindlessHandle.Type == OperandType.ConstantBuffer)
{ {
@ -107,8 +47,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
continue; continue;
} }
Operand src0 = FindLastOperation(handleCombineOp.GetSource(0), block); Operand src0 = Utils.FindLastOperation(handleCombineOp.GetSource(0), block);
Operand src1 = FindLastOperation(handleCombineOp.GetSource(1), block); Operand src1 = Utils.FindLastOperation(handleCombineOp.GetSource(1), block);
if (src0.Type != OperandType.ConstantBuffer || if (src0.Type != OperandType.ConstantBuffer ||
src1.Type != OperandType.ConstantBuffer || src0.GetCbufSlot() != src1.GetCbufSlot()) src1.Type != OperandType.ConstantBuffer || src0.GetCbufSlot() != src1.GetCbufSlot())
@ -120,7 +60,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
} }
else if (texOp.Inst == Instruction.ImageLoad || texOp.Inst == Instruction.ImageStore) else if (texOp.Inst == Instruction.ImageLoad || texOp.Inst == Instruction.ImageStore)
{ {
Operand src0 = FindLastOperation(texOp.GetSource(0), block); Operand src0 = Utils.FindLastOperation(texOp.GetSource(0), block);
if (src0.Type == OperandType.ConstantBuffer) if (src0.Type == OperandType.ConstantBuffer)
{ {

View file

@ -25,32 +25,29 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{ {
Operand source = operation.GetSource(0); Operand source = operation.GetSource(0);
if (source.AsgOp is Operation asgOperation) int storageIndex = SearchForStorageBase(block, source, sbStart, sbEnd);
if (storageIndex >= 0)
{ {
int storageIndex = SearchForStorageBase(asgOperation, sbStart, sbEnd); // Storage buffers are implemented using global memory access.
// If we know from where the base address of the access is loaded,
// we can guess which storage buffer it is accessing.
// We can then replace the global memory access with a storage
// buffer access.
node = ReplaceGlobalWithStorage(node, config, storageIndex);
}
else if (config.Stage == ShaderStage.Compute && operation.Inst == Instruction.LoadGlobal)
{
// Here we effectively try to replace a LDG instruction with LDC.
// The hardware only supports a limited amount of constant buffers
// so NVN "emulates" more constant buffers using global memory access.
// Here we try to replace the global access back to a constant buffer
// load.
storageIndex = SearchForStorageBase(block, source, UbeBaseOffset, UbeBaseOffset + UbeDescsSize);
if (storageIndex >= 0) if (storageIndex >= 0)
{ {
// Storage buffers are implemented using global memory access. node = ReplaceLdgWithLdc(node, config, storageIndex);
// If we know from where the base address of the access is loaded,
// we can guess which storage buffer it is accessing.
// We can then replace the global memory access with a storage
// buffer access.
node = ReplaceGlobalWithStorage(node, config, storageIndex);
}
else if (config.Stage == ShaderStage.Compute && operation.Inst == Instruction.LoadGlobal)
{
// Here we effectively try to replace a LDG instruction with LDC.
// The hardware only supports a limited amount of constant buffers
// so NVN "emulates" more constant buffers using global memory access.
// Here we try to replace the global access back to a constant buffer
// load.
storageIndex = SearchForStorageBase(asgOperation, UbeBaseOffset, UbeBaseOffset + UbeDescsSize);
if (storageIndex >= 0)
{
node = ReplaceLdgWithLdc(node, config, storageIndex);
}
} }
} }
} }
@ -184,35 +181,70 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
return node; return node;
} }
private static int SearchForStorageBase(Operation operation, int sbStart, int sbEnd) private static int SearchForStorageBase(BasicBlock block, Operand globalAddress, int sbStart, int sbEnd)
{ {
Queue<Operation> assignments = new Queue<Operation>(); globalAddress = Utils.FindLastOperation(globalAddress, block);
assignments.Enqueue(operation); if (globalAddress.Type == OperandType.ConstantBuffer)
while (assignments.TryDequeue(out operation))
{ {
for (int index = 0; index < operation.SourcesCount; index++) return GetStorageIndex(globalAddress, sbStart, sbEnd);
}
Operation operation = globalAddress.AsgOp as Operation;
if (operation == null || operation.Inst != Instruction.Add)
{
return -1;
}
Operand src1 = operation.GetSource(0);
Operand src2 = operation.GetSource(1);
if ((src1.Type == OperandType.LocalVariable && src2.Type == OperandType.Constant) ||
(src2.Type == OperandType.LocalVariable && src1.Type == OperandType.Constant))
{
if (src1.Type == OperandType.LocalVariable)
{ {
Operand source = operation.GetSource(index); operation = Utils.FindLastOperation(src1, block).AsgOp as Operation;
}
else
{
operation = Utils.FindLastOperation(src2, block).AsgOp as Operation;
}
if (source.Type == OperandType.ConstantBuffer) if (operation == null || operation.Inst != Instruction.Add)
{ {
int slot = source.GetCbufSlot(); return -1;
int offset = source.GetCbufOffset(); }
}
if (slot == 0 && offset >= sbStart && offset < sbEnd) for (int index = 0; index < operation.SourcesCount; index++)
{ {
int storageIndex = (offset - sbStart) / StorageDescSize; Operand source = operation.GetSource(index);
return storageIndex; int storageIndex = GetStorageIndex(source, sbStart, sbEnd);
}
}
if (source.AsgOp is Operation asgOperation) if (storageIndex != -1)
{ {
assignments.Enqueue(asgOperation); return storageIndex;
} }
}
return -1;
}
private static int GetStorageIndex(Operand operand, int sbStart, int sbEnd)
{
if (operand.Type == OperandType.ConstantBuffer)
{
int slot = operand.GetCbufSlot();
int offset = operand.GetCbufOffset();
if (slot == 0 && offset >= sbStart && offset < sbEnd)
{
int storageIndex = (offset - sbStart) / StorageDescSize;
return storageIndex;
} }
} }

View file

@ -10,11 +10,22 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{ {
public static void RunPass(BasicBlock[] blocks, ShaderConfig config) public static void RunPass(BasicBlock[] blocks, ShaderConfig config)
{ {
RunOptimizationPasses(blocks);
// These passes look for specific patterns and only need to run once.
for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++) for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
{ {
GlobalToStorage.RunPass(blocks[blkIndex], config); GlobalToStorage.RunPass(blocks[blkIndex], config);
BindlessToIndexed.RunPass(blocks[blkIndex]);
BindlessElimination.RunPass(blocks[blkIndex], config);
} }
// Run optimizations one last time to remove any code that is now optimizable after above passes.
RunOptimizationPasses(blocks);
}
private static void RunOptimizationPasses(BasicBlock[] blocks)
{
bool modified; bool modified;
do do
@ -85,27 +96,6 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
} }
} }
while (modified); while (modified);
for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
{
BindlessToIndexed.RunPass(blocks[blkIndex]);
BindlessElimination.RunPass(blocks[blkIndex], config);
// Try to eliminate any operations that are now unused.
LinkedListNode<INode> node = blocks[blkIndex].Operations.First;
while (node != null)
{
LinkedListNode<INode> nextNode = node.Next;
if (IsUnused(node.Value))
{
RemoveNode(blocks[blkIndex], node);
}
node = nextNode;
}
}
} }
private static void PropagateCopy(Operation copyOp) private static void PropagateCopy(Operation copyOp)

View file

@ -0,0 +1,67 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
    /// <summary>
    /// Helper methods shared by the shader optimization passes.
    /// </summary>
    static class Utils
    {
        /// <summary>
        /// Searches the predecessors of a block for a conditional branch that ends a predecessor
        /// and is associated with the block.
        /// </summary>
        /// <param name="block">Block whose predecessors are inspected</param>
        /// <returns>The last operation of the first matching predecessor, or null if none matches</returns>
        private static Operation FindBranchSource(BasicBlock block)
        {
            foreach (BasicBlock candidate in block.Predecessors)
            {
                // Empty predecessors have no last operation to look at.
                if (candidate.Operations.Count == 0)
                {
                    continue;
                }

                // Match when the last node is an operation and either the block is the candidate's
                // "Next" block with a BranchIfFalse, or its "Branch" block with a BranchIfTrue.
                if (candidate.Operations.Last.Value is Operation terminator &&
                    ((candidate.Next == block && terminator.Inst == Instruction.BranchIfFalse) ||
                     (candidate.Branch == block && terminator.Inst == Instruction.BranchIfTrue)))
                {
                    return terminator;
                }
            }

            return null;
        }

        /// <summary>
        /// Checks if the branch leading into the query block uses the same instruction and the same
        /// condition operand as the branch leading into the current block.
        /// Only the top-most conditional of each block is checked for now.
        /// </summary>
        /// <param name="currentBlock">Block where the value is being used</param>
        /// <param name="queryBlock">Block whose condition is compared against the current block</param>
        /// <returns>True if both branches were found and they match, false otherwise</returns>
        private static bool BlockConditionsMatch(BasicBlock currentBlock, BasicBlock queryBlock)
        {
            Operation currentBranch = FindBranchSource(currentBlock);
            Operation queryBranch = FindBranchSource(queryBlock);

            Operand currentCondition = currentBranch?.GetSource(0);
            Operand queryCondition = queryBranch?.GetSource(0);

            if (currentBranch == null || queryBranch == null)
            {
                return false;
            }

            if (currentBranch.Inst != queryBranch.Inst)
            {
                return false;
            }

            // The condition should be the same operand instance.
            return currentCondition == queryCondition;
        }

        /// <summary>
        /// Resolves an operand assigned by a phi node to one of the phi sources, when the block
        /// that source comes from has the same branch condition as the given block.
        /// </summary>
        /// <param name="source">Operand to resolve</param>
        /// <param name="block">Block where the operand is used</param>
        /// <returns>The selected phi source, or the operand itself if it can't be resolved</returns>
        public static Operand FindLastOperation(Operand source, BasicBlock block)
        {
            if (!(source.AsgOp is PhiNode phi))
            {
                return source;
            }

            // This source can have a different value depending on a previous branch.
            // Only accept a phi source if the conditions met for its block are also met
            // for the current one, scanning from the end to prefer the latest sources.
            int index = phi.SourcesCount;

            while (--index >= 0)
            {
                if (BlockConditionsMatch(block, phi.GetBlock(index)))
                {
                    return phi.GetSource(index);
                }
            }

            return source;
        }
    }
}