diff --git a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs index 2bdb85bf0..e0ad30fe0 100644 --- a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs +++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs @@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache private const ushort FileFormatVersionMajor = 1; private const ushort FileFormatVersionMinor = 2; private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor; - private const uint CodeGenVersion = 4037; + private const uint CodeGenVersion = 4028; private const string SharedTocFileName = "shared.toc"; private const string SharedDataFileName = "shared.data"; diff --git a/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs b/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs index 1be638684..3915c0d55 100644 --- a/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs +++ b/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs @@ -48,5 +48,10 @@ namespace Ryujinx.Graphics.Shader.Translation _ => 0 }; } + + public static int GetConstantUbeOffset(int slot) + { + return UbeBaseOffset + slot * StorageDescSize; + } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs b/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs index ec8fca1da..c280a6d80 100644 --- a/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs +++ b/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs @@ -8,11 +8,14 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations { static class GlobalToStorage { - public static void RunPass(BasicBlock block, ShaderConfig config, ref int sbUseMask) + public static void RunPass(BasicBlock block, ShaderConfig config, ref int sbUseMask, ref int ubeUseMask) { int sbStart = GetStorageBaseCbOffset(config.Stage); int sbEnd = sbStart + StorageDescsSize; + int ubeStart = UbeBaseOffset; + int ubeEnd = UbeBaseOffset + UbeDescsSize; + for (LinkedListNode node = block.Operations.First; node != null; node = node.Next) { for (int index = 0; index < node.Value.SourcesCount; index++) @@ -25,6 +28,16 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations { sbUseMask |= 1 << storageIndex; } + + if (config.Stage == ShaderStage.Compute) + { + int constantIndex = GetStorageIndex(src, ubeStart, ubeEnd); + + if (constantIndex >= 0) + { + ubeUseMask |= 1 << constantIndex; + } + } } if (!(node.Value is Operation operation)) @@ -54,7 +67,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations // so NVN "emulates" more constant buffers using global memory access. // Here we try to replace the global access back to a constant buffer // load. - storageIndex = SearchForStorageBase(block, source, UbeBaseOffset, UbeBaseOffset + UbeDescsSize); + storageIndex = SearchForStorageBase(block, source, ubeStart, ubeStart + ubeEnd); if (storageIndex >= 0) { @@ -64,7 +77,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations } } - config.SetAccessibleStorageBuffersMask(sbUseMask); + config.SetAccessibleBufferMasks(sbUseMask, ubeUseMask); } private static LinkedListNode ReplaceGlobalWithStorage(BasicBlock block, LinkedListNode node, ShaderConfig config, int storageIndex) diff --git a/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs b/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs index a1a2054c0..a2219b36d 100644 --- a/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs +++ b/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs @@ -12,16 +12,17 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations RunOptimizationPasses(blocks); int sbUseMask = 0; + int ubeUseMask = 0; // Those passes are looking for specific patterns and only needs to run once. for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++) { - GlobalToStorage.RunPass(blocks[blkIndex], config, ref sbUseMask); + GlobalToStorage.RunPass(blocks[blkIndex], config, ref sbUseMask, ref ubeUseMask); BindlessToIndexed.RunPass(blocks[blkIndex], config); BindlessElimination.RunPass(blocks[blkIndex], config); } - config.SetAccessibleStorageBuffersMask(sbUseMask); + config.SetAccessibleBufferMasks(sbUseMask, ubeUseMask); // Run optimizations one last time to remove any code that is now optimizable after above passes. RunOptimizationPasses(blocks); diff --git a/Ryujinx.Graphics.Shader/Translation/Rewriter.cs b/Ryujinx.Graphics.Shader/Translation/Rewriter.cs index c4d2c5d90..0c3c4a57d 100644 --- a/Ryujinx.Graphics.Shader/Translation/Rewriter.cs +++ b/Ryujinx.Graphics.Shader/Translation/Rewriter.cs @@ -1,3 +1,4 @@ +using Ryujinx.Graphics.Shader.Decoders; using Ryujinx.Graphics.Shader.IntermediateRepresentation; using System.Collections.Generic; using System.Diagnostics; @@ -89,12 +90,42 @@ namespace Ryujinx.Graphics.Shader.Translation return local; } + Operand PrependExistingOperation(Operation operation) + { + Operand local = Local(); + + operation.Dest = local; + node.List.AddBefore(node, operation); + + return local; + } + Operand addrLow = operation.GetSource(0); Operand addrHigh = operation.GetSource(1); Operand sbBaseAddrLow = Const(0); Operand sbSlot = Const(0); + Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment()); + + Operand BindingRangeCheck(int cbOffset, out Operand baseAddrLow) + { + baseAddrLow = Cbuf(0, cbOffset); + Operand baseAddrHigh = Cbuf(0, cbOffset + 1); + Operand size = Cbuf(0, cbOffset + 2); + + Operand offset = PrependOperation(Instruction.Subtract, addrLow, baseAddrLow); + Operand borrow = PrependOperation(Instruction.CompareLessU32, addrLow, baseAddrLow); + + Operand inRangeLow = PrependOperation(Instruction.CompareLessU32, offset, size); + + Operand addrHighBorrowed = PrependOperation(Instruction.Add, addrHigh, borrow); + + Operand inRangeHigh = PrependOperation(Instruction.CompareEqual, addrHighBorrowed, baseAddrHigh); + + return PrependOperation(Instruction.BitwiseAnd, inRangeLow, inRangeHigh); + } + int sbUseMask = config.AccessibleStorageBuffersMask; while (sbUseMask != 0) @@ -107,20 +138,7 @@ namespace Ryujinx.Graphics.Shader.Translation int cbOffset = GetStorageCbOffset(config.Stage, slot); - Operand baseAddrLow = Cbuf(0, cbOffset); - Operand baseAddrHigh = Cbuf(0, cbOffset + 1); - Operand size = Cbuf(0, cbOffset + 2); - - Operand offset = PrependOperation(Instruction.Subtract, addrLow, baseAddrLow); - Operand borrow = PrependOperation(Instruction.CompareLessU32, addrLow, baseAddrLow); - - Operand inRangeLow = PrependOperation(Instruction.CompareLessU32, offset, size); - - Operand addrHighBorrowed = PrependOperation(Instruction.Add, addrHigh, borrow); - - Operand inRangeHigh = PrependOperation(Instruction.CompareEqual, addrHighBorrowed, baseAddrHigh); - - Operand inRange = PrependOperation(Instruction.BitwiseAnd, inRangeLow, inRangeHigh); + Operand inRange = BindingRangeCheck(cbOffset, out Operand baseAddrLow); sbBaseAddrLow = PrependOperation(Instruction.ConditionalSelect, inRange, baseAddrLow, sbBaseAddrLow); sbSlot = PrependOperation(Instruction.ConditionalSelect, inRange, Const(slot), sbSlot); @@ -128,8 +146,6 @@ namespace Ryujinx.Graphics.Shader.Translation if (config.AccessibleStorageBuffersMask != 0) { - Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment()); - Operand baseAddrTrunc = PrependOperation(Instruction.BitwiseAnd, sbBaseAddrLow, alignMask); Operand byteOffset = PrependOperation(Instruction.Subtract, addrLow, baseAddrTrunc); @@ -178,6 +194,46 @@ namespace Ryujinx.Graphics.Shader.Translation storageOp = new Operation(Instruction.Copy, operation.Dest, Const(0)); } + if (operation.Inst == Instruction.LoadGlobal) + { + int cbeUseMask = config.AccessibleConstantBuffersMask; + + while (cbeUseMask != 0) + { + int slot = BitOperations.TrailingZeroCount(cbeUseMask); + int cbSlot = UbeFirstCbuf + slot; + + cbeUseMask &= ~(1 << slot); + + config.SetUsedConstantBuffer(cbSlot); + + Operand previousResult = PrependExistingOperation(storageOp); + + int cbOffset = GetConstantUbeOffset(slot); + + Operand inRange = BindingRangeCheck(cbOffset, out Operand baseAddrLow); + + Operand baseAddrTruncConst = PrependOperation(Instruction.BitwiseAnd, baseAddrLow, alignMask); + Operand byteOffsetConst = PrependOperation(Instruction.Subtract, addrLow, baseAddrTruncConst); + + Operand cbIndex = PrependOperation(Instruction.ShiftRightU32, byteOffsetConst, Const(2)); + + Operand[] sourcesCb = new Operand[operation.SourcesCount]; + + sourcesCb[0] = Const(cbSlot); + sourcesCb[1] = cbIndex; + + for (int index = 2; index < operation.SourcesCount; index++) + { + sourcesCb[index] = operation.GetSource(index); + } + + Operand ldcResult = PrependOperation(Instruction.LoadConstant, sourcesCb); + + storageOp = new Operation(Instruction.ConditionalSelect, operation.Dest, inRange, ldcResult, previousResult); + } + } + for (int index = 0; index < operation.SourcesCount; index++) { operation.SetSource(index, null); diff --git a/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs b/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs index 85b56b51f..a79ef6f57 100644 --- a/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs +++ b/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs @@ -66,6 +66,7 @@ namespace Ryujinx.Graphics.Shader.Translation public UInt128 ThisInputAttributesComponents { get; private set; } public int AccessibleStorageBuffersMask { get; private set; } + public int AccessibleConstantBuffersMask { get; private set; } private int _usedConstantBuffers; private int _usedStorageBuffers; @@ -100,7 +101,8 @@ namespace Ryujinx.Graphics.Shader.Translation GpuAccessor = gpuAccessor; Options = options; - AccessibleStorageBuffersMask = (1 << GlobalMemory.StorageMaxCount) - 1; + AccessibleStorageBuffersMask = (1 << GlobalMemory.StorageMaxCount) - 1; + AccessibleConstantBuffersMask = (1 << GlobalMemory.UbeMaxCount) - 1; UsedInputAttributesPerPatch = new HashSet(); UsedOutputAttributesPerPatch = new HashSet(); @@ -121,6 +123,11 @@ namespace Ryujinx.Graphics.Shader.Translation OutputTopology = outputTopology; MaxOutputVertices = maxOutputVertices; TransformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled(); + + if (Stage != ShaderStage.Compute) + { + AccessibleConstantBuffersMask = 0; + } } public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationOptions options) : this(gpuAccessor, options) @@ -404,9 +411,10 @@ namespace Ryujinx.Graphics.Shader.Translation UsedFeatures |= flags; } - public void SetAccessibleStorageBuffersMask(int mask) + public void SetAccessibleBufferMasks(int sbMask, int ubeMask) { - AccessibleStorageBuffersMask = mask; + AccessibleStorageBuffersMask = sbMask; + AccessibleConstantBuffersMask = ubeMask; } public void SetUsedConstantBuffer(int slot)