diff --git a/src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs b/src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs index c619b9bbc..4e6c6a5df 100644 --- a/src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs +++ b/src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs @@ -247,6 +247,17 @@ namespace Ryujinx.Graphics.Shader.Decoders { block.AddPushOp(op); } + else if (op.Name == InstName.Ldl || op.Name == InstName.Stl) + { + config.SetUsedFeature(FeatureFlags.LocalMemory); + } + else if (op.Name == InstName.Atoms || + op.Name == InstName.AtomsCas || + op.Name == InstName.Lds || + op.Name == InstName.Sts) + { + config.SetUsedFeature(FeatureFlags.SharedMemory); + } block.OpCodes.Add(op); diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs index 99d7bec97..40312f4a4 100644 --- a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs +++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs @@ -27,6 +27,12 @@ namespace Ryujinx.Graphics.Shader.Instructions public static void Atoms(EmitterContext context) { + if (context.Config.Stage != ShaderStage.Compute) + { + context.Config.GpuAccessor.Log($"Atoms instruction is not valid on \"{context.Config.Stage}\" stage."); + return; + } + InstAtoms op = context.GetOp(); Operand offset = context.ShiftRightU32(GetSrcReg(context, op.SrcA), Const(2)); @@ -114,6 +120,12 @@ namespace Ryujinx.Graphics.Shader.Instructions public static void Lds(EmitterContext context) { + if (context.Config.Stage != ShaderStage.Compute) + { + context.Config.GpuAccessor.Log($"Lds instruction is not valid on \"{context.Config.Stage}\" stage."); + return; + } + InstLds op = context.GetOp(); EmitLoad(context, StorageKind.SharedMemory, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24)); @@ -144,6 +156,12 @@ namespace Ryujinx.Graphics.Shader.Instructions public static void Sts(EmitterContext context) { + if (context.Config.Stage != ShaderStage.Compute) + { + 
context.Config.GpuAccessor.Log($"Sts instruction is not valid on \"{context.Config.Stage}\" stage."); + return; + } + InstSts op = context.GetOp(); EmitStore(context, StorageKind.SharedMemory, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24)); diff --git a/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs b/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs index e55ed13da..59d35d906 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs @@ -21,6 +21,8 @@ namespace Ryujinx.Graphics.Shader.Translation RtLayer = 1 << 5, IaIndexing = 1 << 7, OaIndexing = 1 << 8, - FixedFuncAttr = 1 << 9 + FixedFuncAttr = 1 << 9, + LocalMemory = 1 << 10, + SharedMemory = 1 << 11 } } diff --git a/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs b/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs index c58e4828b..3a46f6e4e 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs @@ -8,6 +8,11 @@ namespace Ryujinx.Graphics.Shader.Translation { class ResourceManager { + // These values are used if the shader has local or shared memory access, + // but for some reason the supplied size was 0. 
+ private const int DefaultLocalMemorySize = 128; + private const int DefaultSharedMemorySize = 4096; + private static readonly string[] _stagePrefixes = new string[] { "cp", "vp", "tcp", "tep", "gp", "fp" }; private readonly IGpuAccessor _gpuAccessor; @@ -23,12 +28,12 @@ namespace Ryujinx.Graphics.Shader.Translation private readonly HashSet _usedConstantBufferBindings; - public int LocalMemoryId { get; } - public int SharedMemoryId { get; } + public int LocalMemoryId { get; private set; } + public int SharedMemoryId { get; private set; } public ShaderProperties Properties => _properties; - public ResourceManager(ShaderStage stage, IGpuAccessor gpuAccessor, ShaderProperties properties, int localMemorySize) + public ResourceManager(ShaderStage stage, IGpuAccessor gpuAccessor, ShaderProperties properties) { _gpuAccessor = gpuAccessor; _properties = properties; @@ -48,21 +53,43 @@ namespace Ryujinx.Graphics.Shader.Translation LocalMemoryId = -1; SharedMemoryId = -1; + } - if (localMemorySize != 0) + public void SetCurrentLocalMemory(int size, bool isUsed) + { + if (isUsed) { - var lmem = new MemoryDefinition("local_memory", AggregateType.Array | AggregateType.U32, BitUtils.DivRoundUp(localMemorySize, sizeof(uint))); + if (size <= 0) + { + size = DefaultLocalMemorySize; + } - LocalMemoryId = properties.AddLocalMemory(lmem); + var lmem = new MemoryDefinition("local_memory", AggregateType.Array | AggregateType.U32, BitUtils.DivRoundUp(size, sizeof(uint))); + + LocalMemoryId = Properties.AddLocalMemory(lmem); } - - int sharedMemorySize = stage == ShaderStage.Compute ? 
gpuAccessor.QueryComputeSharedMemorySize() : 0; - - if (sharedMemorySize != 0) + else { - var smem = new MemoryDefinition("shared_memory", AggregateType.Array | AggregateType.U32, BitUtils.DivRoundUp(sharedMemorySize, sizeof(uint))); + LocalMemoryId = -1; + } + } - SharedMemoryId = properties.AddSharedMemory(smem); + public void SetCurrentSharedMemory(int size, bool isUsed) + { + if (isUsed) + { + if (size <= 0) + { + size = DefaultSharedMemorySize; + } + + var smem = new MemoryDefinition("shared_memory", AggregateType.Array | AggregateType.U32, BitUtils.DivRoundUp(size, sizeof(uint))); + + SharedMemoryId = Properties.AddSharedMemory(smem); + } + else + { + SharedMemoryId = -1; } } diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs index fa1250022..e50c9a845 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs @@ -126,9 +126,10 @@ namespace Ryujinx.Graphics.Shader.Translation public ShaderConfig(ShaderStage stage, IGpuAccessor gpuAccessor, TranslationOptions options, int localMemorySize) { - Stage = stage; - GpuAccessor = gpuAccessor; - Options = options; + Stage = stage; + GpuAccessor = gpuAccessor; + Options = options; + LocalMemorySize = localMemorySize; _transformFeedbackDefinitions = new Dictionary(); @@ -143,7 +144,7 @@ namespace Ryujinx.Graphics.Shader.Translation _usedTextures = new Dictionary(); _usedImages = new Dictionary(); - ResourceManager = new ResourceManager(stage, gpuAccessor, new ShaderProperties(), localMemorySize); + ResourceManager = new ResourceManager(stage, gpuAccessor, new ShaderProperties()); if (!gpuAccessor.QueryHostSupportsTransformFeedback() && gpuAccessor.QueryTransformFeedbackEnabled()) { @@ -192,7 +193,6 @@ namespace Ryujinx.Graphics.Shader.Translation ThreadsPerInputPrimitive = header.ThreadsPerInputPrimitive; OutputTopology = header.OutputTopology; MaxOutputVertices = 
header.MaxOutputVertexCount; - LocalMemorySize = header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize + (header.ShaderLocalMemoryCrsSize / ThreadsPerWarp); ImapTypes = header.ImapTypes; OmapTargets = header.OmapTargets; OmapSampleMask = header.OmapSampleMask; diff --git a/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs b/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs index 9647b13f1..13c5e0e40 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs @@ -149,6 +149,17 @@ namespace Ryujinx.Graphics.Shader.Translation public ShaderProgram Translate(TranslatorContext other = null) { + bool usesLocalMemory = _config.UsedFeatures.HasFlag(FeatureFlags.LocalMemory); + + _config.ResourceManager.SetCurrentLocalMemory(_config.LocalMemorySize, usesLocalMemory); + + if (_config.Stage == ShaderStage.Compute) + { + bool usesSharedMemory = _config.UsedFeatures.HasFlag(FeatureFlags.SharedMemory); + + _config.ResourceManager.SetCurrentSharedMemory(GpuAccessor.QueryComputeSharedMemorySize(), usesSharedMemory); + } + FunctionCode[] code = EmitShader(_program, _config, initializeOutputs: other == null, out _); if (other != null) @@ -157,6 +168,7 @@ namespace Ryujinx.Graphics.Shader.Translation // We need to share the resource manager since both shaders accesses the same constant buffers. other._config.ResourceManager = _config.ResourceManager; + other._config.ResourceManager.SetCurrentLocalMemory(other._config.LocalMemorySize, other._config.UsedFeatures.HasFlag(FeatureFlags.LocalMemory)); FunctionCode[] otherCode = EmitShader(other._program, other._config, initializeOutputs: true, out int aStart);