using Ryujinx.Common.Logging;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Engine.Threed;
using Ryujinx.Graphics.Gpu.Memory;
using Ryujinx.Graphics.Gpu.Shader.Cache;
using Ryujinx.Graphics.Gpu.Shader.DiskCache;
using Ryujinx.Graphics.Shader;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Collections.Generic;
using System.Threading;

namespace Ryujinx.Graphics.Gpu.Shader
{
    /// <summary>
    /// Memory cache of shader code.
    /// </summary>
    class ShaderCache : IDisposable
    {
        /// <summary>
        /// Default flags used on the shader translation process.
        /// </summary>
        public const TranslationFlags DefaultFlags = TranslationFlags.DebugMode;

        /// <summary>
        /// Result of translating a single shader stage: the cacheable stage data and the translated program.
        /// </summary>
        private readonly struct TranslatedShader
        {
            public readonly CachedShaderStage Shader;
            public readonly ShaderProgram Program;

            public TranslatedShader(CachedShaderStage shader, ShaderProgram program)
            {
                Shader = shader;
                Program = program;
            }
        }

        /// <summary>
        /// Result of translating a "Vertex A" + "Vertex B" pair into a single program.
        /// </summary>
        private readonly struct TranslatedShaderVertexPair
        {
            public readonly CachedShaderStage VertexA;
            public readonly CachedShaderStage VertexB;
            public readonly ShaderProgram Program;

            public TranslatedShaderVertexPair(CachedShaderStage vertexA, CachedShaderStage vertexB, ShaderProgram program)
            {
                VertexA = vertexA;
                VertexB = vertexB;
                Program = program;
            }
        }

        private readonly GpuContext _context;

        private readonly ShaderDumper _dumper;

        // Fast lookup tables keyed by the GPU virtual address(es) of the shader code.
        private readonly Dictionary<ulong, CachedShaderProgram> _cpPrograms;
        private readonly Dictionary<ShaderAddresses, CachedShaderProgram> _gpPrograms;

        /// <summary>
        /// A cached program together with the host program whose binary should be written to the disk cache.
        /// </summary>
        private readonly struct ProgramToSave
        {
            public readonly CachedShaderProgram CachedProgram;
            public readonly IProgram HostProgram;

            public ProgramToSave(CachedShaderProgram cachedProgram, IProgram hostProgram)
            {
                CachedProgram = cachedProgram;
                HostProgram = hostProgram;
            }
        }

        private readonly Queue<ProgramToSave> _programsToSaveQueue;

        private readonly ComputeShaderCacheHashTable _computeShaderCache;
        private readonly ShaderCacheHashTable _graphicsShaderCache;
        private readonly DiskCacheHostStorage _diskCacheHostStorage;
        private readonly BackgroundDiskCacheWriter _cacheWriter;

        /// <summary>
        /// Event for signalling shader cache loading progress.
        /// </summary>
        public event Action<ShaderCacheState, int, int> ShaderCacheStateChanged;

        /// <summary>
        /// Creates a new instance of the shader cache.
        /// </summary>
        /// <param name="context">GPU context that the shader cache belongs to</param>
        public ShaderCache(GpuContext context)
        {
            _context = context;

            _dumper = new ShaderDumper();

            _cpPrograms = new Dictionary<ulong, CachedShaderProgram>();
            _gpPrograms = new Dictionary<ShaderAddresses, CachedShaderProgram>();

            _programsToSaveQueue = new Queue<ProgramToSave>();

            string diskCacheTitleId = GraphicsConfig.EnableShaderCache && GraphicsConfig.TitleId != null
                ? CacheHelper.GetBaseCacheDirectory(GraphicsConfig.TitleId)
                : null;

            _computeShaderCache = new ComputeShaderCacheHashTable();
            _graphicsShaderCache = new ShaderCacheHashTable();
            _diskCacheHostStorage = new DiskCacheHostStorage(diskCacheTitleId);

            // The background writer only exists while the disk cache is enabled;
            // EnqueueProgramToSave applies the same condition before queueing work for it.
            if (_diskCacheHostStorage.CacheEnabled)
            {
                _cacheWriter = new BackgroundDiskCacheWriter(context, _diskCacheHostStorage);
            }
        }

        /// <summary>
        /// Processes the queue of shaders that must save their binaries to the disk cache.
        /// </summary>
        public void ProcessShaderCacheQueue()
        {
            // Check to see if the binaries for previously compiled shaders are ready, and save them out.
            while (_programsToSaveQueue.TryPeek(out ProgramToSave programToSave))
            {
                ProgramLinkStatus result = programToSave.HostProgram.CheckProgramLink(false);

                if (result == ProgramLinkStatus.Incomplete)
                {
                    // The program at the head of the queue is not ready yet; it is left queued
                    // and processing stops so the queue order is preserved.
                    break;
                }

                if (result == ProgramLinkStatus.Success)
                {
                    _cacheWriter.AddShader(programToSave.CachedProgram, programToSave.HostProgram.GetBinary());
                }

                // Any other completed status is dropped without being saved.
                _programsToSaveQueue.Dequeue();
            }
        }

        /// <summary>
        /// Initialize the cache.
        /// </summary>
        /// <param name="cancellationToken">Cancellation token to cancel the shader cache initialization process</param>
        internal void Initialize(CancellationToken cancellationToken)
        {
            if (_diskCacheHostStorage.CacheEnabled)
            {
                if (!_diskCacheHostStorage.CacheExists())
                {
                    // If we don't have a shader cache on the new format, try to perform migration from the old shader cache.
                    Logger.Info?.Print(LogClass.Gpu, "No shader cache found, trying to migrate from legacy shader cache...");

                    int migrationCount = Migration.MigrateFromLegacyCache(_context, _diskCacheHostStorage);

                    Logger.Info?.Print(LogClass.Gpu, $"Migrated {migrationCount} shaders.");
                }

                ParallelDiskCacheLoader loader = new ParallelDiskCacheLoader(
                    _context,
                    _graphicsShaderCache,
                    _computeShaderCache,
                    _diskCacheHostStorage,
                    cancellationToken,
                    ShaderCacheStateUpdate);

                loader.LoadShaders();

                int errorCount = loader.ErrorCount;
                if (errorCount != 0)
                {
                    Logger.Warning?.Print(LogClass.Gpu, $"Failed to load {errorCount} shaders from the disk cache.");
                }
            }
        }

        /// <summary>
        /// Shader cache state update handler.
        /// </summary>
        /// <param name="state">Current state of the shader cache load process</param>
        /// <param name="current">Number of the current shader being processed</param>
        /// <param name="total">Total number of shaders to process</param>
        private void ShaderCacheStateUpdate(ShaderCacheState state, int current, int total)
        {
            ShaderCacheStateChanged?.Invoke(state, current, total);
        }

        /// <summary>
        /// Gets a compute shader from the cache.
        /// </summary>
        /// <remarks>
        /// This automatically translates, compiles and adds the code to the cache if not present.
        /// </remarks>
        /// <param name="channel">GPU channel</param>
        /// <param name="poolState">Texture pool state</param>
        /// <param name="computeState">Compute engine state</param>
        /// <param name="gpuVa">GPU virtual address of the binary shader code</param>
        /// <returns>Compiled compute shader code</returns>
        public CachedShaderProgram GetComputeShader(
            GpuChannel channel,
            GpuChannelPoolState poolState,
            GpuChannelComputeState computeState,
            ulong gpuVa)
        {
            // First level: lookup by address, validated against the code currently in memory.
            if (_cpPrograms.TryGetValue(gpuVa, out var cpShader) && IsShaderEqual(channel, poolState, cpShader, gpuVa))
            {
                return cpShader;
            }

            // Second level: lookup on the compute shader hash table.
            if (_computeShaderCache.TryFind(channel, poolState, gpuVa, out cpShader, out byte[] cachedGuestCode))
            {
                _cpPrograms[gpuVa] = cpShader;
                return cpShader;
            }

            ShaderSpecializationState specState = new ShaderSpecializationState(computeState);
            GpuAccessorState gpuAccessorState = new GpuAccessorState(poolState, computeState, default, specState);
            GpuAccessor gpuAccessor = new GpuAccessor(_context, channel, gpuAccessorState);

            TranslatorContext translatorContext = DecodeComputeShader(gpuAccessor, gpuVa);

            TranslatedShader translatedShader = TranslateShader(_dumper, channel, translatorContext, cachedGuestCode);

            IProgram hostProgram = _context.Renderer.CreateProgram(
                new ShaderSource[] { CreateShaderSource(translatedShader.Program) },
                new ShaderInfo(-1));

            cpShader = new CachedShaderProgram(hostProgram, specState, translatedShader.Shader);

            _computeShaderCache.Add(cpShader);
            EnqueueProgramToSave(new ProgramToSave(cpShader, hostProgram));
            _cpPrograms[gpuVa] = cpShader;

            return cpShader;
        }

        /// <summary>
        /// Gets a graphics shader program from the shader cache.
        /// This includes all the specified shader stages.
        /// </summary>
        /// <remarks>
        /// This automatically translates, compiles and adds the code to the cache if not present.
        /// </remarks>
        /// <param name="state">GPU state</param>
        /// <param name="channel">GPU channel</param>
        /// <param name="poolState">Texture pool state</param>
        /// <param name="graphicsState">3D engine state</param>
        /// <param name="addresses">Addresses of the shaders for each stage</param>
        /// <returns>Compiled graphics shader code</returns>
        public CachedShaderProgram GetGraphicsShader(
            ref ThreedClassState state,
            GpuChannel channel,
            GpuChannelPoolState poolState,
            GpuChannelGraphicsState graphicsState,
            ShaderAddresses addresses)
        {
            // First level: lookup by addresses, validated against the code currently in memory.
            if (_gpPrograms.TryGetValue(addresses, out var gpShaders) && IsShaderEqual(channel, poolState, graphicsState, gpShaders, addresses))
            {
                return gpShaders;
            }

            // Second level: lookup on the graphics shader hash table.
            if (_graphicsShaderCache.TryFind(channel, poolState, graphicsState, addresses, out gpShaders, out var cachedGuestCode))
            {
                _gpPrograms[addresses] = gpShaders;
                return gpShaders;
            }

            TransformFeedbackDescriptor[] transformFeedbackDescriptors = GetTransformFeedbackDescriptors(ref state);

            ShaderSpecializationState specState = new ShaderSpecializationState(graphicsState, transformFeedbackDescriptors);
            GpuAccessorState gpuAccessorState = new GpuAccessorState(poolState, default, graphicsState, specState, transformFeedbackDescriptors);

            ReadOnlySpan<ulong> addressesSpan = addresses.AsSpan();

            // Slot 0 is reserved for "Vertex A"; stage index N is stored on slot N + 1.
            TranslatorContext[] translatorContexts = new TranslatorContext[Constants.ShaderStages + 1];
            TranslatorContext nextStage = null;

            // Stages are decoded from last to first, so that each stage can be linked to the stage that follows it.
            for (int stageIndex = Constants.ShaderStages - 1; stageIndex >= 0; stageIndex--)
            {
                ulong gpuVa = addressesSpan[stageIndex + 1];

                if (gpuVa != 0)
                {
                    GpuAccessor gpuAccessor = new GpuAccessor(_context, channel, gpuAccessorState, stageIndex);
                    TranslatorContext currentStage = DecodeGraphicsShader(gpuAccessor, DefaultFlags, gpuVa);

                    if (nextStage != null)
                    {
                        currentStage.SetNextStage(nextStage);
                    }

                    if (stageIndex == 0 && addresses.VertexA != 0)
                    {
                        translatorContexts[0] = DecodeGraphicsShader(gpuAccessor, DefaultFlags | TranslationFlags.VertexA, addresses.VertexA);
                    }

                    translatorContexts[stageIndex + 1] = currentStage;
                    nextStage = currentStage;
                }
            }

            CachedShaderStage[] shaders = new CachedShaderStage[Constants.ShaderStages + 1];
            List<ShaderSource> shaderSources = new List<ShaderSource>();

            for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++)
            {
                TranslatorContext currentStage = translatorContexts[stageIndex + 1];

                if (currentStage != null)
                {
                    ShaderProgram program;

                    if (stageIndex == 0 && translatorContexts[0] != null)
                    {
                        TranslatedShaderVertexPair translatedShader = TranslateShader(
                            _dumper,
                            channel,
                            currentStage,
                            translatorContexts[0],
                            cachedGuestCode.VertexACode,
                            cachedGuestCode.VertexBCode);

                        shaders[0] = translatedShader.VertexA;
                        shaders[1] = translatedShader.VertexB;
                        program = translatedShader.Program;
                    }
                    else
                    {
                        byte[] code = cachedGuestCode.GetByIndex(stageIndex);

                        TranslatedShader translatedShader = TranslateShader(_dumper, channel, currentStage, code);

                        shaders[stageIndex + 1] = translatedShader.Shader;
                        program = translatedShader.Program;
                    }

                    if (program != null)
                    {
                        shaderSources.Add(CreateShaderSource(program));
                    }
                }
            }

            // Slot 5 holds the fragment stage (stage index 4, see StageToStageIndex, plus one for the Vertex A slot).
            // -1 is used when there is no fragment shader.
            int fragmentOutputMap = shaders[5]?.Info.FragmentOutputMap ?? -1;
            IProgram hostProgram = _context.Renderer.CreateProgram(shaderSources.ToArray(), new ShaderInfo(fragmentOutputMap));

            gpShaders = new CachedShaderProgram(hostProgram, specState, shaders);

            _graphicsShaderCache.Add(gpShaders);
            EnqueueProgramToSave(new ProgramToSave(gpShaders, hostProgram));
            _gpPrograms[addresses] = gpShaders;

            return gpShaders;
        }

        /// <summary>
        /// Creates a shader source for use with the backend from a translated shader program.
        /// </summary>
        /// <param name="program">Translated shader program</param>
        /// <returns>Shader source</returns>
        public static ShaderSource CreateShaderSource(ShaderProgram program)
        {
            return new ShaderSource(program.Code, program.BinaryCode, program.Info.Stage, program.Language);
        }

        /// <summary>
        /// Puts a program on the queue of programs to be saved on the disk cache.
        /// </summary>
        /// <remarks>
        /// This will not do anything if disk shader cache is disabled.
        /// </remarks>
        /// <param name="programToSave">Program to be saved on disk</param>
        private void EnqueueProgramToSave(ProgramToSave programToSave)
        {
            if (_diskCacheHostStorage.CacheEnabled)
            {
                _programsToSaveQueue.Enqueue(programToSave);
            }
        }

        /// <summary>
        /// Gets transform feedback state from the current GPU state.
        /// </summary>
        /// <param name="state">Current GPU state</param>
        /// <returns>
        /// One descriptor per transform feedback buffer (<see cref="Constants.TotalTransformFeedbackBuffers"/> entries),
        /// or null if transform feedback is disabled
        /// </returns>
        private static TransformFeedbackDescriptor[] GetTransformFeedbackDescriptors(ref ThreedClassState state)
        {
            bool tfEnable = state.TfEnable;
            if (!tfEnable)
            {
                return null;
            }

            TransformFeedbackDescriptor[] descs = new TransformFeedbackDescriptor[Constants.TotalTransformFeedbackBuffers];

            for (int i = 0; i < Constants.TotalTransformFeedbackBuffers; i++)
            {
                var tf = state.TfState[i];

                descs[i] = new TransformFeedbackDescriptor(
                    tf.BufferIndex,
                    tf.Stride,
                    tf.VaryingsCount,
                    ref state.TfVaryingLocations[i]);
            }

            return descs;
        }

        /// <summary>
        /// Checks if compute shader code in memory is equal to the cached shader.
        /// </summary>
        /// <param name="channel">GPU channel using the shader</param>
        /// <param name="poolState">GPU channel state to verify shader compatibility</param>
        /// <param name="cpShader">Cached compute shader</param>
        /// <param name="gpuVa">GPU virtual address of the shader code in memory</param>
        /// <returns>True if the code in memory is equal to the cached code and the specialization state matches, false otherwise</returns>
        private static bool IsShaderEqual(
            GpuChannel channel,
            GpuChannelPoolState poolState,
            CachedShaderProgram cpShader,
            ulong gpuVa)
        {
            if (IsShaderEqual(channel.MemoryManager, cpShader.Shaders[0], gpuVa))
            {
                return cpShader.SpecializationState.MatchesCompute(channel, poolState);
            }

            return false;
        }

        /// <summary>
        /// Checks if graphics shader code from all stages in memory are equal to the cached shaders.
        /// </summary>
        /// <param name="channel">GPU channel using the shader</param>
        /// <param name="poolState">GPU channel state to verify shader compatibility</param>
        /// <param name="graphicsState">GPU channel graphics state to verify shader compatibility</param>
        /// <param name="gpShaders">Cached graphics shaders</param>
        /// <param name="addresses">GPU virtual addresses of all enabled shader stages</param>
        /// <returns>True if the code of every stage is equal to the cached code and the specialization state matches, false otherwise</returns>
        private static bool IsShaderEqual(
            GpuChannel channel,
            GpuChannelPoolState poolState,
            GpuChannelGraphicsState graphicsState,
            CachedShaderProgram gpShaders,
            ShaderAddresses addresses)
        {
            ReadOnlySpan<ulong> addressesSpan = addresses.AsSpan();

            for (int stageIndex = 0; stageIndex < gpShaders.Shaders.Length; stageIndex++)
            {
                CachedShaderStage shader = gpShaders.Shaders[stageIndex];

                ulong gpuVa = addressesSpan[stageIndex];

                if (!IsShaderEqual(channel.MemoryManager, shader, gpuVa))
                {
                    return false;
                }
            }

            return gpShaders.SpecializationState.MatchesGraphics(channel, poolState, graphicsState);
        }

        /// <summary>
        /// Checks if the code of the specified cached shader is equal to the code in memory.
        /// </summary>
        /// <param name="memoryManager">Memory manager used to access the GPU memory where the shader is located</param>
        /// <param name="shader">Cached shader to compare with</param>
        /// <param name="gpuVa">GPU virtual address of the binary shader code</param>
        /// <returns>True if the cached shader is null or its code is equal to the code in memory, false otherwise</returns>
        private static bool IsShaderEqual(MemoryManager memoryManager, CachedShaderStage shader, ulong gpuVa)
        {
            // A null cached stage has nothing to compare, and is treated as a match.
            if (shader == null)
            {
                return true;
            }

            ReadOnlySpan<byte> memoryCode = memoryManager.GetSpan(gpuVa, shader.Code.Length);

            return memoryCode.SequenceEqual(shader.Code);
        }

        /// <summary>
        /// Decode the binary Maxwell shader code to a translator context.
        /// </summary>
        /// <param name="gpuAccessor">GPU state accessor</param>
        /// <param name="gpuVa">GPU virtual address of the binary shader code</param>
        /// <returns>The generated translator context</returns>
        public static TranslatorContext DecodeComputeShader(IGpuAccessor gpuAccessor, ulong gpuVa)
        {
            var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, DefaultFlags | TranslationFlags.Compute);
            return Translator.CreateContext(gpuVa, gpuAccessor, options);
        }

        /// <summary>
        /// Decode the binary Maxwell shader code to a translator context.
        /// </summary>
        /// <remarks>
        /// A "Vertex A" shader is decoded on its own context, by including <see cref="TranslationFlags.VertexA"/> on the flags.
        /// It is combined with the "Vertex B" shader later, when the contexts are translated.
        /// </remarks>
        /// <param name="gpuAccessor">GPU state accessor</param>
        /// <param name="flags">Flags that controls shader translation</param>
        /// <param name="gpuVa">GPU virtual address of the shader code</param>
        /// <returns>The generated translator context</returns>
        public static TranslatorContext DecodeGraphicsShader(IGpuAccessor gpuAccessor, TranslationFlags flags, ulong gpuVa)
        {
            var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, flags);
            return Translator.CreateContext(gpuVa, gpuAccessor, options);
        }

        /// <summary>
        /// Translates a previously generated translator context to something that the host API accepts.
        /// </summary>
        /// <param name="dumper">Optional shader code dumper</param>
        /// <param name="channel">GPU channel using the shader</param>
        /// <param name="currentStage">Translator context of the stage to be translated</param>
        /// <param name="vertexA">Translator context of the "Vertex A" shader that should be combined with the current stage</param>
        /// <param name="codeA">Optional Maxwell binary code of the Vertex A shader, if present</param>
        /// <param name="codeB">Optional Maxwell binary code of the Vertex B or current stage shader, if present on cache</param>
        /// <returns>Compiled graphics shader code</returns>
        private static TranslatedShaderVertexPair TranslateShader(
            ShaderDumper dumper,
            GpuChannel channel,
            TranslatorContext currentStage,
            TranslatorContext vertexA,
            byte[] codeA,
            byte[] codeB)
        {
            ulong cb1DataAddress = channel.BufferManager.GetGraphicsUniformBufferAddress(0, 1);

            var memoryManager = channel.MemoryManager;

            // When the guest code was not supplied by the caller, it is read from GPU memory.
            codeA ??= memoryManager.GetSpan(vertexA.Address, vertexA.Size).ToArray();
            codeB ??= memoryManager.GetSpan(currentStage.Address, currentStage.Size).ToArray();
            byte[] cb1DataA = memoryManager.Physical.GetSpan(cb1DataAddress, vertexA.Cb1DataSize).ToArray();
            byte[] cb1DataB = memoryManager.Physical.GetSpan(cb1DataAddress, currentStage.Cb1DataSize).ToArray();

            ShaderDumpPaths pathsA = default;
            ShaderDumpPaths pathsB = default;

            if (dumper != null)
            {
                pathsA = dumper.Dump(codeA, compute: false);
                pathsB = dumper.Dump(codeB, compute: false);
            }

            ShaderProgram program = currentStage.Translate(vertexA);

            pathsB.Prepend(program);
            pathsA.Prepend(program);

            // Both stages produce a single program, the program info is only stored on the Vertex B stage.
            CachedShaderStage vertexAStage = new CachedShaderStage(null, codeA, cb1DataA);
            CachedShaderStage vertexBStage = new CachedShaderStage(program.Info, codeB, cb1DataB);

            return new TranslatedShaderVertexPair(vertexAStage, vertexBStage, program);
        }

        /// <summary>
        /// Translates a previously generated translator context to something that the host API accepts.
        /// </summary>
        /// <param name="dumper">Optional shader code dumper</param>
        /// <param name="channel">GPU channel using the shader</param>
        /// <param name="context">Translator context of the stage to be translated</param>
        /// <param name="code">Optional Maxwell binary code of the current stage shader, if present on cache</param>
        /// <returns>Compiled graphics shader code</returns>
        private static TranslatedShader TranslateShader(ShaderDumper dumper, GpuChannel channel, TranslatorContext context, byte[] code)
        {
            var memoryManager = channel.MemoryManager;

            ulong cb1DataAddress = context.Stage == ShaderStage.Compute
                ? channel.BufferManager.GetComputeUniformBufferAddress(1)
                : channel.BufferManager.GetGraphicsUniformBufferAddress(StageToStageIndex(context.Stage), 1);

            byte[] cb1Data = memoryManager.Physical.GetSpan(cb1DataAddress, context.Cb1DataSize).ToArray();

            // When the guest code was not supplied by the caller, it is read from GPU memory.
            code ??= memoryManager.GetSpan(context.Address, context.Size).ToArray();

            ShaderDumpPaths paths = dumper?.Dump(code, context.Stage == ShaderStage.Compute) ?? default;
            ShaderProgram program = context.Translate();

            paths.Prepend(program);

            return new TranslatedShader(new CachedShaderStage(program.Info, code, cb1Data), program);
        }

        /// <summary>
        /// Gets the index of a stage from a <see cref="ShaderStage"/>.
        /// </summary>
        /// <param name="stage">Stage to get the index from</param>
        /// <returns>Stage index</returns>
        private static int StageToStageIndex(ShaderStage stage)
        {
            return stage switch
            {
                ShaderStage.TessellationControl => 1,
                ShaderStage.TessellationEvaluation => 2,
                ShaderStage.Geometry => 3,
                ShaderStage.Fragment => 4,
                _ => 0
            };
        }

        /// <summary>
        /// Disposes the shader cache, deleting all the cached shaders.
        /// It's an error to use the shader cache after disposal.
        /// </summary>
        public void Dispose()
        {
            foreach (CachedShaderProgram program in _graphicsShaderCache.GetPrograms())
            {
                program.Dispose();
            }

            foreach (CachedShaderProgram program in _computeShaderCache.GetPrograms())
            {
                program.Dispose();
            }

            _cacheWriter?.Dispose();
        }
    }
}