Ryujinx/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
gdkchan 43ebd7a9bb
New shader cache implementation (#3194)
* New shader cache implementation

* Remove some debug code

* Take transform feedback varying count into account

* Create shader cache directory if it does not exist + fragment output map related fixes

* Remove debug code

* Only check texture descriptors if the constant buffer is bound

* Also check CPU VA on GetSpanMapped

* Remove more unused code and move cache related code

* XML docs + remove more unused methods

* Better codegen for TransformFeedbackDescriptor.AsSpan

* Support migration from old cache format, remove more unused code

Shader cache rebuild now also rewrites the shared toc and data files

* Fix migration error with BRX shaders

* Add a limit to the async translation queue

 Avoid async translation threads not being able to keep up and the queue growing very large

* Re-create specialization state on recompile

This might be required if a new version of the shader translator requires more or less state, or if there is a bug related to the GPU state access

* Make shader cache more error resilient

* Add some missing XML docs and move GpuAccessor docs to the interface/use inheritdoc

* Address early PR feedback

* Fix rebase

* Remove IRenderer.CompileShader and IShader interface, replace with new ShaderSource struct passed to CreateProgram directly

* Handle some missing exceptions

* Make shader cache purge delete both old and new shader caches

* Register textures on new specialization state

* Translate and compile shaders in forward order (eliminates diffs due to different binding numbers)

* Limit in-flight shader compilation to the maximum number of compilation threads

* Replace ParallelDiskCacheLoader state changed event with a callback function

* Better handling for invalid constant buffer 1 data length

* Do not create the old cache directory structure if the old cache does not exist

* Constant buffer use should be per-stage. This change will invalidate existing new caches (file format version was incremented)

* Replace rectangle texture with just coordinate normalization

* Skip incompatible shaders that are missing texture information, instead of crashing

This is required if we, for example, support new texture instruction to the shader translator, and then they allow access to textures that were not accessed before. In this scenario, the old cache entry is no longer usable

* Fix coordinates normalization on cubemap textures

* Check if title ID is null before combining shader cache path

* More robust constant buffer address validation on spec state

* More robust constant buffer address validation on spec state (2)

* Regenerate shader cache with one stream, rather than one per shader.

* Only create shader cache directory during initialization

* Logging improvements

* Proper shader program disposal

* PR feedback, and add a comment on serialized structs

* XML docs for RegisterTexture

Co-authored-by: riperiperi <rhy3756547@hotmail.com>
2022-04-10 10:49:44 -03:00

615 lines
26 KiB
C#

using Ryujinx.Common.Logging;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Engine.Threed;
using Ryujinx.Graphics.Gpu.Memory;
using Ryujinx.Graphics.Gpu.Shader.Cache;
using Ryujinx.Graphics.Gpu.Shader.DiskCache;
using Ryujinx.Graphics.Shader;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Collections.Generic;
using System.Threading;
namespace Ryujinx.Graphics.Gpu.Shader
{
/// <summary>
/// Memory cache of shader code.
/// </summary>
class ShaderCache : IDisposable
{
/// <summary>
/// Default flags used on the shader translation process.
/// </summary>
public const TranslationFlags DefaultFlags = TranslationFlags.DebugMode;
private struct TranslatedShader
{
public readonly CachedShaderStage Shader;
public readonly ShaderProgram Program;
public TranslatedShader(CachedShaderStage shader, ShaderProgram program)
{
Shader = shader;
Program = program;
}
}
private struct TranslatedShaderVertexPair
{
public readonly CachedShaderStage VertexA;
public readonly CachedShaderStage VertexB;
public readonly ShaderProgram Program;
public TranslatedShaderVertexPair(CachedShaderStage vertexA, CachedShaderStage vertexB, ShaderProgram program)
{
VertexA = vertexA;
VertexB = vertexB;
Program = program;
}
}
private readonly GpuContext _context;
private readonly ShaderDumper _dumper;
private readonly Dictionary<ulong, CachedShaderProgram> _cpPrograms;
private readonly Dictionary<ShaderAddresses, CachedShaderProgram> _gpPrograms;
private struct ProgramToSave
{
public readonly CachedShaderProgram CachedProgram;
public readonly IProgram HostProgram;
public ProgramToSave(CachedShaderProgram cachedProgram, IProgram hostProgram)
{
CachedProgram = cachedProgram;
HostProgram = hostProgram;
}
}
private Queue<ProgramToSave> _programsToSaveQueue;
private readonly ComputeShaderCacheHashTable _computeShaderCache;
private readonly ShaderCacheHashTable _graphicsShaderCache;
private readonly DiskCacheHostStorage _diskCacheHostStorage;
private readonly BackgroundDiskCacheWriter _cacheWriter;
/// <summary>
/// Event for signalling shader cache loading progress.
/// </summary>
public event Action<ShaderCacheState, int, int> ShaderCacheStateChanged;
/// <summary>
/// Creates a new instance of the shader cache.
/// </summary>
/// <param name="context">GPU context that the shader cache belongs to</param>
public ShaderCache(GpuContext context)
{
_context = context;
_dumper = new ShaderDumper();
_cpPrograms = new Dictionary<ulong, CachedShaderProgram>();
_gpPrograms = new Dictionary<ShaderAddresses, CachedShaderProgram>();
_programsToSaveQueue = new Queue<ProgramToSave>();
string diskCacheTitleId = GraphicsConfig.EnableShaderCache && GraphicsConfig.TitleId != null
? CacheHelper.GetBaseCacheDirectory(GraphicsConfig.TitleId)
: null;
_computeShaderCache = new ComputeShaderCacheHashTable();
_graphicsShaderCache = new ShaderCacheHashTable();
_diskCacheHostStorage = new DiskCacheHostStorage(diskCacheTitleId);
if (_diskCacheHostStorage.CacheEnabled)
{
_cacheWriter = new BackgroundDiskCacheWriter(context, _diskCacheHostStorage);
}
}
/// <summary>
/// Processes the queue of shaders that must save their binaries to the disk cache.
/// </summary>
public void ProcessShaderCacheQueue()
{
// Check to see if the binaries for previously compiled shaders are ready, and save them out.
while (_programsToSaveQueue.TryPeek(out ProgramToSave programToSave))
{
ProgramLinkStatus result = programToSave.HostProgram.CheckProgramLink(false);
if (result != ProgramLinkStatus.Incomplete)
{
if (result == ProgramLinkStatus.Success)
{
_cacheWriter.AddShader(programToSave.CachedProgram, programToSave.HostProgram.GetBinary());
}
_programsToSaveQueue.Dequeue();
}
else
{
break;
}
}
}
/// <summary>
/// Initialize the cache.
/// </summary>
/// <param name="cancellationToken">Cancellation token to cancel the shader cache initialization process</param>
internal void Initialize(CancellationToken cancellationToken)
{
if (_diskCacheHostStorage.CacheEnabled)
{
if (!_diskCacheHostStorage.CacheExists())
{
// If we don't have a shader cache on the new format, try to perform migration from the old shader cache.
Logger.Info?.Print(LogClass.Gpu, "No shader cache found, trying to migrate from legacy shader cache...");
int migrationCount = Migration.MigrateFromLegacyCache(_context, _diskCacheHostStorage);
Logger.Info?.Print(LogClass.Gpu, $"Migrated {migrationCount} shaders.");
}
ParallelDiskCacheLoader loader = new ParallelDiskCacheLoader(
_context,
_graphicsShaderCache,
_computeShaderCache,
_diskCacheHostStorage,
cancellationToken,
ShaderCacheStateUpdate);
loader.LoadShaders();
int errorCount = loader.ErrorCount;
if (errorCount != 0)
{
Logger.Warning?.Print(LogClass.Gpu, $"Failed to load {errorCount} shaders from the disk cache.");
}
}
}
/// <summary>
/// Shader cache state update handler.
/// </summary>
/// <param name="state">Current state of the shader cache load process</param>
/// <param name="current">Number of the current shader being processed</param>
/// <param name="total">Total number of shaders to process</param>
private void ShaderCacheStateUpdate(ShaderCacheState state, int current, int total)
{
ShaderCacheStateChanged?.Invoke(state, current, total);
}
/// <summary>
/// Gets a compute shader from the cache.
/// </summary>
/// <remarks>
/// This automatically translates, compiles and adds the code to the cache if not present.
/// </remarks>
/// <param name="channel">GPU channel</param>
/// <param name="poolState">Texture pool state</param>
/// <param name="computeState">Compute engine state</param>
/// <param name="gpuVa">GPU virtual address of the binary shader code</param>
/// <returns>Compiled compute shader code</returns>
public CachedShaderProgram GetComputeShader(
GpuChannel channel,
GpuChannelPoolState poolState,
GpuChannelComputeState computeState,
ulong gpuVa)
{
if (_cpPrograms.TryGetValue(gpuVa, out var cpShader) && IsShaderEqual(channel, poolState, cpShader, gpuVa))
{
return cpShader;
}
if (_computeShaderCache.TryFind(channel, poolState, gpuVa, out cpShader, out byte[] cachedGuestCode))
{
_cpPrograms[gpuVa] = cpShader;
return cpShader;
}
ShaderSpecializationState specState = new ShaderSpecializationState(computeState);
GpuAccessorState gpuAccessorState = new GpuAccessorState(poolState, computeState, default, specState);
GpuAccessor gpuAccessor = new GpuAccessor(_context, channel, gpuAccessorState);
TranslatorContext translatorContext = DecodeComputeShader(gpuAccessor, gpuVa);
TranslatedShader translatedShader = TranslateShader(_dumper, channel, translatorContext, cachedGuestCode);
IProgram hostProgram = _context.Renderer.CreateProgram(new ShaderSource[] { CreateShaderSource(translatedShader.Program) }, new ShaderInfo(-1));
cpShader = new CachedShaderProgram(hostProgram, specState, translatedShader.Shader);
_computeShaderCache.Add(cpShader);
EnqueueProgramToSave(new ProgramToSave(cpShader, hostProgram));
_cpPrograms[gpuVa] = cpShader;
return cpShader;
}
/// <summary>
/// Gets a graphics shader program from the shader cache.
/// This includes all the specified shader stages.
/// </summary>
/// <remarks>
/// This automatically translates, compiles and adds the code to the cache if not present.
/// </remarks>
/// <param name="state">GPU state</param>
/// <param name="channel">GPU channel</param>
/// <param name="poolState">Texture pool state</param>
/// <param name="graphicsState">3D engine state</param>
/// <param name="addresses">Addresses of the shaders for each stage</param>
/// <returns>Compiled graphics shader code</returns>
public CachedShaderProgram GetGraphicsShader(
ref ThreedClassState state,
GpuChannel channel,
GpuChannelPoolState poolState,
GpuChannelGraphicsState graphicsState,
ShaderAddresses addresses)
{
if (_gpPrograms.TryGetValue(addresses, out var gpShaders) && IsShaderEqual(channel, poolState, gpShaders, addresses))
{
return gpShaders;
}
if (_graphicsShaderCache.TryFind(channel, poolState, addresses, out gpShaders, out var cachedGuestCode))
{
_gpPrograms[addresses] = gpShaders;
return gpShaders;
}
TransformFeedbackDescriptor[] transformFeedbackDescriptors = GetTransformFeedbackDescriptors(ref state);
ShaderSpecializationState specState = new ShaderSpecializationState(graphicsState, transformFeedbackDescriptors);
GpuAccessorState gpuAccessorState = new GpuAccessorState(poolState, default, graphicsState, specState, transformFeedbackDescriptors);
ReadOnlySpan<ulong> addressesSpan = addresses.AsSpan();
TranslatorContext[] translatorContexts = new TranslatorContext[Constants.ShaderStages + 1];
TranslatorContext nextStage = null;
for (int stageIndex = Constants.ShaderStages - 1; stageIndex >= 0; stageIndex--)
{
ulong gpuVa = addressesSpan[stageIndex + 1];
if (gpuVa != 0)
{
GpuAccessor gpuAccessor = new GpuAccessor(_context, channel, gpuAccessorState, stageIndex);
TranslatorContext currentStage = DecodeGraphicsShader(gpuAccessor, DefaultFlags, gpuVa);
if (nextStage != null)
{
currentStage.SetNextStage(nextStage);
}
if (stageIndex == 0 && addresses.VertexA != 0)
{
translatorContexts[0] = DecodeGraphicsShader(gpuAccessor, DefaultFlags | TranslationFlags.VertexA, addresses.VertexA);
}
translatorContexts[stageIndex + 1] = currentStage;
nextStage = currentStage;
}
}
CachedShaderStage[] shaders = new CachedShaderStage[Constants.ShaderStages + 1];
List<ShaderSource> shaderSources = new List<ShaderSource>();
for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++)
{
TranslatorContext currentStage = translatorContexts[stageIndex + 1];
if (currentStage != null)
{
ShaderProgram program;
if (stageIndex == 0 && translatorContexts[0] != null)
{
TranslatedShaderVertexPair translatedShader = TranslateShader(
_dumper,
channel,
currentStage,
translatorContexts[0],
cachedGuestCode.VertexACode,
cachedGuestCode.VertexBCode);
shaders[0] = translatedShader.VertexA;
shaders[1] = translatedShader.VertexB;
program = translatedShader.Program;
}
else
{
byte[] code = cachedGuestCode.GetByIndex(stageIndex);
TranslatedShader translatedShader = TranslateShader(_dumper, channel, currentStage, code);
shaders[stageIndex + 1] = translatedShader.Shader;
program = translatedShader.Program;
}
if (program != null)
{
shaderSources.Add(CreateShaderSource(program));
}
}
}
int fragmentOutputMap = shaders[5]?.Info.FragmentOutputMap ?? -1;
IProgram hostProgram = _context.Renderer.CreateProgram(shaderSources.ToArray(), new ShaderInfo(fragmentOutputMap));
gpShaders = new CachedShaderProgram(hostProgram, specState, shaders);
_graphicsShaderCache.Add(gpShaders);
EnqueueProgramToSave(new ProgramToSave(gpShaders, hostProgram));
_gpPrograms[addresses] = gpShaders;
return gpShaders;
}
/// <summary>
/// Creates a shader source for use with the backend from a translated shader program.
/// </summary>
/// <param name="program">Translated shader program</param>
/// <returns>Shader source</returns>
public static ShaderSource CreateShaderSource(ShaderProgram program)
{
return new ShaderSource(program.Code, program.BinaryCode, program.Info.Stage, program.Language);
}
/// <summary>
/// Puts a program on the queue of programs to be saved on the disk cache.
/// </summary>
/// <remarks>
/// This will not do anything if disk shader cache is disabled.
/// </remarks>
/// <param name="programToSave">Program to be saved on disk</param>
private void EnqueueProgramToSave(ProgramToSave programToSave)
{
if (_diskCacheHostStorage.CacheEnabled)
{
_programsToSaveQueue.Enqueue(programToSave);
}
}
/// <summary>
/// Gets transform feedback state from the current GPU state.
/// </summary>
/// <param name="state">Current GPU state</param>
/// <returns>Four transform feedback descriptors for the enabled TFBs, or null if TFB is disabled</returns>
private static TransformFeedbackDescriptor[] GetTransformFeedbackDescriptors(ref ThreedClassState state)
{
bool tfEnable = state.TfEnable;
if (!tfEnable)
{
return null;
}
TransformFeedbackDescriptor[] descs = new TransformFeedbackDescriptor[Constants.TotalTransformFeedbackBuffers];
for (int i = 0; i < Constants.TotalTransformFeedbackBuffers; i++)
{
var tf = state.TfState[i];
descs[i] = new TransformFeedbackDescriptor(
tf.BufferIndex,
tf.Stride,
tf.VaryingsCount,
ref state.TfVaryingLocations[i]);
}
return descs;
}
/// <summary>
/// Checks if compute shader code in memory is equal to the cached shader.
/// </summary>
/// <param name="channel">GPU channel using the shader</param>
/// <param name="poolState">GPU channel state to verify shader compatibility</param>
/// <param name="cpShader">Cached compute shader</param>
/// <param name="gpuVa">GPU virtual address of the shader code in memory</param>
/// <returns>True if the code is different, false otherwise</returns>
private static bool IsShaderEqual(
GpuChannel channel,
GpuChannelPoolState poolState,
CachedShaderProgram cpShader,
ulong gpuVa)
{
if (IsShaderEqual(channel.MemoryManager, cpShader.Shaders[0], gpuVa))
{
return cpShader.SpecializationState.MatchesCompute(channel, poolState);
}
return false;
}
/// <summary>
/// Checks if graphics shader code from all stages in memory are equal to the cached shaders.
/// </summary>
/// <param name="channel">GPU channel using the shader</param>
/// <param name="poolState">GPU channel state to verify shader compatibility</param>
/// <param name="gpShaders">Cached graphics shaders</param>
/// <param name="addresses">GPU virtual addresses of all enabled shader stages</param>
/// <returns>True if the code is different, false otherwise</returns>
private static bool IsShaderEqual(
GpuChannel channel,
GpuChannelPoolState poolState,
CachedShaderProgram gpShaders,
ShaderAddresses addresses)
{
ReadOnlySpan<ulong> addressesSpan = addresses.AsSpan();
for (int stageIndex = 0; stageIndex < gpShaders.Shaders.Length; stageIndex++)
{
CachedShaderStage shader = gpShaders.Shaders[stageIndex];
ulong gpuVa = addressesSpan[stageIndex];
if (!IsShaderEqual(channel.MemoryManager, shader, gpuVa))
{
return false;
}
}
return gpShaders.SpecializationState.MatchesGraphics(channel, poolState);
}
/// <summary>
/// Checks if the code of the specified cached shader is different from the code in memory.
/// </summary>
/// <param name="memoryManager">Memory manager used to access the GPU memory where the shader is located</param>
/// <param name="shader">Cached shader to compare with</param>
/// <param name="gpuVa">GPU virtual address of the binary shader code</param>
/// <returns>True if the code is different, false otherwise</returns>
private static bool IsShaderEqual(MemoryManager memoryManager, CachedShaderStage shader, ulong gpuVa)
{
if (shader == null)
{
return true;
}
ReadOnlySpan<byte> memoryCode = memoryManager.GetSpan(gpuVa, shader.Code.Length);
return memoryCode.SequenceEqual(shader.Code);
}
/// <summary>
/// Decode the binary Maxwell shader code to a translator context.
/// </summary>
/// <param name="gpuAccessor">GPU state accessor</param>
/// <param name="gpuVa">GPU virtual address of the binary shader code</param>
/// <returns>The generated translator context</returns>
public static TranslatorContext DecodeComputeShader(IGpuAccessor gpuAccessor, ulong gpuVa)
{
var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, DefaultFlags | TranslationFlags.Compute);
return Translator.CreateContext(gpuVa, gpuAccessor, options);
}
/// <summary>
/// Decode the binary Maxwell shader code to a translator context.
/// </summary>
/// <remarks>
/// This will combine the "Vertex A" and "Vertex B" shader stages, if specified, into one shader.
/// </remarks>
/// <param name="gpuAccessor">GPU state accessor</param>
/// <param name="flags">Flags that controls shader translation</param>
/// <param name="gpuVa">GPU virtual address of the shader code</param>
/// <returns>The generated translator context</returns>
public static TranslatorContext DecodeGraphicsShader(IGpuAccessor gpuAccessor, TranslationFlags flags, ulong gpuVa)
{
var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, flags);
return Translator.CreateContext(gpuVa, gpuAccessor, options);
}
/// <summary>
/// Translates a previously generated translator context to something that the host API accepts.
/// </summary>
/// <param name="dumper">Optional shader code dumper</param>
/// <param name="channel">GPU channel using the shader</param>
/// <param name="currentStage">Translator context of the stage to be translated</param>
/// <param name="vertexA">Optional translator context of the shader that should be combined</param>
/// <param name="codeA">Optional Maxwell binary code of the Vertex A shader, if present</param>
/// <param name="codeB">Optional Maxwell binary code of the Vertex B or current stage shader, if present on cache</param>
/// <returns>Compiled graphics shader code</returns>
private static TranslatedShaderVertexPair TranslateShader(
ShaderDumper dumper,
GpuChannel channel,
TranslatorContext currentStage,
TranslatorContext vertexA,
byte[] codeA,
byte[] codeB)
{
ulong cb1DataAddress = channel.BufferManager.GetGraphicsUniformBufferAddress(0, 1);
var memoryManager = channel.MemoryManager;
codeA ??= memoryManager.GetSpan(vertexA.Address, vertexA.Size).ToArray();
codeB ??= memoryManager.GetSpan(currentStage.Address, currentStage.Size).ToArray();
byte[] cb1DataA = memoryManager.Physical.GetSpan(cb1DataAddress, vertexA.Cb1DataSize).ToArray();
byte[] cb1DataB = memoryManager.Physical.GetSpan(cb1DataAddress, currentStage.Cb1DataSize).ToArray();
ShaderDumpPaths pathsA = default;
ShaderDumpPaths pathsB = default;
if (dumper != null)
{
pathsA = dumper.Dump(codeA, compute: false);
pathsB = dumper.Dump(codeB, compute: false);
}
ShaderProgram program = currentStage.Translate(vertexA);
pathsB.Prepend(program);
pathsA.Prepend(program);
CachedShaderStage vertexAStage = new CachedShaderStage(null, codeA, cb1DataA);
CachedShaderStage vertexBStage = new CachedShaderStage(program.Info, codeB, cb1DataB);
return new TranslatedShaderVertexPair(vertexAStage, vertexBStage, program);
}
/// <summary>
/// Translates a previously generated translator context to something that the host API accepts.
/// </summary>
/// <param name="dumper">Optional shader code dumper</param>
/// <param name="channel">GPU channel using the shader</param>
/// <param name="context">Translator context of the stage to be translated</param>
/// <param name="code">Optional Maxwell binary code of the current stage shader, if present on cache</param>
/// <returns>Compiled graphics shader code</returns>
private static TranslatedShader TranslateShader(ShaderDumper dumper, GpuChannel channel, TranslatorContext context, byte[] code)
{
var memoryManager = channel.MemoryManager;
ulong cb1DataAddress = context.Stage == ShaderStage.Compute
? channel.BufferManager.GetComputeUniformBufferAddress(1)
: channel.BufferManager.GetGraphicsUniformBufferAddress(StageToStageIndex(context.Stage), 1);
byte[] cb1Data = memoryManager.Physical.GetSpan(cb1DataAddress, context.Cb1DataSize).ToArray();
code ??= memoryManager.GetSpan(context.Address, context.Size).ToArray();
ShaderDumpPaths paths = dumper?.Dump(code, context.Stage == ShaderStage.Compute) ?? default;
ShaderProgram program = context.Translate();
paths.Prepend(program);
return new TranslatedShader(new CachedShaderStage(program.Info, code, cb1Data), program);
}
/// <summary>
/// Gets the index of a stage from a <see cref="ShaderStage"/>.
/// </summary>
/// <param name="stage">Stage to get the index from</param>
/// <returns>Stage index</returns>
private static int StageToStageIndex(ShaderStage stage)
{
return stage switch
{
ShaderStage.TessellationControl => 1,
ShaderStage.TessellationEvaluation => 2,
ShaderStage.Geometry => 3,
ShaderStage.Fragment => 4,
_ => 0
};
}
/// <summary>
/// Disposes the shader cache, deleting all the cached shaders.
/// It's an error to use the shader cache after disposal.
/// </summary>
public void Dispose()
{
foreach (CachedShaderProgram program in _graphicsShaderCache.GetPrograms())
{
program.Dispose();
}
foreach (CachedShaderProgram program in _computeShaderCache.GetPrograms())
{
program.Dispose();
}
_cacheWriter?.Dispose();
}
}
}