Ryujinx/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
riperiperi 99ffc061d3
Optimize Texture Binding and Shader Specialization Checks (#3399)
* Changes 1

* Changes 2

* Better ModifiedSequence handling

This should handle PreciseEvents properly, and simplifies a few things.

* Minor changes, remove debug log

* Handle stage.Info being null

Hopefully fixes Catherine crash

* Fix shader specialization fast texture lookup

* Fix some things.

* Address Feedback Part 1

* Make method static.
2022-06-17 13:09:14 -03:00

617 lines
26 KiB
C#

using Ryujinx.Common.Logging;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Engine.Threed;
using Ryujinx.Graphics.Gpu.Memory;
using Ryujinx.Graphics.Gpu.Shader.Cache;
using Ryujinx.Graphics.Gpu.Shader.DiskCache;
using Ryujinx.Graphics.Shader;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Collections.Generic;
using System.Threading;
namespace Ryujinx.Graphics.Gpu.Shader
{
/// <summary>
/// Memory cache of shader code.
/// </summary>
class ShaderCache : IDisposable
{
/// <summary>
/// Default flags used on the shader translation process.
/// </summary>
public const TranslationFlags DefaultFlags = TranslationFlags.DebugMode;
private struct TranslatedShader
{
public readonly CachedShaderStage Shader;
public readonly ShaderProgram Program;
public TranslatedShader(CachedShaderStage shader, ShaderProgram program)
{
Shader = shader;
Program = program;
}
}
private struct TranslatedShaderVertexPair
{
public readonly CachedShaderStage VertexA;
public readonly CachedShaderStage VertexB;
public readonly ShaderProgram Program;
public TranslatedShaderVertexPair(CachedShaderStage vertexA, CachedShaderStage vertexB, ShaderProgram program)
{
VertexA = vertexA;
VertexB = vertexB;
Program = program;
}
}
private readonly GpuContext _context;
private readonly ShaderDumper _dumper;
private readonly Dictionary<ulong, CachedShaderProgram> _cpPrograms;
private readonly Dictionary<ShaderAddresses, CachedShaderProgram> _gpPrograms;
private struct ProgramToSave
{
public readonly CachedShaderProgram CachedProgram;
public readonly IProgram HostProgram;
public ProgramToSave(CachedShaderProgram cachedProgram, IProgram hostProgram)
{
CachedProgram = cachedProgram;
HostProgram = hostProgram;
}
}
private Queue<ProgramToSave> _programsToSaveQueue;
private readonly ComputeShaderCacheHashTable _computeShaderCache;
private readonly ShaderCacheHashTable _graphicsShaderCache;
private readonly DiskCacheHostStorage _diskCacheHostStorage;
private readonly BackgroundDiskCacheWriter _cacheWriter;
/// <summary>
/// Event for signalling shader cache loading progress.
/// </summary>
public event Action<ShaderCacheState, int, int> ShaderCacheStateChanged;
/// <summary>
/// Creates a new instance of the shader cache.
/// </summary>
/// <param name="context">GPU context that the shader cache belongs to</param>
public ShaderCache(GpuContext context)
{
_context = context;
_dumper = new ShaderDumper();
_cpPrograms = new Dictionary<ulong, CachedShaderProgram>();
_gpPrograms = new Dictionary<ShaderAddresses, CachedShaderProgram>();
_programsToSaveQueue = new Queue<ProgramToSave>();
string diskCacheTitleId = GraphicsConfig.EnableShaderCache && GraphicsConfig.TitleId != null
? CacheHelper.GetBaseCacheDirectory(GraphicsConfig.TitleId)
: null;
_computeShaderCache = new ComputeShaderCacheHashTable();
_graphicsShaderCache = new ShaderCacheHashTable();
_diskCacheHostStorage = new DiskCacheHostStorage(diskCacheTitleId);
if (_diskCacheHostStorage.CacheEnabled)
{
_cacheWriter = new BackgroundDiskCacheWriter(context, _diskCacheHostStorage);
}
}
/// <summary>
/// Processes the queue of shaders that must save their binaries to the disk cache.
/// </summary>
public void ProcessShaderCacheQueue()
{
// Check to see if the binaries for previously compiled shaders are ready, and save them out.
while (_programsToSaveQueue.TryPeek(out ProgramToSave programToSave))
{
ProgramLinkStatus result = programToSave.HostProgram.CheckProgramLink(false);
if (result != ProgramLinkStatus.Incomplete)
{
if (result == ProgramLinkStatus.Success)
{
_cacheWriter.AddShader(programToSave.CachedProgram, programToSave.HostProgram.GetBinary());
}
_programsToSaveQueue.Dequeue();
}
else
{
break;
}
}
}
/// <summary>
/// Initialize the cache.
/// </summary>
/// <param name="cancellationToken">Cancellation token to cancel the shader cache initialization process</param>
internal void Initialize(CancellationToken cancellationToken)
{
if (_diskCacheHostStorage.CacheEnabled)
{
if (!_diskCacheHostStorage.CacheExists())
{
// If we don't have a shader cache on the new format, try to perform migration from the old shader cache.
Logger.Info?.Print(LogClass.Gpu, "No shader cache found, trying to migrate from legacy shader cache...");
int migrationCount = Migration.MigrateFromLegacyCache(_context, _diskCacheHostStorage);
Logger.Info?.Print(LogClass.Gpu, $"Migrated {migrationCount} shaders.");
}
ParallelDiskCacheLoader loader = new ParallelDiskCacheLoader(
_context,
_graphicsShaderCache,
_computeShaderCache,
_diskCacheHostStorage,
cancellationToken,
ShaderCacheStateUpdate);
loader.LoadShaders();
int errorCount = loader.ErrorCount;
if (errorCount != 0)
{
Logger.Warning?.Print(LogClass.Gpu, $"Failed to load {errorCount} shaders from the disk cache.");
}
}
}
/// <summary>
/// Shader cache state update handler.
/// </summary>
/// <param name="state">Current state of the shader cache load process</param>
/// <param name="current">Number of the current shader being processed</param>
/// <param name="total">Total number of shaders to process</param>
private void ShaderCacheStateUpdate(ShaderCacheState state, int current, int total)
{
ShaderCacheStateChanged?.Invoke(state, current, total);
}
/// <summary>
/// Gets a compute shader from the cache.
/// </summary>
/// <remarks>
/// This automatically translates, compiles and adds the code to the cache if not present.
/// </remarks>
/// <param name="channel">GPU channel</param>
/// <param name="poolState">Texture pool state</param>
/// <param name="computeState">Compute engine state</param>
/// <param name="gpuVa">GPU virtual address of the binary shader code</param>
/// <returns>Compiled compute shader code</returns>
public CachedShaderProgram GetComputeShader(
GpuChannel channel,
GpuChannelPoolState poolState,
GpuChannelComputeState computeState,
ulong gpuVa)
{
if (_cpPrograms.TryGetValue(gpuVa, out var cpShader) && IsShaderEqual(channel, poolState, cpShader, gpuVa))
{
return cpShader;
}
if (_computeShaderCache.TryFind(channel, poolState, gpuVa, out cpShader, out byte[] cachedGuestCode))
{
_cpPrograms[gpuVa] = cpShader;
return cpShader;
}
ShaderSpecializationState specState = new ShaderSpecializationState(computeState);
GpuAccessorState gpuAccessorState = new GpuAccessorState(poolState, computeState, default, specState);
GpuAccessor gpuAccessor = new GpuAccessor(_context, channel, gpuAccessorState);
TranslatorContext translatorContext = DecodeComputeShader(gpuAccessor, gpuVa);
TranslatedShader translatedShader = TranslateShader(_dumper, channel, translatorContext, cachedGuestCode);
IProgram hostProgram = _context.Renderer.CreateProgram(new ShaderSource[] { CreateShaderSource(translatedShader.Program) }, new ShaderInfo(-1));
cpShader = new CachedShaderProgram(hostProgram, specState, translatedShader.Shader);
_computeShaderCache.Add(cpShader);
EnqueueProgramToSave(new ProgramToSave(cpShader, hostProgram));
_cpPrograms[gpuVa] = cpShader;
return cpShader;
}
/// <summary>
/// Gets a graphics shader program from the shader cache.
/// This includes all the specified shader stages.
/// </summary>
/// <remarks>
/// This automatically translates, compiles and adds the code to the cache if not present.
/// </remarks>
/// <param name="state">GPU state</param>
/// <param name="channel">GPU channel</param>
/// <param name="poolState">Texture pool state</param>
/// <param name="graphicsState">3D engine state</param>
/// <param name="addresses">Addresses of the shaders for each stage</param>
/// <returns>Compiled graphics shader code</returns>
public CachedShaderProgram GetGraphicsShader(
ref ThreedClassState state,
GpuChannel channel,
GpuChannelPoolState poolState,
GpuChannelGraphicsState graphicsState,
ShaderAddresses addresses)
{
if (_gpPrograms.TryGetValue(addresses, out var gpShaders) && IsShaderEqual(channel, poolState, graphicsState, gpShaders, addresses))
{
return gpShaders;
}
if (_graphicsShaderCache.TryFind(channel, poolState, graphicsState, addresses, out gpShaders, out var cachedGuestCode))
{
_gpPrograms[addresses] = gpShaders;
return gpShaders;
}
TransformFeedbackDescriptor[] transformFeedbackDescriptors = GetTransformFeedbackDescriptors(ref state);
ShaderSpecializationState specState = new ShaderSpecializationState(graphicsState, transformFeedbackDescriptors);
GpuAccessorState gpuAccessorState = new GpuAccessorState(poolState, default, graphicsState, specState, transformFeedbackDescriptors);
ReadOnlySpan<ulong> addressesSpan = addresses.AsSpan();
TranslatorContext[] translatorContexts = new TranslatorContext[Constants.ShaderStages + 1];
TranslatorContext nextStage = null;
for (int stageIndex = Constants.ShaderStages - 1; stageIndex >= 0; stageIndex--)
{
ulong gpuVa = addressesSpan[stageIndex + 1];
if (gpuVa != 0)
{
GpuAccessor gpuAccessor = new GpuAccessor(_context, channel, gpuAccessorState, stageIndex);
TranslatorContext currentStage = DecodeGraphicsShader(gpuAccessor, DefaultFlags, gpuVa);
if (nextStage != null)
{
currentStage.SetNextStage(nextStage);
}
if (stageIndex == 0 && addresses.VertexA != 0)
{
translatorContexts[0] = DecodeGraphicsShader(gpuAccessor, DefaultFlags | TranslationFlags.VertexA, addresses.VertexA);
}
translatorContexts[stageIndex + 1] = currentStage;
nextStage = currentStage;
}
}
CachedShaderStage[] shaders = new CachedShaderStage[Constants.ShaderStages + 1];
List<ShaderSource> shaderSources = new List<ShaderSource>();
for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++)
{
TranslatorContext currentStage = translatorContexts[stageIndex + 1];
if (currentStage != null)
{
ShaderProgram program;
if (stageIndex == 0 && translatorContexts[0] != null)
{
TranslatedShaderVertexPair translatedShader = TranslateShader(
_dumper,
channel,
currentStage,
translatorContexts[0],
cachedGuestCode.VertexACode,
cachedGuestCode.VertexBCode);
shaders[0] = translatedShader.VertexA;
shaders[1] = translatedShader.VertexB;
program = translatedShader.Program;
}
else
{
byte[] code = cachedGuestCode.GetByIndex(stageIndex);
TranslatedShader translatedShader = TranslateShader(_dumper, channel, currentStage, code);
shaders[stageIndex + 1] = translatedShader.Shader;
program = translatedShader.Program;
}
if (program != null)
{
shaderSources.Add(CreateShaderSource(program));
}
}
}
int fragmentOutputMap = shaders[5]?.Info.FragmentOutputMap ?? -1;
IProgram hostProgram = _context.Renderer.CreateProgram(shaderSources.ToArray(), new ShaderInfo(fragmentOutputMap));
gpShaders = new CachedShaderProgram(hostProgram, specState, shaders);
_graphicsShaderCache.Add(gpShaders);
EnqueueProgramToSave(new ProgramToSave(gpShaders, hostProgram));
_gpPrograms[addresses] = gpShaders;
return gpShaders;
}
/// <summary>
/// Creates a shader source for use with the backend from a translated shader program.
/// </summary>
/// <param name="program">Translated shader program</param>
/// <returns>Shader source</returns>
public static ShaderSource CreateShaderSource(ShaderProgram program)
{
return new ShaderSource(program.Code, program.BinaryCode, program.Info.Stage, program.Language);
}
/// <summary>
/// Puts a program on the queue of programs to be saved on the disk cache.
/// </summary>
/// <remarks>
/// This will not do anything if disk shader cache is disabled.
/// </remarks>
/// <param name="programToSave">Program to be saved on disk</param>
private void EnqueueProgramToSave(ProgramToSave programToSave)
{
if (_diskCacheHostStorage.CacheEnabled)
{
_programsToSaveQueue.Enqueue(programToSave);
}
}
/// <summary>
/// Gets transform feedback state from the current GPU state.
/// </summary>
/// <param name="state">Current GPU state</param>
/// <returns>Four transform feedback descriptors for the enabled TFBs, or null if TFB is disabled</returns>
private static TransformFeedbackDescriptor[] GetTransformFeedbackDescriptors(ref ThreedClassState state)
{
bool tfEnable = state.TfEnable;
if (!tfEnable)
{
return null;
}
TransformFeedbackDescriptor[] descs = new TransformFeedbackDescriptor[Constants.TotalTransformFeedbackBuffers];
for (int i = 0; i < Constants.TotalTransformFeedbackBuffers; i++)
{
var tf = state.TfState[i];
descs[i] = new TransformFeedbackDescriptor(
tf.BufferIndex,
tf.Stride,
tf.VaryingsCount,
ref state.TfVaryingLocations[i]);
}
return descs;
}
/// <summary>
/// Checks if compute shader code in memory is equal to the cached shader.
/// </summary>
/// <param name="channel">GPU channel using the shader</param>
/// <param name="poolState">GPU channel state to verify shader compatibility</param>
/// <param name="cpShader">Cached compute shader</param>
/// <param name="gpuVa">GPU virtual address of the shader code in memory</param>
/// <returns>True if the code is different, false otherwise</returns>
private static bool IsShaderEqual(
GpuChannel channel,
GpuChannelPoolState poolState,
CachedShaderProgram cpShader,
ulong gpuVa)
{
if (IsShaderEqual(channel.MemoryManager, cpShader.Shaders[0], gpuVa))
{
return cpShader.SpecializationState.MatchesCompute(channel, poolState, true);
}
return false;
}
/// <summary>
/// Checks if graphics shader code from all stages in memory are equal to the cached shaders.
/// </summary>
/// <param name="channel">GPU channel using the shader</param>
/// <param name="poolState">GPU channel state to verify shader compatibility</param>
/// <param name="graphicsState">GPU channel graphics state to verify shader compatibility</param>
/// <param name="gpShaders">Cached graphics shaders</param>
/// <param name="addresses">GPU virtual addresses of all enabled shader stages</param>
/// <returns>True if the code is different, false otherwise</returns>
private static bool IsShaderEqual(
GpuChannel channel,
GpuChannelPoolState poolState,
GpuChannelGraphicsState graphicsState,
CachedShaderProgram gpShaders,
ShaderAddresses addresses)
{
ReadOnlySpan<ulong> addressesSpan = addresses.AsSpan();
for (int stageIndex = 0; stageIndex < gpShaders.Shaders.Length; stageIndex++)
{
CachedShaderStage shader = gpShaders.Shaders[stageIndex];
ulong gpuVa = addressesSpan[stageIndex];
if (!IsShaderEqual(channel.MemoryManager, shader, gpuVa))
{
return false;
}
}
return gpShaders.SpecializationState.MatchesGraphics(channel, poolState, graphicsState, true);
}
/// <summary>
/// Checks if the code of the specified cached shader is different from the code in memory.
/// </summary>
/// <param name="memoryManager">Memory manager used to access the GPU memory where the shader is located</param>
/// <param name="shader">Cached shader to compare with</param>
/// <param name="gpuVa">GPU virtual address of the binary shader code</param>
/// <returns>True if the code is different, false otherwise</returns>
private static bool IsShaderEqual(MemoryManager memoryManager, CachedShaderStage shader, ulong gpuVa)
{
if (shader == null)
{
return true;
}
ReadOnlySpan<byte> memoryCode = memoryManager.GetSpan(gpuVa, shader.Code.Length);
return memoryCode.SequenceEqual(shader.Code);
}
/// <summary>
/// Decode the binary Maxwell shader code to a translator context.
/// </summary>
/// <param name="gpuAccessor">GPU state accessor</param>
/// <param name="gpuVa">GPU virtual address of the binary shader code</param>
/// <returns>The generated translator context</returns>
public static TranslatorContext DecodeComputeShader(IGpuAccessor gpuAccessor, ulong gpuVa)
{
var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, DefaultFlags | TranslationFlags.Compute);
return Translator.CreateContext(gpuVa, gpuAccessor, options);
}
/// <summary>
/// Decode the binary Maxwell shader code to a translator context.
/// </summary>
/// <remarks>
/// This will combine the "Vertex A" and "Vertex B" shader stages, if specified, into one shader.
/// </remarks>
/// <param name="gpuAccessor">GPU state accessor</param>
/// <param name="flags">Flags that controls shader translation</param>
/// <param name="gpuVa">GPU virtual address of the shader code</param>
/// <returns>The generated translator context</returns>
public static TranslatorContext DecodeGraphicsShader(IGpuAccessor gpuAccessor, TranslationFlags flags, ulong gpuVa)
{
var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, flags);
return Translator.CreateContext(gpuVa, gpuAccessor, options);
}
/// <summary>
/// Translates a previously generated translator context to something that the host API accepts.
/// </summary>
/// <param name="dumper">Optional shader code dumper</param>
/// <param name="channel">GPU channel using the shader</param>
/// <param name="currentStage">Translator context of the stage to be translated</param>
/// <param name="vertexA">Optional translator context of the shader that should be combined</param>
/// <param name="codeA">Optional Maxwell binary code of the Vertex A shader, if present</param>
/// <param name="codeB">Optional Maxwell binary code of the Vertex B or current stage shader, if present on cache</param>
/// <returns>Compiled graphics shader code</returns>
private static TranslatedShaderVertexPair TranslateShader(
ShaderDumper dumper,
GpuChannel channel,
TranslatorContext currentStage,
TranslatorContext vertexA,
byte[] codeA,
byte[] codeB)
{
ulong cb1DataAddress = channel.BufferManager.GetGraphicsUniformBufferAddress(0, 1);
var memoryManager = channel.MemoryManager;
codeA ??= memoryManager.GetSpan(vertexA.Address, vertexA.Size).ToArray();
codeB ??= memoryManager.GetSpan(currentStage.Address, currentStage.Size).ToArray();
byte[] cb1DataA = memoryManager.Physical.GetSpan(cb1DataAddress, vertexA.Cb1DataSize).ToArray();
byte[] cb1DataB = memoryManager.Physical.GetSpan(cb1DataAddress, currentStage.Cb1DataSize).ToArray();
ShaderDumpPaths pathsA = default;
ShaderDumpPaths pathsB = default;
if (dumper != null)
{
pathsA = dumper.Dump(codeA, compute: false);
pathsB = dumper.Dump(codeB, compute: false);
}
ShaderProgram program = currentStage.Translate(vertexA);
pathsB.Prepend(program);
pathsA.Prepend(program);
CachedShaderStage vertexAStage = new CachedShaderStage(null, codeA, cb1DataA);
CachedShaderStage vertexBStage = new CachedShaderStage(program.Info, codeB, cb1DataB);
return new TranslatedShaderVertexPair(vertexAStage, vertexBStage, program);
}
/// <summary>
/// Translates a previously generated translator context to something that the host API accepts.
/// </summary>
/// <param name="dumper">Optional shader code dumper</param>
/// <param name="channel">GPU channel using the shader</param>
/// <param name="context">Translator context of the stage to be translated</param>
/// <param name="code">Optional Maxwell binary code of the current stage shader, if present on cache</param>
/// <returns>Compiled graphics shader code</returns>
private static TranslatedShader TranslateShader(ShaderDumper dumper, GpuChannel channel, TranslatorContext context, byte[] code)
{
var memoryManager = channel.MemoryManager;
ulong cb1DataAddress = context.Stage == ShaderStage.Compute
? channel.BufferManager.GetComputeUniformBufferAddress(1)
: channel.BufferManager.GetGraphicsUniformBufferAddress(StageToStageIndex(context.Stage), 1);
byte[] cb1Data = memoryManager.Physical.GetSpan(cb1DataAddress, context.Cb1DataSize).ToArray();
code ??= memoryManager.GetSpan(context.Address, context.Size).ToArray();
ShaderDumpPaths paths = dumper?.Dump(code, context.Stage == ShaderStage.Compute) ?? default;
ShaderProgram program = context.Translate();
paths.Prepend(program);
return new TranslatedShader(new CachedShaderStage(program.Info, code, cb1Data), program);
}
/// <summary>
/// Gets the index of a stage from a <see cref="ShaderStage"/>.
/// </summary>
/// <param name="stage">Stage to get the index from</param>
/// <returns>Stage index</returns>
private static int StageToStageIndex(ShaderStage stage)
{
return stage switch
{
ShaderStage.TessellationControl => 1,
ShaderStage.TessellationEvaluation => 2,
ShaderStage.Geometry => 3,
ShaderStage.Fragment => 4,
_ => 0
};
}
/// <summary>
/// Disposes the shader cache, deleting all the cached shaders.
/// It's an error to use the shader cache after disposal.
/// </summary>
public void Dispose()
{
foreach (CachedShaderProgram program in _graphicsShaderCache.GetPrograms())
{
program.Dispose();
}
foreach (CachedShaderProgram program in _computeShaderCache.GetPrograms())
{
program.Dispose();
}
_cacheWriter?.Dispose();
}
}
}