Use a new approach for shader BRX targets (#2532)
* Use a new approach for shader BRX targets
* Make shader cache actually work
* Improve the shader pattern matching a bit
* Extend LDC search to predecessor blocks, catches more cases
* Nit
* Only save the amount of constant buffer data actually used. Avoids crashes on partially mapped buffers
* Ignore Rd on predicate instructions, as they do not have a Rd register (catches more cases)
This commit is contained in:
parent
70f79e689b
commit
d9d18439f6
12 changed files with 472 additions and 149 deletions
|
@ -38,6 +38,11 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache
|
||||||
/// </summary>
|
/// </summary>
|
||||||
RemoveManifestEntries,
|
RemoveManifestEntries,
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Remove entries from the hash manifest and save it, and also deletes the temporary file.
|
||||||
|
/// </summary>
|
||||||
|
RemoveManifestEntryAndTempFile,
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Flush temporary cache to archive.
|
/// Flush temporary cache to archive.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
@ -116,6 +121,9 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache
|
||||||
/// </summary>
|
/// </summary>
|
||||||
private ZipArchive _cacheArchive;
|
private ZipArchive _cacheArchive;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Indicates if the cache collection supports modification.
|
||||||
|
/// </summary>
|
||||||
public bool IsReadOnly { get; }
|
public bool IsReadOnly { get; }
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
|
@ -264,6 +272,21 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Drops a single entry from the hash manifest, saves the manifest,
/// and then deletes the temporary file associated with that entry.
/// </summary>
/// <param name="entry">Hash of the entry to drop from the manifest</param>
private void RemoveManifestEntryAndTempFile(Hash128 entry)
{
    // The manifest is updated and saved while the hash table lock is held.
    lock (_hashTable)
    {
        _hashTable.Remove(entry);
        SaveManifest();
    }

    // The temporary file is deleted once the lock has been released.
    string tempFilePath = GenCacheTempFilePath(entry);

    File.Delete(tempFilePath);
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Queue a task to flush temporary files to the archive on the worker.
|
/// Queue a task to flush temporary files to the archive on the worker.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
@ -440,6 +463,9 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache
|
||||||
case CacheFileOperation.RemoveManifestEntries:
|
case CacheFileOperation.RemoveManifestEntries:
|
||||||
RemoveManifestEntries((HashSet<Hash128>)task.Data);
|
RemoveManifestEntries((HashSet<Hash128>)task.Data);
|
||||||
break;
|
break;
|
||||||
|
case CacheFileOperation.RemoveManifestEntryAndTempFile:
|
||||||
|
RemoveManifestEntryAndTempFile((Hash128)task.Data);
|
||||||
|
break;
|
||||||
case CacheFileOperation.FlushToArchive:
|
case CacheFileOperation.FlushToArchive:
|
||||||
FlushToArchive();
|
FlushToArchive();
|
||||||
break;
|
break;
|
||||||
|
@ -472,7 +498,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache
|
||||||
{
|
{
|
||||||
if (IsReadOnly)
|
if (IsReadOnly)
|
||||||
{
|
{
|
||||||
Logger.Warning?.Print(LogClass.Gpu, "Trying to add {keyHash} on a read-only cache, ignoring.");
|
Logger.Warning?.Print(LogClass.Gpu, $"Trying to add {keyHash} on a read-only cache, ignoring.");
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -521,7 +547,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache
|
||||||
{
|
{
|
||||||
if (IsReadOnly)
|
if (IsReadOnly)
|
||||||
{
|
{
|
||||||
Logger.Warning?.Print(LogClass.Gpu, "Trying to replace {keyHash} on a read-only cache, ignoring.");
|
Logger.Warning?.Print(LogClass.Gpu, $"Trying to replace {keyHash} on a read-only cache, ignoring.");
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -540,6 +566,27 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Requests removal of the value stored under the given hash.
/// The request is queued for the file writer worker; on a read-only cache
/// a warning is logged and nothing is queued.
/// </summary>
/// <param name="keyHash">The hash of the value to remove from the cache</param>
public void RemoveValue(ref Hash128 keyHash)
{
    if (!IsReadOnly)
    {
        // Only queue file change operations
        var removalTask = new CacheFileOperationTask
        {
            Type = CacheFileOperation.RemoveManifestEntryAndTempFile,
            Data = keyHash
        };

        _fileWriterWorkerQueue.Add(removalTask);

        return;
    }

    Logger.Warning?.Print(LogClass.Gpu, $"Trying to remove {keyHash} on a read-only cache, ignoring.");
}
|
||||||
|
|
||||||
public void Dispose()
|
public void Dispose()
|
||||||
{
|
{
|
||||||
Dispose(true);
|
Dispose(true);
|
||||||
|
|
|
@ -371,11 +371,13 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Create guest shader cache entries from the runtime contexts.
|
/// Create guest shader cache entries from the runtime contexts.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="memoryManager">The GPU memory manager in use</param>
|
/// <param name="channel">The GPU channel in use</param>
|
||||||
/// <param name="shaderContexts">The runtime contexts</param>
|
/// <param name="shaderContexts">The runtime contexts</param>
|
||||||
/// <returns>Guest shader cache entries from the runtime contexts</returns>
|
/// <returns>Guest shader cache entries from the runtime contexts</returns>
|
||||||
public static GuestShaderCacheEntry[] CreateShaderCacheEntries(MemoryManager memoryManager, ReadOnlySpan<TranslatorContext> shaderContexts)
|
public static GuestShaderCacheEntry[] CreateShaderCacheEntries(GpuChannel channel, ReadOnlySpan<TranslatorContext> shaderContexts)
|
||||||
{
|
{
|
||||||
|
MemoryManager memoryManager = channel.MemoryManager;
|
||||||
|
|
||||||
int startIndex = shaderContexts.Length > 1 ? 1 : 0;
|
int startIndex = shaderContexts.Length > 1 ? 1 : 0;
|
||||||
|
|
||||||
GuestShaderCacheEntry[] entries = new GuestShaderCacheEntry[shaderContexts.Length - startIndex];
|
GuestShaderCacheEntry[] entries = new GuestShaderCacheEntry[shaderContexts.Length - startIndex];
|
||||||
|
@ -389,31 +391,66 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GpuAccessor gpuAccessor = context.GpuAccessor as GpuAccessor;
|
||||||
|
|
||||||
|
ulong cb1DataAddress;
|
||||||
|
int cb1DataSize = gpuAccessor?.Cb1DataSize ?? 0;
|
||||||
|
|
||||||
|
if (context.Stage == ShaderStage.Compute)
|
||||||
|
{
|
||||||
|
cb1DataAddress = channel.BufferManager.GetComputeUniformBufferAddress(1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int stageIndex = context.Stage switch
|
||||||
|
{
|
||||||
|
ShaderStage.TessellationControl => 1,
|
||||||
|
ShaderStage.TessellationEvaluation => 2,
|
||||||
|
ShaderStage.Geometry => 3,
|
||||||
|
ShaderStage.Fragment => 4,
|
||||||
|
_ => 0
|
||||||
|
};
|
||||||
|
|
||||||
|
cb1DataAddress = channel.BufferManager.GetGraphicsUniformBufferAddress(stageIndex, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
int size = context.Size;
|
||||||
|
|
||||||
TranslatorContext translatorContext2 = i == 1 ? shaderContexts[0] : null;
|
TranslatorContext translatorContext2 = i == 1 ? shaderContexts[0] : null;
|
||||||
|
|
||||||
int sizeA = translatorContext2 != null ? translatorContext2.Size : 0;
|
int sizeA = translatorContext2 != null ? translatorContext2.Size : 0;
|
||||||
|
|
||||||
byte[] code = new byte[context.Size + sizeA];
|
byte[] code = new byte[size + cb1DataSize + sizeA];
|
||||||
|
|
||||||
memoryManager.GetSpan(context.Address, context.Size).CopyTo(code);
|
memoryManager.GetSpan(context.Address, size).CopyTo(code);
|
||||||
|
|
||||||
|
if (cb1DataAddress != 0 && cb1DataSize != 0)
|
||||||
|
{
|
||||||
|
memoryManager.Physical.GetSpan(cb1DataAddress, cb1DataSize).CopyTo(code.AsSpan().Slice(size, cb1DataSize));
|
||||||
|
}
|
||||||
|
|
||||||
if (translatorContext2 != null)
|
if (translatorContext2 != null)
|
||||||
{
|
{
|
||||||
memoryManager.GetSpan(translatorContext2.Address, sizeA).CopyTo(code.AsSpan().Slice(context.Size, sizeA));
|
memoryManager.GetSpan(translatorContext2.Address, sizeA).CopyTo(code.AsSpan().Slice(size + cb1DataSize, sizeA));
|
||||||
}
|
}
|
||||||
|
|
||||||
GuestGpuAccessorHeader gpuAccessorHeader = CreateGuestGpuAccessorCache(context.GpuAccessor);
|
GuestGpuAccessorHeader gpuAccessorHeader = CreateGuestGpuAccessorCache(context.GpuAccessor);
|
||||||
|
|
||||||
if (context.GpuAccessor is GpuAccessor)
|
if (gpuAccessor != null)
|
||||||
{
|
{
|
||||||
gpuAccessorHeader.TextureDescriptorCount = context.TextureHandlesForCache.Count;
|
gpuAccessorHeader.TextureDescriptorCount = context.TextureHandlesForCache.Count;
|
||||||
}
|
}
|
||||||
|
|
||||||
GuestShaderCacheEntryHeader header = new GuestShaderCacheEntryHeader(context.Stage, context.Size, sizeA, gpuAccessorHeader);
|
GuestShaderCacheEntryHeader header = new GuestShaderCacheEntryHeader(
|
||||||
|
context.Stage,
|
||||||
|
size + cb1DataSize,
|
||||||
|
sizeA,
|
||||||
|
cb1DataSize,
|
||||||
|
gpuAccessorHeader);
|
||||||
|
|
||||||
GuestShaderCacheEntry entry = new GuestShaderCacheEntry(header, code);
|
GuestShaderCacheEntry entry = new GuestShaderCacheEntry(header, code);
|
||||||
|
|
||||||
if (context.GpuAccessor is GpuAccessor gpuAccessor)
|
if (gpuAccessor != null)
|
||||||
{
|
{
|
||||||
foreach (int textureHandle in context.TextureHandlesForCache)
|
foreach (int textureHandle in context.TextureHandlesForCache)
|
||||||
{
|
{
|
||||||
|
|
|
@ -114,6 +114,16 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache
|
||||||
_hostProgramCache.ReplaceValue(ref programCodeHash, data);
|
_hostProgramCache.ReplaceValue(ref programCodeHash, data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Removes a shader program from the program cache.
/// The removal is requested on both the guest and the host program caches.
/// </summary>
/// <param name="programCodeHash">Code hash of the program to remove</param>
public void RemoveProgram(ref Hash128 programCodeHash)
{
    // Guest cache first, then the host cache, both keyed by the same hash.
    _guestProgramCache.RemoveValue(ref programCodeHash);

    _hostProgramCache.RemoveValue(ref programCodeHash);
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Get all guest program hashes.
|
/// Get all guest program hashes.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
|
|
@ -40,9 +40,9 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache.Definition
|
||||||
public int SizeA;
|
public int SizeA;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Unused/reserved.
|
/// Constant buffer 1 data size.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public int Reserved4;
|
public int Cb1DataSize;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The header of the cached gpu accessor.
|
/// The header of the cached gpu accessor.
|
||||||
|
@ -55,12 +55,14 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache.Definition
|
||||||
/// <param name="stage">The stage of this shader</param>
|
/// <param name="stage">The stage of this shader</param>
|
||||||
/// <param name="size">The size of the code section</param>
|
/// <param name="size">The size of the code section</param>
|
||||||
/// <param name="sizeA">The size of the code2 section if present (Vertex A)</param>
|
/// <param name="sizeA">The size of the code2 section if present (Vertex A)</param>
|
||||||
|
/// <param name="cb1DataSize">Constant buffer 1 data size</param>
|
||||||
/// <param name="gpuAccessorHeader">The header of the cached gpu accessor</param>
|
/// <param name="gpuAccessorHeader">The header of the cached gpu accessor</param>
|
||||||
public GuestShaderCacheEntryHeader(ShaderStage stage, int size, int sizeA, GuestGpuAccessorHeader gpuAccessorHeader) : this()
|
public GuestShaderCacheEntryHeader(ShaderStage stage, int size, int sizeA, int cb1DataSize, GuestGpuAccessorHeader gpuAccessorHeader) : this()
|
||||||
{
|
{
|
||||||
Stage = stage;
|
Stage = stage;
|
||||||
Size = size;
|
Size = size;
|
||||||
SizeA = sizeA;
|
SizeA = sizeA;
|
||||||
|
Cb1DataSize = cb1DataSize;
|
||||||
GpuAccessorHeader = gpuAccessorHeader;
|
GpuAccessorHeader = gpuAccessorHeader;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,6 +11,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||||
{
|
{
|
||||||
private readonly GpuContext _context;
|
private readonly GpuContext _context;
|
||||||
private readonly ReadOnlyMemory<byte> _data;
|
private readonly ReadOnlyMemory<byte> _data;
|
||||||
|
private readonly ReadOnlyMemory<byte> _cb1Data;
|
||||||
private readonly GuestGpuAccessorHeader _header;
|
private readonly GuestGpuAccessorHeader _header;
|
||||||
private readonly Dictionary<int, GuestTextureDescriptor> _textureDescriptors;
|
private readonly Dictionary<int, GuestTextureDescriptor> _textureDescriptors;
|
||||||
|
|
||||||
|
@ -19,12 +20,19 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="context">GPU context</param>
|
/// <param name="context">GPU context</param>
|
||||||
/// <param name="data">The data of the shader</param>
|
/// <param name="data">The data of the shader</param>
|
||||||
|
/// <param name="cb1Data">The constant buffer 1 data of the shader</param>
|
||||||
/// <param name="header">The cache of the GPU accessor</param>
|
/// <param name="header">The cache of the GPU accessor</param>
|
||||||
/// <param name="guestTextureDescriptors">The cache of the texture descriptors</param>
|
/// <param name="guestTextureDescriptors">The cache of the texture descriptors</param>
|
||||||
public CachedGpuAccessor(GpuContext context, ReadOnlyMemory<byte> data, GuestGpuAccessorHeader header, Dictionary<int, GuestTextureDescriptor> guestTextureDescriptors)
|
public CachedGpuAccessor(
|
||||||
|
GpuContext context,
|
||||||
|
ReadOnlyMemory<byte> data,
|
||||||
|
ReadOnlyMemory<byte> cb1Data,
|
||||||
|
GuestGpuAccessorHeader header,
|
||||||
|
Dictionary<int, GuestTextureDescriptor> guestTextureDescriptors)
|
||||||
{
|
{
|
||||||
_context = context;
|
_context = context;
|
||||||
_data = data;
|
_data = data;
|
||||||
|
_cb1Data = cb1Data;
|
||||||
_header = header;
|
_header = header;
|
||||||
_textureDescriptors = new Dictionary<int, GuestTextureDescriptor>();
|
_textureDescriptors = new Dictionary<int, GuestTextureDescriptor>();
|
||||||
|
|
||||||
|
@ -34,6 +42,16 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Reads a 32-bit value from the cached constant buffer 1 data.
/// </summary>
/// <param name="offset">Offset in bytes to read from</param>
/// <returns>Value at the given offset</returns>
public uint ConstantBuffer1Read(int offset)
{
    // Everything from the requested offset up to the end of the cached data.
    ReadOnlySpan<byte> remaining = _cb1Data.Span.Slice(offset);

    // Reinterpret the bytes as 32-bit words; the first word is the requested value.
    ReadOnlySpan<uint> words = MemoryMarshal.Cast<byte, uint>(remaining);

    return words[0];
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Prints a log message.
|
/// Prints a log message.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
|
|
@ -20,6 +20,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||||
private readonly int _localMemorySize;
|
private readonly int _localMemorySize;
|
||||||
private readonly int _sharedMemorySize;
|
private readonly int _sharedMemorySize;
|
||||||
|
|
||||||
|
public int Cb1DataSize { get; private set; }
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Creates a new instance of the GPU state accessor for graphics shader translation.
|
/// Creates a new instance of the GPU state accessor for graphics shader translation.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
@ -67,6 +69,25 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||||
_sharedMemorySize = sharedMemorySize;
|
_sharedMemorySize = sharedMemorySize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Reads a 32-bit value from constant buffer 1, and grows <see cref="Cb1DataSize"/>
/// so that it covers the bytes that were read.
/// </summary>
/// <param name="offset">Offset in bytes to read from</param>
/// <returns>Value at the given offset</returns>
public uint ConstantBuffer1Read(int offset)
{
    // Track the highest byte touched so far (the read covers 4 bytes).
    int requiredSize = offset + 4;

    if (requiredSize > Cb1DataSize)
    {
        Cb1DataSize = requiredSize;
    }

    // Compute and graphics keep their uniform buffer bindings separately.
    ulong baseAddress;

    if (_compute)
    {
        baseAddress = _channel.BufferManager.GetComputeUniformBufferAddress(1);
    }
    else
    {
        baseAddress = _channel.BufferManager.GetGraphicsUniformBufferAddress(_stageIndex, 1);
    }

    return _channel.MemoryManager.Physical.Read<uint>(baseAddress + (ulong)offset);
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Prints a log message.
|
/// Prints a log message.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
|
|
@ -38,7 +38,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Version of the codegen (to be changed when codegen or guest format change).
|
/// Version of the codegen (to be changed when codegen or guest format change).
|
||||||
/// </summary>
|
/// </summary>
|
||||||
private const ulong ShaderCodeGenVersion = 2469;
|
private const ulong ShaderCodeGenVersion = 2530;
|
||||||
|
|
||||||
// Progress reporting helpers
|
// Progress reporting helpers
|
||||||
private volatile int _shaderCount;
|
private volatile int _shaderCount;
|
||||||
|
@ -112,7 +112,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||||
int programIndex = 0;
|
int programIndex = 0;
|
||||||
List<ShaderCompileTask> activeTasks = new List<ShaderCompileTask>();
|
List<ShaderCompileTask> activeTasks = new List<ShaderCompileTask>();
|
||||||
|
|
||||||
AutoResetEvent taskDoneEvent = new AutoResetEvent(false);
|
using AutoResetEvent taskDoneEvent = new AutoResetEvent(false);
|
||||||
|
|
||||||
// This thread dispatches tasks to do shader translation, and creates programs that OpenGL will link in the background.
|
// This thread dispatches tasks to do shader translation, and creates programs that OpenGL will link in the background.
|
||||||
// The program link status is checked in a non-blocking manner so that multiple shaders can be compiled at once.
|
// The program link status is checked in a non-blocking manner so that multiple shaders can be compiled at once.
|
||||||
|
@ -191,7 +191,14 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||||
|
|
||||||
Task compileTask = Task.Run(() =>
|
Task compileTask = Task.Run(() =>
|
||||||
{
|
{
|
||||||
IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors);
|
var binaryCode = new Memory<byte>(entry.Code);
|
||||||
|
|
||||||
|
var gpuAccessor = new CachedGpuAccessor(
|
||||||
|
_context,
|
||||||
|
binaryCode,
|
||||||
|
binaryCode.Slice(binaryCode.Length - entry.Header.Cb1DataSize),
|
||||||
|
entry.Header.GpuAccessorHeader,
|
||||||
|
entry.TextureDescriptors);
|
||||||
|
|
||||||
var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, DefaultFlags | TranslationFlags.Compute);
|
var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, DefaultFlags | TranslationFlags.Compute);
|
||||||
program = Translator.CreateContext(0, gpuAccessor, options).Translate(out shaderProgramInfo);
|
program = Translator.CreateContext(0, gpuAccessor, options).Translate(out shaderProgramInfo);
|
||||||
|
@ -199,12 +206,20 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||||
|
|
||||||
task.OnTask(compileTask, (bool _, ShaderCompileTask task) =>
|
task.OnTask(compileTask, (bool _, ShaderCompileTask task) =>
|
||||||
{
|
{
|
||||||
|
if (task.IsFaulted)
|
||||||
|
{
|
||||||
|
Logger.Warning?.Print(LogClass.Gpu, $"Host shader {key} is corrupted or incompatible, discarding...");
|
||||||
|
|
||||||
|
_cacheManager.RemoveProgram(ref key);
|
||||||
|
return true; // Exit early, the decoding step failed.
|
||||||
|
}
|
||||||
|
|
||||||
ShaderCodeHolder shader = new ShaderCodeHolder(program, shaderProgramInfo, entry.Code);
|
ShaderCodeHolder shader = new ShaderCodeHolder(program, shaderProgramInfo, entry.Code);
|
||||||
|
|
||||||
Logger.Info?.Print(LogClass.Gpu, $"Host shader {key} got invalidated, rebuilding from guest...");
|
Logger.Info?.Print(LogClass.Gpu, $"Host shader {key} got invalidated, rebuilding from guest...");
|
||||||
|
|
||||||
// Compile shader and create program as the shader program binary got invalidated.
|
// Compile shader and create program as the shader program binary got invalidated.
|
||||||
shader.HostShader = _context.Renderer.CompileShader(ShaderStage.Compute, shader.Program.Code);
|
shader.HostShader = _context.Renderer.CompileShader(ShaderStage.Compute, program.Code);
|
||||||
hostProgram = _context.Renderer.CreateProgram(new IShader[] { shader.HostShader }, null);
|
hostProgram = _context.Renderer.CreateProgram(new IShader[] { shader.HostShader }, null);
|
||||||
|
|
||||||
task.OnCompiled(hostProgram, (bool isNewProgramValid, ShaderCompileTask task) =>
|
task.OnCompiled(hostProgram, (bool isNewProgramValid, ShaderCompileTask task) =>
|
||||||
|
@ -298,7 +313,14 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors);
|
var binaryCode = new Memory<byte>(entry.Code);
|
||||||
|
|
||||||
|
var gpuAccessor = new CachedGpuAccessor(
|
||||||
|
_context,
|
||||||
|
binaryCode,
|
||||||
|
binaryCode.Slice(binaryCode.Length - entry.Header.Cb1DataSize),
|
||||||
|
entry.Header.GpuAccessorHeader,
|
||||||
|
entry.TextureDescriptors);
|
||||||
|
|
||||||
var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, flags);
|
var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, flags);
|
||||||
var options2 = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, flags | TranslationFlags.VertexA);
|
var options2 = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, flags | TranslationFlags.VertexA);
|
||||||
|
@ -310,7 +332,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||||
}
|
}
|
||||||
|
|
||||||
// NOTE: Vertex B comes first in the shader cache.
|
// NOTE: Vertex B comes first in the shader cache.
|
||||||
byte[] code = entry.Code.AsSpan().Slice(0, entry.Header.Size).ToArray();
|
byte[] code = entry.Code.AsSpan().Slice(0, entry.Header.Size - entry.Header.Cb1DataSize).ToArray();
|
||||||
byte[] code2 = entry.Code.AsSpan().Slice(entry.Header.Size, entry.Header.SizeA).ToArray();
|
byte[] code2 = entry.Code.AsSpan().Slice(entry.Header.Size, entry.Header.SizeA).ToArray();
|
||||||
|
|
||||||
shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, code, code2);
|
shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, code, code2);
|
||||||
|
@ -326,13 +348,22 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors);
|
var binaryCode = new Memory<byte>(entry.Code);
|
||||||
|
|
||||||
|
var gpuAccessor = new CachedGpuAccessor(
|
||||||
|
_context,
|
||||||
|
binaryCode,
|
||||||
|
binaryCode.Slice(binaryCode.Length - entry.Header.Cb1DataSize),
|
||||||
|
entry.Header.GpuAccessorHeader,
|
||||||
|
entry.TextureDescriptors);
|
||||||
|
|
||||||
var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, flags);
|
var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, flags);
|
||||||
program = Translator.CreateContext(0, gpuAccessor, options, counts).Translate(out shaderProgramInfo);
|
program = Translator.CreateContext(0, gpuAccessor, options, counts).Translate(out shaderProgramInfo);
|
||||||
}
|
}
|
||||||
|
|
||||||
shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, entry.Code);
|
byte[] code = entry.Code.AsSpan().Slice(0, entry.Header.Size - entry.Header.Cb1DataSize).ToArray();
|
||||||
|
|
||||||
|
shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, code);
|
||||||
}
|
}
|
||||||
|
|
||||||
shaderPrograms.Add(program);
|
shaderPrograms.Add(program);
|
||||||
|
@ -341,6 +372,14 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||||
|
|
||||||
task.OnTask(compileTask, (bool _, ShaderCompileTask task) =>
|
task.OnTask(compileTask, (bool _, ShaderCompileTask task) =>
|
||||||
{
|
{
|
||||||
|
if (task.IsFaulted)
|
||||||
|
{
|
||||||
|
Logger.Warning?.Print(LogClass.Gpu, $"Host shader {key} is corrupted or incompatible, discarding...");
|
||||||
|
|
||||||
|
_cacheManager.RemoveProgram(ref key);
|
||||||
|
return true; // Exit early, the decoding step failed.
|
||||||
|
}
|
||||||
|
|
||||||
// If the host program was rejected by the gpu driver or isn't in cache, try to build from program sources again.
|
// If the host program was rejected by the gpu driver or isn't in cache, try to build from program sources again.
|
||||||
if (!isHostProgramValid)
|
if (!isHostProgramValid)
|
||||||
{
|
{
|
||||||
|
@ -537,7 +576,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||||
isShaderCacheReadOnly = _cacheManager.IsReadOnly;
|
isShaderCacheReadOnly = _cacheManager.IsReadOnly;
|
||||||
|
|
||||||
// Compute hash and prepare data for shader disk cache comparison.
|
// Compute hash and prepare data for shader disk cache comparison.
|
||||||
shaderCacheEntries = CacheHelper.CreateShaderCacheEntries(channel.MemoryManager, shaderContexts);
|
shaderCacheEntries = CacheHelper.CreateShaderCacheEntries(channel, shaderContexts);
|
||||||
programCodeHash = CacheHelper.ComputeGuestHashFromCache(shaderCacheEntries);
|
programCodeHash = CacheHelper.ComputeGuestHashFromCache(shaderCacheEntries);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -659,7 +698,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||||
isShaderCacheReadOnly = _cacheManager.IsReadOnly;
|
isShaderCacheReadOnly = _cacheManager.IsReadOnly;
|
||||||
|
|
||||||
// Compute hash and prepare data for shader disk cache comparison.
|
// Compute hash and prepare data for shader disk cache comparison.
|
||||||
shaderCacheEntries = CacheHelper.CreateShaderCacheEntries(channel.MemoryManager, shaderContexts);
|
shaderCacheEntries = CacheHelper.CreateShaderCacheEntries(channel, shaderContexts);
|
||||||
programCodeHash = CacheHelper.ComputeGuestHashFromCache(shaderCacheEntries, tfd);
|
programCodeHash = CacheHelper.ComputeGuestHashFromCache(shaderCacheEntries, tfd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
using Ryujinx.Graphics.GAL;
|
using Ryujinx.Graphics.GAL;
|
||||||
using System;
|
|
||||||
using System.Threading;
|
using System.Threading;
|
||||||
using System.Threading.Tasks;
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
@ -20,6 +19,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||||
private ShaderCompileTaskCallback _action;
|
private ShaderCompileTaskCallback _action;
|
||||||
private AutoResetEvent _taskDoneEvent;
|
private AutoResetEvent _taskDoneEvent;
|
||||||
|
|
||||||
|
public bool IsFaulted => _programsTask.IsFaulted;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Create a new shader compile task, with an event to signal whenever a subtask completes.
|
/// Create a new shader compile task, with an event to signal whenever a subtask completes.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
|
|
@ -8,10 +8,38 @@ namespace Ryujinx.Graphics.Shader.Decoders
|
||||||
public ulong Address { get; set; }
|
public ulong Address { get; set; }
|
||||||
public ulong EndAddress { get; set; }
|
public ulong EndAddress { get; set; }
|
||||||
|
|
||||||
public Block Next { get; set; }
|
private Block _next;
|
||||||
public Block Branch { get; set; }
|
private Block _branch;
|
||||||
|
|
||||||
public OpCodeBranchIndir BrIndir { get; set; }
|
/// <summary>
/// Block at the next address, executed when the branch ending this block is not taken.
/// Assigning it keeps the <see cref="Predecessors"/> set of the old and new targets up to date.
/// </summary>
public Block Next
{
    get => _next;
    set
    {
        // Predecessors is a set, so a block that is both Next and Branch of this block
        // holds a single entry for it. Only drop that entry when the Branch edge
        // does not still point at the old target.
        if (_next != null && _next != _branch)
        {
            _next.Predecessors.Remove(this);
        }

        value?.Predecessors.Add(this);
        _next = value;
    }
}

/// <summary>
/// Block the branch instruction ending this block points to (when taken).
/// Assigning it keeps the <see cref="Predecessors"/> set of the old and new targets up to date.
/// </summary>
public Block Branch
{
    get => _branch;
    set
    {
        // Same rule as for Next: keep the predecessor entry on the old target
        // while the Next edge still points at it.
        if (_branch != null && _branch != _next)
        {
            _branch.Predecessors.Remove(this);
        }

        value?.Predecessors.Add(this);
        _branch = value;
    }
}
|
||||||
|
|
||||||
|
public HashSet<Block> Predecessors { get; }
|
||||||
|
|
||||||
public List<OpCode> OpCodes { get; }
|
public List<OpCode> OpCodes { get; }
|
||||||
public List<OpCodePush> PushOpCodes { get; }
|
public List<OpCodePush> PushOpCodes { get; }
|
||||||
|
@ -20,6 +48,8 @@ namespace Ryujinx.Graphics.Shader.Decoders
|
||||||
{
|
{
|
||||||
Address = address;
|
Address = address;
|
||||||
|
|
||||||
|
Predecessors = new HashSet<Block>();
|
||||||
|
|
||||||
OpCodes = new List<OpCode>();
|
OpCodes = new List<OpCode>();
|
||||||
PushOpCodes = new List<OpCodePush>();
|
PushOpCodes = new List<OpCodePush>();
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,8 +9,6 @@ namespace Ryujinx.Graphics.Shader.Decoders
|
||||||
{
|
{
|
||||||
static class Decoder
|
static class Decoder
|
||||||
{
|
{
|
||||||
public const ulong ShaderEndDelimiter = 0xe2400fffff87000f;
|
|
||||||
|
|
||||||
public static Block[][] Decode(IGpuAccessor gpuAccessor, ulong startAddress, out bool hasBindless)
|
public static Block[][] Decode(IGpuAccessor gpuAccessor, ulong startAddress, out bool hasBindless)
|
||||||
{
|
{
|
||||||
hasBindless = false;
|
hasBindless = false;
|
||||||
|
@ -51,130 +49,139 @@ namespace Ryujinx.Graphics.Shader.Decoders
|
||||||
|
|
||||||
GetBlock(funcAddress);
|
GetBlock(funcAddress);
|
||||||
|
|
||||||
while (workQueue.TryDequeue(out Block currBlock))
|
bool hasNewTarget;
|
||||||
|
|
||||||
|
do
|
||||||
{
|
{
|
||||||
// Check if the current block is inside another block.
|
while (workQueue.TryDequeue(out Block currBlock))
|
||||||
if (BinarySearch(blocks, currBlock.Address, out int nBlkIndex))
|
|
||||||
{
|
{
|
||||||
Block nBlock = blocks[nBlkIndex];
|
// Check if the current block is inside another block.
|
||||||
|
if (BinarySearch(blocks, currBlock.Address, out int nBlkIndex))
|
||||||
if (nBlock.Address == currBlock.Address)
|
|
||||||
{
|
{
|
||||||
throw new InvalidOperationException("Found duplicate block address on the list.");
|
Block nBlock = blocks[nBlkIndex];
|
||||||
}
|
|
||||||
|
|
||||||
nBlock.Split(currBlock);
|
if (nBlock.Address == currBlock.Address)
|
||||||
blocks.Insert(nBlkIndex + 1, currBlock);
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we have a block after the current one, set the limit address.
|
|
||||||
ulong limitAddress = ulong.MaxValue;
|
|
||||||
|
|
||||||
if (nBlkIndex != blocks.Count)
|
|
||||||
{
|
|
||||||
Block nBlock = blocks[nBlkIndex];
|
|
||||||
|
|
||||||
int nextIndex = nBlkIndex + 1;
|
|
||||||
|
|
||||||
if (nBlock.Address < currBlock.Address && nextIndex < blocks.Count)
|
|
||||||
{
|
|
||||||
limitAddress = blocks[nextIndex].Address;
|
|
||||||
}
|
|
||||||
else if (nBlock.Address > currBlock.Address)
|
|
||||||
{
|
|
||||||
limitAddress = blocks[nBlkIndex].Address;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
FillBlock(gpuAccessor, currBlock, limitAddress, startAddress, out bool blockHasBindless);
|
|
||||||
hasBindless |= blockHasBindless;
|
|
||||||
|
|
||||||
if (currBlock.OpCodes.Count != 0)
|
|
||||||
{
|
|
||||||
// We should have blocks for all possible branch targets,
|
|
||||||
// including those from SSY/PBK instructions.
|
|
||||||
foreach (OpCodePush pushOp in currBlock.PushOpCodes)
|
|
||||||
{
|
|
||||||
GetBlock(pushOp.GetAbsoluteAddress());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set child blocks. "Branch" is the block the branch instruction
|
|
||||||
// points to (when taken), "Next" is the block at the next address,
|
|
||||||
// executed when the branch is not taken. For Unconditional Branches
|
|
||||||
// or end of program, Next is null.
|
|
||||||
OpCode lastOp = currBlock.GetLastOp();
|
|
||||||
|
|
||||||
if (lastOp is OpCodeBranch opBr)
|
|
||||||
{
|
|
||||||
if (lastOp.Emitter == InstEmit.Cal)
|
|
||||||
{
|
{
|
||||||
EnqueueFunction(opBr.GetAbsoluteAddress());
|
throw new InvalidOperationException("Found duplicate block address on the list.");
|
||||||
}
|
}
|
||||||
else
|
|
||||||
|
nBlock.Split(currBlock);
|
||||||
|
blocks.Insert(nBlkIndex + 1, currBlock);
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we have a block after the current one, set the limit address.
|
||||||
|
ulong limitAddress = ulong.MaxValue;
|
||||||
|
|
||||||
|
if (nBlkIndex != blocks.Count)
|
||||||
|
{
|
||||||
|
Block nBlock = blocks[nBlkIndex];
|
||||||
|
|
||||||
|
int nextIndex = nBlkIndex + 1;
|
||||||
|
|
||||||
|
if (nBlock.Address < currBlock.Address && nextIndex < blocks.Count)
|
||||||
{
|
{
|
||||||
currBlock.Branch = GetBlock(opBr.GetAbsoluteAddress());
|
limitAddress = blocks[nextIndex].Address;
|
||||||
|
}
|
||||||
|
else if (nBlock.Address > currBlock.Address)
|
||||||
|
{
|
||||||
|
limitAddress = blocks[nBlkIndex].Address;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (lastOp is OpCodeBranchIndir opBrIndir)
|
|
||||||
|
FillBlock(gpuAccessor, currBlock, limitAddress, startAddress, out bool blockHasBindless);
|
||||||
|
hasBindless |= blockHasBindless;
|
||||||
|
|
||||||
|
if (currBlock.OpCodes.Count != 0)
|
||||||
{
|
{
|
||||||
// An indirect branch could go anywhere, we don't know the target.
|
// We should have blocks for all possible branch targets,
|
||||||
// Those instructions are usually used on a switch to jump table
|
// including those from SSY/PBK instructions.
|
||||||
// compiler optimization, and in those cases the possible targets
|
foreach (OpCodePush pushOp in currBlock.PushOpCodes)
|
||||||
// seems to be always right after the BRX itself. We can assume
|
{
|
||||||
// that the possible targets are all the blocks in-between the
|
GetBlock(pushOp.GetAbsoluteAddress());
|
||||||
// instruction right after the BRX, and the common target that
|
}
|
||||||
// all the "cases" should eventually jump to, acting as the
|
|
||||||
// switch break.
|
|
||||||
Block firstTarget = GetBlock(currBlock.EndAddress);
|
|
||||||
|
|
||||||
firstTarget.BrIndir = opBrIndir;
|
// Set child blocks. "Branch" is the block the branch instruction
|
||||||
|
// points to (when taken), "Next" is the block at the next address,
|
||||||
|
// executed when the branch is not taken. For Unconditional Branches
|
||||||
|
// or end of program, Next is null.
|
||||||
|
OpCode lastOp = currBlock.GetLastOp();
|
||||||
|
|
||||||
opBrIndir.PossibleTargets.Add(firstTarget);
|
if (lastOp is OpCodeBranch opBr)
|
||||||
|
{
|
||||||
|
if (lastOp.Emitter == InstEmit.Cal)
|
||||||
|
{
|
||||||
|
EnqueueFunction(opBr.GetAbsoluteAddress());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
currBlock.Branch = GetBlock(opBr.GetAbsoluteAddress());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!IsUnconditionalBranch(lastOp))
|
||||||
|
{
|
||||||
|
currBlock.Next = GetBlock(currBlock.EndAddress);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!IsUnconditionalBranch(lastOp))
|
// Insert the new block on the list (sorted by address).
|
||||||
|
if (blocks.Count != 0)
|
||||||
{
|
{
|
||||||
currBlock.Next = GetBlock(currBlock.EndAddress);
|
Block nBlock = blocks[nBlkIndex];
|
||||||
|
|
||||||
|
blocks.Insert(nBlkIndex + (nBlock.Address < currBlock.Address ? 1 : 0), currBlock);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
blocks.Add(currBlock);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Insert the new block on the list (sorted by address).
|
// Propagate SSY/PBK addresses into their uses (SYNC/BRK).
|
||||||
if (blocks.Count != 0)
|
foreach (Block block in blocks.Where(x => x.PushOpCodes.Count != 0))
|
||||||
{
|
{
|
||||||
Block nBlock = blocks[nBlkIndex];
|
for (int pushOpIndex = 0; pushOpIndex < block.PushOpCodes.Count; pushOpIndex++)
|
||||||
|
|
||||||
blocks.Insert(nBlkIndex + (nBlock.Address < currBlock.Address ? 1 : 0), currBlock);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
blocks.Add(currBlock);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Do we have a block after the current one?
|
|
||||||
if (currBlock.BrIndir != null && HasBlockAfter(gpuAccessor, currBlock, startAddress))
|
|
||||||
{
|
|
||||||
bool targetVisited = visited.ContainsKey(currBlock.EndAddress);
|
|
||||||
|
|
||||||
Block possibleTarget = GetBlock(currBlock.EndAddress);
|
|
||||||
|
|
||||||
currBlock.BrIndir.PossibleTargets.Add(possibleTarget);
|
|
||||||
|
|
||||||
if (!targetVisited)
|
|
||||||
{
|
{
|
||||||
possibleTarget.BrIndir = currBlock.BrIndir;
|
PropagatePushOp(visited, block, pushOpIndex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Try to find target for BRX (indirect branch) instructions.
|
||||||
|
hasNewTarget = false;
|
||||||
|
|
||||||
|
foreach (Block block in blocks)
|
||||||
|
{
|
||||||
|
if (block.GetLastOp() is OpCodeBranchIndir opBrIndir && opBrIndir.PossibleTargets.Count == 0)
|
||||||
|
{
|
||||||
|
ulong baseOffset = opBrIndir.Address + 8 + (ulong)opBrIndir.Offset;
|
||||||
|
|
||||||
|
// An indirect branch could go anywhere,
|
||||||
|
// try to get the possible target offsets from the constant buffer.
|
||||||
|
(int cbBaseOffset, int cbOffsetsCount) = FindBrxTargetRange(block, opBrIndir.Ra.Index);
|
||||||
|
|
||||||
|
if (cbOffsetsCount != 0)
|
||||||
|
{
|
||||||
|
hasNewTarget = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < cbOffsetsCount; i++)
|
||||||
|
{
|
||||||
|
uint targetOffset = gpuAccessor.ConstantBuffer1Read(cbBaseOffset + i * 4);
|
||||||
|
Block target = GetBlock(baseOffset + targetOffset);
|
||||||
|
opBrIndir.PossibleTargets.Add(target);
|
||||||
|
target.Predecessors.Add(block);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we discovered new branch targets from the BRX instruction,
|
||||||
|
// we need another round of decoding to decode the new blocks.
|
||||||
|
// Additionally, we may have more SSY/PBK targets to propagate,
|
||||||
|
// and new BRX instructions.
|
||||||
}
|
}
|
||||||
|
while (hasNewTarget);
|
||||||
foreach (Block block in blocks.Where(x => x.PushOpCodes.Count != 0))
|
|
||||||
{
|
|
||||||
for (int pushOpIndex = 0; pushOpIndex < block.PushOpCodes.Count; pushOpIndex++)
|
|
||||||
{
|
|
||||||
PropagatePushOp(visited, block, pushOpIndex);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
funcs.Add(blocks.ToArray());
|
funcs.Add(blocks.ToArray());
|
||||||
}
|
}
|
||||||
|
@ -182,19 +189,6 @@ namespace Ryujinx.Graphics.Shader.Decoders
|
||||||
return funcs.ToArray();
|
return funcs.ToArray();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static bool HasBlockAfter(IGpuAccessor gpuAccessor, Block currBlock, ulong startAdddress)
|
|
||||||
{
|
|
||||||
if (!gpuAccessor.MemoryMapped(startAdddress + currBlock.EndAddress) ||
|
|
||||||
!gpuAccessor.MemoryMapped(startAdddress + currBlock.EndAddress + 7))
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
ulong inst = gpuAccessor.MemoryRead<ulong>(startAdddress + currBlock.EndAddress);
|
|
||||||
|
|
||||||
return inst != 0UL && inst != ShaderEndDelimiter;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static bool BinarySearch(List<Block> blocks, ulong address, out int index)
|
private static bool BinarySearch(List<Block> blocks, ulong address, out int index)
|
||||||
{
|
{
|
||||||
index = 0;
|
index = 0;
|
||||||
|
@ -320,6 +314,115 @@ namespace Ryujinx.Graphics.Shader.Decoders
|
||||||
opCode is OpCodeExit;
|
opCode is OpCodeExit;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static (int, int) FindBrxTargetRange(Block block, int brxReg)
|
||||||
|
{
|
||||||
|
// Try to match the following pattern:
|
||||||
|
//
|
||||||
|
// IMNMX.U32 Rx, Rx, UpperBound, PT
|
||||||
|
// SHL Rx, Rx, 0x2
|
||||||
|
// LDC Rx, c[0x1][Rx+BaseOffset]
|
||||||
|
//
|
||||||
|
// Here, Rx is an arbitrary register, "UpperBound" and "BaseOffset" are constants.
|
||||||
|
// The above pattern is assumed to be generated by the compiler before BRX,
|
||||||
|
// as the instruction is usually used to implement jump tables for switch statement optimizations.
|
||||||
|
// On a successful match, "BaseOffset" is the offset in bytes where the jump offsets are
|
||||||
|
// located on the constant buffer, and "UpperBound" is the total number of offsets for the BRX, minus 1.
|
||||||
|
|
||||||
|
HashSet<Block> visited = new HashSet<Block>();
|
||||||
|
|
||||||
|
var ldcLocation = FindFirstRegWrite(visited, new BlockLocation(block, block.OpCodes.Count - 1), brxReg);
|
||||||
|
if (ldcLocation.Block == null || ldcLocation.Block.OpCodes[ldcLocation.Index] is not OpCodeLdc opLdc)
|
||||||
|
{
|
||||||
|
return (0, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (opLdc.Slot != 1 || opLdc.IndexMode != CbIndexMode.Default)
|
||||||
|
{
|
||||||
|
return (0, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
var shlLocation = FindFirstRegWrite(visited, ldcLocation, opLdc.Ra.Index);
|
||||||
|
if (shlLocation.Block == null || shlLocation.Block.OpCodes[shlLocation.Index] is not OpCodeAluImm opShl)
|
||||||
|
{
|
||||||
|
return (0, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (opShl.Emitter != InstEmit.Shl || opShl.Immediate != 2)
|
||||||
|
{
|
||||||
|
return (0, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
var imnmxLocation = FindFirstRegWrite(visited, shlLocation, opShl.Ra.Index);
|
||||||
|
if (imnmxLocation.Block == null || imnmxLocation.Block.OpCodes[imnmxLocation.Index] is not OpCodeAluImm opImnmx)
|
||||||
|
{
|
||||||
|
return (0, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool isImnmxS32 = opImnmx.RawOpCode.Extract(48);
|
||||||
|
|
||||||
|
if (opImnmx.Emitter != InstEmit.Imnmx || isImnmxS32 || !opImnmx.Predicate39.IsPT || opImnmx.InvertP)
|
||||||
|
{
|
||||||
|
return (0, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (opLdc.Offset, opImnmx.Immediate + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
private struct BlockLocation
|
||||||
|
{
|
||||||
|
public Block Block { get; }
|
||||||
|
public int Index { get; }
|
||||||
|
|
||||||
|
public BlockLocation(Block block, int index)
|
||||||
|
{
|
||||||
|
Block = block;
|
||||||
|
Index = index;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static BlockLocation FindFirstRegWrite(HashSet<Block> visited, BlockLocation location, int regIndex)
|
||||||
|
{
|
||||||
|
Queue<BlockLocation> toVisit = new Queue<BlockLocation>();
|
||||||
|
toVisit.Enqueue(location);
|
||||||
|
visited.Add(location.Block);
|
||||||
|
|
||||||
|
while (toVisit.TryDequeue(out var currentLocation))
|
||||||
|
{
|
||||||
|
Block block = currentLocation.Block;
|
||||||
|
for (int i = currentLocation.Index - 1; i >= 0; i--)
|
||||||
|
{
|
||||||
|
if (WritesToRegister(block.OpCodes[i], regIndex))
|
||||||
|
{
|
||||||
|
return new BlockLocation(block, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
foreach (Block predecessor in block.Predecessors)
|
||||||
|
{
|
||||||
|
if (visited.Add(predecessor))
|
||||||
|
{
|
||||||
|
toVisit.Enqueue(new BlockLocation(predecessor, predecessor.OpCodes.Count));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return new BlockLocation(null, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static bool WritesToRegister(OpCode opCode, int regIndex)
|
||||||
|
{
|
||||||
|
// Predicate instruction only ever writes to predicate, so we shouldn't check those.
|
||||||
|
if (opCode.Emitter == InstEmit.Fsetp ||
|
||||||
|
opCode.Emitter == InstEmit.Hsetp2 ||
|
||||||
|
opCode.Emitter == InstEmit.Isetp ||
|
||||||
|
opCode.Emitter == InstEmit.R2p)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return opCode is IOpCodeRd opRd && opRd.Rd.Index == regIndex;
|
||||||
|
}
|
||||||
|
|
||||||
private enum MergeType
|
private enum MergeType
|
||||||
{
|
{
|
||||||
Brk = 0,
|
Brk = 0,
|
||||||
|
@ -388,6 +491,8 @@ namespace Ryujinx.Graphics.Shader.Decoders
|
||||||
{
|
{
|
||||||
OpCodePush pushOp = currBlock.PushOpCodes[pushOpIndex];
|
OpCodePush pushOp = currBlock.PushOpCodes[pushOpIndex];
|
||||||
|
|
||||||
|
Block target = blocks[pushOp.GetAbsoluteAddress()];
|
||||||
|
|
||||||
Stack<PathBlockState> workQueue = new Stack<PathBlockState>();
|
Stack<PathBlockState> workQueue = new Stack<PathBlockState>();
|
||||||
|
|
||||||
HashSet<Block> visited = new HashSet<Block>();
|
HashSet<Block> visited = new HashSet<Block>();
|
||||||
|
@ -497,10 +602,12 @@ namespace Ryujinx.Graphics.Shader.Decoders
|
||||||
if (branchStack.Count == 0)
|
if (branchStack.Count == 0)
|
||||||
{
|
{
|
||||||
// If the entire stack was consumed, then the current pop instruction
|
// If the entire stack was consumed, then the current pop instruction
|
||||||
// just consumed the address from out push instruction.
|
// just consumed the address from our push instruction.
|
||||||
op.Targets.Add(pushOp, op.Targets.Count);
|
if (op.Targets.TryAdd(pushOp, op.Targets.Count))
|
||||||
|
{
|
||||||
pushOp.PopOps.TryAdd(op, Local());
|
pushOp.PopOps.Add(op, Local());
|
||||||
|
target.Predecessors.Add(current);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
|
@ -7,6 +7,11 @@
|
||||||
// No default log output.
|
// No default log output.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint ConstantBuffer1Read(int offset)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
T MemoryRead<T>(ulong address) where T : unmanaged;
|
T MemoryRead<T>(ulong address) where T : unmanaged;
|
||||||
|
|
||||||
bool MemoryMapped(ulong address)
|
bool MemoryMapped(ulong address)
|
||||||
|
|
|
@ -25,6 +25,12 @@ namespace Ryujinx.Graphics.Shader.Instructions
|
||||||
{
|
{
|
||||||
OpCodeBranchIndir op = (OpCodeBranchIndir)context.CurrOp;
|
OpCodeBranchIndir op = (OpCodeBranchIndir)context.CurrOp;
|
||||||
|
|
||||||
|
if (op.PossibleTargets.Count == 0)
|
||||||
|
{
|
||||||
|
context.Config.GpuAccessor.Log($"Failed to find targets for BRX instruction at 0x{op.Address:X}.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
int offset = (int)op.Address + 8 + op.Offset;
|
int offset = (int)op.Address + 8 + op.Offset;
|
||||||
|
|
||||||
Operand address = context.IAdd(Register(op.Ra), Const(offset));
|
Operand address = context.IAdd(Register(op.Ra), Const(offset));
|
||||||
|
|
Loading…
Reference in a new issue