From ddf4b92a9cfbe98f798dd86a7c123b065a832d51 Mon Sep 17 00:00:00 2001 From: riperiperi Date: Mon, 29 Mar 2021 21:52:25 +0100 Subject: [PATCH 01/10] Implement parallel host shader cache compilation. --- Ryujinx.Graphics.GAL/IProgram.cs | 2 + Ryujinx.Graphics.GAL/ProgramLinkStatus.cs | 9 + Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs | 494 +++++++++++------- .../Shader/ShaderCompileTask.cs | 81 +++ Ryujinx.Graphics.OpenGL/HwCapabilities.cs | 2 + Ryujinx.Graphics.OpenGL/Program.cs | 80 ++- Ryujinx.Graphics.OpenGL/Renderer.cs | 16 +- 7 files changed, 452 insertions(+), 232 deletions(-) create mode 100644 Ryujinx.Graphics.GAL/ProgramLinkStatus.cs create mode 100644 Ryujinx.Graphics.Gpu/Shader/ShaderCompileTask.cs diff --git a/Ryujinx.Graphics.GAL/IProgram.cs b/Ryujinx.Graphics.GAL/IProgram.cs index 5ab8346f2..272a2f7d6 100644 --- a/Ryujinx.Graphics.GAL/IProgram.cs +++ b/Ryujinx.Graphics.GAL/IProgram.cs @@ -4,6 +4,8 @@ namespace Ryujinx.Graphics.GAL { public interface IProgram : IDisposable { + ProgramLinkStatus CheckProgramLink(bool blocking); + byte[] GetBinary(); } } diff --git a/Ryujinx.Graphics.GAL/ProgramLinkStatus.cs b/Ryujinx.Graphics.GAL/ProgramLinkStatus.cs new file mode 100644 index 000000000..5ca1be8c3 --- /dev/null +++ b/Ryujinx.Graphics.GAL/ProgramLinkStatus.cs @@ -0,0 +1,9 @@ +namespace Ryujinx.Graphics.GAL +{ + public enum ProgramLinkStatus + { + Incomplete, + Success, + Failure + } +} diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index cd20a5a23..96b836c52 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -10,6 +10,7 @@ using System; using System.Collections.Generic; using System.Diagnostics; using System.Threading; +using System.Threading.Tasks; namespace Ryujinx.Graphics.Gpu.Shader { @@ -102,234 +103,327 @@ namespace Ryujinx.Graphics.Gpu.Shader progressReportThread.Start(progressReportEvent); } - for (int programIndex = 0; programIndex < 
guestProgramList.Length; programIndex++) + // Make sure these are initialized before doing compilation. + Capabilities caps = _context.Capabilities; + + int maxTaskCount = Math.Min(Environment.ProcessorCount, 8); + int programIndex = 0; + List activeTasks = new List(); + + // This thread dispatches tasks to do shader translation, and creates programs that OpenGL will link in the background. + // The program link status is checked in a non-blocking manner so that multiple shaders can be compiled at once. + + while (programIndex < guestProgramList.Length || activeTasks.Count > 0) { - Hash128 key = guestProgramList[programIndex]; - - byte[] hostProgramBinary = _cacheManager.GetHostProgramByHash(ref key); - bool hasHostCache = hostProgramBinary != null; - - IProgram hostProgram = null; - - // If the program sources aren't in the cache, compile from saved guest program. - byte[] guestProgram = _cacheManager.GetGuestProgramByHash(ref key); - - if (guestProgram == null) + if (activeTasks.Count < maxTaskCount && programIndex < guestProgramList.Length) { - Logger.Error?.Print(LogClass.Gpu, $"Ignoring orphan shader hash {key} in cache (is the cache incomplete?)"); + // Begin a new shader compilation. + Hash128 key = guestProgramList[programIndex]; - // Should not happen, but if someone messed with the cache it's better to catch it. - invalidEntries?.Add(key); + byte[] hostProgramBinary = _cacheManager.GetHostProgramByHash(ref key); + bool hasHostCache = hostProgramBinary != null; - continue; - } + IProgram hostProgram = null; - ReadOnlySpan guestProgramReadOnlySpan = guestProgram; + // If the program sources aren't in the cache, compile from saved guest program. 
+ byte[] guestProgram = _cacheManager.GetGuestProgramByHash(ref key); - ReadOnlySpan cachedShaderEntries = GuestShaderCacheEntry.Parse(ref guestProgramReadOnlySpan, out GuestShaderCacheHeader fileHeader); - - if (cachedShaderEntries[0].Header.Stage == ShaderStage.Compute) - { - Debug.Assert(cachedShaderEntries.Length == 1); - - GuestShaderCacheEntry entry = cachedShaderEntries[0]; - - HostShaderCacheEntry[] hostShaderEntries = null; - - // Try loading host shader binary. - if (hasHostCache) + if (guestProgram == null) { - hostShaderEntries = HostShaderCacheEntry.Parse(hostProgramBinary, out ReadOnlySpan hostProgramBinarySpan); - hostProgramBinary = hostProgramBinarySpan.ToArray(); - hostProgram = _context.Renderer.LoadProgramBinary(hostProgramBinary); + Logger.Error?.Print(LogClass.Gpu, $"Ignoring orphan shader hash {key} in cache (is the cache incomplete?)"); + + // Should not happen, but if someone messed with the cache it's better to catch it. + invalidEntries?.Add(key); + + continue; } - bool isHostProgramValid = hostProgram != null; + ReadOnlySpan guestProgramReadOnlySpan = guestProgram; - ShaderProgram program; - ShaderProgramInfo shaderProgramInfo; + ReadOnlySpan cachedShaderEntries = GuestShaderCacheEntry.Parse(ref guestProgramReadOnlySpan, out GuestShaderCacheHeader fileHeader); - // Reconstruct code holder. - if (isHostProgramValid) + if (cachedShaderEntries[0].Header.Stage == ShaderStage.Compute) { - program = new ShaderProgram(entry.Header.Stage, ""); - shaderProgramInfo = hostShaderEntries[0].ToShaderProgramInfo(); + Debug.Assert(cachedShaderEntries.Length == 1); + + GuestShaderCacheEntry entry = cachedShaderEntries[0]; + + HostShaderCacheEntry[] hostShaderEntries = null; + + // Try loading host shader binary. 
+ if (hasHostCache) + { + hostShaderEntries = HostShaderCacheEntry.Parse(hostProgramBinary, out ReadOnlySpan hostProgramBinarySpan); + hostProgramBinary = hostProgramBinarySpan.ToArray(); + hostProgram = _context.Renderer.LoadProgramBinary(hostProgramBinary); + } + + ShaderCompileTask task = new ShaderCompileTask(); + activeTasks.Add(task); + + task.OnCompiled(hostProgram, (bool isHostProgramValid, ShaderCompileTask task) => + { + ShaderProgram program = null; + ShaderProgramInfo shaderProgramInfo = null; + + Task compileTask = Task.Run(() => + { + // Reconstruct code holder. + if (isHostProgramValid) + { + program = new ShaderProgram(entry.Header.Stage, ""); + shaderProgramInfo = hostShaderEntries[0].ToShaderProgramInfo(); + } + else + { + IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors); + + program = Translator.CreateContext(0, gpuAccessor, DefaultFlags | TranslationFlags.Compute).Translate(out shaderProgramInfo); + } + }); + + task.OnTask(compileTask, (bool _, ShaderCompileTask task) => + { + ShaderCodeHolder shader = new ShaderCodeHolder(program, shaderProgramInfo, entry.Code); + + // If the host program was rejected by the gpu driver or isn't in cache, try to build from program sources again. + if (!isHostProgramValid) + { + Logger.Info?.Print(LogClass.Gpu, $"Host shader {key} got invalidated, rebuilding from guest..."); + + // Compile shader and create program as the shader program binary got invalidated. + shader.HostShader = _context.Renderer.CompileShader(ShaderStage.Compute, shader.Program.Code); + hostProgram = _context.Renderer.CreateProgram(new IShader[] { shader.HostShader }, null); + + task.OnCompiled(hostProgram, (bool isNewProgramValid, ShaderCompileTask task) => + { + if (!isNewProgramValid) + { + return true; + } + + // As the host program was invalidated, save the new entry in the cache. 
+ hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), new ShaderCodeHolder[] { shader }); + + if (!isReadOnly) + { + if (hasHostCache) + { + _cacheManager.ReplaceHostProgram(ref key, hostProgramBinary); + } + else + { + Logger.Warning?.Print(LogClass.Gpu, $"Add missing host shader {key} in cache (is the cache incomplete?)"); + + _cacheManager.AddHostProgram(ref key, hostProgramBinary); + } + } + + _cpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shader)); + + return true; + }); + + return false; // Not finished: still need to compile the host program. + } + else + { + _cpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shader)); + + return true; + } + }); + + return false; // Not finished: translating the shaders. + }); + } else { - IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors); + Debug.Assert(cachedShaderEntries.Length == Constants.ShaderStages); - program = Translator.CreateContext(0, gpuAccessor, DefaultFlags | TranslationFlags.Compute).Translate(out shaderProgramInfo); - } + ShaderCodeHolder[] shaders = new ShaderCodeHolder[cachedShaderEntries.Length]; + List shaderPrograms = new List(); - ShaderCodeHolder shader = new ShaderCodeHolder(program, shaderProgramInfo, entry.Code); + TransformFeedbackDescriptor[] tfd = CacheHelper.ReadTransformFeedbackInformation(ref guestProgramReadOnlySpan, fileHeader); - // If the host program was rejected by the gpu driver or isn't in cache, try to build from program sources again. - if (hostProgram == null) - { - Logger.Info?.Print(LogClass.Gpu, $"Host shader {key} got invalidated, rebuilding from guest..."); + TranslationFlags flags = DefaultFlags; - // Compile shader and create program as the shader program binary got invalidated. 
- shader.HostShader = _context.Renderer.CompileShader(ShaderStage.Compute, shader.Program.Code); - hostProgram = _context.Renderer.CreateProgram(new IShader[] { shader.HostShader }, null); - - // As the host program was invalidated, save the new entry in the cache. - hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), new ShaderCodeHolder[] { shader }); - - if (!isReadOnly) + if (tfd != null) { - if (hasHostCache) - { - _cacheManager.ReplaceHostProgram(ref key, hostProgramBinary); - } - else - { - Logger.Warning?.Print(LogClass.Gpu, $"Add missing host shader {key} in cache (is the cache incomplete?)"); - - _cacheManager.AddHostProgram(ref key, hostProgramBinary); - } + flags |= TranslationFlags.Feedback; } + + TranslationCounts counts = new TranslationCounts(); + + HostShaderCacheEntry[] hostShaderEntries = null; + + // Try loading host shader binary. + if (hasHostCache) + { + hostShaderEntries = HostShaderCacheEntry.Parse(hostProgramBinary, out ReadOnlySpan hostProgramBinarySpan); + hostProgramBinary = hostProgramBinarySpan.ToArray(); + hostProgram = _context.Renderer.LoadProgramBinary(hostProgramBinary); + } + + ShaderCompileTask task = new ShaderCompileTask(); + activeTasks.Add(task); + + GuestShaderCacheEntry[] entries = cachedShaderEntries.ToArray(); + + task.OnCompiled(hostProgram, (bool isHostProgramValid, ShaderCompileTask task) => + { + Task compileTask = Task.Run(() => + { + // Reconstruct code holder. 
+ for (int i = 0; i < entries.Length; i++) + { + GuestShaderCacheEntry entry = entries[i]; + + if (entry == null) + { + continue; + } + + ShaderProgram program; + + if (entry.Header.SizeA != 0) + { + ShaderProgramInfo shaderProgramInfo; + + if (isHostProgramValid) + { + program = new ShaderProgram(entry.Header.Stage, ""); + shaderProgramInfo = hostShaderEntries[i].ToShaderProgramInfo(); + } + else + { + IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors); + + TranslatorContext translatorContext = Translator.CreateContext(0, gpuAccessor, flags, counts); + TranslatorContext translatorContext2 = Translator.CreateContext((ulong)entry.Header.Size, gpuAccessor, flags | TranslationFlags.VertexA, counts); + + program = translatorContext.Translate(out shaderProgramInfo, translatorContext2); + } + + // NOTE: Vertex B comes first in the shader cache. + byte[] code = entry.Code.AsSpan().Slice(0, entry.Header.Size).ToArray(); + byte[] code2 = entry.Code.AsSpan().Slice(entry.Header.Size, entry.Header.SizeA).ToArray(); + + shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, code, code2); + } + else + { + ShaderProgramInfo shaderProgramInfo; + + if (isHostProgramValid) + { + program = new ShaderProgram(entry.Header.Stage, ""); + shaderProgramInfo = hostShaderEntries[i].ToShaderProgramInfo(); + } + else + { + IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors); + + program = Translator.CreateContext(0, gpuAccessor, flags, counts).Translate(out shaderProgramInfo); + } + + shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, entry.Code); + } + + shaderPrograms.Add(program); + } + }); + + task.OnTask(compileTask, (bool _, ShaderCompileTask task) => + { + // If the host program was rejected by the gpu driver or isn't in cache, try to build from program sources again. 
+ if (!isHostProgramValid) + { + Logger.Info?.Print(LogClass.Gpu, $"Host shader {key} got invalidated, rebuilding from guest..."); + + List hostShaders = new List(); + + // Compile shaders and create program as the shader program binary got invalidated. + for (int stage = 0; stage < Constants.ShaderStages; stage++) + { + ShaderProgram program = shaders[stage]?.Program; + + if (program == null) + { + continue; + } + + IShader hostShader = _context.Renderer.CompileShader(program.Stage, program.Code); + + shaders[stage].HostShader = hostShader; + + hostShaders.Add(hostShader); + } + + hostProgram = _context.Renderer.CreateProgram(hostShaders.ToArray(), tfd); + + task.OnCompiled(hostProgram, (bool isNewProgramValid, ShaderCompileTask task) => + { + if (!isNewProgramValid) + { + return true; + } + + // As the host program was invalidated, save the new entry in the cache. + hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), shaders); + + if (!isReadOnly) + { + if (hasHostCache) + { + _cacheManager.ReplaceHostProgram(ref key, hostProgramBinary); + } + else + { + Logger.Warning?.Print(LogClass.Gpu, $"Add missing host shader {key} in cache (is the cache incomplete?)"); + + _cacheManager.AddHostProgram(ref key, hostProgramBinary); + } + } + + _gpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shaders)); + + return true; + }); + + return false; // Not finished: still need to compile the host program. + } + else + { + _gpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shaders)); + + return true; + } + }); + + return false; // Not finished: translating the shaders. + }); } - _cpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shader)); + _shaderCount = ++programIndex; } - else + + // Process the queue. 
+ for (int i = 0; i < activeTasks.Count; i++) { - Debug.Assert(cachedShaderEntries.Length == Constants.ShaderStages); + ShaderCompileTask task = activeTasks[i]; - ShaderCodeHolder[] shaders = new ShaderCodeHolder[cachedShaderEntries.Length]; - List shaderPrograms = new List(); - - TransformFeedbackDescriptor[] tfd = CacheHelper.ReadTransformFeedbackInformation(ref guestProgramReadOnlySpan, fileHeader); - - TranslationFlags flags = DefaultFlags; - - if (tfd != null) + if (task.IsDone()) { - flags |= TranslationFlags.Feedback; + activeTasks.RemoveAt(i--); } - - TranslationCounts counts = new TranslationCounts(); - - HostShaderCacheEntry[] hostShaderEntries = null; - - // Try loading host shader binary. - if (hasHostCache) - { - hostShaderEntries = HostShaderCacheEntry.Parse(hostProgramBinary, out ReadOnlySpan hostProgramBinarySpan); - hostProgramBinary = hostProgramBinarySpan.ToArray(); - hostProgram = _context.Renderer.LoadProgramBinary(hostProgramBinary); - } - - bool isHostProgramValid = hostProgram != null; - - // Reconstruct code holder. 
- for (int i = 0; i < cachedShaderEntries.Length; i++) - { - GuestShaderCacheEntry entry = cachedShaderEntries[i]; - - if (entry == null) - { - continue; - } - - ShaderProgram program; - - if (entry.Header.SizeA != 0) - { - ShaderProgramInfo shaderProgramInfo; - - if (isHostProgramValid) - { - program = new ShaderProgram(entry.Header.Stage, ""); - shaderProgramInfo = hostShaderEntries[i].ToShaderProgramInfo(); - } - else - { - IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors); - - TranslatorContext translatorContext = Translator.CreateContext(0, gpuAccessor, flags, counts); - TranslatorContext translatorContext2 = Translator.CreateContext((ulong)entry.Header.Size, gpuAccessor, flags | TranslationFlags.VertexA, counts); - - program = translatorContext.Translate(out shaderProgramInfo, translatorContext2); - } - - // NOTE: Vertex B comes first in the shader cache. - byte[] code = entry.Code.AsSpan().Slice(0, entry.Header.Size).ToArray(); - byte[] code2 = entry.Code.AsSpan().Slice(entry.Header.Size, entry.Header.SizeA).ToArray(); - - shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, code, code2); - } - else - { - ShaderProgramInfo shaderProgramInfo; - - if (isHostProgramValid) - { - program = new ShaderProgram(entry.Header.Stage, ""); - shaderProgramInfo = hostShaderEntries[i].ToShaderProgramInfo(); - } - else - { - IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors); - - program = Translator.CreateContext(0, gpuAccessor, flags, counts).Translate(out shaderProgramInfo); - } - - shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, entry.Code); - } - - shaderPrograms.Add(program); - } - - // If the host program was rejected by the gpu driver or isn't in cache, try to build from program sources again. 
- if (!isHostProgramValid) - { - Logger.Info?.Print(LogClass.Gpu, $"Host shader {key} got invalidated, rebuilding from guest..."); - - List hostShaders = new List(); - - // Compile shaders and create program as the shader program binary got invalidated. - for (int stage = 0; stage < Constants.ShaderStages; stage++) - { - ShaderProgram program = shaders[stage]?.Program; - - if (program == null) - { - continue; - } - - IShader hostShader = _context.Renderer.CompileShader(program.Stage, program.Code); - - shaders[stage].HostShader = hostShader; - - hostShaders.Add(hostShader); - } - - hostProgram = _context.Renderer.CreateProgram(hostShaders.ToArray(), tfd); - - // As the host program was invalidated, save the new entry in the cache. - hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), shaders); - - if (!isReadOnly) - { - if (hasHostCache) - { - _cacheManager.ReplaceHostProgram(ref key, hostProgramBinary); - } - else - { - Logger.Warning?.Print(LogClass.Gpu, $"Add missing host shader {key} in cache (is the cache incomplete?)"); - - _cacheManager.AddHostProgram(ref key, hostProgramBinary); - } - } - } - - _gpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shaders)); } - _shaderCount = programIndex + 1; + if (activeTasks.Count == maxTaskCount) + { + Thread.Sleep(1); + } } if (!isReadOnly) @@ -458,6 +552,8 @@ namespace Ryujinx.Graphics.Gpu.Shader IProgram hostProgram = _context.Renderer.CreateProgram(new IShader[] { shader.HostShader }, null); + hostProgram.CheckProgramLink(true); + byte[] hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), new ShaderCodeHolder[] { shader }); cpShader = new ShaderBundle(hostProgram, shader); @@ -598,6 +694,8 @@ namespace Ryujinx.Graphics.Gpu.Shader IProgram hostProgram = _context.Renderer.CreateProgram(hostShaders.ToArray(), tfd); + hostProgram.CheckProgramLink(true); + byte[] hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), shaders); gpShaders = new 
ShaderBundle(hostProgram, shaders); diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCompileTask.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCompileTask.cs new file mode 100644 index 000000000..cc1b322ba --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCompileTask.cs @@ -0,0 +1,81 @@ +using Ryujinx.Graphics.GAL; +using System; +using System.Threading.Tasks; + +namespace Ryujinx.Graphics.Gpu.Shader +{ + delegate bool ShaderCompileTaskCallback(bool success, ShaderCompileTask task); + + /// + /// A class that represents a shader compilation. + /// + class ShaderCompileTask + { + private bool _compiling; + + private Task _programsTask; + private IProgram _program; + + private ShaderCompileTaskCallback _action; + + /// + /// Check the completion status of the shader compile task, and run callbacks on step completion. + /// Calling this periodically is required to progress through steps of the compilation. + /// + /// True if the task is complete, false if it is in progress + public bool IsDone() + { + if (_compiling) + { + ProgramLinkStatus status = _program.CheckProgramLink(false); + + if (status != ProgramLinkStatus.Incomplete) + { + return _action(status == ProgramLinkStatus.Success, this); + } + } + else + { + // Waiting on the task. + + if (_programsTask.IsCompleted) + { + return _action(true, this); + } + } + + return false; + } + + /// + /// Run a callback when the specified task has completed. + /// + /// The task object that needs to complete + /// The action to perform when it is complete + public void OnTask(Task task, ShaderCompileTaskCallback action) + { + _compiling = false; + + _programsTask = task; + _action = action; + } + + /// + /// Run a callback when the specified program has been linked. 
+ /// + /// The program that needs to be linked + /// The action to perform when linking is complete + public void OnCompiled(IProgram program, ShaderCompileTaskCallback action) + { + _compiling = true; + + _program = program; + _action = action; + + if (program == null) + { + action(false, this); + } + } + } +} diff --git a/Ryujinx.Graphics.OpenGL/HwCapabilities.cs b/Ryujinx.Graphics.OpenGL/HwCapabilities.cs index 6795b4234..08b0e6af8 100644 --- a/Ryujinx.Graphics.OpenGL/HwCapabilities.cs +++ b/Ryujinx.Graphics.OpenGL/HwCapabilities.cs @@ -10,6 +10,7 @@ namespace Ryujinx.Graphics.OpenGL private static readonly Lazy _supportsPolygonOffsetClamp = new Lazy(() => HasExtension("GL_EXT_polygon_offset_clamp")); private static readonly Lazy _supportsViewportSwizzle = new Lazy(() => HasExtension("GL_NV_viewport_swizzle")); private static readonly Lazy _supportsSeamlessCubemapPerTexture = new Lazy(() => HasExtension("GL_ARB_seamless_cubemap_per_texture")); + private static readonly Lazy _supportsParallelShaderCompile = new Lazy(() => HasExtension("GL_ARB_parallel_shader_compile")); private static readonly Lazy _maximumComputeSharedMemorySize = new Lazy(() => GetLimit(All.MaxComputeSharedMemorySize)); private static readonly Lazy _storageBufferOffsetAlignment = new Lazy(() => GetLimit(All.ShaderStorageBufferOffsetAlignment)); @@ -34,6 +35,7 @@ namespace Ryujinx.Graphics.OpenGL public static bool SupportsPolygonOffsetClamp => _supportsPolygonOffsetClamp.Value; public static bool SupportsViewportSwizzle => _supportsViewportSwizzle.Value; public static bool SupportsSeamlessCubemapPerTexture => _supportsSeamlessCubemapPerTexture.Value; + public static bool SupportsParallelShaderCompile => _supportsParallelShaderCompile.Value; public static bool SupportsNonConstantTextureOffset => _gpuVendor.Value == GpuVendor.Nvidia; public static bool RequiresSyncFlush => _gpuVendor.Value == GpuVendor.Amd || _gpuVendor.Value == GpuVendor.IntelWindows || _gpuVendor.Value == GpuVendor.IntelUnix; 
diff --git a/Ryujinx.Graphics.OpenGL/Program.cs b/Ryujinx.Graphics.OpenGL/Program.cs index d39e181d9..decc75b1e 100644 --- a/Ryujinx.Graphics.OpenGL/Program.cs +++ b/Ryujinx.Graphics.OpenGL/Program.cs @@ -13,11 +13,26 @@ namespace Ryujinx.Graphics.OpenGL { public int Handle { get; private set; } - public int FragmentIsBgraUniform { get; } - public int FragmentRenderScaleUniform { get; } - public int ComputeRenderScaleUniform { get; } + public int FragmentIsBgraUniform { get; private set; } + public int FragmentRenderScaleUniform { get; private set; } + public int ComputeRenderScaleUniform { get; private set; } - public bool IsLinked { get; private set; } + public bool IsLinked + { + get + { + if (_status == ProgramLinkStatus.Incomplete) + { + CheckProgramLink(true); + } + + return _status == ProgramLinkStatus.Success; + } + } + + private bool _initialized; + private ProgramLinkStatus _status = ProgramLinkStatus.Incomplete; + private IShader[] _shaders; public Program(IShader[] shaders, TransformFeedbackDescriptor[] transformFeedbackDescriptors) { @@ -82,18 +97,7 @@ namespace Ryujinx.Graphics.OpenGL GL.LinkProgram(Handle); - for (int index = 0; index < shaders.Length; index++) - { - int shaderHandle = ((Shader)shaders[index]).Handle; - - GL.DetachShader(Handle, shaderHandle); - } - - CheckProgramLink(); - - FragmentIsBgraUniform = GL.GetUniformLocation(Handle, "is_bgra"); - FragmentRenderScaleUniform = GL.GetUniformLocation(Handle, "fp_renderScale"); - ComputeRenderScaleUniform = GL.GetUniformLocation(Handle, "cp_renderScale"); + _shaders = shaders; } public Program(ReadOnlySpan code) @@ -109,32 +113,60 @@ namespace Ryujinx.Graphics.OpenGL GL.ProgramBinary(Handle, binaryFormat, (IntPtr)ptr, code.Length - 4); } } - - CheckProgramLink(); - - FragmentIsBgraUniform = GL.GetUniformLocation(Handle, "is_bgra"); - FragmentRenderScaleUniform = GL.GetUniformLocation(Handle, "fp_renderScale"); - ComputeRenderScaleUniform = GL.GetUniformLocation(Handle, "cp_renderScale"); } 
public void Bind() { + if (!_initialized) + { + FragmentIsBgraUniform = GL.GetUniformLocation(Handle, "is_bgra"); + FragmentRenderScaleUniform = GL.GetUniformLocation(Handle, "fp_renderScale"); + ComputeRenderScaleUniform = GL.GetUniformLocation(Handle, "cp_renderScale"); + + _initialized = true; + } + GL.UseProgram(Handle); } - private void CheckProgramLink() + public ProgramLinkStatus CheckProgramLink(bool blocking) { + if (!blocking && HwCapabilities.SupportsParallelShaderCompile) + { + GL.GetProgram(Handle, (GetProgramParameterName)ArbParallelShaderCompile.CompletionStatusArb, out int completed); + + if (completed == 0) + { + return ProgramLinkStatus.Incomplete; + } + } + GL.GetProgram(Handle, GetProgramParameterName.LinkStatus, out int status); + if (_shaders != null) + { + for (int index = 0; index < _shaders.Length; index++) + { + int shaderHandle = ((Shader)_shaders[index]).Handle; + + GL.DetachShader(Handle, shaderHandle); + } + + _shaders = null; + } + if (status == 0) { // Use GL.GetProgramInfoLog(Handle), it may be too long to print on the log. 
+ _status = ProgramLinkStatus.Failure; Logger.Debug?.Print(LogClass.Gpu, "Shader linking failed."); } else { - IsLinked = true; + _status = ProgramLinkStatus.Success; } + + return _status; } public byte[] GetBinary() diff --git a/Ryujinx.Graphics.OpenGL/Renderer.cs b/Ryujinx.Graphics.OpenGL/Renderer.cs index cc8fa195d..0382ba86e 100644 --- a/Ryujinx.Graphics.OpenGL/Renderer.cs +++ b/Ryujinx.Graphics.OpenGL/Renderer.cs @@ -130,6 +130,11 @@ namespace Ryujinx.Graphics.OpenGL PrintGpuInformation(); + if (HwCapabilities.SupportsParallelShaderCompile) + { + GL.Arb.MaxShaderCompilerThreads(Math.Min(Environment.ProcessorCount, 8)); + } + _counters.Initialize(); } @@ -177,16 +182,7 @@ namespace Ryujinx.Graphics.OpenGL public IProgram LoadProgramBinary(byte[] programBinary) { - Program program = new Program(programBinary); - - if (program.IsLinked) - { - return program; - } - - program.Dispose(); - - return null; + return new Program(programBinary); } public void CreateSync(ulong id) From 20d560e3f925e387c9c28705a120202ca38abd8b Mon Sep 17 00:00:00 2001 From: riperiperi Date: Fri, 2 Apr 2021 22:47:14 +0100 Subject: [PATCH 02/10] The new host program needs to be saved even if it isn't valid. --- Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index 96b836c52..35908cb91 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -197,11 +197,6 @@ namespace Ryujinx.Graphics.Gpu.Shader task.OnCompiled(hostProgram, (bool isNewProgramValid, ShaderCompileTask task) => { - if (!isNewProgramValid) - { - return true; - } - // As the host program was invalidated, save the new entry in the cache. 
hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), new ShaderCodeHolder[] { shader }); @@ -365,11 +360,6 @@ namespace Ryujinx.Graphics.Gpu.Shader task.OnCompiled(hostProgram, (bool isNewProgramValid, ShaderCompileTask task) => { - if (!isNewProgramValid) - { - return true; - } - // As the host program was invalidated, save the new entry in the cache. hostProgramBinary = HostShaderCacheEntry.Create(hostProgram.GetBinary(), shaders); From a0aa09912cb8f35ae06834b08308a128886f207f Mon Sep 17 00:00:00 2001 From: riperiperi Date: Fri, 2 Apr 2021 23:05:55 +0100 Subject: [PATCH 03/10] Use event to wake the main thread on task completion --- Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs | 12 +++++++++--- Ryujinx.Graphics.Gpu/Shader/ShaderCompileTask.cs | 13 +++++++++++++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index 35908cb91..5ee41cdfb 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -110,6 +110,8 @@ namespace Ryujinx.Graphics.Gpu.Shader int programIndex = 0; List activeTasks = new List(); + AutoResetEvent taskDoneEvent = new AutoResetEvent(false); + // This thread dispatches tasks to do shader translation, and creates programs that OpenGL will link in the background. // The program link status is checked in a non-blocking manner so that multiple shaders can be compiled at once. 
@@ -158,7 +160,7 @@ namespace Ryujinx.Graphics.Gpu.Shader hostProgram = _context.Renderer.LoadProgramBinary(hostProgramBinary); } - ShaderCompileTask task = new ShaderCompileTask(); + ShaderCompileTask task = new ShaderCompileTask(taskDoneEvent); activeTasks.Add(task); task.OnCompiled(hostProgram, (bool isHostProgramValid, ShaderCompileTask task) => @@ -261,7 +263,7 @@ namespace Ryujinx.Graphics.Gpu.Shader hostProgram = _context.Renderer.LoadProgramBinary(hostProgramBinary); } - ShaderCompileTask task = new ShaderCompileTask(); + ShaderCompileTask task = new ShaderCompileTask(taskDoneEvent); activeTasks.Add(task); GuestShaderCacheEntry[] entries = cachedShaderEntries.ToArray(); @@ -412,7 +414,11 @@ namespace Ryujinx.Graphics.Gpu.Shader if (activeTasks.Count == maxTaskCount) { - Thread.Sleep(1); + // Wait for a task to be done, or for 1ms. + // Host shader compilation cannot signal when it is done, + // so the 1ms timeout is required to poll status. + + taskDoneEvent.WaitOne(1); } } diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCompileTask.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCompileTask.cs index cc1b322ba..ff48fab00 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCompileTask.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCompileTask.cs @@ -1,5 +1,6 @@ using Ryujinx.Graphics.GAL; using System; +using System.Threading; using System.Threading.Tasks; namespace Ryujinx.Graphics.Gpu.Shader @@ -17,6 +18,16 @@ namespace Ryujinx.Graphics.Gpu.Shader private IProgram _program; private ShaderCompileTaskCallback _action; + private AutoResetEvent _taskDoneEvent; + + /// + /// Create a new shader compile task, with an event to signal whenever a subtask completes. + /// + /// Event to signal when a subtask completes + public ShaderCompileTask(AutoResetEvent taskDoneEvent) + { + _taskDoneEvent = taskDoneEvent; + } /// /// Check the completion status of the shader compile task, and run callbacks on step completion. 
@@ -58,6 +69,8 @@ namespace Ryujinx.Graphics.Gpu.Shader _programsTask = task; _action = action; + + task.ContinueWith(task => _taskDoneEvent.Set()); } /// From 35eac315ab6f5cfb089422794e695fda3e9cfd53 Mon Sep 17 00:00:00 2001 From: riperiperi Date: Sun, 4 Apr 2021 14:01:33 +0100 Subject: [PATCH 04/10] The task isn't required for loading compute binary. --- Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs | 53 +++++++++++----------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index 5ee41cdfb..aae4d1c75 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -114,7 +114,7 @@ namespace Ryujinx.Graphics.Gpu.Shader // This thread dispatches tasks to do shader translation, and creates programs that OpenGL will link in the background. // The program link status is checked in a non-blocking manner so that multiple shaders can be compiled at once. - + while (programIndex < guestProgramList.Length || activeTasks.Count > 0) { if (activeTasks.Count < maxTaskCount && programIndex < guestProgramList.Length) @@ -168,29 +168,34 @@ namespace Ryujinx.Graphics.Gpu.Shader ShaderProgram program = null; ShaderProgramInfo shaderProgramInfo = null; - Task compileTask = Task.Run(() => + if (isHostProgramValid) { // Reconstruct code holder. - if (isHostProgramValid) - { - program = new ShaderProgram(entry.Header.Stage, ""); - shaderProgramInfo = hostShaderEntries[0].ToShaderProgramInfo(); - } - else + + program = new ShaderProgram(entry.Header.Stage, ""); + shaderProgramInfo = hostShaderEntries[0].ToShaderProgramInfo(); + + ShaderCodeHolder shader = new ShaderCodeHolder(program, shaderProgramInfo, entry.Code); + + _cpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shader)); + + return true; + } + else + { + // If the host program was rejected by the gpu driver or isn't in cache, try to build from program sources again. 
+ + Task compileTask = Task.Run(() => { IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors); program = Translator.CreateContext(0, gpuAccessor, DefaultFlags | TranslationFlags.Compute).Translate(out shaderProgramInfo); - } - }); + }); - task.OnTask(compileTask, (bool _, ShaderCompileTask task) => - { - ShaderCodeHolder shader = new ShaderCodeHolder(program, shaderProgramInfo, entry.Code); - - // If the host program was rejected by the gpu driver or isn't in cache, try to build from program sources again. - if (!isHostProgramValid) + task.OnTask(compileTask, (bool _, ShaderCompileTask task) => { + ShaderCodeHolder shader = new ShaderCodeHolder(program, shaderProgramInfo, entry.Code); + Logger.Info?.Print(LogClass.Gpu, $"Host shader {key} got invalidated, rebuilding from guest..."); // Compile shader and create program as the shader program binary got invalidated. @@ -222,18 +227,12 @@ namespace Ryujinx.Graphics.Gpu.Shader }); return false; // Not finished: still need to compile the host program. - } - else - { - _cpProgramsDiskCache.Add(key, new ShaderBundle(hostProgram, shader)); + }); - return true; - } - }); - - return false; // Not finished: translating the shaders. + return false; // Not finished: translating the program. + } }); - + } else { @@ -394,7 +393,7 @@ namespace Ryujinx.Graphics.Gpu.Shader } }); - return false; // Not finished: translating the shaders. + return false; // Not finished: translating the program. 
}); } From b1c3e01691507709bc4a6a23f02396f3b97803e5 Mon Sep 17 00:00:00 2001 From: riperiperi Date: Sun, 4 Apr 2021 19:15:15 +0100 Subject: [PATCH 05/10] Nit --- Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index aae4d1c75..2aef2f256 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -232,7 +232,6 @@ namespace Ryujinx.Graphics.Gpu.Shader return false; // Not finished: translating the program. } }); - } else { From 1239c82d2f2fa59a42561a4a278a9cdca5eca462 Mon Sep 17 00:00:00 2001 From: riperiperi Date: Wed, 7 Apr 2021 13:19:02 +0100 Subject: [PATCH 06/10] Forcibly enable threaded optimization on boot. --- .../GraphicsDriver/DriverUtilities.cs | 22 +++ Ryujinx.Common/GraphicsDriver/NVAPI/Nvapi.cs | 11 ++ .../NVAPI/NvapiUnicodeString.cs | 42 +++++ .../NVAPI/NvdrsApplicationV4.cs | 17 ++ .../GraphicsDriver/NVAPI/NvdrsProfile.cs | 16 ++ .../GraphicsDriver/NVAPI/NvdrsSetting.cs | 49 ++++++ .../GraphicsDriver/NVThreadedOptimization.cs | 163 ++++++++++++++++++ Ryujinx/Program.cs | 10 +- 8 files changed, 327 insertions(+), 3 deletions(-) create mode 100644 Ryujinx.Common/GraphicsDriver/DriverUtilities.cs create mode 100644 Ryujinx.Common/GraphicsDriver/NVAPI/Nvapi.cs create mode 100644 Ryujinx.Common/GraphicsDriver/NVAPI/NvapiUnicodeString.cs create mode 100644 Ryujinx.Common/GraphicsDriver/NVAPI/NvdrsApplicationV4.cs create mode 100644 Ryujinx.Common/GraphicsDriver/NVAPI/NvdrsProfile.cs create mode 100644 Ryujinx.Common/GraphicsDriver/NVAPI/NvdrsSetting.cs create mode 100644 Ryujinx.Common/GraphicsDriver/NVThreadedOptimization.cs diff --git a/Ryujinx.Common/GraphicsDriver/DriverUtilities.cs b/Ryujinx.Common/GraphicsDriver/DriverUtilities.cs new file mode 100644 index 000000000..60c176f83 --- /dev/null +++ b/Ryujinx.Common/GraphicsDriver/DriverUtilities.cs @@ -0,0 +1,22 @@ +using 
System; + +namespace Ryujinx.Common.GraphicsDriver +{ + public static class DriverUtilities + { + public static void ToggleOGLThreading(bool enabled) + { + Environment.SetEnvironmentVariable("mesa_glthread", enabled.ToString()); + Environment.SetEnvironmentVariable("__GL_THREADED_OPTIMIZATIONS", enabled ? "1" : "0"); + + try + { + NVThreadedOptimization.SetThreadedOptimization(enabled); + } + catch + { + // NVAPI is not available, or couldn't change the application profile. + } + } + } +} diff --git a/Ryujinx.Common/GraphicsDriver/NVAPI/Nvapi.cs b/Ryujinx.Common/GraphicsDriver/NVAPI/Nvapi.cs new file mode 100644 index 000000000..99eaa68f4 --- /dev/null +++ b/Ryujinx.Common/GraphicsDriver/NVAPI/Nvapi.cs @@ -0,0 +1,11 @@ +namespace Ryujinx.Common.GraphicsDriver.NVAPI +{ + enum Nvapi : uint + { + OglThreadControlId = 0x20C1221E, + + OglThreadControlDefault = 0, + OglThreadControlEnable = 1, + OglThreadControlDisable = 2 + } +} diff --git a/Ryujinx.Common/GraphicsDriver/NVAPI/NvapiUnicodeString.cs b/Ryujinx.Common/GraphicsDriver/NVAPI/NvapiUnicodeString.cs new file mode 100644 index 000000000..bfa039b89 --- /dev/null +++ b/Ryujinx.Common/GraphicsDriver/NVAPI/NvapiUnicodeString.cs @@ -0,0 +1,42 @@ +using System.Runtime.InteropServices; +using System.Text; + +namespace Ryujinx.Common.GraphicsDriver.NVAPI +{ + [StructLayout(LayoutKind.Sequential, Pack = 4)] + public unsafe struct NvapiUnicodeString + { + public fixed byte Data[4096]; + + public NvapiUnicodeString(string text) + { + Set(text); + } + + public string Get() + { + fixed (byte* data = Data) + { + string text = Encoding.Unicode.GetString(data, 4096); + + int index = text.IndexOf('\0'); + if (index > -1) + { + text = text.Remove(index); + } + + return text; + } + } + + public void Set(string text) + { + text += '\0'; + fixed (char* textPtr = text) + fixed (byte* data = Data) + { + int written = Encoding.Unicode.GetBytes(textPtr, text.Length, data, 4096); + } + } + } +} diff --git 
a/Ryujinx.Common/GraphicsDriver/NVAPI/NvdrsApplicationV4.cs b/Ryujinx.Common/GraphicsDriver/NVAPI/NvdrsApplicationV4.cs new file mode 100644 index 000000000..8b472cd16 --- /dev/null +++ b/Ryujinx.Common/GraphicsDriver/NVAPI/NvdrsApplicationV4.cs @@ -0,0 +1,17 @@ +using System.Runtime.InteropServices; + +namespace Ryujinx.Common.GraphicsDriver.NVAPI +{ + [StructLayout(LayoutKind.Sequential, Pack = 4)] + unsafe struct NvdrsApplicationV4 + { + public uint Version; + public uint IsPredefined; + public NvapiUnicodeString AppName; + public NvapiUnicodeString UserFriendlyName; + public NvapiUnicodeString Launcher; + public NvapiUnicodeString FileInFolder; + public uint Flags; + public NvapiUnicodeString CommandLine; + } +} diff --git a/Ryujinx.Common/GraphicsDriver/NVAPI/NvdrsProfile.cs b/Ryujinx.Common/GraphicsDriver/NVAPI/NvdrsProfile.cs new file mode 100644 index 000000000..5a325d082 --- /dev/null +++ b/Ryujinx.Common/GraphicsDriver/NVAPI/NvdrsProfile.cs @@ -0,0 +1,16 @@ +using System; +using System.Runtime.InteropServices; + +namespace Ryujinx.Common.GraphicsDriver.NVAPI +{ + [StructLayout(LayoutKind.Sequential, Pack = 1)] + unsafe struct NvdrsProfile + { + public uint Version; + public NvapiUnicodeString ProfileName; + public uint GpuSupport; + public uint IsPredefined; + public uint NumOfApps; + public uint NumOfSettings; + } +} diff --git a/Ryujinx.Common/GraphicsDriver/NVAPI/NvdrsSetting.cs b/Ryujinx.Common/GraphicsDriver/NVAPI/NvdrsSetting.cs new file mode 100644 index 000000000..ac188b35d --- /dev/null +++ b/Ryujinx.Common/GraphicsDriver/NVAPI/NvdrsSetting.cs @@ -0,0 +1,49 @@ +using System.Runtime.InteropServices; + +namespace Ryujinx.Common.GraphicsDriver.NVAPI +{ + enum NvdrsSettingType : uint + { + NvdrsDwordType, + NvdrsBinaryType, + NvdrsStringType, + NvdrsWstringType, + } + + enum NvdrsSettingLocation : uint + { + NvdrsCurrentProfileLocation, + NvdrsGlobalProfileLocation, + NvdrsBaseProfileLocation, + NvdrsDefaultProfileLocation, + } + + 
[StructLayout(LayoutKind.Explicit, Size = 0x3020)] + unsafe struct NvdrsSetting + { + [FieldOffset(0x0)] + public uint Version; + [FieldOffset(0x4)] + public NvapiUnicodeString SettingName; + [FieldOffset(0x1004)] + public Nvapi SettingId; + [FieldOffset(0x1008)] + public NvdrsSettingType SettingType; + [FieldOffset(0x100C)] + public NvdrsSettingLocation SettingLocation; + [FieldOffset(0x1010)] + public uint IsCurrentPredefined; + [FieldOffset(0x1014)] + public uint IsPredefinedValid; + + [FieldOffset(0x1018)] + public uint PredefinedValue; + [FieldOffset(0x1018)] + public NvapiUnicodeString PredefinedString; + + [FieldOffset(0x201C)] + public uint CurrentValue; + [FieldOffset(0x201C)] + public NvapiUnicodeString CurrentString; + } +} diff --git a/Ryujinx.Common/GraphicsDriver/NVThreadedOptimization.cs b/Ryujinx.Common/GraphicsDriver/NVThreadedOptimization.cs new file mode 100644 index 000000000..fee8336c6 --- /dev/null +++ b/Ryujinx.Common/GraphicsDriver/NVThreadedOptimization.cs @@ -0,0 +1,163 @@ +using Ryujinx.Common.GraphicsDriver.NVAPI; +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace Ryujinx.Common.GraphicsDriver +{ + static class NVThreadedOptimization + { + private const string ProfileName = "Ryujinx Nvidia Profile"; + + private const uint NvAPI_Initialize_ID = 0x0150E828; + private const uint NvAPI_DRS_CreateSession_ID = 0x0694D52E; + private const uint NvAPI_DRS_LoadSettings_ID = 0x375DBD6B; + private const uint NvAPI_DRS_FindProfileByName_ID = 0x7E4A9A0B; + private const uint NvAPI_DRS_CreateProfile_ID = 0x0CC176068; + private const uint NvAPI_DRS_CreateApplication_ID = 0x4347A9DE; + private const uint NvAPI_DRS_SetSetting_ID = 0x577DD202; + private const uint NvAPI_DRS_SaveSettings_ID = 0xFCBC7E14; + private const uint NvAPI_DRS_DestroySession_ID = 0x0DAD9CFF8; + + [DllImport("nvapi64")] + private static extern IntPtr nvapi_QueryInterface(uint id); + + private delegate int 
NvAPI_InitializeDelegate(); + private static NvAPI_InitializeDelegate NvAPI_Initialize; + + private delegate int NvAPI_DRS_CreateSessionDelegate(out long handle); + private static NvAPI_DRS_CreateSessionDelegate NvAPI_DRS_CreateSession; + + private delegate int NvAPI_DRS_LoadSettingsDelegate(long handle); + private static NvAPI_DRS_LoadSettingsDelegate NvAPI_DRS_LoadSettings; + + private delegate int NvAPI_DRS_FindProfileByNameDelegate(long handle, NvapiUnicodeString profileName, out long profileHandle); + private static NvAPI_DRS_FindProfileByNameDelegate NvAPI_DRS_FindProfileByName; + + private delegate int NvAPI_DRS_CreateProfileDelegate(long handle, ref NvdrsProfile profileInfo, out long profileHandle); + private static NvAPI_DRS_CreateProfileDelegate NvAPI_DRS_CreateProfile; + + private delegate int NvAPI_DRS_CreateApplicationDelegate(long handle, long profileHandle, ref NvdrsApplicationV4 app); + private static NvAPI_DRS_CreateApplicationDelegate NvAPI_DRS_CreateApplication; + + private delegate int NvAPI_DRS_SetSettingDelegate(long handle, long profileHandle, ref NvdrsSetting setting); + private static NvAPI_DRS_SetSettingDelegate NvAPI_DRS_SetSetting; + + private delegate int NvAPI_DRS_SaveSettingsDelegate(long handle); + private static NvAPI_DRS_SaveSettingsDelegate NvAPI_DRS_SaveSettings; + + private delegate int NvAPI_DRS_DestroySessionDelegate(long handle); + private static NvAPI_DRS_DestroySessionDelegate NvAPI_DRS_DestroySession; + + private static bool _initialized; + + private static void Check(int status) + { + if (status != 0) + { + throw new Exception($"NVAPI Error: {status}"); + } + } + + private static void Initialize() + { + if (!_initialized) + { + NvAPI_Initialize = NvAPI_Delegate<NvAPI_InitializeDelegate>(NvAPI_Initialize_ID); + + Check(NvAPI_Initialize()); + + NvAPI_DRS_CreateSession = NvAPI_Delegate<NvAPI_DRS_CreateSessionDelegate>(NvAPI_DRS_CreateSession_ID); + NvAPI_DRS_LoadSettings = NvAPI_Delegate<NvAPI_DRS_LoadSettingsDelegate>(NvAPI_DRS_LoadSettings_ID); + NvAPI_DRS_FindProfileByName = 
NvAPI_Delegate<NvAPI_DRS_FindProfileByNameDelegate>(NvAPI_DRS_FindProfileByName_ID); + NvAPI_DRS_CreateProfile = NvAPI_Delegate<NvAPI_DRS_CreateProfileDelegate>(NvAPI_DRS_CreateProfile_ID); + NvAPI_DRS_CreateApplication = NvAPI_Delegate<NvAPI_DRS_CreateApplicationDelegate>(NvAPI_DRS_CreateApplication_ID); + NvAPI_DRS_SetSetting = NvAPI_Delegate<NvAPI_DRS_SetSettingDelegate>(NvAPI_DRS_SetSetting_ID); + NvAPI_DRS_SaveSettings = NvAPI_Delegate<NvAPI_DRS_SaveSettingsDelegate>(NvAPI_DRS_SaveSettings_ID); + NvAPI_DRS_DestroySession = NvAPI_Delegate<NvAPI_DRS_DestroySessionDelegate>(NvAPI_DRS_DestroySession_ID); + + _initialized = true; + } + } + + private static uint MakeVersion<T>(uint version) where T : unmanaged + { + return (uint)Unsafe.SizeOf<T>() | version << 16; + } + + public static unsafe void SetThreadedOptimization(bool enabled) + { + Initialize(); + + uint targetValue = (uint)(enabled ? Nvapi.OglThreadControlEnable : Nvapi.OglThreadControlDisable); + + Check(NvAPI_Initialize()); + + Check(NvAPI_DRS_CreateSession(out long handle)); + + Check(NvAPI_DRS_LoadSettings(handle)); + + long profileHandle; + + // Check if the profile already exists. + + int status = NvAPI_DRS_FindProfileByName(handle, new NvapiUnicodeString(ProfileName), out profileHandle); + + if (status != 0) + { + NvdrsProfile profile = new NvdrsProfile { + Version = MakeVersion<NvdrsProfile>(1), + IsPredefined = 0, + GpuSupport = uint.MaxValue + }; + profile.ProfileName.Set(ProfileName); + Check(NvAPI_DRS_CreateProfile(handle, ref profile, out profileHandle)); + + NvdrsApplicationV4 application = new NvdrsApplicationV4 + { + Version = MakeVersion<NvdrsApplicationV4>(4), + IsPredefined = 0, + Flags = 3 // IsMetro, IsCommandLine + }; + application.AppName.Set("Ryujinx.exe"); + application.UserFriendlyName.Set("Ryujinx"); + application.Launcher.Set(""); + application.FileInFolder.Set(""); + + Check(NvAPI_DRS_CreateApplication(handle, profileHandle, ref application)); + } + + NvdrsSetting setting = new NvdrsSetting + { + Version = MakeVersion<NvdrsSetting>(1), + SettingId = Nvapi.OglThreadControlId, + SettingType = NvdrsSettingType.NvdrsDwordType, + SettingLocation = NvdrsSettingLocation.NvdrsCurrentProfileLocation, + IsCurrentPredefined = 0, + 
IsPredefinedValid = 0, + CurrentValue = targetValue, + PredefinedValue = targetValue + }; + + Check(NvAPI_DRS_SetSetting(handle, profileHandle, ref setting)); + + Check(NvAPI_DRS_SaveSettings(handle)); + + NvAPI_DRS_DestroySession(handle); + } + + private static T NvAPI_Delegate<T>(uint id) where T : class + { + IntPtr ptr = nvapi_QueryInterface(id); + + if (ptr != IntPtr.Zero) + { + return Marshal.GetDelegateForFunctionPointer(ptr, typeof(T)) as T; + } + else + { + return null; + } + } + } +} diff --git a/Ryujinx/Program.cs b/Ryujinx/Program.cs index 4df82da66..db31a2e56 100644 --- a/Ryujinx/Program.cs +++ b/Ryujinx/Program.cs @@ -1,6 +1,7 @@ using ARMeilleure.Translation.PTC; using Gtk; using Ryujinx.Common.Configuration; +using Ryujinx.Common.GraphicsDriver; using Ryujinx.Common.Logging; using Ryujinx.Common.System; using Ryujinx.Common.SystemInfo; @@ -136,6 +137,12 @@ namespace Ryujinx // Logging system information. PrintSystemInfo(); + // Force dedicated GPU if we can. + ForceDedicatedGpu.Nvidia(); + + // Enable OGL multithreading on the driver, when available. + DriverUtilities.ToggleOGLThreading(true); + // Initialize Gtk. Application.Init(); @@ -147,9 +154,6 @@ namespace Ryujinx UserErrorDialog.CreateUserErrorDialog(UserError.NoKeys); } - // Force dedicated GPU if we can. - ForceDedicatedGpu.Nvidia(); - // Show the main window UI. 
MainWindow mainWindow = new MainWindow(); mainWindow.Show(); From d6547abebf4fc1c6fd80b95f8baa0084f324982d Mon Sep 17 00:00:00 2001 From: riperiperi Date: Wed, 7 Apr 2021 18:15:17 +0100 Subject: [PATCH 07/10] Use IntPtr for handles, remove unsafe keyword --- .../GraphicsDriver/NVThreadedOptimization.cs | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Ryujinx.Common/GraphicsDriver/NVThreadedOptimization.cs b/Ryujinx.Common/GraphicsDriver/NVThreadedOptimization.cs index fee8336c6..ad2236310 100644 --- a/Ryujinx.Common/GraphicsDriver/NVThreadedOptimization.cs +++ b/Ryujinx.Common/GraphicsDriver/NVThreadedOptimization.cs @@ -25,28 +25,28 @@ namespace Ryujinx.Common.GraphicsDriver private delegate int NvAPI_InitializeDelegate(); private static NvAPI_InitializeDelegate NvAPI_Initialize; - private delegate int NvAPI_DRS_CreateSessionDelegate(out long handle); + private delegate int NvAPI_DRS_CreateSessionDelegate(out IntPtr handle); private static NvAPI_DRS_CreateSessionDelegate NvAPI_DRS_CreateSession; - private delegate int NvAPI_DRS_LoadSettingsDelegate(long handle); + private delegate int NvAPI_DRS_LoadSettingsDelegate(IntPtr handle); private static NvAPI_DRS_LoadSettingsDelegate NvAPI_DRS_LoadSettings; - private delegate int NvAPI_DRS_FindProfileByNameDelegate(long handle, NvapiUnicodeString profileName, out long profileHandle); + private delegate int NvAPI_DRS_FindProfileByNameDelegate(IntPtr handle, NvapiUnicodeString profileName, out IntPtr profileHandle); private static NvAPI_DRS_FindProfileByNameDelegate NvAPI_DRS_FindProfileByName; - private delegate int NvAPI_DRS_CreateProfileDelegate(long handle, ref NvdrsProfile profileInfo, out long profileHandle); + private delegate int NvAPI_DRS_CreateProfileDelegate(IntPtr handle, ref NvdrsProfile profileInfo, out IntPtr profileHandle); private static NvAPI_DRS_CreateProfileDelegate NvAPI_DRS_CreateProfile; - private delegate int NvAPI_DRS_CreateApplicationDelegate(long handle, long 
profileHandle, ref NvdrsApplicationV4 app); + private delegate int NvAPI_DRS_CreateApplicationDelegate(IntPtr handle, IntPtr profileHandle, ref NvdrsApplicationV4 app); private static NvAPI_DRS_CreateApplicationDelegate NvAPI_DRS_CreateApplication; - private delegate int NvAPI_DRS_SetSettingDelegate(long handle, long profileHandle, ref NvdrsSetting setting); + private delegate int NvAPI_DRS_SetSettingDelegate(IntPtr handle, IntPtr profileHandle, ref NvdrsSetting setting); private static NvAPI_DRS_SetSettingDelegate NvAPI_DRS_SetSetting; - private delegate int NvAPI_DRS_SaveSettingsDelegate(long handle); + private delegate int NvAPI_DRS_SaveSettingsDelegate(IntPtr handle); private static NvAPI_DRS_SaveSettingsDelegate NvAPI_DRS_SaveSettings; - private delegate int NvAPI_DRS_DestroySessionDelegate(long handle); + private delegate int NvAPI_DRS_DestroySessionDelegate(IntPtr handle); private static NvAPI_DRS_DestroySessionDelegate NvAPI_DRS_DestroySession; private static bool _initialized; @@ -85,7 +85,7 @@ namespace Ryujinx.Common.GraphicsDriver return (uint)Unsafe.SizeOf<T>() | version << 16; } - public static unsafe void SetThreadedOptimization(bool enabled) + public static void SetThreadedOptimization(bool enabled) { Initialize(); @@ -93,11 +93,11 @@ namespace Ryujinx.Common.GraphicsDriver Check(NvAPI_Initialize()); - Check(NvAPI_DRS_CreateSession(out long handle)); + Check(NvAPI_DRS_CreateSession(out IntPtr handle)); Check(NvAPI_DRS_LoadSettings(handle)); - long profileHandle; + IntPtr profileHandle; // Check if the profile already exists. 
From 9e68f5026ec9969922868ee018eb603ca63970bd Mon Sep 17 00:00:00 2001 From: riperiperi Date: Wed, 14 Apr 2021 23:15:25 +0100 Subject: [PATCH 08/10] Fix skipping missing shaders --- Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index 2aef2f256..bfd46c823 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -137,6 +137,8 @@ namespace Ryujinx.Graphics.Gpu.Shader // Should not happen, but if someone messed with the cache it's better to catch it. invalidEntries?.Add(key); + _shaderCount = ++programIndex; + continue; } From 3f126487abc94cacc7ffab59b2a87b270d3cfe15 Mon Sep 17 00:00:00 2001 From: riperiperi Date: Wed, 14 Apr 2021 23:27:12 +0100 Subject: [PATCH 09/10] Data should not be public for nvapi unicode string --- Ryujinx.Common/GraphicsDriver/NVAPI/NvapiUnicodeString.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Ryujinx.Common/GraphicsDriver/NVAPI/NvapiUnicodeString.cs b/Ryujinx.Common/GraphicsDriver/NVAPI/NvapiUnicodeString.cs index bfa039b89..6bbff2de1 100644 --- a/Ryujinx.Common/GraphicsDriver/NVAPI/NvapiUnicodeString.cs +++ b/Ryujinx.Common/GraphicsDriver/NVAPI/NvapiUnicodeString.cs @@ -6,7 +6,7 @@ namespace Ryujinx.Common.GraphicsDriver.NVAPI [StructLayout(LayoutKind.Sequential, Pack = 4)] public unsafe struct NvapiUnicodeString { - public fixed byte Data[4096]; + private fixed byte _data[4096]; public NvapiUnicodeString(string text) { @@ -15,7 +15,7 @@ namespace Ryujinx.Common.GraphicsDriver.NVAPI public string Get() { - fixed (byte* data = Data) + fixed (byte* data = _data) { string text = Encoding.Unicode.GetString(data, 4096); @@ -33,7 +33,7 @@ namespace Ryujinx.Common.GraphicsDriver.NVAPI { text += '\0'; fixed (char* textPtr = text) - fixed (byte* data = Data) + fixed (byte* data = _data) { int written = 
Encoding.Unicode.GetBytes(textPtr, text.Length, data, 4096); } From 778011c12ae7905173e02665f397a96686863b89 Mon Sep 17 00:00:00 2001 From: riperiperi Date: Wed, 19 May 2021 19:26:23 +0100 Subject: [PATCH 10/10] Remove ForceDedicatedGpu, as the driver utilities will now do that. --- Ryujinx.Common/System/ForceDedicatedGpu.cs | 16 ---------------- Ryujinx/Program.cs | 3 --- 2 files changed, 19 deletions(-) delete mode 100644 Ryujinx.Common/System/ForceDedicatedGpu.cs diff --git a/Ryujinx.Common/System/ForceDedicatedGpu.cs b/Ryujinx.Common/System/ForceDedicatedGpu.cs deleted file mode 100644 index 60272f1af..000000000 --- a/Ryujinx.Common/System/ForceDedicatedGpu.cs +++ /dev/null @@ -1,16 +0,0 @@ -using System.Runtime.InteropServices; - -namespace Ryujinx.Common.System -{ - public static class ForceDedicatedGpu - { - public static void Nvidia() - { - if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) - { - // NOTE: If the DLL exists, we can load it to force the usage of the dedicated Nvidia Gpu. - NativeLibrary.TryLoad("nvapi64.dll", out _); - } - } - } -} \ No newline at end of file diff --git a/Ryujinx/Program.cs b/Ryujinx/Program.cs index db31a2e56..b1fe41a7d 100644 --- a/Ryujinx/Program.cs +++ b/Ryujinx/Program.cs @@ -137,9 +137,6 @@ namespace Ryujinx // Logging system information. PrintSystemInfo(); - // Force dedicated GPU if we can. - ForceDedicatedGpu.Nvidia(); - // Enable OGL multithreading on the driver, when available. DriverUtilities.ToggleOGLThreading(true);