Ryujinx/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
riperiperi 76b53e018a
GPU: Add fallback when textureGatherOffsets is not supported (#5792)
* GPU: Add fallback when textureGatherOffsets is not supported.

This PR adds a fallback for GPUs or APIs that don't support an equivalent to the method `textureGatherOffsets`, where each of the 4 gathered texels has an individual offset. This is done by reusing the existing code to handle non-const offsets for texture instructions, though it has also been corrected as there were a few implementation issues.

MoltenVK reports support for this capability, and it didn't error when we initially released the MacOS build, but that has since changed. MVK still reports support, but spirv-cross has been fixed in a way that it _attempts_ to use this capability, but the metal compiler errors since it doesn't exist.

Some other fixes:
- textureGatherOffsets emulation has been changed significantly. It now uses 4 texture sample instructions (not gather), calculates a base texel (i=0 j=0) and adds the offsets onto it before converting into a tex coord. The final result is offset into a texel center, so it shouldn't be subject to interpolation, though this isn't perfect and could have some error with floating point formats with linear sampling. It is subject to texture wrap mode as it should be, which is why texelFetch was not used.
  - Maybe gather should be used here with component `w` (i=0, j=0), though this multiplies number of texels fetched by 4... The way it was doing this before _was_ wrong_, but doing it right would avoid issues with texel center precision.
- textureGatherOffset (singular) now performs textureGather with the offset applied to the coords, rather than the slower fallback where each texel is fetched individually.

* Increment shader cache version, remove unused arg

* Use base texture size for gather coord offset.

Implicit LOD for gather is not supported.

* Use 4 texture gathers for offsets emulation

Avoids issues with interpolation at cost of performance

(not sure how bad this is)

* Address Feedback
2023-10-20 15:05:09 +02:00

313 lines
12 KiB
C#

using OpenTK.Graphics.OpenGL;
using Ryujinx.Common.Configuration;
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.OpenGL.Image;
using Ryujinx.Graphics.OpenGL.Queries;
using Ryujinx.Graphics.Shader.Translation;
using System;
namespace Ryujinx.Graphics.OpenGL
{
public sealed class OpenGLRenderer : IRenderer
{
private readonly Pipeline _pipeline;
public IPipeline Pipeline => _pipeline;
private readonly Counters _counters;
private readonly Window _window;
public IWindow Window => _window;
private readonly TextureCopy _textureCopy;
private readonly TextureCopy _backgroundTextureCopy;
internal TextureCopy TextureCopy => BackgroundContextWorker.InBackground ? _backgroundTextureCopy : _textureCopy;
internal TextureCopyIncompatible TextureCopyIncompatible { get; }
internal TextureCopyMS TextureCopyMS { get; }
private readonly Sync _sync;
public event EventHandler<ScreenCaptureImageInfo> ScreenCaptured;
internal PersistentBuffers PersistentBuffers { get; }
internal ResourcePool ResourcePool { get; }
internal int BufferCount { get; private set; }
public string GpuVendor { get; private set; }
public string GpuRenderer { get; private set; }
public string GpuVersion { get; private set; }
public bool PreferThreading => true;
public OpenGLRenderer()
{
_pipeline = new Pipeline();
_counters = new Counters();
_window = new Window(this);
_textureCopy = new TextureCopy(this);
_backgroundTextureCopy = new TextureCopy(this);
TextureCopyIncompatible = new TextureCopyIncompatible(this);
TextureCopyMS = new TextureCopyMS(this);
_sync = new Sync();
PersistentBuffers = new PersistentBuffers();
ResourcePool = new ResourcePool();
}
public BufferHandle CreateBuffer(int size, BufferHandle storageHint)
{
return CreateBuffer(size, GAL.BufferAccess.Default);
}
public BufferHandle CreateBuffer(int size, GAL.BufferAccess access)
{
BufferCount++;
if (access == GAL.BufferAccess.FlushPersistent)
{
BufferHandle handle = Buffer.CreatePersistent(size);
PersistentBuffers.Map(handle, size);
return handle;
}
else
{
return Buffer.Create(size);
}
}
public BufferHandle CreateBuffer(nint pointer, int size)
{
throw new NotSupportedException();
}
public IProgram CreateProgram(ShaderSource[] shaders, ShaderInfo info)
{
return new Program(shaders, info.FragmentOutputMap);
}
public ISampler CreateSampler(SamplerCreateInfo info)
{
return new Sampler(info);
}
public ITexture CreateTexture(TextureCreateInfo info)
{
if (info.Target == Target.TextureBuffer)
{
return new TextureBuffer(this, info);
}
else
{
return ResourcePool.GetTextureOrNull(info) ?? new TextureStorage(this, info).CreateDefaultView();
}
}
public void DeleteBuffer(BufferHandle buffer)
{
PersistentBuffers.Unmap(buffer);
Buffer.Delete(buffer);
}
public HardwareInfo GetHardwareInfo()
{
return new HardwareInfo(GpuVendor, GpuRenderer);
}
public PinnedSpan<byte> GetBufferData(BufferHandle buffer, int offset, int size)
{
return Buffer.GetData(this, buffer, offset, size);
}
public Capabilities GetCapabilities()
{
bool intelWindows = HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelWindows;
bool intelUnix = HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelUnix;
bool amdWindows = HwCapabilities.Vendor == HwCapabilities.GpuVendor.AmdWindows;
return new Capabilities(
api: TargetApi.OpenGL,
vendorName: GpuVendor,
hasFrontFacingBug: intelWindows,
hasVectorIndexingBug: amdWindows,
needsFragmentOutputSpecialization: false,
reduceShaderPrecision: false,
supportsAstcCompression: HwCapabilities.SupportsAstcCompression,
supportsBc123Compression: HwCapabilities.SupportsTextureCompressionS3tc,
supportsBc45Compression: HwCapabilities.SupportsTextureCompressionRgtc,
supportsBc67Compression: true, // Should check BPTC extension, but for some reason NVIDIA is not exposing the extension.
supportsEtc2Compression: true,
supports3DTextureCompression: false,
supportsBgraFormat: false,
supportsR4G4Format: false,
supportsR4G4B4A4Format: true,
supportsSnormBufferTextureFormat: false,
supports5BitComponentFormat: true,
supportsBlendEquationAdvanced: HwCapabilities.SupportsBlendEquationAdvanced,
supportsFragmentShaderInterlock: HwCapabilities.SupportsFragmentShaderInterlock,
supportsFragmentShaderOrderingIntel: HwCapabilities.SupportsFragmentShaderOrdering,
supportsGeometryShader: true,
supportsGeometryShaderPassthrough: HwCapabilities.SupportsGeometryShaderPassthrough,
supportsTransformFeedback: true,
supportsImageLoadFormatted: HwCapabilities.SupportsImageLoadFormatted,
supportsLayerVertexTessellation: HwCapabilities.SupportsShaderViewportLayerArray,
supportsMismatchingViewFormat: HwCapabilities.SupportsMismatchingViewFormat,
supportsCubemapView: true,
supportsNonConstantTextureOffset: HwCapabilities.SupportsNonConstantTextureOffset,
supportsScaledVertexFormats: true,
supportsShaderBallot: HwCapabilities.SupportsShaderBallot,
supportsShaderBarrierDivergence: !(intelWindows || intelUnix),
supportsShaderFloat64: true,
supportsTextureGatherOffsets: true,
supportsTextureShadowLod: HwCapabilities.SupportsTextureShadowLod,
supportsVertexStoreAndAtomics: true,
supportsViewportIndexVertexTessellation: HwCapabilities.SupportsShaderViewportLayerArray,
supportsViewportMask: HwCapabilities.SupportsViewportArray2,
supportsViewportSwizzle: HwCapabilities.SupportsViewportSwizzle,
supportsIndirectParameters: HwCapabilities.SupportsIndirectParameters,
supportsDepthClipControl: true,
maximumUniformBuffersPerStage: 13, // TODO: Avoid hardcoding those limits here and get from driver?
maximumStorageBuffersPerStage: 16,
maximumTexturesPerStage: 32,
maximumImagesPerStage: 8,
maximumComputeSharedMemorySize: HwCapabilities.MaximumComputeSharedMemorySize,
maximumSupportedAnisotropy: HwCapabilities.MaximumSupportedAnisotropy,
shaderSubgroupSize: Constants.MaxSubgroupSize,
storageBufferOffsetAlignment: HwCapabilities.StorageBufferOffsetAlignment,
textureBufferOffsetAlignment: HwCapabilities.TextureBufferOffsetAlignment,
gatherBiasPrecision: intelWindows || amdWindows ? 8 : 0); // Precision is 8 for these vendors on Vulkan.
}
public void SetBufferData(BufferHandle buffer, int offset, ReadOnlySpan<byte> data)
{
Buffer.SetData(buffer, offset, data);
}
public void UpdateCounters()
{
_counters.Update();
}
public void PreFrame()
{
_sync.Cleanup();
ResourcePool.Tick();
}
public ICounterEvent ReportCounter(CounterType type, EventHandler<ulong> resultHandler, float divisor, bool hostReserved)
{
return _counters.QueueReport(type, resultHandler, divisor, _pipeline.DrawCount, hostReserved);
}
public void Initialize(GraphicsDebugLevel glLogLevel)
{
Debugger.Initialize(glLogLevel);
PrintGpuInformation();
if (HwCapabilities.SupportsParallelShaderCompile)
{
GL.Arb.MaxShaderCompilerThreads(Math.Min(Environment.ProcessorCount, 8));
}
_counters.Initialize();
// This is required to disable [0, 1] clamping for SNorm outputs on compatibility profiles.
// This call is expected to fail if we're running with a core profile,
// as this clamp target was deprecated, but that's fine as a core profile
// should already have the desired behaviour were outputs are not clamped.
GL.ClampColor(ClampColorTarget.ClampFragmentColor, ClampColorMode.False);
}
private void PrintGpuInformation()
{
GpuVendor = GL.GetString(StringName.Vendor);
GpuRenderer = GL.GetString(StringName.Renderer);
GpuVersion = GL.GetString(StringName.Version);
Logger.Notice.Print(LogClass.Gpu, $"{GpuVendor} {GpuRenderer} ({GpuVersion})");
}
public void ResetCounter(CounterType type)
{
_counters.QueueReset(type);
}
public void BackgroundContextAction(Action action, bool alwaysBackground = false)
{
// alwaysBackground is ignored, since we cannot switch from the current context.
if (IOpenGLContext.HasContext())
{
action(); // We have a context already - use that (assuming it is the main one).
}
else
{
_window.BackgroundContext.Invoke(action);
}
}
public void InitializeBackgroundContext(IOpenGLContext baseContext)
{
_window.InitializeBackgroundContext(baseContext);
}
public void Dispose()
{
_textureCopy.Dispose();
_backgroundTextureCopy.Dispose();
TextureCopyMS.Dispose();
PersistentBuffers.Dispose();
ResourcePool.Dispose();
_pipeline.Dispose();
_window.Dispose();
_counters.Dispose();
_sync.Dispose();
}
public IProgram LoadProgramBinary(byte[] programBinary, bool hasFragmentShader, ShaderInfo info)
{
return new Program(programBinary, hasFragmentShader, info.FragmentOutputMap);
}
public void CreateSync(ulong id, bool strict)
{
_sync.Create(id);
}
public void WaitSync(ulong id)
{
_sync.Wait(id);
}
public ulong GetCurrentSync()
{
return _sync.GetCurrent();
}
public void SetInterruptAction(Action<Action> interruptAction)
{
// Currently no need for an interrupt action.
}
public void Screenshot()
{
_window.ScreenCaptureRequested = true;
}
public void OnScreenCaptured(ScreenCaptureImageInfo bitmap)
{
ScreenCaptured?.Invoke(this, bitmap);
}
public bool PrepareHostMapping(nint address, ulong size)
{
return false;
}
}
}