Ryujinx/Ryujinx.Graphics.Vulkan/TextureStorage.cs
riperiperi 6e92b7a378
Dispose Vulkan TextureStorage when views hit 0 instead of immediately (#3738)
Due to the `using` statement being scoped to the `CreateTextureView` method, `TextureStorage` would be disposed as soon as the view was returned.

This was largely fine as the TextureStorage resources were being kept alive by the views holding their own references to them, but it also meant that dispose is only called as soon as the texture is created.

Aliased Storages are TextureStorages created with the same allocation as another TextureStorage, if they have to be aliased as another format. We keep track of a TextureStorage's `_aliasedStorages` as they are created, and dispose them when the TextureStorage is disposed...

...except it is disposed immediately, before any aliased storages are even created. The aliased storages added after this will never be disposed.

This PR attempts to fix this by disposing TextureStorage when its view count reaches 0. The other use of texture storage - the D32S8 blit - still manually disposes the storage, but regular uses created via the GAL are now disposed by the view count.

I think this makes the most sense, as otherwise in the future this behaviour might be forgotton and more things could be added to the Dispose() method that don't work due to it not actually calling at the right time.

This should improve memory leaks in Super Mario Odyssey, most noticeable when resolution scaling. The memory usage of the game is still wildly unpredictable due to how it interacts with the texture cache, but now it shouldn't get considerably longer as you play... I hope. I've seen it typically recover back to the same level occasionally, though it can spike significantly.

Please test a bunch of games on multiple GPUs to make sure this doesn't break anything.
2022-10-18 23:52:08 +00:00

506 lines
17 KiB
C#

using Ryujinx.Common;
using Ryujinx.Graphics.GAL;
using Silk.NET.Vulkan;
using System;
using System.Collections.Generic;
using System.Numerics;
using VkBuffer = Silk.NET.Vulkan.Buffer;
using VkFormat = Silk.NET.Vulkan.Format;
namespace Ryujinx.Graphics.Vulkan
{
class TextureStorage : IDisposable
{
private const MemoryPropertyFlags DefaultImageMemoryFlags =
MemoryPropertyFlags.MemoryPropertyDeviceLocalBit;
private const ImageUsageFlags DefaultUsageFlags =
ImageUsageFlags.ImageUsageSampledBit |
ImageUsageFlags.ImageUsageTransferSrcBit |
ImageUsageFlags.ImageUsageTransferDstBit;
public const AccessFlags DefaultAccessMask =
AccessFlags.AccessShaderReadBit |
AccessFlags.AccessShaderWriteBit |
AccessFlags.AccessColorAttachmentReadBit |
AccessFlags.AccessColorAttachmentWriteBit |
AccessFlags.AccessDepthStencilAttachmentReadBit |
AccessFlags.AccessDepthStencilAttachmentWriteBit |
AccessFlags.AccessTransferReadBit |
AccessFlags.AccessTransferWriteBit;
private readonly VulkanRenderer _gd;
private readonly Device _device;
private TextureCreateInfo _info;
public TextureCreateInfo Info => _info;
private readonly Image _image;
private readonly Auto<DisposableImage> _imageAuto;
private readonly Auto<MemoryAllocation> _allocationAuto;
private Auto<MemoryAllocation> _foreignAllocationAuto;
private Dictionary<GAL.Format, TextureStorage> _aliasedStorages;
private AccessFlags _lastModificationAccess;
private PipelineStageFlags _lastModificationStage;
private int _viewsCount;
private ulong _size;
public VkFormat VkFormat { get; }
public float ScaleFactor { get; }
public unsafe TextureStorage(
VulkanRenderer gd,
PhysicalDevice physicalDevice,
Device device,
TextureCreateInfo info,
float scaleFactor,
Auto<MemoryAllocation> foreignAllocation = null)
{
_gd = gd;
_device = device;
_info = info;
ScaleFactor = scaleFactor;
var format = _gd.FormatCapabilities.ConvertToVkFormat(info.Format);
var levels = (uint)info.Levels;
var layers = (uint)info.GetLayers();
var depth = (uint)(info.Target == Target.Texture3D ? info.Depth : 1);
VkFormat = format;
var type = info.Target.Convert();
var extent = new Extent3D((uint)info.Width, (uint)info.Height, depth);
var sampleCountFlags = ConvertToSampleCountFlags((uint)info.Samples);
var usage = DefaultUsageFlags;
if (info.Format.IsDepthOrStencil())
{
usage |= ImageUsageFlags.ImageUsageDepthStencilAttachmentBit;
}
else if (info.Format.IsRtColorCompatible())
{
usage |= ImageUsageFlags.ImageUsageColorAttachmentBit;
}
if (info.Format.IsImageCompatible())
{
usage |= ImageUsageFlags.ImageUsageStorageBit;
}
var flags = ImageCreateFlags.ImageCreateMutableFormatBit;
// This flag causes mipmapped texture arrays to break on AMD GCN, so for that copy dependencies are forced for aliasing as cube.
bool isCube = info.Target == Target.Cubemap || info.Target == Target.CubemapArray;
bool cubeCompatible = gd.IsAmdGcn ? isCube : (info.Width == info.Height && layers >= 6);
if (type == ImageType.ImageType2D && cubeCompatible)
{
flags |= ImageCreateFlags.ImageCreateCubeCompatibleBit;
}
if (type == ImageType.ImageType3D)
{
flags |= ImageCreateFlags.ImageCreate2DArrayCompatibleBit;
}
var imageCreateInfo = new ImageCreateInfo()
{
SType = StructureType.ImageCreateInfo,
ImageType = type,
Format = format,
Extent = extent,
MipLevels = levels,
ArrayLayers = layers,
Samples = sampleCountFlags,
Tiling = ImageTiling.Optimal,
Usage = usage,
SharingMode = SharingMode.Exclusive,
InitialLayout = ImageLayout.Undefined,
Flags = flags
};
gd.Api.CreateImage(device, imageCreateInfo, null, out _image).ThrowOnError();
if (foreignAllocation == null)
{
gd.Api.GetImageMemoryRequirements(device, _image, out var requirements);
var allocation = gd.MemoryAllocator.AllocateDeviceMemory(physicalDevice, requirements, DefaultImageMemoryFlags);
if (allocation.Memory.Handle == 0UL)
{
gd.Api.DestroyImage(device, _image, null);
throw new Exception("Image initialization failed.");
}
_size = requirements.Size;
gd.Api.BindImageMemory(device, _image, allocation.Memory, allocation.Offset).ThrowOnError();
_allocationAuto = new Auto<MemoryAllocation>(allocation);
_imageAuto = new Auto<DisposableImage>(new DisposableImage(_gd.Api, device, _image), null, _allocationAuto);
InitialTransition(ImageLayout.Undefined, ImageLayout.General);
}
else
{
_foreignAllocationAuto = foreignAllocation;
foreignAllocation.IncrementReferenceCount();
var allocation = foreignAllocation.GetUnsafe();
gd.Api.BindImageMemory(device, _image, allocation.Memory, allocation.Offset).ThrowOnError();
_imageAuto = new Auto<DisposableImage>(new DisposableImage(_gd.Api, device, _image));
InitialTransition(ImageLayout.Preinitialized, ImageLayout.General);
}
}
public TextureStorage CreateAliasedColorForDepthStorageUnsafe(GAL.Format format)
{
var colorFormat = format switch
{
GAL.Format.S8Uint => GAL.Format.R8Unorm,
GAL.Format.D16Unorm => GAL.Format.R16Unorm,
GAL.Format.S8UintD24Unorm => GAL.Format.R8G8B8A8Unorm,
GAL.Format.D32Float => GAL.Format.R32Float,
GAL.Format.D24UnormS8Uint => GAL.Format.R8G8B8A8Unorm,
GAL.Format.D32FloatS8Uint => GAL.Format.R32G32Float,
_ => throw new ArgumentException($"\"{format}\" is not a supported depth or stencil format.")
};
return CreateAliasedStorageUnsafe(colorFormat);
}
public TextureStorage CreateAliasedStorageUnsafe(GAL.Format format)
{
if (_aliasedStorages == null || !_aliasedStorages.TryGetValue(format, out var storage))
{
_aliasedStorages ??= new Dictionary<GAL.Format, TextureStorage>();
var info = NewCreateInfoWith(ref _info, format, _info.BytesPerPixel);
storage = new TextureStorage(_gd, default, _device, info, ScaleFactor, _allocationAuto);
_aliasedStorages.Add(format, storage);
}
return storage;
}
public static TextureCreateInfo NewCreateInfoWith(ref TextureCreateInfo info, GAL.Format format, int bytesPerPixel)
{
return NewCreateInfoWith(ref info, format, bytesPerPixel, info.Width, info.Height);
}
public static TextureCreateInfo NewCreateInfoWith(
ref TextureCreateInfo info,
GAL.Format format,
int bytesPerPixel,
int width,
int height)
{
return new TextureCreateInfo(
width,
height,
info.Depth,
info.Levels,
info.Samples,
info.BlockWidth,
info.BlockHeight,
bytesPerPixel,
format,
info.DepthStencilMode,
info.Target,
info.SwizzleR,
info.SwizzleG,
info.SwizzleB,
info.SwizzleA);
}
public Auto<DisposableImage> GetImage()
{
return _imageAuto;
}
public Image GetImageForViewCreation()
{
return _image;
}
public bool HasCommandBufferDependency(CommandBufferScoped cbs)
{
if (_foreignAllocationAuto != null)
{
return _foreignAllocationAuto.HasCommandBufferDependency(cbs);
}
else if (_allocationAuto != null)
{
return _allocationAuto.HasCommandBufferDependency(cbs);
}
return false;
}
private unsafe void InitialTransition(ImageLayout srcLayout, ImageLayout dstLayout)
{
CommandBufferScoped cbs;
bool useTempCbs = !_gd.CommandBufferPool.OwnedByCurrentThread;
if (useTempCbs)
{
cbs = _gd.BackgroundResources.Get().GetPool().Rent();
}
else
{
if (_gd.PipelineInternal != null)
{
cbs = _gd.PipelineInternal.GetPreloadCommandBuffer();
}
else
{
cbs = _gd.CommandBufferPool.Rent();
useTempCbs = true;
}
}
var aspectFlags = _info.Format.ConvertAspectFlags();
var subresourceRange = new ImageSubresourceRange(aspectFlags, 0, (uint)_info.Levels, 0, (uint)_info.GetLayers());
var barrier = new ImageMemoryBarrier()
{
SType = StructureType.ImageMemoryBarrier,
SrcAccessMask = 0,
DstAccessMask = DefaultAccessMask,
OldLayout = srcLayout,
NewLayout = dstLayout,
SrcQueueFamilyIndex = Vk.QueueFamilyIgnored,
DstQueueFamilyIndex = Vk.QueueFamilyIgnored,
Image = _imageAuto.Get(cbs).Value,
SubresourceRange = subresourceRange
};
_gd.Api.CmdPipelineBarrier(
cbs.CommandBuffer,
PipelineStageFlags.PipelineStageTopOfPipeBit,
PipelineStageFlags.PipelineStageAllCommandsBit,
0,
0,
null,
0,
null,
1,
barrier);
if (useTempCbs)
{
cbs.Dispose();
}
}
public static SampleCountFlags ConvertToSampleCountFlags(uint samples)
{
if (samples == 0 || samples > (uint)SampleCountFlags.SampleCount64Bit)
{
return SampleCountFlags.SampleCount1Bit;
}
// Round up to the nearest power of two.
return (SampleCountFlags)(1u << (31 - BitOperations.LeadingZeroCount(samples)));
}
public TextureView CreateView(TextureCreateInfo info, int firstLayer, int firstLevel)
{
return new TextureView(_gd, _device, info, this, firstLayer, firstLevel);
}
public void CopyFromOrToBuffer(
CommandBuffer commandBuffer,
VkBuffer buffer,
Image image,
int size,
bool to,
int x,
int y,
int dstLayer,
int dstLevel,
int dstLayers,
int dstLevels,
bool singleSlice,
ImageAspectFlags aspectFlags,
bool forFlush)
{
bool is3D = Info.Target == Target.Texture3D;
int width = Info.Width;
int height = Info.Height;
int depth = is3D && !singleSlice ? Info.Depth : 1;
int layer = is3D ? 0 : dstLayer;
int layers = dstLayers;
int levels = dstLevels;
int offset = 0;
for (int level = 0; level < levels; level++)
{
int mipSize = Info.GetMipSize(level);
if (forFlush)
{
mipSize = GetBufferDataLength(mipSize);
}
int endOffset = offset + mipSize;
if ((uint)endOffset > (uint)size)
{
break;
}
int rowLength = (Info.GetMipStride(level) / Info.BytesPerPixel) * Info.BlockWidth;
var sl = new ImageSubresourceLayers(
aspectFlags,
(uint)(dstLevel + level),
(uint)layer,
(uint)layers);
var extent = new Extent3D((uint)width, (uint)height, (uint)depth);
int z = is3D ? dstLayer : 0;
var region = new BufferImageCopy(
(ulong)offset,
(uint)BitUtils.AlignUp(rowLength, Info.BlockWidth),
(uint)BitUtils.AlignUp(height, Info.BlockHeight),
sl,
new Offset3D(x, y, z),
extent);
if (to)
{
_gd.Api.CmdCopyImageToBuffer(commandBuffer, image, ImageLayout.General, buffer, 1, region);
}
else
{
_gd.Api.CmdCopyBufferToImage(commandBuffer, buffer, image, ImageLayout.General, 1, region);
}
offset += mipSize;
width = Math.Max(1, width >> 1);
height = Math.Max(1, height >> 1);
if (Info.Target == Target.Texture3D)
{
depth = Math.Max(1, depth >> 1);
}
}
}
private int GetBufferDataLength(int length)
{
if (NeedsD24S8Conversion())
{
return length * 2;
}
return length;
}
private bool NeedsD24S8Conversion()
{
return FormatCapabilities.IsD24S8(Info.Format) && VkFormat == VkFormat.D32SfloatS8Uint;
}
public void SetModification(AccessFlags accessFlags, PipelineStageFlags stage)
{
_lastModificationAccess = accessFlags;
_lastModificationStage = stage;
}
public void InsertBarrier(CommandBufferScoped cbs, AccessFlags dstAccessFlags, PipelineStageFlags dstStageFlags)
{
if (_lastModificationAccess != AccessFlags.AccessNoneKhr)
{
ImageAspectFlags aspectFlags;
if (_info.Format.IsDepthOrStencil())
{
if (_info.Format == GAL.Format.S8Uint)
{
aspectFlags = ImageAspectFlags.ImageAspectStencilBit;
}
else if (_info.Format == GAL.Format.D16Unorm || _info.Format == GAL.Format.D32Float)
{
aspectFlags = ImageAspectFlags.ImageAspectDepthBit;
}
else
{
aspectFlags = ImageAspectFlags.ImageAspectDepthBit | ImageAspectFlags.ImageAspectStencilBit;
}
}
else
{
aspectFlags = ImageAspectFlags.ImageAspectColorBit;
}
TextureView.InsertImageBarrier(
_gd.Api,
cbs.CommandBuffer,
_imageAuto.Get(cbs).Value,
_lastModificationAccess,
dstAccessFlags,
_lastModificationStage,
dstStageFlags,
aspectFlags,
0,
0,
_info.GetLayers(),
_info.Levels);
_lastModificationAccess = AccessFlags.AccessNoneKhr;
}
}
public void IncrementViewsCount()
{
_viewsCount++;
}
public void DecrementViewsCount()
{
if (--_viewsCount == 0)
{
_gd.PipelineInternal?.FlushCommandsIfWeightExceeding(_imageAuto, _size);
Dispose();
}
}
public void Dispose()
{
if (_aliasedStorages != null)
{
foreach (var storage in _aliasedStorages.Values)
{
storage.Dispose();
}
_aliasedStorages.Clear();
}
_imageAuto.Dispose();
_allocationAuto?.Dispose();
_foreignAllocationAuto?.DecrementReferenceCount();
_foreignAllocationAuto = null;
}
}
}