GPU: Avoid using garbage size for non-cb0 storage buffers (#4999)

* GPU: Avoid using garbage size for non-cb0 storage buffers

In the Depths area, Tears of the Kingdom performs a global memory access whose address comes from constant buffer slot 6. This isn't standard, so the address isn't actually followed by a size 8 bytes after it; as a result, we were reading back a garbage size that ended up very large (at least in version 1.1.0) and would synchronize a lot of data per frame.

This PR makes storage buffers created from addresses outside constant buffer slot 0 get their size as the number of bytes remaining in the GPU mapping starting at the given virtual address. This should bound the buffer to a reasonable size, and ideally stop it crossing into other memory.

* Limit max size

* Add TODO

* Feedback
This commit is contained in:
riperiperi 2023-05-18 07:56:34 +01:00 committed by GitHub
parent b3bf05356b
commit ecbf303266
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 42 additions and 1 deletions

View file

@ -23,6 +23,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
public const int PrimitiveRestartStateIndex = 12; public const int PrimitiveRestartStateIndex = 12;
public const int RenderTargetStateIndex = 27; public const int RenderTargetStateIndex = 27;
// Upper bound, in bytes (0x100000 = 1 MiB), on the size given to a storage buffer whose
// descriptor does not come from constant buffer slot 0. It is passed as the scan limit
// when querying how many bytes are mapped after the buffer's GPU virtual address.
private const ulong MaxUnknownStorageSize = 0x100000;
private readonly GpuContext _context; private readonly GpuContext _context;
private readonly GpuChannel _channel; private readonly GpuChannel _channel;
private readonly DeviceStateWithShadow<ThreedClassState> _state; private readonly DeviceStateWithShadow<ThreedClassState> _state;
@ -356,7 +358,19 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(sbDescAddress); SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(sbDescAddress);
_channel.BufferManager.SetGraphicsStorageBuffer(stage, sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size, sb.Flags); uint size;
if (sb.SbCbSlot == 0)
{
// Only trust the SbDescriptor size if it comes from slot 0.
size = (uint)sbDescriptor.Size;
}
else
{
// TODO: Use full mapped size and somehow speed up buffer sync.
size = (uint)_channel.MemoryManager.GetMappedSize(sbDescriptor.PackAddress(), MaxUnknownStorageSize);
}
_channel.BufferManager.SetGraphicsStorageBuffer(stage, sb.Slot, sbDescriptor.PackAddress(), size, sb.Flags);
} }
} }
} }

View file

@ -637,6 +637,33 @@ namespace Ryujinx.Graphics.Gpu.Memory
return UnpackPaFromPte(pte) + (va & PageMask); return UnpackPaFromPte(pte) + (va & PageMask);
} }
/// <summary>
/// Translates a GPU virtual address and returns the number of bytes that are mapped after it.
/// </summary>
/// <remarks>
/// The scan walks page by page and stops at the first unmapped page, at the first address that
/// fails validation, or once <paramref name="maxSize"/> bytes have been covered, whichever comes first.
/// </remarks>
/// <param name="va">GPU virtual address to be translated</param>
/// <param name="maxSize">Maximum size in bytes to scan</param>
/// <returns>Number of bytes (never more than <paramref name="maxSize"/>), 0 if unmapped</returns>
public ulong GetMappedSize(ulong va, ulong maxSize)
{
    if (!ValidateAddress(va))
    {
        return 0;
    }

    ulong startVa = va;

    // Saturate rather than wrap: with unchecked ulong arithmetic, a very large maxSize
    // would make "va + maxSize" wrap below va, and the scan would report 0 bytes
    // even though the address is mapped.
    ulong endVa = maxSize > ulong.MaxValue - va ? ulong.MaxValue : va + maxSize;

    // Every address handed to GetPte is validated first, not just the starting one,
    // so the walk cannot continue past the range that ValidateAddress accepts.
    while (va < endVa && ValidateAddress(va) && GetPte(va) != PteUnmapped)
    {
        // Advance to the start of the next page (the first step may be a partial page).
        va += PageSize - (va & PageMask);
    }

    // The last step can overshoot endVa by up to a page, so clamp to the requested maximum.
    return Math.Min(maxSize, va - startVa);
}
/// <summary> /// <summary>
/// Gets the kind of a given memory page. /// Gets the kind of a given memory page.
/// This might indicate the type of resource that can be allocated on the page, and also texture tiling. /// This might indicate the type of resource that can be allocated on the page, and also texture tiling.