c9c65af59e
* Initial implementation of fast 2d copy TODO: Partial copy for mismatching region/size. * WIP * Cleanup * Update Ryujinx.Graphics.Gpu/Engine/Twod/TwodClass.cs Co-authored-by: gdkchan <gab.dark.100@gmail.com> Co-authored-by: gdkchan <gab.dark.100@gmail.com>
374 lines
15 KiB
C#
374 lines
15 KiB
C#
using Ryujinx.Common;
|
|
using Ryujinx.Graphics.Device;
|
|
using Ryujinx.Graphics.GAL;
|
|
using Ryujinx.Graphics.Gpu.Engine.Types;
|
|
using Ryujinx.Graphics.Gpu.Image;
|
|
using Ryujinx.Graphics.Texture;
|
|
using Ryujinx.Memory;
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using System.Runtime.CompilerServices;
|
|
using System.Runtime.InteropServices;
|
|
using System.Runtime.Intrinsics;
|
|
|
|
namespace Ryujinx.Graphics.Gpu.Engine.Twod
|
|
{
|
|
/// <summary>
|
|
/// Represents a 2D engine class.
|
|
/// </summary>
|
|
class TwodClass : IDeviceState
|
|
{
|
|
private readonly GpuChannel _channel;
|
|
private readonly DeviceState<TwodClassState> _state;
|
|
|
|
/// <summary>
/// Initializes the 2D engine class for a channel and registers its register callbacks.
/// </summary>
/// <param name="channel">GPU channel that this engine instance operates on</param>
public TwodClass(GpuChannel channel)
{
    _channel = channel;

    // Only one register has a callback: PixelsFromMemorySrcY0Int,
    // which is handled by the method of the same name (the second callback slot is left null).
    var callbacks = new Dictionary<string, RwCallback>();
    callbacks.Add(nameof(TwodClassState.PixelsFromMemorySrcY0Int), new RwCallback(PixelsFromMemorySrcY0Int, null));

    _state = new DeviceState<TwodClassState>(callbacks);
}
|
|
|
|
/// <summary>
/// Fetches the current value of one of the class registers.
/// </summary>
/// <param name="offset">Byte offset of the register to read</param>
/// <returns>The value held by the register at <paramref name="offset"/></returns>
public int Read(int offset)
{
    // The register file lives in the device state; simply forward the request.
    return _state.Read(offset);
}
|
|
|
|
/// <summary>
/// Stores a value into one of the class registers.
/// </summary>
/// <param name="offset">Byte offset of the register to write</param>
/// <param name="data">Value to store in the register</param>
public void Write(int offset, int data)
{
    // The register file lives in the device state; simply forward the request.
    _state.Write(offset, data);
}
|
|
|
|
/// <summary>
/// Checks whether the raw data of two textures can be copied from one to the other as-is.
/// This requires equal bytes per pixel, height, depth, layout kind and packed memory layout,
/// plus an equal stride (linear textures) or an equal width (block linear textures).
/// </summary>
/// <param name="lhs">Description of the first texture</param>
/// <param name="rhs">Description of the second texture</param>
/// <param name="lhsFormat">Format information of the first texture</param>
/// <param name="rhsFormat">Format information of the second texture</param>
/// <returns>True if the data of both textures is compatible, false otherwise</returns>
private bool IsDataCompatible(TwodTexture lhs, TwodTexture rhs, FormatInfo lhsFormat, FormatInfo rhsFormat)
{
    bool sameShapeAndLayout =
        lhsFormat.BytesPerPixel == rhsFormat.BytesPerPixel &&
        lhs.Height == rhs.Height &&
        lhs.Depth == rhs.Depth &&
        lhs.LinearLayout == rhs.LinearLayout &&
        lhs.MemoryLayout.Packed == rhs.MemoryLayout.Packed;

    if (!sameShapeAndLayout)
    {
        return false;
    }

    // Both textures share the same layout kind at this point.
    // Linear textures are compared by stride, block linear ones by width.
    return lhs.LinearLayout
        ? lhs.Stride == rhs.Stride
        : lhs.Width == rhs.Width;
}
|
|
|
|
/// <summary>
/// Checks whether a region spans the entire texture, taking the width alignment into account.
/// The region must start at (0, 0), end at the texture height, and its end X, once aligned,
/// must match the texture width (derived from the stride for linear textures).
/// </summary>
/// <param name="texture">The texture the region belongs to</param>
/// <param name="formatInfo">Format information of the texture, used for its bytes per pixel</param>
/// <param name="x1">Region start x</param>
/// <param name="y1">Region start y</param>
/// <param name="x2">Region end x</param>
/// <param name="y2">Region end y</param>
/// <returns>True if the region covers the full texture, false otherwise</returns>
private bool IsCopyRegionComplete(TwodTexture texture, FormatInfo formatInfo, int x1, int y1, int x2, int y2)
{
    // The region has to begin at the texture origin...
    if (x1 != 0 || y1 != 0)
    {
        return false;
    }

    // ...and reach the last row.
    if (y2 != texture.Height)
    {
        return false;
    }

    // Linear textures need no extra alignment and get their width from the stride.
    // Block linear textures are aligned to the GOB alignment, expressed in pixels.
    int alignmentInPixels = texture.LinearLayout
        ? 1
        : Constants.GobAlignment / formatInfo.BytesPerPixel;

    int textureWidth = texture.LinearLayout
        ? texture.Stride / formatInfo.BytesPerPixel
        : texture.Width;

    return textureWidth == BitUtils.AlignUp(x2, alignmentInPixels);
}
|
|
|
|
/// <summary>
/// Performs a data copy between two textures, reading and writing guest memory directly.
/// The textures must have a matching layout, size, and bytes per pixel.
/// When the copy size equals the texture size the data is written in one go, otherwise
/// only the bytes that belong to the copied rectangle are transferred.
/// </summary>
/// <param name="src">The source texture</param>
/// <param name="dst">The destination texture</param>
/// <param name="w">Copy width, in pixels</param>
/// <param name="h">Copy height, in pixels</param>
/// <param name="bpp">Bytes per pixel</param>
private void UnscaledFullCopy(TwodTexture src, TwodTexture dst, int w, int h, int bpp)
{
    var srcCalculator = new OffsetCalculator(
        w,
        h,
        src.Stride,
        src.LinearLayout,
        src.MemoryLayout.UnpackGobBlocksInY(),
        src.MemoryLayout.UnpackGobBlocksInZ(),
        bpp);

    // Size of the memory range that holds the w x h rectangle starting at the origin.
    (int _, int srcSize) = srcCalculator.GetRectangleRange(0, 0, w, h);

    var memoryManager = _channel.MemoryManager;

    ulong srcGpuVa = src.Address.Pack();
    ulong dstGpuVa = dst.Address.Pack();

    ReadOnlySpan<byte> srcSpan = memoryManager.GetSpan(srcGpuVa, srcSize, true);

    // Texture dimensions in pixels; linear textures derive their width from the stride.
    int width = src.LinearLayout ? src.Stride / bpp : src.Width;
    int height = src.Height;

    // If the copy is not equal to the width and height of the texture, we will need to copy partially.
    // It's worth noting that it has already been established that the src and dst are the same size.

    if (w == width && h == height)
    {
        memoryManager.Write(dstGpuVa, srcSpan);
    }
    else
    {
        using WritableRegion dstRegion = memoryManager.GetWritableRegion(dstGpuVa, srcSize, true);
        Span<byte> dstSpan = dstRegion.Memory.Span;

        if (src.LinearLayout)
        {
            // Copy only the requested rectangle: w pixels on each of the h rows.
            // The spans are sized for that rectangle, so using the full texture
            // row length here would run past the end of the span on the last row.
            int stride = src.Stride;
            int lineSize = w * bpp;
            int offset = 0;

            for (int y = 0; y < h; y++)
            {
                srcSpan.Slice(offset, lineSize).CopyTo(dstSpan.Slice(offset));

                offset += stride;
            }
        }
        else
        {
            // Copy with the block linear layout in mind.
            // A second calculator with 1 byte per pixel is used, so that X is a byte position within the row.

            int rowBytes = w * bpp;

            var byteCalculator = new OffsetCalculator(
                rowBytes,
                h,
                0,
                false,
                src.MemoryLayout.UnpackGobBlocksInY(),
                src.MemoryLayout.UnpackGobBlocksInZ(),
                1);

            // Portion of each row that can be moved 16 bytes at a time.
            int rowBytesVectorized = BitUtils.AlignDown(rowBytes, 16);

            ReadOnlySpan<Vector128<byte>> srcVec = MemoryMarshal.Cast<byte, Vector128<byte>>(srcSpan);
            Span<Vector128<byte>> dstVec = MemoryMarshal.Cast<byte, Vector128<byte>>(dstSpan);

            for (int y = 0; y < h; y++)
            {
                int x = 0;

                byteCalculator.SetY(y);

                // Vector part: the byte offset is converted to a 16-byte element index.
                for (; x < rowBytesVectorized; x += 16)
                {
                    int offset = byteCalculator.GetOffset(x) >> 4;

                    dstVec[offset] = srcVec[offset];
                }

                // Remaining bytes of the row are copied one by one.
                for (; x < rowBytes; x++)
                {
                    int offset = byteCalculator.GetOffset(x);

                    dstSpan[offset] = srcSpan[offset];
                }
            }
        }
    }
}
|
|
|
|
/// <summary>
/// Executes the blit described by the current register state.
/// This runs as the callback of the PixelsFromMemorySrcY0Int register.
/// </summary>
/// <param name="argument">Method call argument</param>
private void PixelsFromMemorySrcY0Int(int argument)
{
    var memoryManager = _channel.MemoryManager;

    // The texture descriptions are read straight out of the register state.
    var dstCopyTexture = Unsafe.As<uint, TwodTexture>(ref _state.State.SetDstFormat);
    var srcCopyTexture = Unsafe.As<uint, TwodTexture>(ref _state.State.SetSrcFormat);

    // Source origin and per-pixel steps, as 32.32 fixed point values (integer part on the high 32 bits).
    long srcXFixed = ((long)_state.State.SetPixelsFromMemorySrcX0Int << 32) | (long)(ulong)_state.State.SetPixelsFromMemorySrcX0Frac;
    long srcYFixed = ((long)_state.State.PixelsFromMemorySrcY0Int << 32) | (long)(ulong)_state.State.SetPixelsFromMemorySrcY0Frac;

    long duDx = ((long)_state.State.SetPixelsFromMemoryDuDxInt << 32) | (long)(ulong)_state.State.SetPixelsFromMemoryDuDxFrac;
    long dvDy = ((long)_state.State.SetPixelsFromMemoryDvDyInt << 32) | (long)(ulong)_state.State.SetPixelsFromMemoryDvDyFrac;

    bool originCorner = _state.State.SetPixelsFromMemorySampleModeOrigin == SetPixelsFromMemorySampleModeOrigin.Corner;

    if (originCorner)
    {
        // With a corner origin, the guest API is assumed to center the origin itself,
        // by adding an offset to the source region X/Y coordinates.
        // That offset is FactorXY / 2.0, with FactorXY = SrcXY / DstXY. It is removed here
        // to recover the real region: the fixed point step is halved and its fractional
        // part is discarded to avoid rounding errors.
        srcXFixed -= (duDx >> 33) << 32;
        srcYFixed -= (dvDy >> 33) << 32;
    }

    int srcX1 = (int)(srcXFixed >> 32);
    int srcY1 = (int)(srcYFixed >> 32);

    // Source extent is the step times the destination size, rounded up to a whole pixel.
    int srcX2 = srcX1 + (int)((duDx * _state.State.SetPixelsFromMemoryDstWidth + uint.MaxValue) >> 32);
    int srcY2 = srcY1 + (int)((dvDy * _state.State.SetPixelsFromMemoryDstHeight + uint.MaxValue) >> 32);

    int dstX1 = (int)_state.State.SetPixelsFromMemoryDstX0;
    int dstY1 = (int)_state.State.SetPixelsFromMemoryDstY0;

    int dstX2 = dstX1 + (int)_state.State.SetPixelsFromMemoryDstWidth;
    int dstY2 = dstY1 + (int)_state.State.SetPixelsFromMemoryDstHeight;

    // Both textures should be at least as large as the region being accessed.
    // The hints only resize within alignment constraints, so most out of bounds copies will not resize.
    var srcHint = new Size(srcX2, srcY2, 1);
    var dstHint = new Size(dstX2, dstY2, 1);

    var srcCopyTextureFormat = srcCopyTexture.Format.Convert();

    int srcWidthAligned = srcCopyTexture.Stride / srcCopyTextureFormat.BytesPerPixel;

    ulong srcByteOffset = 0;

    // An out of bounds copy has to wrap around to the next line.
    // For example, copying the region [32, 96[ from a 64x64 texture leaves 32 pixels outside
    // of the texture bounds; the destination gets the first 32 pixels of the next source line instead.
    // Adding an offset to the texture address achieves this: the initial gap is skipped
    // and the copy ends up inside the bounds again.
    // The proprietary guest OpenGL driver requires this behaviour.
    if (srcCopyTexture.LinearLayout && srcCopyTexture.Width == srcX2 && srcX2 > srcWidthAligned && srcX1 > 0)
    {
        srcByteOffset = (ulong)(srcX1 * srcCopyTextureFormat.BytesPerPixel);
        srcCopyTexture.Width -= srcX1;
        srcX2 -= srcX1;
        srcX1 = 0;
    }

    FormatInfo dstCopyTextureFormat = dstCopyTexture.Format.Convert();

    // A direct memory copy is only possible when it is enabled, both regions end at the same point,
    // the texture data is compatible, and both regions cover their whole texture.
    bool canDirectCopy = GraphicsConfig.Fast2DCopy &&
        srcX2 == dstX2 && srcY2 == dstY2 &&
        IsDataCompatible(srcCopyTexture, dstCopyTexture, srcCopyTextureFormat, dstCopyTextureFormat) &&
        IsCopyRegionComplete(srcCopyTexture, srcCopyTextureFormat, srcX1, srcY1, srcX2, srcY2) &&
        IsCopyRegionComplete(dstCopyTexture, dstCopyTextureFormat, dstX1, dstY1, dstX2, dstY2);

    var srcTexture = memoryManager.Physical.TextureCache.FindOrCreateTexture(
        memoryManager,
        srcCopyTexture,
        srcByteOffset,
        srcCopyTextureFormat,
        !canDirectCopy,
        false,
        srcHint);

    if (srcTexture == null)
    {
        // No source texture was returned; fall back to the CPU copy when it is allowed.
        if (canDirectCopy)
        {
            UnscaledFullCopy(srcCopyTexture, dstCopyTexture, srcX2, srcY2, srcCopyTextureFormat.BytesPerPixel);
        }

        return;
    }

    memoryManager.Physical.TextureCache.Lift(srcTexture);

    // Copies between depth and color formats are not allowed, so if the source texture
    // that was found has a depth format, the destination must use that same format.
    dstCopyTextureFormat = srcTexture.Format.IsDepthOrStencil()
        ? srcTexture.Info.FormatInfo
        : dstCopyTexture.Format.Convert();

    var dstTexture = memoryManager.Physical.TextureCache.FindOrCreateTexture(
        memoryManager,
        dstCopyTexture,
        0,
        dstCopyTextureFormat,
        true,
        srcTexture.ScaleMode == TextureScaleMode.Scaled,
        dstHint);

    if (dstTexture == null)
    {
        return;
    }

    float srcScale = srcTexture.ScaleFactor;
    float dstScale = dstTexture.ScaleFactor;

    // Regions are divided by the sample counts and multiplied by each texture's scale factor.
    var srcRegion = new Extents2D(
        (int)Math.Ceiling(srcScale * (srcX1 / srcTexture.Info.SamplesInX)),
        (int)Math.Ceiling(srcScale * (srcY1 / srcTexture.Info.SamplesInY)),
        (int)Math.Ceiling(srcScale * (srcX2 / srcTexture.Info.SamplesInX)),
        (int)Math.Ceiling(srcScale * (srcY2 / srcTexture.Info.SamplesInY)));

    var dstRegion = new Extents2D(
        (int)Math.Ceiling(dstScale * (dstX1 / dstTexture.Info.SamplesInX)),
        (int)Math.Ceiling(dstScale * (dstY1 / dstTexture.Info.SamplesInY)),
        (int)Math.Ceiling(dstScale * (dstX2 / dstTexture.Info.SamplesInX)),
        (int)Math.Ceiling(dstScale * (dstY2 / dstTexture.Info.SamplesInY)));

    bool linearFilter = _state.State.SetPixelsFromMemorySampleModeFilter == SetPixelsFromMemorySampleModeFilter.Bilinear;

    srcTexture.HostTexture.CopyTo(dstTexture.HostTexture, srcRegion, dstRegion, linearFilter);

    dstTexture.SignalModified();
}
|
|
}
|
|
}
|