diff --git a/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs b/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs
index aa94f1f881..fd93cd8ba7 100644
--- a/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs
@@ -6,6 +6,7 @@ using Ryujinx.Graphics.Texture;
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
namespace Ryujinx.Graphics.Gpu.Engine.Dma
@@ -32,6 +33,69 @@ namespace Ryujinx.Graphics.Gpu.Engine.Dma
RemapEnable = 1 << 10
}
+        /// <summary>
+        /// Texture parameters for copy. Immutable: every field is assigned once by the constructor.
+        /// </summary>
+        private readonly struct TextureParams
+        {
+            /// <summary>
+            /// Copy region X coordinate.
+            /// </summary>
+            public readonly int RegionX;
+
+            /// <summary>
+            /// Copy region Y coordinate.
+            /// </summary>
+            public readonly int RegionY;
+
+            /// <summary>
+            /// Offset from the base pointer of the data in memory.
+            /// </summary>
+            public readonly int BaseOffset;
+
+            /// <summary>
+            /// Bytes per pixel.
+            /// </summary>
+            public readonly int Bpp;
+
+            /// <summary>
+            /// Whether the texture is linear. If false, the texture is block linear.
+            /// </summary>
+            public readonly bool Linear;
+
+            /// <summary>
+            /// Pixel offset from XYZ coordinates calculator.
+            /// </summary>
+            public readonly OffsetCalculator Calculator;
+
+            /// <summary>
+            /// Creates texture parameters.
+            /// </summary>
+            /// <param name="regionX">Copy region X coordinate</param>
+            /// <param name="regionY">Copy region Y coordinate</param>
+            /// <param name="baseOffset">Offset from the base pointer of the data in memory</param>
+            /// <param name="bpp">Bytes per pixel</param>
+            /// <param name="linear">Whether the texture is linear. If false, the texture is block linear</param>
+            /// <param name="calculator">Pixel offset from XYZ coordinates calculator</param>
+            public TextureParams(int regionX, int regionY, int baseOffset, int bpp, bool linear, OffsetCalculator calculator)
+            {
+                RegionX = regionX;
+                RegionY = regionY;
+                BaseOffset = baseOffset;
+                Bpp = bpp;
+                Linear = linear;
+                Calculator = calculator;
+            }
+        }
+
+ /// <summary>
+ /// Three-byte value: three consecutive byte fields, laid out sequentially with a packing of 1 and a total size of 3 bytes.
+ /// </summary>
+ /// <remarks>
+ /// NOTE(review): presumably the element type for copies with a 3-byte component size; confirm against the component size dispatch.
+ /// </remarks>
+ [StructLayout(LayoutKind.Sequential, Size = 3, Pack = 1)]
+ private struct UInt24
+ {
+ public byte Byte0;
+ public byte Byte1;
+ public byte Byte2;
+ }
+
/// <summary>
/// Creates a new instance of the DMA copy engine class.
/// </summary>
@@ -154,8 +218,10 @@ namespace Ryujinx.Graphics.Gpu.Engine.Dma
{
// Buffer to texture copy.
int componentSize = (int)_state.State.SetRemapComponentsComponentSize + 1;
- int srcBpp = remap ? ((int)_state.State.SetRemapComponentsNumSrcComponents + 1) * componentSize : 1;
- int dstBpp = remap ? ((int)_state.State.SetRemapComponentsNumDstComponents + 1) * componentSize : 1;
+ int srcComponents = (int)_state.State.SetRemapComponentsNumSrcComponents + 1;
+ int dstComponents = (int)_state.State.SetRemapComponentsNumDstComponents + 1;
+ int srcBpp = remap ? srcComponents * componentSize : 1;
+ int dstBpp = remap ? dstComponents * componentSize : 1;
var dst = Unsafe.As(ref _state.State.SetDstBlockSize);
var src = Unsafe.As(ref _state.State.SetSrcBlockSize);
@@ -274,63 +340,51 @@ namespace Ryujinx.Graphics.Gpu.Engine.Dma
}
}
- unsafe bool Convert(Span dstSpan, ReadOnlySpan srcSpan) where T : unmanaged
- {
- if (srcLinear && dstLinear && srcBpp == dstBpp)
- {
- // Optimized path for purely linear copies - we don't need to calculate every single byte offset,
- // and we can make use of Span.CopyTo which is very very fast (even compared to pointers)
- for (int y = 0; y < yCount; y++)
- {
- srcCalculator.SetY(srcRegionY + y);
- dstCalculator.SetY(dstRegionY + y);
- int srcOffset = srcCalculator.GetOffset(srcRegionX);
- int dstOffset = dstCalculator.GetOffset(dstRegionX);
- srcSpan.Slice(srcOffset - srcBaseOffset, xCount * srcBpp)
- .CopyTo(dstSpan.Slice(dstOffset - dstBaseOffset, xCount * dstBpp));
- }
- }
- else
- {
- fixed (byte* dstPtr = dstSpan, srcPtr = srcSpan)
- {
- byte* dstBase = dstPtr - dstBaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset.
- byte* srcBase = srcPtr - srcBaseOffset;
-
- for (int y = 0; y < yCount; y++)
- {
- srcCalculator.SetY(srcRegionY + y);
- dstCalculator.SetY(dstRegionY + y);
-
- for (int x = 0; x < xCount; x++)
- {
- int srcOffset = srcCalculator.GetOffset(srcRegionX + x);
- int dstOffset = dstCalculator.GetOffset(dstRegionX + x);
-
- *(T*)(dstBase + dstOffset) = *(T*)(srcBase + srcOffset);
- }
- }
- }
- }
-
- return true;
- }
-
// OPT: This allocates a (potentially) huge temporary array and then copies an existing
// region of memory into it, data that might get overwritten entirely anyways. Ideally this should
// all be rewritten to use pooled arrays, but that gets complicated with packed data and strides
Span dstSpan = memoryManager.GetSpan(dstGpuVa + (ulong)dstBaseOffset, dstSize).ToArray();
- bool _ = srcBpp switch
+ TextureParams srcParams = new TextureParams(srcRegionX, srcRegionY, srcBaseOffset, srcBpp, srcLinear, srcCalculator);
+ TextureParams dstParams = new TextureParams(dstRegionX, dstRegionY, dstBaseOffset, dstBpp, dstLinear, dstCalculator);
+
+ // If remapping is disabled, we always copy the components directly, in order.
+ // If it's enabled, but the mapping is just XYZW, we also copy them in order.
+ bool isIdentityRemap = !remap ||
+ (_state.State.SetRemapComponentsDstX == SetRemapComponentsDst.SrcX &&
+ (dstComponents < 2 || _state.State.SetRemapComponentsDstY == SetRemapComponentsDst.SrcY) &&
+ (dstComponents < 3 || _state.State.SetRemapComponentsDstZ == SetRemapComponentsDst.SrcZ) &&
+ (dstComponents < 4 || _state.State.SetRemapComponentsDstW == SetRemapComponentsDst.SrcW));
+
+ if (isIdentityRemap)
{
- 1 => Convert(dstSpan, srcSpan),
- 2 => Convert(dstSpan, srcSpan),
- 4 => Convert(dstSpan, srcSpan),
- 8 => Convert(dstSpan, srcSpan),
- 12 => Convert(dstSpan, srcSpan),
- 16 => Convert>(dstSpan, srcSpan),
- _ => throw new NotSupportedException($"Unable to copy ${srcBpp} bpp pixel format.")
- };
+ // The order of the components doesn't change, so we can just copy directly
+ // (with layout conversion if necessary).
+
+ // Dispatch on the source pixel size in bytes.
+ switch (srcBpp)
+ {
+ case 1: Copy(dstSpan, srcSpan, dstParams, srcParams); break;
+ case 2: Copy(dstSpan, srcSpan, dstParams, srcParams); break;
+ case 4: Copy(dstSpan, srcSpan, dstParams, srcParams); break;
+ case 8: Copy(dstSpan, srcSpan, dstParams, srcParams); break;
+ case 12: Copy(dstSpan, srcSpan, dstParams, srcParams); break;
+ case 16: Copy>(dstSpan, srcSpan, dstParams, srcParams); break;
+ // Any other pixel size has no matching case and is rejected.
+ default: throw new NotSupportedException($"Unable to copy {srcBpp} bpp pixel format.");
+ }
+ }
+ else
+ {
+ // The order or value of the components might change.
+
+ // Dispatch on the size in bytes of a single remap component.
+ switch (componentSize)
+ {
+ case 1: CopyShuffle(dstSpan, srcSpan, dstParams, srcParams); break;
+ case 2: CopyShuffle(dstSpan, srcSpan, dstParams, srcParams); break;
+ case 3: CopyShuffle(dstSpan, srcSpan, dstParams, srcParams); break;
+ case 4: CopyShuffle(dstSpan, srcSpan, dstParams, srcParams); break;
+ // Any other component size has no matching case and is rejected.
+ default: throw new NotSupportedException($"Unable to copy {componentSize} component size.");
+ }
+ }
memoryManager.Write(dstGpuVa + (ulong)dstBaseOffset, dstSpan);
}
@@ -372,6 +426,133 @@ namespace Ryujinx.Graphics.Gpu.Engine.Dma
}
}
+        /// <summary>
+        /// Copies data from one texture to another, while performing layout conversion if necessary.
+        /// </summary>
+        /// <remarks>
+        /// One element of type <typeparamref name="T"/> is copied per (x, y) position of the
+        /// LineLengthIn x LineCount region. Whole rows are block-copied only when both textures are
+        /// linear, have the same bytes per pixel, and <typeparamref name="T"/> spans a whole pixel.
+        /// </remarks>
+        /// <typeparam name="T">Pixel type</typeparam>
+        /// <param name="dstSpan">Destination texture memory region</param>
+        /// <param name="srcSpan">Source texture memory region</param>
+        /// <param name="dst">Destination texture parameters</param>
+        /// <param name="src">Source texture parameters</param>
+        private unsafe void Copy<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan, TextureParams dst, TextureParams src) where T : unmanaged
+        {
+            int xCount = (int)_state.State.LineLengthIn;
+            int yCount = (int)_state.State.LineCount;
+
+            // The row-wise path moves xCount * Bpp contiguous bytes per line, which only matches the
+            // per-element loop below when each element T covers a whole pixel. CopyShuffle calls this
+            // method with T sized to a single component and with spans offset to that component, so
+            // those calls must take the strided per-element path.
+            bool copiesWholePixels = Unsafe.SizeOf<T>() == src.Bpp;
+
+            if (copiesWholePixels && src.Linear && dst.Linear && src.Bpp == dst.Bpp)
+            {
+                // Optimized path for purely linear copies - we don't need to calculate every single byte offset,
+                // and we can make use of Span.CopyTo which is very very fast (even compared to pointers)
+                for (int y = 0; y < yCount; y++)
+                {
+                    src.Calculator.SetY(src.RegionY + y);
+                    dst.Calculator.SetY(dst.RegionY + y);
+                    int srcOffset = src.Calculator.GetOffset(src.RegionX);
+                    int dstOffset = dst.Calculator.GetOffset(dst.RegionX);
+                    srcSpan.Slice(srcOffset - src.BaseOffset, xCount * src.Bpp)
+                        .CopyTo(dstSpan.Slice(dstOffset - dst.BaseOffset, xCount * dst.Bpp));
+                }
+            }
+            else
+            {
+                fixed (byte* dstPtr = dstSpan, srcPtr = srcSpan)
+                {
+                    byte* dstBase = dstPtr - dst.BaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset.
+                    byte* srcBase = srcPtr - src.BaseOffset;
+
+                    for (int y = 0; y < yCount; y++)
+                    {
+                        src.Calculator.SetY(src.RegionY + y);
+                        dst.Calculator.SetY(dst.RegionY + y);
+
+                        for (int x = 0; x < xCount; x++)
+                        {
+                            int srcOffset = src.Calculator.GetOffset(src.RegionX + x);
+                            int dstOffset = dst.Calculator.GetOffset(dst.RegionX + x);
+
+                            *(T*)(dstBase + dstOffset) = *(T*)(srcBase + srcOffset);
+                        }
+                    }
+                }
+            }
+        }
+
+ /// <summary>
+ /// Sets texture pixel data to a constant value, while performing layout conversion if necessary.
+ /// </summary>
+ /// <typeparam name="T">Pixel type</typeparam>
+ /// <param name="dstSpan">Destination texture memory region</param>
+ /// <param name="dst">Destination texture parameters</param>
+ /// <param name="fillValue">Constant pixel value to be set</param>
+ private unsafe void Fill(Span dstSpan, TextureParams dst, T fillValue) where T : unmanaged
+ {
+ // The filled region is LineLengthIn elements wide and LineCount lines tall.
+ int xCount = (int)_state.State.LineLengthIn;
+ int yCount = (int)_state.State.LineCount;
+
+ fixed (byte* dstPtr = dstSpan)
+ {
+ byte* dstBase = dstPtr - dst.BaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset.
+
+ for (int y = 0; y < yCount; y++)
+ {
+ // Select the line once; GetOffset below is then queried per X coordinate.
+ dst.Calculator.SetY(dst.RegionY + y);
+
+ for (int x = 0; x < xCount; x++)
+ {
+ int dstOffset = dst.Calculator.GetOffset(dst.RegionX + x);
+
+ // Store one T at the offset reported by the calculator.
+ *(T*)(dstBase + dstOffset) = fillValue;
+ }
+ }
+ }
+ }
+
+ /// <summary>
+ /// Copies data from one texture to another, while performing layout conversion and component shuffling if necessary.
+ /// </summary>
+ /// <remarks>
+ /// Each destination component is handled in its own pass: it is either copied from the source
+ /// component chosen by its remap setting, or filled with one of the two remap constants.
+ /// </remarks>
+ /// <typeparam name="T">Pixel type (the spans are offset in multiples of its size, one step per component)</typeparam>
+ /// <param name="dstSpan">Destination texture memory region</param>
+ /// <param name="srcSpan">Source texture memory region</param>
+ /// <param name="dst">Destination texture parameters</param>
+ /// <param name="src">Source texture parameters</param>
+ private void CopyShuffle(Span dstSpan, ReadOnlySpan srcSpan, TextureParams dst, TextureParams src) where T : unmanaged
+ {
+ int dstComponents = (int)_state.State.SetRemapComponentsNumDstComponents + 1;
+
+ for (int i = 0; i < dstComponents; i++)
+ {
+ // Remap setting for destination component i (X, Y, Z, then W for any higher index).
+ SetRemapComponentsDst componentsDst = i switch
+ {
+ 0 => _state.State.SetRemapComponentsDstX,
+ 1 => _state.State.SetRemapComponentsDstY,
+ 2 => _state.State.SetRemapComponentsDstZ,
+ _ => _state.State.SetRemapComponentsDstW
+ };
+
+ // The destination span is advanced by i elements so the write lands on component i;
+ // the source span is advanced by 0..3 elements to read source component X..W.
+ // Values with no case below perform no write for this component.
+ switch (componentsDst)
+ {
+ case SetRemapComponentsDst.SrcX:
+ Copy(dstSpan.Slice(Unsafe.SizeOf() * i), srcSpan, dst, src);
+ break;
+ case SetRemapComponentsDst.SrcY:
+ Copy(dstSpan.Slice(Unsafe.SizeOf() * i), srcSpan.Slice(Unsafe.SizeOf()), dst, src);
+ break;
+ case SetRemapComponentsDst.SrcZ:
+ Copy(dstSpan.Slice(Unsafe.SizeOf() * i), srcSpan.Slice(Unsafe.SizeOf() * 2), dst, src);
+ break;
+ case SetRemapComponentsDst.SrcW:
+ Copy(dstSpan.Slice(Unsafe.SizeOf() * i), srcSpan.Slice(Unsafe.SizeOf() * 3), dst, src);
+ break;
+ case SetRemapComponentsDst.ConstA:
+ // The fill value is SetRemapConstA reinterpreted through Unsafe.As.
+ Fill(dstSpan.Slice(Unsafe.SizeOf() * i), dst, Unsafe.As(ref _state.State.SetRemapConstA));
+ break;
+ case SetRemapComponentsDst.ConstB:
+ // The fill value is SetRemapConstB reinterpreted through Unsafe.As.
+ Fill(dstSpan.Slice(Unsafe.SizeOf() * i), dst, Unsafe.As(ref _state.State.SetRemapConstB));
+ break;
+ }
+ }
+ }
+
/// <summary>
/// Copies block linear data with block linear GOBs to a block linear destination with linear GOBs.
/// </summary>