using Ryujinx.Common;
using Ryujinx.Graphics.Device;
using Ryujinx.Graphics.Texture;
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;

namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory
{
    /// <summary>
    /// Represents an Inline-to-Memory engine class.
    /// </summary>
    /// <remarks>
    /// A transfer is started by <see cref="LaunchDma(ref InlineToMemoryClassState, int)"/>, which latches
    /// the destination parameters, and is then fed through one of the <c>LoadInlineData</c> overloads.
    /// Once enough words have been pushed, the buffered data is written to the destination.
    /// </remarks>
    class InlineToMemoryClass : IDeviceState
    {
        private readonly GpuContext _context;
        private readonly GpuChannel _channel;
        private readonly DeviceState<InlineToMemoryClassState> _state;

        private bool _isLinear;

        // Write position inside _buffer (in 32-bit words) and total transfer size (in bytes).
        private int _offset;
        private int _size;

        // Destination parameters latched from the class state when the DMA is launched.
        private ulong _dstGpuVa;
        private int _dstX;
        private int _dstY;
        private int _dstWidth;
        private int _dstHeight;
        private int _dstStride;
        private int _dstGobBlocksInY;
        private int _dstGobBlocksInZ;
        private int _lineLengthIn;
        private int _lineCount;

        // Set once the current transfer has been flushed; pushed data is ignored while this is true.
        private bool _finished;

        // Staging buffer for the pushed words; reused across transfers and only grown when too small.
        private int[] _buffer;

        /// <summary>
        /// Creates a new instance of the Inline-to-Memory engine class.
        /// </summary>
        /// <param name="context">GPU context</param>
        /// <param name="channel">GPU channel</param>
        /// <param name="initializeState">Indicates if the internal state should be initialized. Set to false if part of another engine</param>
        public InlineToMemoryClass(GpuContext context, GpuChannel channel, bool initializeState)
        {
            _context = context;
            _channel = channel;

            if (initializeState)
            {
                _state = new DeviceState<InlineToMemoryClassState>(new Dictionary<string, RwCallback>
                {
                    { nameof(InlineToMemoryClassState.LaunchDma), new RwCallback(LaunchDma, null) },
                    { nameof(InlineToMemoryClassState.LoadInlineData), new RwCallback(LoadInlineData, null) }
                });
            }
        }

        /// <summary>
        /// Creates a new instance of the inline-to-memory engine class.
        /// </summary>
        /// <param name="context">GPU context</param>
        /// <param name="channel">GPU channel</param>
        public InlineToMemoryClass(GpuContext context, GpuChannel channel) : this(context, channel, true)
        {
        }

        /// <summary>
        /// Reads data from the class registers.
        /// </summary>
        /// <param name="offset">Register byte offset</param>
        /// <returns>Data at the specified offset</returns>
        public int Read(int offset) => _state.Read(offset);

        /// <summary>
        /// Writes data to the class registers.
        /// </summary>
        /// <param name="offset">Register byte offset</param>
        /// <param name="data">Data to be written</param>
        public void Write(int offset, int data) => _state.Write(offset, data);

        /// <summary>
        /// Launches Inline-to-Memory engine DMA copy.
        /// </summary>
        /// <param name="argument">Method call argument</param>
        private void LaunchDma(int argument)
        {
            LaunchDma(ref _state.State, argument);
        }

        /// <summary>
        /// Launches Inline-to-Memory engine DMA copy.
        /// </summary>
        /// <remarks>
        /// Resets the write position, sizes the staging buffer for the new transfer and
        /// latches the destination parameters from <paramref name="state"/>.
        /// </remarks>
        /// <param name="state">Current class state</param>
        /// <param name="argument">Method call argument. Bit 0 set selects a linear destination</param>
        public void LaunchDma(ref InlineToMemoryClassState state, int argument)
        {
            _isLinear = (argument & 1) != 0;

            _offset = 0;

            // Each line is padded up to a multiple of 4 bytes, since the data arrives as 32-bit words.
            _size = (int)(BitUtils.AlignUp(state.LineLengthIn, 4) * state.LineCount);

            int count = _size / 4;

            if (_buffer == null || _buffer.Length < count)
            {
                _buffer = new int[count];
            }

            ulong dstGpuVa = ((ulong)state.OffsetOutUpperValue << 32) | state.OffsetOut;

            _dstGpuVa = dstGpuVa;
            _dstX = state.SetDstOriginBytesXV;
            _dstY = state.SetDstOriginSamplesYV;
            _dstWidth = (int)state.SetDstWidth;
            _dstHeight = (int)state.SetDstHeight;
            _dstStride = (int)state.PitchOut;
            _dstGobBlocksInY = 1 << (int)state.SetDstBlockSizeHeight;
            _dstGobBlocksInZ = 1 << (int)state.SetDstBlockSizeDepth;
            _lineLengthIn = (int)state.LineLengthIn;
            _lineCount = (int)state.LineCount;

            _finished = false;
        }

        /// <summary>
        /// Pushes a block of data to the Inline-to-Memory engine.
        /// </summary>
        /// <param name="data">Data to push</param>
        public void LoadInlineData(ReadOnlySpan<int> data)
        {
            if (!_finished)
            {
                // Never copy past the end of the staging buffer, even if more words were supplied.
                int copySize = Math.Min(data.Length, _buffer.Length - _offset);
                data.Slice(0, copySize).CopyTo(new Span<int>(_buffer).Slice(_offset, copySize));

                _offset += copySize;

                if (_offset * 4 >= _size)
                {
                    FinishTransfer();
                }
            }
        }

        /// <summary>
        /// Pushes a word of data to the Inline-to-Memory engine.
        /// </summary>
        /// <param name="argument">Method call argument</param>
        public void LoadInlineData(int argument)
        {
            if (!_finished)
            {
                _buffer[_offset++] = argument;

                if (_offset * 4 >= _size)
                {
                    FinishTransfer();
                }
            }
        }

        /// <summary>
        /// Performs actual copy of the inline data after the transfer is finished.
        /// </summary>
        /// <remarks>
        /// Every path through this method marks the transfer as finished, so that any
        /// further pushed data is ignored until the next DMA launch.
        /// </remarks>
        private void FinishTransfer()
        {
            var memoryManager = _channel.MemoryManager;

            var data = MemoryMarshal.Cast<int, byte>(_buffer).Slice(0, _size);

            if (_isLinear && _lineCount == 1)
            {
                // Fast path: a single linear line is written in one go.
                memoryManager.WriteTrackedResource(_dstGpuVa, data.Slice(0, _lineLengthIn));
                _context.AdvanceSequence();
            }
            else
            {
                // TODO: Verify if the destination X/Y and width/height are taken into account
                // for linear texture transfers. If not, we can use the fast path for that as well.
                // Right now the copy code at the bottom assumes that it is used on both which might be incorrect.
                if (!_isLinear)
                {
                    var target = memoryManager.Physical.TextureCache.FindTexture(
                        memoryManager,
                        _dstGpuVa,
                        1,
                        _dstStride,
                        _dstHeight,
                        _lineLengthIn,
                        _lineCount,
                        _isLinear,
                        _dstGobBlocksInY,
                        _dstGobBlocksInZ);

                    if (target != null)
                    {
                        target.SynchronizeMemory();
                        target.SetData(data, 0, 0, new GAL.Rectangle(_dstX, _dstY, _lineLengthIn / target.Info.FormatInfo.BytesPerPixel, _lineCount));
                        target.SignalModified();

                        // The transfer is complete on this path too. Without this, LoadInlineData
                        // would keep accepting words and could overrun the staging buffer or
                        // flush the same transfer a second time.
                        _finished = true;

                        return;
                    }
                }

                var dstCalculator = new OffsetCalculator(
                    _dstWidth,
                    _dstHeight,
                    _dstStride,
                    _isLinear,
                    _dstGobBlocksInY,
                    1);

                int srcOffset = 0;

                for (int y = _dstY; y < _dstY + _lineCount; y++)
                {
                    int x1 = _dstX;
                    int x2 = _dstX + _lineLengthIn;
                    int x1Round = BitUtils.AlignUp(_dstX, 16);
                    int x2Trunc = BitUtils.AlignDown(x2, 16);

                    int x = x1;

                    // Leading bytes, copied one at a time until X reaches a 16-byte boundary.
                    if (x1Round <= x2)
                    {
                        for (; x < x1Round; x++, srcOffset++)
                        {
                            int dstOffset = dstCalculator.GetOffset(x, y);

                            ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                            memoryManager.Write(dstAddress, data[srcOffset]);
                        }
                    }

                    // Aligned middle section, copied 16 bytes at a time.
                    for (; x < x2Trunc; x += 16, srcOffset += 16)
                    {
                        int dstOffset = dstCalculator.GetOffset(x, y);

                        ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                        memoryManager.Write(dstAddress, MemoryMarshal.Cast<byte, Vector128<byte>>(data.Slice(srcOffset, 16))[0]);
                    }

                    // Trailing bytes that do not fill a whole 16-byte chunk.
                    for (; x < x2; x++, srcOffset++)
                    {
                        int dstOffset = dstCalculator.GetOffset(x, y);

                        ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                        memoryManager.Write(dstAddress, data[srcOffset]);
                    }

                    // All lines must be aligned to 4 bytes, as the data is pushed one word at a time.
                    // If our copy length is not a multiple of 4, then we need to skip the padding bytes here.
                    int misalignment = _lineLengthIn & 3;

                    if (misalignment != 0)
                    {
                        srcOffset += 4 - misalignment;
                    }
                }

                _context.AdvanceSequence();
            }

            _finished = true;
        }
    }
}