From a1f77a5b6ab33bbcc0a8e070e50cee24ad82eac1 Mon Sep 17 00:00:00 2001 From: riperiperi Date: Sun, 17 Jan 2021 20:08:06 +0000 Subject: [PATCH 01/27] Implement lazy flush-on-read for Buffers (SSBO/Copy) (#1790) * Initial implementation of buffer flush (VERY WIP) * Host shaders need to be rebuilt for the SSBO write flag. * New approach with reserved regions and gl sync * Fix a ton of buffer issues. * Remove unused buffer unmapped behaviour * Revert "Remove unused buffer unmapped behaviour" This reverts commit f1700e52fb8760180ac5e0987a07d409d1e70ece. * Delete modified ranges on unmap Fixes potential crashes in Super Smash Bros, where a previously modified range could lie on either side of an unmap. * Cache some more delegates. * Dispose Sync on Close * Also create host sync for GPFifo syncpoint increment. * Copy buffer optimization, add docs * Fix race condition with OpenGL Sync * Enable read tracking on CommandBuffer, insert syncpoint on WaitForIdle * Performance: Only flush individual pages of SSBO at a time This avoids flushing large amounts of data when only a small amount is actually used. * Signal Modified rather than flushing after clear * Fix some docs and code style. * Introduce a new test for tracking memory protection. Successfully demonstrates that the bug causing write protection to be cleared by a read action has been fixed. (these tests fail on master) * Address Comments * Add host sync for SetReference This ensures that any indirect draws will correctly flush any related buffer data written before them. Fixes some flashing and misplaced world geometry in MH rise. * Make PageAlign static * Re-enable read tracking, for reads. 
--- Ryujinx.Cpu/MemoryManager.cs | 2 +- Ryujinx.Cpu/Tracking/CpuMultiRegionHandle.cs | 1 + .../Tracking/CpuSmartMultiRegionHandle.cs | 1 + Ryujinx.Graphics.GAL/IRenderer.cs | 4 + Ryujinx.Graphics.Gpu/Engine/Compute.cs | 2 +- .../Engine/GPFifo/GPFifoClass.cs | 13 + .../Engine/GPFifo/GPFifoDevice.cs | 2 +- .../Engine/MethodIncrementSyncpoint.cs | 1 + Ryujinx.Graphics.Gpu/Engine/Methods.cs | 3 +- Ryujinx.Graphics.Gpu/GpuContext.cs | 46 +++ Ryujinx.Graphics.Gpu/Memory/Buffer.cs | 206 +++++++++- Ryujinx.Graphics.Gpu/Memory/BufferBounds.cs | 11 +- Ryujinx.Graphics.Gpu/Memory/BufferManager.cs | 121 ++++-- .../Memory/BufferModifiedRangeList.cs | 367 ++++++++++++++++++ Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs | 1 + Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs | 2 +- Ryujinx.Graphics.OpenGL/Renderer.cs | 15 + Ryujinx.Graphics.OpenGL/Sync.cs | 129 ++++++ Ryujinx.Graphics.Shader/BufferDescriptor.cs | 10 + Ryujinx.Graphics.Shader/BufferUsageFlags.cs | 18 + .../Glsl/Instructions/InstGenMemory.cs | 27 ++ .../MockVirtualMemoryManager.cs | 3 + Ryujinx.Memory.Tests/TrackingTests.cs | 63 +++ Ryujinx.Memory/Range/RangeList.cs | 46 +-- Ryujinx.Memory/Tracking/MultiRegionHandle.cs | 11 + Ryujinx.Memory/Tracking/RegionHandle.cs | 1 + .../Tracking/SmartMultiRegionHandle.cs | 20 + Ryujinx.Memory/Tracking/VirtualRegion.cs | 4 +- 28 files changed, 1073 insertions(+), 57 deletions(-) create mode 100644 Ryujinx.Graphics.Gpu/Memory/BufferModifiedRangeList.cs create mode 100644 Ryujinx.Graphics.OpenGL/Sync.cs create mode 100644 Ryujinx.Graphics.Shader/BufferUsageFlags.cs diff --git a/Ryujinx.Cpu/MemoryManager.cs b/Ryujinx.Cpu/MemoryManager.cs index 348ca2bd20..cef2012656 100644 --- a/Ryujinx.Cpu/MemoryManager.cs +++ b/Ryujinx.Cpu/MemoryManager.cs @@ -131,7 +131,7 @@ namespace Ryujinx.Cpu /// Throw for unhandled invalid or unmapped memory accesses public T Read(ulong va) where T : unmanaged { - return MemoryMarshal.Cast(GetSpan(va, Unsafe.SizeOf()))[0]; + return MemoryMarshal.Cast(GetSpan(va, 
Unsafe.SizeOf(), true))[0]; } /// diff --git a/Ryujinx.Cpu/Tracking/CpuMultiRegionHandle.cs b/Ryujinx.Cpu/Tracking/CpuMultiRegionHandle.cs index f76410b4bc..8204a13eb0 100644 --- a/Ryujinx.Cpu/Tracking/CpuMultiRegionHandle.cs +++ b/Ryujinx.Cpu/Tracking/CpuMultiRegionHandle.cs @@ -18,6 +18,7 @@ namespace Ryujinx.Cpu.Tracking public void QueryModified(Action modifiedAction) => _impl.QueryModified(modifiedAction); public void QueryModified(ulong address, ulong size, Action modifiedAction) => _impl.QueryModified(address, size, modifiedAction); public void QueryModified(ulong address, ulong size, Action modifiedAction, int sequenceNumber) => _impl.QueryModified(address, size, modifiedAction, sequenceNumber); + public void RegisterAction(ulong address, ulong size, RegionSignal action) => _impl.RegisterAction(address, size, action); public void SignalWrite() => _impl.SignalWrite(); } } diff --git a/Ryujinx.Cpu/Tracking/CpuSmartMultiRegionHandle.cs b/Ryujinx.Cpu/Tracking/CpuSmartMultiRegionHandle.cs index ddeeab0ae3..e38babfc57 100644 --- a/Ryujinx.Cpu/Tracking/CpuSmartMultiRegionHandle.cs +++ b/Ryujinx.Cpu/Tracking/CpuSmartMultiRegionHandle.cs @@ -15,6 +15,7 @@ namespace Ryujinx.Cpu.Tracking } public void Dispose() => _impl.Dispose(); + public void RegisterAction(RegionSignal action) => _impl.RegisterAction(action); public void QueryModified(Action modifiedAction) => _impl.QueryModified(modifiedAction); public void QueryModified(ulong address, ulong size, Action modifiedAction) => _impl.QueryModified(address, size, modifiedAction); public void QueryModified(ulong address, ulong size, Action modifiedAction, int sequenceNumber) => _impl.QueryModified(address, size, modifiedAction, sequenceNumber); diff --git a/Ryujinx.Graphics.GAL/IRenderer.cs b/Ryujinx.Graphics.GAL/IRenderer.cs index 465c880539..d03cb4c01b 100644 --- a/Ryujinx.Graphics.GAL/IRenderer.cs +++ b/Ryujinx.Graphics.GAL/IRenderer.cs @@ -21,6 +21,8 @@ namespace Ryujinx.Graphics.GAL ISampler 
CreateSampler(SamplerCreateInfo info); ITexture CreateTexture(TextureCreateInfo info, float scale); + void CreateSync(ulong id); + void DeleteBuffer(BufferHandle buffer); byte[] GetBufferData(BufferHandle buffer, int offset, int size); @@ -39,6 +41,8 @@ namespace Ryujinx.Graphics.GAL void ResetCounter(CounterType type); + void WaitSync(ulong id); + void Initialize(GraphicsDebugLevel logLevel); } } diff --git a/Ryujinx.Graphics.Gpu/Engine/Compute.cs b/Ryujinx.Graphics.Gpu/Engine/Compute.cs index fd3114a794..c7e059ba3a 100644 --- a/Ryujinx.Graphics.Gpu/Engine/Compute.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Compute.cs @@ -97,7 +97,7 @@ namespace Ryujinx.Graphics.Gpu.Engine SbDescriptor sbDescriptor = _context.PhysicalMemory.Read(sbDescAddress); - BufferManager.SetComputeStorageBuffer(sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size); + BufferManager.SetComputeStorageBuffer(sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size, sb.Flags); } BufferManager.SetComputeStorageBufferBindings(info.SBuffers); diff --git a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs index 0e87aa3d2b..84d353502c 100644 --- a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs +++ b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs @@ -39,6 +39,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo { nameof(GPFifoClassState.Semaphored), new RwCallback(Semaphored, null) }, { nameof(GPFifoClassState.Syncpointb), new RwCallback(Syncpointb, null) }, { nameof(GPFifoClassState.WaitForIdle), new RwCallback(WaitForIdle, null) }, + { nameof(GPFifoClassState.SetReference), new RwCallback(SetReference, null) }, { nameof(GPFifoClassState.LoadMmeInstructionRam), new RwCallback(LoadMmeInstructionRam, null) }, { nameof(GPFifoClassState.LoadMmeStartAddressRam), new RwCallback(LoadMmeStartAddressRam, null) }, { nameof(GPFifoClassState.SetMmeShadowRamControl), new RwCallback(SetMmeShadowRamControl, null) } @@ -136,6 +137,7 @@ namespace 
Ryujinx.Graphics.Gpu.Engine.GPFifo } else if (operation == SyncpointbOperation.Incr) { + _context.CreateHostSyncIfNeeded(); _context.Synchronization.IncrementSyncpoint(syncpointId); } @@ -150,6 +152,17 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo { _context.Methods.PerformDeferredDraws(); _context.Renderer.Pipeline.Barrier(); + + _context.CreateHostSyncIfNeeded(); + } + + /// + /// Used as an indirect data barrier on NVN. When used, access to previously written data must be coherent. + /// + /// Method call argument + public void SetReference(int argument) + { + _context.CreateHostSyncIfNeeded(); } /// diff --git a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs index 25614a135d..d0fcf14212 100644 --- a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs +++ b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs @@ -52,7 +52,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo { if (Words == null) { - Words = MemoryMarshal.Cast(context.MemoryManager.GetSpan(EntryAddress, (int)EntryCount * 4)).ToArray(); + Words = MemoryMarshal.Cast(context.MemoryManager.GetSpan(EntryAddress, (int)EntryCount * 4, true)).ToArray(); } } } diff --git a/Ryujinx.Graphics.Gpu/Engine/MethodIncrementSyncpoint.cs b/Ryujinx.Graphics.Gpu/Engine/MethodIncrementSyncpoint.cs index 8fcfb9000e..9c22275d55 100644 --- a/Ryujinx.Graphics.Gpu/Engine/MethodIncrementSyncpoint.cs +++ b/Ryujinx.Graphics.Gpu/Engine/MethodIncrementSyncpoint.cs @@ -13,6 +13,7 @@ namespace Ryujinx.Graphics.Gpu.Engine { uint syncpointId = (uint)(argument) & 0xFFFF; + _context.CreateHostSyncIfNeeded(); _context.Renderer.UpdateCounters(); // Poll the query counters, the game may want an updated result. 
_context.Synchronization.IncrementSyncpoint(syncpointId); } diff --git a/Ryujinx.Graphics.Gpu/Engine/Methods.cs b/Ryujinx.Graphics.Gpu/Engine/Methods.cs index 9f27aec223..d6bd51106c 100644 --- a/Ryujinx.Graphics.Gpu/Engine/Methods.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Methods.cs @@ -61,6 +61,7 @@ namespace Ryujinx.Graphics.Gpu.Engine context.MemoryManager.MemoryUnmapped += _counterCache.MemoryUnmappedHandler; context.MemoryManager.MemoryUnmapped += TextureManager.MemoryUnmappedHandler; + context.MemoryManager.MemoryUnmapped += BufferManager.MemoryUnmappedHandler; } /// @@ -333,7 +334,7 @@ namespace Ryujinx.Graphics.Gpu.Engine SbDescriptor sbDescriptor = _context.PhysicalMemory.Read(sbDescAddress); - BufferManager.SetGraphicsStorageBuffer(stage, sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size); + BufferManager.SetGraphicsStorageBuffer(stage, sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size, sb.Flags); } } } diff --git a/Ryujinx.Graphics.Gpu/GpuContext.cs b/Ryujinx.Graphics.Gpu/GpuContext.cs index 6834afb422..15f757c87a 100644 --- a/Ryujinx.Graphics.Gpu/GpuContext.cs +++ b/Ryujinx.Graphics.Gpu/GpuContext.cs @@ -4,6 +4,7 @@ using Ryujinx.Graphics.Gpu.Engine.GPFifo; using Ryujinx.Graphics.Gpu.Memory; using Ryujinx.Graphics.Gpu.Synchronization; using System; +using System.Collections.Generic; using System.Threading; namespace Ryujinx.Graphics.Gpu @@ -59,6 +60,18 @@ namespace Ryujinx.Graphics.Gpu /// internal int SequenceNumber { get; private set; } + /// + /// Internal sync number, used to denote points at which host synchronization can be requested. + /// + internal ulong SyncNumber { get; private set; } + + /// + /// Actions to be performed when a CPU waiting sync point is triggered. + /// If there are more than 0 items when this happens, a host sync object will be generated for the given , + /// and the SyncNumber will be incremented. 
+ /// + internal List SyncActions { get; } + private readonly Lazy _caps; /// @@ -87,6 +100,8 @@ namespace Ryujinx.Graphics.Gpu _caps = new Lazy(Renderer.GetCapabilities); HostInitalized = new ManualResetEvent(false); + + SyncActions = new List(); } /// @@ -118,6 +133,37 @@ namespace Ryujinx.Graphics.Gpu PhysicalMemory = new PhysicalMemory(cpuMemory); } + /// + /// Registers an action to be performed the next time a syncpoint is incremented. + /// This will also ensure a host sync object is created, and is incremented. + /// + /// The action to be performed on sync object creation + public void RegisterSyncAction(Action action) + { + SyncActions.Add(action); + } + + /// + /// Creates a host sync object if there are any pending sync actions. The actions will then be called. + /// If no actions are present, a host sync object is not created. + /// + public void CreateHostSyncIfNeeded() + { + if (SyncActions.Count > 0) + { + Renderer.CreateSync(SyncNumber); + + SyncNumber++; + + foreach (Action action in SyncActions) + { + action(); + } + + SyncActions.Clear(); + } + } + /// /// Disposes all GPU resources currently cached. /// It's an error to push any GPU commands after disposal. diff --git a/Ryujinx.Graphics.Gpu/Memory/Buffer.cs b/Ryujinx.Graphics.Gpu/Memory/Buffer.cs index bf2452833f..7127871a79 100644 --- a/Ryujinx.Graphics.Gpu/Memory/Buffer.cs +++ b/Ryujinx.Graphics.Gpu/Memory/Buffer.cs @@ -1,6 +1,7 @@ using Ryujinx.Cpu.Tracking; using Ryujinx.Graphics.GAL; using Ryujinx.Memory.Range; +using Ryujinx.Memory.Tracking; using System; namespace Ryujinx.Graphics.Gpu.Memory @@ -34,12 +35,28 @@ namespace Ryujinx.Graphics.Gpu.Memory /// public ulong EndAddress => Address + Size; + /// + /// Ranges of the buffer that have been modified on the GPU. + /// Ranges defined here cannot be updated from CPU until a CPU waiting sync point is reached. + /// Then, write tracking will signal, wait for GPU sync (generated at the syncpoint) and flush these regions. 
+ /// + /// + /// This is null until at least one modification occurs. + /// + private BufferModifiedRangeList _modifiedRanges = null; + private CpuMultiRegionHandle _memoryTrackingGranular; + private CpuRegionHandle _memoryTracking; + + private readonly RegionSignal _externalFlushDelegate; + private readonly Action _loadDelegate; private readonly Action _modifiedDelegate; + private int _sequenceNumber; private bool _useGranular; + private bool _syncActionRegistered; /// /// Creates a new instance of the buffer. @@ -66,6 +83,8 @@ namespace Ryujinx.Graphics.Gpu.Memory _memoryTracking = context.PhysicalMemory.BeginTracking(address, size); } + _externalFlushDelegate = new RegionSignal(ExternalFlush); + _loadDelegate = new Action(LoadRegion); _modifiedDelegate = new Action(RegionModified); } @@ -116,12 +135,131 @@ namespace Ryujinx.Graphics.Gpu.Memory if (_memoryTracking.Dirty && _context.SequenceNumber != _sequenceNumber) { _memoryTracking.Reprotect(); - _context.Renderer.SetBufferData(Handle, 0, _context.PhysicalMemory.GetSpan(Address, (int)Size)); + + if (_modifiedRanges != null) + { + _modifiedRanges.ExcludeModifiedRegions(Address, Size, _loadDelegate); + } + else + { + _context.Renderer.SetBufferData(Handle, 0, _context.PhysicalMemory.GetSpan(Address, (int)Size)); + } + _sequenceNumber = _context.SequenceNumber; } } } + /// + /// Ensure that the modified range list exists. + /// + private void EnsureRangeList() + { + if (_modifiedRanges == null) + { + _modifiedRanges = new BufferModifiedRangeList(_context); + } + } + + /// + /// Signal that the given region of the buffer has been modified. 
+ /// + /// The start address of the modified region + /// The size of the modified region + public void SignalModified(ulong address, ulong size) + { + EnsureRangeList(); + + _modifiedRanges.SignalModified(address, size); + + if (!_syncActionRegistered) + { + _context.RegisterSyncAction(SyncAction); + _syncActionRegistered = true; + } + } + + /// + /// Indicate that mofifications in a given region of this buffer have been overwritten. + /// + /// The start address of the region + /// The size of the region + public void ClearModified(ulong address, ulong size) + { + if (_modifiedRanges != null) + { + _modifiedRanges.Clear(address, size); + } + } + + /// + /// Action to be performed when a syncpoint is reached after modification. + /// This will register read/write tracking to flush the buffer from GPU when its memory is used. + /// + private void SyncAction() + { + _syncActionRegistered = false; + + if (_useGranular) + { + _modifiedRanges.GetRanges(Address, Size, (address, size) => + { + _memoryTrackingGranular.RegisterAction(address, size, _externalFlushDelegate); + SynchronizeMemory(address, size); + }); + } + else + { + _memoryTracking.RegisterAction(_externalFlushDelegate); + SynchronizeMemory(Address, Size); + } + } + + /// + /// Inherit modified ranges from another buffer. + /// + /// The buffer to inherit from + public void InheritModifiedRanges(Buffer from) + { + if (from._modifiedRanges != null) + { + if (from._syncActionRegistered && !_syncActionRegistered) + { + _context.RegisterSyncAction(SyncAction); + _syncActionRegistered = true; + } + + EnsureRangeList(); + _modifiedRanges.InheritRanges(from._modifiedRanges, (ulong address, ulong size) => + { + if (_useGranular) + { + _memoryTrackingGranular.RegisterAction(address, size, _externalFlushDelegate); + } + else + { + _memoryTracking.RegisterAction(_externalFlushDelegate); + } + }); + } + } + + /// + /// Determine if a given region of the buffer has been modified, and must be flushed. 
+ /// + /// The start address of the region + /// The size of the region + /// + public bool IsModified(ulong address, ulong size) + { + if (_modifiedRanges != null) + { + return _modifiedRanges.HasRange(address, size); + } + + return false; + } + /// /// Indicate that a region of the buffer was modified, and must be loaded from memory. /// @@ -141,6 +279,23 @@ namespace Ryujinx.Graphics.Gpu.Memory mSize = maxSize; } + if (_modifiedRanges != null) + { + _modifiedRanges.ExcludeModifiedRegions(mAddress, mSize, _loadDelegate); + } + else + { + LoadRegion(mAddress, mSize); + } + } + + /// + /// Load a region of the buffer from memory. + /// + /// Start address of the modified region + /// Size of the modified region + private void LoadRegion(ulong mAddress, ulong mSize) + { int offset = (int)(mAddress - Address); _context.Renderer.SetBufferData(Handle, offset, _context.PhysicalMemory.GetSpan(mAddress, (int)mSize)); @@ -172,15 +327,62 @@ namespace Ryujinx.Graphics.Gpu.Memory _context.PhysicalMemory.WriteUntracked(address, data); } + /// + /// Align a given address and size region to page boundaries. + /// + /// The start address of the region + /// The size of the region + /// The page aligned address and size + private static (ulong address, ulong size) PageAlign(ulong address, ulong size) + { + ulong pageMask = MemoryManager.PageMask; + ulong rA = address & ~pageMask; + ulong rS = ((address + size + pageMask) & ~pageMask) - rA; + return (rA, rS); + } + + /// + /// Flush modified ranges of the buffer from another thread. + /// This will flush all modifications made before the active SyncNumber was set, and may block to wait for GPU sync. 
+ /// + /// Address of the memory action + /// Size in bytes + public void ExternalFlush(ulong address, ulong size) + { + _context.Renderer.BackgroundContextAction(() => + { + var ranges = _modifiedRanges; + + if (ranges != null) + { + (address, size) = PageAlign(address, size); + ranges.WaitForAndGetRanges(address, size, Flush); + } + }); + } + + /// + /// Called when part of the memory for this buffer has been unmapped. + /// Calls are from non-GPU threads. + /// + /// Start address of the unmapped region + /// Size of the unmapped region + public void Unmapped(ulong address, ulong size) + { + _modifiedRanges?.Clear(address, size); + } + /// /// Disposes the host buffer. /// public void Dispose() { - _context.Renderer.DeleteBuffer(Handle); + _modifiedRanges?.Clear(); _memoryTrackingGranular?.Dispose(); _memoryTracking?.Dispose(); + + _context.Renderer.DeleteBuffer(Handle); } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Memory/BufferBounds.cs b/Ryujinx.Graphics.Gpu/Memory/BufferBounds.cs index 060171fb08..5569b9470b 100644 --- a/Ryujinx.Graphics.Gpu/Memory/BufferBounds.cs +++ b/Ryujinx.Graphics.Gpu/Memory/BufferBounds.cs @@ -1,3 +1,5 @@ +using Ryujinx.Graphics.Shader; + namespace Ryujinx.Graphics.Gpu.Memory { /// @@ -15,15 +17,22 @@ namespace Ryujinx.Graphics.Gpu.Memory /// public ulong Size { get; } + /// + /// Buffer usage flags. + /// + public BufferUsageFlags Flags { get; } + /// /// Creates a new buffer region. 
/// /// Region address /// Region size - public BufferBounds(ulong address, ulong size) + /// Buffer usage flags + public BufferBounds(ulong address, ulong size, BufferUsageFlags flags = BufferUsageFlags.None) { Address = address; Size = size; + Flags = flags; } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs b/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs index 0c6431913a..cdcc5a370c 100644 --- a/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs +++ b/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs @@ -68,9 +68,10 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Buffer slot /// Region virtual address /// Region size in bytes - public void SetBounds(int index, ulong address, ulong size) + /// Buffer usage flags + public void SetBounds(int index, ulong address, ulong size, BufferUsageFlags flags = BufferUsageFlags.None) { - Buffers[index] = new BufferBounds(address, size); + Buffers[index] = new BufferBounds(address, size, flags); } /// @@ -219,7 +220,8 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Index of the storage buffer /// Start GPU virtual address of the buffer /// Size in bytes of the storage buffer - public void SetComputeStorageBuffer(int index, ulong gpuVa, ulong size) + /// Buffer usage flags + public void SetComputeStorageBuffer(int index, ulong gpuVa, ulong size, BufferUsageFlags flags) { size += gpuVa & ((ulong)_context.Capabilities.StorageBufferOffsetAlignment - 1); @@ -227,7 +229,7 @@ namespace Ryujinx.Graphics.Gpu.Memory ulong address = TranslateAndCreateBuffer(gpuVa, size); - _cpStorageBuffers.SetBounds(index, address, size); + _cpStorageBuffers.SetBounds(index, address, size, flags); } /// @@ -238,7 +240,8 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Index of the storage buffer /// Start GPU virtual address of the buffer /// Size in bytes of the storage buffer - public void SetGraphicsStorageBuffer(int stage, int index, ulong gpuVa, ulong size) + /// Buffer usage flags + public void SetGraphicsStorageBuffer(int stage, 
int index, ulong gpuVa, ulong size, BufferUsageFlags flags) { size += gpuVa & ((ulong)_context.Capabilities.StorageBufferOffsetAlignment - 1); @@ -252,7 +255,7 @@ namespace Ryujinx.Graphics.Gpu.Memory _gpStorageBuffersDirty = true; } - _gpStorageBuffers[stage].SetBounds(index, address, size); + _gpStorageBuffers[stage].SetBounds(index, address, size, flags); } /// @@ -385,6 +388,30 @@ namespace Ryujinx.Graphics.Gpu.Memory return mask; } + /// + /// Handles removal of buffers written to a memory region being unmapped. + /// + /// Sender object + /// Event arguments + public void MemoryUnmappedHandler(object sender, UnmapEventArgs e) + { + Buffer[] overlaps = new Buffer[10]; + int overlapCount; + + ulong address = _context.MemoryManager.Translate(e.Address); + ulong size = e.Size; + + lock (_buffers) + { + overlapCount = _buffers.FindOverlaps(address, size, ref overlaps); + } + + for (int i = 0; i < overlapCount; i++) + { + overlaps[i].Unmapped(address, size); + } + } + /// /// Performs address translation of the GPU virtual address, and creates a /// new buffer, if needed, for the specified range. 
@@ -443,7 +470,12 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Size in bytes of the buffer private void CreateBufferAligned(ulong address, ulong size) { - int overlapsCount = _buffers.FindOverlapsNonOverlapping(address, size, ref _bufferOverlaps); + int overlapsCount; + + lock (_buffers) + { + overlapsCount = _buffers.FindOverlapsNonOverlapping(address, size, ref _bufferOverlaps); + } if (overlapsCount != 0) { @@ -463,15 +495,19 @@ namespace Ryujinx.Graphics.Gpu.Memory address = Math.Min(address, buffer.Address); endAddress = Math.Max(endAddress, buffer.EndAddress); - buffer.SynchronizeMemory(buffer.Address, buffer.Size); - - _buffers.Remove(buffer); + lock (_buffers) + { + _buffers.Remove(buffer); + } } Buffer newBuffer = new Buffer(_context, address, endAddress - address); newBuffer.SynchronizeMemory(address, endAddress - address); - _buffers.Add(newBuffer); + lock (_buffers) + { + _buffers.Add(newBuffer); + } for (int index = 0; index < overlapsCount; index++) { @@ -479,7 +515,10 @@ namespace Ryujinx.Graphics.Gpu.Memory int dstOffset = (int)(buffer.Address - newBuffer.Address); + buffer.SynchronizeMemory(buffer.Address, buffer.Size); + buffer.CopyTo(newBuffer, dstOffset); + newBuffer.InheritModifiedRanges(buffer); buffer.Dispose(); } @@ -493,7 +532,10 @@ namespace Ryujinx.Graphics.Gpu.Memory // No overlap, just create a new buffer. 
Buffer buffer = new Buffer(_context, address, size); - _buffers.Add(buffer); + lock (_buffers) + { + _buffers.Add(buffer); + } } ShrinkOverlapsBufferIfNeeded(); @@ -549,7 +591,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (bounds.Address != 0) { - sRanges[bindingInfo.Binding] = GetBufferRange(bounds.Address, bounds.Size); + sRanges[bindingInfo.Binding] = GetBufferRange(bounds.Address, bounds.Size, bounds.Flags.HasFlag(BufferUsageFlags.Write)); } } @@ -722,7 +764,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (bounds.Address != 0) { - ranges[bindingInfo.Binding] = GetBufferRange(bounds.Address, bounds.Size); + ranges[bindingInfo.Binding] = GetBufferRange(bounds.Address, bounds.Size, bounds.Flags.HasFlag(BufferUsageFlags.Write)); } } } @@ -818,7 +860,17 @@ namespace Ryujinx.Graphics.Gpu.Memory dstOffset, (int)size); - dstBuffer.Flush(dstAddress, size); + if (srcBuffer.IsModified(srcAddress, size)) + { + dstBuffer.SignalModified(dstAddress, size); + } + else + { + // Optimization: If the data being copied is already in memory, then copy it directly instead of flushing from GPU. 
+ + dstBuffer.ClearModified(dstAddress, size); + _context.PhysicalMemory.WriteUntracked(dstAddress, _context.PhysicalMemory.GetSpan(srcAddress, (int)size)); + } } /// @@ -840,7 +892,7 @@ namespace Ryujinx.Graphics.Gpu.Memory _context.Renderer.Pipeline.ClearBuffer(buffer.Handle, offset, (int)size, value); - buffer.Flush(address, size); + buffer.SignalModified(address, size); } /// @@ -848,10 +900,11 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Start address of the memory range /// Size in bytes of the memory range + /// Whether the buffer will be written to by this use /// The buffer sub-range for the given range - private BufferRange GetBufferRange(ulong address, ulong size) + private BufferRange GetBufferRange(ulong address, ulong size, bool write = false) { - return GetBuffer(address, size).GetRange(address, size); + return GetBuffer(address, size, write).GetRange(address, size); } /// @@ -860,20 +913,32 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Start address of the memory range /// Size in bytes of the memory range + /// Whether the buffer will be written to by this use /// The buffer where the range is fully contained - private Buffer GetBuffer(ulong address, ulong size) + private Buffer GetBuffer(ulong address, ulong size, bool write = false) { Buffer buffer; if (size != 0) { - buffer = _buffers.FindFirstOverlap(address, size); + lock (_buffers) + { + buffer = _buffers.FindFirstOverlap(address, size); + } buffer.SynchronizeMemory(address, size); + + if (write) + { + buffer.SignalModified(address, size); + } } else { - buffer = _buffers.FindFirstOverlap(address, 1); + lock (_buffers) + { + buffer = _buffers.FindFirstOverlap(address, 1); + } } return buffer; @@ -888,7 +953,12 @@ namespace Ryujinx.Graphics.Gpu.Memory { if (size != 0) { - Buffer buffer = _buffers.FindFirstOverlap(address, size); + Buffer buffer; + + lock (_buffers) + { + buffer = _buffers.FindFirstOverlap(address, size); + } buffer.SynchronizeMemory(address, size); } @@ -900,9 +970,12 
@@ namespace Ryujinx.Graphics.Gpu.Memory /// public void Dispose() { - foreach (Buffer buffer in _buffers) + lock (_buffers) { - buffer.Dispose(); + foreach (Buffer buffer in _buffers) + { + buffer.Dispose(); + } } } } diff --git a/Ryujinx.Graphics.Gpu/Memory/BufferModifiedRangeList.cs b/Ryujinx.Graphics.Gpu/Memory/BufferModifiedRangeList.cs new file mode 100644 index 0000000000..594dd06648 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Memory/BufferModifiedRangeList.cs @@ -0,0 +1,367 @@ +using Ryujinx.Memory.Range; +using System; +using System.Linq; + +namespace Ryujinx.Graphics.Gpu.Memory +{ + /// + /// A range within a buffer that has been modified by the GPU. + /// + class BufferModifiedRange : IRange + { + /// + /// Start address of the range in guest memory. + /// + public ulong Address { get; } + + /// + /// Size of the range in bytes. + /// + public ulong Size { get; } + + /// + /// End address of the range in guest memory. + /// + public ulong EndAddress => Address + Size; + + /// + /// The GPU sync number at the time of the last modification. + /// + public ulong SyncNumber { get; internal set; } + + /// + /// Creates a new instance of a modified range. + /// + /// Start address of the range + /// Size of the range in bytes + /// The GPU sync number at the time of creation + public BufferModifiedRange(ulong address, ulong size, ulong syncNumber) + { + Address = address; + Size = size; + SyncNumber = syncNumber; + } + + /// + /// Checks if a given range overlaps with the modified range. + /// + /// Start address of the range + /// Size in bytes of the range + /// True if the range overlaps, false otherwise + public bool OverlapsWith(ulong address, ulong size) + { + return Address < address + size && address < EndAddress; + } + } + + /// + /// A structure used to track GPU modified ranges within a buffer. 
+ /// + class BufferModifiedRangeList : RangeList + { + private GpuContext _context; + + private object _lock = new object(); + + // The list can be accessed from both the GPU thread, and a background thread. + private BufferModifiedRange[] _foregroundOverlaps = new BufferModifiedRange[1]; + private BufferModifiedRange[] _backgroundOverlaps = new BufferModifiedRange[1]; + + /// + /// Creates a new instance of a modified range list. + /// + /// GPU context that the buffer range list belongs to + public BufferModifiedRangeList(GpuContext context) + { + _context = context; + } + + /// + /// Given an input range, calls the given action with sub-ranges which exclude any of the modified regions. + /// + /// Start address of the query range + /// Size of the query range in bytes + /// Action to perform for each remaining sub-range of the input range + public void ExcludeModifiedRegions(ulong address, ulong size, Action action) + { + lock (_lock) + { + // Slices a given region using the modified regions in the list. Calls the action for the new slices. + int count = FindOverlapsNonOverlapping(address, size, ref _foregroundOverlaps); + + for (int i = 0; i < count; i++) + { + BufferModifiedRange overlap = _foregroundOverlaps[i]; + + if (overlap.Address > address) + { + // The start of the remaining region is uncovered by this overlap. Call the action for it. + action(address, overlap.Address - address); + } + + // Remaining region is after this overlap. + size -= overlap.EndAddress - address; + address = overlap.EndAddress; + } + + if ((long)size > 0) + { + // If there is any region left after removing the overlaps, signal it. + action(address, size); + } + } + } + + /// + /// Signal that a region of the buffer has been modified, and add the new region to the range list. + /// Any overlapping ranges will be (partially) removed. 
+ /// + /// Start address of the modified region + /// Size of the modified region in bytes + public void SignalModified(ulong address, ulong size) + { + // Must lock, as this can affect flushes from the background thread. + lock (_lock) + { + // We may overlap with some existing modified regions. They must be cut into by the new entry. + int count = FindOverlapsNonOverlapping(address, size, ref _foregroundOverlaps); + + ulong endAddress = address + size; + ulong syncNumber = _context.SyncNumber; + + for (int i = 0; i < count; i++) + { + // The overlaps must be removed or split. + + BufferModifiedRange overlap = _foregroundOverlaps[i]; + + if (overlap.Address == address && overlap.Size == size) + { + // Region already exists. Just update the existing sync number. + overlap.SyncNumber = syncNumber; + + return; + } + + Remove(overlap); + + if (overlap.Address < address && overlap.EndAddress > address) + { + // A split item must be created behind this overlap. + + Add(new BufferModifiedRange(overlap.Address, address - overlap.Address, overlap.SyncNumber)); + } + + if (overlap.Address < endAddress && overlap.EndAddress > endAddress) + { + // A split item must be created after this overlap. + + Add(new BufferModifiedRange(endAddress, overlap.EndAddress - endAddress, overlap.SyncNumber)); + } + } + + Add(new BufferModifiedRange(address, size, syncNumber)); + } + } + + /// + /// Gets modified ranges within the specified region, and then fires the given action for each range individually. + /// + /// Start address to query + /// Size to query + /// The action to call for each modified range + public void GetRanges(ulong address, ulong size, Action rangeAction) + { + int count = 0; + + // Range list must be consistent for this operation. 
+ lock (_lock) + { + count = FindOverlapsNonOverlapping(address, size, ref _foregroundOverlaps); + } + + for (int i = 0; i < count; i++) + { + BufferModifiedRange overlap = _foregroundOverlaps[i]; + rangeAction(overlap.Address, overlap.Size); + } + } + + /// + /// Queries if a range exists within the specified region. + /// + /// Start address to query + /// Size to query + /// True if a range exists in the specified region, false otherwise + public bool HasRange(ulong address, ulong size) + { + // Range list must be consistent for this operation. + lock (_lock) + { + return FindOverlapsNonOverlapping(address, size, ref _foregroundOverlaps) > 0; + } + } + + /// + /// Gets modified ranges within the specified region, waits on ones from a previous sync number, + /// and then fires the given action for each range individually. + /// + /// + /// This function assumes it is called from the background thread. + /// Modifications from the current sync number are ignored because the guest should not expect them to be available yet. + /// They will remain reserved, so that any data sync prioritizes the data in the GPU. + /// + /// Start address to query + /// Size to query + /// The action to call for each modified range + public void WaitForAndGetRanges(ulong address, ulong size, Action rangeAction) + { + ulong endAddress = address + size; + ulong currentSync = _context.SyncNumber; + + int rangeCount = 0; + + // Range list must be consistent for this operation + lock (_lock) + { + rangeCount = FindOverlapsNonOverlapping(address, size, ref _backgroundOverlaps); + } + + if (rangeCount == 0) + { + return; + } + + // First, determine which syncpoint to wait on. + // This is the latest syncpoint that is not equal to the current sync. 
+ + long highestDiff = long.MinValue; + + for (int i = 0; i < rangeCount; i++) + { + BufferModifiedRange overlap = _backgroundOverlaps[i]; + + long diff = (long)(overlap.SyncNumber - currentSync); + + if (diff < 0 && diff > highestDiff) + { + highestDiff = diff; + } + } + + if (highestDiff == long.MinValue) + { + return; + } + + // Wait for the syncpoint. + _context.Renderer.WaitSync(currentSync + (ulong)highestDiff); + + // Flush and remove all regions with the older syncpoint. + lock (_lock) + { + for (int i = 0; i < rangeCount; i++) + { + BufferModifiedRange overlap = _backgroundOverlaps[i]; + + long diff = (long)(overlap.SyncNumber - currentSync); + + if (diff <= highestDiff) + { + ulong clampAddress = Math.Max(address, overlap.Address); + ulong clampEnd = Math.Min(endAddress, overlap.EndAddress); + + ClearPart(overlap, clampAddress, clampEnd); + + rangeAction(clampAddress, clampEnd - clampAddress); + } + } + } + } + + /// + /// Inherit ranges from another modified range list. + /// + /// The range list to inherit from + /// The action to call for each modified range + public void InheritRanges(BufferModifiedRangeList ranges, Action rangeAction) + { + BufferModifiedRange[] inheritRanges; + + lock (ranges._lock) + { + inheritRanges = ranges.ToArray(); + } + + lock (_lock) + { + foreach (BufferModifiedRange range in inheritRanges) + { + Add(range); + } + } + + ulong currentSync = _context.SyncNumber; + foreach (BufferModifiedRange range in inheritRanges) + { + if (range.SyncNumber != currentSync) + { + rangeAction(range.Address, range.Size); + } + } + } + + private void ClearPart(BufferModifiedRange overlap, ulong address, ulong endAddress) + { + Remove(overlap); + + // If the overlap extends outside of the clear range, make sure those parts still exist. 
+ + if (overlap.Address < address) + { + Add(new BufferModifiedRange(overlap.Address, address - overlap.Address, overlap.SyncNumber)); + } + + if (overlap.EndAddress > endAddress) + { + Add(new BufferModifiedRange(endAddress, overlap.EndAddress - endAddress, overlap.SyncNumber)); + } + } + + /// + /// Clear modified ranges within the specified area. + /// + /// Start address to clear + /// Size to clear + public void Clear(ulong address, ulong size) + { + lock (_lock) + { + // This function can be called from any thread, so it cannot use the arrays for background or foreground. + BufferModifiedRange[] toClear = new BufferModifiedRange[1]; + + int rangeCount = FindOverlapsNonOverlapping(address, size, ref toClear); + + ulong endAddress = address + size; + + for (int i = 0; i < rangeCount; i++) + { + BufferModifiedRange overlap = toClear[i]; + + ClearPart(overlap, address, endAddress); + } + } + } + + /// + /// Clear all modified ranges. + /// + public void Clear() + { + lock (_lock) + { + Items.Clear(); + } + } + } +} diff --git a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs index 3da22b22f5..7021cd2090 100644 --- a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs +++ b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs @@ -61,6 +61,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// GPU virtual address where the data is located /// Size of the data + /// True if read tracking is triggered on the span /// The span of the data at the specified memory location public ReadOnlySpan GetSpan(ulong va, int size, bool tracked = false) { diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index 20e1c9f847..1dbe1805aa 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -34,7 +34,7 @@ namespace Ryujinx.Graphics.Gpu.Shader /// /// Version of the codegen (to be changed when codegen or guest format change). 
/// - private const ulong ShaderCodeGenVersion = 1910; + private const ulong ShaderCodeGenVersion = 1790; /// /// Creates a new instance of the shader cache. diff --git a/Ryujinx.Graphics.OpenGL/Renderer.cs b/Ryujinx.Graphics.OpenGL/Renderer.cs index acbc24de0f..4a3f51bfd3 100644 --- a/Ryujinx.Graphics.OpenGL/Renderer.cs +++ b/Ryujinx.Graphics.OpenGL/Renderer.cs @@ -26,6 +26,8 @@ namespace Ryujinx.Graphics.OpenGL private TextureCopy _backgroundTextureCopy; internal TextureCopy TextureCopy => BackgroundContextWorker.InBackground ? _backgroundTextureCopy : _textureCopy; + private Sync _sync; + internal ResourcePool ResourcePool { get; } public string GpuVendor { get; private set; } @@ -39,6 +41,7 @@ namespace Ryujinx.Graphics.OpenGL _window = new Window(this); _textureCopy = new TextureCopy(this); _backgroundTextureCopy = new TextureCopy(this); + _sync = new Sync(); ResourcePool = new ResourcePool(); } @@ -108,6 +111,7 @@ namespace Ryujinx.Graphics.OpenGL public void PreFrame() { + _sync.Cleanup(); ResourcePool.Tick(); } @@ -164,6 +168,7 @@ namespace Ryujinx.Graphics.OpenGL _pipeline.Dispose(); _window.Dispose(); _counters.Dispose(); + _sync.Dispose(); } public IProgram LoadProgramBinary(byte[] programBinary) @@ -179,5 +184,15 @@ namespace Ryujinx.Graphics.OpenGL return null; } + + public void CreateSync(ulong id) + { + _sync.Create(id); + } + + public void WaitSync(ulong id) + { + _sync.Wait(id); + } } } diff --git a/Ryujinx.Graphics.OpenGL/Sync.cs b/Ryujinx.Graphics.OpenGL/Sync.cs new file mode 100644 index 0000000000..97a71fc4b9 --- /dev/null +++ b/Ryujinx.Graphics.OpenGL/Sync.cs @@ -0,0 +1,129 @@ +using OpenTK.Graphics.OpenGL; +using Ryujinx.Common.Logging; +using System; +using System.Collections.Generic; +using System.Linq; + +namespace Ryujinx.Graphics.OpenGL +{ + class Sync : IDisposable + { + private class SyncHandle + { + public ulong ID; + public IntPtr Handle; + } + + private ulong _firstHandle = 0; + + private List Handles = new List(); + + public void 
Create(ulong id) + { + SyncHandle handle = new SyncHandle + { + ID = id, + Handle = GL.FenceSync(SyncCondition.SyncGpuCommandsComplete, WaitSyncFlags.None) + }; + + lock (Handles) + { + Handles.Add(handle); + } + } + + public void Wait(ulong id) + { + SyncHandle result = null; + + lock (Handles) + { + if ((long)(_firstHandle - id) > 0) + { + return; // The handle has already been signalled or deleted. + } + + foreach (SyncHandle handle in Handles) + { + if (handle.ID == id) + { + result = handle; + break; + } + } + } + + if (result != null) + { + lock (result) + { + if (result.Handle == IntPtr.Zero) + { + return; + } + + WaitSyncStatus syncResult = GL.ClientWaitSync(result.Handle, ClientWaitSyncFlags.SyncFlushCommandsBit, 1000000000); + + if (syncResult == WaitSyncStatus.TimeoutExpired) + { + Logger.Error?.PrintMsg(LogClass.Gpu, $"GL Sync Object {result.ID} failed to signal within 1000ms. Continuing..."); + } + } + } + } + + public void Cleanup() + { + // Iterate through handles and remove any that have already been signalled. + + while (true) + { + SyncHandle first = null; + lock (Handles) + { + first = Handles.FirstOrDefault(); + } + + if (first == null) break; + + WaitSyncStatus syncResult = GL.ClientWaitSync(first.Handle, ClientWaitSyncFlags.SyncFlushCommandsBit, 0); + + if (syncResult == WaitSyncStatus.AlreadySignaled) + { + // Delete the sync object. + lock (Handles) + { + lock (first) + { + _firstHandle = first.ID + 1; + Handles.RemoveAt(0); + GL.DeleteSync(first.Handle); + first.Handle = IntPtr.Zero; + } + } + } else + { + // This sync handle and any following have not been reached yet. 
+ break; + } + } + } + + public void Dispose() + { + lock (Handles) + { + foreach (SyncHandle handle in Handles) + { + lock (handle) + { + GL.DeleteSync(handle.Handle); + handle.Handle = IntPtr.Zero; + } + } + + Handles.Clear(); + } + } + } +} diff --git a/Ryujinx.Graphics.Shader/BufferDescriptor.cs b/Ryujinx.Graphics.Shader/BufferDescriptor.cs index 53a4fb164f..a3af6e41f9 100644 --- a/Ryujinx.Graphics.Shader/BufferDescriptor.cs +++ b/Ryujinx.Graphics.Shader/BufferDescriptor.cs @@ -4,11 +4,21 @@ namespace Ryujinx.Graphics.Shader { public readonly int Binding; public readonly int Slot; + public BufferUsageFlags Flags; public BufferDescriptor(int binding, int slot) { Binding = binding; Slot = slot; + + Flags = BufferUsageFlags.None; + } + + public BufferDescriptor SetFlag(BufferUsageFlags flag) + { + Flags |= flag; + + return this; } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/BufferUsageFlags.cs b/Ryujinx.Graphics.Shader/BufferUsageFlags.cs new file mode 100644 index 0000000000..657546cb71 --- /dev/null +++ b/Ryujinx.Graphics.Shader/BufferUsageFlags.cs @@ -0,0 +1,18 @@ +using System; + +namespace Ryujinx.Graphics.Shader +{ + /// + /// Flags that indicate how a buffer will be used in a shader. + /// + [Flags] + public enum BufferUsageFlags + { + None = 0, + + /// + /// Buffer is written to. 
+ /// + Write = 1 << 0 + } +} diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs index 6244f68b62..3bfc064752 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs @@ -298,6 +298,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions string src = TypeConversion.ReinterpretCast(context, src3, srcType, VariableType.U32); + SetStorageWriteFlag(context, src1, context.Config.Stage); string sb = GetStorageBufferAccessor(indexExpr, offsetExpr, context.Config.Stage); return $"{sb} = {src}"; @@ -629,6 +630,32 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions } } + private static void SetStorageWriteFlag(CodeGenContext context, IAstNode indexExpr, ShaderStage stage) + { + // Attempt to find a BufferDescriptor with the given index. + // If it cannot be resolved or is not constant, assume that the slot expression could potentially index any of them, + // and set the flag on all storage buffers. 
+ + int index = -1; + + if (indexExpr is AstOperand operand && operand.Type == OperandType.Constant) + { + index = context.SBufferDescriptors.FindIndex(buffer => buffer.Slot == operand.Value); + } + + if (index != -1) + { + context.SBufferDescriptors[index] = context.SBufferDescriptors[index].SetFlag(BufferUsageFlags.Write); + } + else + { + for (int i = 0; i < context.SBufferDescriptors.Count; i++) + { + context.SBufferDescriptors[i] = context.SBufferDescriptors[i].SetFlag(BufferUsageFlags.Write); + } + } + } + private static string GetStorageBufferAccessor(string slotExpr, string offsetExpr, ShaderStage stage) { string sbName = OperandManager.GetShaderStagePrefix(stage); diff --git a/Ryujinx.Memory.Tests/MockVirtualMemoryManager.cs b/Ryujinx.Memory.Tests/MockVirtualMemoryManager.cs index 62b3ee4a10..0dd2ce4618 100644 --- a/Ryujinx.Memory.Tests/MockVirtualMemoryManager.cs +++ b/Ryujinx.Memory.Tests/MockVirtualMemoryManager.cs @@ -6,6 +6,8 @@ namespace Ryujinx.Memory.Tests { public bool NoMappings; + public event Action OnProtect; + public MockVirtualMemoryManager(ulong size, int pageSize) { } @@ -82,6 +84,7 @@ namespace Ryujinx.Memory.Tests public void TrackingReprotect(ulong va, ulong size, MemoryPermission protection) { + OnProtect?.Invoke(va, size, protection); } } } diff --git a/Ryujinx.Memory.Tests/TrackingTests.cs b/Ryujinx.Memory.Tests/TrackingTests.cs index 25c230922e..a9cc6df371 100644 --- a/Ryujinx.Memory.Tests/TrackingTests.cs +++ b/Ryujinx.Memory.Tests/TrackingTests.cs @@ -421,5 +421,68 @@ namespace Ryujinx.Memory.Tests Assert.AreEqual((0, 0), _tracking.GetRegionCounts()); } + + [Test] + public void ReadAndWriteProtection() + { + MemoryPermission protection = MemoryPermission.ReadAndWrite; + + _memoryManager.OnProtect += (va, size, newProtection) => + { + Assert.AreEqual((0, PageSize), (va, size)); // Should protect the exact region all the operations use. 
+ protection = newProtection; + }; + + RegionHandle handle = _tracking.BeginTracking(0, PageSize); + + // After creating the handle, there is no protection yet. + Assert.AreEqual(MemoryPermission.ReadAndWrite, protection); + + bool dirtyInitial = handle.Dirty; + Assert.True(dirtyInitial); // Handle starts dirty. + + handle.Reprotect(); + + // After a reprotect, there is write protection, which will set a dirty flag when any write happens. + Assert.AreEqual(MemoryPermission.Read, protection); + + (ulong address, ulong size)? readTrackingTriggered = null; + handle.RegisterAction((address, size) => + { + readTrackingTriggered = (address, size); + }); + + // Registering an action adds read/write protection. + Assert.AreEqual(MemoryPermission.None, protection); + + bool dirtyAfterReprotect = handle.Dirty; + Assert.False(dirtyAfterReprotect); // Handle is no longer dirty. + + // First we should read, which will trigger the action. This _should not_ remove write protection on the memory. + + _tracking.VirtualMemoryEvent(0, 4, false); + + bool dirtyAfterRead = handle.Dirty; + Assert.False(dirtyAfterRead); // Not dirtied, as this was a read. + + Assert.AreEqual(readTrackingTriggered, (0UL, 4UL)); // Read action was triggered. + + Assert.AreEqual(MemoryPermission.Read, protection); // Write protection is still present. + + readTrackingTriggered = null; + + // Now, perform a write. + + _tracking.VirtualMemoryEvent(0, 4, true); + + bool dirtyAfterWriteAfterRead = handle.Dirty; + Assert.True(dirtyAfterWriteAfterRead); // Should be dirty. + + Assert.AreEqual(MemoryPermission.ReadAndWrite, protection); // All protection is now be removed from the memory. + + Assert.IsNull(readTrackingTriggered); // Read tracking was removed when the action fired, as it can only fire once. 
+ + handle.Dispose(); + } } } diff --git a/Ryujinx.Memory/Range/RangeList.cs b/Ryujinx.Memory/Range/RangeList.cs index 3c8c4c4cdd..fd26065632 100644 --- a/Ryujinx.Memory/Range/RangeList.cs +++ b/Ryujinx.Memory/Range/RangeList.cs @@ -12,16 +12,16 @@ namespace Ryujinx.Memory.Range { private const int ArrayGrowthSize = 32; - private readonly List _items; + protected readonly List Items; - public int Count => _items.Count; + public int Count => Items.Count; /// /// Creates a new range list. /// public RangeList() { - _items = new List(); + Items = new List(); } /// @@ -37,7 +37,7 @@ namespace Ryujinx.Memory.Range index = ~index; } - _items.Insert(index, item); + Items.Insert(index, item); } /// @@ -51,21 +51,21 @@ namespace Ryujinx.Memory.Range if (index >= 0) { - while (index > 0 && _items[index - 1].Address == item.Address) + while (index > 0 && Items[index - 1].Address == item.Address) { index--; } - while (index < _items.Count) + while (index < Items.Count) { - if (_items[index].Equals(item)) + if (Items[index].Equals(item)) { - _items.RemoveAt(index); + Items.RemoveAt(index); return true; } - if (_items[index].Address > item.Address) + if (Items[index].Address > item.Address) { break; } @@ -110,7 +110,7 @@ namespace Ryujinx.Memory.Range return default(T); } - return _items[index]; + return Items[index]; } /// @@ -137,7 +137,7 @@ namespace Ryujinx.Memory.Range ulong endAddress = address + size; - foreach (T item in _items) + foreach (T item in Items) { if (item.Address >= endAddress) { @@ -196,7 +196,7 @@ namespace Ryujinx.Memory.Range if (index >= 0) { - while (index > 0 && _items[index - 1].OverlapsWith(address, size)) + while (index > 0 && Items[index - 1].OverlapsWith(address, size)) { index--; } @@ -208,9 +208,9 @@ namespace Ryujinx.Memory.Range Array.Resize(ref output, outputIndex + ArrayGrowthSize); } - output[outputIndex++] = _items[index++]; + output[outputIndex++] = Items[index++]; } - while (index < _items.Count && _items[index].OverlapsWith(address, 
size)); + while (index < Items.Count && Items[index].OverlapsWith(address, size)); } return outputIndex; @@ -230,14 +230,14 @@ namespace Ryujinx.Memory.Range if (index >= 0) { - while (index > 0 && _items[index - 1].Address == address) + while (index > 0 && Items[index - 1].Address == address) { index--; } - while (index < _items.Count) + while (index < Items.Count) { - T overlap = _items[index++]; + T overlap = Items[index++]; if (overlap.Address != address) { @@ -264,7 +264,7 @@ namespace Ryujinx.Memory.Range private int BinarySearch(ulong address) { int left = 0; - int right = _items.Count - 1; + int right = Items.Count - 1; while (left <= right) { @@ -272,7 +272,7 @@ namespace Ryujinx.Memory.Range int middle = left + (range >> 1); - T item = _items[middle]; + T item = Items[middle]; if (item.Address == address) { @@ -301,7 +301,7 @@ namespace Ryujinx.Memory.Range private int BinarySearch(ulong address, ulong size) { int left = 0; - int right = _items.Count - 1; + int right = Items.Count - 1; while (left <= right) { @@ -309,7 +309,7 @@ namespace Ryujinx.Memory.Range int middle = left + (range >> 1); - T item = _items[middle]; + T item = Items[middle]; if (item.OverlapsWith(address, size)) { @@ -331,12 +331,12 @@ namespace Ryujinx.Memory.Range public IEnumerator GetEnumerator() { - return _items.GetEnumerator(); + return Items.GetEnumerator(); } IEnumerator IEnumerable.GetEnumerator() { - return _items.GetEnumerator(); + return Items.GetEnumerator(); } } } \ No newline at end of file diff --git a/Ryujinx.Memory/Tracking/MultiRegionHandle.cs b/Ryujinx.Memory/Tracking/MultiRegionHandle.cs index 02ae3a8bb8..df154bc220 100644 --- a/Ryujinx.Memory/Tracking/MultiRegionHandle.cs +++ b/Ryujinx.Memory/Tracking/MultiRegionHandle.cs @@ -123,6 +123,17 @@ namespace Ryujinx.Memory.Tracking } } + public void RegisterAction(ulong address, ulong size, RegionSignal action) + { + int startHandle = (int)((address - Address) / Granularity); + int lastHandle = (int)((address + (size - 
1) - Address) / Granularity); + + for (int i = startHandle; i <= lastHandle; i++) + { + _handles[i].RegisterAction(action); + } + } + public void Dispose() { foreach (var handle in _handles) diff --git a/Ryujinx.Memory/Tracking/RegionHandle.cs b/Ryujinx.Memory/Tracking/RegionHandle.cs index 96898c214f..3ddcb6db48 100644 --- a/Ryujinx.Memory/Tracking/RegionHandle.cs +++ b/Ryujinx.Memory/Tracking/RegionHandle.cs @@ -24,6 +24,7 @@ namespace Ryujinx.Memory.Tracking private readonly MemoryTracking _tracking; internal MemoryPermission RequiredPermission => _preAction != null ? MemoryPermission.None : (Dirty ? MemoryPermission.ReadAndWrite : MemoryPermission.Read); + internal RegionSignal PreAction => _preAction; /// /// Create a new region handle. The handle is registered with the given tracking object, diff --git a/Ryujinx.Memory/Tracking/SmartMultiRegionHandle.cs b/Ryujinx.Memory/Tracking/SmartMultiRegionHandle.cs index 6018840019..8bc10c411e 100644 --- a/Ryujinx.Memory/Tracking/SmartMultiRegionHandle.cs +++ b/Ryujinx.Memory/Tracking/SmartMultiRegionHandle.cs @@ -41,6 +41,17 @@ namespace Ryujinx.Memory.Tracking Dirty = true; } + public void RegisterAction(RegionSignal action) + { + foreach (var handle in _handles) + { + if (handle != null) + { + handle?.RegisterAction((address, size) => action(handle.Address, handle.Size)); + } + } + } + public void QueryModified(Action modifiedAction) { if (!Dirty) @@ -66,14 +77,23 @@ namespace Ryujinx.Memory.Tracking ulong size = HandlesToBytes(splitIndex - handleIndex); // First, the target handle must be removed. Its data can still be used to determine the new handles. 
+ RegionSignal signal = handle.PreAction; handle.Dispose(); RegionHandle splitLow = _tracking.BeginTracking(address, size); splitLow.Parent = this; + if (signal != null) + { + splitLow.RegisterAction(signal); + } _handles[handleIndex] = splitLow; RegionHandle splitHigh = _tracking.BeginTracking(address + size, handle.Size - size); splitHigh.Parent = this; + if (signal != null) + { + splitHigh.RegisterAction(signal); + } _handles[splitIndex] = splitHigh; } diff --git a/Ryujinx.Memory/Tracking/VirtualRegion.cs b/Ryujinx.Memory/Tracking/VirtualRegion.cs index 90fb55d655..15a11568e7 100644 --- a/Ryujinx.Memory/Tracking/VirtualRegion.cs +++ b/Ryujinx.Memory/Tracking/VirtualRegion.cs @@ -22,12 +22,12 @@ namespace Ryujinx.Memory.Tracking public override void Signal(ulong address, ulong size, bool write) { - _tracking.ProtectVirtualRegion(this, MemoryPermission.ReadAndWrite); // Remove our protection immedately. - foreach (var handle in Handles) { handle.Signal(address, size, write); } + + UpdateProtection(); } /// From 4da674286148d804be6bddb9011d3d28924caf0e Mon Sep 17 00:00:00 2001 From: pineappleEA <67879877+pineappleEA@users.noreply.github.com> Date: Mon, 18 Jan 2021 22:33:58 +0200 Subject: [PATCH 02/27] Fix Linux Icon (#1927) --- Ryujinx/Ui/Applet/ErrorAppletDialog.cs | 3 +++ Ryujinx/Ui/MainWindow.cs | 2 ++ Ryujinx/Ui/Widgets/GameTableContextMenu.cs | 4 ++++ Ryujinx/Ui/Widgets/GtkDialog.cs | 2 ++ Ryujinx/Ui/Widgets/ProfileDialog.cs | 2 ++ Ryujinx/Ui/Windows/AboutWindow.cs | 2 ++ Ryujinx/Ui/Windows/ControllerWindow.cs | 2 ++ Ryujinx/Ui/Windows/SettingsWindow.cs | 3 +++ 8 files changed, 20 insertions(+) diff --git a/Ryujinx/Ui/Applet/ErrorAppletDialog.cs b/Ryujinx/Ui/Applet/ErrorAppletDialog.cs index a51d532447..db02040f68 100644 --- a/Ryujinx/Ui/Applet/ErrorAppletDialog.cs +++ b/Ryujinx/Ui/Applet/ErrorAppletDialog.cs @@ -1,4 +1,5 @@ using Gtk; +using System.Reflection; namespace Ryujinx.Ui.Applet { @@ -6,6 +7,8 @@ namespace Ryujinx.Ui.Applet { public 
ErrorAppletDialog(Window parentWindow, DialogFlags dialogFlags, MessageType messageType, string[] buttons) : base(parentWindow, dialogFlags, messageType, ButtonsType.None, null) { + Icon = new Gdk.Pixbuf(Assembly.GetExecutingAssembly(), "Ryujinx.Ui.Resources.Logo_Ryujinx.png"); + int responseId = 0; if (buttons != null) diff --git a/Ryujinx/Ui/MainWindow.cs b/Ryujinx/Ui/MainWindow.cs index 2e3437292a..778afd1249 100644 --- a/Ryujinx/Ui/MainWindow.cs +++ b/Ryujinx/Ui/MainWindow.cs @@ -22,6 +22,7 @@ using Ryujinx.Ui.Windows; using System; using System.Diagnostics; using System.IO; +using System.Reflection; using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; @@ -110,6 +111,7 @@ namespace Ryujinx.Ui DefaultWidth = monitorWidth < 1280 ? monitorWidth : 1280; DefaultHeight = monitorHeight < 760 ? monitorHeight : 760; + Icon = new Gdk.Pixbuf(Assembly.GetExecutingAssembly(), "Ryujinx.Ui.Resources.Logo_Ryujinx.png"); Title = $"Ryujinx {Program.Version}"; // Hide emulation context status bar. 
diff --git a/Ryujinx/Ui/Widgets/GameTableContextMenu.cs b/Ryujinx/Ui/Widgets/GameTableContextMenu.cs index 5ee8baa381..79cda5ae0c 100644 --- a/Ryujinx/Ui/Widgets/GameTableContextMenu.cs +++ b/Ryujinx/Ui/Widgets/GameTableContextMenu.cs @@ -20,6 +20,7 @@ using System.Buffers; using System.Collections.Generic; using System.Globalization; using System.IO; +using System.Reflection; using System.Threading; using static LibHac.Fs.ApplicationSaveDataManagement; @@ -85,6 +86,7 @@ namespace Ryujinx.Ui.Widgets using MessageDialog messageDialog = new MessageDialog(null, DialogFlags.Modal, MessageType.Question, ButtonsType.YesNo, null) { Title = "Ryujinx", + Icon = new Gdk.Pixbuf(Assembly.GetExecutingAssembly(), "Ryujinx.Ui.Resources.Logo_Ryujinx.png"), Text = $"There is no savedata for {titleName} [{titleId:x16}]", SecondaryText = "Would you like to create savedata for this game?", WindowPosition = WindowPosition.Center @@ -194,6 +196,7 @@ namespace Ryujinx.Ui.Widgets _dialog = new MessageDialog(null, DialogFlags.DestroyWithParent, MessageType.Info, ButtonsType.Cancel, null) { Title = "Ryujinx - NCA Section Extractor", + Icon = new Gdk.Pixbuf(Assembly.GetExecutingAssembly(), "Ryujinx.Ui.Resources.Logo_Ryujinx.png"), SecondaryText = $"Extracting {ncaSectionType} section from {System.IO.Path.GetFileName(_titleFilePath)}...", WindowPosition = WindowPosition.Center }; @@ -310,6 +313,7 @@ namespace Ryujinx.Ui.Widgets MessageDialog dialog = new MessageDialog(null, DialogFlags.DestroyWithParent, MessageType.Info, ButtonsType.Ok, null) { Title = "Ryujinx - NCA Section Extractor", + Icon = new Gdk.Pixbuf(Assembly.GetExecutingAssembly(), "Ryujinx.Ui.Resources.Logo_Ryujinx.png"), SecondaryText = "Extraction has completed successfully.", WindowPosition = WindowPosition.Center }; diff --git a/Ryujinx/Ui/Widgets/GtkDialog.cs b/Ryujinx/Ui/Widgets/GtkDialog.cs index e603383ab6..d8bad60f23 100644 --- a/Ryujinx/Ui/Widgets/GtkDialog.cs +++ b/Ryujinx/Ui/Widgets/GtkDialog.cs @@ -1,4 +1,5 @@ using 
Gtk; +using System.Reflection; using Ryujinx.Common.Logging; namespace Ryujinx.Ui.Widgets @@ -11,6 +12,7 @@ namespace Ryujinx.Ui.Widgets : base(null, DialogFlags.Modal, messageType, buttonsType, null) { Title = title; + Icon = new Gdk.Pixbuf(Assembly.GetExecutingAssembly(), "Ryujinx.Ui.Resources.Logo_Ryujinx.png"); Text = mainText; SecondaryText = secondaryText; WindowPosition = WindowPosition.Center; diff --git a/Ryujinx/Ui/Widgets/ProfileDialog.cs b/Ryujinx/Ui/Widgets/ProfileDialog.cs index 8666757263..0f94bd6e61 100644 --- a/Ryujinx/Ui/Widgets/ProfileDialog.cs +++ b/Ryujinx/Ui/Widgets/ProfileDialog.cs @@ -1,5 +1,6 @@ using Gtk; using System; +using System.Reflection; using GUI = Gtk.Builder.ObjectAttribute; @@ -19,6 +20,7 @@ namespace Ryujinx.Ui.Widgets private ProfileDialog(Builder builder) : base(builder.GetObject("_profileDialog").Handle) { builder.Autoconnect(this); + Icon = new Gdk.Pixbuf(Assembly.GetExecutingAssembly(), "Ryujinx.Ui.Resources.Logo_Ryujinx.png"); } private void OkToggle_Activated(object sender, EventArgs args) diff --git a/Ryujinx/Ui/Windows/AboutWindow.cs b/Ryujinx/Ui/Windows/AboutWindow.cs index ab93e41d11..05c6b13e5e 100644 --- a/Ryujinx/Ui/Windows/AboutWindow.cs +++ b/Ryujinx/Ui/Windows/AboutWindow.cs @@ -3,6 +3,7 @@ using Ryujinx.Common.Utilities; using Ryujinx.Ui.Helper; using System.Net.Http; using System.Net.NetworkInformation; +using System.Reflection; using System.Threading.Tasks; namespace Ryujinx.Ui.Windows @@ -11,6 +12,7 @@ namespace Ryujinx.Ui.Windows { public AboutWindow() : base($"Ryujinx {Program.Version} - About") { + Icon = new Gdk.Pixbuf(Assembly.GetExecutingAssembly(), "Ryujinx.Ui.Resources.Logo_Ryujinx.png"); InitializeComponent(); _ = DownloadPatronsJson(); diff --git a/Ryujinx/Ui/Windows/ControllerWindow.cs b/Ryujinx/Ui/Windows/ControllerWindow.cs index 7b0f7cf8b0..a5345b0354 100644 --- a/Ryujinx/Ui/Windows/ControllerWindow.cs +++ b/Ryujinx/Ui/Windows/ControllerWindow.cs @@ -94,6 +94,8 @@ namespace Ryujinx.Ui.Windows 
private ControllerWindow(Builder builder, PlayerIndex controllerId) : base(builder.GetObject("_controllerWin").Handle) { + Icon = new Gdk.Pixbuf(Assembly.GetExecutingAssembly(), "Ryujinx.Ui.Resources.Logo_Ryujinx.png"); + builder.Autoconnect(this); _playerIndex = controllerId; diff --git a/Ryujinx/Ui/Windows/SettingsWindow.cs b/Ryujinx/Ui/Windows/SettingsWindow.cs index 4497dedf1f..ba64226c37 100644 --- a/Ryujinx/Ui/Windows/SettingsWindow.cs +++ b/Ryujinx/Ui/Windows/SettingsWindow.cs @@ -11,6 +11,7 @@ using System; using System.Collections.Generic; using System.Globalization; using System.IO; +using System.Reflection; using System.Threading.Tasks; using GUI = Gtk.Builder.ObjectAttribute; @@ -91,6 +92,8 @@ namespace Ryujinx.Ui.Windows private SettingsWindow(MainWindow parent, Builder builder, VirtualFileSystem virtualFileSystem, HLE.FileSystem.Content.ContentManager contentManager) : base(builder.GetObject("_settingsWin").Handle) { + Icon = new Gdk.Pixbuf(Assembly.GetExecutingAssembly(), "Ryujinx.Ui.Resources.Logo_Ryujinx.png"); + _parent = parent; builder.Autoconnect(this); From 5e1a839eaa7349342fc34a7adf4d901222b2343b Mon Sep 17 00:00:00 2001 From: mageven <62494521+mageven@users.noreply.github.com> Date: Tue, 19 Jan 2021 05:26:53 +0530 Subject: [PATCH 03/27] Emulate a circular zone for keyboard analog sticks (#1906) --- Ryujinx/Ui/KeyboardController.cs | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/Ryujinx/Ui/KeyboardController.cs b/Ryujinx/Ui/KeyboardController.cs index f52642e3e1..f201c28331 100644 --- a/Ryujinx/Ui/KeyboardController.cs +++ b/Ryujinx/Ui/KeyboardController.cs @@ -1,4 +1,5 @@ using System; +using OpenTK; using OpenTK.Input; using Ryujinx.Common.Configuration.Hid; using Ryujinx.Configuration; @@ -68,13 +69,16 @@ namespace Ryujinx.Ui short dx = 0; short dy = 0; - - if (keyboard[(Key)_config.LeftJoycon.StickUp]) dy = short.MaxValue; - if (keyboard[(Key)_config.LeftJoycon.StickDown]) dy = 
-short.MaxValue; - if (keyboard[(Key)_config.LeftJoycon.StickLeft]) dx = -short.MaxValue; - if (keyboard[(Key)_config.LeftJoycon.StickRight]) dx = short.MaxValue; - return (dx, dy); + if (keyboard[(Key)_config.LeftJoycon.StickUp]) dy += 1; + if (keyboard[(Key)_config.LeftJoycon.StickDown]) dy += -1; + if (keyboard[(Key)_config.LeftJoycon.StickLeft]) dx += -1; + if (keyboard[(Key)_config.LeftJoycon.StickRight]) dx += 1; + + Vector2 stick = new Vector2(dx, dy); + stick.NormalizeFast(); + + return ((short)(stick.X * short.MaxValue), (short)(stick.Y * short.MaxValue)); } public (short, short) GetRightStick() @@ -84,12 +88,15 @@ namespace Ryujinx.Ui short dx = 0; short dy = 0; - if (keyboard[(Key)_config.RightJoycon.StickUp]) dy = short.MaxValue; - if (keyboard[(Key)_config.RightJoycon.StickDown]) dy = -short.MaxValue; - if (keyboard[(Key)_config.RightJoycon.StickLeft]) dx = -short.MaxValue; - if (keyboard[(Key)_config.RightJoycon.StickRight]) dx = short.MaxValue; + if (keyboard[(Key)_config.RightJoycon.StickUp]) dy += 1; + if (keyboard[(Key)_config.RightJoycon.StickDown]) dy += -1; + if (keyboard[(Key)_config.RightJoycon.StickLeft]) dx += -1; + if (keyboard[(Key)_config.RightJoycon.StickRight]) dx += 1; - return (dx, dy); + Vector2 stick = new Vector2(dx, dy); + stick.NormalizeFast(); + + return ((short)(stick.X * short.MaxValue), (short)(stick.Y * short.MaxValue)); } public static HotkeyButtons GetHotkeyButtons(KeyboardState keyboard) From 1364f3616111b8517ac48ee506d556c364c8a6a5 Mon Sep 17 00:00:00 2001 From: Ac_K Date: Tue, 19 Jan 2021 03:28:35 +0100 Subject: [PATCH 04/27] am: Implement CreateHandleStorage and fixes (#1929) --- .../ILibraryAppletCreator.cs | 47 +++++++++++++++---- .../HOS/Services/Am/AppletAE/IStorage.cs | 8 ++-- .../Services/Am/AppletAE/IStorageAccessor.cs | 5 ++ 3 files changed, 49 insertions(+), 11 deletions(-) diff --git a/Ryujinx.HLE/HOS/Services/Am/AppletAE/AllSystemAppletProxiesService/SystemAppletProxy/ILibraryAppletCreator.cs 
b/Ryujinx.HLE/HOS/Services/Am/AppletAE/AllSystemAppletProxiesService/SystemAppletProxy/ILibraryAppletCreator.cs index 5b91e235ed..9fd002a217 100644 --- a/Ryujinx.HLE/HOS/Services/Am/AppletAE/AllSystemAppletProxiesService/SystemAppletProxy/ILibraryAppletCreator.cs +++ b/Ryujinx.HLE/HOS/Services/Am/AppletAE/AllSystemAppletProxiesService/SystemAppletProxy/ILibraryAppletCreator.cs @@ -26,8 +26,15 @@ namespace Ryujinx.HLE.HOS.Services.Am.AppletAE.AllSystemAppletProxiesService.Sys { long size = context.RequestData.ReadInt64(); + if (size <= 0) + { + return ResultCode.ObjectInvalid; + } + MakeObject(context, new IStorage(new byte[size])); + // NOTE: Returns ResultCode.MemoryAllocationFailed if IStorage is null, it doesn't occur in our case. + return ResultCode.Success; } @@ -35,20 +42,44 @@ namespace Ryujinx.HLE.HOS.Services.Am.AppletAE.AllSystemAppletProxiesService.Sys // CreateTransferMemoryStorage(b8, u64, handle) -> object public ResultCode CreateTransferMemoryStorage(ServiceCtx context) { - bool unknown = context.RequestData.ReadBoolean(); - long size = context.RequestData.ReadInt64(); - int handle = context.Request.HandleDesc.ToCopy[0]; + bool isReadOnly = context.RequestData.ReadBoolean(); + long size = context.RequestData.ReadInt64(); + int handle = context.Request.HandleDesc.ToCopy[0]; KTransferMemory transferMem = context.Process.HandleTable.GetObject(handle); - if (transferMem == null) + if (size <= 0) { - Logger.Warning?.Print(LogClass.ServiceAm, $"Invalid TransferMemory Handle: {handle:X}"); - - return ResultCode.Success; // TODO: Find correct error code + return ResultCode.ObjectInvalid; } - var data = new byte[transferMem.Size]; + byte[] data = new byte[transferMem.Size]; + + transferMem.Creator.CpuMemory.Read(transferMem.Address, data); + + context.Device.System.KernelContext.Syscall.CloseHandle(handle); + + MakeObject(context, new IStorage(data, isReadOnly)); + + return ResultCode.Success; + } + + [Command(12)] // 2.0.0+ + // CreateHandleStorage(u64, 
handle) -> object + public ResultCode CreateHandleStorage(ServiceCtx context) + { + long size = context.RequestData.ReadInt64(); + int handle = context.Request.HandleDesc.ToCopy[0]; + + KTransferMemory transferMem = context.Process.HandleTable.GetObject(handle); + + if (size <= 0) + { + return ResultCode.ObjectInvalid; + } + + byte[] data = new byte[transferMem.Size]; + transferMem.Creator.CpuMemory.Read(transferMem.Address, data); context.Device.System.KernelContext.Syscall.CloseHandle(handle); diff --git a/Ryujinx.HLE/HOS/Services/Am/AppletAE/IStorage.cs b/Ryujinx.HLE/HOS/Services/Am/AppletAE/IStorage.cs index 37514275db..e4b7d1984d 100644 --- a/Ryujinx.HLE/HOS/Services/Am/AppletAE/IStorage.cs +++ b/Ryujinx.HLE/HOS/Services/Am/AppletAE/IStorage.cs @@ -2,11 +2,13 @@ namespace Ryujinx.HLE.HOS.Services.Am.AppletAE { class IStorage : IpcService { - public byte[] Data { get; private set; } + public bool IsReadOnly { get; private set; } + public byte[] Data { get; private set; } - public IStorage(byte[] data) + public IStorage(byte[] data, bool isReadOnly = false) { - Data = data; + IsReadOnly = isReadOnly; + Data = data; } [Command(0)] diff --git a/Ryujinx.HLE/HOS/Services/Am/AppletAE/IStorageAccessor.cs b/Ryujinx.HLE/HOS/Services/Am/AppletAE/IStorageAccessor.cs index ddd97a4c02..721cf1f9d2 100644 --- a/Ryujinx.HLE/HOS/Services/Am/AppletAE/IStorageAccessor.cs +++ b/Ryujinx.HLE/HOS/Services/Am/AppletAE/IStorageAccessor.cs @@ -24,6 +24,11 @@ namespace Ryujinx.HLE.HOS.Services.Am.AppletAE // Write(u64, buffer) public ResultCode Write(ServiceCtx context) { + if (_storage.IsReadOnly) + { + return ResultCode.ObjectInvalid; + } + long writePosition = context.RequestData.ReadInt64(); if (writePosition > _storage.Data.Length) From 734747ae5806f85a9378d446d246b7eb27012bb6 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Mon, 18 Jan 2021 23:31:15 -0300 Subject: [PATCH 05/27] Reduce temporary copy/fill buffer size (#1926) --- Ryujinx.Memory/IVirtualMemoryManager.cs | 2 +- 
Ryujinx.Memory/MemoryBlock.cs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Ryujinx.Memory/IVirtualMemoryManager.cs b/Ryujinx.Memory/IVirtualMemoryManager.cs index cd271a5f94..f52c4b2205 100644 --- a/Ryujinx.Memory/IVirtualMemoryManager.cs +++ b/Ryujinx.Memory/IVirtualMemoryManager.cs @@ -15,7 +15,7 @@ namespace Ryujinx.Memory void Fill(ulong va, ulong size, byte value) { - const int MaxChunkSize = 1 << 30; + const int MaxChunkSize = 1 << 24; for (ulong subOffset = 0; subOffset < size; subOffset += MaxChunkSize) { diff --git a/Ryujinx.Memory/MemoryBlock.cs b/Ryujinx.Memory/MemoryBlock.cs index fadd50d44d..3b7a54ae9b 100644 --- a/Ryujinx.Memory/MemoryBlock.cs +++ b/Ryujinx.Memory/MemoryBlock.cs @@ -136,7 +136,7 @@ namespace Ryujinx.Memory /// Throw when , or is out of range public void Copy(ulong dstOffset, ulong srcOffset, ulong size) { - const int MaxChunkSize = 1 << 30; + const int MaxChunkSize = 1 << 24; for (ulong offset = 0; offset < size; offset += MaxChunkSize) { @@ -155,7 +155,7 @@ namespace Ryujinx.Memory /// Throw when either or are out of range public void ZeroFill(ulong offset, ulong size) { - const int MaxChunkSize = 1 << 30; + const int MaxChunkSize = 1 << 24; for (ulong subOffset = 0; subOffset < size; subOffset += MaxChunkSize) { From 2fe3b8e58c63196327214319f5d9654a4b3d30b2 Mon Sep 17 00:00:00 2001 From: Sera <62521228+SeraUQ@users.noreply.github.com> Date: Tue, 19 Jan 2021 03:31:59 +0100 Subject: [PATCH 06/27] Fix some GLXBadDrawable crashes on linux (#1900) Fixes the crashes on linux when you stop emulation, and when you try to exit the emulator while a game is running. Also tested on windows without problems on my side. 
--- Ryujinx/Ui/MainWindow.cs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Ryujinx/Ui/MainWindow.cs b/Ryujinx/Ui/MainWindow.cs index 778afd1249..1c847f4a68 100644 --- a/Ryujinx/Ui/MainWindow.cs +++ b/Ryujinx/Ui/MainWindow.cs @@ -593,7 +593,6 @@ namespace Ryujinx.Ui ToggleExtraWidgets(true); } - _viewBox.Remove(GlRendererWidget); GlRendererWidget.Exit(); if(GlRendererWidget.Window != Window && GlRendererWidget.Window != null) @@ -606,6 +605,7 @@ namespace Ryujinx.Ui _windowsMultimediaTimerResolution?.Dispose(); _windowsMultimediaTimerResolution = null; + _viewBox.Remove(GlRendererWidget); _viewBox.Add(_gameTableWindow); _gameTableWindow.Expand = true; @@ -713,6 +713,7 @@ namespace Ryujinx.Ui // Wait for the other thread to dispose the HLE context before exiting. _deviceExitStatus.WaitOne(); + GlRendererWidget.Dispose(); } } @@ -1202,4 +1203,4 @@ namespace Ryujinx.Ui UpdateGameTable(); } } -} \ No newline at end of file +} From 03aab63e0320ab8c3097d59ca5c474f6168e48a4 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Tue, 19 Jan 2021 00:04:38 -0300 Subject: [PATCH 07/27] Fix out of range exception when a invalid base lod is used (#1931) --- Ryujinx.Graphics.Gpu/Image/TexturePool.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Ryujinx.Graphics.Gpu/Image/TexturePool.cs b/Ryujinx.Graphics.Gpu/Image/TexturePool.cs index 065844cb02..dfcd8a528a 100644 --- a/Ryujinx.Graphics.Gpu/Image/TexturePool.cs +++ b/Ryujinx.Graphics.Gpu/Image/TexturePool.cs @@ -223,7 +223,7 @@ namespace Ryujinx.Graphics.Gpu.Image layerSize = sizeInfo.LayerSize; - if (minLod != 0) + if (minLod != 0 && minLod < levels) { // If the base level is not zero, we additionally add the mip level offset // to the address, this allows the texture manager to find the base level from the From b8353f5639cd61cfe33bb3af8f93988f31b3e444 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Tue, 19 Jan 2021 00:19:52 -0300 Subject: [PATCH 08/27] Enable parallel ASTC decoding by default 
(#1930) --- Ryujinx.Graphics.Gpu/Image/Texture.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Ryujinx.Graphics.Gpu/Image/Texture.cs b/Ryujinx.Graphics.Gpu/Image/Texture.cs index 1c558f567d..3c576cb719 100644 --- a/Ryujinx.Graphics.Gpu/Image/Texture.cs +++ b/Ryujinx.Graphics.Gpu/Image/Texture.cs @@ -646,7 +646,7 @@ namespace Ryujinx.Graphics.Gpu.Image // - BC4/BC5 is not supported on 3D textures. if (!_context.Capabilities.SupportsAstcCompression && Info.FormatInfo.Format.IsAstc()) { - if (!AstcDecoder.TryDecodeToRgba8( + if (!AstcDecoder.TryDecodeToRgba8P( data.ToArray(), Info.FormatInfo.BlockWidth, Info.FormatInfo.BlockHeight, From c3e0c41da3cef647b8bea54f77103fbad85098ba Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Tue, 19 Jan 2021 23:12:33 +0100 Subject: [PATCH 09/27] CPU (A64): Add Fmaxnmp & Fminnmp Scalar Inst.s, Fast & Slow Paths; with Tests. (#1894) --- ARMeilleure/CodeGen/X86/IntrinsicTable.cs | 1 + ARMeilleure/Decoders/OpCodeTable.cs | 2 + .../Instructions/InstEmitSimdArithmetic.cs | 58 ++++++++++++++----- .../Instructions/InstEmitSimdHelper.cs | 43 ++++++++++++++ ARMeilleure/Instructions/InstName.cs | 2 + .../IntermediateRepresentation/Intrinsic.cs | 1 + Ryujinx.Tests/Cpu/CpuTestSimd.cs | 29 ++++++---- 7 files changed, 111 insertions(+), 25 deletions(-) diff --git a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs index 9030be3c1e..5deee349a6 100644 --- a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs +++ b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs @@ -119,6 +119,7 @@ namespace ARMeilleure.CodeGen.X86 Add(Intrinsic.X86Popcnt, new IntrinsicInfo(X86Instruction.Popcnt, IntrinsicType.PopCount)); Add(Intrinsic.X86Por, new IntrinsicInfo(X86Instruction.Por, IntrinsicType.Binary)); Add(Intrinsic.X86Pshufb, new IntrinsicInfo(X86Instruction.Pshufb, IntrinsicType.Binary)); + Add(Intrinsic.X86Pshufd, new IntrinsicInfo(X86Instruction.Pshufd, IntrinsicType.BinaryImm)); 
Add(Intrinsic.X86Pslld, new IntrinsicInfo(X86Instruction.Pslld, IntrinsicType.Binary)); Add(Intrinsic.X86Pslldq, new IntrinsicInfo(X86Instruction.Pslldq, IntrinsicType.Binary)); Add(Intrinsic.X86Psllq, new IntrinsicInfo(X86Instruction.Psllq, IntrinsicType.Binary)); diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index b19124851a..928d0e0d6a 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -311,6 +311,7 @@ namespace ARMeilleure.Decoders SetA64("0>0011100<1xxxxx111101xxxxxxxxxx", InstName.Fmax_V, InstEmit.Fmax_V, OpCodeSimdReg.Create); SetA64("000111100x1xxxxx011010xxxxxxxxxx", InstName.Fmaxnm_S, InstEmit.Fmaxnm_S, OpCodeSimdReg.Create); SetA64("0>0011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnm_V, InstEmit.Fmaxnm_V, OpCodeSimdReg.Create); + SetA64("011111100x110000110010xxxxxxxxxx", InstName.Fmaxnmp_S, InstEmit.Fmaxnmp_S, OpCodeSimd.Create); SetA64("0>1011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnmp_V, InstEmit.Fmaxnmp_V, OpCodeSimdReg.Create); SetA64("0110111000110000110010xxxxxxxxxx", InstName.Fmaxnmv_V, InstEmit.Fmaxnmv_V, OpCodeSimd.Create); SetA64("0>1011100<1xxxxx111101xxxxxxxxxx", InstName.Fmaxp_V, InstEmit.Fmaxp_V, OpCodeSimdReg.Create); @@ -319,6 +320,7 @@ namespace ARMeilleure.Decoders SetA64("0>0011101<1xxxxx111101xxxxxxxxxx", InstName.Fmin_V, InstEmit.Fmin_V, OpCodeSimdReg.Create); SetA64("000111100x1xxxxx011110xxxxxxxxxx", InstName.Fminnm_S, InstEmit.Fminnm_S, OpCodeSimdReg.Create); SetA64("0>0011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnm_V, InstEmit.Fminnm_V, OpCodeSimdReg.Create); + SetA64("011111101x110000110010xxxxxxxxxx", InstName.Fminnmp_S, InstEmit.Fminnmp_S, OpCodeSimd.Create); SetA64("0>1011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnmp_V, InstEmit.Fminnmp_V, OpCodeSimdReg.Create); SetA64("0110111010110000110010xxxxxxxxxx", InstName.Fminnmv_V, InstEmit.Fminnmv_V, OpCodeSimd.Create); SetA64("0>1011101<1xxxxx111101xxxxxxxxxx", InstName.Fminp_V, InstEmit.Fminp_V, 
OpCodeSimdReg.Create); diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs index 88be07bdd3..bd6a98bed8 100644 --- a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs +++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs @@ -347,19 +347,17 @@ namespace ARMeilleure.Instructions public static void Faddp_S(ArmEmitterContext context) { - OpCodeSimd op = (OpCodeSimd)context.CurrOp; - - int sizeF = op.Size & 1; - if (Optimizations.FastFP && Optimizations.UseSse3) { - if (sizeF == 0) + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + if ((op.Size & 1) == 0) { Operand res = context.AddIntrinsic(Intrinsic.X86Haddps, GetVec(op.Rn), GetVec(op.Rn)); context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); } - else /* if (sizeF == 1) */ + else /* if ((op.Size & 1) == 1) */ { Operand res = context.AddIntrinsic(Intrinsic.X86Haddpd, GetVec(op.Rn), GetVec(op.Rn)); @@ -368,14 +366,10 @@ namespace ARMeilleure.Instructions } else { - OperandType type = sizeF != 0 ? 
OperandType.FP64 : OperandType.FP32; - - Operand ne0 = context.VectorExtract(type, GetVec(op.Rn), 0); - Operand ne1 = context.VectorExtract(type, GetVec(op.Rn), 1); - - Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), ne0, ne1); - - context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + EmitScalarPairwiseOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2); + }); } } @@ -552,6 +546,24 @@ namespace ARMeilleure.Instructions } } + public static void Fmaxnmp_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse2ScalarPairwiseOpF(context, (op1, op2) => + { + return EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: true, op1, op2); + }); + } + else + { + EmitScalarPairwiseOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2); + }); + } + } + public static void Fmaxnmp_V(ArmEmitterContext context) { if (Optimizations.FastFP && Optimizations.UseSse41) @@ -708,6 +720,24 @@ namespace ARMeilleure.Instructions } } + public static void Fminnmp_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse2ScalarPairwiseOpF(context, (op1, op2) => + { + return EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: true, op1, op2); + }); + } + else + { + EmitScalarPairwiseOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2); + }); + } + } + public static void Fminnmp_V(ArmEmitterContext context) { if (Optimizations.FastFP && Optimizations.UseSse41) diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper.cs b/ARMeilleure/Instructions/InstEmitSimdHelper.cs index eab891ec5e..e9d5303c78 100644 --- a/ARMeilleure/Instructions/InstEmitSimdHelper.cs +++ b/ARMeilleure/Instructions/InstEmitSimdHelper.cs @@ -1118,6 +1118,49 @@ namespace ARMeilleure.Instructions context.Copy(GetVec(op.Rd), 
context.VectorZeroUpper96(res)); } + public static void EmitScalarPairwiseOpF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand ne0 = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand ne1 = context.VectorExtract(type, GetVec(op.Rn), 1); + + Operand res = context.VectorInsert(context.VectorZero(), emit(ne0, ne1), 0); + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitSse2ScalarPairwiseOpF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Operand op0, op1; + + if ((op.Size & 1) == 0) + { + const int sm0 = 2 << 6 | 2 << 4 | 2 << 2 | 0 << 0; + const int sm1 = 2 << 6 | 2 << 4 | 2 << 2 | 1 << 0; + + Operand zeroN = context.VectorZeroUpper64(n); + + op0 = context.AddIntrinsic(Intrinsic.X86Pshufd, zeroN, Const(sm0)); + op1 = context.AddIntrinsic(Intrinsic.X86Pshufd, zeroN, Const(sm1)); + } + else /* if ((op.Size & 1) == 1) */ + { + Operand zero = context.VectorZero(); + + op0 = context.AddIntrinsic(Intrinsic.X86Movlhps, n, zero); + op1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, n); + } + + context.Copy(GetVec(op.Rd), emit(op0, op1)); + } + public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Func2I emit) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs index a0ec9dc394..990e4393f1 100644 --- a/ARMeilleure/Instructions/InstName.cs +++ b/ARMeilleure/Instructions/InstName.cs @@ -212,6 +212,7 @@ namespace ARMeilleure.Instructions Fmax_V, Fmaxnm_S, Fmaxnm_V, + Fmaxnmp_S, Fmaxnmp_V, Fmaxnmv_V, Fmaxp_V, @@ -220,6 +221,7 @@ namespace ARMeilleure.Instructions Fmin_V, Fminnm_S, Fminnm_V, + Fminnmp_S, Fminnmp_V, Fminnmv_V, Fminp_V, diff --git a/ARMeilleure/IntermediateRepresentation/Intrinsic.cs 
b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs index e2989863b8..1ddf93e5b1 100644 --- a/ARMeilleure/IntermediateRepresentation/Intrinsic.cs +++ b/ARMeilleure/IntermediateRepresentation/Intrinsic.cs @@ -108,6 +108,7 @@ namespace ARMeilleure.IntermediateRepresentation X86Popcnt, X86Por, X86Pshufb, + X86Pshufd, X86Pslld, X86Pslldq, X86Psllq, diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs index 1371de4b76..89c2857089 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs @@ -715,19 +715,23 @@ namespace Ryujinx.Tests.Cpu }; } - private static uint[] _F_Add_P_S_2SS_() + private static uint[] _F_Add_Max_Min_Nm_P_S_2SS_() { return new uint[] { - 0x7E30D820u // FADDP S0, V1.2S + 0x7E30D820u, // FADDP S0, V1.2S + 0x7E30C820u, // FMAXNMP S0, V1.2S + 0x7EB0C820u // FMINNMP S0, V1.2S }; } - private static uint[] _F_Add_P_S_2DD_() + private static uint[] _F_Add_Max_Min_Nm_P_S_2DD_() { return new uint[] { - 0x7E70D820u // FADDP D0, V1.2D + 0x7E70D820u, // FADDP D0, V1.2D + 0x7E70C820u, // FMAXNMP D0, V1.2D + 0x7EF0C820u // FMINNMP D0, V1.2D }; } @@ -1802,12 +1806,13 @@ namespace Ryujinx.Tests.Cpu } [Test, Pairwise] [Explicit] - public void F_Add_P_S_2SS([ValueSource("_F_Add_P_S_2SS_")] uint opcodes, - [ValueSource("_2S_F_")] ulong a) + public void F_Add_Max_Min_Nm_P_S_2SS([ValueSource("_F_Add_Max_Min_Nm_P_S_2SS_")] uint opcodes, + [ValueSource("_2S_F_")] ulong a) { ulong z = TestContext.CurrentContext.Random.NextULong(); + V128 v0 = MakeVectorE0E1(z, z); - V128 v1 = MakeVectorE0(a); + V128 v1 = MakeVectorE0E1(a, z); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); @@ -1820,12 +1825,14 @@ namespace Ryujinx.Tests.Cpu } [Test, Pairwise] [Explicit] - public void F_Add_P_S_2DD([ValueSource("_F_Add_P_S_2DD_")] uint opcodes, - [ValueSource("_1D_F_")] ulong a) + public void F_Add_Max_Min_Nm_P_S_2DD([ValueSource("_F_Add_Max_Min_Nm_P_S_2DD_")] uint opcodes, + [ValueSource("_1D_F_")] ulong a0, + 
[ValueSource("_1D_F_")] ulong a1) { ulong z = TestContext.CurrentContext.Random.NextULong(); - V128 v0 = MakeVectorE1(z); - V128 v1 = MakeVectorE0E1(a, a); + + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a0, a1); int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); From 6a95a3b01a4b68cf944a2ea0733f6b8008aa8357 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Tue, 19 Jan 2021 21:48:27 -0300 Subject: [PATCH 10/27] Fix alignment on CreateTransferMemoryStorage (#1937) --- .../SystemAppletProxy/ILibraryAppletCreator.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Ryujinx.HLE/HOS/Services/Am/AppletAE/AllSystemAppletProxiesService/SystemAppletProxy/ILibraryAppletCreator.cs b/Ryujinx.HLE/HOS/Services/Am/AppletAE/AllSystemAppletProxiesService/SystemAppletProxy/ILibraryAppletCreator.cs index 9fd002a217..b64da12f14 100644 --- a/Ryujinx.HLE/HOS/Services/Am/AppletAE/AllSystemAppletProxiesService/SystemAppletProxy/ILibraryAppletCreator.cs +++ b/Ryujinx.HLE/HOS/Services/Am/AppletAE/AllSystemAppletProxiesService/SystemAppletProxy/ILibraryAppletCreator.cs @@ -42,7 +42,7 @@ namespace Ryujinx.HLE.HOS.Services.Am.AppletAE.AllSystemAppletProxiesService.Sys // CreateTransferMemoryStorage(b8, u64, handle) -> object public ResultCode CreateTransferMemoryStorage(ServiceCtx context) { - bool isReadOnly = context.RequestData.ReadBoolean(); + bool isReadOnly = (context.RequestData.ReadInt64() & 1) != 0; long size = context.RequestData.ReadInt64(); int handle = context.Request.HandleDesc.ToCopy[0]; From c72f78b4d481b889c46196792dff913cfd8becdc Mon Sep 17 00:00:00 2001 From: mageven <62494521+mageven@users.noreply.github.com> Date: Wed, 20 Jan 2021 23:29:51 +0530 Subject: [PATCH 11/27] Fix SL/SR typo in keyboard controller mapping (#1938) --- Ryujinx/Ui/KeyboardController.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Ryujinx/Ui/KeyboardController.cs b/Ryujinx/Ui/KeyboardController.cs index f201c28331..3fb249dbc7 100644 --- 
a/Ryujinx/Ui/KeyboardController.cs +++ b/Ryujinx/Ui/KeyboardController.cs @@ -47,7 +47,7 @@ namespace Ryujinx.Ui if (keyboard[(Key)_config.LeftJoycon.ButtonL]) buttons |= ControllerKeys.L; if (keyboard[(Key)_config.LeftJoycon.ButtonZl]) buttons |= ControllerKeys.Zl; if (keyboard[(Key)_config.LeftJoycon.ButtonSl]) buttons |= ControllerKeys.SlLeft; - if (keyboard[(Key)_config.LeftJoycon.ButtonSr]) buttons |= ControllerKeys.SlRight; + if (keyboard[(Key)_config.LeftJoycon.ButtonSr]) buttons |= ControllerKeys.SrLeft; if (keyboard[(Key)_config.RightJoycon.StickButton]) buttons |= ControllerKeys.RStick; if (keyboard[(Key)_config.RightJoycon.ButtonA]) buttons |= ControllerKeys.A; @@ -57,7 +57,7 @@ namespace Ryujinx.Ui if (keyboard[(Key)_config.RightJoycon.ButtonPlus]) buttons |= ControllerKeys.Plus; if (keyboard[(Key)_config.RightJoycon.ButtonR]) buttons |= ControllerKeys.R; if (keyboard[(Key)_config.RightJoycon.ButtonZr]) buttons |= ControllerKeys.Zr; - if (keyboard[(Key)_config.RightJoycon.ButtonSl]) buttons |= ControllerKeys.SrLeft; + if (keyboard[(Key)_config.RightJoycon.ButtonSl]) buttons |= ControllerKeys.SlRight; if (keyboard[(Key)_config.RightJoycon.ButtonSr]) buttons |= ControllerKeys.SrRight; return buttons; From 3b200806378f393e3014d4f4fbc2948d80d00a3f Mon Sep 17 00:00:00 2001 From: Caian Benedicto Date: Fri, 22 Jan 2021 23:48:03 -0300 Subject: [PATCH 12/27] Fix inverted read only flag in transfer memory creation (#1945) --- .../SystemAppletProxy/ILibraryAppletCreator.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Ryujinx.HLE/HOS/Services/Am/AppletAE/AllSystemAppletProxiesService/SystemAppletProxy/ILibraryAppletCreator.cs b/Ryujinx.HLE/HOS/Services/Am/AppletAE/AllSystemAppletProxiesService/SystemAppletProxy/ILibraryAppletCreator.cs index b64da12f14..2cd2866ef4 100644 --- a/Ryujinx.HLE/HOS/Services/Am/AppletAE/AllSystemAppletProxiesService/SystemAppletProxy/ILibraryAppletCreator.cs +++ 
b/Ryujinx.HLE/HOS/Services/Am/AppletAE/AllSystemAppletProxiesService/SystemAppletProxy/ILibraryAppletCreator.cs @@ -42,7 +42,7 @@ namespace Ryujinx.HLE.HOS.Services.Am.AppletAE.AllSystemAppletProxiesService.Sys // CreateTransferMemoryStorage(b8, u64, handle) -> object public ResultCode CreateTransferMemoryStorage(ServiceCtx context) { - bool isReadOnly = (context.RequestData.ReadInt64() & 1) != 0; + bool isReadOnly = (context.RequestData.ReadInt64() & 1) == 0; long size = context.RequestData.ReadInt64(); int handle = context.Request.HandleDesc.ToCopy[0]; From 6982282cc8ad362924bcb0c176ccb6e6d0339fa4 Mon Sep 17 00:00:00 2001 From: mageven <62494521+mageven@users.noreply.github.com> Date: Sat, 23 Jan 2021 17:59:14 +0530 Subject: [PATCH 13/27] TZ: Fix loop condition in GetTZName (#1950) Closes #1949 --- Ryujinx.HLE/HOS/Services/Time/TimeZone/TimeZone.cs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Ryujinx.HLE/HOS/Services/Time/TimeZone/TimeZone.cs b/Ryujinx.HLE/HOS/Services/Time/TimeZone/TimeZone.cs index 496c678680..c77c472e07 100644 --- a/Ryujinx.HLE/HOS/Services/Time/TimeZone/TimeZone.cs +++ b/Ryujinx.HLE/HOS/Services/Time/TimeZone/TimeZone.cs @@ -183,11 +183,10 @@ namespace Ryujinx.HLE.HOS.Services.Time.TimeZone { int i = namePosition; - char c = name[i]; + char c; - while (c != '\0' && !char.IsDigit(c) && c != ',' && c != '-' && c != '+') + while ((c = name[i]) != '\0' && !char.IsDigit(c) && c != ',' && c != '-' && c != '+') { - c = name[i]; i++; } From f565b0e5a6bebc09381aabb046e9b0b6285b7d10 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sat, 23 Jan 2021 09:38:00 -0300 Subject: [PATCH 14/27] Match texture if the physical range is the same (#1934) * Match texture if the physical range is the same * XML docs and comments --- Ryujinx.Graphics.Gpu/Image/TextureManager.cs | 26 +++++++++++---- Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs | 33 ++++++++++++++++++++ 2 files changed, 53 insertions(+), 6 deletions(-) diff --git 
a/Ryujinx.Graphics.Gpu/Image/TextureManager.cs b/Ryujinx.Graphics.Gpu/Image/TextureManager.cs index 30137d0646..2646a75b7c 100644 --- a/Ryujinx.Graphics.Gpu/Image/TextureManager.cs +++ b/Ryujinx.Graphics.Gpu/Image/TextureManager.cs @@ -685,14 +685,28 @@ namespace Ryujinx.Graphics.Gpu.Image { Texture overlap = _textureOverlaps[index]; - bool rangeMatches = range != null ? overlap.Range.Equals(range.Value) : overlap.Info.GpuAddress == info.GpuAddress; - if (!rangeMatches) - { - continue; - } - TextureMatchQuality matchQuality = overlap.IsExactMatch(info, flags); + if (matchQuality != TextureMatchQuality.NoMatch) + { + // If the parameters match, we need to make sure the texture is mapped to the same memory regions. + + // If a range of memory was supplied, just check if the ranges match. + if (range != null && !overlap.Range.Equals(range.Value)) + { + continue; + } + + // If no range was supplied, we can check if the GPU virtual address match. If they do, + // we know the textures are located at the same memory region. + // If they don't, it may still be mapped to the same physical region, so we + // do a more expensive check to tell if they are mapped into the same physical regions. + if (overlap.Info.GpuAddress != info.GpuAddress && !_context.MemoryManager.CompareRange(overlap.Range, info.GpuAddress)) + { + continue; + } + } + if (matchQuality == TextureMatchQuality.Perfect) { texture = overlap; diff --git a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs index 7021cd2090..5776836c72 100644 --- a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs +++ b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs @@ -343,6 +343,39 @@ namespace Ryujinx.Graphics.Gpu.Memory return new MultiRange(regions.ToArray()); } + /// + /// Checks if a given GPU virtual memory range is mapped to the same physical regions + /// as the specified physical memory multi-range. 
+ /// + /// Physical memory multi-range + /// GPU virtual memory address + /// True if the virtual memory region is mapped into the specified physical one, false otherwise + public bool CompareRange(MultiRange range, ulong va) + { + va &= ~PageMask; + + for (int i = 0; i < range.Count; i++) + { + MemoryRange currentRange = range.GetSubRange(i); + + ulong address = currentRange.Address & ~PageMask; + ulong endAddress = (currentRange.EndAddress + PageMask) & ~PageMask; + + while (address < endAddress) + { + if (Translate(va) != address) + { + return false; + } + + va += PageSize; + address += PageSize; + } + } + + return true; + } + /// /// Validates a GPU virtual address. /// From 30c7b77118994b245729099485185c0c5f77b234 Mon Sep 17 00:00:00 2001 From: Mary Date: Sun, 24 Jan 2021 14:29:16 +0100 Subject: [PATCH 15/27] Github Actions: Workaround windows-latest restore failures (#1957) See https://github.com/actions/setup-dotnet/issues/155. --- .github/workflows/build.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 596ed5b8a3..5bd3e4f13f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -33,6 +33,8 @@ jobs: - uses: actions/setup-dotnet@v1 with: dotnet-version: 5.0.x + - name: Clear + run: dotnet clean && dotnet nuget locals all --clear - name: Build run: dotnet build -c "${{ matrix.configuration }}" - name: Test From 8d4bee3ea90bc1e75006bdf64f859204991a37a2 Mon Sep 17 00:00:00 2001 From: ShahilSharma <67567036+ShahilSharma@users.noreply.github.com> Date: Sun, 24 Jan 2021 14:21:49 -0800 Subject: [PATCH 16/27] Update Controller Images (#1951) This updates the old Images used for the input section for Ryujinx. The old one didn't play nice with the some GTK themes like light mode, this new one does. 
--- .../Ui/Resources/Controller_JoyConLeft.svg | 329 +++-- .../Ui/Resources/Controller_JoyConPair.svg | 682 ++++++--- .../Ui/Resources/Controller_JoyConRight.svg | 423 ++++-- Ryujinx/Ui/Resources/Controller_ProCon.svg | 1224 +++++++++++++++-- 4 files changed, 2085 insertions(+), 573 deletions(-) diff --git a/Ryujinx/Ui/Resources/Controller_JoyConLeft.svg b/Ryujinx/Ui/Resources/Controller_JoyConLeft.svg index 40d06136b1..c3b82f8a26 100644 --- a/Ryujinx/Ui/Resources/Controller_JoyConLeft.svg +++ b/Ryujinx/Ui/Resources/Controller_JoyConLeft.svg @@ -1,105 +1,232 @@ - - - - diff --git a/Ryujinx/Ui/Resources/Controller_JoyConPair.svg b/Ryujinx/Ui/Resources/Controller_JoyConPair.svg index fca94d18f6..cf0f2bb09a 100644 --- a/Ryujinx/Ui/Resources/Controller_JoyConPair.svg +++ b/Ryujinx/Ui/Resources/Controller_JoyConPair.svg @@ -1,218 +1,476 @@ - - - - diff --git a/Ryujinx/Ui/Resources/Controller_JoyConRight.svg b/Ryujinx/Ui/Resources/Controller_JoyConRight.svg index 014c0ae3df..aa64330c9e 100644 --- a/Ryujinx/Ui/Resources/Controller_JoyConRight.svg +++ b/Ryujinx/Ui/Resources/Controller_JoyConRight.svg @@ -1,120 +1,311 @@ - - - - diff --git a/Ryujinx/Ui/Resources/Controller_ProCon.svg b/Ryujinx/Ui/Resources/Controller_ProCon.svg index 8c2b879fa3..e25122d483 100644 --- a/Ryujinx/Ui/Resources/Controller_ProCon.svg +++ b/Ryujinx/Ui/Resources/Controller_ProCon.svg @@ -1,149 +1,1085 @@ - - - - From f94acdb4efcf48555481f38417f8befa4ca560ad Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sun, 24 Jan 2021 19:22:19 -0300 Subject: [PATCH 17/27] Allow out of bounds storage buffer access by aligning their sizes (#1870) * Allow out of bounds storage buffer access by aligning their sizes * Use correct size * Fix typo and comment on the reason for the change --- Ryujinx.Graphics.Gpu/Memory/Buffer.cs | 15 +++++++++++++ Ryujinx.Graphics.Gpu/Memory/BufferManager.cs | 23 ++++++++++++++++++-- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/Ryujinx.Graphics.Gpu/Memory/Buffer.cs 
b/Ryujinx.Graphics.Gpu/Memory/Buffer.cs index 7127871a79..cdd61b6d95 100644 --- a/Ryujinx.Graphics.Gpu/Memory/Buffer.cs +++ b/Ryujinx.Graphics.Gpu/Memory/Buffer.cs @@ -88,6 +88,21 @@ namespace Ryujinx.Graphics.Gpu.Memory _modifiedDelegate = new Action(RegionModified); } + /// + /// Gets a sub-range from the buffer, from a start address till the end of the buffer. + /// + /// + /// This can be used to bind and use sub-ranges of the buffer on the host API. + /// + /// Start address of the sub-range, must be greater than or equal to the buffer address + /// The buffer sub-range + public BufferRange GetRange(ulong address) + { + ulong offset = address - Address; + + return new BufferRange(Handle, (int)offset, (int)(Size - offset)); + } + /// /// Gets a sub-range from the buffer. /// diff --git a/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs b/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs index cdcc5a370c..08d52faa4b 100644 --- a/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs +++ b/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs @@ -591,7 +591,12 @@ namespace Ryujinx.Graphics.Gpu.Memory if (bounds.Address != 0) { - sRanges[bindingInfo.Binding] = GetBufferRange(bounds.Address, bounds.Size, bounds.Flags.HasFlag(BufferUsageFlags.Write)); + // The storage buffer size is not reliable (it might be lower than the actual size), + // so we bind the entire buffer to allow otherwise out of range accesses to work. + sRanges[bindingInfo.Binding] = GetBufferRangeTillEnd( + bounds.Address, + bounds.Size, + bounds.Flags.HasFlag(BufferUsageFlags.Write)); } } @@ -764,7 +769,9 @@ namespace Ryujinx.Graphics.Gpu.Memory if (bounds.Address != 0) { - ranges[bindingInfo.Binding] = GetBufferRange(bounds.Address, bounds.Size, bounds.Flags.HasFlag(BufferUsageFlags.Write)); + ranges[bindingInfo.Binding] = isStorage + ? 
GetBufferRangeTillEnd(bounds.Address, bounds.Size, bounds.Flags.HasFlag(BufferUsageFlags.Write)) + : GetBufferRange(bounds.Address, bounds.Size, bounds.Flags.HasFlag(BufferUsageFlags.Write)); } } } @@ -895,6 +902,18 @@ namespace Ryujinx.Graphics.Gpu.Memory buffer.SignalModified(address, size); } + /// + /// Gets a buffer sub-range starting at a given memory address. + /// + /// Start address of the memory range + /// Size in bytes of the memory range + /// Whether the buffer will be written to by this use + /// The buffer sub-range starting at the given memory address + private BufferRange GetBufferRangeTillEnd(ulong address, ulong size, bool write = false) + { + return GetBuffer(address, size, write).GetRange(address); + } + /// /// Gets a buffer sub-range for a given memory range. /// From ddf1105bcb6c9884e1188d5f63f0890ef1806176 Mon Sep 17 00:00:00 2001 From: FICTURE7 Date: Mon, 25 Jan 2021 03:01:25 +0400 Subject: [PATCH 18/27] Add VCLZ.* fast path (#1917) * Add VCLZ fast path * Add VCLZ.8B/16B SSSE3 fast path * Add VCLZ.4H/8H SSSE3 fast path * Add VCLZ.2S/4S SSE2 fast path * Improve CLZ.4H/8H fast path * Improve CLZ.2S/4S fast path * Set PPTC version --- .../Instructions/InstEmitSimdArithmetic.cs | 147 +++++++++++++++++- .../Instructions/InstEmitSimdHelper.cs | 5 + ARMeilleure/Translation/PTC/Ptc.cs | 2 +- 3 files changed, 145 insertions(+), 9 deletions(-) diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs index bd6a98bed8..f18b91cfcc 100644 --- a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs +++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs @@ -120,24 +120,155 @@ namespace ARMeilleure.Instructions { OpCodeSimd op = (OpCodeSimd)context.CurrOp; - Operand res = context.VectorZero(); - - int elems = op.GetBytesCount() >> op.Size; - int eSize = 8 << op.Size; - for (int index = 0; index < elems; index++) + Operand res = eSize switch { + 8 => Clz_V_I8 (context, GetVec(op.Rn)), + 16 => 
Clz_V_I16(context, GetVec(op.Rn)), + 32 => Clz_V_I32(context, GetVec(op.Rn)), + _ => null + }; + + if (res != null) { - Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + } + else + { + int elems = op.GetBytesCount() >> op.Size; - Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingZeros)), ne, Const(eSize)); + res = context.VectorZero(); - res = EmitVectorInsert(context, res, de, index, op.Size); + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingZeros)), ne, Const(eSize)); + + res = EmitVectorInsert(context, res, de, index, op.Size); + } } context.Copy(GetVec(op.Rd), res); } + private static Operand Clz_V_I8(ArmEmitterContext context, Operand arg) + { + if (!Optimizations.UseSsse3) + { + return null; + } + + // CLZ nibble table. + Operand clzTable = X86GetScalar(context, 0x01_01_01_01_02_02_03_04); + + Operand maskLow = X86GetAllElements(context, 0x0f_0f_0f_0f); + Operand c04 = X86GetAllElements(context, 0x04_04_04_04); + + // CLZ of low 4 bits of elements in arg. + Operand loClz = context.AddIntrinsic(Intrinsic.X86Pshufb, clzTable, arg); + + // Get the high 4 bits of elements in arg. + Operand hiArg = context.AddIntrinsic(Intrinsic.X86Psrlw, arg, Const(4)); + hiArg = context.AddIntrinsic(Intrinsic.X86Pand, hiArg, maskLow); + + // CLZ of high 4 bits of elements in arg. + Operand hiClz = context.AddIntrinsic(Intrinsic.X86Pshufb, clzTable, hiArg); + + // If high 4 bits are not all zero, we discard the CLZ of the low 4 bits. 
+ Operand mask = context.AddIntrinsic(Intrinsic.X86Pcmpeqb, hiClz, c04); + loClz = context.AddIntrinsic(Intrinsic.X86Pand, loClz, mask); + + return context.AddIntrinsic(Intrinsic.X86Paddb, loClz, hiClz); + } + + private static Operand Clz_V_I16(ArmEmitterContext context, Operand arg) + { + if (!Optimizations.UseSsse3) + { + return null; + } + + Operand maskSwap = X86GetElements(context, 0x80_0f_80_0d_80_0b_80_09, 0x80_07_80_05_80_03_80_01); + Operand maskLow = X86GetAllElements(context, 0x00ff_00ff); + Operand c0008 = X86GetAllElements(context, 0x0008_0008); + + // CLZ pair of high 8 and low 8 bits of elements in arg. + Operand hiloClz = Clz_V_I8(context, arg); + // Get CLZ of low 8 bits in each pair. + Operand loClz = context.AddIntrinsic(Intrinsic.X86Pand, hiloClz, maskLow); + // Get CLZ of high 8 bits in each pair. + Operand hiClz = context.AddIntrinsic(Intrinsic.X86Pshufb, hiloClz, maskSwap); + + // If high 8 bits are not all zero, we discard the CLZ of the low 8 bits. + Operand mask = context.AddIntrinsic(Intrinsic.X86Pcmpeqw, hiClz, c0008); + loClz = context.AddIntrinsic(Intrinsic.X86Pand, loClz, mask); + + return context.AddIntrinsic(Intrinsic.X86Paddw, loClz, hiClz); + } + + private static Operand Clz_V_I32(ArmEmitterContext context, Operand arg) + { + // TODO: Use vplzcntd when AVX-512 is supported. 
+ if (!Optimizations.UseSse2) + { + return null; + } + + Operand AddVectorI32(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Paddd, op0, op1); + Operand SubVectorI32(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Psubd, op0, op1); + Operand ShiftRightVectorUI32(Operand op0, int imm8) => context.AddIntrinsic(Intrinsic.X86Psrld, op0, Const(imm8)); + Operand OrVector(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Por, op0, op1); + Operand AndVector(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Pand, op0, op1); + Operand NotVector(Operand op0) => context.AddIntrinsic(Intrinsic.X86Pandn, op0, context.VectorOne()); + + Operand c55555555 = X86GetAllElements(context, 0x55555555); + Operand c33333333 = X86GetAllElements(context, 0x33333333); + Operand c0f0f0f0f = X86GetAllElements(context, 0x0f0f0f0f); + Operand c0000003f = X86GetAllElements(context, 0x0000003f); + + Operand tmp0; + Operand tmp1; + Operand res; + + // Set all bits after highest set bit to 1. + res = OrVector(ShiftRightVectorUI32(arg, 1), arg); + res = OrVector(ShiftRightVectorUI32(res, 2), res); + res = OrVector(ShiftRightVectorUI32(res, 4), res); + res = OrVector(ShiftRightVectorUI32(res, 8), res); + res = OrVector(ShiftRightVectorUI32(res, 16), res); + + // Make leading 0s into leading 1s. + res = NotVector(res); + + // Count leading 1s, which is the population count. 
+ tmp0 = ShiftRightVectorUI32(res, 1); + tmp0 = AndVector(tmp0, c55555555); + res = SubVectorI32(res, tmp0); + + tmp0 = ShiftRightVectorUI32(res, 2); + tmp0 = AndVector(tmp0, c33333333); + tmp1 = AndVector(res, c33333333); + res = AddVectorI32(tmp0, tmp1); + + tmp0 = ShiftRightVectorUI32(res, 4); + tmp0 = AddVectorI32(tmp0, res); + res = AndVector(tmp0, c0f0f0f0f); + + tmp0 = ShiftRightVectorUI32(res, 8); + res = AddVectorI32(tmp0, res); + + tmp0 = ShiftRightVectorUI32(res, 16); + res = AddVectorI32(tmp0, res); + + res = AndVector(res, c0000003f); + + return res; + } + public static void Cnt_V(ArmEmitterContext context) { OpCodeSimd op = (OpCodeSimd)context.CurrOp; diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper.cs b/ARMeilleure/Instructions/InstEmitSimdHelper.cs index e9d5303c78..da8ccae78a 100644 --- a/ARMeilleure/Instructions/InstEmitSimdHelper.cs +++ b/ARMeilleure/Instructions/InstEmitSimdHelper.cs @@ -209,6 +209,11 @@ namespace ARMeilleure.Instructions } public static Operand X86GetElements(ArmEmitterContext context, long e1, long e0) + { + return X86GetElements(context, (ulong)e1, (ulong)e0); + } + + public static Operand X86GetElements(ArmEmitterContext context, ulong e1, ulong e0) { Operand vector0 = context.VectorCreateScalar(Const(e0)); Operand vector1 = context.VectorCreateScalar(Const(e1)); diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs index 8f250a5528..92094e6212 100644 --- a/ARMeilleure/Translation/PTC/Ptc.cs +++ b/ARMeilleure/Translation/PTC/Ptc.cs @@ -22,7 +22,7 @@ namespace ARMeilleure.Translation.PTC { private const string HeaderMagic = "PTChd"; - private const int InternalVersion = 1817; //! To be incremented manually for each change to the ARMeilleure project. + private const int InternalVersion = 1917; //! To be incremented manually for each change to the ARMeilleure project. 
private const string ActualDir = "0"; private const string BackupDir = "1"; From ad491b5570ec428d0d87d56426b03125e2ca5220 Mon Sep 17 00:00:00 2001 From: EliEron Date: Mon, 25 Jan 2021 00:02:00 +0100 Subject: [PATCH 19/27] Prevent Display Sleep on Windows while running a game (#1850) Co-authored-by: EliEron --- Ryujinx.Common/System/DisplaySleep.cs | 35 +++++++++++++++++++++++++ Ryujinx/Ui/MainWindow.cs | 3 +++ Ryujinx/Ui/Windows/SettingsWindow.glade | 3 ++- 3 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 Ryujinx.Common/System/DisplaySleep.cs diff --git a/Ryujinx.Common/System/DisplaySleep.cs b/Ryujinx.Common/System/DisplaySleep.cs new file mode 100644 index 0000000000..77f9dd75b9 --- /dev/null +++ b/Ryujinx.Common/System/DisplaySleep.cs @@ -0,0 +1,35 @@ +using System; +using System.Runtime.InteropServices; + +namespace Ryujinx.Common.System +{ + public class DisplaySleep + { + [Flags] + enum EXECUTION_STATE : uint + { + ES_CONTINUOUS = 0x80000000, + ES_DISPLAY_REQUIRED = 0x00000002, + ES_SYSTEM_REQUIRED = 0x00000001 + } + + [DllImport("kernel32.dll", CharSet = CharSet.Auto, SetLastError = true)] + static extern EXECUTION_STATE SetThreadExecutionState(EXECUTION_STATE esFlags); + + static public void Prevent() + { + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + { + SetThreadExecutionState(EXECUTION_STATE.ES_CONTINUOUS | EXECUTION_STATE.ES_SYSTEM_REQUIRED | EXECUTION_STATE.ES_DISPLAY_REQUIRED); + } + } + + static public void Restore() + { + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + { + SetThreadExecutionState(EXECUTION_STATE.ES_CONTINUOUS); + } + } + } +} diff --git a/Ryujinx/Ui/MainWindow.cs b/Ryujinx/Ui/MainWindow.cs index 1c847f4a68..3f2c2fb8a5 100644 --- a/Ryujinx/Ui/MainWindow.cs +++ b/Ryujinx/Ui/MainWindow.cs @@ -554,6 +554,8 @@ namespace Ryujinx.Ui _windowsMultimediaTimerResolution = new WindowsMultimediaTimerResolution(1); } + DisplaySleep.Prevent(); + GlRendererWidget = new GlRenderer(_emulationContext, 
ConfigurationState.Instance.Logger.GraphicsDebugLevel); Application.Invoke(delegate @@ -604,6 +606,7 @@ namespace Ryujinx.Ui _windowsMultimediaTimerResolution?.Dispose(); _windowsMultimediaTimerResolution = null; + DisplaySleep.Restore(); _viewBox.Remove(GlRendererWidget); _viewBox.Add(_gameTableWindow); diff --git a/Ryujinx/Ui/Windows/SettingsWindow.glade b/Ryujinx/Ui/Windows/SettingsWindow.glade index 97a88b229a..6457ecfeb9 100644 --- a/Ryujinx/Ui/Windows/SettingsWindow.glade +++ b/Ryujinx/Ui/Windows/SettingsWindow.glade @@ -1299,6 +1299,7 @@ True False + Change System Time end System Time: @@ -1509,7 +1510,7 @@ True False - Change System Region + Change Audio Backend end 5 Audio Backend: From b0d3f1d06ff669c9e7af68080ae260207b8b10fb Mon Sep 17 00:00:00 2001 From: EmulationFanatic <62343878+EmulationFanatic@users.noreply.github.com> Date: Mon, 25 Jan 2021 22:08:22 -0700 Subject: [PATCH 20/27] GUI Update: Fix controller input window to fit all images without scrolling (#1962) Currently, when configuring controller input with an "Xinput Controller" or "Unmapped Controller", the window does not fit the images for Pro Controller (width limited) or Joycon Pair (width and height limited). This PR proportionally enlarges the window so that no scrolling is ever necessary to fully see the controller image. 
--- Ryujinx/Ui/Windows/ControllerWindow.glade | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Ryujinx/Ui/Windows/ControllerWindow.glade b/Ryujinx/Ui/Windows/ControllerWindow.glade index 2143e9de8e..d1ba42f4a3 100644 --- a/Ryujinx/Ui/Windows/ControllerWindow.glade +++ b/Ryujinx/Ui/Windows/ControllerWindow.glade @@ -48,8 +48,8 @@ Ryujinx - Controller Settings True center - 1100 - 600 + 1150 + 690 From c19cfca183cbbae8091688a292242032c3f337cb Mon Sep 17 00:00:00 2001 From: mageven <62494521+mageven@users.noreply.github.com> Date: Tue, 26 Jan 2021 10:39:27 +0530 Subject: [PATCH 21/27] Implement PRFM (register variant) as NOP (#1956) * Implement PRFM (register variant) as NOP Fix typo pfrm -> prfm Add comments to distinguish variants * Increment PTC version --- ARMeilleure/Decoders/OpCodeTable.cs | 7 ++++--- ARMeilleure/Instructions/InstEmitMemoryEx.cs | 2 +- ARMeilleure/Instructions/InstName.cs | 2 +- ARMeilleure/Translation/PTC/Ptc.cs | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index 928d0e0d6a..50fb132a25 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -144,9 +144,10 @@ namespace ARMeilleure.Decoders SetA64("101100100xxxxxxxxxxxxxxxxxxxxxxx", InstName.Orr, InstEmit.Orr, OpCodeAluImm.Create); SetA64("00101010xx0xxxxx0xxxxxxxxxxxxxxx", InstName.Orr, InstEmit.Orr, OpCodeAluRs.Create); SetA64("10101010xx0xxxxxxxxxxxxxxxxxxxxx", InstName.Orr, InstEmit.Orr, OpCodeAluRs.Create); - SetA64("1111100110xxxxxxxxxxxxxxxxxxxxxx", InstName.Pfrm, InstEmit.Pfrm, OpCodeMemImm.Create); - SetA64("11111000100xxxxxxxxx00xxxxxxxxxx", InstName.Pfrm, InstEmit.Pfrm, OpCodeMemImm.Create); - SetA64("11011000xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Pfrm, InstEmit.Pfrm, OpCodeMemLit.Create); + SetA64("1111100110xxxxxxxxxxxxxxxxxxxxxx", InstName.Prfm, InstEmit.Prfm, OpCodeMemImm.Create); // immediate + 
SetA64("11111000100xxxxxxxxx00xxxxxxxxxx", InstName.Prfm, InstEmit.Prfm, OpCodeMemImm.Create); // prfum (unscaled offset) + SetA64("11011000xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Prfm, InstEmit.Prfm, OpCodeMemLit.Create); // literal + SetA64("11111000101xxxxxxxxx10xxxxxxxxxx", InstName.Prfm, InstEmit.Prfm, OpCodeMemReg.Create); // register SetA64("x101101011000000000000xxxxxxxxxx", InstName.Rbit, InstEmit.Rbit, OpCodeAlu.Create); SetA64("1101011001011111000000xxxxx00000", InstName.Ret, InstEmit.Ret, OpCodeBReg.Create); SetA64("x101101011000000000001xxxxxxxxxx", InstName.Rev16, InstEmit.Rev16, OpCodeAlu.Create); diff --git a/ARMeilleure/Instructions/InstEmitMemoryEx.cs b/ARMeilleure/Instructions/InstEmitMemoryEx.cs index 977f23d384..95be4fcfe9 100644 --- a/ARMeilleure/Instructions/InstEmitMemoryEx.cs +++ b/ARMeilleure/Instructions/InstEmitMemoryEx.cs @@ -102,7 +102,7 @@ namespace ARMeilleure.Instructions } } - public static void Pfrm(ArmEmitterContext context) + public static void Prfm(ArmEmitterContext context) { // Memory Prefetch, execute as no-op. } diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs index 990e4393f1..ca2a63d99d 100644 --- a/ARMeilleure/Instructions/InstName.cs +++ b/ARMeilleure/Instructions/InstName.cs @@ -68,7 +68,7 @@ namespace ARMeilleure.Instructions Nop, Orn, Orr, - Pfrm, + Prfm, Rbit, Ret, Rev16, diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs index 92094e6212..75a801e5f8 100644 --- a/ARMeilleure/Translation/PTC/Ptc.cs +++ b/ARMeilleure/Translation/PTC/Ptc.cs @@ -22,7 +22,7 @@ namespace ARMeilleure.Translation.PTC { private const string HeaderMagic = "PTChd"; - private const int InternalVersion = 1917; //! To be incremented manually for each change to the ARMeilleure project. + private const int InternalVersion = 1956; //! To be incremented manually for each change to the ARMeilleure project. 
private const string ActualDir = "0"; private const string BackupDir = "1"; From e453ba69f42057d36559b2c84b6ad9f01eaf4c86 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Tue, 26 Jan 2021 03:38:33 -0300 Subject: [PATCH 22/27] Add support for shader atomic min/max (S32) (#1948) --- .../CodeGen/Glsl/Declarations.cs | 16 +++++++++++++- .../AtomicMinMaxS32Shared.glsl | 21 +++++++++++++++++++ .../AtomicMinMaxS32Storage.glsl | 21 +++++++++++++++++++ .../HelperFunctions/HelperFunctionNames.cs | 3 +++ .../HelperFunctions/TexelFetchScale_cp.glsl | 6 ++++-- .../HelperFunctions/TexelFetchScale_fp.glsl | 13 ++++++++---- .../CodeGen/Glsl/Instructions/InstGen.cs | 14 ++++++++----- .../Glsl/Instructions/InstGenHelper.cs | 4 ++-- .../Ryujinx.Graphics.Shader.csproj | 2 ++ .../StructuredIr/HelperFunctionsMask.cs | 16 +++++++------- .../StructuredIr/StructuredProgram.cs | 8 +++++++ 11 files changed, 103 insertions(+), 21 deletions(-) create mode 100644 Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl create mode 100644 Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs index d43fe6324d..a6109a9597 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs @@ -157,6 +157,16 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl } } + if ((info.HelperFunctionsMask & HelperFunctionsMask.AtomicMinMaxS32Shared) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl"); + } + + if ((info.HelperFunctionsMask & HelperFunctionsMask.AtomicMinMaxS32Storage) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl"); + } + if ((info.HelperFunctionsMask & HelperFunctionsMask.MultiplyHighS32) != 0) { AppendHelperFunction(context, 
"Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl"); @@ -523,7 +533,11 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl { string code = EmbeddedResources.ReadAllText(filename); - context.AppendLine(code.Replace("\t", CodeGenContext.Tab)); + code = code.Replace("\t", CodeGenContext.Tab); + code = code.Replace("$SHARED_MEM$", DefaultNames.SharedMemoryName); + code = code.Replace("$STORAGE_MEM$", OperandManager.GetShaderStagePrefix(context.Config.Stage) + "_" + DefaultNames.StorageNamePrefix); + + context.AppendLine(code); context.AppendLine(); } } diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl new file mode 100644 index 0000000000..9f8c641dff --- /dev/null +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl @@ -0,0 +1,21 @@ +int Helper_AtomicMaxS32(int offset, int value) +{ + uint oldValue, newValue; + do + { + oldValue = $SHARED_MEM$[offset]; + newValue = uint(max(int(oldValue), value)); + } while (atomicCompSwap($SHARED_MEM$[offset], newValue, oldValue) != oldValue); + return int(oldValue); +} + +int Helper_AtomicMinS32(int offset, int value) +{ + uint oldValue, newValue; + do + { + oldValue = $SHARED_MEM$[offset]; + newValue = uint(min(int(oldValue), value)); + } while (atomicCompSwap($SHARED_MEM$[offset], newValue, oldValue) != oldValue); + return int(oldValue); +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl new file mode 100644 index 0000000000..fc3af6a73e --- /dev/null +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl @@ -0,0 +1,21 @@ +int Helper_AtomicMaxS32(int index, int offset, int value) +{ + uint oldValue, newValue; + do + { + oldValue = $STORAGE_MEM$[index].data[offset]; + 
newValue = uint(max(int(oldValue), value)); + } while (atomicCompSwap($STORAGE_MEM$[index].data[offset], newValue, oldValue) != oldValue); + return int(oldValue); +} + +int Helper_AtomicMinS32(int index, int offset, int value) +{ + uint oldValue, newValue; + do + { + oldValue = $STORAGE_MEM$[index].data[offset]; + newValue = uint(min(int(oldValue), value)); + } while (atomicCompSwap($STORAGE_MEM$[index].data[offset], newValue, oldValue) != oldValue); + return int(oldValue); +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs index 21c435475f..1ff127bb38 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs @@ -2,6 +2,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl { static class HelperFunctionNames { + public static string AtomicMaxS32 = "Helper_AtomicMaxS32"; + public static string AtomicMinS32 = "Helper_AtomicMinS32"; + public static string MultiplyHighS32 = "Helper_MultiplyHighS32"; public static string MultiplyHighU32 = "Helper_MultiplyHighU32"; diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_cp.glsl b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_cp.glsl index 381566d37c..88d18246d3 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_cp.glsl +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_cp.glsl @@ -1,6 +1,8 @@ -ivec2 Helper_TexelFetchScale(ivec2 inputVec, int samplerIndex) { +ivec2 Helper_TexelFetchScale(ivec2 inputVec, int samplerIndex) +{ float scale = cp_renderScale[samplerIndex]; - if (scale == 1.0) { + if (scale == 1.0) + { return inputVec; } return ivec2(vec2(inputVec) * scale); diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_fp.glsl 
b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_fp.glsl index 4efaa65af6..2e166a4be7 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_fp.glsl +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_fp.glsl @@ -1,11 +1,16 @@ -ivec2 Helper_TexelFetchScale(ivec2 inputVec, int samplerIndex) { +ivec2 Helper_TexelFetchScale(ivec2 inputVec, int samplerIndex) +{ float scale = fp_renderScale[1 + samplerIndex]; - if (scale == 1.0) { + if (scale == 1.0) + { return inputVec; } - if (scale < 0.0) { // If less than 0, try interpolate between texels by using the screen position. + if (scale < 0.0) // If less than 0, try interpolate between texels by using the screen position. + { return ivec2(vec2(inputVec) * (-scale) + mod(gl_FragCoord.xy, -scale)); - } else { + } + else + { return ivec2(vec2(inputVec) * scale); } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs index 388f0c2506..7d0f1aa583 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs @@ -42,13 +42,18 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions for (int argIndex = 0; argIndex < arity; argIndex++) { + // For shared memory access, the second argument is unused and should be ignored. + // It is there to make both storage and shared access have the same number of arguments. 
+ if (argIndex == 1 && (inst & Instruction.MrMask) == Instruction.MrShared) + { + continue; + } + if (argIndex != 0) { args += ", "; } - VariableType dstType = GetSrcVarType(inst, argIndex); - if (argIndex == 0 && atomic) { Instruction memRegion = inst & Instruction.MrMask; @@ -60,12 +65,11 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions default: throw new InvalidOperationException($"Invalid memory region \"{memRegion}\"."); } - - // We use the first 2 operands above. - argIndex++; } else { + VariableType dstType = GetSrcVarType(inst, argIndex); + args += GetSoureExpr(context, operation.GetSource(argIndex), dstType); } } diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs index 1b1efe9da3..5f5574c318 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs @@ -16,9 +16,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions Add(Instruction.AtomicAdd, InstType.AtomicBinary, "atomicAdd"); Add(Instruction.AtomicAnd, InstType.AtomicBinary, "atomicAnd"); Add(Instruction.AtomicCompareAndSwap, InstType.AtomicTernary, "atomicCompSwap"); - Add(Instruction.AtomicMaxS32, InstType.AtomicBinary, "atomicMax"); + Add(Instruction.AtomicMaxS32, InstType.CallTernary, HelperFunctionNames.AtomicMaxS32); Add(Instruction.AtomicMaxU32, InstType.AtomicBinary, "atomicMax"); - Add(Instruction.AtomicMinS32, InstType.AtomicBinary, "atomicMin"); + Add(Instruction.AtomicMinS32, InstType.CallTernary, HelperFunctionNames.AtomicMinS32); Add(Instruction.AtomicMinU32, InstType.AtomicBinary, "atomicMin"); Add(Instruction.AtomicOr, InstType.AtomicBinary, "atomicOr"); Add(Instruction.AtomicSwap, InstType.AtomicBinary, "atomicExchange"); diff --git a/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj b/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj index 28a031a2f3..2fa70c265b 
100644 --- a/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj +++ b/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj @@ -9,6 +9,8 @@ + + diff --git a/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs b/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs index 53367fce14..af462a7f10 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs @@ -5,12 +5,14 @@ namespace Ryujinx.Graphics.Shader.StructuredIr [Flags] enum HelperFunctionsMask { - MultiplyHighS32 = 1 << 0, - MultiplyHighU32 = 1 << 1, - Shuffle = 1 << 2, - ShuffleDown = 1 << 3, - ShuffleUp = 1 << 4, - ShuffleXor = 1 << 5, - SwizzleAdd = 1 << 6 + AtomicMinMaxS32Shared = 1 << 0, + AtomicMinMaxS32Storage = 1 << 1, + MultiplyHighS32 = 1 << 2, + MultiplyHighU32 = 1 << 3, + Shuffle = 1 << 4, + ShuffleDown = 1 << 5, + ShuffleUp = 1 << 6, + ShuffleXor = 1 << 7, + SwizzleAdd = 1 << 8 } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs index 733805cde5..8c73e698e4 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs @@ -244,6 +244,14 @@ namespace Ryujinx.Graphics.Shader.StructuredIr // decide which helper functions are needed on the final generated code. 
switch (operation.Inst) { + case Instruction.AtomicMaxS32 | Instruction.MrShared: + case Instruction.AtomicMinS32 | Instruction.MrShared: + context.Info.HelperFunctionsMask |= HelperFunctionsMask.AtomicMinMaxS32Shared; + break; + case Instruction.AtomicMaxS32 | Instruction.MrStorage: + case Instruction.AtomicMinS32 | Instruction.MrStorage: + context.Info.HelperFunctionsMask |= HelperFunctionsMask.AtomicMinMaxS32Storage; + break; case Instruction.MultiplyHighS32: context.Info.HelperFunctionsMask |= HelperFunctionsMask.MultiplyHighS32; break; From 9551bfdeebee663f119b4fbaa7196ce8d810f41a Mon Sep 17 00:00:00 2001 From: gdkchan Date: Tue, 26 Jan 2021 14:27:18 -0300 Subject: [PATCH 23/27] Fix compute shader code dumping (#1960) --- Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index 1dbe1805aa..f3e4679b8f 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -759,7 +759,7 @@ namespace Ryujinx.Graphics.Gpu.Shader { byte[] code = _context.MemoryManager.GetSpan(translatorContext.Address, translatorContext.Size).ToArray(); - _dumper.Dump(code, compute: false, out string fullPath, out string codePath); + _dumper.Dump(code, translatorContext.Stage == ShaderStage.Compute, out string fullPath, out string codePath); ShaderProgram program = translatorContext.Translate(out ShaderProgramInfo shaderProgramInfo); From d1e24ba5c247bb9cfdeca7251bf5f8951c927576 Mon Sep 17 00:00:00 2001 From: mageven <62494521+mageven@users.noreply.github.com> Date: Tue, 26 Jan 2021 23:15:07 +0530 Subject: [PATCH 24/27] Initial Setup: Reload keys before verifying firmware (#1955) * Initial Setup: Reload keys before verifying firmware Also, display the NoKeys dialog if keyset is empty when verifying firmware. 
* LoadApplications: Remove the lone debug log and print the error directly --- Ryujinx.HLE/FileSystem/Content/ContentManager.cs | 9 +++++++++ Ryujinx/Ui/App/ApplicationLibrary.cs | 3 +-- Ryujinx/Ui/MainWindow.cs | 5 +++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/Ryujinx.HLE/FileSystem/Content/ContentManager.cs b/Ryujinx.HLE/FileSystem/Content/ContentManager.cs index 4c4f3c8676..1630835d2c 100644 --- a/Ryujinx.HLE/FileSystem/Content/ContentManager.cs +++ b/Ryujinx.HLE/FileSystem/Content/ContentManager.cs @@ -653,6 +653,15 @@ namespace Ryujinx.HLE.FileSystem.Content public SystemVersion VerifyFirmwarePackage(string firmwarePackage) { + _virtualFileSystem.Reload(); + + // LibHac.NcaHeader's DecryptHeader doesn't check if HeaderKey is empty and throws InvalidDataException instead + // So, we check it early for a better user experience. + if (_virtualFileSystem.KeySet.HeaderKey.IsEmpty()) + { + throw new MissingKeyException("HeaderKey is empty. Cannot decrypt NCA headers."); + } + Dictionary> updateNcas = new Dictionary>(); if (Directory.Exists(firmwarePackage)) diff --git a/Ryujinx/Ui/App/ApplicationLibrary.cs b/Ryujinx/Ui/App/ApplicationLibrary.cs index fb0e066490..dcf49204b3 100644 --- a/Ryujinx/Ui/App/ApplicationLibrary.cs +++ b/Ryujinx/Ui/App/ApplicationLibrary.cs @@ -298,8 +298,7 @@ namespace Ryujinx.Ui.App } catch (Exception exception) { - Logger.Warning?.Print(LogClass.Application, $"The file encountered was not of a valid type. Errored File: {applicationPath}"); - Logger.Debug?.Print(LogClass.Application, exception.ToString()); + Logger.Warning?.Print(LogClass.Application, $"The file encountered was not of a valid type. 
File: '{applicationPath}' Error: {exception}"); numApplicationsFound--; _loadingError = true; diff --git a/Ryujinx/Ui/MainWindow.cs b/Ryujinx/Ui/MainWindow.cs index 3f2c2fb8a5..7697376bd5 100644 --- a/Ryujinx/Ui/MainWindow.cs +++ b/Ryujinx/Ui/MainWindow.cs @@ -1035,6 +1035,11 @@ namespace Ryujinx.Ui thread.Start(); } } + catch (LibHac.MissingKeyException ex) + { + Logger.Error?.Print(LogClass.Application, ex.ToString()); + UserErrorDialog.CreateUserErrorDialog(UserError.NoKeys); + } catch (Exception ex) { GtkDialog.CreateErrorDialog(ex.Message); From d6bd0470fb0507cc9c6069e577ae2814e614265b Mon Sep 17 00:00:00 2001 From: gdkchan Date: Tue, 26 Jan 2021 18:42:12 -0300 Subject: [PATCH 25/27] Fix conditional rendering without queries (#1965) --- .../Engine/MethodConditionalRendering.cs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/Ryujinx.Graphics.Gpu/Engine/MethodConditionalRendering.cs b/Ryujinx.Graphics.Gpu/Engine/MethodConditionalRendering.cs index 7e7964c4bf..0d7c272c79 100644 --- a/Ryujinx.Graphics.Gpu/Engine/MethodConditionalRendering.cs +++ b/Ryujinx.Graphics.Gpu/Engine/MethodConditionalRendering.cs @@ -71,11 +71,6 @@ namespace Ryujinx.Graphics.Gpu.Engine ICounterEvent evt = FindEvent(gpuVa); ICounterEvent evt2 = FindEvent(gpuVa + 16); - if (evt == null && evt2 == null) - { - return ConditionalRenderEnabled.False; - } - bool useHost; if (evt != null && evt2 == null) @@ -86,10 +81,14 @@ namespace Ryujinx.Graphics.Gpu.Engine { useHost = _context.Renderer.Pipeline.TryHostConditionalRendering(evt2, _context.MemoryManager.Read(gpuVa), isEqual); } - else + else if (evt != null && evt2 != null) { useHost = _context.Renderer.Pipeline.TryHostConditionalRendering(evt, evt2, isEqual); } + else + { + useHost = false; + } if (useHost) { From caf049ed15f1c22d55aacfab79019538b2587e11 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Tue, 26 Jan 2021 18:44:07 -0300 Subject: [PATCH 26/27] Avoid some redundant GL calls (#1958) --- 
Ryujinx.Graphics.GAL/IPipeline.cs | 3 +- .../VertexAttribDescriptor.cs | 22 ++++- Ryujinx.Graphics.Gpu/Engine/Methods.cs | 8 +- Ryujinx.Graphics.OpenGL/Framebuffer.cs | 11 ++- Ryujinx.Graphics.OpenGL/Pipeline.cs | 34 +++---- Ryujinx.Graphics.OpenGL/VertexArray.cs | 88 +++++++++++++------ 6 files changed, 115 insertions(+), 51 deletions(-) diff --git a/Ryujinx.Graphics.GAL/IPipeline.cs b/Ryujinx.Graphics.GAL/IPipeline.cs index 96ccfb2859..bb3a8dcac9 100644 --- a/Ryujinx.Graphics.GAL/IPipeline.cs +++ b/Ryujinx.Graphics.GAL/IPipeline.cs @@ -68,8 +68,7 @@ namespace Ryujinx.Graphics.GAL void SetSampler(int binding, ISampler sampler); - void SetScissorEnable(int index, bool enable); - void SetScissor(int index, int x, int y, int width, int height); + void SetScissor(int index, bool enable, int x, int y, int width, int height); void SetStencilTest(StencilTestDescriptor stencilTest); diff --git a/Ryujinx.Graphics.GAL/VertexAttribDescriptor.cs b/Ryujinx.Graphics.GAL/VertexAttribDescriptor.cs index 1547658e47..b3248b621f 100644 --- a/Ryujinx.Graphics.GAL/VertexAttribDescriptor.cs +++ b/Ryujinx.Graphics.GAL/VertexAttribDescriptor.cs @@ -1,6 +1,8 @@ +using System; + namespace Ryujinx.Graphics.GAL { - public struct VertexAttribDescriptor + public struct VertexAttribDescriptor : IEquatable { public int BufferIndex { get; } public int Offset { get; } @@ -16,5 +18,23 @@ namespace Ryujinx.Graphics.GAL IsZero = isZero; Format = format; } + + public override bool Equals(object obj) + { + return obj is VertexAttribDescriptor other && Equals(other); + } + + public bool Equals(VertexAttribDescriptor other) + { + return BufferIndex == other.BufferIndex && + Offset == other.Offset && + IsZero == other.IsZero && + Format == other.Format; + } + + public override int GetHashCode() + { + return HashCode.Combine(BufferIndex, Offset, IsZero, Format); + } } } diff --git a/Ryujinx.Graphics.Gpu/Engine/Methods.cs b/Ryujinx.Graphics.Gpu/Engine/Methods.cs index d6bd51106c..a41fd5414d 100644 --- 
a/Ryujinx.Graphics.Gpu/Engine/Methods.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Methods.cs @@ -436,8 +436,6 @@ namespace Ryujinx.Graphics.Gpu.Engine bool enable = scissor.Enable && (scissor.X1 != 0 || scissor.Y1 != 0 || scissor.X2 != 0xffff || scissor.Y2 != 0xffff); - _context.Renderer.Pipeline.SetScissorEnable(index, enable); - if (enable) { int x = scissor.X1; @@ -454,7 +452,11 @@ namespace Ryujinx.Graphics.Gpu.Engine height = (int)Math.Ceiling(height * scale); } - _context.Renderer.Pipeline.SetScissor(index, x, y, width, height); + _context.Renderer.Pipeline.SetScissor(index, true, x, y, width, height); + } + else + { + _context.Renderer.Pipeline.SetScissor(index, false, 0, 0, 0, 0); } } } diff --git a/Ryujinx.Graphics.OpenGL/Framebuffer.cs b/Ryujinx.Graphics.OpenGL/Framebuffer.cs index 015b0ec0ae..66bf892b31 100644 --- a/Ryujinx.Graphics.OpenGL/Framebuffer.cs +++ b/Ryujinx.Graphics.OpenGL/Framebuffer.cs @@ -2,6 +2,7 @@ using OpenTK.Graphics.OpenGL; using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.OpenGL.Image; using System; +using System.Runtime.CompilerServices; namespace Ryujinx.Graphics.OpenGL { @@ -29,21 +30,27 @@ namespace Ryujinx.Graphics.OpenGL return Handle; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public void AttachColor(int index, TextureView color) { + if (_colors[index] == color) + { + return; + } + FramebufferAttachment attachment = FramebufferAttachment.ColorAttachment0 + index; if (HwCapabilities.Vendor == HwCapabilities.GpuVendor.Amd || HwCapabilities.Vendor == HwCapabilities.GpuVendor.Intel) { GL.FramebufferTexture(FramebufferTarget.Framebuffer, attachment, color?.GetIncompatibleFormatViewHandle() ?? 0, 0); - - _colors[index] = color; } else { GL.FramebufferTexture(FramebufferTarget.Framebuffer, attachment, color?.Handle ?? 
0, 0); } + + _colors[index] = color; } public void AttachDepthStencil(TextureView depthStencil) diff --git a/Ryujinx.Graphics.OpenGL/Pipeline.cs b/Ryujinx.Graphics.OpenGL/Pipeline.cs index b6a34e9cb8..f42187bdf7 100644 --- a/Ryujinx.Graphics.OpenGL/Pipeline.cs +++ b/Ryujinx.Graphics.OpenGL/Pipeline.cs @@ -43,7 +43,7 @@ namespace Ryujinx.Graphics.OpenGL private readonly uint[] _componentMasks; - private bool _scissor0Enable = false; + private uint _scissorEnables; private bool _tfEnabled; private TransformFeedbackPrimitiveType _tfTopology; @@ -883,25 +883,27 @@ namespace Ryujinx.Graphics.OpenGL ((Sampler)sampler).Bind(binding); } - public void SetScissorEnable(int index, bool enable) + public void SetScissor(int index, bool enable, int x, int y, int width, int height) { - if (enable) + uint mask = 1u << index; + + if (!enable) { + if ((_scissorEnables & mask) != 0) + { + _scissorEnables &= ~mask; + GL.Disable(IndexedEnableCap.ScissorTest, index); + } + + return; + } + + if ((_scissorEnables & mask) == 0) + { + _scissorEnables |= mask; GL.Enable(IndexedEnableCap.ScissorTest, index); } - else - { - GL.Disable(IndexedEnableCap.ScissorTest, index); - } - if (index == 0) - { - _scissor0Enable = enable; - } - } - - public void SetScissor(int index, int x, int y, int width, int height) - { GL.ScissorIndexed(index, x, y, width, height); } @@ -1241,7 +1243,7 @@ namespace Ryujinx.Graphics.OpenGL public void RestoreScissor0Enable() { - if (_scissor0Enable) + if ((_scissorEnables & 1u) != 0) { GL.Enable(IndexedEnableCap.ScissorTest, 0); } diff --git a/Ryujinx.Graphics.OpenGL/VertexArray.cs b/Ryujinx.Graphics.OpenGL/VertexArray.cs index 64c6a82198..17703cd129 100644 --- a/Ryujinx.Graphics.OpenGL/VertexArray.cs +++ b/Ryujinx.Graphics.OpenGL/VertexArray.cs @@ -1,6 +1,7 @@ using OpenTK.Graphics.OpenGL; using Ryujinx.Graphics.GAL; using System; +using System.Runtime.CompilerServices; namespace Ryujinx.Graphics.OpenGL { @@ -16,6 +17,9 @@ namespace Ryujinx.Graphics.OpenGL private int 
_vertexAttribsCount; private int _vertexBuffersCount; + private uint _vertexAttribsInUse; + private uint _vertexBuffersInUse; + public VertexArray() { Handle = GL.GenVertexArray(); @@ -31,30 +35,30 @@ namespace Ryujinx.Graphics.OpenGL public void SetVertexBuffers(ReadOnlySpan vertexBuffers) { - int bindingIndex = 0; - - for (int index = 0; index < vertexBuffers.Length; index++) + int bindingIndex; + for (bindingIndex = 0; bindingIndex < vertexBuffers.Length; bindingIndex++) { - VertexBufferDescriptor vb = vertexBuffers[index]; + VertexBufferDescriptor vb = vertexBuffers[bindingIndex]; if (vb.Buffer.Handle != BufferHandle.Null) { GL.BindVertexBuffer(bindingIndex, vb.Buffer.Handle.ToInt32(), (IntPtr)vb.Buffer.Offset, vb.Stride); - GL.VertexBindingDivisor(bindingIndex, vb.Divisor); + _vertexBuffersInUse |= 1u << bindingIndex; } else { - GL.BindVertexBuffer(bindingIndex, 0, IntPtr.Zero, 0); + if ((_vertexBuffersInUse & (1u << bindingIndex)) != 0) + { + GL.BindVertexBuffer(bindingIndex, 0, IntPtr.Zero, 0); + _vertexBuffersInUse &= ~(1u << bindingIndex); + } } - _vertexBuffers[index] = vb; - - bindingIndex++; + _vertexBuffers[bindingIndex] = vb; } _vertexBuffersCount = bindingIndex; - _needsAttribsUpdate = true; } @@ -66,17 +70,22 @@ namespace Ryujinx.Graphics.OpenGL { VertexAttribDescriptor attrib = vertexAttribs[index]; + if (attrib.Equals(_vertexAttribs[index])) + { + continue; + } + FormatInfo fmtInfo = FormatTable.GetFormatInfo(attrib.Format); if (attrib.IsZero) { // Disabling the attribute causes the shader to read a constant value. // The value is configurable, but by default is a vector of (0, 0, 0, 1). 
- GL.DisableVertexAttribArray(index); + DisableVertexAttrib(index); } else { - GL.EnableVertexAttribArray(index); + EnableVertexAttrib(index); } int offset = attrib.Offset; @@ -107,7 +116,7 @@ namespace Ryujinx.Graphics.OpenGL for (; index < Constants.MaxVertexAttribs; index++) { - GL.DisableVertexAttribArray(index); + DisableVertexAttrib(index); } } @@ -122,29 +131,54 @@ namespace Ryujinx.Graphics.OpenGL { VertexAttribDescriptor attrib = _vertexAttribs[attribIndex]; - if ((uint)attrib.BufferIndex >= _vertexBuffersCount) + if (!attrib.IsZero) { - GL.DisableVertexAttribArray(attribIndex); + if ((uint)attrib.BufferIndex >= _vertexBuffersCount) + { + DisableVertexAttrib(attribIndex); + continue; + } - continue; - } + if (_vertexBuffers[attrib.BufferIndex].Buffer.Handle == BufferHandle.Null) + { + DisableVertexAttrib(attribIndex); + continue; + } - if (_vertexBuffers[attrib.BufferIndex].Buffer.Handle == BufferHandle.Null) - { - GL.DisableVertexAttribArray(attribIndex); - - continue; - } - - if (_needsAttribsUpdate && !attrib.IsZero) - { - GL.EnableVertexAttribArray(attribIndex); + if (_needsAttribsUpdate) + { + EnableVertexAttrib(attribIndex); + } } } _needsAttribsUpdate = false; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void EnableVertexAttrib(int index) + { + uint mask = 1u << index; + + if ((_vertexAttribsInUse & mask) == 0) + { + _vertexAttribsInUse |= mask; + GL.EnableVertexAttribArray(index); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void DisableVertexAttrib(int index) + { + uint mask = 1u << index; + + if ((_vertexAttribsInUse & mask) != 0) + { + _vertexAttribsInUse &= ~mask; + GL.DisableVertexAttribArray(index); + } + } + public void Dispose() { if (Handle != 0) From a8e9dd2f839bdcd20ec9b32b8647ce2a83e50ecb Mon Sep 17 00:00:00 2001 From: gdkchan Date: Tue, 26 Jan 2021 21:26:23 -0300 Subject: [PATCH 27/27] Fix regression on shader atomic SSBO operations (#1967) * Fix regression on shader atomic SSBO 
operations * Update comment --- Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs index 7d0f1aa583..622ac646ea 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs @@ -44,7 +44,8 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions { // For shared memory access, the second argument is unused and should be ignored. // It is there to make both storage and shared access have the same number of arguments. - if (argIndex == 1 && (inst & Instruction.MrMask) == Instruction.MrShared) + // For storage, both inputs are consumed when the argument index is 0, so we should skip it here. + if (argIndex == 1 && (atomic || (inst & Instruction.MrMask) == Instruction.MrShared)) { continue; }