GPU: Add HLE macros for popular NVN macros (#5761)

* GPU: Add HLE macros for popular NVN macros

* Remove non-vector equality check

The case where it's not hardware accelerated will do the check integer-wise anyways.

* Whitespace 😔

* Address Feedback
This commit is contained in:
riperiperi 2023-10-06 23:55:07 +01:00 committed by GitHub
parent 086564c3c8
commit f460ecc182
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 467 additions and 14 deletions

View file

@ -32,6 +32,11 @@ namespace Ryujinx.Graphics.Gpu.Engine
/// </summary>
public ref TState State => ref _state.State;
/// <summary>
/// Current shadow state.
/// </summary>
public ref TState ShadowState => ref _shadowState.State;
/// <summary>
/// Creates a new instance of the device state, with shadow state.
/// </summary>

View file

@ -1,7 +1,10 @@
using Ryujinx.Common.Logging;
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Device;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Engine.GPFifo;
using Ryujinx.Graphics.Gpu.Engine.Threed;
using Ryujinx.Graphics.Gpu.Engine.Types;
using System;
using System.Collections.Generic;
@ -15,9 +18,18 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
private const int ColorLayerCountOffset = 0x818;
private const int ColorStructSize = 0x40;
private const int ZetaLayerCountOffset = 0x1230;
private const int UniformBufferBindVertexOffset = 0x2410;
private const int FirstVertexOffset = 0x1434;
private const int IndirectIndexedDataEntrySize = 0x14;
private const int LogicOpOffset = 0x19c4;
private const int ShaderIdScratchOffset = 0x3470;
private const int ShaderAddressScratchOffset = 0x3488;
private const int UpdateConstantBufferAddressesBase = 0x34a8;
private const int UpdateConstantBufferSizesBase = 0x34bc;
private const int UpdateConstantBufferAddressCbu = 0x3460;
private readonly GPFifoProcessor _processor;
private readonly MacroHLEFunctionName _functionName;
@ -49,6 +61,9 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
{
switch (_functionName)
{
case MacroHLEFunctionName.BindShaderProgram:
BindShaderProgram(state, arg0);
break;
case MacroHLEFunctionName.ClearColor:
ClearColor(state, arg0);
break;
@ -58,6 +73,9 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
case MacroHLEFunctionName.DrawArraysInstanced:
DrawArraysInstanced(state, arg0);
break;
case MacroHLEFunctionName.DrawElements:
DrawElements(state, arg0);
break;
case MacroHLEFunctionName.DrawElementsInstanced:
DrawElementsInstanced(state, arg0);
break;
@ -67,6 +85,21 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
case MacroHLEFunctionName.MultiDrawElementsIndirectCount:
MultiDrawElementsIndirectCount(state, arg0);
break;
case MacroHLEFunctionName.UpdateBlendState:
UpdateBlendState(state, arg0);
break;
case MacroHLEFunctionName.UpdateColorMasks:
UpdateColorMasks(state, arg0);
break;
case MacroHLEFunctionName.UpdateUniformBufferState:
UpdateUniformBufferState(state, arg0);
break;
case MacroHLEFunctionName.UpdateUniformBufferStateCbu:
UpdateUniformBufferStateCbu(state, arg0);
break;
case MacroHLEFunctionName.UpdateUniformBufferStateCbuV2:
UpdateUniformBufferStateCbuV2(state, arg0);
break;
default:
throw new NotImplementedException(_functionName.ToString());
}
@ -75,6 +108,149 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
Fifo.Clear();
}
/// <summary>
/// Binds a shader program with the index in arg0.
/// </summary>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void BindShaderProgram(IDeviceState state, int arg0)
{
int scratchOffset = ShaderIdScratchOffset + arg0 * 4;
int lastId = state.Read(scratchOffset);
int id = FetchParam().Word;
int offset = FetchParam().Word;
if (lastId == id)
{
FetchParam();
FetchParam();
return;
}
_processor.ThreedClass.SetShaderOffset(arg0, (uint)offset);
// Removes overflow on the method address into the increment portion.
// Present in the original macro.
int addrMask = unchecked((int)0xfffc0fff) << 2;
state.Write(scratchOffset & addrMask, id);
state.Write((ShaderAddressScratchOffset + arg0 * 4) & addrMask, offset);
int stage = FetchParam().Word;
uint cbAddress = (uint)FetchParam().Word;
_processor.ThreedClass.UpdateUniformBufferState(65536, cbAddress >> 24, cbAddress << 8);
int stageOffset = (stage & 0x7f) << 3;
state.Write((UniformBufferBindVertexOffset + stageOffset * 4) & addrMask, 17);
}
/// <summary>
/// Updates uniform buffer state for update or bind.
/// </summary>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void UpdateUniformBufferState(IDeviceState state, int arg0)
{
uint address = (uint)state.Read(UpdateConstantBufferAddressesBase + arg0 * 4);
int size = state.Read(UpdateConstantBufferSizesBase + arg0 * 4);
_processor.ThreedClass.UpdateUniformBufferState(size, address >> 24, address << 8);
}
/// <summary>
/// Updates uniform buffer state for update.
/// </summary>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void UpdateUniformBufferStateCbu(IDeviceState state, int arg0)
{
uint address = (uint)state.Read(UpdateConstantBufferAddressCbu);
UniformBufferState ubState = new()
{
Address = new()
{
High = address >> 24,
Low = address << 8
},
Size = 24320,
Offset = arg0 << 2
};
_processor.ThreedClass.UpdateUniformBufferState(ubState);
}
/// <summary>
/// Updates uniform buffer state for update.
/// </summary>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void UpdateUniformBufferStateCbuV2(IDeviceState state, int arg0)
{
uint address = (uint)state.Read(UpdateConstantBufferAddressCbu);
UniformBufferState ubState = new()
{
Address = new()
{
High = address >> 24,
Low = address << 8
},
Size = 28672,
Offset = arg0 << 2
};
_processor.ThreedClass.UpdateUniformBufferState(ubState);
}
/// <summary>
/// Updates blend enable using the given argument.
/// </summary>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void UpdateBlendState(IDeviceState state, int arg0)
{
state.Write(LogicOpOffset, 0);
Array8<Boolean32> enable = new();
for (int i = 0; i < 8; i++)
{
enable[i] = new Boolean32((uint)(arg0 >> (i + 8)) & 1);
}
_processor.ThreedClass.UpdateBlendEnable(ref enable);
}
/// <summary>
/// Updates color masks using the given argument and three pushed arguments.
/// </summary>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void UpdateColorMasks(IDeviceState state, int arg0)
{
Array8<RtColorMask> masks = new();
int index = 0;
for (int i = 0; i < 4; i++)
{
masks[index++] = new RtColorMask((uint)arg0 & 0x1fff);
masks[index++] = new RtColorMask(((uint)arg0 >> 16) & 0x1fff);
if (i != 3)
{
arg0 = FetchParam().Word;
}
}
_processor.ThreedClass.UpdateColorMasks(ref masks);
}
/// <summary>
/// Clears one bound color target.
/// </summary>
@ -129,6 +305,36 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
indexed: false);
}
/// <summary>
/// Performs a indexed draw.
/// </summary>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void DrawElements(IDeviceState state, int arg0)
{
var topology = (PrimitiveTopology)arg0;
var indexAddressHigh = FetchParam();
var indexAddressLow = FetchParam();
var indexType = FetchParam();
var firstIndex = 0;
var indexCount = FetchParam();
_processor.ThreedClass.UpdateIndexBuffer(
(uint)indexAddressHigh.Word,
(uint)indexAddressLow.Word,
(IndexType)indexType.Word);
_processor.ThreedClass.Draw(
topology,
indexCount.Word,
1,
firstIndex,
state.Read(FirstVertexOffset),
0,
indexed: true);
}
/// <summary>
/// Performs a indexed draw.
/// </summary>

View file

@ -6,11 +6,19 @@
enum MacroHLEFunctionName
{
None,
BindShaderProgram,
ClearColor,
ClearDepthStencil,
DrawArraysInstanced,
DrawElements,
DrawElementsInstanced,
DrawElementsIndirect,
MultiDrawElementsIndirectCount,
UpdateBlendState,
UpdateColorMasks,
UpdateUniformBufferState,
UpdateUniformBufferStateCbu,
UpdateUniformBufferStateCbuV2
}
}

View file

@ -46,12 +46,19 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
private static readonly TableEntry[] _table = new TableEntry[]
{
new(MacroHLEFunctionName.BindShaderProgram, new Hash128(0x5d5efb912369f60b, 0x69131ed5019f08ef), 0x68),
new(MacroHLEFunctionName.ClearColor, new Hash128(0xA9FB28D1DC43645A, 0xB177E5D2EAE67FB0), 0x28),
new(MacroHLEFunctionName.ClearDepthStencil, new Hash128(0x1B96CB77D4879F4F, 0x8557032FE0C965FB), 0x24),
new(MacroHLEFunctionName.DrawArraysInstanced, new Hash128(0x197FB416269DBC26, 0x34288C01DDA82202), 0x48),
new(MacroHLEFunctionName.DrawElements, new Hash128(0x3D7F32AE6C2702A7, 0x9353C9F41C1A244D), 0x20),
new(MacroHLEFunctionName.DrawElementsInstanced, new Hash128(0x1A501FD3D54EC8E0, 0x6CF570CF79DA74D6), 0x5c),
new(MacroHLEFunctionName.DrawElementsIndirect, new Hash128(0x86A3E8E903AF8F45, 0xD35BBA07C23860A4), 0x7c),
new(MacroHLEFunctionName.MultiDrawElementsIndirectCount, new Hash128(0x890AF57ED3FB1C37, 0x35D0C95C61F5386F), 0x19C),
new(MacroHLEFunctionName.UpdateBlendState, new Hash128(0x40F6D4E7B08D7640, 0x82167BEEAECB959F), 0x28),
new(MacroHLEFunctionName.UpdateColorMasks, new Hash128(0x9EE32420B8441DFD, 0x6E7724759A57333E), 0x24),
new(MacroHLEFunctionName.UpdateUniformBufferState, new Hash128(0x8EE66706049CB0B0, 0x51C1CF906EC86F7C), 0x20),
new(MacroHLEFunctionName.UpdateUniformBufferStateCbu, new Hash128(0xA4592676A3E581A0, 0xA39E77FE19FE04AC), 0x18),
new(MacroHLEFunctionName.UpdateUniformBufferStateCbuV2, new Hash128(0x392FA750489983D4, 0x35BACE455155D2C3), 0x18)
};
/// <summary>
@ -62,18 +69,14 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
/// <returns>True if the host supports the HLE macro, false otherwise</returns>
private static bool IsMacroHLESupported(Capabilities caps, MacroHLEFunctionName name)
{
if (name == MacroHLEFunctionName.ClearColor ||
name == MacroHLEFunctionName.ClearDepthStencil ||
name == MacroHLEFunctionName.DrawArraysInstanced ||
name == MacroHLEFunctionName.DrawElementsInstanced ||
name == MacroHLEFunctionName.DrawElementsIndirect)
{
return true;
}
else if (name == MacroHLEFunctionName.MultiDrawElementsIndirectCount)
if (name == MacroHLEFunctionName.MultiDrawElementsIndirectCount)
{
return caps.SupportsIndirectParameters;
}
else if (name != MacroHLEFunctionName.None)
{
return true;
}
return false;
}

View file

@ -10,4 +10,22 @@ namespace Ryujinx.Graphics.Gpu.Engine
MethodPassthrough = 2,
MethodReplay = 3,
}
static class SetMmeShadowRamControlModeExtensions
{
public static bool IsTrack(this SetMmeShadowRamControlMode mode)
{
return mode == SetMmeShadowRamControlMode.MethodTrack || mode == SetMmeShadowRamControlMode.MethodTrackWithFilter;
}
public static bool IsPassthrough(this SetMmeShadowRamControlMode mode)
{
return mode == SetMmeShadowRamControlMode.MethodPassthrough;
}
public static bool IsReplay(this SetMmeShadowRamControlMode mode)
{
return mode == SetMmeShadowRamControlMode.MethodReplay;
}
}
}

View file

@ -17,9 +17,11 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
class StateUpdater
{
public const int ShaderStateIndex = 26;
public const int RtColorMaskIndex = 14;
public const int RasterizerStateIndex = 15;
public const int ScissorStateIndex = 16;
public const int VertexBufferStateIndex = 0;
public const int BlendStateIndex = 2;
public const int IndexBufferStateIndex = 23;
public const int PrimitiveRestartStateIndex = 12;
public const int RenderTargetStateIndex = 27;

View file

@ -1,12 +1,15 @@
using Ryujinx.Graphics.Device;
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Device;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Engine.GPFifo;
using Ryujinx.Graphics.Gpu.Engine.InlineToMemory;
using Ryujinx.Graphics.Gpu.Engine.Threed.Blender;
using Ryujinx.Graphics.Gpu.Engine.Types;
using Ryujinx.Graphics.Gpu.Synchronization;
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
namespace Ryujinx.Graphics.Gpu.Engine.Threed
{
@ -26,6 +29,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
private readonly ConstantBufferUpdater _cbUpdater;
private readonly StateUpdater _stateUpdater;
private SetMmeShadowRamControlMode ShadowMode => _state.State.SetMmeShadowRamControlMode;
/// <summary>
/// Creates a new instance of the 3D engine class.
/// </summary>
@ -228,6 +233,206 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
_cbUpdater.Update(data);
}
/// <summary>
/// Test if two 32 byte structs are equal.
/// </summary>
/// <typeparam name="T">Type of the 32-byte struct</typeparam>
/// <param name="lhs">First struct</param>
/// <param name="rhs">Second struct</param>
/// <returns>True if equal, false otherwise</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool UnsafeEquals32Byte<T>(ref T lhs, ref T rhs) where T : unmanaged
{
if (Vector256.IsHardwareAccelerated)
{
return Vector256.EqualsAll(
Unsafe.As<T, Vector256<uint>>(ref lhs),
Unsafe.As<T, Vector256<uint>>(ref rhs)
);
}
else
{
ref var lhsVec = ref Unsafe.As<T, Vector128<uint>>(ref lhs);
ref var rhsVec = ref Unsafe.As<T, Vector128<uint>>(ref rhs);
return Vector128.EqualsAll(lhsVec, rhsVec) &&
Vector128.EqualsAll(Unsafe.Add(ref lhsVec, 1), Unsafe.Add(ref rhsVec, 1));
}
}
/// <summary>
/// Updates blend enable. Respects current shadow mode.
/// </summary>
/// <param name="masks">Blend enable</param>
public void UpdateBlendEnable(ref Array8<Boolean32> enable)
{
var shadow = ShadowMode;
ref var state = ref _state.State.BlendEnable;
if (shadow.IsReplay())
{
enable = _state.ShadowState.BlendEnable;
}
if (!UnsafeEquals32Byte(ref enable, ref state))
{
state = enable;
_stateUpdater.ForceDirty(StateUpdater.BlendStateIndex);
}
if (shadow.IsTrack())
{
_state.ShadowState.BlendEnable = enable;
}
}
/// <summary>
/// Updates color masks. Respects current shadow mode.
/// </summary>
/// <param name="masks">Color masks</param>
public void UpdateColorMasks(ref Array8<RtColorMask> masks)
{
var shadow = ShadowMode;
ref var state = ref _state.State.RtColorMask;
if (shadow.IsReplay())
{
masks = _state.ShadowState.RtColorMask;
}
if (!UnsafeEquals32Byte(ref masks, ref state))
{
state = masks;
_stateUpdater.ForceDirty(StateUpdater.RtColorMaskIndex);
}
if (shadow.IsTrack())
{
_state.ShadowState.RtColorMask = masks;
}
}
/// <summary>
/// Updates index buffer state for an indexed draw. Respects current shadow mode.
/// </summary>
/// <param name="addrHigh">High part of the address</param>
/// <param name="addrLow">Low part of the address</param>
/// <param name="type">Type of the binding</param>
public void UpdateIndexBuffer(uint addrHigh, uint addrLow, IndexType type)
{
var shadow = ShadowMode;
ref var state = ref _state.State.IndexBufferState;
if (shadow.IsReplay())
{
ref var shadowState = ref _state.ShadowState.IndexBufferState;
addrHigh = shadowState.Address.High;
addrLow = shadowState.Address.Low;
type = shadowState.Type;
}
if (state.Address.High != addrHigh || state.Address.Low != addrLow || state.Type != type)
{
state.Address.High = addrHigh;
state.Address.Low = addrLow;
state.Type = type;
_stateUpdater.ForceDirty(StateUpdater.IndexBufferStateIndex);
}
if (shadow.IsTrack())
{
ref var shadowState = ref _state.ShadowState.IndexBufferState;
shadowState.Address.High = addrHigh;
shadowState.Address.Low = addrLow;
shadowState.Type = type;
}
}
/// <summary>
/// Updates uniform buffer state for update or bind. Respects current shadow mode.
/// </summary>
/// <param name="size">Size of the binding</param>
/// <param name="addrHigh">High part of the addrsss</param>
/// <param name="addrLow">Low part of the address</param>
public void UpdateUniformBufferState(int size, uint addrHigh, uint addrLow)
{
var shadow = ShadowMode;
ref var state = ref _state.State.UniformBufferState;
if (shadow.IsReplay())
{
ref var shadowState = ref _state.ShadowState.UniformBufferState;
size = shadowState.Size;
addrHigh = shadowState.Address.High;
addrLow = shadowState.Address.Low;
}
state.Size = size;
state.Address.High = addrHigh;
state.Address.Low = addrLow;
if (shadow.IsTrack())
{
ref var shadowState = ref _state.ShadowState.UniformBufferState;
shadowState.Size = size;
shadowState.Address.High = addrHigh;
shadowState.Address.Low = addrLow;
}
}
/// <summary>
/// Updates a shader offset. Respects current shadow mode.
/// </summary>
/// <param name="index">Index of the shader to update</param>
/// <param name="offset">Offset to update with</param>
public void SetShaderOffset(int index, uint offset)
{
var shadow = ShadowMode;
ref var shaderState = ref _state.State.ShaderState[index];
if (shadow.IsReplay())
{
offset = _state.ShadowState.ShaderState[index].Offset;
}
if (shaderState.Offset != offset)
{
shaderState.Offset = offset;
_stateUpdater.ForceDirty(StateUpdater.ShaderStateIndex);
}
if (shadow.IsTrack())
{
_state.ShadowState.ShaderState[index].Offset = offset;
}
}
/// <summary>
/// Updates uniform buffer state for update. Respects current shadow mode.
/// </summary>
/// <param name="ubState">Uniform buffer state</param>
public void UpdateUniformBufferState(UniformBufferState ubState)
{
var shadow = ShadowMode;
ref var state = ref _state.State.UniformBufferState;
if (shadow.IsReplay())
{
ubState = _state.ShadowState.UniformBufferState;
}
state = ubState;
if (shadow.IsTrack())
{
_state.ShadowState.UniformBufferState = ubState;
}
}
/// <summary>
/// Launches the Inline-to-Memory DMA copy operation.
/// </summary>

View file

@ -590,9 +590,12 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
/// </summary>
struct RtColorMask
{
#pragma warning disable CS0649 // Field is never assigned to
public uint Packed;
#pragma warning restore CS0649
public RtColorMask(uint packed)
{
Packed = packed;
}
/// <summary>
/// Unpacks red channel enable.

View file

@ -5,9 +5,12 @@
/// </summary>
readonly struct Boolean32
{
#pragma warning disable CS0649 // Field is never assigned to
private readonly uint _value;
#pragma warning restore CS0649
public Boolean32(uint value)
{
_value = value;
}
public static implicit operator bool(Boolean32 value)
{