From 6a98c643cabeea25dc42e19fe475a687a034a532 Mon Sep 17 00:00:00 2001 From: gdk Date: Sat, 30 Nov 2019 23:53:09 -0300 Subject: [PATCH] Add a pass to turn global memory access into storage access, and do all storage related transformations on IR --- Ryujinx.Graphics.GAL/Capabilities.cs | 9 +- Ryujinx.Graphics.Gpu/GpuContext.cs | 7 +- Ryujinx.Graphics.Gpu/Memory/BufferManager.cs | 11 +- Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs | 11 +- Ryujinx.Graphics.OpenGL/HwCapabilities.cs | 13 +- Ryujinx.Graphics.OpenGL/Renderer.cs | 4 +- .../CodeGen/Glsl/Declarations.cs | 33 +--- .../CodeGen/Glsl/DefaultNames.cs | 2 - .../Glsl/HelperFunctions/GlobalMemory.glsl | 18 -- .../HelperFunctions/HelperFunctionNames.cs | 2 - .../CodeGen/Glsl/Instructions/InstGen.cs | 12 +- .../Glsl/Instructions/InstGenMemory.cs | 65 +------ .../CodeGen/Glsl/Instructions/InstType.cs | 8 +- .../Decoders/IntegerSize.cs | 15 +- .../Instructions/InstEmitMemory.cs | 167 +++++++++++++++--- .../Ryujinx.Graphics.Shader.csproj | 1 - Ryujinx.Graphics.Shader/ShaderCapabilities.cs | 16 ++ Ryujinx.Graphics.Shader/ShaderConfig.cs | 12 +- .../StructuredIr/HelperFunctionsMask.cs | 11 +- .../StructuredIr/InstructionInfo.cs | 24 +-- .../StructuredIr/StructuredProgram.cs | 29 ++- .../Translation/EmitterContextInsts.cs | 48 ++--- .../Translation/GlobalMemory.cs | 46 +++++ .../Translation/Lowering.cs | 121 +++++++++++++ .../Optimizations/GlobalToStorage.cs | 98 +++++----- .../Translation/Optimizations/Optimizer.cs | 4 +- .../Translation/Translator.cs | 22 ++- Ryujinx.ShaderTools/Program.cs | 5 +- 28 files changed, 532 insertions(+), 282 deletions(-) delete mode 100644 Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/GlobalMemory.glsl create mode 100644 Ryujinx.Graphics.Shader/ShaderCapabilities.cs create mode 100644 Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs create mode 100644 Ryujinx.Graphics.Shader/Translation/Lowering.cs diff --git a/Ryujinx.Graphics.GAL/Capabilities.cs 
b/Ryujinx.Graphics.GAL/Capabilities.cs index 9640447be..246722f81 100644 --- a/Ryujinx.Graphics.GAL/Capabilities.cs +++ b/Ryujinx.Graphics.GAL/Capabilities.cs @@ -4,9 +4,14 @@ namespace Ryujinx.Graphics.GAL { public bool SupportsAstcCompression { get; } - public Capabilities(bool supportsAstcCompression) + public int StorageBufferOffsetAlignment { get; } + + public Capabilities( + bool supportsAstcCompression, + int storageBufferOffsetAlignment) { - SupportsAstcCompression = supportsAstcCompression; + SupportsAstcCompression = supportsAstcCompression; + StorageBufferOffsetAlignment = storageBufferOffsetAlignment; } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/GpuContext.cs b/Ryujinx.Graphics.Gpu/GpuContext.cs index 51961522a..0906d10ee 100644 --- a/Ryujinx.Graphics.Gpu/GpuContext.cs +++ b/Ryujinx.Graphics.Gpu/GpuContext.cs @@ -45,7 +45,7 @@ namespace Ryujinx.Graphics.Gpu Window = new Window(this); - _caps = new Lazy(GetCapabilities); + _caps = new Lazy(Renderer.GetCapabilities); } internal void AdvanceSequence() @@ -53,11 +53,6 @@ namespace Ryujinx.Graphics.Gpu SequenceNumber++; } - private Capabilities GetCapabilities() - { - return Renderer.GetCapabilities(); - } - public void SetVmm(IPhysicalMemory mm) { PhysicalMemory = mm; diff --git a/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs b/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs index a066585ca..83ca5db53 100644 --- a/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs +++ b/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs @@ -1,3 +1,4 @@ +using Ryujinx.Common; using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.GAL.InputAssembler; using Ryujinx.Graphics.Gpu.State; @@ -113,10 +114,9 @@ namespace Ryujinx.Graphics.Gpu.Memory public void SetComputeStorageBuffer(int index, ulong gpuVa, ulong size) { - // TODO: Improve - size += gpuVa & 0x3fUL; + size += gpuVa & ((ulong)_context.Capabilities.StorageBufferOffsetAlignment - 1); - gpuVa &= ~0x3fUL; + gpuVa = BitUtils.AlignDown(gpuVa, 
_context.Capabilities.StorageBufferOffsetAlignment); ulong address = TranslateAndCreateBuffer(gpuVa, size); @@ -125,10 +125,9 @@ namespace Ryujinx.Graphics.Gpu.Memory public void SetGraphicsStorageBuffer(int stage, int index, ulong gpuVa, ulong size) { - // TODO: Improve - size += gpuVa & 0x3fUL; + size += gpuVa & ((ulong)_context.Capabilities.StorageBufferOffsetAlignment - 1); - gpuVa &= ~0x3fUL; + gpuVa = BitUtils.AlignDown(gpuVa, _context.Capabilities.StorageBufferOffsetAlignment); ulong address = TranslateAndCreateBuffer(gpuVa, size); diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index a781de42a..8e39662d1 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -199,7 +199,7 @@ namespace Ryujinx.Graphics.Gpu.Shader Span code = _context.MemoryAccessor.Read(gpuVa, MaxProgramSize); - program = Translator.Translate(code, flags); + program = Translator.Translate(code, GetShaderCapabilities(), flags); int[] codeCached = MemoryMarshal.Cast(code.Slice(0, program.Size)).ToArray(); @@ -238,7 +238,7 @@ namespace Ryujinx.Graphics.Gpu.Shader Span codeA = _context.MemoryAccessor.Read(gpuVaA, MaxProgramSize); Span codeB = _context.MemoryAccessor.Read(gpuVa, MaxProgramSize); - program = Translator.Translate(codeA, codeB, flags); + program = Translator.Translate(codeA, codeB, GetShaderCapabilities(), flags); // TODO: We should also check "codeA" into account. 
codeCached = MemoryMarshal.Cast(codeB.Slice(0, program.Size)).ToArray(); @@ -258,7 +258,7 @@ namespace Ryujinx.Graphics.Gpu.Shader { Span code = _context.MemoryAccessor.Read(gpuVa, MaxProgramSize); - program = Translator.Translate(code, flags); + program = Translator.Translate(code, GetShaderCapabilities(), flags); codeCached = MemoryMarshal.Cast(code.Slice(0, program.Size)).ToArray(); @@ -342,5 +342,10 @@ namespace Ryujinx.Graphics.Gpu.Shader isFirst = false; } } + + private ShaderCapabilities GetShaderCapabilities() + { + return new ShaderCapabilities(_context.Capabilities.StorageBufferOffsetAlignment); + } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.OpenGL/HwCapabilities.cs b/Ryujinx.Graphics.OpenGL/HwCapabilities.cs index f958946e7..70112a3a9 100644 --- a/Ryujinx.Graphics.OpenGL/HwCapabilities.cs +++ b/Ryujinx.Graphics.OpenGL/HwCapabilities.cs @@ -5,9 +5,13 @@ namespace Ryujinx.Graphics.OpenGL { static class HwCapabilities { - private static Lazy _astcCompression = new Lazy(() => HasExtension("GL_KHR_texture_compression_astc_ldr")); + private static Lazy _supportsAstcCompression = new Lazy(() => HasExtension("GL_KHR_texture_compression_astc_ldr")); - public static bool SupportsAstcCompression => _astcCompression.Value; + private static Lazy _storageBufferOffsetAlignment = new Lazy(() => GetLimit(All.ShaderStorageBufferOffsetAlignment)); + + public static bool SupportsAstcCompression => _supportsAstcCompression.Value; + + public static int StorageBufferOffsetAlignment => _storageBufferOffsetAlignment.Value; private static bool HasExtension(string name) { @@ -23,5 +27,10 @@ namespace Ryujinx.Graphics.OpenGL return false; } + + private static int GetLimit(All name) + { + return GL.GetInteger((GetPName)name); + } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.OpenGL/Renderer.cs b/Ryujinx.Graphics.OpenGL/Renderer.cs index 1baee04b1..c320d1504 100644 --- a/Ryujinx.Graphics.OpenGL/Renderer.cs +++ b/Ryujinx.Graphics.OpenGL/Renderer.cs 
@@ -61,7 +61,9 @@ namespace Ryujinx.Graphics.OpenGL public Capabilities GetCapabilities() { - return new Capabilities(HwCapabilities.SupportsAstcCompression); + return new Capabilities( + HwCapabilities.SupportsAstcCompression, + HwCapabilities.StorageBufferOffsetAlignment); } public ulong GetCounter(CounterType type) diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs index e8b449612..a5c8cc9a9 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs @@ -86,7 +86,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl if (info.SBuffers.Count != 0) { - DeclareUsedStorage(context, info); + DeclareStorages(context, info); context.AppendLine(); } @@ -176,11 +176,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl context.AppendLine(GetVarTypeName(decl.VarType) + " " + name + ";"); } - - if ((info.HelperFunctionsMask & HelperFunctionsMask.GlobalMemory) != 0) - { - context.AppendLine($"ivec2 {DefaultNames.GmemOffsetName};"); - } } private static string GetVarTypeName(VariableType type) @@ -218,31 +213,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl } } - private static void DeclareAllStorage(CodeGenContext context, StructuredProgramInfo info) - { - string sbName = OperandManager.GetShaderStagePrefix(context.Config.Stage); - - sbName += "_" + DefaultNames.StorageNamePrefix; - - string blockName = $"{sbName}_{DefaultNames.BlockSuffix}"; - - context.AppendLine("layout (std430) buffer " + blockName); - - context.EnterScope(); - - context.AppendLine("uint " + DefaultNames.DataName + "[];"); - - string arraySize = NumberFormatter.FormatInt(Constants.MaxShaderStorageBuffers); - - context.LeaveScope($" {sbName}[{arraySize}];"); - - for (int sbufSlot = 0; sbufSlot < Constants.MaxShaderStorageBuffers; sbufSlot++) - { - context.SBufferDescriptors.Add(new BufferDescriptor($"{blockName}[{sbufSlot}]", sbufSlot)); - } - } - - private static void 
DeclareUsedStorage(CodeGenContext context, StructuredProgramInfo info) + private static void DeclareStorages(CodeGenContext context, StructuredProgramInfo info) { string sbName = OperandManager.GetShaderStagePrefix(context.Config.Stage); diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs index f1abc9495..4da38b2de 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs @@ -22,8 +22,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl public const string LocalMemoryName = "local_mem"; public const string SharedMemoryName = "shared_mem"; - public const string GmemOffsetName = "gmemOffset"; - public const string UndefinedName = "undef"; } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/GlobalMemory.glsl b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/GlobalMemory.glsl deleted file mode 100644 index b8544ae23..000000000 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/GlobalMemory.glsl +++ /dev/null @@ -1,18 +0,0 @@ -ivec2 Helper_GetStorageBuffer(uint aLow, uint aHigh) -{ - uint64_t address = packUint2x32(uvec2(aLow, aHigh)); - int i; - for (i = 0; i < 16; i++) - { - int offset = 0x40 + i * 4; - uint baseLow = fp_c0_data[offset]; - uint baseHigh = fp_c0_data[offset + 1]; - uint size = fp_c0_data[offset + 2]; - uint64_t baseAddr = packUint2x32(uvec2(baseLow, baseHigh)); - if (address >= baseAddr && address < baseAddr + packUint2x32(uvec2(size, 0))) - { - return ivec2(i, int(unpackUint2x32(address - (baseAddr & ~63ul)).x) >> 2); - } - } - return ivec2(0); -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs index 302b56add..f1540fbfb 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs +++ 
b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs @@ -2,8 +2,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl { static class HelperFunctionNames { - public static string GetStorageBuffer = "Helper_GetStorageBuffer"; - public static string Shuffle = "Helper_Shuffle"; public static string ShuffleDown = "Helper_ShuffleDown"; public static string ShuffleUp = "Helper_ShuffleUp"; diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs index b5cab54e3..b6cdd7f60 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs @@ -49,12 +49,18 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions if (argIndex == 0 && atomic) { - switch (inst & Instruction.MrMask) + Instruction memRegion = inst & Instruction.MrMask; + + switch (memRegion) { - // TODO: Global. case Instruction.MrShared: args += LoadShared (context, operation); break; case Instruction.MrStorage: args += LoadStorage(context, operation); break; + + default: throw new InvalidOperationException($"Invalid memory region \"{memRegion}\"."); } + + // We use the first 2 operands above. 
+ argIndex++; } else { @@ -150,8 +156,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions } } - return "0"; - throw new InvalidOperationException($"Unexpected instruction type \"{info.Type}\"."); } } diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs index c535d8fcf..5c2ea85e6 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs @@ -119,19 +119,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions return OperandManager.GetConstantBufferName(src1, offsetExpr, context.Config.Stage); } - public static string LoadGlobal(CodeGenContext context, AstOperation operation) - { - IAstNode src1 = operation.GetSource(0); - IAstNode src2 = operation.GetSource(1); - - string addrLowExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0)); - string addrHighExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1)); - - context.AppendLine($"{DefaultNames.GmemOffsetName} = {HelperFunctionNames.GetStorageBuffer}({addrLowExpr}, {addrHighExpr});"); - - return GetStorageBufferAccessor($"{DefaultNames.GmemOffsetName}.x", $"{DefaultNames.GmemOffsetName}.y", context.Config.Stage); - } - public static string LoadLocal(CodeGenContext context, AstOperation operation) { return LoadLocalOrShared(context, operation, DefaultNames.LocalMemoryName); @@ -152,29 +139,14 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions } public static string LoadStorage(CodeGenContext context, AstOperation operation) - { - IAstNode src1 = operation.GetSource(0); - - string offsetExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0)); - - return GetStorageBufferAccessor(operation.Index, offsetExpr, context.Config.Stage); - } - - public static string StoreGlobal(CodeGenContext context, AstOperation operation) { IAstNode src1 = operation.GetSource(0); 
IAstNode src2 = operation.GetSource(1); - IAstNode src3 = operation.GetSource(2); - string addrLowExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0)); - string addrHighExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1)); - string valueExpr = GetSoureExpr(context, src3, GetSrcVarType(operation.Inst, 2)); + string indexExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0)); + string offsetExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1)); - context.AppendLine($"{DefaultNames.GmemOffsetName} = {HelperFunctionNames.GetStorageBuffer}({addrLowExpr}, {addrHighExpr});"); - - string sb = GetStorageBufferAccessor($"{DefaultNames.GmemOffsetName}.x", $"{DefaultNames.GmemOffsetName}.y", context.Config.Stage); - - return $"{sb} = {valueExpr}"; + return GetStorageBufferAccessor(indexExpr, offsetExpr, context.Config.Stage); } public static string StoreLocal(CodeGenContext context, AstOperation operation) @@ -205,14 +177,16 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions { IAstNode src1 = operation.GetSource(0); IAstNode src2 = operation.GetSource(1); + IAstNode src3 = operation.GetSource(2); - string offsetExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0)); + string indexExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0)); + string offsetExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1)); - VariableType srcType = OperandManager.GetNodeDestType(src2); + VariableType srcType = OperandManager.GetNodeDestType(src3); - string src = TypeConversion.ReinterpretCast(context, src2, srcType, VariableType.U32); + string src = TypeConversion.ReinterpretCast(context, src3, srcType, VariableType.U32); - string sb = GetStorageBufferAccessor(operation.Index, offsetExpr, context.Config.Stage); + string sb = GetStorageBufferAccessor(indexExpr, offsetExpr, context.Config.Stage); return $"{sb} = {src}"; } @@ -489,27 +463,6 @@ namespace 
Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions return $"{sbName}[{slotExpr}].{DefaultNames.DataName}[{offsetExpr}]"; } - private static string GetStorageBufferAccessor(int slot, string offsetExpr, ShaderStage stage) - { - string sbName = OperandManager.GetShaderStagePrefix(stage); - - sbName += "_" + DefaultNames.StorageNamePrefix; - - string mask = NumberFormatter.FormatUint(~(64u - 1)); - - // Subtract the base address of the global memory, to get the - // storage buffer offset. The mask is used to keep the lower bits, - // since the bound storage buffer must match the host alignment - // restrictions. - int ubOffset = GlobalToStorage.GetStorageCbOffset(stage, slot); - - string ubName = OperandManager.GetConstantBufferName(0, ubOffset, stage); - - offsetExpr = $"{offsetExpr} - int((floatBitsToUint({ubName}) & {mask}) >> 2)"; - - return $"{sbName}[{NumberFormatter.FormatInt(slot)}].{DefaultNames.DataName}[{offsetExpr}]"; - } - private static string GetMask(int index) { return '.' + "rgba".Substring(index, 1); diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstType.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstType.cs index 5836e981f..84e36cdd6 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstType.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstType.cs @@ -11,15 +11,17 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions OpBinaryCom = Op | 2 | Commutative, OpTernary = Op | 3, - AtomicBinary = CallBinary | Atomic, - AtomicTernary = CallTernary | Atomic, - CallNullary = Call | 0, CallUnary = Call | 1, CallBinary = Call | 2, CallTernary = Call | 3, CallQuaternary = Call | 4, + // The atomic instructions have one extra operand, + // for the storage slot and offset pair. 
+ AtomicBinary = Call | Atomic | 3, + AtomicTernary = Call | Atomic | 4, + Commutative = 1 << 8, Op = 1 << 9, Call = 1 << 10, diff --git a/Ryujinx.Graphics.Shader/Decoders/IntegerSize.cs b/Ryujinx.Graphics.Shader/Decoders/IntegerSize.cs index 3ff8e1b26..d39c2a909 100644 --- a/Ryujinx.Graphics.Shader/Decoders/IntegerSize.cs +++ b/Ryujinx.Graphics.Shader/Decoders/IntegerSize.cs @@ -2,12 +2,13 @@ namespace Ryujinx.Graphics.Shader.Decoders { enum IntegerSize { - U8 = 0, - S8 = 1, - U16 = 2, - S16 = 3, - B32 = 4, - B64 = 5, - B128 = 6 + U8 = 0, + S8 = 1, + U16 = 2, + S16 = 3, + B32 = 4, + B64 = 5, + B128 = 6, + UB128 = 7 } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs index 2abbed085..56688161c 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs @@ -11,7 +11,6 @@ namespace Ryujinx.Graphics.Shader.Instructions { private enum MemoryRegion { - Global, Local, Shared } @@ -60,13 +59,20 @@ namespace Ryujinx.Graphics.Shader.Instructions { OpCodeAtom op = (OpCodeAtom)context.CurrOp; - Operand mem = context.ShiftRightU32(GetSrcA(context), Const(2)); + Operand offset = context.ShiftRightU32(GetSrcA(context), Const(2)); - mem = context.IAdd(mem, Const(op.Offset)); + offset = context.IAdd(offset, Const(op.Offset)); Operand value = GetSrcB(context); - Operand res = EmitAtomicOp(context, Instruction.MrShared, op.AtomicOp, op.Type, mem, value); + Operand res = EmitAtomicOp( + context, + Instruction.MrShared, + op.AtomicOp, + op.Type, + offset, + Const(0), + value); context.Copy(GetDest(context), res); } @@ -148,7 +154,7 @@ namespace Ryujinx.Graphics.Shader.Instructions public static void Ldg(EmitterContext context) { - EmitLoad(context, MemoryRegion.Global); + EmitLoadGlobal(context); } public static void Lds(EmitterContext context) @@ -183,11 +189,16 @@ namespace Ryujinx.Graphics.Shader.Instructions { 
OpCodeRed op = (OpCodeRed)context.CurrOp; - Operand offset = context.IAdd(GetSrcA(context), Const(op.Offset)); + (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, op.Offset); - Operand mem = context.ShiftRightU32(offset, Const(2)); - - EmitAtomicOp(context, Instruction.MrGlobal, op.AtomicOp, op.Type, mem, GetDest(context)); + EmitAtomicOp( + context, + Instruction.MrGlobal, + op.AtomicOp, + op.Type, + addrLow, + addrHigh, + GetDest(context)); } public static void St(EmitterContext context) @@ -197,7 +208,7 @@ namespace Ryujinx.Graphics.Shader.Instructions public static void Stg(EmitterContext context) { - EmitStore(context, MemoryRegion.Global); + EmitStoreGlobal(context); } public static void Sts(EmitterContext context) @@ -210,7 +221,8 @@ namespace Ryujinx.Graphics.Shader.Instructions Instruction mr, AtomicOp op, ReductionType type, - Operand mem, + Operand addrLow, + Operand addrHigh, Operand value) { Operand res = Const(0); @@ -220,7 +232,7 @@ namespace Ryujinx.Graphics.Shader.Instructions case AtomicOp.Add: if (type == ReductionType.S32 || type == ReductionType.U32) { - res = context.AtomicAdd(mr, mem, value); + res = context.AtomicAdd(mr, addrLow, addrHigh, value); } else { @@ -230,7 +242,7 @@ namespace Ryujinx.Graphics.Shader.Instructions case AtomicOp.BitwiseAnd: if (type == ReductionType.S32 || type == ReductionType.U32) { - res = context.AtomicAnd(mr, mem, value); + res = context.AtomicAnd(mr, addrLow, addrHigh, value); } else { @@ -240,7 +252,7 @@ namespace Ryujinx.Graphics.Shader.Instructions case AtomicOp.BitwiseExclusiveOr: if (type == ReductionType.S32 || type == ReductionType.U32) { - res = context.AtomicXor(mr, mem, value); + res = context.AtomicXor(mr, addrLow, addrHigh, value); } else { @@ -250,7 +262,7 @@ namespace Ryujinx.Graphics.Shader.Instructions case AtomicOp.BitwiseOr: if (type == ReductionType.S32 || type == ReductionType.U32) { - res = context.AtomicOr(mr, mem, value); + res = context.AtomicOr(mr, 
addrLow, addrHigh, value); } else { @@ -260,11 +272,11 @@ namespace Ryujinx.Graphics.Shader.Instructions case AtomicOp.Maximum: if (type == ReductionType.S32) { - res = context.AtomicMaxS32(mr, mem, value); + res = context.AtomicMaxS32(mr, addrLow, addrHigh, value); } else if (type == ReductionType.U32) { - res = context.AtomicMaxU32(mr, mem, value); + res = context.AtomicMaxU32(mr, addrLow, addrHigh, value); } else { @@ -274,11 +286,11 @@ namespace Ryujinx.Graphics.Shader.Instructions case AtomicOp.Minimum: if (type == ReductionType.S32) { - res = context.AtomicMinS32(mr, mem, value); + res = context.AtomicMinS32(mr, addrLow, addrHigh, value); } else if (type == ReductionType.U32) { - res = context.AtomicMinU32(mr, mem, value); + res = context.AtomicMinU32(mr, addrLow, addrHigh, value); } else { @@ -331,7 +343,6 @@ namespace Ryujinx.Graphics.Shader.Instructions switch (region) { - case MemoryRegion.Global: value = context.LoadGlobal(offset); break; case MemoryRegion.Local: value = context.LoadLocal (offset); break; case MemoryRegion.Shared: value = context.LoadShared(offset); break; } @@ -345,6 +356,38 @@ namespace Ryujinx.Graphics.Shader.Instructions } } + private static void EmitLoadGlobal(EmitterContext context) + { + OpCodeMemory op = (OpCodeMemory)context.CurrOp; + + bool isSmallInt = op.Size < IntegerSize.B32; + + int count = GetVectorCount(op.Size); + + (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, op.Offset); + + Operand bitOffset = GetBitOffset(context, addrLow); + + for (int index = 0; index < count; index++) + { + Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr); + + if (rd.IsRZ) + { + break; + } + + Operand value = context.LoadGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh); + + if (isSmallInt) + { + value = ExtractSmallInt(context, op.Size, bitOffset, value); + } + + context.Copy(Register(rd), value); + } + } + private static void EmitStore(EmitterContext context, MemoryRegion region) { 
OpCodeMemory op = (OpCodeMemory)context.CurrOp; @@ -384,7 +427,6 @@ namespace Ryujinx.Graphics.Shader.Instructions switch (region) { - case MemoryRegion.Global: word = context.LoadGlobal(offset); break; case MemoryRegion.Local: word = context.LoadLocal (offset); break; case MemoryRegion.Shared: word = context.LoadShared(offset); break; } @@ -394,7 +436,6 @@ namespace Ryujinx.Graphics.Shader.Instructions switch (region) { - case MemoryRegion.Global: context.StoreGlobal(offset, value); break; case MemoryRegion.Local: context.StoreLocal (offset, value); break; case MemoryRegion.Shared: context.StoreShared(offset, value); break; } @@ -406,9 +447,89 @@ namespace Ryujinx.Graphics.Shader.Instructions } } + private static void EmitStoreGlobal(EmitterContext context) + { + OpCodeMemory op = (OpCodeMemory)context.CurrOp; + + bool isSmallInt = op.Size < IntegerSize.B32; + + int count = GetVectorCount(op.Size); + + (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, op.Offset); + + Operand bitOffset = GetBitOffset(context, addrLow); + + for (int index = 0; index < count; index++) + { + Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr); + + Operand value = Register(rd); + + if (isSmallInt) + { + Operand word = context.LoadGlobal(addrLow, addrHigh); + + value = InsertSmallInt(context, op.Size, bitOffset, word, value); + } + + context.StoreGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh, value); + + if (rd.IsRZ) + { + break; + } + } + } + + private static int GetVectorCount(IntegerSize size) + { + switch (size) + { + case IntegerSize.B64: + return 2; + case IntegerSize.B128: + case IntegerSize.UB128: + return 4; + } + + return 1; + } + + private static (Operand, Operand) Get40BitsAddress( + EmitterContext context, + Register ra, + bool extended, + int offset) + { + Operand addrLow = GetSrcA(context); + Operand addrHigh; + + if (extended && !ra.IsRZ) + { + addrHigh = Register(ra.Index + 1, RegisterType.Gpr); + } + else + { + 
addrHigh = Const(0); + } + + Operand offs = Const(offset); + + addrLow = context.IAdd(addrLow, offs); + + if (extended) + { + Operand carry = context.ICompareLessUnsigned(addrLow, offs); + + addrHigh = context.IAdd(addrHigh, context.ConditionalSelect(carry, Const(1), Const(0))); + } + + return (addrLow, addrHigh); + } + private static Operand GetBitOffset(EmitterContext context, Operand baseOffset) { - // Note: byte offset = (baseOffset & 0b11) * 8. + // Note: bit offset = (baseOffset & 0b11) * 8. // Addresses should be always aligned to the integer type, // so we don't need to take unaligned addresses into account. return context.ShiftLeft(context.BitwiseAnd(baseOffset, Const(3)), Const(3)); diff --git a/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj b/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj index a046c2f9e..e10d1edaf 100644 --- a/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj +++ b/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj @@ -1,7 +1,6 @@ - diff --git a/Ryujinx.Graphics.Shader/ShaderCapabilities.cs b/Ryujinx.Graphics.Shader/ShaderCapabilities.cs new file mode 100644 index 000000000..939c7c1de --- /dev/null +++ b/Ryujinx.Graphics.Shader/ShaderCapabilities.cs @@ -0,0 +1,16 @@ +namespace Ryujinx.Graphics.Shader +{ + public struct ShaderCapabilities + { + private static readonly ShaderCapabilities _default = new ShaderCapabilities(16); + + public static ShaderCapabilities Default => _default; + + public int StorageBufferOffsetAlignment { get; } + + public ShaderCapabilities(int storageBufferOffsetAlignment) + { + StorageBufferOffsetAlignment = storageBufferOffsetAlignment; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/ShaderConfig.cs b/Ryujinx.Graphics.Shader/ShaderConfig.cs index 3583fa64c..3088cfbba 100644 --- a/Ryujinx.Graphics.Shader/ShaderConfig.cs +++ b/Ryujinx.Graphics.Shader/ShaderConfig.cs @@ -6,6 +6,8 @@ namespace Ryujinx.Graphics.Shader { public ShaderStage Stage { get; } + public 
ShaderCapabilities Capabilities { get; } + public TranslationFlags Flags { get; } public int MaxOutputVertices { get; } @@ -13,12 +15,14 @@ namespace Ryujinx.Graphics.Shader public OutputTopology OutputTopology { get; } public ShaderConfig( - ShaderStage stage, - TranslationFlags flags, - int maxOutputVertices, - OutputTopology outputTopology) + ShaderStage stage, + ShaderCapabilities capabilities, + TranslationFlags flags, + int maxOutputVertices, + OutputTopology outputTopology) { Stage = stage; + Capabilities = capabilities; Flags = flags; MaxOutputVertices = maxOutputVertices; OutputTopology = outputTopology; diff --git a/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs b/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs index b262e6bc1..e2eee78d9 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs @@ -5,11 +5,10 @@ namespace Ryujinx.Graphics.Shader.StructuredIr [Flags] enum HelperFunctionsMask { - GlobalMemory = 1 << 0, - Shuffle = 1 << 1, - ShuffleDown = 1 << 2, - ShuffleUp = 1 << 3, - ShuffleXor = 1 << 4, - SwizzleAdd = 1 << 5 + Shuffle = 1 << 0, + ShuffleDown = 1 << 1, + ShuffleUp = 1 << 2, + ShuffleXor = 1 << 3, + SwizzleAdd = 1 << 4 } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs b/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs index 4c456d7bb..d1874f50f 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs @@ -25,16 +25,16 @@ namespace Ryujinx.Graphics.Shader.StructuredIr _infoTbl = new InstInfo[(int)Instruction.Count]; // Inst Destination type Source 1 type Source 2 type Source 3 type Source 4 type - Add(Instruction.AtomicAdd, VariableType.U32, VariableType.U32, VariableType.U32); - Add(Instruction.AtomicAnd, VariableType.U32, VariableType.U32, VariableType.U32); - Add(Instruction.AtomicCompareAndSwap, 
VariableType.U32, VariableType.U32, VariableType.U32, VariableType.U32); - Add(Instruction.AtomicMaxS32, VariableType.S32, VariableType.S32, VariableType.S32); - Add(Instruction.AtomicMaxU32, VariableType.U32, VariableType.U32, VariableType.U32); - Add(Instruction.AtomicMinS32, VariableType.S32, VariableType.S32, VariableType.S32); - Add(Instruction.AtomicMinU32, VariableType.U32, VariableType.U32, VariableType.U32); - Add(Instruction.AtomicOr, VariableType.U32, VariableType.U32, VariableType.U32); - Add(Instruction.AtomicSwap, VariableType.U32, VariableType.U32, VariableType.U32); - Add(Instruction.AtomicXor, VariableType.U32, VariableType.U32, VariableType.U32); + Add(Instruction.AtomicAdd, VariableType.U32, VariableType.S32, VariableType.S32, VariableType.U32); + Add(Instruction.AtomicAnd, VariableType.U32, VariableType.S32, VariableType.S32, VariableType.U32); + Add(Instruction.AtomicCompareAndSwap, VariableType.U32, VariableType.S32, VariableType.S32, VariableType.U32, VariableType.U32); + Add(Instruction.AtomicMaxS32, VariableType.S32, VariableType.S32, VariableType.S32, VariableType.S32); + Add(Instruction.AtomicMaxU32, VariableType.U32, VariableType.S32, VariableType.S32, VariableType.U32); + Add(Instruction.AtomicMinS32, VariableType.S32, VariableType.S32, VariableType.S32, VariableType.S32); + Add(Instruction.AtomicMinU32, VariableType.U32, VariableType.S32, VariableType.S32, VariableType.U32); + Add(Instruction.AtomicOr, VariableType.U32, VariableType.S32, VariableType.S32, VariableType.U32); + Add(Instruction.AtomicSwap, VariableType.U32, VariableType.S32, VariableType.S32, VariableType.U32); + Add(Instruction.AtomicXor, VariableType.U32, VariableType.S32, VariableType.S32, VariableType.U32); Add(Instruction.Absolute, VariableType.Scalar, VariableType.Scalar); Add(Instruction.Add, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar); Add(Instruction.Ballot, VariableType.U32, VariableType.Bool); @@ -84,7 +84,7 @@ namespace 
Ryujinx.Graphics.Shader.StructuredIr Add(Instruction.LoadGlobal, VariableType.U32, VariableType.S32, VariableType.S32); Add(Instruction.LoadLocal, VariableType.U32, VariableType.S32); Add(Instruction.LoadShared, VariableType.U32, VariableType.S32); - Add(Instruction.LoadStorage, VariableType.U32, VariableType.S32); + Add(Instruction.LoadStorage, VariableType.U32, VariableType.S32, VariableType.S32); Add(Instruction.LogarithmB2, VariableType.Scalar, VariableType.Scalar); Add(Instruction.LogicalAnd, VariableType.Bool, VariableType.Bool, VariableType.Bool); Add(Instruction.LogicalExclusiveOr, VariableType.Bool, VariableType.Bool, VariableType.Bool); @@ -111,7 +111,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr Add(Instruction.StoreGlobal, VariableType.None, VariableType.S32, VariableType.S32, VariableType.U32); Add(Instruction.StoreLocal, VariableType.None, VariableType.S32, VariableType.U32); Add(Instruction.StoreShared, VariableType.None, VariableType.S32, VariableType.U32); - Add(Instruction.StoreStorage, VariableType.None, VariableType.S32, VariableType.U32); + Add(Instruction.StoreStorage, VariableType.None, VariableType.S32, VariableType.S32, VariableType.U32); Add(Instruction.Subtract, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar); Add(Instruction.SwizzleAdd, VariableType.F32, VariableType.F32, VariableType.F32, VariableType.S32); Add(Instruction.TextureSample, VariableType.F32); diff --git a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs index a81b3d12a..a85fbae3d 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs @@ -1,4 +1,5 @@ using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.Translation; using System; using System.Collections.Generic; @@ -80,7 +81,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr } else if (UsesStorage(inst)) { - 
context.Info.SBuffers.Add(operation.Index); + AddSBufferUse(context.Info.SBuffers, operation); } AstAssignment assignment; @@ -159,7 +160,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr { if (UsesStorage(inst)) { - context.Info.SBuffers.Add(operation.Index); + AddSBufferUse(context.Info.SBuffers, operation); } context.AddNode(new AstOperation(inst, operation.Index, sources)); @@ -170,10 +171,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr // decide which helper functions are needed on the final generated code. switch (operation.Inst) { - case Instruction.LoadGlobal: - case Instruction.StoreGlobal: - context.Info.HelperFunctionsMask |= HelperFunctionsMask.GlobalMemory; - break; case Instruction.Shuffle: context.Info.HelperFunctionsMask |= HelperFunctionsMask.Shuffle; break; @@ -192,6 +189,26 @@ namespace Ryujinx.Graphics.Shader.StructuredIr } } + private static void AddSBufferUse(HashSet sBuffers, Operation operation) + { + Operand slot = operation.GetSource(0); + + if (slot.Type == OperandType.Constant) + { + sBuffers.Add(slot.Value); + } + else + { + // If the value is not constant, then we don't know + // how many storage buffers are used, so we assume + // all of them are used. 
+ for (int index = 0; index < GlobalMemory.StorageMaxCount; index++) + { + sBuffers.Add(index); + } + } + } + private static VariableType GetVarTypeFromUses(Operand dest) { HashSet visited = new HashSet(); diff --git a/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs b/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs index df8867e9f..d884cfdb3 100644 --- a/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs +++ b/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs @@ -6,54 +6,54 @@ namespace Ryujinx.Graphics.Shader.Translation { static class EmitterContextInsts { - public static Operand AtomicAdd(this EmitterContext context, Instruction mr, Operand a, Operand b) + public static Operand AtomicAdd(this EmitterContext context, Instruction mr, Operand a, Operand b, Operand c) { - return context.Add(Instruction.AtomicAdd | mr, Local(), a, b); + return context.Add(Instruction.AtomicAdd | mr, Local(), a, b, c); } - public static Operand AtomicAnd(this EmitterContext context, Instruction mr, Operand a, Operand b) + public static Operand AtomicAnd(this EmitterContext context, Instruction mr, Operand a, Operand b, Operand c) { - return context.Add(Instruction.AtomicAnd | mr, Local(), a, b); + return context.Add(Instruction.AtomicAnd | mr, Local(), a, b, c); } - public static Operand AtomicCompareAndSwap(this EmitterContext context, Instruction mr, Operand a, Operand b, Operand c) + public static Operand AtomicCompareAndSwap(this EmitterContext context, Instruction mr, Operand a, Operand b, Operand c, Operand d) { - return context.Add(Instruction.AtomicCompareAndSwap | mr, Local(), a, b, c); + return context.Add(Instruction.AtomicCompareAndSwap | mr, Local(), a, b, c, d); } - public static Operand AtomicMaxS32(this EmitterContext context, Instruction mr, Operand a, Operand b) + public static Operand AtomicMaxS32(this EmitterContext context, Instruction mr, Operand a, Operand b, Operand c) { - return context.Add(Instruction.AtomicMaxS32 | mr, 
Local(), a, b); + return context.Add(Instruction.AtomicMaxS32 | mr, Local(), a, b, c); } - public static Operand AtomicMaxU32(this EmitterContext context, Instruction mr, Operand a, Operand b) + public static Operand AtomicMaxU32(this EmitterContext context, Instruction mr, Operand a, Operand b, Operand c) { - return context.Add(Instruction.AtomicMaxU32 | mr, Local(), a, b); + return context.Add(Instruction.AtomicMaxU32 | mr, Local(), a, b, c); } - public static Operand AtomicMinS32(this EmitterContext context, Instruction mr, Operand a, Operand b) + public static Operand AtomicMinS32(this EmitterContext context, Instruction mr, Operand a, Operand b, Operand c) { - return context.Add(Instruction.AtomicMinS32 | mr, Local(), a, b); + return context.Add(Instruction.AtomicMinS32 | mr, Local(), a, b, c); } - public static Operand AtomicMinU32(this EmitterContext context, Instruction mr, Operand a, Operand b) + public static Operand AtomicMinU32(this EmitterContext context, Instruction mr, Operand a, Operand b, Operand c) { - return context.Add(Instruction.AtomicMinU32 | mr, Local(), a, b); + return context.Add(Instruction.AtomicMinU32 | mr, Local(), a, b, c); } - public static Operand AtomicOr(this EmitterContext context, Instruction mr, Operand a, Operand b) + public static Operand AtomicOr(this EmitterContext context, Instruction mr, Operand a, Operand b, Operand c) { - return context.Add(Instruction.AtomicOr | mr, Local(), a, b); + return context.Add(Instruction.AtomicOr | mr, Local(), a, b, c); } - public static Operand AtomicSwap(this EmitterContext context, Instruction mr, Operand a, Operand b) + public static Operand AtomicSwap(this EmitterContext context, Instruction mr, Operand a, Operand b, Operand c) { - return context.Add(Instruction.AtomicSwap | mr, Local(), a, b); + return context.Add(Instruction.AtomicSwap | mr, Local(), a, b, c); } - public static Operand AtomicXor(this EmitterContext context, Instruction mr, Operand a, Operand b) + public static Operand 
AtomicXor(this EmitterContext context, Instruction mr, Operand a, Operand b, Operand c) { - return context.Add(Instruction.AtomicXor | mr, Local(), a, b); + return context.Add(Instruction.AtomicXor | mr, Local(), a, b, c); } public static Operand Ballot(this EmitterContext context, Operand a) @@ -461,9 +461,9 @@ namespace Ryujinx.Graphics.Shader.Translation return context.Add(Instruction.LoadConstant, Local(), a, b); } - public static Operand LoadGlobal(this EmitterContext context, Operand a) + public static Operand LoadGlobal(this EmitterContext context, Operand a, Operand b) { - return context.Add(Instruction.LoadGlobal, Local(), a); + return context.Add(Instruction.LoadGlobal, Local(), a, b); } public static Operand LoadLocal(this EmitterContext context, Operand a) @@ -523,9 +523,9 @@ namespace Ryujinx.Graphics.Shader.Translation return context.Add(Instruction.ShuffleXor, Local(), a, b, c); } - public static Operand StoreGlobal(this EmitterContext context, Operand a, Operand b) + public static Operand StoreGlobal(this EmitterContext context, Operand a, Operand b, Operand c) { - return context.Add(Instruction.StoreGlobal, null, a, b); + return context.Add(Instruction.StoreGlobal, null, a, b, c); } public static Operand StoreLocal(this EmitterContext context, Operand a, Operand b) diff --git a/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs b/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs new file mode 100644 index 000000000..4b5dbccb1 --- /dev/null +++ b/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs @@ -0,0 +1,46 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; + +namespace Ryujinx.Graphics.Shader.Translation +{ + static class GlobalMemory + { + private const int StorageDescsBaseOffset = 0x44; // In words. + + public const int StorageDescSize = 4; // In words. 
+ public const int StorageMaxCount = 16; + + public const int StorageDescsSize = StorageDescSize * StorageMaxCount; + + public static bool UsesGlobalMemory(Instruction inst) + { + return (inst.IsAtomic() && IsGlobalMr(inst)) || + inst == Instruction.LoadGlobal || + inst == Instruction.StoreGlobal; + } + + private static bool IsGlobalMr(Instruction inst) + { + return (inst & Instruction.MrMask) == Instruction.MrGlobal; + } + + public static int GetStorageCbOffset(ShaderStage stage, int slot) + { + return GetStorageBaseCbOffset(stage) + slot * StorageDescSize; + } + + public static int GetStorageBaseCbOffset(ShaderStage stage) + { + switch (stage) + { + case ShaderStage.Compute: return StorageDescsBaseOffset + 2 * StorageDescsSize; + case ShaderStage.Vertex: return StorageDescsBaseOffset; + case ShaderStage.TessellationControl: return StorageDescsBaseOffset + 1 * StorageDescsSize; + case ShaderStage.TessellationEvaluation: return StorageDescsBaseOffset + 2 * StorageDescsSize; + case ShaderStage.Geometry: return StorageDescsBaseOffset + 3 * StorageDescsSize; + case ShaderStage.Fragment: return StorageDescsBaseOffset + 4 * StorageDescsSize; + } + + return 0; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Translation/Lowering.cs b/Ryujinx.Graphics.Shader/Translation/Lowering.cs new file mode 100644 index 000000000..9a17dd83e --- /dev/null +++ b/Ryujinx.Graphics.Shader/Translation/Lowering.cs @@ -0,0 +1,121 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using System.Collections.Generic; + +using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; +using static Ryujinx.Graphics.Shader.Translation.GlobalMemory; + +namespace Ryujinx.Graphics.Shader.Translation +{ + static class Lowering + { + public static void RunPass(BasicBlock[] blocks, ShaderConfig config) + { + for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++) + { + BasicBlock block = blocks[blkIndex]; + + for (LinkedListNode node = 
block.Operations.First; node != null; node = node.Next)
+                {
+                    if (!(node.Value is Operation operation))
+                    {
+                        continue;
+                    }
+
+                    if (UsesGlobalMemory(operation.Inst))
+                    {
+                        // LowerGlobal returns the node of the replacement operation,
+                        // so iteration resumes right after the lowered access.
+                        node = LowerGlobal(node, config);
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Rewrites one global memory operation (load, store or atomic) into the
+        /// equivalent storage buffer operation, emitting before it the code that
+        /// finds which storage buffer contains the accessed address.
+        /// </summary>
+        /// <param name="node">List node holding the global memory operation</param>
+        /// <param name="config">Shader configuration; supplies the stage and the host capabilities</param>
+        /// <returns>The list node of the storage operation that replaced <paramref name="node"/></returns>
+        private static LinkedListNode LowerGlobal(LinkedListNode node, ShaderConfig config)
+        {
+            Operation operation = (Operation)node.Value;
+
+            Operation storageOp;
+
+            // Inserts "local = inst(sources)" immediately before the operation
+            // being lowered and returns the fresh local holding the result.
+            Operand PrependOperation(Instruction inst, params Operand[] sources)
+            {
+                Operand local = Local();
+
+                node.List.AddBefore(node, new Operation(inst, local, sources));
+
+                return local;
+            }
+
+            // Sources 0 and 1 are the low and high words of the accessed address.
+            Operand addrLow  = operation.GetSource(0);
+            Operand addrHigh = operation.GetSource(1);
+
+            // Defaults used when the address matches no descriptor: slot 0, base 0.
+            Operand sbBaseAddrLow = Const(0);
+            Operand sbSlot        = Const(0);
+
+            // Test the address against every storage buffer descriptor and keep
+            // the slot and base address of the one whose range contains it.
+            for (int slot = 0; slot < StorageMaxCount; slot++)
+            {
+                int cbOffset = GetStorageCbOffset(config.Stage, slot);
+
+                // Descriptor words read from constant buffer 0:
+                // +0 base address (low), +1 base address (high), +2 size.
+                Operand baseAddrLow  = Cbuf(0, cbOffset);
+                Operand baseAddrHigh = Cbuf(0, cbOffset + 1);
+                Operand size         = Cbuf(0, cbOffset + 2);
+
+                Operand offset = PrependOperation(Instruction.Subtract,       addrLow, baseAddrLow);
+                Operand borrow = PrependOperation(Instruction.CompareLessU32, addrLow, baseAddrLow);
+
+                Operand inRangeLow = PrependOperation(Instruction.CompareLessU32, offset, size);
+
+                // NOTE(review): presumably a true comparison yields all ones (-1), so
+                // this Add subtracts the borrow from the high word - confirm.
+                Operand addrHighBorrowed = PrependOperation(Instruction.Add, addrHigh, borrow);
+
+                Operand inRangeHigh = PrependOperation(Instruction.CompareEqual, addrHighBorrowed, baseAddrHigh);
+
+                Operand inRange = PrependOperation(Instruction.BitwiseAnd, inRangeLow, inRangeHigh);
+
+                // Keep the previous selection unless this slot contains the address.
+                sbBaseAddrLow = PrependOperation(Instruction.ConditionalSelect, inRange, baseAddrLow, sbBaseAddrLow);
+                sbSlot        = PrependOperation(Instruction.ConditionalSelect, inRange, Const(slot), sbSlot);
+            }
+
+            // The offset must be relative to the base address aligned down to the
+            // host storage buffer offset alignment, the same mask that
+            // GlobalToStorage applies. A hard-coded 64 is only right on hosts
+            // whose alignment happens to be 64 bytes.
+            Operand alignMask = Const(-config.Capabilities.StorageBufferOffsetAlignment);
+
+            Operand baseAddrTrunc = PrependOperation(Instruction.BitwiseAnd, sbBaseAddrLow, alignMask);
+            Operand byteOffset    = PrependOperation(Instruction.Subtract, addrLow, baseAddrTrunc);
+            Operand
wordOffset = PrependOperation(Instruction.ShiftRightU32, byteOffset, Const(2)); + + Operand[] sources = new Operand[operation.SourcesCount]; + + sources[0] = sbSlot; + sources[1] = wordOffset; + + for (int index = 2; index < operation.SourcesCount; index++) + { + sources[index] = operation.GetSource(index); + } + + if (operation.Inst.IsAtomic()) + { + Instruction inst = (operation.Inst & ~Instruction.MrMask) | Instruction.MrStorage; + + storageOp = new Operation(inst, operation.Dest, sources); + } + else if (operation.Inst == Instruction.LoadGlobal) + { + storageOp = new Operation(Instruction.LoadStorage, operation.Dest, sources); + } + else + { + storageOp = new Operation(Instruction.StoreStorage, null, sources); + } + + for (int index = 0; index < operation.SourcesCount; index++) + { + operation.SetSource(index, null); + } + + LinkedListNode oldNode = node; + + node = node.List.AddBefore(node, storageOp); + + node.List.Remove(oldNode); + + return node; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs b/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs index 2fafa5add..639f9ba4b 100644 --- a/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs +++ b/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs @@ -1,20 +1,16 @@ using Ryujinx.Graphics.Shader.IntermediateRepresentation; using System.Collections.Generic; +using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; +using static Ryujinx.Graphics.Shader.Translation.GlobalMemory; + namespace Ryujinx.Graphics.Shader.Translation.Optimizations { static class GlobalToStorage { - private const int StorageDescsBaseOffset = 0x44; // In words. - - private const int StorageDescSize = 4; // In words. 
- private const int StorageMaxCount = 16; - - private const int StorageDescsSize = StorageDescSize * StorageMaxCount; - - public static void RunPass(BasicBlock block, ShaderStage stage) + public static void RunPass(BasicBlock block, ShaderConfig config) { - int sbStart = GetStorageBaseCbOffset(stage); + int sbStart = GetStorageBaseCbOffset(config.Stage); int sbEnd = sbStart + StorageDescsSize; @@ -25,9 +21,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations continue; } - if (operation.Inst.IsAtomic() || - operation.Inst == Instruction.LoadGlobal || - operation.Inst == Instruction.StoreGlobal) + if (UsesGlobalMemory(operation.Inst)) { Operand source = operation.GetSource(0); @@ -37,44 +31,68 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations if (storageIndex >= 0) { - node = ReplaceGlobalWithStorage(node, storageIndex); + node = ReplaceGlobalWithStorage(node, config, storageIndex); } } } } } - private static LinkedListNode ReplaceGlobalWithStorage(LinkedListNode node, int storageIndex) + private static LinkedListNode ReplaceGlobalWithStorage(LinkedListNode node, ShaderConfig config, int storageIndex) { Operation operation = (Operation)node.Value; Operation storageOp; + Operand GetStorageOffset() + { + Operand addrLow = operation.GetSource(0); + + Operand baseAddrLow = Cbuf(0, GetStorageCbOffset(config.Stage, storageIndex)); + + Operand baseAddrTrunc = Local(); + + Operand alignMask = Const(-config.Capabilities.StorageBufferOffsetAlignment); + + Operation andOp = new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask); + + node.List.AddBefore(node, andOp); + + Operand byteOffset = Local(); + Operand wordOffset = Local(); + + Operation subOp = new Operation(Instruction.Subtract, byteOffset, addrLow, baseAddrTrunc); + Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2)); + + node.List.AddBefore(node, subOp); + node.List.AddBefore(node, shrOp); + + return wordOffset; + } + + Operand[] 
sources = new Operand[operation.SourcesCount]; + + sources[0] = Const(storageIndex); + sources[1] = GetStorageOffset(); + + for (int index = 2; index < operation.SourcesCount; index++) + { + sources[index] = operation.GetSource(index); + } + if (operation.Inst.IsAtomic()) { - Operand[] sources = new Operand[operation.SourcesCount]; - - for (int index = 0; index < operation.SourcesCount; index++) - { - sources[index] = operation.GetSource(index); - } - Instruction inst = (operation.Inst & ~Instruction.MrMask) | Instruction.MrStorage; - storageOp = new Operation(inst, storageIndex, operation.Dest, sources); + storageOp = new Operation(inst, operation.Dest, sources); } else if (operation.Inst == Instruction.LoadGlobal) { - Operand source = operation.GetSource(0); - - storageOp = new Operation(Instruction.LoadStorage, storageIndex, operation.Dest, source); + storageOp = new Operation(Instruction.LoadStorage, operation.Dest, sources); } else { - Operand src1 = operation.GetSource(0); - Operand src2 = operation.GetSource(1); - - storageOp = new Operation(Instruction.StoreStorage, storageIndex, null, src1, src2); + storageOp = new Operation(Instruction.StoreStorage, null, sources); } for (int index = 0; index < operation.SourcesCount; index++) @@ -84,7 +102,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations LinkedListNode oldNode = node; - node = node.List.AddAfter(node, storageOp); + node = node.List.AddBefore(node, storageOp); node.List.Remove(oldNode); @@ -125,25 +143,5 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations return -1; } - - public static int GetStorageCbOffset(ShaderStage stage, int slot) - { - return GetStorageBaseCbOffset(stage) + slot * StorageDescSize; - } - - private static int GetStorageBaseCbOffset(ShaderStage stage) - { - switch (stage) - { - case ShaderStage.Compute: return StorageDescsBaseOffset + 2 * StorageDescsSize; - case ShaderStage.Vertex: return StorageDescsBaseOffset; - case ShaderStage.TessellationControl: 
return StorageDescsBaseOffset + 1 * StorageDescsSize; - case ShaderStage.TessellationEvaluation: return StorageDescsBaseOffset + 2 * StorageDescsSize; - case ShaderStage.Geometry: return StorageDescsBaseOffset + 3 * StorageDescsSize; - case ShaderStage.Fragment: return StorageDescsBaseOffset + 4 * StorageDescsSize; - } - - return 0; - } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs b/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs index 93d86541f..c5db4678b 100644 --- a/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs +++ b/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs @@ -7,11 +7,11 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations { static class Optimizer { - public static void Optimize(BasicBlock[] blocks, ShaderStage stage) + public static void RunPass(BasicBlock[] blocks, ShaderConfig config) { for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++) { - GlobalToStorage.RunPass(blocks[blkIndex], stage); + GlobalToStorage.RunPass(blocks[blkIndex], config); } bool modified; diff --git a/Ryujinx.Graphics.Shader/Translation/Translator.cs b/Ryujinx.Graphics.Shader/Translation/Translator.cs index 1c37fa70e..b129be939 100644 --- a/Ryujinx.Graphics.Shader/Translation/Translator.cs +++ b/Ryujinx.Graphics.Shader/Translation/Translator.cs @@ -47,7 +47,7 @@ namespace Ryujinx.Graphics.Shader.Translation return code.Slice(0, headerSize + (int)endAddress); } - public static ShaderProgram Translate(Span code, TranslationFlags flags) + public static ShaderProgram Translate(Span code, ShaderCapabilities capabilities, TranslationFlags flags) { bool compute = (flags & TranslationFlags.Compute) != 0; bool debugMode = (flags & TranslationFlags.DebugMode) != 0; @@ -82,6 +82,7 @@ namespace Ryujinx.Graphics.Shader.Translation ShaderConfig config = new ShaderConfig( stage, + capabilities, flags, maxOutputVertexCount, outputTopology); @@ -89,7 +90,7 @@ namespace 
Ryujinx.Graphics.Shader.Translation return Translate(ops, config, size); } - public static ShaderProgram Translate(Span vpACode, Span vpBCode, TranslationFlags flags) + public static ShaderProgram Translate(Span vpACode, Span vpBCode, ShaderCapabilities capabilities, TranslationFlags flags) { bool debugMode = (flags & TranslationFlags.DebugMode) != 0; @@ -98,6 +99,7 @@ namespace Ryujinx.Graphics.Shader.Translation ShaderConfig config = new ShaderConfig( header.Stage, + capabilities, flags, header.MaxOutputVertexCount, header.OutputTopology); @@ -107,20 +109,22 @@ namespace Ryujinx.Graphics.Shader.Translation private static ShaderProgram Translate(Operation[] ops, ShaderConfig config, int size) { - BasicBlock[] irBlocks = ControlFlowGraph.MakeCfg(ops); + BasicBlock[] blocks = ControlFlowGraph.MakeCfg(ops); - if (irBlocks.Length > 0) + if (blocks.Length > 0) { - Dominance.FindDominators(irBlocks[0], irBlocks.Length); + Dominance.FindDominators(blocks[0], blocks.Length); - Dominance.FindDominanceFrontiers(irBlocks); + Dominance.FindDominanceFrontiers(blocks); - Ssa.Rename(irBlocks); + Ssa.Rename(blocks); - Optimizer.Optimize(irBlocks, config.Stage); + Optimizer.RunPass(blocks, config); + + Lowering.RunPass(blocks, config); } - StructuredProgramInfo sInfo = StructuredProgram.MakeStructuredProgram(irBlocks, config); + StructuredProgramInfo sInfo = StructuredProgram.MakeStructuredProgram(blocks, config); GlslProgram program = GlslGenerator.Generate(sInfo, config); diff --git a/Ryujinx.ShaderTools/Program.cs b/Ryujinx.ShaderTools/Program.cs index 6fa043a3c..275da794d 100644 --- a/Ryujinx.ShaderTools/Program.cs +++ b/Ryujinx.ShaderTools/Program.cs @@ -1,4 +1,5 @@ -using Ryujinx.Graphics.Shader.Translation; +using Ryujinx.Graphics.Shader; +using Ryujinx.Graphics.Shader.Translation; using System; using System.IO; @@ -19,7 +20,7 @@ namespace Ryujinx.ShaderTools byte[] data = File.ReadAllBytes(args[args.Length - 1]); - string code = Translator.Translate(data, flags).Code; + 
string code = Translator.Translate(data, ShaderCapabilities.Default, flags).Code; Console.WriteLine(code); }