From 25fd4ef10e610ee470b76d6f58b4a3b9cd053844 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sun, 17 Oct 2021 17:28:18 -0300 Subject: [PATCH] Extend bindless elimination to work with masked and shifted handles (#2727) * Extend bindless elimination to work with masked handles * Extend bindless elimination to catch shifted pattern, refactor handle packing/unpacking --- .../Image/TextureBindingsManager.cs | 54 ++++------- Ryujinx.Graphics.Shader/TextureHandle.cs | 54 +++++++++++ .../Optimizations/BindlessElimination.cs | 91 ++++++++++++++++++- 3 files changed, 159 insertions(+), 40 deletions(-) create mode 100644 Ryujinx.Graphics.Shader/TextureHandle.cs diff --git a/Ryujinx.Graphics.Gpu/Image/TextureBindingsManager.cs b/Ryujinx.Graphics.Gpu/Image/TextureBindingsManager.cs index dff32dc66..5862ea712 100644 --- a/Ryujinx.Graphics.Gpu/Image/TextureBindingsManager.cs +++ b/Ryujinx.Graphics.Gpu/Image/TextureBindingsManager.cs @@ -14,12 +14,6 @@ namespace Ryujinx.Graphics.Gpu.Image private const int InitialTextureStateSize = 32; private const int InitialImageStateSize = 8; - private const int HandleHigh = 16; - private const int HandleMask = (1 << HandleHigh) - 1; - - private const int SlotHigh = 16; - private const int SlotMask = (1 << SlotHigh) - 1; - private readonly GpuContext _context; private readonly bool _isCompute; @@ -348,19 +342,7 @@ namespace Ryujinx.Graphics.Gpu.Image { TextureBindingInfo bindingInfo = _textureBindings[stageIndex][index]; - int textureBufferIndex; - int samplerBufferIndex; - - if (bindingInfo.CbufSlot < 0) - { - textureBufferIndex = _textureBufferIndex; - samplerBufferIndex = textureBufferIndex; - } - else - { - textureBufferIndex = bindingInfo.CbufSlot & SlotMask; - samplerBufferIndex = ((bindingInfo.CbufSlot >> SlotHigh) != 0) ? 
(bindingInfo.CbufSlot >> SlotHigh) - 1 : textureBufferIndex; - } + (int textureBufferIndex, int samplerBufferIndex) = TextureHandle.UnpackSlots(bindingInfo.CbufSlot, _textureBufferIndex); int packedId = ReadPackedId(stageIndex, bindingInfo.Handle, textureBufferIndex, samplerBufferIndex); int textureId = UnpackTextureId(packedId); @@ -440,19 +422,7 @@ namespace Ryujinx.Graphics.Gpu.Image { TextureBindingInfo bindingInfo = _imageBindings[stageIndex][index]; - int textureBufferIndex; - int samplerBufferIndex; - - if (bindingInfo.CbufSlot < 0) - { - textureBufferIndex = _textureBufferIndex; - samplerBufferIndex = textureBufferIndex; - } - else - { - textureBufferIndex = bindingInfo.CbufSlot & SlotMask; - samplerBufferIndex = ((bindingInfo.CbufSlot >> SlotHigh) != 0) ? (bindingInfo.CbufSlot >> SlotHigh) - 1 : textureBufferIndex; - } + (int textureBufferIndex, int samplerBufferIndex) = TextureHandle.UnpackSlots(bindingInfo.CbufSlot, _textureBufferIndex); int packedId = ReadPackedId(stageIndex, bindingInfo.Handle, textureBufferIndex, samplerBufferIndex); int textureId = UnpackTextureId(packedId); @@ -522,8 +492,9 @@ namespace Ryujinx.Graphics.Gpu.Image int handle, int cbufSlot) { - int textureBufferIndex = cbufSlot < 0 ? 
bufferIndex : cbufSlot & SlotMask; - int packedId = ReadPackedId(stageIndex, handle, textureBufferIndex, textureBufferIndex); + (int textureBufferIndex, int samplerBufferIndex) = TextureHandle.UnpackSlots(cbufSlot, bufferIndex); + + int packedId = ReadPackedId(stageIndex, handle, textureBufferIndex, samplerBufferIndex); int textureId = UnpackTextureId(packedId); ulong poolAddress = _channel.MemoryManager.Translate(poolGpuVa); @@ -544,11 +515,13 @@ namespace Ryujinx.Graphics.Gpu.Image /// The packed texture and sampler ID (the real texture handle) private int ReadPackedId(int stageIndex, int wordOffset, int textureBufferIndex, int samplerBufferIndex) { + (int textureWordOffset, int samplerWordOffset, TextureHandleType handleType) = TextureHandle.UnpackOffsets(wordOffset); + ulong textureBufferAddress = _isCompute ? _channel.BufferManager.GetComputeUniformBufferAddress(textureBufferIndex) : _channel.BufferManager.GetGraphicsUniformBufferAddress(stageIndex, textureBufferIndex); - int handle = _channel.MemoryManager.Physical.Read(textureBufferAddress + (ulong)(wordOffset & HandleMask) * 4); + int handle = _channel.MemoryManager.Physical.Read(textureBufferAddress + (uint)textureWordOffset * 4); // The "wordOffset" (which is really the immediate value used on texture instructions on the shader) // is a 13-bit value. However, in order to also support separate samplers and textures (which uses @@ -556,13 +529,20 @@ namespace Ryujinx.Graphics.Gpu.Image // another offset for the sampler. // The shader translator has code to detect separate texture and sampler uses with a bindless texture, // turn that into a regular texture access and produce those special handles with values on the higher 16 bits. - if (wordOffset >> HandleHigh != 0) + if (handleType != TextureHandleType.CombinedSampler) { ulong samplerBufferAddress = _isCompute ? 
_channel.BufferManager.GetComputeUniformBufferAddress(samplerBufferIndex) : _channel.BufferManager.GetGraphicsUniformBufferAddress(stageIndex, samplerBufferIndex); - handle |= _channel.MemoryManager.Physical.Read(samplerBufferAddress + (ulong)((wordOffset >> HandleHigh) - 1) * 4); + int samplerHandle = _channel.MemoryManager.Physical.Read(samplerBufferAddress + (uint)samplerWordOffset * 4); + + if (handleType == TextureHandleType.SeparateSamplerId) + { + samplerHandle <<= 20; + } + + handle |= samplerHandle; } return handle; diff --git a/Ryujinx.Graphics.Shader/TextureHandle.cs b/Ryujinx.Graphics.Shader/TextureHandle.cs new file mode 100644 index 000000000..b3712e6bf --- /dev/null +++ b/Ryujinx.Graphics.Shader/TextureHandle.cs @@ -0,0 +1,54 @@ +using System.Runtime.CompilerServices; + +namespace Ryujinx.Graphics.Shader +{ + public enum TextureHandleType + { + CombinedSampler = 0, // Must be 0. + SeparateSamplerHandle = 1, + SeparateSamplerId = 2 + } + + public static class TextureHandle + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int PackSlots(int cbufSlot0, int cbufSlot1) + { + return cbufSlot0 | ((cbufSlot1 + 1) << 16); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static (int, int) UnpackSlots(int slots, int defaultTextureBufferIndex) + { + int textureBufferIndex; + int samplerBufferIndex; + + if (slots < 0) + { + textureBufferIndex = defaultTextureBufferIndex; + samplerBufferIndex = textureBufferIndex; + } + else + { + uint high = (uint)slots >> 16; + + textureBufferIndex = (ushort)slots; + samplerBufferIndex = high != 0 ? 
(int)high - 1 : textureBufferIndex; + } + + return (textureBufferIndex, samplerBufferIndex); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int PackOffsets(int cbufOffset0, int cbufOffset1, TextureHandleType type) + { + return cbufOffset0 | (cbufOffset1 << 14) | ((int)type << 28); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static (int, int, TextureHandleType) UnpackOffsets(int handle) + { + return (handle & 0x3fff, (handle >> 14) & 0x3fff, (TextureHandleType)((uint)handle >> 28)); + } + } +} diff --git a/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs b/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs index e2f2b752a..a76df6a17 100644 --- a/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs +++ b/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs @@ -51,6 +51,60 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations Operand src0 = Utils.FindLastOperation(handleCombineOp.GetSource(0), block); Operand src1 = Utils.FindLastOperation(handleCombineOp.GetSource(1), block); + TextureHandleType handleType = TextureHandleType.SeparateSamplerHandle; + + // Try to match masked pattern: + // - samplerHandle = samplerHandle & 0xFFF00000; + // - textureHandle = textureHandle & 0xFFFFF; + // - combinedHandle = samplerHandle | textureHandle; + // where samplerHandle and textureHandle comes from a constant buffer, and shifted pattern: + // - samplerHandle = samplerId << 20; + // - combinedHandle = samplerHandle | textureHandle; + // where samplerId and textureHandle comes from a constant buffer. 
+ if (src0.AsgOp is Operation src0AsgOp) + { + if (src1.AsgOp is Operation src1AsgOp && + src0AsgOp.Inst == Instruction.BitwiseAnd && + src1AsgOp.Inst == Instruction.BitwiseAnd) + { + src0 = GetSourceForMaskedHandle(src0AsgOp, 0xFFFFF); + src1 = GetSourceForMaskedHandle(src1AsgOp, 0xFFF00000); + + // The OR operation is commutative, so we can also try to swap the operands to get a match. + if (src0 == null || src1 == null) + { + src0 = GetSourceForMaskedHandle(src1AsgOp, 0xFFFFF); + src1 = GetSourceForMaskedHandle(src0AsgOp, 0xFFF00000); + } + + if (src0 == null || src1 == null) + { + continue; + } + } + else if (src0AsgOp.Inst == Instruction.ShiftLeft) + { + Operand shift = src0AsgOp.GetSource(1); + + if (shift.Type == OperandType.Constant && shift.Value == 20) + { + src0 = src1; + src1 = src0AsgOp.GetSource(0); + handleType = TextureHandleType.SeparateSamplerId; + } + } + } + else if (src1.AsgOp is Operation src1AsgOp && src1AsgOp.Inst == Instruction.ShiftLeft) + { + Operand shift = src1AsgOp.GetSource(1); + + if (shift.Type == OperandType.Constant && shift.Value == 20) + { + src1 = src1AsgOp.GetSource(0); + handleType = TextureHandleType.SeparateSamplerId; + } + } + if (src0.Type != OperandType.ConstantBuffer || src1.Type != OperandType.ConstantBuffer) { continue; @@ -59,8 +113,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations SetHandle( config, texOp, - src0.GetCbufOffset() | ((src1.GetCbufOffset() + 1) << 16), - src0.GetCbufSlot() | ((src1.GetCbufSlot() + 1) << 16), + TextureHandle.PackOffsets(src0.GetCbufOffset(), src1.GetCbufOffset(), handleType), + TextureHandle.PackSlots(src0.GetCbufSlot(), src1.GetCbufSlot()), rewriteSamplerType); } else if (texOp.Inst == Instruction.ImageLoad || @@ -89,10 +143,41 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations } } + private static Operand GetSourceForMaskedHandle(Operation asgOp, uint mask) + { + // Assume it was already checked that the operation is bitwise AND. 
+ Operand src0 = asgOp.GetSource(0); + Operand src1 = asgOp.GetSource(1); + + if (src0.Type == OperandType.ConstantBuffer && src1.Type == OperandType.ConstantBuffer) + { + // We can't check if the mask matches here as both operands are from a constant buffer. + // Be optimistic and assume it matches. Avoid constant buffer 1 as official drivers + // uses this one to store compiler constants. + return src0.GetCbufSlot() == 1 ? src1 : src0; + } + else if (src0.Type == OperandType.ConstantBuffer && src1.Type == OperandType.Constant) + { + if ((uint)src1.Value == mask) + { + return src0; + } + } + else if (src0.Type == OperandType.Constant && src1.Type == OperandType.ConstantBuffer) + { + if ((uint)src0.Value == mask) + { + return src1; + } + } + + return null; + } + private static void SetHandle(ShaderConfig config, TextureOperation texOp, int cbufOffset, int cbufSlot, bool rewriteSamplerType) { texOp.SetHandle(cbufOffset, cbufSlot); - + if (rewriteSamplerType) { texOp.Type = config.GpuAccessor.QuerySamplerType(cbufOffset, cbufSlot);