Implement shader LEA instruction and improve bindless image load/store (#1355)
This commit is contained in:
parent
76e5af967a
commit
e13154c83d
7 changed files with 82 additions and 43 deletions
|
@ -4,12 +4,12 @@ namespace Ryujinx.Graphics.Shader.Decoders
|
||||||
{
|
{
|
||||||
public static bool Extract(this int value, int lsb)
|
public static bool Extract(this int value, int lsb)
|
||||||
{
|
{
|
||||||
return ((int)(value >> lsb) & 1) != 0;
|
return ((value >> lsb) & 1) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int Extract(this int value, int lsb, int length)
|
public static int Extract(this int value, int lsb, int length)
|
||||||
{
|
{
|
||||||
return (int)(value >> lsb) & (int)(uint.MaxValue >> (32 - length));
|
return (value >> lsb) & (int)(uint.MaxValue >> (32 - length));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static bool Extract(this long value, int lsb)
|
public static bool Extract(this long value, int lsb)
|
||||||
|
|
|
@ -176,6 +176,9 @@ namespace Ryujinx.Graphics.Shader.Decoders
|
||||||
Set("1110111110010x", InstEmit.Ldc, typeof(OpCodeLdc));
|
Set("1110111110010x", InstEmit.Ldc, typeof(OpCodeLdc));
|
||||||
Set("1110111011010x", InstEmit.Ldg, typeof(OpCodeMemory));
|
Set("1110111011010x", InstEmit.Ldg, typeof(OpCodeMemory));
|
||||||
Set("1110111101001x", InstEmit.Lds, typeof(OpCodeMemory));
|
Set("1110111101001x", InstEmit.Lds, typeof(OpCodeMemory));
|
||||||
|
Set("010010111101xx", InstEmit.Lea, typeof(OpCodeAluCbuf));
|
||||||
|
Set("0011011x11010x", InstEmit.Lea, typeof(OpCodeAluImm));
|
||||||
|
Set("0101101111010x", InstEmit.Lea, typeof(OpCodeAluReg));
|
||||||
Set("0100110001000x", InstEmit.Lop, typeof(OpCodeLopCbuf));
|
Set("0100110001000x", InstEmit.Lop, typeof(OpCodeLopCbuf));
|
||||||
Set("0011100001000x", InstEmit.Lop, typeof(OpCodeLopImm));
|
Set("0011100001000x", InstEmit.Lop, typeof(OpCodeLopImm));
|
||||||
Set("000001xxxxxxxx", InstEmit.Lop, typeof(OpCodeLopImm32));
|
Set("000001xxxxxxxx", InstEmit.Lop, typeof(OpCodeLopImm32));
|
||||||
|
|
|
@ -384,6 +384,27 @@ namespace Ryujinx.Graphics.Shader.Instructions
|
||||||
context.Copy(Register(op.Predicate0), p1Res);
|
context.Copy(Register(op.Predicate0), p1Res);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Emits the operations for the shader LEA instruction: source A is shifted left by an
/// immediate amount, passed through INegate together with the opcode's negate flag,
/// added to source B, and the sum is copied to the destination.
/// </summary>
/// <param name="context">Emitter context that supplies the current opcode and receives the emitted operations.</param>
public static void Lea(EmitterContext context)
|
||||||
|
{
|
||||||
|
OpCodeAlu op = (OpCodeAlu)context.CurrOp;
|
||||||
|
|
||||||
|
// Bit 45 of the raw opcode is read as the "negate A" flag.
bool negateA = op.RawOpCode.Extract(45);
|
||||||
|
|
||||||
|
// 5-bit field starting at bit 39 of the raw opcode, used below as the left-shift amount for A.
int shift = op.RawOpCode.Extract(39, 5);
|
||||||
|
|
||||||
|
Operand srcA = GetSrcA(context);
|
||||||
|
Operand srcB = GetSrcB(context);
|
||||||
|
|
||||||
|
// The shift is applied first; the negate step operates on the already shifted value.
srcA = context.ShiftLeft(srcA, Const(shift));
|
||||||
|
// NOTE(review): INegate is defined elsewhere; presumably it negates only when negateA is true - confirm.
srcA = context.INegate(srcA, negateA);
|
||||||
|
|
||||||
|
Operand res = context.IAdd(srcA, srcB);
|
||||||
|
|
||||||
|
context.Copy(GetDest(context), res);
|
||||||
|
|
||||||
|
// NOTE(review): presumably refers to the condition-code (CC) and extended (X) modifiers,
// neither of which is handled by the code above - confirm.
// TODO: CC, X
|
||||||
|
}
|
||||||
|
|
||||||
public static void Lop(EmitterContext context)
|
public static void Lop(EmitterContext context)
|
||||||
{
|
{
|
||||||
IOpCodeLop op = (IOpCodeLop)context.CurrOp;
|
IOpCodeLop op = (IOpCodeLop)context.CurrOp;
|
||||||
|
|
|
@ -99,7 +99,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
|
||||||
|
|
||||||
if (!op.IsBindless)
|
if (!op.IsBindless)
|
||||||
{
|
{
|
||||||
operation.Format = GetTextureFormat(context, handle);
|
operation.Format = context.Config.GetTextureFormat(handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
context.Add(operation);
|
context.Add(operation);
|
||||||
|
@ -228,7 +228,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
|
||||||
|
|
||||||
if (!op.IsBindless)
|
if (!op.IsBindless)
|
||||||
{
|
{
|
||||||
format = GetTextureFormat(context, op.Immediate);
|
format = context.Config.GetTextureFormat(op.Immediate);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -1223,27 +1223,6 @@ namespace Ryujinx.Graphics.Shader.Instructions
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
private static TextureFormat GetTextureFormat(EmitterContext context, int handle)
|
|
||||||
{
|
|
||||||
// When the formatted load extension is supported, we don't need to
|
|
||||||
// specify a format, we can just declare it without a format and the GPU will handle it.
|
|
||||||
if (context.Config.GpuAccessor.QuerySupportsImageLoadFormatted())
|
|
||||||
{
|
|
||||||
return TextureFormat.Unknown;
|
|
||||||
}
|
|
||||||
|
|
||||||
var format = context.Config.GpuAccessor.QueryTextureFormat(handle);
|
|
||||||
|
|
||||||
if (format == TextureFormat.Unknown)
|
|
||||||
{
|
|
||||||
context.Config.GpuAccessor.Log($"Unknown format for texture {handle}.");
|
|
||||||
|
|
||||||
format = TextureFormat.R8G8B8A8Unorm;
|
|
||||||
}
|
|
||||||
|
|
||||||
return format;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static TextureFormat GetTextureFormat(IntegerSize size)
|
private static TextureFormat GetTextureFormat(IntegerSize size)
|
||||||
{
|
{
|
||||||
return size switch
|
return size switch
|
||||||
|
|
|
@ -5,7 +5,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
||||||
{
|
{
|
||||||
class BindlessElimination
|
class BindlessElimination
|
||||||
{
|
{
|
||||||
public static void RunPass(BasicBlock block)
|
private const int NvnTextureBufferSlot = 2;
|
||||||
|
|
||||||
|
public static void RunPass(BasicBlock block, ShaderConfig config)
|
||||||
{
|
{
|
||||||
// We can turn a bindless into regular access by recognizing the pattern
|
// We can turn a bindless into regular access by recognizing the pattern
|
||||||
// produced by the compiler for separate texture and sampler.
|
// produced by the compiler for separate texture and sampler.
|
||||||
|
@ -24,6 +26,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (texOp.Inst == Instruction.TextureSample)
|
||||||
|
{
|
||||||
if (!(texOp.GetSource(0).AsgOp is Operation handleCombineOp))
|
if (!(texOp.GetSource(0).AsgOp is Operation handleCombineOp))
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
|
@ -37,14 +41,25 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
||||||
Operand src0 = handleCombineOp.GetSource(0);
|
Operand src0 = handleCombineOp.GetSource(0);
|
||||||
Operand src1 = handleCombineOp.GetSource(1);
|
Operand src1 = handleCombineOp.GetSource(1);
|
||||||
|
|
||||||
if (src0.Type != OperandType.ConstantBuffer || src0.GetCbufSlot() != 2 ||
|
if (src0.Type != OperandType.ConstantBuffer || src0.GetCbufSlot() != NvnTextureBufferSlot ||
|
||||||
src1.Type != OperandType.ConstantBuffer || src1.GetCbufSlot() != 2)
|
src1.Type != OperandType.ConstantBuffer || src1.GetCbufSlot() != NvnTextureBufferSlot)
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
texOp.SetHandle(src0.GetCbufOffset() | (src1.GetCbufOffset() << 16));
|
texOp.SetHandle(src0.GetCbufOffset() | (src1.GetCbufOffset() << 16));
|
||||||
}
|
}
|
||||||
|
else if (texOp.Inst == Instruction.ImageLoad || texOp.Inst == Instruction.ImageStore)
|
||||||
|
{
|
||||||
|
Operand src0 = texOp.GetSource(0);
|
||||||
|
|
||||||
|
if (src0.Type == OperandType.ConstantBuffer && src0.GetCbufSlot() == NvnTextureBufferSlot)
|
||||||
|
{
|
||||||
|
texOp.SetHandle(src0.GetCbufOffset());
|
||||||
|
texOp.Format = config.GetTextureFormat(texOp.Handle);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -89,7 +89,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
||||||
for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
|
for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
|
||||||
{
|
{
|
||||||
BindlessToIndexed.RunPass(blocks[blkIndex]);
|
BindlessToIndexed.RunPass(blocks[blkIndex]);
|
||||||
BindlessElimination.RunPass(blocks[blkIndex]);
|
BindlessElimination.RunPass(blocks[blkIndex], config);
|
||||||
|
|
||||||
// Try to eliminate any operations that are now unused.
|
// Try to eliminate any operations that are now unused.
|
||||||
LinkedListNode<INode> node = blocks[blkIndex].Operations.First;
|
LinkedListNode<INode> node = blocks[blkIndex].Operations.First;
|
||||||
|
|
|
@ -68,5 +68,26 @@ namespace Ryujinx.Graphics.Shader.Translation
|
||||||
// The depth register is always two registers after the last color output.
|
// The depth register is always two registers after the last color output.
|
||||||
return count + 1;
|
return count + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Gets the texture format to use for the image identified by <paramref name="handle"/>.
/// </summary>
/// <param name="handle">Texture handle that is passed to the GPU accessor's format query.</param>
/// <returns>
/// <see cref="TextureFormat.Unknown"/> when the GPU accessor reports formatted image load support;
/// otherwise the queried format, or <see cref="TextureFormat.R8G8B8A8Unorm"/> when the query itself
/// returns <see cref="TextureFormat.Unknown"/>.
/// </returns>
public TextureFormat GetTextureFormat(int handle)
|
||||||
|
{
|
||||||
|
// When the formatted load extension is supported, we don't need to
|
||||||
|
// specify a format, we can just declare it without a format and the GPU will handle it.
|
||||||
|
if (GpuAccessor.QuerySupportsImageLoadFormatted())
|
||||||
|
{
|
||||||
|
return TextureFormat.Unknown;
|
||||||
|
}
|
||||||
|
|
||||||
|
var format = GpuAccessor.QueryTextureFormat(handle);
|
||||||
|
|
||||||
|
if (format == TextureFormat.Unknown)
|
||||||
|
{
|
||||||
|
// Report the missing format through the accessor, then fall back to a fixed
// default so that a concrete format is still returned on this path.
GpuAccessor.Log($"Unknown format for texture {handle}.");
|
||||||
|
|
||||||
|
format = TextureFormat.R8G8B8A8Unorm;
|
||||||
|
}
|
||||||
|
|
||||||
|
return format;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
Loading…
Reference in a new issue