Ryujinx/Ryujinx.Graphics.Shader/Translation/Translator.cs
riperiperi 484eb645ae
Implement Zero-Configuration Resolution Scaling (#1365)
* Initial implementation of Render Target Scaling

Works with most games I have. No GUI option right now, it is hardcoded.

Missing handling for texelFetch operation.

* Realtime Configuration, refactoring.

* texelFetch scaling on fragment shader (WIP)

* Improve Shader-Side changes.

* Fix potential crash when no color/depth bound

* Workaround random uses of textures in compute.

This was blacklisting textures in a few games despite causing no bugs. Will eventually add full support so this doesn't break anything.

* Fix scales oscillating when changing between non-native scales.

* Scaled textures on compute, cleanup, lazier uniform update.

* Cleanup.

* Fix stupidity

* Address Thog Feedback.

* Cover most of GDK's feedback (two comments remain)

* Fix bad rename

* Move IsDepthStencil to FormatExtensions, add docs.

* Fix default config, square texture detection.

* Three final fixes:

- Nearest copy when texture is integer format.
- Texture2D -> Texture3D copy correctly blacklists the texture before trying an unscaled copy (caused driver error)
- Discount small textures.

* Remove scale threshold.

Not needed right now - we'll see if we run into problems.

* All CPU modification blacklists scale.

* Fix comment.
2020-07-07 04:41:07 +02:00

289 lines
9.8 KiB
C#

using Ryujinx.Graphics.Shader.CodeGen.Glsl;
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation.Optimizations;
using System;
using System.Collections.Generic;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Translation
{
public static class Translator
{
private const int HeaderSize = 0x50;
public static ShaderProgram Translate(ulong address, IGpuAccessor gpuAccessor, TranslationFlags flags)
{
Operation[] ops = DecodeShader(address, gpuAccessor, flags, out ShaderConfig config, out int size, out FeatureFlags featureFlags);
config.UsedFeatures = featureFlags;
return Translate(ops, config, size);
}
public static ShaderProgram Translate(ulong addressA, ulong addressB, IGpuAccessor gpuAccessor, TranslationFlags flags)
{
Operation[] opsA = DecodeShader(addressA, gpuAccessor, flags | TranslationFlags.VertexA, out _, out int sizeA, out FeatureFlags featureFlagsA);
Operation[] opsB = DecodeShader(addressB, gpuAccessor, flags, out ShaderConfig config, out int sizeB, out FeatureFlags featureFlagsB);
config.UsedFeatures = featureFlagsA | featureFlagsB;
return Translate(Combine(opsA, opsB), config, sizeB, sizeA);
}
private static ShaderProgram Translate(Operation[] ops, ShaderConfig config, int size, int sizeA = 0)
{
BasicBlock[] blocks = ControlFlowGraph.MakeCfg(ops);
if (blocks.Length > 0)
{
Dominance.FindDominators(blocks[0], blocks.Length);
Dominance.FindDominanceFrontiers(blocks);
Ssa.Rename(blocks);
Optimizer.RunPass(blocks, config);
Lowering.RunPass(blocks, config);
}
StructuredProgramInfo sInfo = StructuredProgram.MakeStructuredProgram(blocks, config);
GlslProgram program = GlslGenerator.Generate(sInfo, config);
ShaderProgramInfo spInfo = new ShaderProgramInfo(
program.CBufferDescriptors,
program.SBufferDescriptors,
program.TextureDescriptors,
program.ImageDescriptors,
sInfo.UsesInstanceId);
string glslCode = program.Code;
return new ShaderProgram(spInfo, config.Stage, glslCode, size, sizeA);
}
private static Operation[] DecodeShader(
ulong address,
IGpuAccessor gpuAccessor,
TranslationFlags flags,
out ShaderConfig config,
out int size,
out FeatureFlags featureFlags)
{
Block[] cfg;
if ((flags & TranslationFlags.Compute) != 0)
{
config = new ShaderConfig(gpuAccessor, flags);
cfg = Decoder.Decode(gpuAccessor, address);
}
else
{
config = new ShaderConfig(new ShaderHeader(gpuAccessor, address), gpuAccessor, flags);
cfg = Decoder.Decode(gpuAccessor, address + HeaderSize);
}
if (cfg == null)
{
gpuAccessor.Log("Invalid branch detected, failed to build CFG.");
size = 0;
featureFlags = FeatureFlags.None;
return Array.Empty<Operation>();
}
EmitterContext context = new EmitterContext(config);
ulong maxEndAddress = 0;
for (int blkIndex = 0; blkIndex < cfg.Length; blkIndex++)
{
Block block = cfg[blkIndex];
if (maxEndAddress < block.EndAddress)
{
maxEndAddress = block.EndAddress;
}
context.CurrBlock = block;
context.MarkLabel(context.GetLabel(block.Address));
for (int opIndex = 0; opIndex < block.OpCodes.Count; opIndex++)
{
OpCode op = block.OpCodes[opIndex];
if ((flags & TranslationFlags.DebugMode) != 0)
{
string instName;
if (op.Emitter != null)
{
instName = op.Emitter.Method.Name;
}
else
{
instName = "???";
gpuAccessor.Log($"Invalid instruction at 0x{op.Address:X6} (0x{op.RawOpCode:X16}).");
}
string dbgComment = $"0x{op.Address:X6}: 0x{op.RawOpCode:X16} {instName}";
context.Add(new CommentNode(dbgComment));
}
if (op.NeverExecute)
{
continue;
}
Operand predSkipLbl = null;
bool skipPredicateCheck = op is OpCodeBranch opBranch && !opBranch.PushTarget;
if (op is OpCodeBranchPop opBranchPop)
{
// If the instruction is a SYNC or BRK instruction with only one
// possible target address, then the instruction is basically
// just a simple branch, we can generate code similar to branch
// instructions, with the condition check on the branch itself.
skipPredicateCheck = opBranchPop.Targets.Count < 2;
}
if (!(op.Predicate.IsPT || skipPredicateCheck))
{
Operand label;
if (opIndex == block.OpCodes.Count - 1 && block.Next != null)
{
label = context.GetLabel(block.Next.Address);
}
else
{
label = Label();
predSkipLbl = label;
}
Operand pred = Register(op.Predicate);
if (op.InvertPredicate)
{
context.BranchIfTrue(label, pred);
}
else
{
context.BranchIfFalse(label, pred);
}
}
context.CurrOp = op;
op.Emitter?.Invoke(context);
if (predSkipLbl != null)
{
context.MarkLabel(predSkipLbl);
}
}
}
size = (int)maxEndAddress + (((flags & TranslationFlags.Compute) != 0) ? 0 : HeaderSize);
featureFlags = context.UsedFeatures;
return context.GetOperations();
}
private static Operation[] Combine(Operation[] a, Operation[] b)
{
// Here we combine two shaders.
// For shader A:
// - All user attribute stores on shader A are turned into copies to a
// temporary variable. It's assumed that shader B will consume them.
// - All return instructions are turned into branch instructions, the
// branch target being the start of the shader B code.
// For shader B:
// - All user attribute loads on shader B are turned into copies from a
// temporary variable, as long that attribute is written by shader A.
List<Operation> output = new List<Operation>(a.Length + b.Length);
Operand[] temps = new Operand[AttributeConsts.UserAttributesCount * 4];
Operand lblB = Label();
for (int index = 0; index < a.Length; index++)
{
Operation operation = a[index];
if (IsUserAttribute(operation.Dest))
{
int tIndex = (operation.Dest.Value - AttributeConsts.UserAttributeBase) / 4;
Operand temp = temps[tIndex];
if (temp == null)
{
temp = Local();
temps[tIndex] = temp;
}
operation.Dest = temp;
}
if (operation.Inst == Instruction.Return)
{
output.Add(new Operation(Instruction.Branch, lblB));
}
else
{
output.Add(operation);
}
}
output.Add(new Operation(Instruction.MarkLabel, lblB));
for (int index = 0; index < b.Length; index++)
{
Operation operation = b[index];
for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
{
Operand src = operation.GetSource(srcIndex);
if (IsUserAttribute(src))
{
Operand temp = temps[(src.Value - AttributeConsts.UserAttributeBase) / 4];
if (temp != null)
{
operation.SetSource(srcIndex, temp);
}
}
}
output.Add(operation);
}
return output.ToArray();
}
private static bool IsUserAttribute(Operand operand)
{
return operand != null &&
operand.Type == OperandType.Attribute &&
operand.Value >= AttributeConsts.UserAttributeBase &&
operand.Value < AttributeConsts.UserAttributeEnd;
}
}
}