Implement BFI, BRK, FLO, FSWZADD, PBK, SHFL and TXD shader instructions, misc. fixes

This commit is contained in:
gdk 2019-10-31 00:29:22 -03:00 committed by Thog
parent d786d8d2b9
commit 278a4c317c
38 changed files with 972 additions and 166 deletions

View file

@ -0,0 +1,139 @@
using System;
using System.IO;
using System.Reflection;
using System.Threading.Tasks;
namespace Ryujinx.Common
{
    /// <summary>
    /// Helpers for reading resources embedded in assembly manifests.
    /// </summary>
    /// <remarks>
    /// The overloads that take only a file name accept names of the form
    /// "AssemblyName/path/to/file". When the first path segment equals the simple name of an
    /// assembly loaded in the current <see cref="AppDomain"/>, the remainder is looked up in
    /// that assembly; otherwise the whole name is looked up in the assembly that defines this class.
    /// The manifest name is built as "&lt;assembly simple name&gt;.&lt;path with '/' replaced by '.'&gt;".
    /// Every reader returns <c>null</c> when the resource does not exist.
    /// </remarks>
    public static class EmbeddedResources
    {
        private static readonly Assembly ResourceAssembly;

        static EmbeddedResources()
        {
            ResourceAssembly = Assembly.GetAssembly(typeof(EmbeddedResources));
        }

        /// <summary>Reads a resource as a byte array, resolving the assembly from the file name.</summary>
        /// <param name="filename">Resource path, optionally prefixed with "AssemblyName/".</param>
        /// <returns>The resource contents, or <c>null</c> when it is not found.</returns>
        public static byte[] Read(string filename)
        {
            var (assembly, path) = ResolveManifestPath(filename);

            return Read(assembly, path);
        }

        /// <summary>Asynchronous counterpart of <see cref="Read(string)"/>.</summary>
        /// <param name="filename">Resource path, optionally prefixed with "AssemblyName/".</param>
        /// <returns>A task producing the resource contents, or <c>null</c> when it is not found.</returns>
        public static Task<byte[]> ReadAsync(string filename)
        {
            var (assembly, path) = ResolveManifestPath(filename);

            return ReadAsync(assembly, path);
        }

        /// <summary>Reads a resource of the given assembly as a byte array.</summary>
        /// <param name="assembly">Assembly whose manifest contains the resource.</param>
        /// <param name="filename">Resource path relative to the assembly name.</param>
        /// <returns>The resource contents, or <c>null</c> when it is not found.</returns>
        public static byte[] Read(Assembly assembly, string filename)
        {
            using (var stream = GetStream(assembly, filename))
            {
                if (stream == null)
                {
                    return null;
                }

                using (var mem = new MemoryStream())
                {
                    stream.CopyTo(mem);

                    return mem.ToArray();
                }
            }
        }

        /// <summary>Asynchronous counterpart of <see cref="Read(Assembly, string)"/>.</summary>
        /// <param name="assembly">Assembly whose manifest contains the resource.</param>
        /// <param name="filename">Resource path relative to the assembly name.</param>
        /// <returns>A task producing the resource contents, or <c>null</c> when it is not found.</returns>
        public static async Task<byte[]> ReadAsync(Assembly assembly, string filename)
        {
            using (var stream = GetStream(assembly, filename))
            {
                if (stream == null)
                {
                    return null;
                }

                using (var mem = new MemoryStream())
                {
                    await stream.CopyToAsync(mem);

                    return mem.ToArray();
                }
            }
        }

        /// <summary>Reads a resource as text, resolving the assembly from the file name.</summary>
        /// <param name="filename">Resource path, optionally prefixed with "AssemblyName/".</param>
        /// <returns>The resource text, or <c>null</c> when it is not found.</returns>
        public static string ReadAllText(string filename)
        {
            var (assembly, path) = ResolveManifestPath(filename);

            return ReadAllText(assembly, path);
        }

        /// <summary>Asynchronous counterpart of <see cref="ReadAllText(string)"/>.</summary>
        /// <param name="filename">Resource path, optionally prefixed with "AssemblyName/".</param>
        /// <returns>A task producing the resource text, or <c>null</c> when it is not found.</returns>
        public static Task<string> ReadAllTextAsync(string filename)
        {
            var (assembly, path) = ResolveManifestPath(filename);

            return ReadAllTextAsync(assembly, path);
        }

        /// <summary>Reads a resource of the given assembly as text using a <see cref="StreamReader"/>.</summary>
        /// <param name="assembly">Assembly whose manifest contains the resource.</param>
        /// <param name="filename">Resource path relative to the assembly name.</param>
        /// <returns>The resource text, or <c>null</c> when it is not found.</returns>
        public static string ReadAllText(Assembly assembly, string filename)
        {
            using (var stream = GetStream(assembly, filename))
            {
                if (stream == null)
                {
                    return null;
                }

                using (var reader = new StreamReader(stream))
                {
                    return reader.ReadToEnd();
                }
            }
        }

        /// <summary>Asynchronous counterpart of <see cref="ReadAllText(Assembly, string)"/>.</summary>
        /// <param name="assembly">Assembly whose manifest contains the resource.</param>
        /// <param name="filename">Resource path relative to the assembly name.</param>
        /// <returns>A task producing the resource text, or <c>null</c> when it is not found.</returns>
        public static async Task<string> ReadAllTextAsync(Assembly assembly, string filename)
        {
            using (var stream = GetStream(assembly, filename))
            {
                if (stream == null)
                {
                    return null;
                }

                using (var reader = new StreamReader(stream))
                {
                    return await reader.ReadToEndAsync();
                }
            }
        }

        /// <summary>Opens a resource stream, resolving the assembly from the file name.</summary>
        /// <param name="filename">Resource path, optionally prefixed with "AssemblyName/".</param>
        /// <returns>The resource stream, or <c>null</c> when it is not found. The caller disposes it.</returns>
        public static Stream GetStream(string filename)
        {
            var (assembly, path) = ResolveManifestPath(filename);

            // Use the resolved path: the assembly prefix has already been consumed, and
            // GetStream(Assembly, string) prepends the assembly name again on its own.
            return GetStream(assembly, path);
        }

        /// <summary>Opens a resource stream from the given assembly.</summary>
        /// <param name="assembly">Assembly whose manifest contains the resource.</param>
        /// <param name="filename">Resource path relative to the assembly name; '/' separators become '.'.</param>
        /// <returns>The resource stream, or <c>null</c> when it is not found. The caller disposes it.</returns>
        public static Stream GetStream(Assembly assembly, string filename)
        {
            string manifestUri = assembly.GetName().Name + "." + filename.Replace('/', '.');

            return assembly.GetManifestResourceStream(manifestUri);
        }

        /// <summary>
        /// Splits "AssemblyName/rest" into the matching loaded assembly and the remaining path.
        /// Falls back to this class's own assembly and the unmodified name when there is no
        /// separator or no loaded assembly has that simple name.
        /// </summary>
        private static (Assembly, string) ResolveManifestPath(string filename)
        {
            var segments = filename.Split(new[] { '/' }, 2, StringSplitOptions.RemoveEmptyEntries);

            if (segments.Length >= 2)
            {
                foreach (var assembly in AppDomain.CurrentDomain.GetAssemblies())
                {
                    if (assembly.GetName().Name == segments[0])
                    {
                        return (assembly, segments[1]);
                    }
                }
            }

            return (ResourceAssembly, filename);
        }
    }
}

View file

@ -5,7 +5,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{ {
class CodeGenContext class CodeGenContext
{ {
private const string Tab = " "; public const string Tab = " ";
public ShaderConfig Config { get; } public ShaderConfig Config { get; }
@ -90,5 +90,10 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
return indentation; return indentation;
} }
/// <summary>
/// Returns the <see cref="Tab"/> constant of this class.
/// </summary>
/// <returns>The value of <see cref="Tab"/>.</returns>
public string GetTabString() => Tab;
} }
} }

View file

@ -1,3 +1,4 @@
using Ryujinx.Common;
using Ryujinx.Graphics.Shader.IntermediateRepresentation; using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr; using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation; using Ryujinx.Graphics.Shader.Translation;
@ -15,6 +16,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
public static void Declare(CodeGenContext context, StructuredProgramInfo info) public static void Declare(CodeGenContext context, StructuredProgramInfo info)
{ {
context.AppendLine("#version 420 core"); context.AppendLine("#version 420 core");
context.AppendLine("#extension GL_ARB_shader_ballot : enable");
context.AppendLine("#extension GL_ARB_shader_storage_buffer_object : enable"); context.AppendLine("#extension GL_ARB_shader_storage_buffer_object : enable");
if (context.Config.Stage == ShaderStage.Compute) if (context.Config.Stage == ShaderStage.Compute)
@ -131,6 +133,31 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
$"local_size_z = {localSizeZ}) in;"); $"local_size_z = {localSizeZ}) in;");
context.AppendLine(); context.AppendLine();
} }
if ((info.HelperFunctionsMask & HelperFunctionsMask.Shuffle) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.ShuffleDown) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleDown.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.ShuffleUp) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleUp.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.ShuffleXor) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.SwizzleAdd) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl");
}
} }
public static void DeclareLocals(CodeGenContext context, StructuredProgramInfo info) public static void DeclareLocals(CodeGenContext context, StructuredProgramInfo info)
@ -321,6 +348,14 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
} }
} }
/// <summary>
/// Appends the text of an embedded helper-function resource to the shader being generated,
/// followed by an empty line.
/// </summary>
/// <param name="context">Code generation context that receives the helper source.</param>
/// <param name="filename">Embedded resource name, passed to <c>EmbeddedResources.ReadAllText</c>.</param>
/// <exception cref="System.InvalidOperationException">The embedded resource could not be found.</exception>
private static void AppendHelperFunction(CodeGenContext context, string filename)
{
    string code = EmbeddedResources.ReadAllText(filename);

    // ReadAllText returns null for a missing resource; fail with a message naming the
    // resource instead of a NullReferenceException on the Replace call below.
    if (code == null)
    {
        throw new System.InvalidOperationException($"Embedded shader helper resource \"{filename}\" was not found.");
    }

    // Tabs in the resource text are replaced with the generator's own Tab string.
    context.AppendLine(code.Replace("\t", CodeGenContext.Tab));
    context.AppendLine();
}
private static string GetSamplerTypeName(SamplerType type) private static string GetSamplerTypeName(SamplerType type)
{ {
string typeName; string typeName;

View file

@ -33,6 +33,15 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
Declarations.DeclareLocals(context, info); Declarations.DeclareLocals(context, info);
// Some games leave some elements of gl_Position uninitialized.
// According to the spec, those elements then contain undefined values,
// but on NVIDIA they seem to always be initialized to (0, 0, 0, 1),
// so we initialize gl_Position explicitly to avoid UB on non-NVIDIA GPUs.
if (context.Config.Stage == ShaderStage.Vertex)
{
context.AppendLine("gl_Position = vec4(0.0, 0.0, 0.0, 1.0);");
}
// Ensure that unused attributes are set, otherwise the downstream // Ensure that unused attributes are set, otherwise the downstream
// compiler may eliminate them. // compiler may eliminate them.
// (Not needed for fragment shader as it is the last stage). // (Not needed for fragment shader as it is the last stage).

View file

@ -0,0 +1,11 @@
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{
    /// <summary>
    /// Names under which the GLSL helper functions are emitted and called.
    /// </summary>
    /// <remarks>
    /// These are fixed identifiers, so they are declared as constants rather than
    /// reassignable static fields.
    /// </remarks>
    static class HelperFunctionNames
    {
        public const string Shuffle     = "Helper_Shuffle";
        public const string ShuffleDown = "Helper_ShuffleDown";
        public const string ShuffleUp   = "Helper_ShuffleUp";
        public const string ShuffleXor  = "Helper_ShuffleXor";
        public const string SwizzleAdd  = "Helper_SwizzleAdd";
    }
}

View file

@ -0,0 +1,9 @@
// Returns x as held by another invocation of the sub-group, or the caller's own x when the
// computed source lies above the allowed range.
// mask packs two 5-bit fields: bits 0-4 are the clamp value, bits 8-12 the segment mask.
// The source id keeps the caller's segment bits and takes its remaining bits from index.
float Helper_Shuffle(float x, uint index, uint mask)
{
    uint segMask = (mask >> 8) & 0x1fu;
    uint clampBits = mask & 0x1fu;
    uint segBase = gl_SubGroupInvocationARB & segMask;
    uint srcThreadId = segBase | (index & ~segMask);
    uint lastThreadId = segBase | (clampBits & ~segMask);
    if (srcThreadId > lastThreadId)
    {
        return x;
    }
    return readInvocationARB(x, srcThreadId);
}

View file

@ -0,0 +1,9 @@
// Returns x as held by the invocation `index` positions above the caller, or the caller's
// own x when that source lies above the allowed range.
// mask packs two 5-bit fields: bits 0-4 are the clamp value, bits 8-12 the segment mask.
float Helper_ShuffleDown(float x, uint index, uint mask)
{
    uint segMask = (mask >> 8) & 0x1fu;
    uint clampBits = mask & 0x1fu;
    uint segBase = gl_SubGroupInvocationARB & segMask;
    uint lastThreadId = segBase | (clampBits & ~segMask);
    uint srcThreadId = gl_SubGroupInvocationARB + index;
    if (srcThreadId > lastThreadId)
    {
        return x;
    }
    return readInvocationARB(x, srcThreadId);
}

View file

@ -0,0 +1,8 @@
// Returns x as held by the invocation `index` positions below the caller, or the caller's
// own x when that source would fall below the start of the caller's segment.
// Bits 8-12 of mask are the segment mask; the clamp field (bits 0-4) is not used here.
float Helper_ShuffleUp(float x, uint index, uint mask)
{
    uint segMask = (mask >> 8) & 0x1fu;
    uint minThreadId = gl_SubGroupInvocationARB & segMask;
    uint srcThreadId = gl_SubGroupInvocationARB - index;
    // The subtraction wraps around when index exceeds the caller's invocation id, so the
    // range check must be signed; an unsigned compare would accept the wrapped value.
    bool valid = int(srcThreadId) >= int(minThreadId);
    return valid ? readInvocationARB(x, srcThreadId) : x;
}

View file

@ -0,0 +1,9 @@
// Returns x as held by the invocation whose id is the caller's id XOR index, or the caller's
// own x when that source lies above the allowed range.
// mask packs two 5-bit fields: bits 0-4 are the clamp value, bits 8-12 the segment mask.
float Helper_ShuffleXor(float x, uint index, uint mask)
{
    uint segMask = (mask >> 8) & 0x1fu;
    uint clampBits = mask & 0x1fu;
    uint segBase = gl_SubGroupInvocationARB & segMask;
    uint lastThreadId = segBase | (clampBits & ~segMask);
    uint srcThreadId = gl_SubGroupInvocationARB ^ index;
    if (srcThreadId > lastThreadId)
    {
        return x;
    }
    return readInvocationARB(x, srcThreadId);
}

View file

@ -0,0 +1,7 @@
// Computes x * xLut[i] + y * yLut[i], where i is a 2-bit selector taken from mask.
// mask holds one 2-bit selector per invocation of a group of four; the caller's selector is
// chosen by the low two bits of its sub-group invocation id.
float Helper_SwizzleAdd(float x, float y, int mask)
{
    vec4 xLut = vec4(1.0, -1.0, 1.0, 0.0);
    vec4 yLut = vec4(1.0, 1.0, -1.0, 1.0);
    // Keep only the caller's two selector bits; without the final mask, selectors belonging
    // to higher invocations would push the index past the end of the 4-entry tables.
    int lutIdx = (mask >> (int(gl_SubGroupInvocationARB & 3u) * 2)) & 3;
    return x * xLut[lutIdx] + y * yLut[lutIdx];
}

View file

@ -15,6 +15,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
Add(Instruction.Absolute, InstType.CallUnary, "abs"); Add(Instruction.Absolute, InstType.CallUnary, "abs");
Add(Instruction.Add, InstType.OpBinaryCom, "+", 2); Add(Instruction.Add, InstType.OpBinaryCom, "+", 2);
Add(Instruction.BitCount, InstType.CallUnary, "bitCount");
Add(Instruction.BitfieldExtractS32, InstType.CallTernary, "bitfieldExtract"); Add(Instruction.BitfieldExtractS32, InstType.CallTernary, "bitfieldExtract");
Add(Instruction.BitfieldExtractU32, InstType.CallTernary, "bitfieldExtract"); Add(Instruction.BitfieldExtractU32, InstType.CallTernary, "bitfieldExtract");
Add(Instruction.BitfieldInsert, InstType.CallQuaternary, "bitfieldInsert"); Add(Instruction.BitfieldInsert, InstType.CallQuaternary, "bitfieldInsert");
@ -41,11 +42,15 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
Add(Instruction.ConvertS32ToFP, InstType.CallUnary, "float"); Add(Instruction.ConvertS32ToFP, InstType.CallUnary, "float");
Add(Instruction.ConvertU32ToFP, InstType.CallUnary, "float"); Add(Instruction.ConvertU32ToFP, InstType.CallUnary, "float");
Add(Instruction.Cosine, InstType.CallUnary, "cos"); Add(Instruction.Cosine, InstType.CallUnary, "cos");
Add(Instruction.Ddx, InstType.CallUnary, "dFdx");
Add(Instruction.Ddy, InstType.CallUnary, "dFdy");
Add(Instruction.Discard, InstType.OpNullary, "discard"); Add(Instruction.Discard, InstType.OpNullary, "discard");
Add(Instruction.Divide, InstType.OpBinary, "/", 1); Add(Instruction.Divide, InstType.OpBinary, "/", 1);
Add(Instruction.EmitVertex, InstType.CallNullary, "EmitVertex"); Add(Instruction.EmitVertex, InstType.CallNullary, "EmitVertex");
Add(Instruction.EndPrimitive, InstType.CallNullary, "EndPrimitive"); Add(Instruction.EndPrimitive, InstType.CallNullary, "EndPrimitive");
Add(Instruction.ExponentB2, InstType.CallUnary, "exp2"); Add(Instruction.ExponentB2, InstType.CallUnary, "exp2");
Add(Instruction.FindFirstSetS32, InstType.CallUnary, "findMSB");
Add(Instruction.FindFirstSetU32, InstType.CallUnary, "findMSB");
Add(Instruction.Floor, InstType.CallUnary, "floor"); Add(Instruction.Floor, InstType.CallUnary, "floor");
Add(Instruction.FusedMultiplyAdd, InstType.CallTernary, "fma"); Add(Instruction.FusedMultiplyAdd, InstType.CallTernary, "fma");
Add(Instruction.ImageLoad, InstType.Special); Add(Instruction.ImageLoad, InstType.Special);
@ -66,6 +71,10 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
Add(Instruction.ShiftLeft, InstType.OpBinary, "<<", 3); Add(Instruction.ShiftLeft, InstType.OpBinary, "<<", 3);
Add(Instruction.ShiftRightS32, InstType.OpBinary, ">>", 3); Add(Instruction.ShiftRightS32, InstType.OpBinary, ">>", 3);
Add(Instruction.ShiftRightU32, InstType.OpBinary, ">>", 3); Add(Instruction.ShiftRightU32, InstType.OpBinary, ">>", 3);
Add(Instruction.Shuffle, InstType.CallTernary, HelperFunctionNames.Shuffle);
Add(Instruction.ShuffleDown, InstType.CallTernary, HelperFunctionNames.ShuffleDown);
Add(Instruction.ShuffleUp, InstType.CallTernary, HelperFunctionNames.ShuffleUp);
Add(Instruction.ShuffleXor, InstType.CallTernary, HelperFunctionNames.ShuffleXor);
Add(Instruction.Maximum, InstType.CallBinary, "max"); Add(Instruction.Maximum, InstType.CallBinary, "max");
Add(Instruction.MaximumU32, InstType.CallBinary, "max"); Add(Instruction.MaximumU32, InstType.CallBinary, "max");
Add(Instruction.Minimum, InstType.CallBinary, "min"); Add(Instruction.Minimum, InstType.CallBinary, "min");
@ -80,6 +89,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
Add(Instruction.StoreLocal, InstType.Special); Add(Instruction.StoreLocal, InstType.Special);
Add(Instruction.StoreStorage, InstType.Special); Add(Instruction.StoreStorage, InstType.Special);
Add(Instruction.Subtract, InstType.OpBinary, "-", 2); Add(Instruction.Subtract, InstType.OpBinary, "-", 2);
Add(Instruction.SwizzleAdd, InstType.CallTernary, HelperFunctionNames.SwizzleAdd);
Add(Instruction.TextureSample, InstType.Special); Add(Instruction.TextureSample, InstType.Special);
Add(Instruction.TextureSize, InstType.Special); Add(Instruction.TextureSize, InstType.Special);
Add(Instruction.Truncate, InstType.CallUnary, "trunc"); Add(Instruction.Truncate, InstType.CallUnary, "trunc");

View file

@ -164,13 +164,14 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{ {
AstTextureOperation texOp = (AstTextureOperation)operation; AstTextureOperation texOp = (AstTextureOperation)operation;
bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0; bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
bool isGather = (texOp.Flags & TextureFlags.Gather) != 0; bool isGather = (texOp.Flags & TextureFlags.Gather) != 0;
bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0; bool hasDerivatives = (texOp.Flags & TextureFlags.Derivatives) != 0;
bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0; bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0; bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0;
bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0; bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0;
bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0; bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;
bool isArray = (texOp.Type & SamplerType.Array) != 0; bool isArray = (texOp.Type & SamplerType.Array) != 0;
bool isMultisample = (texOp.Type & SamplerType.Multisample) != 0; bool isMultisample = (texOp.Type & SamplerType.Multisample) != 0;
@ -190,6 +191,10 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{ {
texCall += "Gather"; texCall += "Gather";
} }
else if (hasDerivatives)
{
texCall += "Grad";
}
else if (hasLodLevel && !intCoords) else if (hasLodLevel && !intCoords)
{ {
texCall += "Lod"; texCall += "Lod";
@ -297,6 +302,31 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
Append(AssemblePVector(pCount)); Append(AssemblePVector(pCount));
// Builds the GLSL expression for one derivative operand. Each component comes from one
// call to Src(VariableType.F32); a single component is returned as-is, several are
// wrapped in a "vecN(...)" constructor.
string AssembleDerivativesVector(int count)
{
    if (count <= 1)
    {
        return Src(VariableType.F32);
    }

    string[] components = new string[count];

    for (int i = 0; i < components.Length; i++)
    {
        components[i] = Src(VariableType.F32);
    }

    string joined = string.Join(", ", components);

    return $"vec{count}({joined})";
}
if (hasDerivatives)
{
Append(AssembleDerivativesVector(coordsCount)); // dPdx
Append(AssembleDerivativesVector(coordsCount)); // dPdy
}
if (hasExtraCompareArg) if (hasExtraCompareArg)
{ {
Append(Src(VariableType.F32)); Append(Src(VariableType.F32));

View file

@ -241,7 +241,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
private static bool IsBranch(OpCode opCode) private static bool IsBranch(OpCode opCode)
{ {
return (opCode is OpCodeBranch && opCode.Emitter != InstEmit.Ssy) || return (opCode is OpCodeBranch opBranch && !opBranch.PushTarget) ||
opCode is OpCodeSync || opCode is OpCodeSync ||
opCode is OpCodeExit; opCode is OpCodeExit;
} }

View file

@ -6,9 +6,13 @@ namespace Ryujinx.Graphics.Shader.Decoders
{ {
public int Offset { get; } public int Offset { get; }
public bool PushTarget { get; protected set; }
public OpCodeBranch(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode) public OpCodeBranch(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode)
{ {
Offset = ((int)(opCode >> 20) << 8) >> 8; Offset = ((int)(opCode >> 20) << 8) >> 8;
PushTarget = false;
} }
public ulong GetAbsoluteAddress() public ulong GetAbsoluteAddress()

View file

@ -0,0 +1,40 @@
using Ryujinx.Graphics.Shader.Instructions;
namespace Ryujinx.Graphics.Shader.Decoders
{
    /// <summary>
    /// Operand fields extracted from the raw instruction word of a shuffle opcode.
    /// </summary>
    class OpCodeShuffle : OpCode, IOpCodeRd, IOpCodeRa
    {
        // General purpose registers; each index is an 8-bit field of the opcode.
        public Register Rd { get; }
        public Register Ra { get; }
        public Register Rb { get; }
        public Register Rc { get; }

        // Immediate forms of the B and C operands, with the flags that say which form is in use.
        public int ImmediateB { get; }
        public int ImmediateC { get; }

        public bool IsBImmediate { get; }
        public bool IsCImmediate { get; }

        public ShuffleType ShuffleType { get; }

        // Predicate register index taken from bits 48-50.
        public Register Predicate48 { get; }

        /// <summary>
        /// Decodes every field of the shuffle instruction from <paramref name="opCode"/>.
        /// </summary>
        public OpCodeShuffle(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode)
        {
            // Reads an 8-bit general purpose register index starting at the given bit.
            Register Gpr(int bitOffset) => new Register(opCode.Extract(bitOffset, 8), RegisterType.Gpr);

            Rd = Gpr(0);
            Ra = Gpr(8);
            Rb = Gpr(20);
            Rc = Gpr(39);

            // The immediates overlap the bit ranges of Rb and Rc respectively.
            ImmediateB = opCode.Extract(20, 5);
            ImmediateC = opCode.Extract(34, 13);

            IsBImmediate = opCode.Extract(28);
            IsCImmediate = opCode.Extract(29);

            ShuffleType = (ShuffleType)opCode.Extract(30, 2);

            Predicate48 = new Register(opCode.Extract(48, 3), RegisterType.Predicate);
        }
    }
}

View file

@ -15,6 +15,8 @@ namespace Ryujinx.Graphics.Shader.Decoders
Predicate = new Register(RegisterConsts.PredicateTrueIndex, RegisterType.Predicate); Predicate = new Register(RegisterConsts.PredicateTrueIndex, RegisterType.Predicate);
InvertPredicate = false; InvertPredicate = false;
PushTarget = true;
} }
} }
} }

View file

@ -30,136 +30,148 @@ namespace Ryujinx.Graphics.Shader.Decoders
_opCodes = new TableEntry[1 << EncodingBits]; _opCodes = new TableEntry[1 << EncodingBits];
#region Instructions #region Instructions
Set("1110111111011x", InstEmit.Ald, typeof(OpCodeAttribute)); Set("1110111111011x", InstEmit.Ald, typeof(OpCodeAttribute));
Set("1110111111110x", InstEmit.Ast, typeof(OpCodeAttribute)); Set("1110111111110x", InstEmit.Ast, typeof(OpCodeAttribute));
Set("0100110000000x", InstEmit.Bfe, typeof(OpCodeAluCbuf)); Set("0100110000000x", InstEmit.Bfe, typeof(OpCodeAluCbuf));
Set("0011100x00000x", InstEmit.Bfe, typeof(OpCodeAluImm)); Set("0011100x00000x", InstEmit.Bfe, typeof(OpCodeAluImm));
Set("0101110000000x", InstEmit.Bfe, typeof(OpCodeAluReg)); Set("0101110000000x", InstEmit.Bfe, typeof(OpCodeAluReg));
Set("111000100100xx", InstEmit.Bra, typeof(OpCodeBranch)); Set("0100101111110x", InstEmit.Bfi, typeof(OpCodeAluCbuf));
Set("0101000010100x", InstEmit.Csetp, typeof(OpCodePsetp)); Set("0011011x11110x", InstEmit.Bfi, typeof(OpCodeAluImm));
Set("111000110000xx", InstEmit.Exit, typeof(OpCodeExit)); Set("0101001111110x", InstEmit.Bfi, typeof(OpCodeAluRegCbuf));
Set("0100110010101x", InstEmit.F2F, typeof(OpCodeFArithCbuf)); Set("0101101111110x", InstEmit.Bfi, typeof(OpCodeAluReg));
Set("0011100x10101x", InstEmit.F2F, typeof(OpCodeFArithImm)); Set("111000100100xx", InstEmit.Bra, typeof(OpCodeBranch));
Set("0101110010101x", InstEmit.F2F, typeof(OpCodeFArithReg)); Set("111000110100xx", InstEmit.Brk, typeof(OpCodeSync));
Set("0100110010110x", InstEmit.F2I, typeof(OpCodeFArithCbuf)); Set("0101000010100x", InstEmit.Csetp, typeof(OpCodePsetp));
Set("0011100x10110x", InstEmit.F2I, typeof(OpCodeFArithImm)); Set("111000110000xx", InstEmit.Exit, typeof(OpCodeExit));
Set("0101110010110x", InstEmit.F2I, typeof(OpCodeFArithReg)); Set("0100110010101x", InstEmit.F2F, typeof(OpCodeFArithCbuf));
Set("0100110001011x", InstEmit.Fadd, typeof(OpCodeFArithCbuf)); Set("0011100x10101x", InstEmit.F2F, typeof(OpCodeFArithImm));
Set("0011100x01011x", InstEmit.Fadd, typeof(OpCodeFArithImm)); Set("0101110010101x", InstEmit.F2F, typeof(OpCodeFArithReg));
Set("000010xxxxxxxx", InstEmit.Fadd, typeof(OpCodeFArithImm32)); Set("0100110010110x", InstEmit.F2I, typeof(OpCodeFArithCbuf));
Set("0101110001011x", InstEmit.Fadd, typeof(OpCodeFArithReg)); Set("0011100x10110x", InstEmit.F2I, typeof(OpCodeFArithImm));
Set("010010011xxxxx", InstEmit.Ffma, typeof(OpCodeFArithCbuf)); Set("0101110010110x", InstEmit.F2I, typeof(OpCodeFArithReg));
Set("0011001x1xxxxx", InstEmit.Ffma, typeof(OpCodeFArithImm)); Set("0100110001011x", InstEmit.Fadd, typeof(OpCodeFArithCbuf));
Set("010100011xxxxx", InstEmit.Ffma, typeof(OpCodeFArithRegCbuf)); Set("0011100x01011x", InstEmit.Fadd, typeof(OpCodeFArithImm));
Set("010110011xxxxx", InstEmit.Ffma, typeof(OpCodeFArithReg)); Set("000010xxxxxxxx", InstEmit.Fadd, typeof(OpCodeFArithImm32));
Set("0100110001100x", InstEmit.Fmnmx, typeof(OpCodeFArithCbuf)); Set("0101110001011x", InstEmit.Fadd, typeof(OpCodeFArithReg));
Set("0011100x01100x", InstEmit.Fmnmx, typeof(OpCodeFArithImm)); Set("010010011xxxxx", InstEmit.Ffma, typeof(OpCodeFArithCbuf));
Set("0101110001100x", InstEmit.Fmnmx, typeof(OpCodeFArithReg)); Set("0011001x1xxxxx", InstEmit.Ffma, typeof(OpCodeFArithImm));
Set("0100110001101x", InstEmit.Fmul, typeof(OpCodeFArithCbuf)); Set("010100011xxxxx", InstEmit.Ffma, typeof(OpCodeFArithRegCbuf));
Set("0011100x01101x", InstEmit.Fmul, typeof(OpCodeFArithImm)); Set("010110011xxxxx", InstEmit.Ffma, typeof(OpCodeFArithReg));
Set("00011110xxxxxx", InstEmit.Fmul, typeof(OpCodeFArithImm32)); Set("0100110000110x", InstEmit.Flo, typeof(OpCodeAluCbuf));
Set("0101110001101x", InstEmit.Fmul, typeof(OpCodeFArithReg)); Set("0011100x00110x", InstEmit.Flo, typeof(OpCodeAluImm));
Set("0100100xxxxxxx", InstEmit.Fset, typeof(OpCodeSetCbuf)); Set("0101110000110x", InstEmit.Flo, typeof(OpCodeAluReg));
Set("0011000xxxxxxx", InstEmit.Fset, typeof(OpCodeFsetImm)); Set("0100110001100x", InstEmit.Fmnmx, typeof(OpCodeFArithCbuf));
Set("01011000xxxxxx", InstEmit.Fset, typeof(OpCodeSetReg)); Set("0011100x01100x", InstEmit.Fmnmx, typeof(OpCodeFArithImm));
Set("010010111011xx", InstEmit.Fsetp, typeof(OpCodeSetCbuf)); Set("0101110001100x", InstEmit.Fmnmx, typeof(OpCodeFArithReg));
Set("0011011x1011xx", InstEmit.Fsetp, typeof(OpCodeFsetImm)); Set("0100110001101x", InstEmit.Fmul, typeof(OpCodeFArithCbuf));
Set("010110111011xx", InstEmit.Fsetp, typeof(OpCodeSetReg)); Set("0011100x01101x", InstEmit.Fmul, typeof(OpCodeFArithImm));
Set("0111101x1xxxxx", InstEmit.Hadd2, typeof(OpCodeAluCbuf)); Set("00011110xxxxxx", InstEmit.Fmul, typeof(OpCodeFArithImm32));
Set("0111101x0xxxxx", InstEmit.Hadd2, typeof(OpCodeAluImm2x10)); Set("0101110001101x", InstEmit.Fmul, typeof(OpCodeFArithReg));
Set("0010110xxxxxxx", InstEmit.Hadd2, typeof(OpCodeAluImm32)); Set("0100100xxxxxxx", InstEmit.Fset, typeof(OpCodeSetCbuf));
Set("0101110100010x", InstEmit.Hadd2, typeof(OpCodeAluReg)); Set("0011000xxxxxxx", InstEmit.Fset, typeof(OpCodeFsetImm));
Set("01110xxx1xxxxx", InstEmit.Hfma2, typeof(OpCodeHfmaCbuf)); Set("01011000xxxxxx", InstEmit.Fset, typeof(OpCodeSetReg));
Set("01110xxx0xxxxx", InstEmit.Hfma2, typeof(OpCodeHfmaImm2x10)); Set("010010111011xx", InstEmit.Fsetp, typeof(OpCodeSetCbuf));
Set("0010100xxxxxxx", InstEmit.Hfma2, typeof(OpCodeHfmaImm32)); Set("0011011x1011xx", InstEmit.Fsetp, typeof(OpCodeFsetImm));
Set("0101110100000x", InstEmit.Hfma2, typeof(OpCodeHfmaReg)); Set("010110111011xx", InstEmit.Fsetp, typeof(OpCodeSetReg));
Set("01100xxx1xxxxx", InstEmit.Hfma2, typeof(OpCodeHfmaRegCbuf)); Set("0101000011111x", InstEmit.Fswzadd, typeof(OpCodeAluReg));
Set("0111100x1xxxxx", InstEmit.Hmul2, typeof(OpCodeAluCbuf)); Set("0111101x1xxxxx", InstEmit.Hadd2, typeof(OpCodeAluCbuf));
Set("0111100x0xxxxx", InstEmit.Hmul2, typeof(OpCodeAluImm2x10)); Set("0111101x0xxxxx", InstEmit.Hadd2, typeof(OpCodeAluImm2x10));
Set("0010101xxxxxxx", InstEmit.Hmul2, typeof(OpCodeAluImm32)); Set("0010110xxxxxxx", InstEmit.Hadd2, typeof(OpCodeAluImm32));
Set("0101110100001x", InstEmit.Hmul2, typeof(OpCodeAluReg)); Set("0101110100010x", InstEmit.Hadd2, typeof(OpCodeAluReg));
Set("0111111x1xxxxx", InstEmit.Hsetp2, typeof(OpCodeSetCbuf)); Set("01110xxx1xxxxx", InstEmit.Hfma2, typeof(OpCodeHfmaCbuf));
Set("0111111x0xxxxx", InstEmit.Hsetp2, typeof(OpCodeHsetImm2x10)); Set("01110xxx0xxxxx", InstEmit.Hfma2, typeof(OpCodeHfmaImm2x10));
Set("0101110100100x", InstEmit.Hsetp2, typeof(OpCodeSetReg)); Set("0010100xxxxxxx", InstEmit.Hfma2, typeof(OpCodeHfmaImm32));
Set("0100110010111x", InstEmit.I2F, typeof(OpCodeAluCbuf)); Set("0101110100000x", InstEmit.Hfma2, typeof(OpCodeHfmaReg));
Set("0011100x10111x", InstEmit.I2F, typeof(OpCodeAluImm)); Set("01100xxx1xxxxx", InstEmit.Hfma2, typeof(OpCodeHfmaRegCbuf));
Set("0101110010111x", InstEmit.I2F, typeof(OpCodeAluReg)); Set("0111100x1xxxxx", InstEmit.Hmul2, typeof(OpCodeAluCbuf));
Set("0100110011100x", InstEmit.I2I, typeof(OpCodeAluCbuf)); Set("0111100x0xxxxx", InstEmit.Hmul2, typeof(OpCodeAluImm2x10));
Set("0011100x11100x", InstEmit.I2I, typeof(OpCodeAluImm)); Set("0010101xxxxxxx", InstEmit.Hmul2, typeof(OpCodeAluImm32));
Set("0101110011100x", InstEmit.I2I, typeof(OpCodeAluReg)); Set("0101110100001x", InstEmit.Hmul2, typeof(OpCodeAluReg));
Set("0100110000010x", InstEmit.Iadd, typeof(OpCodeAluCbuf)); Set("0111111x1xxxxx", InstEmit.Hsetp2, typeof(OpCodeSetCbuf));
Set("0011100000010x", InstEmit.Iadd, typeof(OpCodeAluImm)); Set("0111111x0xxxxx", InstEmit.Hsetp2, typeof(OpCodeHsetImm2x10));
Set("0001110x0xxxxx", InstEmit.Iadd, typeof(OpCodeAluImm32)); Set("0101110100100x", InstEmit.Hsetp2, typeof(OpCodeSetReg));
Set("0101110000010x", InstEmit.Iadd, typeof(OpCodeAluReg)); Set("0100110010111x", InstEmit.I2F, typeof(OpCodeAluCbuf));
Set("010011001100xx", InstEmit.Iadd3, typeof(OpCodeAluCbuf)); Set("0011100x10111x", InstEmit.I2F, typeof(OpCodeAluImm));
Set("001110001100xx", InstEmit.Iadd3, typeof(OpCodeAluImm)); Set("0101110010111x", InstEmit.I2F, typeof(OpCodeAluReg));
Set("010111001100xx", InstEmit.Iadd3, typeof(OpCodeAluReg)); Set("0100110011100x", InstEmit.I2I, typeof(OpCodeAluCbuf));
Set("0100110000100x", InstEmit.Imnmx, typeof(OpCodeAluCbuf)); Set("0011100x11100x", InstEmit.I2I, typeof(OpCodeAluImm));
Set("0011100x00100x", InstEmit.Imnmx, typeof(OpCodeAluImm)); Set("0101110011100x", InstEmit.I2I, typeof(OpCodeAluReg));
Set("0101110000100x", InstEmit.Imnmx, typeof(OpCodeAluReg)); Set("0100110000010x", InstEmit.Iadd, typeof(OpCodeAluCbuf));
Set("11100000xxxxxx", InstEmit.Ipa, typeof(OpCodeIpa)); Set("0011100000010x", InstEmit.Iadd, typeof(OpCodeAluImm));
Set("1110111111010x", InstEmit.Isberd, typeof(OpCodeAlu)); Set("0001110x0xxxxx", InstEmit.Iadd, typeof(OpCodeAluImm32));
Set("0100110000011x", InstEmit.Iscadd, typeof(OpCodeAluCbuf)); Set("0101110000010x", InstEmit.Iadd, typeof(OpCodeAluReg));
Set("0011100x00011x", InstEmit.Iscadd, typeof(OpCodeAluImm)); Set("010011001100xx", InstEmit.Iadd3, typeof(OpCodeAluCbuf));
Set("000101xxxxxxxx", InstEmit.Iscadd, typeof(OpCodeAluImm32)); Set("001110001100xx", InstEmit.Iadd3, typeof(OpCodeAluImm));
Set("0101110000011x", InstEmit.Iscadd, typeof(OpCodeAluReg)); Set("010111001100xx", InstEmit.Iadd3, typeof(OpCodeAluReg));
Set("010010110101xx", InstEmit.Iset, typeof(OpCodeSetCbuf)); Set("0100110000100x", InstEmit.Imnmx, typeof(OpCodeAluCbuf));
Set("001101100101xx", InstEmit.Iset, typeof(OpCodeSetImm)); Set("0011100x00100x", InstEmit.Imnmx, typeof(OpCodeAluImm));
Set("010110110101xx", InstEmit.Iset, typeof(OpCodeSetReg)); Set("0101110000100x", InstEmit.Imnmx, typeof(OpCodeAluReg));
Set("010010110110xx", InstEmit.Isetp, typeof(OpCodeSetCbuf)); Set("11100000xxxxxx", InstEmit.Ipa, typeof(OpCodeIpa));
Set("0011011x0110xx", InstEmit.Isetp, typeof(OpCodeSetImm)); Set("1110111111010x", InstEmit.Isberd, typeof(OpCodeAlu));
Set("010110110110xx", InstEmit.Isetp, typeof(OpCodeSetReg)); Set("0100110000011x", InstEmit.Iscadd, typeof(OpCodeAluCbuf));
Set("111000110011xx", InstEmit.Kil, typeof(OpCodeExit)); Set("0011100x00011x", InstEmit.Iscadd, typeof(OpCodeAluImm));
Set("1110111101000x", InstEmit.Ld, typeof(OpCodeMemory)); Set("000101xxxxxxxx", InstEmit.Iscadd, typeof(OpCodeAluImm32));
Set("1110111110010x", InstEmit.Ldc, typeof(OpCodeLdc)); Set("0101110000011x", InstEmit.Iscadd, typeof(OpCodeAluReg));
Set("1110111011010x", InstEmit.Ldg, typeof(OpCodeMemory)); Set("010010110101xx", InstEmit.Iset, typeof(OpCodeSetCbuf));
Set("0100110001000x", InstEmit.Lop, typeof(OpCodeLopCbuf)); Set("001101100101xx", InstEmit.Iset, typeof(OpCodeSetImm));
Set("0011100001000x", InstEmit.Lop, typeof(OpCodeLopImm)); Set("010110110101xx", InstEmit.Iset, typeof(OpCodeSetReg));
Set("000001xxxxxxxx", InstEmit.Lop, typeof(OpCodeLopImm32)); Set("010010110110xx", InstEmit.Isetp, typeof(OpCodeSetCbuf));
Set("0101110001000x", InstEmit.Lop, typeof(OpCodeLopReg)); Set("0011011x0110xx", InstEmit.Isetp, typeof(OpCodeSetImm));
Set("0010000xxxxxxx", InstEmit.Lop3, typeof(OpCodeLopCbuf)); Set("010110110110xx", InstEmit.Isetp, typeof(OpCodeSetReg));
Set("001111xxxxxxxx", InstEmit.Lop3, typeof(OpCodeLopImm)); Set("111000110011xx", InstEmit.Kil, typeof(OpCodeExit));
Set("0101101111100x", InstEmit.Lop3, typeof(OpCodeLopReg)); Set("1110111101000x", InstEmit.Ld, typeof(OpCodeMemory));
Set("0100110010011x", InstEmit.Mov, typeof(OpCodeAluCbuf)); Set("1110111110010x", InstEmit.Ldc, typeof(OpCodeLdc));
Set("0011100x10011x", InstEmit.Mov, typeof(OpCodeAluImm)); Set("1110111011010x", InstEmit.Ldg, typeof(OpCodeMemory));
Set("000000010000xx", InstEmit.Mov, typeof(OpCodeAluImm32)); Set("0100110001000x", InstEmit.Lop, typeof(OpCodeLopCbuf));
Set("0101110010011x", InstEmit.Mov, typeof(OpCodeAluReg)); Set("0011100001000x", InstEmit.Lop, typeof(OpCodeLopImm));
Set("0101000010000x", InstEmit.Mufu, typeof(OpCodeFArith)); Set("000001xxxxxxxx", InstEmit.Lop, typeof(OpCodeLopImm32));
Set("1111101111100x", InstEmit.Out, typeof(OpCode)); Set("0101110001000x", InstEmit.Lop, typeof(OpCodeLopReg));
Set("0101000010010x", InstEmit.Psetp, typeof(OpCodePsetp)); Set("0010000xxxxxxx", InstEmit.Lop3, typeof(OpCodeLopCbuf));
Set("0100110010010x", InstEmit.Rro, typeof(OpCodeFArithCbuf)); Set("001111xxxxxxxx", InstEmit.Lop3, typeof(OpCodeLopImm));
Set("0011100x10010x", InstEmit.Rro, typeof(OpCodeFArithImm)); Set("0101101111100x", InstEmit.Lop3, typeof(OpCodeLopReg));
Set("0101110010010x", InstEmit.Rro, typeof(OpCodeFArithReg)); Set("0100110010011x", InstEmit.Mov, typeof(OpCodeAluCbuf));
Set("1111000011001x", InstEmit.S2r, typeof(OpCodeAlu)); Set("0011100x10011x", InstEmit.Mov, typeof(OpCodeAluImm));
Set("0100110010100x", InstEmit.Sel, typeof(OpCodeAluCbuf)); Set("000000010000xx", InstEmit.Mov, typeof(OpCodeAluImm32));
Set("0011100x10100x", InstEmit.Sel, typeof(OpCodeAluImm)); Set("0101110010011x", InstEmit.Mov, typeof(OpCodeAluReg));
Set("0101110010100x", InstEmit.Sel, typeof(OpCodeAluReg)); Set("0101000010000x", InstEmit.Mufu, typeof(OpCodeFArith));
Set("0100110001001x", InstEmit.Shl, typeof(OpCodeAluCbuf)); Set("1111101111100x", InstEmit.Out, typeof(OpCode));
Set("0011100x01001x", InstEmit.Shl, typeof(OpCodeAluImm)); Set("111000101010xx", InstEmit.Pbk, typeof(OpCodeSsy));
Set("0101110001001x", InstEmit.Shl, typeof(OpCodeAluReg)); Set("0101000010010x", InstEmit.Psetp, typeof(OpCodePsetp));
Set("0100110000101x", InstEmit.Shr, typeof(OpCodeAluCbuf)); Set("0100110010010x", InstEmit.Rro, typeof(OpCodeFArithCbuf));
Set("0011100x00101x", InstEmit.Shr, typeof(OpCodeAluImm)); Set("0011100x10010x", InstEmit.Rro, typeof(OpCodeFArithImm));
Set("0101110000101x", InstEmit.Shr, typeof(OpCodeAluReg)); Set("0101110010010x", InstEmit.Rro, typeof(OpCodeFArithReg));
Set("111000101001xx", InstEmit.Ssy, typeof(OpCodeSsy)); Set("1111000011001x", InstEmit.S2r, typeof(OpCodeAlu));
Set("1110111101010x", InstEmit.St, typeof(OpCodeMemory)); Set("0100110010100x", InstEmit.Sel, typeof(OpCodeAluCbuf));
Set("1110111011011x", InstEmit.Stg, typeof(OpCodeMemory)); Set("0011100x10100x", InstEmit.Sel, typeof(OpCodeAluImm));
Set("11101011001xxx", InstEmit.Sust, typeof(OpCodeImage)); Set("0101110010100x", InstEmit.Sel, typeof(OpCodeAluReg));
Set("1111000011111x", InstEmit.Sync, typeof(OpCodeSync)); Set("1110111100010x", InstEmit.Shfl, typeof(OpCodeShuffle));
Set("110000xxxx111x", InstEmit.Tex, typeof(OpCodeTex)); Set("0100110001001x", InstEmit.Shl, typeof(OpCodeAluCbuf));
Set("1101111010111x", InstEmit.TexB, typeof(OpCodeTexB)); Set("0011100x01001x", InstEmit.Shl, typeof(OpCodeAluImm));
Set("1101x00xxxxxxx", InstEmit.Texs, typeof(OpCodeTexs)); Set("0101110001001x", InstEmit.Shl, typeof(OpCodeAluReg));
Set("1101x01xxxxxxx", InstEmit.Texs, typeof(OpCodeTlds)); Set("0100110000101x", InstEmit.Shr, typeof(OpCodeAluCbuf));
Set("1101x11100xxxx", InstEmit.Texs, typeof(OpCodeTld4s)); Set("0011100x00101x", InstEmit.Shr, typeof(OpCodeAluImm));
Set("11011100xx111x", InstEmit.Tld, typeof(OpCodeTld)); Set("0101110000101x", InstEmit.Shr, typeof(OpCodeAluReg));
Set("11011101xx111x", InstEmit.TldB, typeof(OpCodeTld)); Set("111000101001xx", InstEmit.Ssy, typeof(OpCodeSsy));
Set("110010xxxx111x", InstEmit.Tld4, typeof(OpCodeTld4)); Set("1110111101010x", InstEmit.St, typeof(OpCodeMemory));
Set("1101111101001x", InstEmit.Txq, typeof(OpCodeTex)); Set("1110111011011x", InstEmit.Stg, typeof(OpCodeMemory));
Set("1101111101010x", InstEmit.TxqB, typeof(OpCodeTex)); Set("11101011001xxx", InstEmit.Sust, typeof(OpCodeImage));
Set("01011111xxxxxx", InstEmit.Vmad, typeof(OpCodeVideo)); Set("1111000011111x", InstEmit.Sync, typeof(OpCodeSync));
Set("0100111xxxxxxx", InstEmit.Xmad, typeof(OpCodeAluCbuf)); Set("110000xxxx111x", InstEmit.Tex, typeof(OpCodeTex));
Set("0011011x00xxxx", InstEmit.Xmad, typeof(OpCodeAluImm)); Set("1101111010111x", InstEmit.TexB, typeof(OpCodeTexB));
Set("010100010xxxxx", InstEmit.Xmad, typeof(OpCodeAluRegCbuf)); Set("1101x00xxxxxxx", InstEmit.Texs, typeof(OpCodeTexs));
Set("0101101100xxxx", InstEmit.Xmad, typeof(OpCodeAluReg)); Set("1101x01xxxxxxx", InstEmit.Texs, typeof(OpCodeTlds));
Set("11011111x0xxxx", InstEmit.Texs, typeof(OpCodeTld4s));
Set("11011100xx111x", InstEmit.Tld, typeof(OpCodeTld));
Set("11011101xx111x", InstEmit.TldB, typeof(OpCodeTld));
Set("110010xxxx111x", InstEmit.Tld4, typeof(OpCodeTld4));
Set("110111100x1110", InstEmit.Txd, typeof(OpCodeTxd));
Set("1101111101001x", InstEmit.Txq, typeof(OpCodeTex));
Set("1101111101010x", InstEmit.TxqB, typeof(OpCodeTex));
Set("01011111xxxxxx", InstEmit.Vmad, typeof(OpCodeVideo));
Set("0100111xxxxxxx", InstEmit.Xmad, typeof(OpCodeAluCbuf));
Set("0011011x00xxxx", InstEmit.Xmad, typeof(OpCodeAluImm));
Set("010100010xxxxx", InstEmit.Xmad, typeof(OpCodeAluRegCbuf));
Set("0101101100xxxx", InstEmit.Xmad, typeof(OpCodeAluReg));
#endregion #endregion
} }

View file

@ -39,7 +39,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
protected int RawType; protected int RawType;
public bool IsFp16 { get; } public bool IsFp16 { get; protected set; }
public OpCodeTextureScalar(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode) public OpCodeTextureScalar(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode)
{ {

View file

@ -16,6 +16,8 @@ namespace Ryujinx.Graphics.Shader.Decoders
GatherCompIndex = opCode.Extract(52, 2); GatherCompIndex = opCode.Extract(52, 2);
IsFp16 = opCode.Extract(55);
ComponentMask = Rd1.IsRZ ? 3 : 0xf; ComponentMask = Rd1.IsRZ ? 3 : 0xf;
} }
} }

View file

@ -0,0 +1,18 @@
using Ryujinx.Graphics.Shader.Instructions;
namespace Ryujinx.Graphics.Shader.Decoders
{
    /// <summary>
    /// Decoded form of the TXD texture instruction.
    /// </summary>
    class OpCodeTxd : OpCodeTexture
    {
        // Bit positions of the flags read from the raw opcode.
        private const int HasOffsetBit = 35;
        private const int IsBindlessBit = 54;

        /// <summary>
        /// Value of opcode bit 54.
        /// </summary>
        public bool IsBindless { get; }

        /// <summary>
        /// Decodes the TXD specific fields on top of what the base texture decoder reads.
        /// </summary>
        /// <param name="emitter">Emitter forwarded to the base class</param>
        /// <param name="address">Instruction address forwarded to the base class</param>
        /// <param name="opCode">Raw instruction bits</param>
        public OpCodeTxd(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode)
        {
            HasOffset = opCode.Extract(HasOffsetBit);

            IsBindless = opCode.Extract(IsBindlessBit);

            // This decoder always reports no LOD mode.
            LodMode = TextureLodMode.None;
        }
    }
}

View file

@ -0,0 +1,10 @@
namespace Ryujinx.Graphics.Shader.Decoders
{
/// <summary>
/// Shuffle mode of a shuffle opcode.
/// The SHFL emitter selects the IR shuffle operation from this value.
/// </summary>
enum ShuffleType
{
// Emitted as the Shuffle IR operation.
Indexed = 0,
// Emitted as the ShuffleUp IR operation.
Up = 1,
// Emitted as the ShuffleDown IR operation.
Down = 2,
// Emitted as the ShuffleXor IR operation.
Butterfly = 3
}
}

View file

@ -2,12 +2,13 @@ namespace Ryujinx.Graphics.Shader.Decoders
{ {
enum SystemRegister enum SystemRegister
{ {
ThreadId = 0x20, YDirection = 0x12,
ThreadIdX = 0x21, ThreadId = 0x20,
ThreadIdY = 0x22, ThreadIdX = 0x21,
ThreadIdZ = 0x23, ThreadIdY = 0x22,
CtaIdX = 0x25, ThreadIdZ = 0x23,
CtaIdY = 0x26, CtaIdX = 0x25,
CtaIdZ = 0x27 CtaIdY = 0x26,
CtaIdZ = 0x27
} }
} }

View file

@ -39,6 +39,23 @@ namespace Ryujinx.Graphics.Shader.Instructions
// TODO: CC, X, corner cases // TODO: CC, X, corner cases
} }
/// <summary>
/// Emits the BFI (bitfield insert) instruction.
/// </summary>
/// <remarks>
/// Source B carries the field descriptor: its low 8 bits are used as the position,
/// and the next 8 bits (8 to 15) as the size.
/// NOTE(review): BitfieldInsert presumably follows the GLSL bitfieldInsert argument
/// order (base = source C, insert = source A) - confirm against its definition.
/// </remarks>
/// <param name="context">Emitter context holding the current opcode</param>
public static void Bfi(EmitterContext context)
{
    OpCodeAlu op = (OpCodeAlu)context.CurrOp;

    Operand insertSrc = GetSrcA(context);
    Operand fieldDesc = GetSrcB(context);
    Operand baseSrc = GetSrcC(context);

    Operand fieldPosition = context.BitwiseAnd(fieldDesc, Const(0xff));
    Operand fieldSize = context.BitfieldExtractU32(fieldDesc, Const(8), Const(8));

    Operand inserted = context.BitfieldInsert(baseSrc, insertSrc, fieldPosition, fieldSize);

    context.Copy(GetDest(context), inserted);
}
public static void Csetp(EmitterContext context) public static void Csetp(EmitterContext context)
{ {
OpCodePsetp op = (OpCodePsetp)context.CurrOp; OpCodePsetp op = (OpCodePsetp)context.CurrOp;
@ -58,6 +75,28 @@ namespace Ryujinx.Graphics.Shader.Instructions
context.Copy(Register(op.Predicate0), p1Res); context.Copy(Register(op.Predicate0), p1Res);
} }
/// <summary>
/// Emits the FLO instruction using the FindFirstSet IR operations.
/// </summary>
/// <remarks>
/// Opcode bit 40 requests a bitwise NOT of source B before the search,
/// bit 41 requests the result to be XORed with 31, and bit 48 selects
/// the signed variant of the search.
/// </remarks>
/// <param name="context">Emitter context holding the current opcode</param>
public static void Flo(EmitterContext context)
{
    OpCodeAlu op = (OpCodeAlu)context.CurrOp;

    bool invertSource = op.RawOpCode.Extract(40);
    bool xorWith31 = op.RawOpCode.Extract(41);
    bool signedSearch = op.RawOpCode.Extract(48);

    Operand source = context.BitwiseNot(GetSrcB(context), invertSource);

    Operand result;

    if (signedSearch)
    {
        result = context.FindFirstSetS32(source);
    }
    else
    {
        result = context.FindFirstSetU32(source);
    }

    if (xorWith31)
    {
        result = context.BitwiseExclusiveOr(result, Const(31));
    }

    context.Copy(GetDest(context), result);
}
public static void Iadd(EmitterContext context) public static void Iadd(EmitterContext context)
{ {
OpCodeAlu op = (OpCodeAlu)context.CurrOp; OpCodeAlu op = (OpCodeAlu)context.CurrOp;

View file

@ -180,6 +180,22 @@ namespace Ryujinx.Graphics.Shader.Instructions
context.Copy(Register(op.Predicate0), p1Res); context.Copy(Register(op.Predicate0), p1Res);
} }
/// <summary>
/// Emits the FSWZADD instruction as a SwizzleAdd IR operation.
/// </summary>
/// <remarks>
/// The 8-bit swizzle mask is read from the raw opcode starting at bit 28.
/// The Z/N flags are updated from the destination through SetFPZnFlags,
/// which receives the opcode's SetCondCode value.
/// </remarks>
/// <param name="context">Emitter context holding the current opcode</param>
public static void Fswzadd(EmitterContext context)
{
    OpCodeAlu op = (OpCodeAlu)context.CurrOp;

    int swizzleMask = op.RawOpCode.Extract(28, 8);

    Operand lhs = GetSrcA(context);
    Operand rhs = GetSrcB(context);

    Operand destination = GetDest(context);

    Operand sum = context.FPSwizzleAdd(lhs, rhs, swizzleMask);

    context.Copy(destination, sum);

    SetFPZnFlags(context, destination, op.SetCondCode);
}
public static void Hadd2(EmitterContext context) public static void Hadd2(EmitterContext context)
{ {
Hadd2Hmul2Impl(context, isAdd: true); Hadd2Hmul2Impl(context, isAdd: true);

View file

@ -15,6 +15,11 @@ namespace Ryujinx.Graphics.Shader.Instructions
EmitBranch(context, context.CurrBlock.Branch.Address); EmitBranch(context, context.CurrBlock.Branch.Address);
} }
/// <summary>
/// Emits the BRK instruction through the emitter shared with SYNC.
/// </summary>
/// <param name="context">Emitter context holding the current opcode</param>
public static void Brk(EmitterContext context) => EmitBrkOrSync(context);
public static void Exit(EmitterContext context) public static void Exit(EmitterContext context)
{ {
OpCodeExit op = (OpCodeExit)context.CurrOp; OpCodeExit op = (OpCodeExit)context.CurrOp;
@ -32,7 +37,22 @@ namespace Ryujinx.Graphics.Shader.Instructions
context.Discard(); context.Discard();
} }
/// <summary>
/// Emits the PBK instruction through the emitter shared with SSY.
/// </summary>
/// <param name="context">Emitter context holding the current opcode</param>
public static void Pbk(EmitterContext context) => EmitPbkOrSsy(context);
public static void Ssy(EmitterContext context) public static void Ssy(EmitterContext context)
{
EmitPbkOrSsy(context);
}
/// <summary>
/// Emits the SYNC instruction through the emitter shared with BRK.
/// </summary>
/// <param name="context">Emitter context holding the current opcode</param>
public static void Sync(EmitterContext context) => EmitBrkOrSync(context);
private static void EmitPbkOrSsy(EmitterContext context)
{ {
OpCodeSsy op = (OpCodeSsy)context.CurrOp; OpCodeSsy op = (OpCodeSsy)context.CurrOp;
@ -48,7 +68,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
} }
} }
public static void Sync(EmitterContext context) private static void EmitBrkOrSync(EmitterContext context)
{ {
OpCodeSync op = (OpCodeSync)context.CurrOp; OpCodeSync op = (OpCodeSync)context.CurrOp;

View file

@ -27,6 +27,9 @@ namespace Ryujinx.Graphics.Shader.Instructions
switch (sysReg) switch (sysReg)
{ {
// TODO: Use value from Y direction GPU register.
case SystemRegister.YDirection: src = ConstF(1); break;
case SystemRegister.ThreadId: case SystemRegister.ThreadId:
{ {
Operand tidX = Attribute(AttributeConsts.ThreadIdX); Operand tidX = Attribute(AttributeConsts.ThreadIdX);
@ -67,5 +70,37 @@ namespace Ryujinx.Graphics.Shader.Instructions
context.Copy(GetDest(context), res); context.Copy(GetDest(context), res);
} }
/// <summary>
/// Emits the SHFL instruction, picking the IR shuffle operation from the opcode's shuffle type.
/// </summary>
/// <remarks>
/// Sources B and C are taken either from the opcode immediates or from registers,
/// depending on the IsBImmediate and IsCImmediate opcode flags.
/// </remarks>
/// <param name="context">Emitter context holding the current opcode</param>
public static void Shfl(EmitterContext context)
{
    OpCodeShuffle op = (OpCodeShuffle)context.CurrOp;

    // NOTE(review): this predicate operand is created but never written by this method.
    // SHFL presumably has a predicate output that still needs to be produced - confirm.
    Operand pred = Register(op.Predicate48);

    Operand value = GetSrcA(context);

    Operand operandB;

    if (op.IsBImmediate)
    {
        operandB = Const(op.ImmediateB);
    }
    else
    {
        operandB = Register(op.Rb);
    }

    Operand operandC;

    if (op.IsCImmediate)
    {
        operandC = Const(op.ImmediateC);
    }
    else
    {
        operandC = Register(op.Rc);
    }

    ShuffleType mode = op.ShuffleType;

    // Stays null when the mode matches none of the known shuffle types.
    Operand shuffled = null;

    if (mode == ShuffleType.Indexed)
    {
        shuffled = context.Shuffle(value, operandB, operandC);
    }
    else if (mode == ShuffleType.Up)
    {
        shuffled = context.ShuffleUp(value, operandB, operandC);
    }
    else if (mode == ShuffleType.Down)
    {
        shuffled = context.ShuffleDown(value, operandB, operandC);
    }
    else if (mode == ShuffleType.Butterfly)
    {
        shuffled = context.ShuffleXor(value, operandB, operandC);
    }

    context.Copy(GetDest(context), shuffled);
}
} }
} }

View file

@ -102,22 +102,22 @@ namespace Ryujinx.Graphics.Shader.Instructions
public static void Tex(EmitterContext context) public static void Tex(EmitterContext context)
{ {
Tex(context, TextureFlags.None); EmitTextureSample(context, TextureFlags.None);
} }
public static void TexB(EmitterContext context) public static void TexB(EmitterContext context)
{ {
Tex(context, TextureFlags.Bindless); EmitTextureSample(context, TextureFlags.Bindless);
} }
public static void Tld(EmitterContext context) public static void Tld(EmitterContext context)
{ {
Tex(context, TextureFlags.IntCoords); EmitTextureSample(context, TextureFlags.IntCoords);
} }
public static void TldB(EmitterContext context) public static void TldB(EmitterContext context)
{ {
Tex(context, TextureFlags.IntCoords | TextureFlags.Bindless); EmitTextureSample(context, TextureFlags.IntCoords | TextureFlags.Bindless);
} }
public static void Texs(EmitterContext context) public static void Texs(EmitterContext context)
@ -512,17 +512,128 @@ namespace Ryujinx.Graphics.Shader.Instructions
} }
} }
/// <summary>
/// Emits the TXD instruction as TextureSample operations carrying the Derivatives flag.
/// One operation is emitted per component enabled on the opcode component mask.
/// </summary>
/// <param name="context">Emitter context holding the current opcode</param>
public static void Txd(EmitterContext context)
{
OpCodeTxd op = (OpCodeTxd)context.CurrOp;
// Nothing is emitted when the destination is RZ.
if (op.Rd.IsRZ)
{
return;
}
int raIndex = op.Ra.Index;
int rbIndex = op.Rb.Index;
// Returns a copy of the next register of the sequence that starts at Ra and advances
// the sequence. Constant zero is returned once the index goes past RegisterZeroIndex.
// Every call consumes one register, so the call order below is significant.
Operand Ra()
{
if (raIndex > RegisterConsts.RegisterZeroIndex)
{
return Const(0);
}
return context.Copy(Register(raIndex++, RegisterType.Gpr));
}
// Same as Ra(), but for the register sequence that starts at Rb.
Operand Rb()
{
if (rbIndex > RegisterConsts.RegisterZeroIndex)
{
return Const(0);
}
return context.Copy(Register(rbIndex++, RegisterType.Gpr));
}
TextureFlags flags = TextureFlags.Derivatives;
List<Operand> sourcesList = new List<Operand>();
// For bindless, the first Ra register is passed as the first source
// (and the handle below is left as 0).
if (op.IsBindless)
{
sourcesList.Add(Ra());
}
SamplerType type = GetSamplerType(op.Dimensions);
int coordsCount = type.GetDimensions();
// Coordinates come from the following Ra registers.
for (int index = 0; index < coordsCount; index++)
{
sourcesList.Add(Ra());
}
// The register after the coordinates packs the array index (low 16 bits)
// and the offsets (4-bit signed fields starting at bit 16).
// NOTE(review): this register is consumed even when neither IsArray nor HasOffset is set - confirm this is intended.
Operand packedParams = Ra();
if (op.IsArray)
{
sourcesList.Add(context.BitwiseAnd(packedParams, Const(0xffff)));
type |= SamplerType.Array;
}
// Derivatives (X and Y).
for (int dIndex = 0; dIndex < 2 * coordsCount; dIndex++)
{
sourcesList.Add(Rb());
}
if (op.HasOffset)
{
// One signed 4-bit offset per coordinate.
for (int index = 0; index < coordsCount; index++)
{
sourcesList.Add(context.BitfieldExtractS32(packedParams, Const(16 + index * 4), Const(4)));
}
flags |= TextureFlags.Offset;
}
Operand[] sources = sourcesList.ToArray();
int rdIndex = op.Rd.Index;
// Returns the next destination register of the sequence that starts at Rd,
// or constant zero once the index goes past RegisterZeroIndex.
Operand GetDest()
{
if (rdIndex > RegisterConsts.RegisterZeroIndex)
{
return Const(0);
}
return Register(rdIndex++, RegisterType.Gpr);
}
// The immediate is only used as handle for the non-bindless variant.
int handle = !op.IsBindless ? op.Immediate : 0;
// Walk the component mask bit by bit, compIndex is the position of the current bit.
// Destination registers are only consumed for the enabled components.
for (int compMask = op.ComponentMask, compIndex = 0; compMask != 0; compMask >>= 1, compIndex++)
{
if ((compMask & 1) != 0)
{
Operand dest = GetDest();
TextureOperation operation = new TextureOperation(
Instruction.TextureSample,
type,
flags,
handle,
compIndex,
dest,
sources);
context.Add(operation);
}
}
}
public static void Txq(EmitterContext context) public static void Txq(EmitterContext context)
{ {
Txq(context, bindless: false); EmitTextureQuery(context, bindless: false);
} }
public static void TxqB(EmitterContext context) public static void TxqB(EmitterContext context)
{ {
Txq(context, bindless: true); EmitTextureQuery(context, bindless: true);
} }
private static void Txq(EmitterContext context, bool bindless) private static void EmitTextureQuery(EmitterContext context, bool bindless)
{ {
OpCodeTex op = (OpCodeTex)context.CurrOp; OpCodeTex op = (OpCodeTex)context.CurrOp;
@ -597,7 +708,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
} }
} }
private static void Tex(EmitterContext context, TextureFlags flags) private static void EmitTextureSample(EmitterContext context, TextureFlags flags)
{ {
OpCodeTexture op = (OpCodeTexture)context.CurrOp; OpCodeTexture op = (OpCodeTexture)context.CurrOp;

View file

@ -7,6 +7,7 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
{ {
Absolute = 1, Absolute = 1,
Add, Add,
BitCount,
BitfieldExtractS32, BitfieldExtractS32,
BitfieldExtractU32, BitfieldExtractU32,
BitfieldInsert, BitfieldInsert,
@ -38,11 +39,15 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
ConvertU32ToFP, ConvertU32ToFP,
Copy, Copy,
Cosine, Cosine,
Ddx,
Ddy,
Discard, Discard,
Divide, Divide,
EmitVertex, EmitVertex,
EndPrimitive, EndPrimitive,
ExponentB2, ExponentB2,
FindFirstSetS32,
FindFirstSetU32,
Floor, Floor,
FusedMultiplyAdd, FusedMultiplyAdd,
ImageLoad, ImageLoad,
@ -75,12 +80,17 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
ShiftLeft, ShiftLeft,
ShiftRightS32, ShiftRightS32,
ShiftRightU32, ShiftRightU32,
Shuffle,
ShuffleDown,
ShuffleUp,
ShuffleXor,
Sine, Sine,
SquareRoot, SquareRoot,
StoreGlobal, StoreGlobal,
StoreLocal, StoreLocal,
StoreStorage, StoreStorage,
Subtract, Subtract,
SwizzleAdd,
TextureSample, TextureSample,
TextureSize, TextureSize,
Truncate, Truncate,

View file

@ -80,7 +80,12 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
public void TurnIntoCopy(Operand source) public void TurnIntoCopy(Operand source)
{ {
Inst = Instruction.Copy; TurnInto(Instruction.Copy, source);
}
public void TurnInto(Instruction newInst, Operand source)
{
Inst = newInst;
foreach (Operand oldSrc in _sources) foreach (Operand oldSrc in _sources)
{ {

View file

@ -5,13 +5,14 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
[Flags] [Flags]
enum TextureFlags enum TextureFlags
{ {
None = 0, None = 0,
Bindless = 1 << 0, Bindless = 1 << 0,
Gather = 1 << 1, Gather = 1 << 1,
IntCoords = 1 << 2, Derivatives = 1 << 2,
LodBias = 1 << 3, IntCoords = 1 << 3,
LodLevel = 1 << 4, LodBias = 1 << 4,
Offset = 1 << 5, LodLevel = 1 << 5,
Offsets = 1 << 6 Offset = 1 << 6,
Offsets = 1 << 7
} }
} }

View file

@ -1,5 +1,17 @@
<Project Sdk="Microsoft.NET.Sdk"> <Project Sdk="Microsoft.NET.Sdk">
<ItemGroup>
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\Shuffle.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleDown.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleUp.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleXor.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\SwizzleAdd.glsl" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" />
</ItemGroup>
<PropertyGroup> <PropertyGroup>
<TargetFramework>netcoreapp3.0</TargetFramework> <TargetFramework>netcoreapp3.0</TargetFramework>
</PropertyGroup> </PropertyGroup>

View file

@ -0,0 +1,14 @@
using System;
namespace Ryujinx.Graphics.Shader.StructuredIr
{
/// <summary>
/// Bit mask of the helper functions required by a shader.
/// Each flag is named after the IR instruction that needs the helper.
/// </summary>
[Flags]
enum HelperFunctionsMask
{
// Needed when the Shuffle instruction is used.
Shuffle = 1 << 0,
// Needed when the ShuffleDown instruction is used.
ShuffleDown = 1 << 1,
// Needed when the ShuffleUp instruction is used.
ShuffleUp = 1 << 2,
// Needed when the ShuffleXor instruction is used.
ShuffleXor = 1 << 3,
// Needed when the SwizzleAdd instruction is used.
SwizzleAdd = 1 << 4
}
}

View file

@ -27,6 +27,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
// Inst Destination type Source 1 type Source 2 type Source 3 type Source 4 type // Inst Destination type Source 1 type Source 2 type Source 3 type Source 4 type
Add(Instruction.Absolute, VariableType.Scalar, VariableType.Scalar); Add(Instruction.Absolute, VariableType.Scalar, VariableType.Scalar);
Add(Instruction.Add, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar); Add(Instruction.Add, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar);
Add(Instruction.BitCount, VariableType.Int, VariableType.Int);
Add(Instruction.BitfieldExtractS32, VariableType.S32, VariableType.S32, VariableType.S32, VariableType.S32); Add(Instruction.BitfieldExtractS32, VariableType.S32, VariableType.S32, VariableType.S32, VariableType.S32);
Add(Instruction.BitfieldExtractU32, VariableType.U32, VariableType.U32, VariableType.S32, VariableType.S32); Add(Instruction.BitfieldExtractU32, VariableType.U32, VariableType.U32, VariableType.S32, VariableType.S32);
Add(Instruction.BitfieldInsert, VariableType.Int, VariableType.Int, VariableType.Int, VariableType.S32, VariableType.S32); Add(Instruction.BitfieldInsert, VariableType.Int, VariableType.Int, VariableType.Int, VariableType.S32, VariableType.S32);
@ -55,8 +56,12 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
Add(Instruction.ConvertS32ToFP, VariableType.F32, VariableType.S32); Add(Instruction.ConvertS32ToFP, VariableType.F32, VariableType.S32);
Add(Instruction.ConvertU32ToFP, VariableType.F32, VariableType.U32); Add(Instruction.ConvertU32ToFP, VariableType.F32, VariableType.U32);
Add(Instruction.Cosine, VariableType.Scalar, VariableType.Scalar); Add(Instruction.Cosine, VariableType.Scalar, VariableType.Scalar);
Add(Instruction.Ddx, VariableType.F32, VariableType.F32);
Add(Instruction.Ddy, VariableType.F32, VariableType.F32);
Add(Instruction.Divide, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar); Add(Instruction.Divide, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar);
Add(Instruction.ExponentB2, VariableType.Scalar, VariableType.Scalar); Add(Instruction.ExponentB2, VariableType.Scalar, VariableType.Scalar);
Add(Instruction.FindFirstSetS32, VariableType.S32, VariableType.S32);
Add(Instruction.FindFirstSetU32, VariableType.S32, VariableType.U32);
Add(Instruction.Floor, VariableType.F32, VariableType.F32); Add(Instruction.Floor, VariableType.F32, VariableType.F32);
Add(Instruction.FusedMultiplyAdd, VariableType.F32, VariableType.F32, VariableType.F32, VariableType.F32); Add(Instruction.FusedMultiplyAdd, VariableType.F32, VariableType.F32, VariableType.F32, VariableType.F32);
Add(Instruction.ImageLoad, VariableType.F32); Add(Instruction.ImageLoad, VariableType.F32);
@ -75,6 +80,10 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
Add(Instruction.ShiftLeft, VariableType.Int, VariableType.Int, VariableType.Int); Add(Instruction.ShiftLeft, VariableType.Int, VariableType.Int, VariableType.Int);
Add(Instruction.ShiftRightS32, VariableType.S32, VariableType.S32, VariableType.Int); Add(Instruction.ShiftRightS32, VariableType.S32, VariableType.S32, VariableType.Int);
Add(Instruction.ShiftRightU32, VariableType.U32, VariableType.U32, VariableType.Int); Add(Instruction.ShiftRightU32, VariableType.U32, VariableType.U32, VariableType.Int);
Add(Instruction.Shuffle, VariableType.F32, VariableType.F32, VariableType.U32, VariableType.U32);
Add(Instruction.ShuffleDown, VariableType.F32, VariableType.F32, VariableType.U32, VariableType.U32);
Add(Instruction.ShuffleUp, VariableType.F32, VariableType.F32, VariableType.U32, VariableType.U32);
Add(Instruction.ShuffleXor, VariableType.F32, VariableType.F32, VariableType.U32, VariableType.U32);
Add(Instruction.Maximum, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar); Add(Instruction.Maximum, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar);
Add(Instruction.MaximumU32, VariableType.U32, VariableType.U32, VariableType.U32); Add(Instruction.MaximumU32, VariableType.U32, VariableType.U32, VariableType.U32);
Add(Instruction.Minimum, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar); Add(Instruction.Minimum, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar);
@ -90,6 +99,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
Add(Instruction.StoreLocal, VariableType.None, VariableType.S32, VariableType.F32); Add(Instruction.StoreLocal, VariableType.None, VariableType.S32, VariableType.F32);
Add(Instruction.StoreStorage, VariableType.None, VariableType.S32, VariableType.S32, VariableType.F32); Add(Instruction.StoreStorage, VariableType.None, VariableType.S32, VariableType.S32, VariableType.F32);
Add(Instruction.Subtract, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar); Add(Instruction.Subtract, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar);
Add(Instruction.SwizzleAdd, VariableType.F32, VariableType.F32, VariableType.F32, VariableType.S32);
Add(Instruction.TextureSample, VariableType.F32); Add(Instruction.TextureSample, VariableType.F32);
Add(Instruction.TextureSize, VariableType.S32, VariableType.S32, VariableType.S32); Add(Instruction.TextureSize, VariableType.S32, VariableType.S32, VariableType.S32);
Add(Instruction.Truncate, VariableType.F32, VariableType.F32); Add(Instruction.Truncate, VariableType.F32, VariableType.F32);

View file

@ -179,6 +179,28 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
context.AddNode(new AstOperation(inst, sources)); context.AddNode(new AstOperation(inst, sources));
} }
// These instructions need to be emulated by using helper functions,
// because they are NVIDIA specific. These flags help the backend to
// decide which helper functions are needed on the final generated code.
switch (operation.Inst)
{
case Instruction.Shuffle:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.Shuffle;
break;
case Instruction.ShuffleDown:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.ShuffleDown;
break;
case Instruction.ShuffleUp:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.ShuffleUp;
break;
case Instruction.ShuffleXor:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.ShuffleXor;
break;
case Instruction.SwizzleAdd:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.SwizzleAdd;
break;
}
} }
private static VariableType GetVarTypeFromUses(Operand dest) private static VariableType GetVarTypeFromUses(Operand dest)

View file

@ -18,6 +18,8 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
public bool UsesInstanceId { get; set; } public bool UsesInstanceId { get; set; }
public HelperFunctionsMask HelperFunctionsMask { get; set; }
public HashSet<AstTextureOperation> Samplers { get; } public HashSet<AstTextureOperation> Samplers { get; }
public HashSet<AstTextureOperation> Images { get; } public HashSet<AstTextureOperation> Images { get; }

View file

@ -6,6 +6,11 @@ namespace Ryujinx.Graphics.Shader.Translation
{ {
static class EmitterContextInsts static class EmitterContextInsts
{ {
/// <summary>
/// Adds a BitCount operation with a new local as destination.
/// </summary>
/// <param name="context">Emitter context the operation is added to</param>
/// <param name="a">Source operand</param>
/// <returns>Value returned by the context for the added operation</returns>
public static Operand BitCount(this EmitterContext context, Operand a)
    => context.Add(Instruction.BitCount, Local(), a);
public static Operand BitfieldExtractS32(this EmitterContext context, Operand a, Operand b, Operand c) public static Operand BitfieldExtractS32(this EmitterContext context, Operand a, Operand b, Operand c)
{ {
return context.Add(Instruction.BitfieldExtractS32, Local(), a, b, c); return context.Add(Instruction.BitfieldExtractS32, Local(), a, b, c);
@ -106,6 +111,16 @@ namespace Ryujinx.Graphics.Shader.Translation
return context.Add(Instruction.EndPrimitive); return context.Add(Instruction.EndPrimitive);
} }
/// <summary>
/// Adds a FindFirstSetS32 operation with a new local as destination.
/// </summary>
/// <param name="context">Emitter context the operation is added to</param>
/// <param name="a">Source operand</param>
/// <returns>Value returned by the context for the added operation</returns>
public static Operand FindFirstSetS32(this EmitterContext context, Operand a)
    => context.Add(Instruction.FindFirstSetS32, Local(), a);
/// <summary>
/// Adds a FindFirstSetU32 operation with a new local as destination.
/// </summary>
/// <param name="context">Emitter context the operation is added to</param>
/// <param name="a">Source operand</param>
/// <returns>Value returned by the context for the added operation</returns>
public static Operand FindFirstSetU32(this EmitterContext context, Operand a)
    => context.Add(Instruction.FindFirstSetU32, Local(), a);
public static Operand FPAbsNeg(this EmitterContext context, Operand a, bool abs, bool neg) public static Operand FPAbsNeg(this EmitterContext context, Operand a, bool abs, bool neg)
{ {
return context.FPNegate(context.FPAbsolute(a, abs), neg); return context.FPNegate(context.FPAbsolute(a, abs), neg);
@ -256,6 +271,11 @@ namespace Ryujinx.Graphics.Shader.Translation
return context.Add(Instruction.Truncate, Local(), a); return context.Add(Instruction.Truncate, Local(), a);
} }
/// <summary>
/// Adds a SwizzleAdd operation with a new local as destination.
/// The mask is passed to the operation as a constant third source.
/// </summary>
/// <param name="context">Emitter context the operation is added to</param>
/// <param name="a">First source operand</param>
/// <param name="b">Second source operand</param>
/// <param name="mask">Swizzle mask, wrapped into a constant operand</param>
/// <returns>Value returned by the context for the added operation</returns>
public static Operand FPSwizzleAdd(this EmitterContext context, Operand a, Operand b, int mask)
    => context.Add(Instruction.SwizzleAdd, Local(), a, b, Const(mask));
public static Operand IAbsNeg(this EmitterContext context, Operand a, bool abs, bool neg) public static Operand IAbsNeg(this EmitterContext context, Operand a, bool abs, bool neg)
{ {
return context.INegate(context.IAbsolute(a, abs), neg); return context.INegate(context.IAbsolute(a, abs), neg);
@ -418,6 +438,26 @@ namespace Ryujinx.Graphics.Shader.Translation
return context.Add(Instruction.ShiftRightU32, Local(), a, b); return context.Add(Instruction.ShiftRightU32, Local(), a, b);
} }
/// <summary>
/// Adds a Shuffle operation with a new local as destination.
/// </summary>
/// <param name="context">Emitter context the operation is added to</param>
/// <param name="a">First source operand</param>
/// <param name="b">Second source operand</param>
/// <param name="c">Third source operand</param>
/// <returns>Value returned by the context for the added operation</returns>
public static Operand Shuffle(this EmitterContext context, Operand a, Operand b, Operand c)
    => context.Add(Instruction.Shuffle, Local(), a, b, c);
/// <summary>
/// Adds a ShuffleDown operation with a new local as destination.
/// </summary>
/// <param name="context">Emitter context the operation is added to</param>
/// <param name="a">First source operand</param>
/// <param name="b">Second source operand</param>
/// <param name="c">Third source operand</param>
/// <returns>Value returned by the context for the added operation</returns>
public static Operand ShuffleDown(this EmitterContext context, Operand a, Operand b, Operand c)
    => context.Add(Instruction.ShuffleDown, Local(), a, b, c);
/// <summary>
/// Adds a ShuffleUp operation with a new local as destination.
/// </summary>
/// <param name="context">Emitter context the operation is added to</param>
/// <param name="a">First source operand</param>
/// <param name="b">Second source operand</param>
/// <param name="c">Third source operand</param>
/// <returns>Value returned by the context for the added operation</returns>
public static Operand ShuffleUp(this EmitterContext context, Operand a, Operand b, Operand c)
    => context.Add(Instruction.ShuffleUp, Local(), a, b, c);
/// <summary>
/// Adds a ShuffleXor operation with a new local as destination.
/// </summary>
/// <param name="context">Emitter context the operation is added to</param>
/// <param name="a">First source operand</param>
/// <param name="b">Second source operand</param>
/// <param name="c">Third source operand</param>
/// <returns>Value returned by the context for the added operation</returns>
public static Operand ShuffleXor(this EmitterContext context, Operand a, Operand b, Operand c)
    => context.Add(Instruction.ShuffleXor, Local(), a, b, c);
public static Operand StoreGlobal(this EmitterContext context, Operand a, Operand b) public static Operand StoreGlobal(this EmitterContext context, Operand a, Operand b)
{ {
return context.Add(Instruction.StoreGlobal, null, a, b); return context.Add(Instruction.StoreGlobal, null, a, b);

View file

@ -21,6 +21,10 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
EvaluateBinary(operation, (x, y) => x + y); EvaluateBinary(operation, (x, y) => x + y);
break; break;
case Instruction.BitCount:
EvaluateUnary(operation, (x) => BitCount(x));
break;
case Instruction.BitwiseAnd: case Instruction.BitwiseAnd:
EvaluateBinary(operation, (x, y) => x & y); EvaluateBinary(operation, (x, y) => x & y);
break; break;
@ -208,6 +212,21 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
return true; return true;
} }
/// <summary>
/// Counts the number of bits set to 1 on a 32-bit value.
/// </summary>
/// <param name="value">Value whose set bits are counted</param>
/// <returns>Number of set bits, from 0 to 32</returns>
private static int BitCount(int value)
{
    // The cast only reinterprets the bits, so negative values are counted correctly.
    // The name is fully qualified to avoid requiring a new using directive on the file.
    return System.Numerics.BitOperations.PopCount(unchecked((uint)value));
}
private static void BitfieldExtractS32(Operation operation) private static void BitfieldExtractS32(Operation operation)
{ {
int value = GetBitfieldExtractValue(operation); int value = GetBitfieldExtractValue(operation);

View file

@ -1,5 +1,6 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation; using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic; using System.Collections.Generic;
using System.Diagnostics;
using System.Linq; using System.Linq;
namespace Ryujinx.Graphics.Shader.Translation.Optimizations namespace Ryujinx.Graphics.Shader.Translation.Optimizations
@ -59,7 +60,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
modified = true; modified = true;
} }
else if (operation.Inst == Instruction.PackHalf2x16 && PropagatePack(operation)) else if ((operation.Inst == Instruction.PackHalf2x16 && PropagatePack(operation)) ||
(operation.Inst == Instruction.ShuffleXor && MatchDdxOrDdy(operation)))
{ {
if (operation.Dest.UseOps.Count == 0) if (operation.Dest.UseOps.Count == 0)
{ {
@ -135,6 +137,84 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
return modified; return modified;
} }
/// <summary>
/// Replaces SwizzleAdd uses of a ShuffleXor result with Ddx or Ddy,
/// when the pair matches one of the two recognized patterns.
/// </summary>
/// <remarks>
/// The shuffle must have a constant second source of 1 (Ddx) or 2 (Ddy),
/// and a constant third source of 0x1c03. The shuffle itself is left in place.
/// </remarks>
/// <param name="operation">Operation to match, it must be a ShuffleXor</param>
/// <returns>True if at least one use was replaced, false otherwise</returns>
public static bool MatchDdxOrDdy(Operation operation)
{
    // It's assumed that "operation.Inst" is ShuffleXor,
    // that should be checked before calling this method.
    Debug.Assert(operation.Inst == Instruction.ShuffleXor);

    Operand src2 = operation.GetSource(1);
    Operand src3 = operation.GetSource(2);

    if (src2.Type != OperandType.Constant || (src2.Value != 1 && src2.Value != 2))
    {
        return false;
    }

    if (src3.Type != OperandType.Constant || src3.Value != 0x1c03)
    {
        return false;
    }

    bool isDdy = src2.Value == 2;

    // Each derivative direction is only matched with its own swizzle mask.
    int expectedMask = isDdy ? 0b10100101 : 0b10011001;

    Instruction derivativeInst = isDdy ? Instruction.Ddy : Instruction.Ddx;

    bool modified = false;

    // We can replace any use by a FSWZADD with DDX/DDY, when
    // the following conditions are true:
    // - The mask should be 0b10100101 for DDY, or 0b10011001 for DDX.
    // - The first source operand must be the shuffle output.
    // - The second source operand must be the shuffle first source operand.
    // The uses are copied to an array before iterating; presumably because
    // TurnInto changes the use list of its old sources - confirm.
    INode[] uses = operation.Dest.UseOps.ToArray();

    foreach (INode use in uses)
    {
        if (!(use is Operation useOp) || useOp.Inst != Instruction.SwizzleAdd)
        {
            continue;
        }

        Operand fswzaddSrc1 = useOp.GetSource(0);
        Operand fswzaddSrc2 = useOp.GetSource(1);
        Operand fswzaddSrc3 = useOp.GetSource(2);

        if (fswzaddSrc1 != operation.Dest)
        {
            continue;
        }

        if (fswzaddSrc2 != operation.GetSource(0))
        {
            continue;
        }

        if (fswzaddSrc3.Type != OperandType.Constant || fswzaddSrc3.Value != expectedMask)
        {
            continue;
        }

        useOp.TurnInto(derivativeInst, fswzaddSrc2);

        modified = true;
    }

    return modified;
}
private static void RemoveNode(BasicBlock block, LinkedListNode<INode> llNode) private static void RemoveNode(BasicBlock block, LinkedListNode<INode> llNode)
{ {
// Remove a node from the nodes list, and also remove itself // Remove a node from the nodes list, and also remove itself