Initial support for shader half float instructions (#507)

This commit is contained in:
gdkchan 2019-01-31 09:43:24 -03:00 committed by GitHub
parent c81abdde4c
commit e10ff17e2d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 244 additions and 65 deletions

View file

@ -63,6 +63,7 @@ namespace Ryujinx.Graphics.Gal.Shader
private Dictionary<int, ShaderDeclInfo> m_OutAttributes; private Dictionary<int, ShaderDeclInfo> m_OutAttributes;
private Dictionary<int, ShaderDeclInfo> m_Gprs; private Dictionary<int, ShaderDeclInfo> m_Gprs;
private Dictionary<int, ShaderDeclInfo> m_GprsHalf;
private Dictionary<int, ShaderDeclInfo> m_Preds; private Dictionary<int, ShaderDeclInfo> m_Preds;
public IReadOnlyDictionary<ShaderIrOp, ShaderDeclInfo> CbTextures => m_CbTextures; public IReadOnlyDictionary<ShaderIrOp, ShaderDeclInfo> CbTextures => m_CbTextures;
@ -75,6 +76,7 @@ namespace Ryujinx.Graphics.Gal.Shader
public IReadOnlyDictionary<int, ShaderDeclInfo> OutAttributes => m_OutAttributes; public IReadOnlyDictionary<int, ShaderDeclInfo> OutAttributes => m_OutAttributes;
public IReadOnlyDictionary<int, ShaderDeclInfo> Gprs => m_Gprs; public IReadOnlyDictionary<int, ShaderDeclInfo> Gprs => m_Gprs;
public IReadOnlyDictionary<int, ShaderDeclInfo> GprsHalf => m_GprsHalf;
public IReadOnlyDictionary<int, ShaderDeclInfo> Preds => m_Preds; public IReadOnlyDictionary<int, ShaderDeclInfo> Preds => m_Preds;
public GalShaderType ShaderType { get; private set; } public GalShaderType ShaderType { get; private set; }
@ -93,6 +95,7 @@ namespace Ryujinx.Graphics.Gal.Shader
m_OutAttributes = new Dictionary<int, ShaderDeclInfo>(); m_OutAttributes = new Dictionary<int, ShaderDeclInfo>();
m_Gprs = new Dictionary<int, ShaderDeclInfo>(); m_Gprs = new Dictionary<int, ShaderDeclInfo>();
m_GprsHalf = new Dictionary<int, ShaderDeclInfo>();
m_Preds = new Dictionary<int, ShaderDeclInfo>(); m_Preds = new Dictionary<int, ShaderDeclInfo>();
} }
@ -147,6 +150,7 @@ namespace Ryujinx.Graphics.Gal.Shader
Merge(Combined.m_OutAttributes, VpA.m_OutAttributes, VpB.m_OutAttributes); Merge(Combined.m_OutAttributes, VpA.m_OutAttributes, VpB.m_OutAttributes);
Merge(Combined.m_Gprs, VpA.m_Gprs, VpB.m_Gprs); Merge(Combined.m_Gprs, VpA.m_Gprs, VpB.m_Gprs);
Merge(Combined.m_GprsHalf, VpA.m_GprsHalf, VpB.m_GprsHalf);
Merge(Combined.m_Preds, VpA.m_Preds, VpB.m_Preds); Merge(Combined.m_Preds, VpA.m_Preds, VpB.m_Preds);
//Merge input attributes. //Merge input attributes.
@ -343,8 +347,21 @@ namespace Ryujinx.Graphics.Gal.Shader
{ {
string Name = GetGprName(Gpr.Index); string Name = GetGprName(Gpr.Index);
if (Gpr.RegisterSize == ShaderRegisterSize.Single)
{
m_Gprs.TryAdd(Gpr.Index, new ShaderDeclInfo(Name, Gpr.Index)); m_Gprs.TryAdd(Gpr.Index, new ShaderDeclInfo(Name, Gpr.Index));
} }
else if (Gpr.RegisterSize == ShaderRegisterSize.Half)
{
Name += "_h" + Gpr.HalfPart;
m_GprsHalf.TryAdd((Gpr.Index << 1) | Gpr.HalfPart, new ShaderDeclInfo(Name, Gpr.Index));
}
else /* if (Gpr.RegisterSize == ShaderRegisterSize.Double) */
{
throw new NotImplementedException("Double types are not supported.");
}
}
break; break;
} }

View file

@ -364,6 +364,7 @@ namespace Ryujinx.Graphics.Gal.Shader
private void PrintDeclGprs() private void PrintDeclGprs()
{ {
PrintDecls(Decl.Gprs); PrintDecls(Decl.Gprs);
PrintDecls(Decl.GprsHalf);
} }
private void PrintDeclPreds() private void PrintDeclPreds()
@ -897,7 +898,23 @@ namespace Ryujinx.Graphics.Gal.Shader
private string GetName(ShaderIrOperGpr Gpr) private string GetName(ShaderIrOperGpr Gpr)
{ {
return Gpr.IsConst ? "0" : GetNameWithSwizzle(Decl.Gprs, Gpr.Index); if (Gpr.IsConst)
{
return "0";
}
if (Gpr.RegisterSize == ShaderRegisterSize.Single)
{
return GetNameWithSwizzle(Decl.Gprs, Gpr.Index);
}
else if (Gpr.RegisterSize == ShaderRegisterSize.Half)
{
return GetNameWithSwizzle(Decl.GprsHalf, (Gpr.Index << 1) | Gpr.HalfPart);
}
else /* if (Gpr.RegisterSize == ShaderRegisterSize.Double) */
{
throw new NotImplementedException("Double types are not supported.");
}
} }
private string GetValue(ShaderIrOperImm Imm) private string GetValue(ShaderIrOperImm Imm)

View file

@ -6,6 +6,14 @@ namespace Ryujinx.Graphics.Gal.Shader
{ {
static partial class ShaderDecode static partial class ShaderDecode
{ {
//Output format of a half float instruction.
//EmitBinaryHalfOp obtains it by casting the value of OpCode.Read(49, 3),
//so the numeric value of each member is significant.
private enum HalfOutputType
{
    //Two results are written, one to each half of the destination register.
    PackedFp16 = 0,

    //One result is written to the whole destination register.
    Fp32 = 1,

    //One result is written to half 0 of the destination register.
    MergeH0 = 2,

    //One result is written to half 1 of the destination register.
    MergeH1 = 3
}
public static void Bfe_C(ShaderIrBlock Block, long OpCode, int Position) public static void Bfe_C(ShaderIrBlock Block, long OpCode, int Position)
{ {
EmitBfe(Block, OpCode, ShaderOper.CR); EmitBfe(Block, OpCode, ShaderOper.CR);
@ -144,6 +152,16 @@ namespace Ryujinx.Graphics.Gal.Shader
EmitFsetp(Block, OpCode, ShaderOper.RR); EmitFsetp(Block, OpCode, ShaderOper.RR);
} }
//Decoder entry for Hadd2_R: emits the half float binary operation with Fadd.
//Position is not used by this handler.
public static void Hadd2_R(ShaderIrBlock Block, long OpCode, int Position)
    => EmitBinaryHalfOp(Block, OpCode, ShaderIrInst.Fadd);
//Decoder entry for Hmul2_R: emits the half float binary operation with Fmul.
//Position is not used by this handler.
public static void Hmul2_R(ShaderIrBlock Block, long OpCode, int Position)
    => EmitBinaryHalfOp(Block, OpCode, ShaderIrInst.Fmul);
public static void Iadd_C(ShaderIrBlock Block, long OpCode, int Position) public static void Iadd_C(ShaderIrBlock Block, long OpCode, int Position)
{ {
EmitIadd(Block, OpCode, ShaderOper.CR); EmitIadd(Block, OpCode, ShaderOper.CR);
@ -1041,6 +1059,47 @@ namespace Ryujinx.Graphics.Gal.Shader
Block.AddNode(OpCode.PredNode(new ShaderIrAsg(P0Node, Op))); Block.AddNode(OpCode.PredNode(new ShaderIrAsg(P0Node, Op)));
} }
//Emits a two operand half float operation (Inst) for the given opcode.
//
//Values read from the opcode:
// - bit 30: absolute value modifier for operand B.
// - bit 31: negate modifier for operand B.
// - bit 32: flag passed to GetAluFsat for the result.
// - bit 44: absolute value modifier for operand A.
// - Read(49, 3): output type, see HalfOutputType.
//
//One assignment node is added to Block for each element that is written,
//each one wrapped with OpCode.PredNode.
private static void EmitBinaryHalfOp(ShaderIrBlock Block, long OpCode, ShaderIrInst Inst)
{
    bool AbsB = OpCode.Read(30);
    bool NegB = OpCode.Read(31);
    bool Sat  = OpCode.Read(32);
    bool AbsA = OpCode.Read(44);

    ShaderIrOperGpr[] VecA = OpCode.GprHalfVec8();
    ShaderIrOperGpr[] VecB = OpCode.GprHalfVec20();

    HalfOutputType OutputType = (HalfOutputType)OpCode.Read(49, 3);

    //Only the packed FP16 output writes two elements, all others write one.
    int Elems = OutputType == HalfOutputType.PackedFp16 ? 2 : 1;

    //MergeH1 writes element 1, everything else starts at element 0.
    int First = OutputType == HalfOutputType.MergeH1 ? 1 : 0;

    //The upper bound has to be relative to First. With the bound fixed at
    //Elems, MergeH1 (First = 1, Elems = 1) never entered the loop and
    //no assignment was emitted for it.
    int End = First + Elems;

    for (int Index = First; Index < End; Index++)
    {
        ShaderIrNode OperA = GetAluFabs    (VecA[Index], AbsA);
        ShaderIrNode OperB = GetAluFabsFneg(VecB[Index], AbsB, NegB);

        ShaderIrNode Op = new ShaderIrOp(Inst, OperA, OperB);

        ShaderIrOperGpr Dst = GetHalfDst(OpCode, OutputType, Index);

        Block.AddNode(OpCode.PredNode(new ShaderIrAsg(Dst, GetAluFsat(Op, Sat))));
    }
}
//Returns the destination register operand for one element of a half float result.
//
// - PackedFp16: half Index of the destination register.
// - Fp32: the whole destination register (Index is ignored).
// - MergeH0: half 0 of the destination register (Index is ignored).
// - MergeH1: half 1 of the destination register (Index is ignored).
//
//Throws ArgumentException when OutputType is not one of the values above.
private static ShaderIrOperGpr GetHalfDst(long OpCode, HalfOutputType OutputType, int Index)
{
    switch (OutputType)
    {
        case HalfOutputType.PackedFp16: return OpCode.GprHalf0(Index);
        case HalfOutputType.Fp32:       return OpCode.Gpr0();
        case HalfOutputType.MergeH0:    return OpCode.GprHalf0(0);
        case HalfOutputType.MergeH1:    return OpCode.GprHalf0(1);
    }

    //The single string constructor takes the message, not the parameter name,
    //so the two argument overload is used to fill both properly.
    throw new ArgumentException("Invalid half output type \"" + OutputType + "\".", nameof(OutputType));
}
private static void EmitLop(ShaderIrBlock Block, long OpCode, ShaderOper Oper) private static void EmitLop(ShaderIrBlock Block, long OpCode, ShaderOper Oper)
{ {
int SubOp = OpCode.Read(41, 3); int SubOp = OpCode.Read(41, 3);

View file

@ -6,8 +6,6 @@ namespace Ryujinx.Graphics.Gal.Shader
{ {
static partial class ShaderDecode static partial class ShaderDecode
{ {
private const int TempRegStart = 0x100;
private const int ____ = 0x0; private const int ____ = 0x0;
private const int R___ = 0x1; private const int R___ = 0x1;
private const int _G__ = 0x2; private const int _G__ = 0x2;
@ -149,14 +147,18 @@ namespace Ryujinx.Graphics.Gal.Shader
for (int Index = 0; Index < Coords.Length; Index++) for (int Index = 0; Index < Coords.Length; Index++)
{ {
Coords[Index] = OpCode.Gpr8(); ShaderIrOperGpr CoordReg = OpCode.Gpr8();
Coords[Index].Index += Index; CoordReg.Index += Index;
if (Coords[Index].Index > ShaderIrOperGpr.ZRIndex) if (!CoordReg.IsValidRegister)
{ {
Coords[Index].Index = ShaderIrOperGpr.ZRIndex; CoordReg.Index = ShaderIrOperGpr.ZRIndex;
} }
Coords[Index] = ShaderIrOperGpr.MakeTemporary(Index);
Block.AddNode(new ShaderIrAsg(Coords[Index], CoordReg));
} }
int ChMask = OpCode.Read(31, 0xf); int ChMask = OpCode.Read(31, 0xf);
@ -167,17 +169,6 @@ namespace Ryujinx.Graphics.Gal.Shader
ShaderIrInst Inst = GprHandle ? ShaderIrInst.Texb : ShaderIrInst.Texs; ShaderIrInst Inst = GprHandle ? ShaderIrInst.Texb : ShaderIrInst.Texs;
for (int Ch = 0; Ch < 4; Ch++)
{
ShaderIrOperGpr Dst = new ShaderIrOperGpr(TempRegStart + Ch);
ShaderIrMetaTex Meta = new ShaderIrMetaTex(Ch);
ShaderIrOp Op = new ShaderIrOp(Inst, Coords[0], Coords[1], OperC, Meta);
Block.AddNode(OpCode.PredNode(new ShaderIrAsg(Dst, Op)));
}
int RegInc = 0; int RegInc = 0;
for (int Ch = 0; Ch < 4; Ch++) for (int Ch = 0; Ch < 4; Ch++)
@ -187,18 +178,20 @@ namespace Ryujinx.Graphics.Gal.Shader
continue; continue;
} }
ShaderIrOperGpr Src = new ShaderIrOperGpr(TempRegStart + Ch);
ShaderIrOperGpr Dst = OpCode.Gpr0(); ShaderIrOperGpr Dst = OpCode.Gpr0();
Dst.Index += RegInc++; Dst.Index += RegInc++;
if (Dst.Index >= ShaderIrOperGpr.ZRIndex) if (!Dst.IsValidRegister || Dst.IsConst)
{ {
continue; continue;
} }
Block.AddNode(OpCode.PredNode(new ShaderIrAsg(Dst, Src))); ShaderIrMetaTex Meta = new ShaderIrMetaTex(Ch);
ShaderIrOp Op = new ShaderIrOp(Inst, Coords[0], Coords[1], OperC, Meta);
Block.AddNode(OpCode.PredNode(new ShaderIrAsg(Dst, Op)));
} }
} }
@ -215,57 +208,81 @@ namespace Ryujinx.Graphics.Gal.Shader
private static void EmitTexs(ShaderIrBlock Block, long OpCode, ShaderIrInst Inst) private static void EmitTexs(ShaderIrBlock Block, long OpCode, ShaderIrInst Inst)
{ {
//TODO: Support other formats. //TODO: Support other formats.
ShaderIrNode OperA = OpCode.Gpr8();
ShaderIrNode OperB = OpCode.Gpr20();
ShaderIrNode OperC = OpCode.Imm13_36();
int LutIndex; int LutIndex;
LutIndex = OpCode.Gpr0 ().Index != ShaderIrOperGpr.ZRIndex ? 1 : 0; LutIndex = !OpCode.Gpr0().IsConst ? 1 : 0;
LutIndex |= OpCode.Gpr28().Index != ShaderIrOperGpr.ZRIndex ? 2 : 0; LutIndex |= !OpCode.Gpr28().IsConst ? 2 : 0;
if (LutIndex == 0) if (LutIndex == 0)
{ {
//Both registers are RZ, color is not written anywhere. //Both destination registers are RZ, do nothing.
//So, the intruction is basically a no-op.
return; return;
} }
int ChMask = MaskLut[LutIndex, OpCode.Read(50, 7)]; bool Fp16 = !OpCode.Read(59);
for (int Ch = 0; Ch < 4; Ch++) int DstIncrement = 0;
{
ShaderIrOperGpr Dst = new ShaderIrOperGpr(TempRegStart + Ch);
ShaderIrMetaTex Meta = new ShaderIrMetaTex(Ch);
ShaderIrOp Op = new ShaderIrOp(Inst, OperA, OperB, OperC, Meta);
Block.AddNode(OpCode.PredNode(new ShaderIrAsg(Dst, Op)));
}
int RegInc = 0;
ShaderIrOperGpr GetDst() ShaderIrOperGpr GetDst()
{ {
ShaderIrOperGpr Dst; ShaderIrOperGpr Dst;
if (Fp16)
{
//FP16 mode, two components are packed into the two
//halves of a 32-bit register, as two half-float values.
int HalfPart = DstIncrement & 1;
switch (LutIndex)
{
case 1: Dst = OpCode.GprHalf0(HalfPart); break;
case 2: Dst = OpCode.GprHalf28(HalfPart); break;
case 3: Dst = (DstIncrement >> 1) != 0
? OpCode.GprHalf28(HalfPart)
: OpCode.GprHalf0(HalfPart); break;
default: throw new InvalidOperationException();
}
}
else
{
//32-bit mode, each component uses one register.
//Two components use two consecutive registers.
switch (LutIndex) switch (LutIndex)
{ {
case 1: Dst = OpCode.Gpr0(); break; case 1: Dst = OpCode.Gpr0(); break;
case 2: Dst = OpCode.Gpr28(); break; case 2: Dst = OpCode.Gpr28(); break;
case 3: Dst = (RegInc >> 1) != 0 case 3: Dst = (DstIncrement >> 1) != 0
? OpCode.Gpr28() ? OpCode.Gpr28()
: OpCode.Gpr0 (); break; : OpCode.Gpr0(); break;
default: throw new InvalidOperationException(); default: throw new InvalidOperationException();
} }
Dst.Index += RegInc++ & 1; Dst.Index += DstIncrement & 1;
}
DstIncrement++;
return Dst; return Dst;
} }
int ChMask = MaskLut[LutIndex, OpCode.Read(50, 7)];
if (ChMask == 0)
{
//All channels are disabled, do nothing.
return;
}
ShaderIrNode OperC = OpCode.Imm13_36();
ShaderIrOperGpr Coord0 = ShaderIrOperGpr.MakeTemporary(0);
ShaderIrOperGpr Coord1 = ShaderIrOperGpr.MakeTemporary(1);
Block.AddNode(new ShaderIrAsg(Coord0, OpCode.Gpr8()));
Block.AddNode(new ShaderIrAsg(Coord1, OpCode.Gpr20()));
for (int Ch = 0; Ch < 4; Ch++) for (int Ch = 0; Ch < 4; Ch++)
{ {
if (!IsChannelUsed(ChMask, Ch)) if (!IsChannelUsed(ChMask, Ch))
@ -273,13 +290,15 @@ namespace Ryujinx.Graphics.Gal.Shader
continue; continue;
} }
ShaderIrOperGpr Src = new ShaderIrOperGpr(TempRegStart + Ch); ShaderIrMetaTex Meta = new ShaderIrMetaTex(Ch);
ShaderIrOp Op = new ShaderIrOp(Inst, Coord0, Coord1, OperC, Meta);
ShaderIrOperGpr Dst = GetDst(); ShaderIrOperGpr Dst = GetDst();
if (Dst.Index != ShaderIrOperGpr.ZRIndex) if (Dst.IsValidRegister && !Dst.IsConst)
{ {
Block.AddNode(OpCode.PredNode(new ShaderIrAsg(Dst, Src))); Block.AddNode(OpCode.PredNode(new ShaderIrAsg(Dst, Op)));
} }
} }
} }

View file

@ -75,6 +75,49 @@ namespace Ryujinx.Graphics.Gal.Shader
return new ShaderIrOperGpr(OpCode.Read(28, 0xff)); return new ShaderIrOperGpr(OpCode.Read(28, 0xff));
} }
//Builds the two element half float source vector whose register index
//comes from Read(8, 0xff) and whose half selection comes from Read(47, 3).
private static ShaderIrOperGpr[] GprHalfVec8(this long OpCode)
{
    int Gpr  = OpCode.Read(8, 0xff);
    int Mask = OpCode.Read(47, 3);

    return GetGprHalfVec2(Gpr, Mask);
}
//Builds the two element half float source vector whose register index
//comes from Read(20, 0xff) and whose half selection comes from Read(28, 3).
private static ShaderIrOperGpr[] GprHalfVec20(this long OpCode)
{
    int Gpr  = OpCode.Read(20, 0xff);
    int Mask = OpCode.Read(28, 3);

    return GetGprHalfVec2(Gpr, Mask);
}
//Creates the two operands of a half float source vector.
//Gpr is the register index, Mask selects what each element reads:
// - Mask == 1: both elements are the whole register (FP32).
// - Otherwise: bit 0 of Mask picks the high (set) or low (clear) half for
//   element 0, and bit 1 does the same for element 1.
//NOTE(review): with this mapping Mask 0 gives (low, low) and Mask 2 gives
//(low, high). A packed FP16 source would be expected to read (low, high),
//confirm the mapping against the hardware encoding.
private static ShaderIrOperGpr[] GetGprHalfVec2(int Gpr, int Mask)
{
    ShaderIrOperGpr[] Vec = new ShaderIrOperGpr[2];

    if (Mask == 1)
    {
        //This value is used for FP32, the whole 32-bit register
        //is used as each element of the vector.
        Vec[0] = new ShaderIrOperGpr(Gpr);
        Vec[1] = new ShaderIrOperGpr(Gpr);

        return Vec;
    }

    ShaderIrOperGpr Low  = new ShaderIrOperGpr(Gpr, 0);
    ShaderIrOperGpr High = new ShaderIrOperGpr(Gpr, 1);

    bool Elem0IsHigh = (Mask & 1) != 0;
    bool Elem1IsHigh = (Mask & 2) != 0;

    Vec[0] = Elem0IsHigh ? High : Low;
    Vec[1] = Elem1IsHigh ? High : Low;

    return Vec;
}
//Creates a half register operand for the register index read with
//Read(0, 0xff), selecting the half given by HalfPart.
private static ShaderIrOperGpr GprHalf0(this long OpCode, int HalfPart)
{
    int Index = OpCode.Read(0, 0xff);

    return new ShaderIrOperGpr(Index, HalfPart);
}
//Creates a half register operand for the register index read with
//Read(28, 0xff), selecting the half given by HalfPart.
private static ShaderIrOperGpr GprHalf28(this long OpCode, int HalfPart)
{
    int Index = OpCode.Read(28, 0xff);

    return new ShaderIrOperGpr(Index, HalfPart);
}
private static ShaderIrOperImm Imm5_39(this long OpCode) private static ShaderIrOperImm Imm5_39(this long OpCode)
{ {
return new ShaderIrOperImm(OpCode.Read(39, 0x1f)); return new ShaderIrOperImm(OpCode.Read(39, 0x1f));

View file

@ -6,13 +6,26 @@ namespace Ryujinx.Graphics.Gal.Shader
public bool IsConst => Index == ZRIndex; public bool IsConst => Index == ZRIndex;
public bool IsValidRegister => (Index <= ZRIndex); public bool IsValidRegister => (uint)Index <= ZRIndex;
public int Index { get; set; } public int Index { get; set; }
public int HalfPart { get; set; }
public ShaderRegisterSize RegisterSize { get; private set; }
public ShaderIrOperGpr(int Index) public ShaderIrOperGpr(int Index)
{ {
this.Index = Index; this.Index = Index;
RegisterSize = ShaderRegisterSize.Single;
}
public ShaderIrOperGpr(int Index, int HalfPart)
{
this.Index = Index;
this.HalfPart = HalfPart;
RegisterSize = ShaderRegisterSize.Half;
} }
public static ShaderIrOperGpr MakeTemporary(int Index = 0) public static ShaderIrOperGpr MakeTemporary(int Index = 0)

View file

@ -58,6 +58,8 @@ namespace Ryujinx.Graphics.Gal.Shader
Set("010010111011xx", ShaderDecode.Fsetp_C); Set("010010111011xx", ShaderDecode.Fsetp_C);
Set("0011011x1011xx", ShaderDecode.Fsetp_I); Set("0011011x1011xx", ShaderDecode.Fsetp_I);
Set("010110111011xx", ShaderDecode.Fsetp_R); Set("010110111011xx", ShaderDecode.Fsetp_R);
Set("0101110100010x", ShaderDecode.Hadd2_R);
Set("0101110100001x", ShaderDecode.Hmul2_R);
Set("0100110010111x", ShaderDecode.I2f_C); Set("0100110010111x", ShaderDecode.I2f_C);
Set("0011100x10111x", ShaderDecode.I2f_I); Set("0011100x10111x", ShaderDecode.I2f_I);
Set("0101110010111x", ShaderDecode.I2f_R); Set("0101110010111x", ShaderDecode.I2f_R);
@ -118,7 +120,7 @@ namespace Ryujinx.Graphics.Gal.Shader
Set("110000xxxx111x", ShaderDecode.Tex); Set("110000xxxx111x", ShaderDecode.Tex);
Set("1101111010111x", ShaderDecode.Tex_B); Set("1101111010111x", ShaderDecode.Tex_B);
Set("1101111101001x", ShaderDecode.Texq); Set("1101111101001x", ShaderDecode.Texq);
Set("1101100xxxxxxx", ShaderDecode.Texs); Set("1101x00xxxxxxx", ShaderDecode.Texs);
Set("1101101xxxxxxx", ShaderDecode.Tlds); Set("1101101xxxxxxx", ShaderDecode.Tlds);
Set("01011111xxxxxx", ShaderDecode.Vmad); Set("01011111xxxxxx", ShaderDecode.Vmad);
Set("0100111xxxxxxx", ShaderDecode.Xmad_CR); Set("0100111xxxxxxx", ShaderDecode.Xmad_CR);

View file

@ -0,0 +1,9 @@
namespace Ryujinx.Graphics.Gal.Shader
{
    //Size of the value referenced by a register operand (ShaderIrOperGpr.RegisterSize).
    enum ShaderRegisterSize
    {
        //Set by the ShaderIrOperGpr constructor that takes a half part.
        Half = 0,

        //Set by the ShaderIrOperGpr constructor that takes only the index.
        Single = 1,

        //Rejected with NotImplementedException by the code that checks RegisterSize.
        Double = 2
    }
}

View file

@ -39,7 +39,7 @@ namespace Ryujinx.Graphics.Gal
ulong Instruction = 0; ulong Instruction = 0;
//Dump until a NOP instruction is found //Dump until a NOP instruction is found
while ((Instruction >> 52 & 0xfff8) != 0x50b0) while ((Instruction >> 48 & 0xfff8) != 0x50b0)
{ {
uint Word0 = (uint)Memory.ReadInt32(Position + 0x50 + Offset + 0); uint Word0 = (uint)Memory.ReadInt32(Position + 0x50 + Offset + 0);
uint Word1 = (uint)Memory.ReadInt32(Position + 0x50 + Offset + 4); uint Word1 = (uint)Memory.ReadInt32(Position + 0x50 + Offset + 4);