From 1586450a38caabdfe62c10391c28f52f0c88372e Mon Sep 17 00:00:00 2001
From: gdkchan
Date: Wed, 25 Mar 2020 11:49:10 -0300
Subject: [PATCH] Implement VMNMX shader instruction (#1032)

* Implement VMNMX shader instruction

* No need for the gap on the enum

* Fix typo
---
 .../Decoders/OpCodeTable.cs | 1 +
 .../Decoders/OpCodeVideo.cs | 85 +++++++++++-
 .../Decoders/VideoPostOp.cs | 14 ++
 Ryujinx.Graphics.Shader/Decoders/VideoType.cs | 15 +++
 .../Instructions/InstEmitVideo.cs | 121 +++++++++++++++++-
 5 files changed, 232 insertions(+), 4 deletions(-)
 create mode 100644 Ryujinx.Graphics.Shader/Decoders/VideoPostOp.cs
 create mode 100644 Ryujinx.Graphics.Shader/Decoders/VideoType.cs

diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs
index 3878dd872..4579d6509 100644
--- a/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs
@@ -205,6 +205,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
             Set("1101111101001x", InstEmit.Txq, typeof(OpCodeTex));
             Set("1101111101010x", InstEmit.TxqB, typeof(OpCodeTex));
             Set("01011111xxxxxx", InstEmit.Vmad, typeof(OpCodeVideo));
+            Set("0011101xxxxxxx", InstEmit.Vmnmx, typeof(OpCodeVideo)); // Decoded by the same OpCodeVideo class as Vmad.
             Set("0101000011011x", InstEmit.Vote, typeof(OpCodeVote));
             Set("0100111xxxxxxx", InstEmit.Xmad, typeof(OpCodeAluCbuf));
             Set("0011011x00xxxx", InstEmit.Xmad, typeof(OpCodeAluImm));
diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeVideo.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeVideo.cs
index 15dcfa981..c2bdc22f4 100644
--- a/Ryujinx.Graphics.Shader/Decoders/OpCodeVideo.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeVideo.cs
@@ -6,19 +6,98 @@ namespace Ryujinx.Graphics.Shader.Decoders
     {
         public Register Rd { get; }
         public Register Ra { get; }
+        public Register Rb { get; } // Second source register; only used by Vmnmx when HasRb is true.
         public Register Rc { get; }
 
-        public bool SetCondCode { get; protected set; }
-        public bool Saturate { get; protected set; }
+        public int Immediate { get; } // Only assigned when opcode bit 50 is clear; otherwise stays 0.
+
+        public int RaSelection { get; } // Sub-word selector for operand A, consumed by InstEmit.Extend.
+        public int RbSelection { get; } // Sub-word selector for operand B, consumed by InstEmit.Extend.
+
+        public bool SetCondCode { get; }
+
+        public bool HasRb { get; } // Opcode bit 50: operand B is the Rb register rather than Immediate.
+
+        public VideoType RaType { get; } // Size of operand A, optionally combined with VideoType.Signed.
+        public VideoType RbType { get; } // Size of operand B, optionally combined with VideoType.Signed.
+
+        public VideoPostOp PostOp { get; } // Operation applied to the result together with operand C.
+
+        public bool DstSigned { get; } // Opcode bit 54; used for saturation and for the Min/Max post-ops.
+        public bool Saturate { get; }
 
         public OpCodeVideo(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode)
         {
             Rd = new Register(opCode.Extract(0, 8), RegisterType.Gpr);
             Ra = new Register(opCode.Extract(8, 8), RegisterType.Gpr);
+            Rb = new Register(opCode.Extract(20, 8), RegisterType.Gpr);
             Rc = new Register(opCode.Extract(39, 8), RegisterType.Gpr);
 
+            RaSelection = opCode.Extract(36, 2);
+            RbSelection = opCode.Extract(28, 2);
+
+            RaType = opCode.Extract(37, 2) switch // Field values 2 and 3 map to U16/U32; anything else is U8.
+            {
+                2 => VideoType.U16,
+                3 => VideoType.U32,
+                _ => VideoType.U8
+            };
+
+            RbType = opCode.Extract(29, 2) switch // Same mapping as RaType, read from the operand B field.
+            {
+                2 => VideoType.U16,
+                3 => VideoType.U32,
+                _ => VideoType.U8
+            };
+
+            if (opCode.Extract(48)) // Bit 48 marks operand A as signed.
+            {
+                RaType |= VideoType.Signed;
+            }
+
+            if (!opCode.Extract(50))
+            {
+                // Immediate variant (bit 50 clear): operand B is an immediate typed S16 or U16 by bit 49.
+                Immediate = opCode.Extract(16, 20);
+
+                RbType = opCode.Extract(49) ? VideoType.S16 : VideoType.U16;
+
+                if (RbType == VideoType.S16)
+                {
+                    Immediate = (Immediate << 12) >> 12; // Sign-extends from bit 19. NOTE(review): 20 bits for an S16 operand - confirm.
+                }
+            }
+            else if (opCode.Extract(49)) // Register variant: bit 49 marks operand B as signed.
+            {
+                RbType |= VideoType.Signed;
+            }
+
+            if (RaType == VideoType.U16) // NOTE(review): exact compare, so S16/S32 skip the selector masking - confirm intended.
+            {
+                RaSelection &= 1; // Keep one selector bit; Extend scales the selector by 16 for this type.
+            }
+            else if (RaType == VideoType.U32)
+            {
+                RaSelection = 0; // Extend ignores the selector for this type.
+            }
+
+            if (RbType == VideoType.U16) // Same masking as for operand A (same NOTE(review) applies).
+            {
+                RbSelection &= 1;
+            }
+            else if (RbType == VideoType.U32)
+            {
+                RbSelection = 0;
+            }
+
             SetCondCode = opCode.Extract(47);
-            Saturate = opCode.Extract(55);
+
+            HasRb = opCode.Extract(50);
+
+            PostOp = (VideoPostOp)opCode.Extract(51, 3); // Raw field value is cast directly to the enum.
+
+            DstSigned = opCode.Extract(54);
+            Saturate = opCode.Extract(55);
         }
     }
 }
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/Decoders/VideoPostOp.cs b/Ryujinx.Graphics.Shader/Decoders/VideoPostOp.cs
new file mode 100644
index 000000000..658917897
--- /dev/null
+++ b/Ryujinx.Graphics.Shader/Decoders/VideoPostOp.cs
@@ -0,0 +1,14 @@
+namespace Ryujinx.Graphics.Shader.Decoders
+{
+    enum VideoPostOp // Member order matters: OpCodeVideo casts the raw opcode field to this enum.
+    {
+        Mrg16h,
+        Mrg16l,
+        Mrg8b0,
+        Mrg8b2,
+        Acc,
+        Min,
+        Max,
+        Pass // Has no case in Vmnmx's switch, so the result is written unmodified.
+    }
+}
diff --git a/Ryujinx.Graphics.Shader/Decoders/VideoType.cs b/Ryujinx.Graphics.Shader/Decoders/VideoType.cs
new file mode 100644
index 000000000..7186e7c33
--- /dev/null
+++ b/Ryujinx.Graphics.Shader/Decoders/VideoType.cs
@@ -0,0 +1,15 @@
+namespace Ryujinx.Graphics.Shader.Decoders
+{
+    enum VideoType // Operand size in the low bits, plus an optional Signed flag bit.
+    {
+        U8 = 0,
+        U16 = 1,
+        U32 = 2,
+
+        Signed = 1 << 2, // Flag OR'ed onto one of the size values above.
+
+        S8 = Signed | U8,
+        S16 = Signed | U16,
+        S32 = Signed | U32
+    }
+}
diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitVideo.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitVideo.cs
index aac12c781..7b3d9ca51 100644
--- a/Ryujinx.Graphics.Shader/Instructions/InstEmitVideo.cs
+++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitVideo.cs
@@ -1,17 +1,136 @@
 using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
 using Ryujinx.Graphics.Shader.Translation;
 
 using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
 
 namespace Ryujinx.Graphics.Shader.Instructions
 {
     static partial class InstEmit
     {
         public static void Vmad(EmitterContext context)
+        {
+            // TODO: Implement properly. For now the destination just receives GetSrcC(context).
+            context.Copy(GetDest(context), GetSrcC(context));
+        }
+
+        public static void Vmnmx(EmitterContext context) // Emits min/max of A and B, optional saturation, then the post-op with C.
         {
             OpCodeVideo op = (OpCodeVideo)context.CurrOp;
 
-            context.Copy(GetDest(context), GetSrcC(context));
+            bool max = op.RawOpCode.Extract(56); // Bit 56 set selects maximum, clear selects minimum.
+
+            Operand srcA = Extend(context, GetSrcA(context), op.RaSelection, op.RaType);
+            Operand srcC = GetSrcC(context);
+
+            Operand srcB;
+
+            if (op.HasRb)
+            {
+                srcB = Extend(context, Register(op.Rb), op.RbSelection, op.RbType);
+            }
+            else
+            {
+                srcB = Const(op.Immediate); // Immediate was already sign-extended by the decoder when typed S16.
+            }
+
+            Operand res;
+
+            bool resSigned;
+
+            if ((op.RaType & VideoType.Signed) != (op.RbType & VideoType.Signed))
+            {
+                // Signedness is different, but for max, result will always fit a U32,
+                // since one of the inputs can't be negative, and the result is the one
+                // with highest value. For min, it will always fit on a S32, since
+                // one of the inputs can't be greater than INT_MAX and we want the lowest value.
+                resSigned = !max;
+
+                res = max ? context.IMaximumU32(srcA, srcB) : context.IMinimumS32(srcA, srcB);
+
+                if ((op.RaType & VideoType.Signed) != 0)
+                {
+                    Operand isBGtIntMax = context.ICompareLess(srcB, Const(0));
+
+                    res = context.ConditionalSelect(isBGtIntMax, srcB, res); // NOTE(review): this override applies to min as well as max - confirm intended.
+                }
+                else
+                {
+                    Operand isAGtIntMax = context.ICompareLess(srcA, Const(0));
+
+                    res = context.ConditionalSelect(isAGtIntMax, srcA, res); // NOTE(review): same override, mirrored for operand A.
+                }
+            }
+            else
+            {
+                // Ra and Rb have the same signedness, so it doesn't matter which one we test.
+                resSigned = (op.RaType & VideoType.Signed) != 0;
+
+                if (max)
+                {
+                    res = resSigned
+                        ? context.IMaximumS32(srcA, srcB)
+                        : context.IMaximumU32(srcA, srcB);
+                }
+                else
+                {
+                    res = resSigned
+                        ? context.IMinimumS32(srcA, srcB)
+                        : context.IMinimumU32(srcA, srcB);
+                }
+            }
+
+            if (op.Saturate) // Only acts when result and destination signedness differ.
+            {
+                if (op.DstSigned && !resSigned)
+                {
+                    res = context.IMinimumU32(res, Const(int.MaxValue)); // Unsigned result, signed destination: cap at int.MaxValue.
+                }
+                else if (!op.DstSigned && resSigned)
+                {
+                    res = context.IMaximumS32(res, Const(0)); // Signed result, unsigned destination: raise negatives to 0.
+                }
+            }
+
+            switch (op.PostOp) // VideoPostOp.Pass has no case, leaving res unchanged.
+            {
+                case VideoPostOp.Acc:
+                    res = context.IAdd(res, srcC);
+                    break;
+                case VideoPostOp.Max:
+                    res = op.DstSigned ? context.IMaximumS32(res, srcC) : context.IMaximumU32(res, srcC);
+                    break;
+                case VideoPostOp.Min:
+                    res = op.DstSigned ? context.IMinimumS32(res, srcC) : context.IMinimumU32(res, srcC);
+                    break;
+                case VideoPostOp.Mrg16h:
+                    res = context.BitfieldInsert(srcC, res, Const(16), Const(16));
+                    break;
+                case VideoPostOp.Mrg16l:
+                    res = context.BitfieldInsert(srcC, res, Const(0), Const(16));
+                    break;
+                case VideoPostOp.Mrg8b0:
+                    res = context.BitfieldInsert(srcC, res, Const(0), Const(8));
+                    break;
+                case VideoPostOp.Mrg8b2:
+                    res = context.BitfieldInsert(srcC, res, Const(16), Const(8));
+                    break;
+            }
+
+            context.Copy(GetDest(context), res);
+        }
+
+        private static Operand Extend(EmitterContext context, Operand src, int sel, VideoType type)
+        {
+            return type switch // Shifts src right by sel sub-words, then zero/sign extends; other types return src as is.
+            {
+                VideoType.U8 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(sel * 8)), 8),
+                VideoType.U16 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(sel * 16)), 16),
+                VideoType.S8 => SignExtendTo32(context, context.ShiftRightU32(src, Const(sel * 8)), 8),
+                VideoType.S16 => SignExtendTo32(context, context.ShiftRightU32(src, Const(sel * 16)), 16),
+                _ => src
+            };
         }
     }
 }
\ No newline at end of file