From 0dbfe3c23ee072ec9dbc477f955a163107af2be1 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Tue, 31 Dec 2019 21:08:02 -0300 Subject: [PATCH] Re-add NVDEC project (not integrated) --- Ryujinx.Graphics.Gpu/GpuContext.cs | 2 +- Ryujinx.Graphics.Gpu/Memory/MemoryAccessor.cs | 37 +- Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs | 2 +- Ryujinx.Graphics.Nvdec/CdmaProcessor.cs | 103 ++ Ryujinx.Graphics.Nvdec/ChClassId.cs | 20 + Ryujinx.Graphics.Nvdec/ChCommandEntry.cs | 18 + Ryujinx.Graphics.Nvdec/ChSubmissionMode.cs | 13 + .../Ryujinx.Graphics.Nvdec.csproj | 23 + .../VDec/BitStreamWriter.cs | 75 ++ Ryujinx.Graphics.Nvdec/VDec/DecoderHelper.cs | 17 + Ryujinx.Graphics.Nvdec/VDec/FFmpeg.cs | 168 ++++ Ryujinx.Graphics.Nvdec/VDec/FFmpegFrame.cs | 14 + .../VDec/H264BitStreamWriter.cs | 79 ++ Ryujinx.Graphics.Nvdec/VDec/H264Decoder.cs | 238 +++++ Ryujinx.Graphics.Nvdec/VDec/H264Matrices.cs | 8 + .../VDec/H264ParameterSets.cs | 34 + Ryujinx.Graphics.Nvdec/VDec/VideoCodec.cs | 10 + Ryujinx.Graphics.Nvdec/VDec/VideoDecoder.cs | 266 ++++++ .../VDec/VideoDecoderMeth.cs | 19 + Ryujinx.Graphics.Nvdec/VDec/Vp9Decoder.cs | 879 ++++++++++++++++++ Ryujinx.Graphics.Nvdec/VDec/Vp9FrameHeader.cs | 79 ++ Ryujinx.Graphics.Nvdec/VDec/Vp9FrameKeys.cs | 10 + .../VDec/Vp9ProbabilityTables.cs | 31 + .../VDec/VpxBitStreamWriter.cs | 38 + .../VDec/VpxRangeEncoder.cs | 134 +++ Ryujinx.Graphics.Nvdec/Vic/StructUnpacker.cs | 69 ++ .../Vic/SurfaceOutputConfig.cs | 33 + .../Vic/SurfacePixelFormat.cs | 8 + .../Vic/VideoImageComposer.cs | 94 ++ .../Vic/VideoImageComposerMeth.cs | 12 + Ryujinx.sln | 32 +- 31 files changed, 2547 insertions(+), 18 deletions(-) create mode 100644 Ryujinx.Graphics.Nvdec/CdmaProcessor.cs create mode 100644 Ryujinx.Graphics.Nvdec/ChClassId.cs create mode 100644 Ryujinx.Graphics.Nvdec/ChCommandEntry.cs create mode 100644 Ryujinx.Graphics.Nvdec/ChSubmissionMode.cs create mode 100644 Ryujinx.Graphics.Nvdec/Ryujinx.Graphics.Nvdec.csproj create mode 100644 
Ryujinx.Graphics.Nvdec/VDec/BitStreamWriter.cs create mode 100644 Ryujinx.Graphics.Nvdec/VDec/DecoderHelper.cs create mode 100644 Ryujinx.Graphics.Nvdec/VDec/FFmpeg.cs create mode 100644 Ryujinx.Graphics.Nvdec/VDec/FFmpegFrame.cs create mode 100644 Ryujinx.Graphics.Nvdec/VDec/H264BitStreamWriter.cs create mode 100644 Ryujinx.Graphics.Nvdec/VDec/H264Decoder.cs create mode 100644 Ryujinx.Graphics.Nvdec/VDec/H264Matrices.cs create mode 100644 Ryujinx.Graphics.Nvdec/VDec/H264ParameterSets.cs create mode 100644 Ryujinx.Graphics.Nvdec/VDec/VideoCodec.cs create mode 100644 Ryujinx.Graphics.Nvdec/VDec/VideoDecoder.cs create mode 100644 Ryujinx.Graphics.Nvdec/VDec/VideoDecoderMeth.cs create mode 100644 Ryujinx.Graphics.Nvdec/VDec/Vp9Decoder.cs create mode 100644 Ryujinx.Graphics.Nvdec/VDec/Vp9FrameHeader.cs create mode 100644 Ryujinx.Graphics.Nvdec/VDec/Vp9FrameKeys.cs create mode 100644 Ryujinx.Graphics.Nvdec/VDec/Vp9ProbabilityTables.cs create mode 100644 Ryujinx.Graphics.Nvdec/VDec/VpxBitStreamWriter.cs create mode 100644 Ryujinx.Graphics.Nvdec/VDec/VpxRangeEncoder.cs create mode 100644 Ryujinx.Graphics.Nvdec/Vic/StructUnpacker.cs create mode 100644 Ryujinx.Graphics.Nvdec/Vic/SurfaceOutputConfig.cs create mode 100644 Ryujinx.Graphics.Nvdec/Vic/SurfacePixelFormat.cs create mode 100644 Ryujinx.Graphics.Nvdec/Vic/VideoImageComposer.cs create mode 100644 Ryujinx.Graphics.Nvdec/Vic/VideoImageComposerMeth.cs diff --git a/Ryujinx.Graphics.Gpu/GpuContext.cs b/Ryujinx.Graphics.Gpu/GpuContext.cs index 034cc065b..b644c54dd 100644 --- a/Ryujinx.Graphics.Gpu/GpuContext.cs +++ b/Ryujinx.Graphics.Gpu/GpuContext.cs @@ -28,7 +28,7 @@ namespace Ryujinx.Graphics.Gpu /// /// GPU memory accessor. /// - internal MemoryAccessor MemoryAccessor { get; } + public MemoryAccessor MemoryAccessor { get; } /// /// GPU engine methods processing. 
diff --git a/Ryujinx.Graphics.Gpu/Memory/MemoryAccessor.cs b/Ryujinx.Graphics.Gpu/Memory/MemoryAccessor.cs index a0247acfe..3cbbd2536 100644 --- a/Ryujinx.Graphics.Gpu/Memory/MemoryAccessor.cs +++ b/Ryujinx.Graphics.Gpu/Memory/MemoryAccessor.cs @@ -6,7 +6,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// <summary> /// GPU mapped memory accessor. /// </summary> - class MemoryAccessor + public class MemoryAccessor { private GpuContext _context; @@ -19,6 +19,17 @@ namespace Ryujinx.Graphics.Gpu.Memory _context = context; } + /// <summary> + /// Reads a byte array from GPU mapped memory. + /// </summary> + /// <param name="gpuVa">GPU virtual address where the data is located</param> + /// <param name="size">Size of the data in bytes</param> + /// <returns>Byte array with the data</returns> + public byte[] ReadBytes(ulong gpuVa, ulong size) + { + return Read(gpuVa, size).ToArray(); + } + /// <summary> /// Reads data from GPU mapped memory. /// This reads as much data as possible, up to the specified maximum size. @@ -62,6 +73,30 @@ namespace Ryujinx.Graphics.Gpu.Memory return BitConverter.ToInt32(_context.PhysicalMemory.Read(processVa, 4)); } + /// <summary> + /// Reads a 64-bits unsigned integer from GPU mapped memory. + /// </summary> + /// <param name="gpuVa">GPU virtual address where the value is located</param> + /// <returns>The value at the specified memory location</returns> + public ulong ReadUInt64(ulong gpuVa) + { + ulong processVa = _context.MemoryManager.Translate(gpuVa); + + return BitConverter.ToUInt64(_context.PhysicalMemory.Read(processVa, 8)); + } + + /// <summary> + /// Writes an 8-bits unsigned integer to GPU mapped memory. + /// </summary> + /// <param name="gpuVa">GPU virtual address to write the value into</param> + /// <param name="value">The value to be written</param> + public void WriteByte(ulong gpuVa, byte value) + { + ulong processVa = _context.MemoryManager.Translate(gpuVa); + + _context.PhysicalMemory.Write(processVa, MemoryMarshal.CreateSpan(ref value, 1)); + } + /// <summary> /// Writes a 32-bits signed integer to GPU mapped memory.
/// diff --git a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs index 62ab0e475..33be04d39 100644 --- a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs +++ b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs @@ -252,7 +252,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// GPU virtual address to be translated /// CPU virtual address - internal ulong Translate(ulong gpuVa) + public ulong Translate(ulong gpuVa) { ulong baseAddress = GetPte(gpuVa); diff --git a/Ryujinx.Graphics.Nvdec/CdmaProcessor.cs b/Ryujinx.Graphics.Nvdec/CdmaProcessor.cs new file mode 100644 index 000000000..c54a95f9b --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/CdmaProcessor.cs @@ -0,0 +1,103 @@ +using Ryujinx.Graphics.Gpu; +using Ryujinx.Graphics.VDec; +using Ryujinx.Graphics.Vic; +using System.Collections.Generic; + +namespace Ryujinx.Graphics +{ + public class CdmaProcessor + { + private const int MethSetMethod = 0x10; + private const int MethSetData = 0x11; + + private readonly VideoDecoder _videoDecoder; + private readonly VideoImageComposer _videoImageComposer; + + public CdmaProcessor() + { + _videoDecoder = new VideoDecoder(); + _videoImageComposer = new VideoImageComposer(_videoDecoder); + } + + public void PushCommands(GpuContext gpu, int[] cmdBuffer) + { + List commands = new List(); + + ChClassId currentClass = 0; + + for (int index = 0; index < cmdBuffer.Length; index++) + { + int cmd = cmdBuffer[index]; + + int value = (cmd >> 0) & 0xffff; + int methodOffset = (cmd >> 16) & 0xfff; + + ChSubmissionMode submissionMode = (ChSubmissionMode)((cmd >> 28) & 0xf); + + switch (submissionMode) + { + case ChSubmissionMode.SetClass: currentClass = (ChClassId)(value >> 6); break; + + case ChSubmissionMode.Incrementing: + { + int count = value; + + for (int argIdx = 0; argIdx < count; argIdx++) + { + int argument = cmdBuffer[++index]; + + commands.Add(new ChCommand(currentClass, methodOffset + argIdx, argument)); + } + + break; + } + + case 
ChSubmissionMode.NonIncrementing: + { + int count = value; + + int[] arguments = new int[count]; + + for (int argIdx = 0; argIdx < count; argIdx++) + { + arguments[argIdx] = cmdBuffer[++index]; + } + + commands.Add(new ChCommand(currentClass, methodOffset, arguments)); + + break; + } + } + } + + ProcessCommands(gpu, commands.ToArray()); + } + + private void ProcessCommands(GpuContext gpu, ChCommand[] commands) + { + int methodOffset = 0; + + foreach (ChCommand command in commands) + { + switch (command.MethodOffset) + { + case MethSetMethod: methodOffset = command.Arguments[0]; break; + + case MethSetData: + { + if (command.ClassId == ChClassId.NvDec) + { + _videoDecoder.Process(gpu, methodOffset, command.Arguments); + } + else if (command.ClassId == ChClassId.GraphicsVic) + { + _videoImageComposer.Process(gpu, methodOffset, command.Arguments); + } + + break; + } + } + } + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/ChClassId.cs b/Ryujinx.Graphics.Nvdec/ChClassId.cs new file mode 100644 index 000000000..115f0b89c --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/ChClassId.cs @@ -0,0 +1,20 @@ +namespace Ryujinx.Graphics +{ + enum ChClassId + { + Host1X = 0x1, + VideoEncodeMpeg = 0x20, + VideoEncodeNvEnc = 0x21, + VideoStreamingVi = 0x30, + VideoStreamingIsp = 0x32, + VideoStreamingIspB = 0x34, + VideoStreamingViI2c = 0x36, + GraphicsVic = 0x5d, + Graphics3D = 0x60, + GraphicsGpu = 0x61, + Tsec = 0xe0, + TsecB = 0xe1, + NvJpg = 0xc0, + NvDec = 0xf0 + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/ChCommandEntry.cs b/Ryujinx.Graphics.Nvdec/ChCommandEntry.cs new file mode 100644 index 000000000..b01b77eda --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/ChCommandEntry.cs @@ -0,0 +1,18 @@ +namespace Ryujinx.Graphics +{ + struct ChCommand + { + public ChClassId ClassId { get; private set; } + + public int MethodOffset { get; private set; } + + public int[] Arguments { get; private set; } + + public ChCommand(ChClassId classId, int 
methodOffset, params int[] arguments) + { + ClassId = classId; + MethodOffset = methodOffset; + Arguments = arguments; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/ChSubmissionMode.cs b/Ryujinx.Graphics.Nvdec/ChSubmissionMode.cs new file mode 100644 index 000000000..5c6530196 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/ChSubmissionMode.cs @@ -0,0 +1,13 @@ +namespace Ryujinx.Graphics +{ + enum ChSubmissionMode + { + SetClass = 0, + Incrementing = 1, + NonIncrementing = 2, + Mask = 3, + Immediate = 4, + Restart = 5, + Gather = 6 + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/Ryujinx.Graphics.Nvdec.csproj b/Ryujinx.Graphics.Nvdec/Ryujinx.Graphics.Nvdec.csproj new file mode 100644 index 000000000..a7bffeb6a --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/Ryujinx.Graphics.Nvdec.csproj @@ -0,0 +1,23 @@ + + + + netcoreapp3.1 + + + + true + + + + true + + + + + + + + + + + diff --git a/Ryujinx.Graphics.Nvdec/VDec/BitStreamWriter.cs b/Ryujinx.Graphics.Nvdec/VDec/BitStreamWriter.cs new file mode 100644 index 000000000..db2d39e59 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/VDec/BitStreamWriter.cs @@ -0,0 +1,75 @@ +using System.IO; + +namespace Ryujinx.Graphics.VDec +{ + class BitStreamWriter + { + private const int BufferSize = 8; + + private Stream _baseStream; + + private int _buffer; + private int _bufferPos; + + public BitStreamWriter(Stream baseStream) + { + _baseStream = baseStream; + } + + public void WriteBit(bool value) + { + WriteBits(value ? 
1 : 0, 1); + } + + public void WriteBits(int value, int valueSize) + { + int valuePos = 0; + + int remaining = valueSize; + + while (remaining > 0) + { + int copySize = remaining; + + int free = GetFreeBufferBits(); + + if (copySize > free) + { + copySize = free; + } + + int mask = (1 << copySize) - 1; + + int srcShift = (valueSize - valuePos) - copySize; + int dstShift = (BufferSize - _bufferPos) - copySize; + + _buffer |= ((value >> srcShift) & mask) << dstShift; + + valuePos += copySize; + _bufferPos += copySize; + remaining -= copySize; + } + } + + private int GetFreeBufferBits() + { + if (_bufferPos == BufferSize) + { + Flush(); + } + + return BufferSize - _bufferPos; + } + + public void Flush() + { + if (_bufferPos != 0) + { + _baseStream.WriteByte((byte)_buffer); + + _buffer = 0; + _bufferPos = 0; + } + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/DecoderHelper.cs b/Ryujinx.Graphics.Nvdec/VDec/DecoderHelper.cs new file mode 100644 index 000000000..4f17d8d10 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/VDec/DecoderHelper.cs @@ -0,0 +1,17 @@ +using System; + +namespace Ryujinx.Graphics.VDec +{ + static class DecoderHelper + { + public static byte[] Combine(byte[] arr0, byte[] arr1) + { + byte[] output = new byte[arr0.Length + arr1.Length]; + + Buffer.BlockCopy(arr0, 0, output, 0, arr0.Length); + Buffer.BlockCopy(arr1, 0, output, arr0.Length, arr1.Length); + + return output; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/FFmpeg.cs b/Ryujinx.Graphics.Nvdec/VDec/FFmpeg.cs new file mode 100644 index 000000000..ccd01f0d3 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/VDec/FFmpeg.cs @@ -0,0 +1,168 @@ +using FFmpeg.AutoGen; +using System; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.VDec +{ + static unsafe class FFmpegWrapper + { + private static AVCodec* _codec; + private static AVCodecContext* _context; + private static AVFrame* _frame; + private static SwsContext* _scalerCtx; + + 
private static int _scalerWidth; + private static int _scalerHeight; + + public static bool IsInitialized { get; private set; } + + public static void H264Initialize() + { + EnsureCodecInitialized(AVCodecID.AV_CODEC_ID_H264); + } + + public static void Vp9Initialize() + { + EnsureCodecInitialized(AVCodecID.AV_CODEC_ID_VP9); + } + + private static void EnsureCodecInitialized(AVCodecID codecId) + { + if (IsInitialized) + { + Uninitialize(); + } + + _codec = ffmpeg.avcodec_find_decoder(codecId); + _context = ffmpeg.avcodec_alloc_context3(_codec); + _frame = ffmpeg.av_frame_alloc(); + + ffmpeg.avcodec_open2(_context, _codec, null); + + IsInitialized = true; + } + + public static int DecodeFrame(byte[] data) + { + if (!IsInitialized) + { + throw new InvalidOperationException("Tried to use uninitialized codec!"); + } + + AVPacket packet; + + ffmpeg.av_init_packet(&packet); + + fixed (byte* ptr = data) + { + packet.data = ptr; + packet.size = data.Length; + + ffmpeg.avcodec_send_packet(_context, &packet); + } + + return ffmpeg.avcodec_receive_frame(_context, _frame); + } + + public static FFmpegFrame GetFrame() + { + if (!IsInitialized) + { + throw new InvalidOperationException("Tried to use uninitialized codec!"); + } + + AVFrame managedFrame = Marshal.PtrToStructure((IntPtr)_frame); + + byte*[] data = managedFrame.data.ToArray(); + + return new FFmpegFrame() + { + Width = managedFrame.width, + Height = managedFrame.height, + + LumaPtr = data[0], + ChromaBPtr = data[1], + ChromaRPtr = data[2] + }; + } + + public static FFmpegFrame GetFrameRgba() + { + if (!IsInitialized) + { + throw new InvalidOperationException("Tried to use uninitialized codec!"); + } + + AVFrame managedFrame = Marshal.PtrToStructure((IntPtr)_frame); + + EnsureScalerSetup(managedFrame.width, managedFrame.height); + + byte*[] data = managedFrame.data.ToArray(); + + int[] lineSizes = managedFrame.linesize.ToArray(); + + byte[] dst = new byte[managedFrame.width * managedFrame.height * 4]; + + fixed (byte* 
ptr = dst) + { + byte*[] dstData = new byte*[] { ptr }; + + int[] dstLineSizes = new int[] { managedFrame.width * 4 }; + + ffmpeg.sws_scale(_scalerCtx, data, lineSizes, 0, managedFrame.height, dstData, dstLineSizes); + } + + return new FFmpegFrame() + { + Width = managedFrame.width, + Height = managedFrame.height, + + Data = dst + }; + } + + private static void EnsureScalerSetup(int width, int height) + { + if (width == 0 || height == 0) + { + return; + } + + if (_scalerCtx == null || _scalerWidth != width || _scalerHeight != height) + { + FreeScaler(); + + _scalerCtx = ffmpeg.sws_getContext( + width, height, AVPixelFormat.AV_PIX_FMT_YUV420P, + width, height, AVPixelFormat.AV_PIX_FMT_RGBA, 0, null, null, null); + + _scalerWidth = width; + _scalerHeight = height; + } + } + + public static void Uninitialize() + { + if (IsInitialized) + { + ffmpeg.av_frame_unref(_frame); + ffmpeg.av_free(_frame); + ffmpeg.avcodec_close(_context); + + FreeScaler(); + + IsInitialized = false; + } + } + + private static void FreeScaler() + { + if (_scalerCtx != null) + { + ffmpeg.sws_freeContext(_scalerCtx); + + _scalerCtx = null; + } + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/FFmpegFrame.cs b/Ryujinx.Graphics.Nvdec/VDec/FFmpegFrame.cs new file mode 100644 index 000000000..535a70c94 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/VDec/FFmpegFrame.cs @@ -0,0 +1,14 @@ +namespace Ryujinx.Graphics.VDec +{ + unsafe struct FFmpegFrame + { + public int Width; + public int Height; + + public byte* LumaPtr; + public byte* ChromaBPtr; + public byte* ChromaRPtr; + + public byte[] Data; + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/H264BitStreamWriter.cs b/Ryujinx.Graphics.Nvdec/VDec/H264BitStreamWriter.cs new file mode 100644 index 000000000..b4fad59be --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/VDec/H264BitStreamWriter.cs @@ -0,0 +1,79 @@ +using System.IO; + +namespace Ryujinx.Graphics.VDec +{ + class H264BitStreamWriter : 
BitStreamWriter + { + public H264BitStreamWriter(Stream baseStream) : base(baseStream) { } + + public void WriteU(int value, int valueSize) + { + WriteBits(value, valueSize); + } + + public void WriteSe(int value) + { + WriteExpGolombCodedInt(value); + } + + public void WriteUe(int value) + { + WriteExpGolombCodedUInt((uint)value); + } + + public void End() + { + WriteBit(true); + + Flush(); + } + + private void WriteExpGolombCodedInt(int value) + { + int sign = value <= 0 ? 0 : 1; + + if (value < 0) + { + value = -value; + } + + value = (value << 1) - sign; + + WriteExpGolombCodedUInt((uint)value); + } + + private void WriteExpGolombCodedUInt(uint value) + { + int size = 32 - CountLeadingZeros((int)value + 1); + + WriteBits(1, size); + + value -= (1u << (size - 1)) - 1; + + WriteBits((int)value, size - 1); + } + + private static readonly byte[] ClzNibbleTbl = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 }; + + private static int CountLeadingZeros(int value) + { + if (value == 0) + { + return 32; + } + + int nibbleIdx = 32; + int preCount, count = 0; + + do + { + nibbleIdx -= 4; + preCount = ClzNibbleTbl[(value >> nibbleIdx) & 0b1111]; + count += preCount; + } + while (preCount == 4); + + return count; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/H264Decoder.cs b/Ryujinx.Graphics.Nvdec/VDec/H264Decoder.cs new file mode 100644 index 000000000..24c7e0b92 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/VDec/H264Decoder.cs @@ -0,0 +1,238 @@ +using System.IO; + +namespace Ryujinx.Graphics.VDec +{ + class H264Decoder + { + private int _log2MaxPicOrderCntLsbMinus4; + private bool _deltaPicOrderAlwaysZeroFlag; + private bool _frameMbsOnlyFlag; + private int _picWidthInMbs; + private int _picHeightInMapUnits; + private bool _entropyCodingModeFlag; + private bool _bottomFieldPicOrderInFramePresentFlag; + private int _numRefIdxL0DefaultActiveMinus1; + private int _numRefIdxL1DefaultActiveMinus1; + private bool 
_deblockingFilterControlPresentFlag; + private bool _redundantPicCntPresentFlag; + private bool _transform8x8ModeFlag; + private bool _mbAdaptiveFrameFieldFlag; + private bool _direct8x8InferenceFlag; + private bool _weightedPredFlag; + private bool _constrainedIntraPredFlag; + private bool _fieldPicFlag; + private bool _bottomFieldFlag; + private int _log2MaxFrameNumMinus4; + private int _chromaFormatIdc; + private int _picOrderCntType; + private int _picInitQpMinus26; + private int _chromaQpIndexOffset; + private int _chromaQpIndexOffset2; + private int _weightedBipredIdc; + private int _frameNumber; + private byte[] _scalingMatrix4; + private byte[] _scalingMatrix8; + + public void Decode(H264ParameterSets Params, H264Matrices matrices, byte[] frameData) + { + _log2MaxPicOrderCntLsbMinus4 = Params.Log2MaxPicOrderCntLsbMinus4; + _deltaPicOrderAlwaysZeroFlag = Params.DeltaPicOrderAlwaysZeroFlag; + _frameMbsOnlyFlag = Params.FrameMbsOnlyFlag; + _picWidthInMbs = Params.PicWidthInMbs; + _picHeightInMapUnits = Params.PicHeightInMapUnits; + _entropyCodingModeFlag = Params.EntropyCodingModeFlag; + _bottomFieldPicOrderInFramePresentFlag = Params.BottomFieldPicOrderInFramePresentFlag; + _numRefIdxL0DefaultActiveMinus1 = Params.NumRefIdxL0DefaultActiveMinus1; + _numRefIdxL1DefaultActiveMinus1 = Params.NumRefIdxL1DefaultActiveMinus1; + _deblockingFilterControlPresentFlag = Params.DeblockingFilterControlPresentFlag; + _redundantPicCntPresentFlag = Params.RedundantPicCntPresentFlag; + _transform8x8ModeFlag = Params.Transform8x8ModeFlag; + + _mbAdaptiveFrameFieldFlag = ((Params.Flags >> 0) & 1) != 0; + _direct8x8InferenceFlag = ((Params.Flags >> 1) & 1) != 0; + _weightedPredFlag = ((Params.Flags >> 2) & 1) != 0; + _constrainedIntraPredFlag = ((Params.Flags >> 3) & 1) != 0; + _fieldPicFlag = ((Params.Flags >> 5) & 1) != 0; + _bottomFieldFlag = ((Params.Flags >> 6) & 1) != 0; + + _log2MaxFrameNumMinus4 = (int)(Params.Flags >> 8) & 0xf; + _chromaFormatIdc = (int)(Params.Flags >> 
12) & 0x3; + _picOrderCntType = (int)(Params.Flags >> 14) & 0x3; + _picInitQpMinus26 = (int)(Params.Flags >> 16) & 0x3f; + _chromaQpIndexOffset = (int)(Params.Flags >> 22) & 0x1f; + _chromaQpIndexOffset2 = (int)(Params.Flags >> 27) & 0x1f; + _weightedBipredIdc = (int)(Params.Flags >> 32) & 0x3; + _frameNumber = (int)(Params.Flags >> 46) & 0x1ffff; + + _picInitQpMinus26 = (_picInitQpMinus26 << 26) >> 26; + _chromaQpIndexOffset = (_chromaQpIndexOffset << 27) >> 27; + _chromaQpIndexOffset2 = (_chromaQpIndexOffset2 << 27) >> 27; + + _scalingMatrix4 = matrices.ScalingMatrix4; + _scalingMatrix8 = matrices.ScalingMatrix8; + + if (FFmpegWrapper.IsInitialized) + { + FFmpegWrapper.DecodeFrame(frameData); + } + else + { + FFmpegWrapper.H264Initialize(); + + FFmpegWrapper.DecodeFrame(DecoderHelper.Combine(EncodeHeader(), frameData)); + } + } + + private byte[] EncodeHeader() + { + using (MemoryStream data = new MemoryStream()) + { + H264BitStreamWriter writer = new H264BitStreamWriter(data); + + // Sequence Parameter Set. + writer.WriteU(1, 24); + writer.WriteU(0, 1); + writer.WriteU(3, 2); + writer.WriteU(7, 5); + writer.WriteU(100, 8); + writer.WriteU(0, 8); + writer.WriteU(31, 8); + writer.WriteUe(0); + writer.WriteUe(_chromaFormatIdc); + + if (_chromaFormatIdc == 3) + { + writer.WriteBit(false); + } + + writer.WriteUe(0); + writer.WriteUe(0); + writer.WriteBit(false); + writer.WriteBit(false); //Scaling matrix present flag + + writer.WriteUe(_log2MaxFrameNumMinus4); + writer.WriteUe(_picOrderCntType); + + if (_picOrderCntType == 0) + { + writer.WriteUe(_log2MaxPicOrderCntLsbMinus4); + } + else if (_picOrderCntType == 1) + { + writer.WriteBit(_deltaPicOrderAlwaysZeroFlag); + + writer.WriteSe(0); + writer.WriteSe(0); + writer.WriteUe(0); + } + + int picHeightInMbs = _picHeightInMapUnits / (_frameMbsOnlyFlag ? 
1 : 2); + + writer.WriteUe(16); + writer.WriteBit(false); + writer.WriteUe(_picWidthInMbs - 1); + writer.WriteUe(picHeightInMbs - 1); + writer.WriteBit(_frameMbsOnlyFlag); + + if (!_frameMbsOnlyFlag) + { + writer.WriteBit(_mbAdaptiveFrameFieldFlag); + } + + writer.WriteBit(_direct8x8InferenceFlag); + writer.WriteBit(false); //Frame cropping flag + writer.WriteBit(false); //VUI parameter present flag + + writer.End(); + + // Picture Parameter Set. + writer.WriteU(1, 24); + writer.WriteU(0, 1); + writer.WriteU(3, 2); + writer.WriteU(8, 5); + + writer.WriteUe(0); + writer.WriteUe(0); + + writer.WriteBit(_entropyCodingModeFlag); + writer.WriteBit(false); + writer.WriteUe(0); + writer.WriteUe(_numRefIdxL0DefaultActiveMinus1); + writer.WriteUe(_numRefIdxL1DefaultActiveMinus1); + writer.WriteBit(_weightedPredFlag); + writer.WriteU(_weightedBipredIdc, 2); + writer.WriteSe(_picInitQpMinus26); + writer.WriteSe(0); + writer.WriteSe(_chromaQpIndexOffset); + writer.WriteBit(_deblockingFilterControlPresentFlag); + writer.WriteBit(_constrainedIntraPredFlag); + writer.WriteBit(_redundantPicCntPresentFlag); + writer.WriteBit(_transform8x8ModeFlag); + + writer.WriteBit(true); + + for (int index = 0; index < 6; index++) + { + writer.WriteBit(true); + + WriteScalingList(writer, _scalingMatrix4, index * 16, 16); + } + + if (_transform8x8ModeFlag) + { + for (int index = 0; index < 2; index++) + { + writer.WriteBit(true); + + WriteScalingList(writer, _scalingMatrix8, index * 64, 64); + } + } + + writer.WriteSe(_chromaQpIndexOffset2); + + writer.End(); + + return data.ToArray(); + } + } + + // ZigZag LUTs from libavcodec. 
+ private static readonly byte[] ZigZagDirect = new byte[] + { + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63 + }; + + private static readonly byte[] ZigZagScan = new byte[] + { + 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, + 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4, + 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, + 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4 + }; + + private static void WriteScalingList(H264BitStreamWriter writer, byte[] list, int start, int count) + { + byte[] scan = count == 16 ? ZigZagScan : ZigZagDirect; + + int lastScale = 8; + + for (int index = 0; index < count; index++) + { + byte value = list[start + scan[index]]; + + int deltaScale = value - lastScale; + + writer.WriteSe(deltaScale); + + lastScale = value; + } + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/H264Matrices.cs b/Ryujinx.Graphics.Nvdec/VDec/H264Matrices.cs new file mode 100644 index 000000000..a1524214f --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/VDec/H264Matrices.cs @@ -0,0 +1,8 @@ +namespace Ryujinx.Graphics.VDec +{ + struct H264Matrices + { + public byte[] ScalingMatrix4; + public byte[] ScalingMatrix8; + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/H264ParameterSets.cs b/Ryujinx.Graphics.Nvdec/VDec/H264ParameterSets.cs new file mode 100644 index 000000000..f242f0f24 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/VDec/H264ParameterSets.cs @@ -0,0 +1,34 @@ +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.VDec +{ + [StructLayout(LayoutKind.Sequential, Pack = 4)] + struct H264ParameterSets + { + public int Log2MaxPicOrderCntLsbMinus4; + public bool DeltaPicOrderAlwaysZeroFlag; + public bool FrameMbsOnlyFlag; + public int PicWidthInMbs; + public int PicHeightInMapUnits; + public int 
Reserved6C; + public bool EntropyCodingModeFlag; + public bool BottomFieldPicOrderInFramePresentFlag; + public int NumRefIdxL0DefaultActiveMinus1; + public int NumRefIdxL1DefaultActiveMinus1; + public bool DeblockingFilterControlPresentFlag; + public bool RedundantPicCntPresentFlag; + public bool Transform8x8ModeFlag; + public int Unknown8C; + public int Unknown90; + public int Reserved94; + public int Unknown98; + public int Reserved9C; + public int ReservedA0; + public int UnknownA4; + public int ReservedA8; + public int UnknownAC; + public long Flags; + public int FrameNumber; + public int FrameNumber2; + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/VideoCodec.cs b/Ryujinx.Graphics.Nvdec/VDec/VideoCodec.cs new file mode 100644 index 000000000..f031919dc --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/VDec/VideoCodec.cs @@ -0,0 +1,10 @@ +namespace Ryujinx.Graphics.VDec +{ + enum VideoCodec + { + H264 = 3, + Vp8 = 5, + H265 = 7, + Vp9 = 9 + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/VideoDecoder.cs b/Ryujinx.Graphics.Nvdec/VDec/VideoDecoder.cs new file mode 100644 index 000000000..131bb3cce --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/VDec/VideoDecoder.cs @@ -0,0 +1,266 @@ +using Ryujinx.Graphics.Gpu; +using Ryujinx.Graphics.Vic; +using System; + +namespace Ryujinx.Graphics.VDec +{ + unsafe class VideoDecoder + { + private H264Decoder _h264Decoder; + private Vp9Decoder _vp9Decoder; + + private VideoCodec _currentVideoCodec; + + private ulong _decoderContextAddress; + private ulong _frameDataAddress; + private ulong _vpxCurrLumaAddress; + private ulong _vpxRef0LumaAddress; + private ulong _vpxRef1LumaAddress; + private ulong _vpxRef2LumaAddress; + private ulong _vpxCurrChromaAddress; + private ulong _vpxRef0ChromaAddress; + private ulong _vpxRef1ChromaAddress; + private ulong _vpxRef2ChromaAddress; + private ulong _vpxProbTablesAddress; + + public VideoDecoder() + { + _h264Decoder = new H264Decoder(); + 
_vp9Decoder = new Vp9Decoder(); + } + + public void Process(GpuContext gpu, int methodOffset, int[] arguments) + { + VideoDecoderMeth method = (VideoDecoderMeth)methodOffset; + + switch (method) + { + case VideoDecoderMeth.SetVideoCodec: SetVideoCodec(arguments); break; + case VideoDecoderMeth.Execute: Execute(gpu); break; + case VideoDecoderMeth.SetDecoderCtxAddr: SetDecoderCtxAddr(arguments); break; + case VideoDecoderMeth.SetFrameDataAddr: SetFrameDataAddr(arguments); break; + case VideoDecoderMeth.SetVpxCurrLumaAddr: SetVpxCurrLumaAddr(arguments); break; + case VideoDecoderMeth.SetVpxRef0LumaAddr: SetVpxRef0LumaAddr(arguments); break; + case VideoDecoderMeth.SetVpxRef1LumaAddr: SetVpxRef1LumaAddr(arguments); break; + case VideoDecoderMeth.SetVpxRef2LumaAddr: SetVpxRef2LumaAddr(arguments); break; + case VideoDecoderMeth.SetVpxCurrChromaAddr: SetVpxCurrChromaAddr(arguments); break; + case VideoDecoderMeth.SetVpxRef0ChromaAddr: SetVpxRef0ChromaAddr(arguments); break; + case VideoDecoderMeth.SetVpxRef1ChromaAddr: SetVpxRef1ChromaAddr(arguments); break; + case VideoDecoderMeth.SetVpxRef2ChromaAddr: SetVpxRef2ChromaAddr(arguments); break; + case VideoDecoderMeth.SetVpxProbTablesAddr: SetVpxProbTablesAddr(arguments); break; + } + } + + private void SetVideoCodec(int[] arguments) + { + _currentVideoCodec = (VideoCodec)arguments[0]; + } + + private void Execute(GpuContext gpu) + { + if (_currentVideoCodec == VideoCodec.H264) + { + int frameDataSize = gpu.MemoryAccessor.ReadInt32(_decoderContextAddress + 0x48); + + H264ParameterSets Params = gpu.MemoryAccessor.Read(_decoderContextAddress + 0x58); + + H264Matrices matrices = new H264Matrices() + { + ScalingMatrix4 = gpu.MemoryAccessor.ReadBytes(_decoderContextAddress + 0x1c0, 6 * 16), + ScalingMatrix8 = gpu.MemoryAccessor.ReadBytes(_decoderContextAddress + 0x220, 2 * 64) + }; + + byte[] frameData = gpu.MemoryAccessor.ReadBytes(_frameDataAddress, (ulong)frameDataSize); + + _h264Decoder.Decode(Params, matrices, frameData); 
+ } + else if (_currentVideoCodec == VideoCodec.Vp9) + { + int frameDataSize = gpu.MemoryAccessor.ReadInt32(_decoderContextAddress + 0x30); + + Vp9FrameKeys keys = new Vp9FrameKeys() + { + CurrKey = (long)gpu.MemoryManager.Translate(_vpxCurrLumaAddress), + Ref0Key = (long)gpu.MemoryManager.Translate(_vpxRef0LumaAddress), + Ref1Key = (long)gpu.MemoryManager.Translate(_vpxRef1LumaAddress), + Ref2Key = (long)gpu.MemoryManager.Translate(_vpxRef2LumaAddress) + }; + + Vp9FrameHeader header = gpu.MemoryAccessor.Read(_decoderContextAddress + 0x48); + + Vp9ProbabilityTables probs = new Vp9ProbabilityTables() + { + SegmentationTreeProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x387, 0x7), + SegmentationPredProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x38e, 0x3), + Tx8x8Probs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x470, 0x2), + Tx16x16Probs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x472, 0x4), + Tx32x32Probs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x476, 0x6), + CoefProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x5a0, 0x900), + SkipProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x537, 0x3), + InterModeProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x400, 0x1c), + InterpFilterProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x52a, 0x8), + IsInterProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x41c, 0x4), + CompModeProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x532, 0x5), + SingleRefProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x580, 0xa), + CompRefProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x58a, 0x5), + YModeProbs0 = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x480, 0x20), + YModeProbs1 = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x47c, 0x4), + PartitionProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x4e0, 0x40), + MvJointProbs = 
gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x53b, 0x3), + MvSignProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x53e, 0x3), + MvClassProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x54c, 0x14), + MvClass0BitProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x540, 0x3), + MvBitsProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x56c, 0x14), + MvClass0FrProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x560, 0xc), + MvFrProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x542, 0x6), + MvClass0HpProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x548, 0x2), + MvHpProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x54a, 0x2) + }; + + byte[] frameData = gpu.MemoryAccessor.ReadBytes(_frameDataAddress, (ulong)frameDataSize); + + _vp9Decoder.Decode(keys, header, probs, frameData); + } + else + { + ThrowUnimplementedCodec(); + } + } + + private void SetDecoderCtxAddr(int[] arguments) + { + _decoderContextAddress = GetAddress(arguments); + } + + private void SetFrameDataAddr(int[] arguments) + { + _frameDataAddress = GetAddress(arguments); + } + + private void SetVpxCurrLumaAddr(int[] arguments) + { + _vpxCurrLumaAddress = GetAddress(arguments); + } + + private void SetVpxRef0LumaAddr(int[] arguments) + { + _vpxRef0LumaAddress = GetAddress(arguments); + } + + private void SetVpxRef1LumaAddr(int[] arguments) + { + _vpxRef1LumaAddress = GetAddress(arguments); + } + + private void SetVpxRef2LumaAddr(int[] arguments) + { + _vpxRef2LumaAddress = GetAddress(arguments); + } + + private void SetVpxCurrChromaAddr(int[] arguments) + { + _vpxCurrChromaAddress = GetAddress(arguments); + } + + private void SetVpxRef0ChromaAddr(int[] arguments) + { + _vpxRef0ChromaAddress = GetAddress(arguments); + } + + private void SetVpxRef1ChromaAddr(int[] arguments) + { + _vpxRef1ChromaAddress = GetAddress(arguments); + } + + private void SetVpxRef2ChromaAddr(int[] arguments) + { + 
_vpxRef2ChromaAddress = GetAddress(arguments); + } + + private void SetVpxProbTablesAddr(int[] arguments) + { + _vpxProbTablesAddress = GetAddress(arguments); + } + + private static ulong GetAddress(int[] arguments) + { + return (ulong)(uint)arguments[0] << 8; + } + + internal void CopyPlanes(GpuContext gpu, SurfaceOutputConfig outputConfig) + { + switch (outputConfig.PixelFormat) + { + case SurfacePixelFormat.Rgba8: CopyPlanesRgba8 (gpu, outputConfig); break; + case SurfacePixelFormat.Yuv420P: CopyPlanesYuv420P(gpu, outputConfig); break; + + default: ThrowUnimplementedPixelFormat(outputConfig.PixelFormat); break; + } + } + + private void CopyPlanesRgba8(GpuContext gpu, SurfaceOutputConfig outputConfig) + { + FFmpegFrame frame = FFmpegWrapper.GetFrameRgba(); + + if ((frame.Width | frame.Height) == 0) + { + return; + } + + throw new NotImplementedException(); + } + + private void CopyPlanesYuv420P(GpuContext gpu, SurfaceOutputConfig outputConfig) + { + FFmpegFrame frame = FFmpegWrapper.GetFrame(); + + if ((frame.Width | frame.Height) == 0) + { + return; + } + + int halfSrcWidth = frame.Width / 2; + + int halfWidth = frame.Width / 2; + int halfHeight = frame.Height / 2; + + int alignedWidth = (outputConfig.SurfaceWidth + 0xff) & ~0xff; + + for (int y = 0; y < frame.Height; y++) + { + int src = y * frame.Width; + int dst = y * alignedWidth; + + int size = frame.Width; + + for (int offset = 0; offset < size; offset++) + { + gpu.MemoryAccessor.WriteByte(outputConfig.SurfaceLumaAddress + (ulong)dst + (ulong)offset, *(frame.LumaPtr + src + offset)); + } + } + + // Copy chroma data from both channels with interleaving. 
+ for (int y = 0; y < halfHeight; y++) + { + int src = y * halfSrcWidth; + int dst = y * alignedWidth; + + for (int x = 0; x < halfWidth; x++) + { + gpu.MemoryAccessor.WriteByte(outputConfig.SurfaceChromaUAddress + (ulong)dst + (ulong)x * 2 + 0, *(frame.ChromaBPtr + src + x)); + gpu.MemoryAccessor.WriteByte(outputConfig.SurfaceChromaUAddress + (ulong)dst + (ulong)x * 2 + 1, *(frame.ChromaRPtr + src + x)); + } + } + } + + private void ThrowUnimplementedCodec() + { + throw new NotImplementedException($"Codec \"{_currentVideoCodec}\" is not supported!"); + } + + private void ThrowUnimplementedPixelFormat(SurfacePixelFormat pixelFormat) + { + throw new NotImplementedException($"Pixel format \"{pixelFormat}\" is not supported!"); + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/VideoDecoderMeth.cs b/Ryujinx.Graphics.Nvdec/VDec/VideoDecoderMeth.cs new file mode 100644 index 000000000..12286386a --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/VDec/VideoDecoderMeth.cs @@ -0,0 +1,19 @@ +namespace Ryujinx.Graphics.VDec +{ + enum VideoDecoderMeth + { + SetVideoCodec = 0x80, + Execute = 0xc0, + SetDecoderCtxAddr = 0x101, + SetFrameDataAddr = 0x102, + SetVpxRef0LumaAddr = 0x10c, + SetVpxRef1LumaAddr = 0x10d, + SetVpxRef2LumaAddr = 0x10e, + SetVpxCurrLumaAddr = 0x10f, + SetVpxRef0ChromaAddr = 0x11d, + SetVpxRef1ChromaAddr = 0x11e, + SetVpxRef2ChromaAddr = 0x11f, + SetVpxCurrChromaAddr = 0x120, + SetVpxProbTablesAddr = 0x170 + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/Vp9Decoder.cs b/Ryujinx.Graphics.Nvdec/VDec/Vp9Decoder.cs new file mode 100644 index 000000000..b20a40bed --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/VDec/Vp9Decoder.cs @@ -0,0 +1,879 @@ +using System.Collections.Generic; +using System.IO; + +namespace Ryujinx.Graphics.VDec +{ + class Vp9Decoder + { + private const int DiffUpdateProbability = 252; + + private const int FrameSyncCode = 0x498342; + + private static readonly int[] MapLut = new int[] + { + 20, 21, 
22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, + 35, 36, 37, 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 2, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 3, 62, 63, + 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 4, 74, 75, 76, 77, 78, + 79, 80, 81, 82, 83, 84, 85, 5, 86, 87, 88, 89, 90, 91, 92, 93, + 94, 95, 96, 97, 6, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, + 109, 7, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 8, 122, + 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 9, 134, 135, 136, 137, + 138, 139, 140, 141, 142, 143, 144, 145, 10, 146, 147, 148, 149, 150, 151, 152, + 153, 154, 155, 156, 157, 11, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, + 168, 169, 12, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 13, + 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 14, 194, 195, 196, + 197, 198, 199, 200, 201, 202, 203, 204, 205, 15, 206, 207, 208, 209, 210, 211, + 212, 213, 214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223, 224, 225, 226, + 227, 228, 229, 17, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, + 18, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 19 + }; + + private byte[] DefaultTx8x8Probs = new byte[] { 100, 66 }; + private byte[] DefaultTx16x16Probs = new byte[] { 20, 152, 15, 101 }; + private byte[] DefaultTx32x32Probs = new byte[] { 3, 136, 37, 5, 52, 13 }; + + private byte[] _defaultCoefProbs = new byte[] + { + 195, 29, 183, 0, 84, 49, 136, 0, 8, 42, 71, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 31, 107, 169, 0, 35, 99, 159, 0, + 17, 82, 140, 0, 8, 66, 114, 0, 2, 44, 76, 0, 1, 19, 32, 0, + 40, 132, 201, 0, 29, 114, 187, 0, 13, 91, 157, 0, 7, 75, 127, 0, + 3, 58, 95, 0, 1, 28, 47, 0, 69, 142, 221, 0, 42, 122, 201, 0, + 15, 91, 159, 0, 6, 67, 121, 0, 1, 42, 77, 0, 1, 17, 31, 0, + 102, 148, 228, 0, 67, 117, 204, 0, 17, 82, 154, 0, 6, 59, 114, 0, + 2, 39, 75, 0, 1, 15, 29, 0, 156, 57, 233, 0, 119, 57, 212, 0, + 58, 48, 163, 0, 29, 40, 124, 0, 12, 30, 
81, 0, 3, 12, 31, 0, + 191, 107, 226, 0, 124, 117, 204, 0, 25, 99, 155, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 29, 148, 210, 0, 37, 126, 194, 0, + 8, 93, 157, 0, 2, 68, 118, 0, 1, 39, 69, 0, 1, 17, 33, 0, + 41, 151, 213, 0, 27, 123, 193, 0, 3, 82, 144, 0, 1, 58, 105, 0, + 1, 32, 60, 0, 1, 13, 26, 0, 59, 159, 220, 0, 23, 126, 198, 0, + 4, 88, 151, 0, 1, 66, 114, 0, 1, 38, 71, 0, 1, 18, 34, 0, + 114, 136, 232, 0, 51, 114, 207, 0, 11, 83, 155, 0, 3, 56, 105, 0, + 1, 33, 65, 0, 1, 17, 34, 0, 149, 65, 234, 0, 121, 57, 215, 0, + 61, 49, 166, 0, 28, 36, 114, 0, 12, 25, 76, 0, 3, 16, 42, 0, + 214, 49, 220, 0, 132, 63, 188, 0, 42, 65, 137, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 85, 137, 221, 0, 104, 131, 216, 0, + 49, 111, 192, 0, 21, 87, 155, 0, 2, 49, 87, 0, 1, 16, 28, 0, + 89, 163, 230, 0, 90, 137, 220, 0, 29, 100, 183, 0, 10, 70, 135, 0, + 2, 42, 81, 0, 1, 17, 33, 0, 108, 167, 237, 0, 55, 133, 222, 0, + 15, 97, 179, 0, 4, 72, 135, 0, 1, 45, 85, 0, 1, 19, 38, 0, + 124, 146, 240, 0, 66, 124, 224, 0, 17, 88, 175, 0, 4, 58, 122, 0, + 1, 36, 75, 0, 1, 18, 37, 0, 141, 79, 241, 0, 126, 70, 227, 0, + 66, 58, 182, 0, 30, 44, 136, 0, 12, 34, 96, 0, 2, 20, 47, 0, + 229, 99, 249, 0, 143, 111, 235, 0, 46, 109, 192, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 82, 158, 236, 0, 94, 146, 224, 0, + 25, 117, 191, 0, 9, 87, 149, 0, 3, 56, 99, 0, 1, 33, 57, 0, + 83, 167, 237, 0, 68, 145, 222, 0, 10, 103, 177, 0, 2, 72, 131, 0, + 1, 41, 79, 0, 1, 20, 39, 0, 99, 167, 239, 0, 47, 141, 224, 0, + 10, 104, 178, 0, 2, 73, 133, 0, 1, 44, 85, 0, 1, 22, 47, 0, + 127, 145, 243, 0, 71, 129, 228, 0, 17, 93, 177, 0, 3, 61, 124, 0, + 1, 41, 84, 0, 1, 21, 52, 0, 157, 78, 244, 0, 140, 72, 231, 0, + 69, 58, 184, 0, 31, 44, 137, 0, 14, 38, 105, 0, 8, 23, 61, 0, + 125, 34, 187, 0, 52, 41, 133, 0, 6, 31, 56, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 37, 109, 153, 0, 51, 102, 147, 0, + 23, 87, 128, 0, 8, 67, 101, 0, 1, 41, 63, 0, 1, 19, 29, 0, + 31, 154, 185, 0, 17, 127, 175, 0, 6, 96, 145, 0, 2, 73, 114, 0, + 
1, 51, 82, 0, 1, 28, 45, 0, 23, 163, 200, 0, 10, 131, 185, 0, + 2, 93, 148, 0, 1, 67, 111, 0, 1, 41, 69, 0, 1, 14, 24, 0, + 29, 176, 217, 0, 12, 145, 201, 0, 3, 101, 156, 0, 1, 69, 111, 0, + 1, 39, 63, 0, 1, 14, 23, 0, 57, 192, 233, 0, 25, 154, 215, 0, + 6, 109, 167, 0, 3, 78, 118, 0, 1, 48, 69, 0, 1, 21, 29, 0, + 202, 105, 245, 0, 108, 106, 216, 0, 18, 90, 144, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 33, 172, 219, 0, 64, 149, 206, 0, + 14, 117, 177, 0, 5, 90, 141, 0, 2, 61, 95, 0, 1, 37, 57, 0, + 33, 179, 220, 0, 11, 140, 198, 0, 1, 89, 148, 0, 1, 60, 104, 0, + 1, 33, 57, 0, 1, 12, 21, 0, 30, 181, 221, 0, 8, 141, 198, 0, + 1, 87, 145, 0, 1, 58, 100, 0, 1, 31, 55, 0, 1, 12, 20, 0, + 32, 186, 224, 0, 7, 142, 198, 0, 1, 86, 143, 0, 1, 58, 100, 0, + 1, 31, 55, 0, 1, 12, 22, 0, 57, 192, 227, 0, 20, 143, 204, 0, + 3, 96, 154, 0, 1, 68, 112, 0, 1, 42, 69, 0, 1, 19, 32, 0, + 212, 35, 215, 0, 113, 47, 169, 0, 29, 48, 105, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 74, 129, 203, 0, 106, 120, 203, 0, + 49, 107, 178, 0, 19, 84, 144, 0, 4, 50, 84, 0, 1, 15, 25, 0, + 71, 172, 217, 0, 44, 141, 209, 0, 15, 102, 173, 0, 6, 76, 133, 0, + 2, 51, 89, 0, 1, 24, 42, 0, 64, 185, 231, 0, 31, 148, 216, 0, + 8, 103, 175, 0, 3, 74, 131, 0, 1, 46, 81, 0, 1, 18, 30, 0, + 65, 196, 235, 0, 25, 157, 221, 0, 5, 105, 174, 0, 1, 67, 120, 0, + 1, 38, 69, 0, 1, 15, 30, 0, 65, 204, 238, 0, 30, 156, 224, 0, + 7, 107, 177, 0, 2, 70, 124, 0, 1, 42, 73, 0, 1, 18, 34, 0, + 225, 86, 251, 0, 144, 104, 235, 0, 42, 99, 181, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 85, 175, 239, 0, 112, 165, 229, 0, + 29, 136, 200, 0, 12, 103, 162, 0, 6, 77, 123, 0, 2, 53, 84, 0, + 75, 183, 239, 0, 30, 155, 221, 0, 3, 106, 171, 0, 1, 74, 128, 0, + 1, 44, 76, 0, 1, 17, 28, 0, 73, 185, 240, 0, 27, 159, 222, 0, + 2, 107, 172, 0, 1, 75, 127, 0, 1, 42, 73, 0, 1, 17, 29, 0, + 62, 190, 238, 0, 21, 159, 222, 0, 2, 107, 172, 0, 1, 72, 122, 0, + 1, 40, 71, 0, 1, 18, 32, 0, 61, 199, 240, 0, 27, 161, 226, 0, + 4, 113, 180, 0, 1, 76, 129, 0, 
1, 46, 80, 0, 1, 23, 41, 0, + 7, 27, 153, 0, 5, 30, 95, 0, 1, 16, 30, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 50, 75, 127, 0, 57, 75, 124, 0, + 27, 67, 108, 0, 10, 54, 86, 0, 1, 33, 52, 0, 1, 12, 18, 0, + 43, 125, 151, 0, 26, 108, 148, 0, 7, 83, 122, 0, 2, 59, 89, 0, + 1, 38, 60, 0, 1, 17, 27, 0, 23, 144, 163, 0, 13, 112, 154, 0, + 2, 75, 117, 0, 1, 50, 81, 0, 1, 31, 51, 0, 1, 14, 23, 0, + 18, 162, 185, 0, 6, 123, 171, 0, 1, 78, 125, 0, 1, 51, 86, 0, + 1, 31, 54, 0, 1, 14, 23, 0, 15, 199, 227, 0, 3, 150, 204, 0, + 1, 91, 146, 0, 1, 55, 95, 0, 1, 30, 53, 0, 1, 11, 20, 0, + 19, 55, 240, 0, 19, 59, 196, 0, 3, 52, 105, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 41, 166, 207, 0, 104, 153, 199, 0, + 31, 123, 181, 0, 14, 101, 152, 0, 5, 72, 106, 0, 1, 36, 52, 0, + 35, 176, 211, 0, 12, 131, 190, 0, 2, 88, 144, 0, 1, 60, 101, 0, + 1, 36, 60, 0, 1, 16, 28, 0, 28, 183, 213, 0, 8, 134, 191, 0, + 1, 86, 142, 0, 1, 56, 96, 0, 1, 30, 53, 0, 1, 12, 20, 0, + 20, 190, 215, 0, 4, 135, 192, 0, 1, 84, 139, 0, 1, 53, 91, 0, + 1, 28, 49, 0, 1, 11, 20, 0, 13, 196, 216, 0, 2, 137, 192, 0, + 1, 86, 143, 0, 1, 57, 99, 0, 1, 32, 56, 0, 1, 13, 24, 0, + 211, 29, 217, 0, 96, 47, 156, 0, 22, 43, 87, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 78, 120, 193, 0, 111, 116, 186, 0, + 46, 102, 164, 0, 15, 80, 128, 0, 2, 49, 76, 0, 1, 18, 28, 0, + 71, 161, 203, 0, 42, 132, 192, 0, 10, 98, 150, 0, 3, 69, 109, 0, + 1, 44, 70, 0, 1, 18, 29, 0, 57, 186, 211, 0, 30, 140, 196, 0, + 4, 93, 146, 0, 1, 62, 102, 0, 1, 38, 65, 0, 1, 16, 27, 0, + 47, 199, 217, 0, 14, 145, 196, 0, 1, 88, 142, 0, 1, 57, 98, 0, + 1, 36, 62, 0, 1, 15, 26, 0, 26, 219, 229, 0, 5, 155, 207, 0, + 1, 94, 151, 0, 1, 60, 104, 0, 1, 36, 62, 0, 1, 16, 28, 0, + 233, 29, 248, 0, 146, 47, 220, 0, 43, 52, 140, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 100, 163, 232, 0, 179, 161, 222, 0, + 63, 142, 204, 0, 37, 113, 174, 0, 26, 89, 137, 0, 18, 68, 97, 0, + 85, 181, 230, 0, 32, 146, 209, 0, 7, 100, 164, 0, 3, 71, 121, 0, + 1, 45, 77, 0, 1, 18, 30, 0, 65, 
187, 230, 0, 20, 148, 207, 0, + 2, 97, 159, 0, 1, 68, 116, 0, 1, 40, 70, 0, 1, 14, 29, 0, + 40, 194, 227, 0, 8, 147, 204, 0, 1, 94, 155, 0, 1, 65, 112, 0, + 1, 39, 66, 0, 1, 14, 26, 0, 16, 208, 228, 0, 3, 151, 207, 0, + 1, 98, 160, 0, 1, 67, 117, 0, 1, 41, 74, 0, 1, 17, 31, 0, + 17, 38, 140, 0, 7, 34, 80, 0, 1, 17, 29, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 37, 75, 128, 0, 41, 76, 128, 0, + 26, 66, 116, 0, 12, 52, 94, 0, 2, 32, 55, 0, 1, 10, 16, 0, + 50, 127, 154, 0, 37, 109, 152, 0, 16, 82, 121, 0, 5, 59, 85, 0, + 1, 35, 54, 0, 1, 13, 20, 0, 40, 142, 167, 0, 17, 110, 157, 0, + 2, 71, 112, 0, 1, 44, 72, 0, 1, 27, 45, 0, 1, 11, 17, 0, + 30, 175, 188, 0, 9, 124, 169, 0, 1, 74, 116, 0, 1, 48, 78, 0, + 1, 30, 49, 0, 1, 11, 18, 0, 10, 222, 223, 0, 2, 150, 194, 0, + 1, 83, 128, 0, 1, 48, 79, 0, 1, 27, 45, 0, 1, 11, 17, 0, + 36, 41, 235, 0, 29, 36, 193, 0, 10, 27, 111, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 85, 165, 222, 0, 177, 162, 215, 0, + 110, 135, 195, 0, 57, 113, 168, 0, 23, 83, 120, 0, 10, 49, 61, 0, + 85, 190, 223, 0, 36, 139, 200, 0, 5, 90, 146, 0, 1, 60, 103, 0, + 1, 38, 65, 0, 1, 18, 30, 0, 72, 202, 223, 0, 23, 141, 199, 0, + 2, 86, 140, 0, 1, 56, 97, 0, 1, 36, 61, 0, 1, 16, 27, 0, + 55, 218, 225, 0, 13, 145, 200, 0, 1, 86, 141, 0, 1, 57, 99, 0, + 1, 35, 61, 0, 1, 13, 22, 0, 15, 235, 212, 0, 1, 132, 184, 0, + 1, 84, 139, 0, 1, 57, 97, 0, 1, 34, 56, 0, 1, 14, 23, 0, + 181, 21, 201, 0, 61, 37, 123, 0, 10, 38, 71, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 47, 106, 172, 0, 95, 104, 173, 0, + 42, 93, 159, 0, 18, 77, 131, 0, 4, 50, 81, 0, 1, 17, 23, 0, + 62, 147, 199, 0, 44, 130, 189, 0, 28, 102, 154, 0, 18, 75, 115, 0, + 2, 44, 65, 0, 1, 12, 19, 0, 55, 153, 210, 0, 24, 130, 194, 0, + 3, 93, 146, 0, 1, 61, 97, 0, 1, 31, 50, 0, 1, 10, 16, 0, + 49, 186, 223, 0, 17, 148, 204, 0, 1, 96, 142, 0, 1, 53, 83, 0, + 1, 26, 44, 0, 1, 11, 17, 0, 13, 217, 212, 0, 2, 136, 180, 0, + 1, 78, 124, 0, 1, 50, 83, 0, 1, 29, 49, 0, 1, 14, 23, 0, + 197, 13, 247, 0, 82, 17, 222, 0, 25, 
17, 162, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 126, 186, 247, 0, 234, 191, 243, 0, + 176, 177, 234, 0, 104, 158, 220, 0, 66, 128, 186, 0, 55, 90, 137, 0, + 111, 197, 242, 0, 46, 158, 219, 0, 9, 104, 171, 0, 2, 65, 125, 0, + 1, 44, 80, 0, 1, 17, 91, 0, 104, 208, 245, 0, 39, 168, 224, 0, + 3, 109, 162, 0, 1, 79, 124, 0, 1, 50, 102, 0, 1, 43, 102, 0, + 84, 220, 246, 0, 31, 177, 231, 0, 2, 115, 180, 0, 1, 79, 134, 0, + 1, 55, 77, 0, 1, 60, 79, 0, 43, 243, 240, 0, 8, 180, 217, 0, + 1, 115, 166, 0, 1, 84, 121, 0, 1, 51, 67, 0, 1, 16, 6, 0 + }; + + private byte[] _defaultSkipProbs = new byte[] { 192, 128, 64 }; + + private byte[] _defaultInterModeProbs = new byte[] + { + 2, 173, 34, 0, 7, 145, 85, 0, 7, 166, 63, 0, 7, 94, 66, 0, + 8, 64, 46, 0, 17, 81, 31, 0, 25, 29, 30, 0 + }; + + private byte[] _defaultInterpFilterProbs = new byte[] + { + 235, 162, 36, 255, 34, 3, 149, 144 + }; + + private byte[] _defaultIsInterProbs = new byte[] { 9, 102, 187, 225 }; + + private byte[] _defaultCompModeProbs = new byte[] { 239, 183, 119, 96, 41 }; + + private byte[] _defaultSingleRefProbs = new byte[] + { + 33, 16, 77, 74, 142, 142, 172, 170, 238, 247 + }; + + private byte[] _defaultCompRefProbs = new byte[] { 50, 126, 123, 221, 226 }; + + private byte[] _defaultYModeProbs0 = new byte[] + { + 65, 32, 18, 144, 162, 194, 41, 51, 132, 68, 18, 165, 217, 196, 45, 40, + 173, 80, 19, 176, 240, 193, 64, 35, 221, 135, 38, 194, 248, 121, 96, 85 + }; + + private byte[] _defaultYModeProbs1 = new byte[] { 98, 78, 46, 29 }; + + private byte[] _defaultPartitionProbs = new byte[] + { + 199, 122, 141, 0, 147, 63, 159, 0, 148, 133, 118, 0, 121, 104, 114, 0, + 174, 73, 87, 0, 92, 41, 83, 0, 82, 99, 50, 0, 53, 39, 39, 0, + 177, 58, 59, 0, 68, 26, 63, 0, 52, 79, 25, 0, 17, 14, 12, 0, + 222, 34, 30, 0, 72, 16, 44, 0, 58, 32, 12, 0, 10, 7, 6, 0 + }; + + private byte[] _defaultMvJointProbs = new byte[] { 32, 64, 96 }; + + private byte[] _defaultMvSignProbs = new byte[] { 128, 128 }; + + private byte[] 
_defaultMvClassProbs = new byte[] + { + 224, 144, 192, 168, 192, 176, 192, 198, 198, 245, 216, 128, 176, 160, 176, 176, + 192, 198, 198, 208 + }; + + private byte[] _defaultMvClass0BitProbs = new byte[] { 216, 208 }; + + private byte[] _defaultMvBitsProbs = new byte[] + { + 136, 140, 148, 160, 176, 192, 224, 234, 234, 240, 136, 140, 148, 160, 176, 192, + 224, 234, 234, 240 + }; + + private byte[] _defaultMvClass0FrProbs = new byte[] + { + 128, 128, 64, 96, 112, 64, 128, 128, 64, 96, 112, 64 + }; + + private byte[] _defaultMvFrProbs = new byte[] { 64, 96, 64, 64, 96, 64 }; + + private byte[] _defaultMvClass0HpProbs = new byte[] { 160, 160 }; + + private byte[] _defaultMvHpProbs = new byte[] { 128, 128 }; + + private sbyte[] _loopFilterRefDeltas; + private sbyte[] _loopFilterModeDeltas; + + private LinkedList _frameSlotByLastUse; + + private Dictionary> _cachedRefFrames; + + public Vp9Decoder() + { + _loopFilterRefDeltas = new sbyte[4]; + _loopFilterModeDeltas = new sbyte[2]; + + _frameSlotByLastUse = new LinkedList(); + + for (int slot = 0; slot < 8; slot++) + { + _frameSlotByLastUse.AddFirst(slot); + } + + _cachedRefFrames = new Dictionary>(); + } + + public void Decode( + Vp9FrameKeys keys, + Vp9FrameHeader header, + Vp9ProbabilityTables probs, + byte[] frameData) + { + bool isKeyFrame = ((header.Flags >> 0) & 1) != 0; + bool lastIsKeyFrame = ((header.Flags >> 1) & 1) != 0; + bool frameSizeChanged = ((header.Flags >> 2) & 1) != 0; + bool errorResilientMode = ((header.Flags >> 3) & 1) != 0; + bool lastShowFrame = ((header.Flags >> 4) & 1) != 0; + bool isFrameIntra = ((header.Flags >> 5) & 1) != 0; + + bool showFrame = !isFrameIntra; + + // Write compressed header. 
+ byte[] compressedHeaderData; + + using (MemoryStream compressedHeader = new MemoryStream()) + { + VpxRangeEncoder writer = new VpxRangeEncoder(compressedHeader); + + if (!header.Lossless) + { + if ((uint)header.TxMode >= 3) + { + writer.Write(3, 2); + writer.Write(header.TxMode == 4); + } + else + { + writer.Write(header.TxMode, 2); + } + } + + if (header.TxMode == 4) + { + WriteProbabilityUpdate(writer, probs.Tx8x8Probs, DefaultTx8x8Probs); + WriteProbabilityUpdate(writer, probs.Tx16x16Probs, DefaultTx16x16Probs); + WriteProbabilityUpdate(writer, probs.Tx32x32Probs, DefaultTx32x32Probs); + } + + WriteCoefProbabilityUpdate(writer, header.TxMode, probs.CoefProbs, _defaultCoefProbs); + + WriteProbabilityUpdate(writer, probs.SkipProbs, _defaultSkipProbs); + + if (!isFrameIntra) + { + WriteProbabilityUpdateAligned4(writer, probs.InterModeProbs, _defaultInterModeProbs); + + if (header.RawInterpolationFilter == 4) + { + WriteProbabilityUpdate(writer, probs.InterpFilterProbs, _defaultInterpFilterProbs); + } + + WriteProbabilityUpdate(writer, probs.IsInterProbs, _defaultIsInterProbs); + + if ((header.RefFrameSignBias[1] & 1) != (header.RefFrameSignBias[2] & 1) || + (header.RefFrameSignBias[1] & 1) != (header.RefFrameSignBias[3] & 1)) + { + if ((uint)header.CompPredMode >= 1) + { + writer.Write(1, 1); + writer.Write(header.CompPredMode == 2); + } + else + { + writer.Write(0, 1); + } + } + + if (header.CompPredMode == 2) + { + WriteProbabilityUpdate(writer, probs.CompModeProbs, _defaultCompModeProbs); + } + + if (header.CompPredMode != 1) + { + WriteProbabilityUpdate(writer, probs.SingleRefProbs, _defaultSingleRefProbs); + } + + if (header.CompPredMode != 0) + { + WriteProbabilityUpdate(writer, probs.CompRefProbs, _defaultCompRefProbs); + } + + for (int index = 0; index < 4; index++) + { + int i = index * 8; + int j = index; + + WriteProbabilityUpdate(writer, probs.YModeProbs0[i + 0], _defaultYModeProbs0[i + 0]); + WriteProbabilityUpdate(writer, probs.YModeProbs0[i + 1], 
_defaultYModeProbs0[i + 1]); + WriteProbabilityUpdate(writer, probs.YModeProbs0[i + 2], _defaultYModeProbs0[i + 2]); + WriteProbabilityUpdate(writer, probs.YModeProbs0[i + 3], _defaultYModeProbs0[i + 3]); + WriteProbabilityUpdate(writer, probs.YModeProbs0[i + 4], _defaultYModeProbs0[i + 4]); + WriteProbabilityUpdate(writer, probs.YModeProbs0[i + 5], _defaultYModeProbs0[i + 5]); + WriteProbabilityUpdate(writer, probs.YModeProbs0[i + 6], _defaultYModeProbs0[i + 6]); + WriteProbabilityUpdate(writer, probs.YModeProbs0[i + 7], _defaultYModeProbs0[i + 7]); + WriteProbabilityUpdate(writer, probs.YModeProbs1[j + 0], _defaultYModeProbs1[j + 0]); + } + + WriteProbabilityUpdateAligned4(writer, probs.PartitionProbs, _defaultPartitionProbs); + + for (int i = 0; i < 3; i++) + { + WriteMvProbabilityUpdate(writer, probs.MvJointProbs[i], _defaultMvJointProbs[i]); + } + + for (int i = 0; i < 2; i++) + { + WriteMvProbabilityUpdate(writer, probs.MvSignProbs[i], _defaultMvSignProbs[i]); + + for (int j = 0; j < 10; j++) + { + int index = i * 10 + j; + + WriteMvProbabilityUpdate(writer, probs.MvClassProbs[index], _defaultMvClassProbs[index]); + } + + WriteMvProbabilityUpdate(writer, probs.MvClass0BitProbs[i], _defaultMvClass0BitProbs[i]); + + for (int j = 0; j < 10; j++) + { + int index = i * 10 + j; + + WriteMvProbabilityUpdate(writer, probs.MvBitsProbs[index], _defaultMvBitsProbs[index]); + } + } + + for (int i = 0; i < 2; i++) + { + for (int j = 0; j < 2; j++) + { + for (int k = 0; k < 3; k++) + { + int index = i * 2 * 3 + j * 3 + k; + + WriteMvProbabilityUpdate(writer, probs.MvClass0FrProbs[index], _defaultMvClass0FrProbs[index]); + } + } + + for (int j = 0; j < 3; j++) + { + int index = i * 3 + j; + + WriteMvProbabilityUpdate(writer, probs.MvFrProbs[index], _defaultMvFrProbs[index]); + } + } + + if (header.AllowHighPrecisionMv) + { + for (int index = 0; index < 2; index++) + { + WriteMvProbabilityUpdate(writer, probs.MvClass0HpProbs[index], _defaultMvClass0HpProbs[index]); + 
WriteMvProbabilityUpdate(writer, probs.MvHpProbs[index], _defaultMvHpProbs[index]); + } + } + } + + writer.End(); + + compressedHeaderData = compressedHeader.ToArray(); + } + + // Write uncompressed header. + using (MemoryStream encodedHeader = new MemoryStream()) + { + VpxBitStreamWriter writer = new VpxBitStreamWriter(encodedHeader); + + writer.WriteU(2, 2); //Frame marker. + writer.WriteU(0, 2); //Profile. + writer.WriteBit(false); //Show existing frame. + writer.WriteBit(!isKeyFrame); + writer.WriteBit(showFrame); + writer.WriteBit(errorResilientMode); + + if (isKeyFrame) + { + writer.WriteU(FrameSyncCode, 24); + writer.WriteU(0, 3); //Color space. + writer.WriteU(0, 1); //Color range. + writer.WriteU(header.CurrentFrame.Width - 1, 16); + writer.WriteU(header.CurrentFrame.Height - 1, 16); + writer.WriteBit(false); //Render and frame size different. + + _cachedRefFrames.Clear(); + + // On key frames, all frame slots are set to the current frame, + // so the value of the selected slot doesn't really matter. + GetNewFrameSlot(keys.CurrKey); + } + else + { + if (!showFrame) + { + writer.WriteBit(isFrameIntra); + } + + if (!errorResilientMode) + { + writer.WriteU(0, 2); //Reset frame context. + } + + int refreshFrameFlags = 1 << GetNewFrameSlot(keys.CurrKey); + + if (isFrameIntra) + { + writer.WriteU(FrameSyncCode, 24); + writer.WriteU(refreshFrameFlags, 8); + writer.WriteU(header.CurrentFrame.Width - 1, 16); + writer.WriteU(header.CurrentFrame.Height - 1, 16); + writer.WriteBit(false); //Render and frame size different. + } + else + { + writer.WriteU(refreshFrameFlags, 8); + + int[] refFrameIndex = new int[] + { + GetFrameSlot(keys.Ref0Key), + GetFrameSlot(keys.Ref1Key), + GetFrameSlot(keys.Ref2Key) + }; + + byte[] refFrameSignBias = header.RefFrameSignBias; + + for (int index = 1; index < 4; index++) + { + writer.WriteU(refFrameIndex[index - 1], 3); + writer.WriteU(refFrameSignBias[index], 1); + } + + writer.WriteBit(true); //Frame size with refs. 
+ writer.WriteBit(false); //Render and frame size different. + writer.WriteBit(header.AllowHighPrecisionMv); + writer.WriteBit(header.RawInterpolationFilter == 4); + + if (header.RawInterpolationFilter != 4) + { + writer.WriteU(header.RawInterpolationFilter, 2); + } + } + } + + if (!errorResilientMode) + { + writer.WriteBit(false); //Refresh frame context. + writer.WriteBit(true); //Frame parallel decoding mode. + } + + writer.WriteU(0, 2); //Frame context index. + + writer.WriteU(header.LoopFilterLevel, 6); + writer.WriteU(header.LoopFilterSharpness, 3); + writer.WriteBit(header.LoopFilterDeltaEnabled); + + if (header.LoopFilterDeltaEnabled) + { + bool[] updateLoopFilterRefDeltas = new bool[4]; + bool[] updateLoopFilterModeDeltas = new bool[2]; + + bool loopFilterDeltaUpdate = false; + + for (int index = 0; index < header.LoopFilterRefDeltas.Length; index++) + { + sbyte old = _loopFilterRefDeltas[index]; + sbyte New = header.LoopFilterRefDeltas[index]; + + loopFilterDeltaUpdate |= (updateLoopFilterRefDeltas[index] = old != New); + } + + for (int index = 0; index < header.LoopFilterModeDeltas.Length; index++) + { + sbyte old = _loopFilterModeDeltas[index]; + sbyte New = header.LoopFilterModeDeltas[index]; + + loopFilterDeltaUpdate |= (updateLoopFilterModeDeltas[index] = old != New); + } + + writer.WriteBit(loopFilterDeltaUpdate); + + if (loopFilterDeltaUpdate) + { + for (int index = 0; index < header.LoopFilterRefDeltas.Length; index++) + { + writer.WriteBit(updateLoopFilterRefDeltas[index]); + + if (updateLoopFilterRefDeltas[index]) + { + writer.WriteS(header.LoopFilterRefDeltas[index], 6); + } + } + + for (int index = 0; index < header.LoopFilterModeDeltas.Length; index++) + { + writer.WriteBit(updateLoopFilterModeDeltas[index]); + + if (updateLoopFilterModeDeltas[index]) + { + writer.WriteS(header.LoopFilterModeDeltas[index], 6); + } + } + } + } + + writer.WriteU(header.BaseQIndex, 8); + + writer.WriteDeltaQ(header.DeltaQYDc); + 
writer.WriteDeltaQ(header.DeltaQUvDc); + writer.WriteDeltaQ(header.DeltaQUvAc); + + writer.WriteBit(false); //Segmentation enabled (TODO). + + int minTileColsLog2 = CalcMinLog2TileCols(header.CurrentFrame.Width); + int maxTileColsLog2 = CalcMaxLog2TileCols(header.CurrentFrame.Width); + + int tileColsLog2Diff = header.TileColsLog2 - minTileColsLog2; + + int tileColsLog2IncMask = (1 << tileColsLog2Diff) - 1; + + // If it's less than the maximum, we need to add an extra 0 on the bitstream + // to indicate that it should stop reading. + if (header.TileColsLog2 < maxTileColsLog2) + { + writer.WriteU(tileColsLog2IncMask << 1, tileColsLog2Diff + 1); + } + else + { + writer.WriteU(tileColsLog2IncMask, tileColsLog2Diff); + } + + bool tileRowsLog2IsNonZero = header.TileRowsLog2 != 0; + + writer.WriteBit(tileRowsLog2IsNonZero); + + if (tileRowsLog2IsNonZero) + { + writer.WriteBit(header.TileRowsLog2 > 1); + } + + writer.WriteU(compressedHeaderData.Length, 16); + + writer.Flush(); + + encodedHeader.Write(compressedHeaderData, 0, compressedHeaderData.Length); + + if (!FFmpegWrapper.IsInitialized) + { + FFmpegWrapper.Vp9Initialize(); + } + + FFmpegWrapper.DecodeFrame(DecoderHelper.Combine(encodedHeader.ToArray(), frameData)); + } + + _loopFilterRefDeltas = header.LoopFilterRefDeltas; + _loopFilterModeDeltas = header.LoopFilterModeDeltas; + } + + private int GetNewFrameSlot(long key) + { + LinkedListNode node = _frameSlotByLastUse.Last; + + _frameSlotByLastUse.RemoveLast(); + _frameSlotByLastUse.AddFirst(node); + + _cachedRefFrames[key] = node; + + return node.Value; + } + + private int GetFrameSlot(long key) + { + if (_cachedRefFrames.TryGetValue(key, out LinkedListNode node)) + { + _frameSlotByLastUse.Remove(node); + _frameSlotByLastUse.AddFirst(node); + + return node.Value; + } + + // Reference frame was lost. + // What we should do in this case? 
+ return 0; + } + + private void WriteProbabilityUpdate(VpxRangeEncoder writer, byte[] New, byte[] old) + { + for (int offset = 0; offset < New.Length; offset++) + { + WriteProbabilityUpdate(writer, New[offset], old[offset]); + } + } + + private void WriteCoefProbabilityUpdate(VpxRangeEncoder writer, int txMode, byte[] New, byte[] old) + { + // Note: There's 1 byte added on each packet for alignment, + // this byte is ignored when doing updates. + const int blockBytes = 2 * 2 * 6 * 6 * 4; + + bool NeedsUpdate(int baseIndex) + { + int index = baseIndex; + + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 6; k++) + for (int l = 0; l < 6; l++) + { + if (New[index + 0] != old[index + 0] || + New[index + 1] != old[index + 1] || + New[index + 2] != old[index + 2]) + { + return true; + } + + index += 4; + } + + return false; + } + + for (int blockIndex = 0; blockIndex < 4; blockIndex++) + { + int baseIndex = blockIndex * blockBytes; + + bool update = NeedsUpdate(baseIndex); + + writer.Write(update); + + if (update) + { + int index = baseIndex; + + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) + for (int k = 0; k < 6; k++) + for (int l = 0; l < 6; l++) + { + if (k != 0 || l < 3) + { + WriteProbabilityUpdate(writer, New[index + 0], old[index + 0]); + WriteProbabilityUpdate(writer, New[index + 1], old[index + 1]); + WriteProbabilityUpdate(writer, New[index + 2], old[index + 2]); + } + + index += 4; + } + } + + if (blockIndex == txMode) + { + break; + } + } + } + + private void WriteProbabilityUpdateAligned4(VpxRangeEncoder writer, byte[] New, byte[] old) + { + for (int offset = 0; offset < New.Length; offset += 4) + { + WriteProbabilityUpdate(writer, New[offset + 0], old[offset + 0]); + WriteProbabilityUpdate(writer, New[offset + 1], old[offset + 1]); + WriteProbabilityUpdate(writer, New[offset + 2], old[offset + 2]); + } + } + + private void WriteProbabilityUpdate(VpxRangeEncoder writer, byte New, byte old) + { + bool update = New 
!= old; + + writer.Write(update, DiffUpdateProbability); + + if (update) + { + WriteProbabilityDelta(writer, New, old); + } + } + + /* Codes the change from old to New: RemapProbability turns the pair into an index, which EncodeTermSubExp then writes. */ private void WriteProbabilityDelta(VpxRangeEncoder writer, int New, int old) + { + int delta = RemapProbability(New, old); + + EncodeTermSubExp(writer, delta); + } + + /* Returns the MapLut entry for a (New, old) pair. Both values are decremented, then New is recentered around old; when the decremented old times two exceeds 0xff the pair is mirrored (0xff - 1 - x) before recentering. NOTE(review): equal inputs give index -1; the caller fragment above appears to guard on an inequality - confirm no other caller passes equal values. */ private int RemapProbability(int New, int old) + { + New--; + old--; + + int index; + + if (old * 2 <= 0xff) + { + index = RecenterNonNeg(New, old) - 1; + } + else + { + index = RecenterNonNeg(0xff - 1 - New, 0xff - 1 - old) - 1; + } + + return MapLut[index]; + } + + /* Returns New unchanged when it is more than twice old. Otherwise folds its distance from old into a non-negative code: twice the distance when New >= old, twice the distance minus one when New is below old. */ private int RecenterNonNeg(int New, int old) + { + if (New > old * 2) + { + return New; + } + else if (New >= old) + { + return (New - old) * 2; + } + else /* if (New < Old) */ + { + return (old - New) * 2 - 1; + } + } + + /* Writes value with a tiered code. Each WriteLessThan call emits one flag bit; values below 16, 32 and 64 are then written relative to the tier base (0, 16, 32) in 4, 4 and 5 bits. Larger values have 64 subtracted and are written either in 7 bits (when below mask, which is 65) or as 7 bits holding mask plus half of delta followed by the lowest bit of delta. */ private void EncodeTermSubExp(VpxRangeEncoder writer, int value) + { + if (WriteLessThan(writer, value, 16)) + { + writer.Write(value, 4); + } + else if (WriteLessThan(writer, value, 32)) + { + writer.Write(value - 16, 4); + } + else if (WriteLessThan(writer, value, 64)) + { + writer.Write(value - 32, 5); + } + else + { + value -= 64; + + const int size = 8; + + int mask = (1 << size) - 191; + + int delta = value - mask; + + if (delta < 0) + { + writer.Write(value, size - 1); + } + else + { + writer.Write(delta / 2 + mask, size - 1); + writer.Write(delta & 1, 1); + } + } + } + + /* Emits a single bit that is set when value >= test, and returns true when value is below test. */ private bool WriteLessThan(VpxRangeEncoder writer, int value, int test) + { + bool isLessThan = value < test; + + writer.Write(!isLessThan); + + return isLessThan; + } + + /* Writes an update flag, coded with DiffUpdateProbability, that is set when New differs from old. When set, the flag is followed by New >> 1 in 7 bits, so the lowest bit of New is not written. */ private void WriteMvProbabilityUpdate(VpxRangeEncoder writer, byte New, byte old) + { + bool update = New != old; + + writer.Write(update, DiffUpdateProbability); + + if (update) + { + writer.Write(New >> 1, 7); + } + } + + /* Returns the smallest minLog2 for which (64 << minLog2) is not below sb64Cols, where sb64Cols is frameWidth divided by 64 and rounded up. */ private static int CalcMinLog2TileCols(int frameWidth) + { + int sb64Cols = (frameWidth + 63) / 64; + int minLog2 = 0; + + while ((64 << minLog2) < sb64Cols) + { + minLog2++; + } + + return minLog2; + } + + /* Starting from 1, increments maxLog2 while (sb64Cols >> maxLog2) >= 4 and returns maxLog2 - 1; the result is 0 when sb64Cols is below 8. sb64Cols is frameWidth divided by 64 and rounded up. */ private static int
CalcMaxLog2TileCols(int frameWidth) + { + int sb64Cols = (frameWidth + 63) / 64; + int maxLog2 = 1; + + while ((sb64Cols >> maxLog2) >= 4) + { + maxLog2++; + } + + return maxLog2 - 1; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/Vp9FrameHeader.cs b/Ryujinx.Graphics.Nvdec/VDec/Vp9FrameHeader.cs new file mode 100644 index 000000000..bdba6de5a --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/VDec/Vp9FrameHeader.cs @@ -0,0 +1,79 @@ +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.VDec +{ + /* Four 16-bit fields laid out sequentially with 2-byte packing. The two subsampling fields carry the original author's "?" markers, i.e. their meaning was unconfirmed. */ [StructLayout(LayoutKind.Sequential, Pack = 2)] + struct Vp9FrameDimensions + { + public short Width; + public short Height; + public short SubsamplingX; //? + public short SubsamplingY; //? + } + + /* Sequential, byte-packed (Pack = 1) frame header. Arrays are marshalled inline with the SizeConst given on each field and bool fields as single bytes (UnmanagedType.I1). NOTE(review): presumably mirrors a structure supplied by the guest - confirm against the code that marshals it. */ [StructLayout(LayoutKind.Sequential, Pack = 1)] + struct Vp9FrameHeader + { + [MarshalAs(UnmanagedType.ByValArray, SizeConst = 3)] + public Vp9FrameDimensions[] RefFrames; + + public Vp9FrameDimensions CurrentFrame; + + public int Flags; + + [MarshalAs(UnmanagedType.ByValArray, SizeConst = 4)] + public byte[] RefFrameSignBias; + + public byte LoopFilterLevel; + public byte LoopFilterSharpness; + + public byte BaseQIndex; + public sbyte DeltaQYDc; + public sbyte DeltaQUvDc; + public sbyte DeltaQUvAc; + + [MarshalAs(UnmanagedType.I1)] + public bool Lossless; + + public byte TxMode; + + [MarshalAs(UnmanagedType.I1)] + public bool AllowHighPrecisionMv; + + public byte RawInterpolationFilter; + public byte CompPredMode; + public byte FixCompRef; + public byte VarCompRef0; + public byte VarCompRef1; + + public byte TileColsLog2; + public byte TileRowsLog2; + + [MarshalAs(UnmanagedType.I1)] + public bool SegmentationEnabled; + + [MarshalAs(UnmanagedType.I1)] + public bool SegmentationUpdate; + + [MarshalAs(UnmanagedType.I1)] + public bool SegmentationTemporalUpdate; + + [MarshalAs(UnmanagedType.I1)] + public bool SegmentationAbsOrDeltaUpdate; + + [MarshalAs(UnmanagedType.ByValArray, SizeConst = 8 * 4, ArraySubType = UnmanagedType.I1)] + public
bool[] FeatureEnabled; + + [MarshalAs(UnmanagedType.ByValArray, SizeConst = 8 * 4)] + public short[] FeatureData; + + [MarshalAs(UnmanagedType.I1)] + public bool LoopFilterDeltaEnabled; + + [MarshalAs(UnmanagedType.ByValArray, SizeConst = 4)] + public sbyte[] LoopFilterRefDeltas; + + [MarshalAs(UnmanagedType.ByValArray, SizeConst = 2)] + public sbyte[] LoopFilterModeDeltas; + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/Vp9FrameKeys.cs b/Ryujinx.Graphics.Nvdec/VDec/Vp9FrameKeys.cs new file mode 100644 index 000000000..dfc31ea31 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/VDec/Vp9FrameKeys.cs @@ -0,0 +1,10 @@ +namespace Ryujinx.Graphics.VDec +{ + /* Four 64-bit keys, named for the current frame and for references 0 to 2. What the keys index is not visible in this file. */ struct Vp9FrameKeys + { + public long CurrKey; + public long Ref0Key; + public long Ref1Key; + public long Ref2Key; + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/Vp9ProbabilityTables.cs b/Ryujinx.Graphics.Nvdec/VDec/Vp9ProbabilityTables.cs new file mode 100644 index 000000000..5a6dd0cf2 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/VDec/Vp9ProbabilityTables.cs @@ -0,0 +1,31 @@ +namespace Ryujinx.Graphics.VDec +{ + /* Plain container of byte[] tables, one array per field. The length and internal layout of each array are not established in this file. */ struct Vp9ProbabilityTables + { + public byte[] SegmentationTreeProbs; + public byte[] SegmentationPredProbs; + public byte[] Tx8x8Probs; + public byte[] Tx16x16Probs; + public byte[] Tx32x32Probs; + public byte[] CoefProbs; + public byte[] SkipProbs; + public byte[] InterModeProbs; + public byte[] InterpFilterProbs; + public byte[] IsInterProbs; + public byte[] CompModeProbs; + public byte[] SingleRefProbs; + public byte[] CompRefProbs; + public byte[] YModeProbs0; + public byte[] YModeProbs1; + public byte[] PartitionProbs; + public byte[] MvJointProbs; + public byte[] MvSignProbs; + public byte[] MvClassProbs; + public byte[] MvClass0BitProbs; + public byte[] MvBitsProbs; + public byte[] MvClass0FrProbs; + public byte[] MvFrProbs; + public byte[] MvClass0HpProbs; + public byte[] MvHpProbs; + } +} \ No newline at end of file diff --git
a/Ryujinx.Graphics.Nvdec/VDec/VpxBitStreamWriter.cs b/Ryujinx.Graphics.Nvdec/VDec/VpxBitStreamWriter.cs new file mode 100644 index 000000000..97ada333e --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/VDec/VpxBitStreamWriter.cs @@ -0,0 +1,38 @@ +using System.IO; + +namespace Ryujinx.Graphics.VDec +{ + /* BitStreamWriter specialisation that adds the unsigned, signed and delta-q field encodings defined below. */ class VpxBitStreamWriter : BitStreamWriter + { + public VpxBitStreamWriter(Stream baseStream) : base(baseStream) { } + + /* Writes value as an unsigned field of valueSize bits through WriteBits. */ public void WriteU(int value, int valueSize) + { + WriteBits(value, valueSize); + } + + /* Writes a signed value as a field one bit wider than valueSize: the magnitude occupies the upper valueSize bits and the sign (1 for negative) the lowest bit. */ public void WriteS(int value, int valueSize) + { + bool sign = value < 0; + + if (sign) + { + value = -value; + } + + WriteBits((value << 1) | (sign ? 1 : 0), valueSize + 1); + } + + /* Writes a one-bit "delta coded" flag that is set when value is non-zero, followed in that case by the delta itself. */ public void WriteDeltaQ(int value) + { + bool deltaCoded = value != 0; + + WriteBit(deltaCoded); + + if (deltaCoded) + { + /* VP9 codes delta_q as su(4): a 4-bit magnitude plus a sign bit. Writing only 4 raw bits dropped the sign bit and left every following field one bit out of place. */ WriteS(value, 4); + } + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/VpxRangeEncoder.cs b/Ryujinx.Graphics.Nvdec/VDec/VpxRangeEncoder.cs new file mode 100644 index 000000000..c854c9d9d --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/VDec/VpxRangeEncoder.cs @@ -0,0 +1,134 @@ +using System.IO; + +namespace Ryujinx.Graphics.VDec +{ + /* Range encoder for single bits with 8-bit probabilities; output bytes are written to the Stream given to the constructor. */ class VpxRangeEncoder + { + /* Probability used for bits written without an explicit probability (128 out of 256). */ private const int HalfProbability = 128; + + /* Indexed by the range left after a split: the number of left shifts Write applies to renormalise it. */ private static readonly int[] NormLut = new int[] + { + 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + + private Stream _baseStream; + + private uint _lowValue; + private uint _range; + private int _count; + + /* Starts with a range of 0xff and a count of -24, then writes a single zero bit. */ public VpxRangeEncoder(Stream baseStream) + { + _baseStream = baseStream; + + _range = 0xff; + _count = -24; + + Write(false); + } + + /* Writes the 8 bits of value, most significant first. */ public void WriteByte(byte value) + { + Write(value, 8); + } + + /* Writes the low valueSize bits of value, most significant first, each with HalfProbability. */ public void Write(int value, int valueSize) + { + for (int bit = valueSize - 1; bit >= 0; bit--) + { + Write(((value >> bit) & 1) != 0); + } + } + + /* Writes one bit with HalfProbability. */ public void Write(bool bit) + { + Write(bit, HalfProbability); + } + + /* Encodes one bit. probability (out of 256) sizes the sub-range kept for a false bit; a true bit keeps the remainder and advances _lowValue by split. The range is then renormalised through NormLut, and whenever _count reaches zero or more one output byte is written to the stream. */ public void Write(bool bit, int probability) + { + uint range = _range; + + uint split = 1 + (((range - 1) * (uint)probability) >> 8); + + range = split; + + if (bit) + { + _lowValue += split; + range = _range - split; + } + + int shift = NormLut[range]; + + range <<= shift; + _count += shift; + + if (_count >= 0) + { + int offset = shift - _count; + + /* Carry out of the low value: bytes equal to 0xff in front of the write position are zeroed while walking backwards, then the first other byte is incremented. NOTE(review): Position >= 0 does not bound the walk - confirm a carry can never run back to the first byte, otherwise the Seek(-2) step would move before the start of the stream. */ if (((_lowValue << (offset - 1)) >> 31) != 0) + { + long currentPos = _baseStream.Position; + + _baseStream.Seek(-1, SeekOrigin.Current); + + while (_baseStream.Position >= 0 && PeekByte() == 0xff) + { + _baseStream.WriteByte(0); + + _baseStream.Seek(-2, SeekOrigin.Current); + } + + _baseStream.WriteByte((byte)(PeekByte() + 1)); + + _baseStream.Seek(currentPos, SeekOrigin.Begin); + } + + _baseStream.WriteByte((byte)(_lowValue >> (24 - offset))); + + _lowValue <<= offset; + shift = _count; + _lowValue &= 0xffffff; + _count -= 8; + } + + _lowValue <<= shift; + + _range = range; + } + + /* Reads the byte at the current position and seeks back by one. NOTE(review): at end of stream ReadByte returns -1, which the cast would turn into 0xff. */ private byte PeekByte() + { + byte value = (byte)_baseStream.ReadByte(); + + _baseStream.Seek(-1, SeekOrigin.Current); + + return value; + } + + /* Writes 32 zero bits; presumably this pushes the bits still held in _lowValue out to the stream. */ public void End() + { + for (int index = 0; index < 32; index++) + { + Write(false); + } + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/Vic/StructUnpacker.cs b/Ryujinx.Graphics.Nvdec/Vic/StructUnpacker.cs new file mode 100644 index
000000000..4957e6b63 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/Vic/StructUnpacker.cs @@ -0,0 +1,71 @@ +using Ryujinx.Graphics.Gpu.Memory; +using System; + +namespace Ryujinx.Graphics.Vic +{ + /* Reads consecutive bit fields from memory through a MemoryAccessor, fetching 64 bits at a time. */ class StructUnpacker + { + private MemoryAccessor _vmm; + + private ulong _position; + + private ulong _buffer; + private int _buffPos; + + /* position is the address of the first 64-bit word. _buffPos starts at 64 so that the first Read refills the buffer. */ public StructUnpacker(MemoryAccessor vmm, ulong position) + { + _vmm = vmm; + _position = position; + + _buffPos = 64; + } + + /* Returns the next field of the given width (0 to 32 bits). Bits are consumed from the least significant end of each 64-bit word. Throws ArgumentOutOfRangeException when bits is negative or greater than 32. */ public int Read(int bits) + { + if ((uint)bits > 32) + { + throw new ArgumentOutOfRangeException(nameof(bits)); + } + + int value = 0; + int bitsRead = 0; + + while (bits > 0) + { + RefillBufferIfNeeded(); + + int readBits = bits; + + int maxReadBits = 64 - _buffPos; + + if (readBits > maxReadBits) + { + readBits = maxReadBits; + } + + int chunk = (int)(_buffer >> _buffPos) & (int)(0xffffffff >> (32 - readBits)); + + /* A field that continues into the next word has its low bits in the first chunk, so each chunk goes above the bits already collected. Shifting the earlier chunk up instead would swap the two parts. Reads that stay inside one word are unaffected. */ value |= chunk << bitsRead; + + _buffPos += readBits; + bitsRead += readBits; + + bits -= readBits; + } + + return value; + } + + /* Once the current word is fully consumed, loads the next 64-bit word and advances the address by 8. */ private void RefillBufferIfNeeded() + { + if (_buffPos >= 64) + { + _buffer = _vmm.ReadUInt64(_position); + + _position += 8; + + _buffPos = 0; + } + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/Vic/SurfaceOutputConfig.cs b/Ryujinx.Graphics.Nvdec/Vic/SurfaceOutputConfig.cs new file mode 100644 index 000000000..bcb01e70b --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/Vic/SurfaceOutputConfig.cs @@ -0,0 +1,33 @@ +namespace Ryujinx.Graphics.Vic +{ + /* Value bundle describing an output surface: pixel format, width, height, GOB block height and three plane addresses. */ struct SurfaceOutputConfig + { + public SurfacePixelFormat PixelFormat; + + public int SurfaceWidth; + public int SurfaceHeight; + public int GobBlockHeight; + + public ulong SurfaceLumaAddress; + public ulong SurfaceChromaUAddress; + public ulong SurfaceChromaVAddress; + + /* Copies each argument into the corresponding field. */ public SurfaceOutputConfig( + SurfacePixelFormat pixelFormat, + int surfaceWidth, + int surfaceHeight, + int gobBlockHeight, + ulong outputSurfaceLumaAddress, + ulong outputSurfaceChromaUAddress, + ulong outputSurfaceChromaVAddress) + { + PixelFormat = pixelFormat; + SurfaceWidth =
surfaceWidth; + SurfaceHeight = surfaceHeight; + GobBlockHeight = gobBlockHeight; + SurfaceLumaAddress = outputSurfaceLumaAddress; + SurfaceChromaUAddress = outputSurfaceChromaUAddress; + SurfaceChromaVAddress = outputSurfaceChromaVAddress; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/Vic/SurfacePixelFormat.cs b/Ryujinx.Graphics.Nvdec/Vic/SurfacePixelFormat.cs new file mode 100644 index 000000000..8dabd0942 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/Vic/SurfacePixelFormat.cs @@ -0,0 +1,8 @@ +namespace Ryujinx.Graphics.Vic +{ + /* Pixel format codes. VideoImageComposer casts a 7-bit field of the config struct to this type. */ enum SurfacePixelFormat + { + Rgba8 = 0x1f, + Yuv420P = 0x44 + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/Vic/VideoImageComposer.cs b/Ryujinx.Graphics.Nvdec/Vic/VideoImageComposer.cs new file mode 100644 index 000000000..e16a25233 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/Vic/VideoImageComposer.cs @@ -0,0 +1,94 @@ +using Ryujinx.Graphics.Gpu; +using Ryujinx.Graphics.VDec; + +namespace Ryujinx.Graphics.Vic +{ + /* Handles VideoImageComposerMeth methods: remembers the addresses given to the Set* methods and, on Execute, reads the output surface description and asks the VideoDecoder to copy planes. */ class VideoImageComposer + { + private ulong _configStructAddress; + private ulong _outputSurfaceLumaAddress; + private ulong _outputSurfaceChromaUAddress; + private ulong _outputSurfaceChromaVAddress; + + private VideoDecoder _vdec; + + public VideoImageComposer(VideoDecoder vdec) + { + _vdec = vdec; + } + + /* Dispatches on methodOffset, interpreted as a VideoImageComposerMeth. Values without a case, SetControlParams included, are ignored. */ public void Process(GpuContext gpu, int methodOffset, int[] arguments) + { + VideoImageComposerMeth method = (VideoImageComposerMeth)methodOffset; + + switch (method) + { + case VideoImageComposerMeth.Execute: Execute(gpu); break; + case VideoImageComposerMeth.SetConfigStructOffset: SetConfigStructOffset(arguments); break; + case VideoImageComposerMeth.SetOutputSurfaceLumaOffset: SetOutputSurfaceLumaOffset(arguments); break; + case VideoImageComposerMeth.SetOutputSurfaceChromaUOffset: SetOutputSurfaceChromaUOffset(arguments); break; + case VideoImageComposerMeth.SetOutputSurfaceChromaVOffset: SetOutputSurfaceChromaVOffset(arguments); break; + } + } + + /* Unpacks the surface fields that start 0x20 bytes into the config struct and hands the resulting SurfaceOutputConfig to _vdec.CopyPlanes. The chroma location, block-linear kind and reserved fields are read only to advance the unpacker. Width and height are stored minus one, the block height as a log2. */ private void
Execute(GpuContext gpu) + { + StructUnpacker unpacker = new StructUnpacker(gpu.MemoryAccessor, _configStructAddress + 0x20); + + SurfacePixelFormat pixelFormat = (SurfacePixelFormat)unpacker.Read(7); + + int chromaLocHoriz = unpacker.Read(2); + int chromaLocVert = unpacker.Read(2); + + int blockLinearKind = unpacker.Read(4); + int blockLinearHeightLog2 = unpacker.Read(4); + + int reserved0 = unpacker.Read(3); + int reserved1 = unpacker.Read(10); + + int surfaceWidthMinus1 = unpacker.Read(14); + int surfaceHeightMinus1 = unpacker.Read(14); + + int gobBlockHeight = 1 << blockLinearHeightLog2; + + int surfaceWidth = surfaceWidthMinus1 + 1; + int surfaceHeight = surfaceHeightMinus1 + 1; + + SurfaceOutputConfig outputConfig = new SurfaceOutputConfig( + pixelFormat, + surfaceWidth, + surfaceHeight, + gobBlockHeight, + _outputSurfaceLumaAddress, + _outputSurfaceChromaUAddress, + _outputSurfaceChromaVAddress); + + _vdec.CopyPlanes(gpu, outputConfig); + } + + /* Stores the config struct address decoded from arguments. */ private void SetConfigStructOffset(int[] arguments) + { + _configStructAddress = GetAddress(arguments); + } + + /* Stores the luma plane address decoded from arguments. */ private void SetOutputSurfaceLumaOffset(int[] arguments) + { + _outputSurfaceLumaAddress = GetAddress(arguments); + } + + /* Stores the chroma U plane address decoded from arguments. */ private void SetOutputSurfaceChromaUOffset(int[] arguments) + { + _outputSurfaceChromaUAddress = GetAddress(arguments); + } + + /* Stores the chroma V plane address decoded from arguments. */ private void SetOutputSurfaceChromaVOffset(int[] arguments) + { + _outputSurfaceChromaVAddress = GetAddress(arguments); + } + + /* Treats arguments[0] as unsigned and shifts it left by 8 to form a 64-bit address. */ private static ulong GetAddress(int[] arguments) + { + return (ulong)(uint)arguments[0] << 8; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/Vic/VideoImageComposerMeth.cs b/Ryujinx.Graphics.Nvdec/Vic/VideoImageComposerMeth.cs new file mode 100644 index 000000000..b30cabeaa --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/Vic/VideoImageComposerMeth.cs @@ -0,0 +1,12 @@ +namespace Ryujinx.Graphics.Vic +{ + /* Method offsets; VideoImageComposer.Process casts the incoming method offset to this type. */ enum VideoImageComposerMeth + { + Execute = 0xc0, + SetControlParams = 0x1c1, + SetConfigStructOffset = 0x1c2, +
SetOutputSurfaceLumaOffset = 0x1c8, + SetOutputSurfaceChromaUOffset = 0x1c9, + SetOutputSurfaceChromaVOffset = 0x1ca + } +} \ No newline at end of file diff --git a/Ryujinx.sln b/Ryujinx.sln index 1d21ff586..4ad74077c 100644 --- a/Ryujinx.sln +++ b/Ryujinx.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 15 -VisualStudioVersion = 15.0.26730.8 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.29613.14 MinimumVisualStudioVersion = 10.0.40219.1 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx", "Ryujinx\Ryujinx.csproj", "{074045D4-3ED2-4711-9169-E385F2BFB5A0}" EndProject @@ -26,15 +26,17 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Profiler", "Ryujinx EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ARMeilleure", "ARMeilleure\ARMeilleure.csproj", "{ABF09A5E-2D8B-4B6F-A51D-5CE414DDB15A}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Ryujinx.Graphics.Gpu", "Ryujinx.Graphics.Gpu\Ryujinx.Graphics.Gpu.csproj", "{ADA7EA87-0D63-4D97-9433-922A2124401F}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Gpu", "Ryujinx.Graphics.Gpu\Ryujinx.Graphics.Gpu.csproj", "{ADA7EA87-0D63-4D97-9433-922A2124401F}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Ryujinx.Graphics.GAL", "Ryujinx.Graphics.GAL\Ryujinx.Graphics.GAL.csproj", "{A602AE97-91A5-4608-8DF1-EBF4ED7A0B9E}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.GAL", "Ryujinx.Graphics.GAL\Ryujinx.Graphics.GAL.csproj", "{A602AE97-91A5-4608-8DF1-EBF4ED7A0B9E}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Ryujinx.Graphics.OpenGL", "Ryujinx.Graphics.OpenGL\Ryujinx.Graphics.OpenGL.csproj", "{9558FB96-075D-4219-8FFF-401979DC0B69}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.OpenGL", "Ryujinx.Graphics.OpenGL\Ryujinx.Graphics.OpenGL.csproj", "{9558FB96-075D-4219-8FFF-401979DC0B69}" EndProject 
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Ryujinx.Graphics.Texture", "Ryujinx.Graphics.Texture\Ryujinx.Graphics.Texture.csproj", "{E1B1AD28-289D-47B7-A106-326972240207}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Texture", "Ryujinx.Graphics.Texture\Ryujinx.Graphics.Texture.csproj", "{E1B1AD28-289D-47B7-A106-326972240207}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Ryujinx.Graphics.Shader", "Ryujinx.Graphics.Shader\Ryujinx.Graphics.Shader.csproj", "{03B955CD-AD84-4B93-AAA7-BF17923BBAA5}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Shader", "Ryujinx.Graphics.Shader\Ryujinx.Graphics.Shader.csproj", "{03B955CD-AD84-4B93-AAA7-BF17923BBAA5}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Ryujinx.Graphics.Nvdec", "Ryujinx.Graphics.Nvdec\Ryujinx.Graphics.Nvdec.csproj", "{85A0FA56-DC01-4A42-8808-70DAC76BD66D}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -76,14 +78,6 @@ Global {CB92CFF9-1D62-4D4F-9E88-8130EF61E351}.Profile Release|Any CPU.Build.0 = Profile Release|Any CPU {CB92CFF9-1D62-4D4F-9E88-8130EF61E351}.Release|Any CPU.ActiveCfg = Release|Any CPU {CB92CFF9-1D62-4D4F-9E88-8130EF61E351}.Release|Any CPU.Build.0 = Release|Any CPU - {2345A1A7-8DEF-419B-9AFB-4DFD41D20D05}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {2345A1A7-8DEF-419B-9AFB-4DFD41D20D05}.Debug|Any CPU.Build.0 = Debug|Any CPU - {2345A1A7-8DEF-419B-9AFB-4DFD41D20D05}.Profile Debug|Any CPU.ActiveCfg = Profile Debug|Any CPU - {2345A1A7-8DEF-419B-9AFB-4DFD41D20D05}.Profile Debug|Any CPU.Build.0 = Profile Debug|Any CPU - {2345A1A7-8DEF-419B-9AFB-4DFD41D20D05}.Profile Release|Any CPU.ActiveCfg = Profile Release|Any CPU - {2345A1A7-8DEF-419B-9AFB-4DFD41D20D05}.Profile Release|Any CPU.Build.0 = Profile Release|Any CPU - {2345A1A7-8DEF-419B-9AFB-4DFD41D20D05}.Release|Any CPU.ActiveCfg = Release|Any CPU - {2345A1A7-8DEF-419B-9AFB-4DFD41D20D05}.Release|Any CPU.Build.0 = 
Release|Any CPU {5C1D818E-682A-46A5-9D54-30006E26C270}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {5C1D818E-682A-46A5-9D54-30006E26C270}.Debug|Any CPU.Build.0 = Debug|Any CPU {5C1D818E-682A-46A5-9D54-30006E26C270}.Profile Debug|Any CPU.ActiveCfg = Profile Debug|Any CPU @@ -172,6 +166,14 @@ Global {03B955CD-AD84-4B93-AAA7-BF17923BBAA5}.Profile Release|Any CPU.Build.0 = Debug|Any CPU {03B955CD-AD84-4B93-AAA7-BF17923BBAA5}.Release|Any CPU.ActiveCfg = Release|Any CPU {03B955CD-AD84-4B93-AAA7-BF17923BBAA5}.Release|Any CPU.Build.0 = Release|Any CPU + {85A0FA56-DC01-4A42-8808-70DAC76BD66D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {85A0FA56-DC01-4A42-8808-70DAC76BD66D}.Debug|Any CPU.Build.0 = Debug|Any CPU + {85A0FA56-DC01-4A42-8808-70DAC76BD66D}.Profile Debug|Any CPU.ActiveCfg = Debug|Any CPU + {85A0FA56-DC01-4A42-8808-70DAC76BD66D}.Profile Debug|Any CPU.Build.0 = Debug|Any CPU + {85A0FA56-DC01-4A42-8808-70DAC76BD66D}.Profile Release|Any CPU.ActiveCfg = Release|Any CPU + {85A0FA56-DC01-4A42-8808-70DAC76BD66D}.Profile Release|Any CPU.Build.0 = Release|Any CPU + {85A0FA56-DC01-4A42-8808-70DAC76BD66D}.Release|Any CPU.ActiveCfg = Release|Any CPU + {85A0FA56-DC01-4A42-8808-70DAC76BD66D}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE