From f4f496cb48a59aae36e3252baa90396e1bfadd2e Mon Sep 17 00:00:00 2001 From: gdkchan Date: Tue, 28 Sep 2021 19:43:40 -0300 Subject: [PATCH] NVDEC (H264): Use separate contexts per channel and decode frames in DTS order (#2671) * Use separate NVDEC contexts per channel (for FFMPEG) * Remove NVDEC -> VIC frame override hack * Add missing bottom_field_pic_order_in_frame_present_flag * Make FFMPEG logging static * nit: Remove empty lines * New FFMPEG decoding approach -- call h264_decode_frame directly, trim surface cache to reduce memory usage * Fix case * Silence warnings * PR feedback * Per-decoder rather than per-codec ownership of surfaces on the cache --- .../IDeviceStateWithContext.cs | 9 + Ryujinx.Graphics.Host1x/Host1xDevice.cs | 61 ++++++- Ryujinx.Graphics.Host1x/ThiDevice.cs | 53 +++++- Ryujinx.Graphics.Nvdec.H264/FFmpegContext.cs | 57 ++++-- .../SpsAndPpsReconstruction.cs | 2 +- Ryujinx.Graphics.Nvdec/H264Decoder.cs | 23 +-- Ryujinx.Graphics.Nvdec/Image/SurfaceCache.cs | 171 ++++++++++-------- Ryujinx.Graphics.Nvdec/NvdecDecoderContext.cs | 21 +++ Ryujinx.Graphics.Nvdec/NvdecDevice.cs | 45 ++++- Ryujinx.Graphics.Nvdec/NvdecStatus.cs | 16 ++ .../Types/H264/PictureInfo.cs | 18 +- .../Types/H264/ReferenceFrame.cs | 13 +- .../Types/Vp9/FrameStats.cs | 1 - Ryujinx.Graphics.Nvdec/Vp9Decoder.cs | 6 +- Ryujinx.Graphics.Vic/VicDevice.cs | 38 +--- Ryujinx.Graphics.Vic/VicRegisters.cs | 2 + Ryujinx.HLE/HOS/Services/Nv/Host1xContext.cs | 15 -- .../NvHostChannel/NvHostChannelDeviceFile.cs | 7 +- 18 files changed, 358 insertions(+), 200 deletions(-) create mode 100644 Ryujinx.Graphics.Device/IDeviceStateWithContext.cs create mode 100644 Ryujinx.Graphics.Nvdec/NvdecDecoderContext.cs create mode 100644 Ryujinx.Graphics.Nvdec/NvdecStatus.cs diff --git a/Ryujinx.Graphics.Device/IDeviceStateWithContext.cs b/Ryujinx.Graphics.Device/IDeviceStateWithContext.cs new file mode 100644 index 00000000..17b2fa21 --- /dev/null +++ b/Ryujinx.Graphics.Device/IDeviceStateWithContext.cs @@ -0,0 +1,9 @@ +namespace Ryujinx.Graphics.Device +{ + public interface IDeviceStateWithContext : IDeviceState + { + long CreateContext(); + void DestroyContext(long id); + void BindContext(long id); + } +} diff --git a/Ryujinx.Graphics.Host1x/Host1xDevice.cs b/Ryujinx.Graphics.Host1x/Host1xDevice.cs index 7f6cef62..61408fc4 100644 --- a/Ryujinx.Graphics.Host1x/Host1xDevice.cs +++ b/Ryujinx.Graphics.Host1x/Host1xDevice.cs @@ -9,8 +9,20 @@ namespace Ryujinx.Graphics.Host1x { public sealed class Host1xDevice : IDisposable { + private struct Command + { + public int[] Buffer { get; } + public long ContextId { get; } + + public Command(int[] buffer, long contextId) + { + Buffer = buffer; + ContextId = contextId; + } + } + private readonly SyncptIncrManager _syncptIncrMgr; - private readonly AsyncWorkQueue _commandQueue; + private readonly AsyncWorkQueue _commandQueue; private readonly Devices _devices = new Devices(); @@ -26,7 +38,7 @@ namespace Ryujinx.Graphics.Host1x public Host1xDevice(SynchronizationManager syncMgr) { _syncptIncrMgr = new SyncptIncrManager(syncMgr); - _commandQueue = new AsyncWorkQueue(Process, "Ryujinx.Host1xProcessor"); + _commandQueue = new AsyncWorkQueue(Process, "Ryujinx.Host1xProcessor"); Class = new Host1xClass(syncMgr); @@ -39,13 +51,52 @@ namespace Ryujinx.Graphics.Host1x _devices.RegisterDevice(classId, thi); } - public void Submit(ReadOnlySpan commandBuffer) + public long CreateContext() { - _commandQueue.Add(commandBuffer.ToArray()); + if (_devices.GetDevice(ClassId.Nvdec) is IDeviceStateWithContext nvdec) + { + return nvdec.CreateContext(); + } + + return -1; } - private void Process(int[] commandBuffer) + public void DestroyContext(long id) { + if (id == -1) + { + return; + } + + if (_devices.GetDevice(ClassId.Nvdec) is IDeviceStateWithContext nvdec) + { + nvdec.DestroyContext(id); + } + } + + private void SetNvdecContext(long id) + { + if (id == -1) + { + return; + } + + if (_devices.GetDevice(ClassId.Nvdec) is IDeviceStateWithContext nvdec) + { + nvdec.BindContext(id); + } + } + + public void Submit(ReadOnlySpan commandBuffer, long contextId) + { + _commandQueue.Add(new Command(commandBuffer.ToArray(), contextId)); + } + + private void Process(Command command) + { + SetNvdecContext(command.ContextId); + int[] commandBuffer = command.Buffer; + for (int index = 0; index < commandBuffer.Length; index++) { Step(commandBuffer[index]); diff --git a/Ryujinx.Graphics.Host1x/ThiDevice.cs b/Ryujinx.Graphics.Host1x/ThiDevice.cs index 8e3e11b0..114ee26e 100644 --- a/Ryujinx.Graphics.Host1x/ThiDevice.cs +++ b/Ryujinx.Graphics.Host1x/ThiDevice.cs @@ -5,19 +5,24 @@ using System.Collections.Generic; namespace Ryujinx.Graphics.Host1x { - class ThiDevice : IDeviceState, IDisposable + class ThiDevice : IDeviceStateWithContext, IDisposable { private readonly ClassId _classId; private readonly IDeviceState _device; private readonly SyncptIncrManager _syncptIncrMgr; + private long _currentContextId; + private long _previousContextId; + private class CommandAction { + public long ContextId { get; } public int Data { get; } - public CommandAction(int data) + public CommandAction(long contextId, int data) { + ContextId = contextId; Data = data; } } @@ -26,7 +31,7 @@ namespace Ryujinx.Graphics.Host1x { public int Method { get; } - public MethodCallAction(int method, int data) : base(data) + public MethodCallAction(long contextId, int method, int data) : base(contextId, data) { Method = method; } @@ -34,7 +39,7 @@ namespace Ryujinx.Graphics.Host1x private class SyncptIncrAction : CommandAction { - public SyncptIncrAction(uint syncptIncrHandle) : base((int)syncptIncrHandle) + public SyncptIncrAction(long contextId, uint syncptIncrHandle) : base(contextId, (int)syncptIncrHandle) { } } @@ -54,6 +59,31 @@ namespace Ryujinx.Graphics.Host1x { nameof(ThiRegisters.IncrSyncpt), new RwCallback(IncrSyncpt, null) }, { nameof(ThiRegisters.Method1), new RwCallback(Method1, null) } }); + + _previousContextId = -1; + } + + public long CreateContext() + { + if (_device is IDeviceStateWithContext deviceWithContext) + { + return deviceWithContext.CreateContext(); + } + + return -1; + } + + public void DestroyContext(long id) + { + if (_device is IDeviceStateWithContext deviceWithContext) + { + deviceWithContext.DestroyContext(id); + } + } + + public void BindContext(long id) + { + _currentContextId = id; } public int Read(int offset) => _state.Read(offset); @@ -70,17 +100,28 @@ namespace Ryujinx.Graphics.Host1x } else { - _commandQueue.Add(new SyncptIncrAction(_syncptIncrMgr.IncrementWhenDone(_classId, syncpointId))); + _commandQueue.Add(new SyncptIncrAction(_currentContextId, _syncptIncrMgr.IncrementWhenDone(_classId, syncpointId))); } } private void Method1(int data) { - _commandQueue.Add(new MethodCallAction((int)_state.State.Method0 * 4, data)); + _commandQueue.Add(new MethodCallAction(_currentContextId, (int)_state.State.Method0 * 4, data)); } private void Process(CommandAction cmdAction) { + long contextId = cmdAction.ContextId; + if (contextId != _previousContextId) + { + _previousContextId = contextId; + + if (_device is IDeviceStateWithContext deviceWithContext) + { + deviceWithContext.BindContext(contextId); + } + } + if (cmdAction is SyncptIncrAction syncptIncrAction) { _syncptIncrMgr.SignalDone((uint)syncptIncrAction.Data); diff --git a/Ryujinx.Graphics.Nvdec.H264/FFmpegContext.cs b/Ryujinx.Graphics.Nvdec.H264/FFmpegContext.cs index 17de1a03..7676093f 100644 --- a/Ryujinx.Graphics.Nvdec.H264/FFmpegContext.cs +++ b/Ryujinx.Graphics.Nvdec.H264/FFmpegContext.cs @@ -9,30 +9,34 @@ namespace Ryujinx.Graphics.Nvdec.H264 { unsafe class FFmpegContext : IDisposable { - private readonly av_log_set_callback_callback _logFunc; + private readonly AVCodec_decode _h264Decode; + private static readonly av_log_set_callback_callback _logFunc; private readonly AVCodec* _codec; private AVPacket* _packet; private AVCodecContext* _context; public FFmpegContext() { - _logFunc = Log; - - // Redirect log output - ffmpeg.av_log_set_level(ffmpeg.AV_LOG_MAX_OFFSET); - ffmpeg.av_log_set_callback(_logFunc); - _codec = ffmpeg.avcodec_find_decoder(AVCodecID.AV_CODEC_ID_H264); _context = ffmpeg.avcodec_alloc_context3(_codec); + _context->debug |= ffmpeg.FF_DEBUG_MMCO; ffmpeg.avcodec_open2(_context, _codec, null); _packet = ffmpeg.av_packet_alloc(); + + _h264Decode = Marshal.GetDelegateForFunctionPointer(_codec->decode.Pointer); } static FFmpegContext() { SetRootPath(); + + _logFunc = Log; + + // Redirect log output. + ffmpeg.av_log_set_level(ffmpeg.AV_LOG_MAX_OFFSET); + ffmpeg.av_log_set_callback(_logFunc); } private static void SetRootPath() @@ -64,7 +68,7 @@ namespace Ryujinx.Graphics.Nvdec.H264 } } - private void Log(void* p0, int level, string format, byte* vl) + private static void Log(void* p0, int level, string format, byte* vl) { if (level > ffmpeg.av_log_get_level()) { @@ -102,23 +106,40 @@ namespace Ryujinx.Graphics.Nvdec.H264 public int DecodeFrame(Surface output, ReadOnlySpan bitstream) { - // Ensure the packet is clean before proceeding - ffmpeg.av_packet_unref(_packet); + int result; + int gotFrame; fixed (byte* ptr = bitstream) { _packet->data = ptr; _packet->size = bitstream.Length; - - int rc = ffmpeg.avcodec_send_packet(_context, _packet); - - if (rc != 0) - { - return rc; - } + result = _h264Decode(_context, output.Frame, &gotFrame, _packet); } - return ffmpeg.avcodec_receive_frame(_context, output.Frame); + if (gotFrame == 0) + { + ffmpeg.av_frame_unref(output.Frame); + + // If the frame was not delivered, it was probably delayed. + // Get the next delayed frame by passing a 0 length packet. + _packet->data = null; + _packet->size = 0; + result = _h264Decode(_context, output.Frame, &gotFrame, _packet); + + // We need to set B frames to 0 as we already consumed all delayed frames. + // This prevents the decoder from trying to return a delayed frame next time. + _context->has_b_frames = 0; + } + + ffmpeg.av_packet_unref(_packet); + + if (gotFrame == 0) + { + ffmpeg.av_frame_unref(output.Frame); + return -1; + } + + return result < 0 ? result : 0; } public void Dispose() diff --git a/Ryujinx.Graphics.Nvdec.H264/SpsAndPpsReconstruction.cs b/Ryujinx.Graphics.Nvdec.H264/SpsAndPpsReconstruction.cs index e75c555e..6fd1ce79 100644 --- a/Ryujinx.Graphics.Nvdec.H264/SpsAndPpsReconstruction.cs +++ b/Ryujinx.Graphics.Nvdec.H264/SpsAndPpsReconstruction.cs @@ -74,7 +74,7 @@ namespace Ryujinx.Graphics.Nvdec.H264 writer.WriteUe(0); // Seq parameter set id writer.WriteBit(pictureInfo.EntropyCodingModeFlag); - writer.WriteBit(false); // Bottom field pic order in frame present flag + writer.WriteBit(pictureInfo.PicOrderPresentFlag); writer.WriteUe(0); // Num slice groups minus 1 writer.WriteUe(pictureInfo.NumRefIdxL0ActiveMinus1); writer.WriteUe(pictureInfo.NumRefIdxL1ActiveMinus1); diff --git a/Ryujinx.Graphics.Nvdec/H264Decoder.cs b/Ryujinx.Graphics.Nvdec/H264Decoder.cs index 57ce12d0..1ee3997b 100644 --- a/Ryujinx.Graphics.Nvdec/H264Decoder.cs +++ b/Ryujinx.Graphics.Nvdec/H264Decoder.cs @@ -10,9 +10,7 @@ namespace Ryujinx.Graphics.Nvdec { private const int MbSizeInPixels = 16; - private static readonly Decoder _decoder = new Decoder(); - - public unsafe static void Decode(NvdecDevice device, ResourceManager rm, ref NvdecRegisters state) + public unsafe static void Decode(NvdecDecoderContext context, ResourceManager rm, ref NvdecRegisters state) { PictureInfo pictureInfo = rm.Gmm.DeviceRead(state.SetPictureInfoOffset); H264PictureInfo info = pictureInfo.Convert(); @@ -22,19 +20,18 @@ namespace Ryujinx.Graphics.Nvdec int width = (int)pictureInfo.PicWidthInMbs * MbSizeInPixels; int height = (int)pictureInfo.PicHeightInMbs * MbSizeInPixels; - ISurface outputSurface = rm.Cache.Get(_decoder, CodecId.H264, 0, 0, width, height); + int surfaceIndex = (int)pictureInfo.OutputSurfaceIndex; - if (_decoder.Decode(ref info, outputSurface, bitstream)) + uint lumaOffset = state.SetSurfaceLumaOffset[surfaceIndex]; + uint chromaOffset = state.SetSurfaceChromaOffset[surfaceIndex]; + + Decoder decoder = context.GetDecoder(); + + ISurface outputSurface = rm.Cache.Get(decoder, 0, 0, width, height); + + if (decoder.Decode(ref info, outputSurface, bitstream)) { - int li = (int)pictureInfo.LumaOutputSurfaceIndex; - int ci = (int)pictureInfo.ChromaOutputSurfaceIndex; - - uint lumaOffset = state.SetSurfaceLumaOffset[li]; - uint chromaOffset = state.SetSurfaceChromaOffset[ci]; - SurfaceWriter.Write(rm.Gmm, outputSurface, lumaOffset, chromaOffset); - - device.OnFrameDecoded(CodecId.H264, lumaOffset, chromaOffset); } rm.Cache.Put(outputSurface); diff --git a/Ryujinx.Graphics.Nvdec/Image/SurfaceCache.cs b/Ryujinx.Graphics.Nvdec/Image/SurfaceCache.cs index c362185f..dc119673 100644 --- a/Ryujinx.Graphics.Nvdec/Image/SurfaceCache.cs +++ b/Ryujinx.Graphics.Nvdec/Image/SurfaceCache.cs @@ -21,7 +21,7 @@ namespace Ryujinx.Graphics.Nvdec.Image public uint ChromaOffset; public int Width; public int Height; - public CodecId CodecId; + public IDecoder Owner; public ISurface Surface; } @@ -34,104 +34,110 @@ namespace Ryujinx.Graphics.Nvdec.Image _gmm = gmm; } - public ISurface Get(IDecoder decoder, CodecId codecId, uint lumaOffset, uint chromaOffset, int width, int height) + public ISurface Get(IDecoder decoder, uint lumaOffset, uint chromaOffset, int width, int height) { - ISurface surface = null; - - // Try to find a compatible surface with same parameters, and same offsets. - for (int i = 0; i < MaxItems; i++) + lock (_pool) { - ref CacheItem item = ref _pool[i]; + ISurface surface = null; - if (item.LumaOffset == lumaOffset && - item.ChromaOffset == chromaOffset && - item.CodecId == codecId && - item.Width == width && - item.Height == height) - { - item.ReferenceCount++; - surface = item.Surface; - MoveToFront(i); - break; - } - } - - // If we failed to find a perfect match, now ignore the offsets. - // Search backwards to replace the oldest compatible surface, - // this avoids thrashing frquently used surfaces. - // Now we need to ensure that the surface is not in use, as we'll change the data. - if (surface == null) - { - for (int i = MaxItems - 1; i >= 0; i--) + // Try to find a compatible surface with same parameters, and same offsets. + for (int i = 0; i < MaxItems; i++) { ref CacheItem item = ref _pool[i]; - if (item.ReferenceCount == 0 && item.CodecId == codecId && item.Width == width && item.Height == height) + if (item.LumaOffset == lumaOffset && + item.ChromaOffset == chromaOffset && + item.Owner == decoder && + item.Width == width && + item.Height == height) { - item.ReferenceCount = 1; - item.LumaOffset = lumaOffset; - item.ChromaOffset = chromaOffset; + item.ReferenceCount++; surface = item.Surface; + MoveToFront(i); + break; + } + } + + // If we failed to find a perfect match, now ignore the offsets. + // Search backwards to replace the oldest compatible surface, + // this avoids thrashing frequently used surfaces. + // Now we need to ensure that the surface is not in use, as we'll change the data. + if (surface == null) + { + for (int i = MaxItems - 1; i >= 0; i--) + { + ref CacheItem item = ref _pool[i]; + + if (item.ReferenceCount == 0 && item.Owner == decoder && item.Width == width && item.Height == height) + { + item.ReferenceCount = 1; + item.LumaOffset = lumaOffset; + item.ChromaOffset = chromaOffset; + surface = item.Surface; + + if ((lumaOffset | chromaOffset) != 0) + { + SurfaceReader.Read(_gmm, surface, lumaOffset, chromaOffset); + } + + MoveToFront(i); + break; + } + } + } + + // If everything else failed, we try to create a new surface, + // and insert it on the pool. We replace the oldest item on the + // pool to avoid thrashing frequently used surfaces. + // If even the oldest item is in use, that means that the entire pool + // is in use, in that case we throw as there's no place to insert + // the new surface. + if (surface == null) + { + if (_pool[MaxItems - 1].ReferenceCount == 0) + { + surface = decoder.CreateSurface(width, height); if ((lumaOffset | chromaOffset) != 0) { SurfaceReader.Read(_gmm, surface, lumaOffset, chromaOffset); } - MoveToFront(i); - break; + MoveToFront(MaxItems - 1); + ref CacheItem item = ref _pool[0]; + item.Surface?.Dispose(); + item.ReferenceCount = 1; + item.LumaOffset = lumaOffset; + item.ChromaOffset = chromaOffset; + item.Width = width; + item.Height = height; + item.Owner = decoder; + item.Surface = surface; } - } - } - - // If everything else failed, we try to create a new surface, - // and insert it on the pool. We replace the oldest item on the - // pool to avoid thrashing frequently used surfaces. - // If even the oldest item is in use, that means that the entire pool - // is in use, in that case we throw as there's no place to insert - // the new surface. - if (surface == null) - { - if (_pool[MaxItems - 1].ReferenceCount == 0) - { - surface = decoder.CreateSurface(width, height); - - if ((lumaOffset | chromaOffset) != 0) + else { - SurfaceReader.Read(_gmm, surface, lumaOffset, chromaOffset); + throw new InvalidOperationException("No free slot on the surface pool."); } + } - MoveToFront(MaxItems - 1); - ref CacheItem item = ref _pool[0]; - item.Surface?.Dispose(); - item.ReferenceCount = 1; - item.LumaOffset = lumaOffset; - item.ChromaOffset = chromaOffset; - item.Width = width; - item.Height = height; - item.CodecId = codecId; - item.Surface = surface; - } - else - { - throw new InvalidOperationException("No free slot on the surface pool."); - } + return surface; } - - return surface; } public void Put(ISurface surface) { - for (int i = 0; i < MaxItems; i++) + lock (_pool) { - ref CacheItem item = ref _pool[i]; - - if (item.Surface == surface) + for (int i = 0; i < MaxItems; i++) { - item.ReferenceCount--; - Debug.Assert(item.ReferenceCount >= 0); - break; + ref CacheItem item = ref _pool[i]; + + if (item.Surface == surface) + { + item.ReferenceCount--; + Debug.Assert(item.ReferenceCount >= 0); + break; + } } } } @@ -147,5 +153,22 @@ namespace Ryujinx.Graphics.Nvdec.Image _pool[0] = temp; } } + + public void Trim() + { + lock (_pool) + { + for (int i = 0; i < MaxItems; i++) + { + ref CacheItem item = ref _pool[i]; + + if (item.ReferenceCount == 0) + { + item.Surface?.Dispose(); + item = default; + } + } + } + } } } diff --git a/Ryujinx.Graphics.Nvdec/NvdecDecoderContext.cs b/Ryujinx.Graphics.Nvdec/NvdecDecoderContext.cs new file mode 100644 index 00000000..90da0bee --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/NvdecDecoderContext.cs @@ -0,0 +1,21 @@ +using Ryujinx.Graphics.Nvdec.H264; +using System; + +namespace Ryujinx.Graphics.Nvdec +{ + class NvdecDecoderContext : IDisposable + { + private Decoder _decoder; + + public Decoder GetDecoder() + { + return _decoder ??= new Decoder(); + } + + public void Dispose() + { + _decoder?.Dispose(); + _decoder = null; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/NvdecDevice.cs b/Ryujinx.Graphics.Nvdec/NvdecDevice.cs index 08f802a1..5319429b 100644 --- a/Ryujinx.Graphics.Nvdec/NvdecDevice.cs +++ b/Ryujinx.Graphics.Nvdec/NvdecDevice.cs @@ -2,17 +2,20 @@ using Ryujinx.Graphics.Device; using Ryujinx.Graphics.Gpu.Memory; using Ryujinx.Graphics.Nvdec.Image; -using System; +using System.Collections.Concurrent; using System.Collections.Generic; +using System.Threading; namespace Ryujinx.Graphics.Nvdec { - public class NvdecDevice : IDeviceState + public class NvdecDevice : IDeviceStateWithContext { private readonly ResourceManager _rm; private readonly DeviceState _state; - public event Action FrameDecoded; + private long _currentId; + private ConcurrentDictionary _contexts; + private NvdecDecoderContext _currentContext; public NvdecDevice(MemoryManager gmm) { @@ -21,6 +24,33 @@ namespace Ryujinx.Graphics.Nvdec { { nameof(NvdecRegisters.Execute), new RwCallback(Execute, null) } }); + _contexts = new ConcurrentDictionary(); + } + + public long CreateContext() + { + long id = Interlocked.Increment(ref _currentId); + _contexts.TryAdd(id, new NvdecDecoderContext()); + + return id; + } + + public void DestroyContext(long id) + { + if (_contexts.TryRemove(id, out var context)) + { + context.Dispose(); + } + + _rm.Cache.Trim(); + } + + public void BindContext(long id) + { + if (_contexts.TryGetValue(id, out var context)) + { + _currentContext = context; + } } public int Read(int offset) => _state.Read(offset); @@ -36,20 +66,15 @@ namespace Ryujinx.Graphics.Nvdec switch (codecId) { case CodecId.H264: - H264Decoder.Decode(this, _rm, ref _state.State); + H264Decoder.Decode(_currentContext, _rm, ref _state.State); break; case CodecId.Vp9: - Vp9Decoder.Decode(this, _rm, ref _state.State); + Vp9Decoder.Decode(_rm, ref _state.State); break; default: Logger.Error?.Print(LogClass.Nvdec, $"Unsupported codec \"{codecId}\"."); break; } } - - internal void OnFrameDecoded(CodecId codecId, uint lumaOffset, uint chromaOffset) - { - FrameDecoded?.Invoke(new FrameDecodedEventArgs(codecId, lumaOffset, chromaOffset)); - } } } diff --git a/Ryujinx.Graphics.Nvdec/NvdecStatus.cs b/Ryujinx.Graphics.Nvdec/NvdecStatus.cs new file mode 100644 index 00000000..0712af88 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/NvdecStatus.cs @@ -0,0 +1,16 @@ +using Ryujinx.Graphics.Nvdec.Types.Vp9; + +namespace Ryujinx.Graphics.Nvdec +{ + struct NvdecStatus + { +#pragma warning disable CS0649 + public uint MbsCorrectlyDecoded; + public uint MbsInError; + public uint Reserved; + public uint ErrorStatus; + public FrameStats Stats; + public uint SliceHeaderErrorCode; +#pragma warning restore CS0649 + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs b/Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs index 86570342..326c40ae 100644 --- a/Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs +++ b/Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs @@ -26,13 +26,13 @@ namespace Ryujinx.Graphics.Nvdec.Types.H264 public uint Transform8x8ModeFlag; public uint LumaPitch; public uint ChromaPitch; - public uint Unknown94; - public uint LumaSecondFieldOffset; - public uint Unknown9C; - public uint UnknownA0; - public uint ChromaSecondFieldOffset; - public uint UnknownA8; - public uint UnknownAC; + public uint LumaTopOffset; + public uint LumaBottomOffset; + public uint LumaFrameOffset; + public uint ChromaTopOffset; + public uint ChromaBottomFieldOffset; + public uint ChromaFrameOffset; + public uint HistBufferSize; public ulong Flags; public Array2 FieldOrderCnt; public Array16 RefFrames; @@ -64,8 +64,8 @@ namespace Ryujinx.Graphics.Nvdec.Types.H264 public int ChromaQpIndexOffset => ExtractSx(Flags, 22, 5); public int SecondChromaQpIndexOffset => ExtractSx(Flags, 27, 5); public uint WeightedBipredIdc => (uint)(Flags >> 32) & 3; - public uint LumaOutputSurfaceIndex => (uint)(Flags >> 34) & 0x7f; - public uint ChromaOutputSurfaceIndex => (uint)(Flags >> 41) & 0x1f; + public uint OutputSurfaceIndex => (uint)(Flags >> 34) & 0x7f; + public uint ColIndex => (uint)(Flags >> 41) & 0x1f; public ushort FrameNum => (ushort)(Flags >> 46); public bool QpprimeYZeroTransformBypassFlag => (Flags2 & (1 << 1)) != 0; diff --git a/Ryujinx.Graphics.Nvdec/Types/H264/ReferenceFrame.cs b/Ryujinx.Graphics.Nvdec/Types/H264/ReferenceFrame.cs index 163a8783..d205a47a 100644 --- a/Ryujinx.Graphics.Nvdec/Types/H264/ReferenceFrame.cs +++ b/Ryujinx.Graphics.Nvdec/Types/H264/ReferenceFrame.cs @@ -1,12 +1,15 @@ -namespace Ryujinx.Graphics.Nvdec.Types.H264 +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Nvdec.Types.H264 { struct ReferenceFrame { #pragma warning disable CS0649 - public uint Unknown0; - public uint Unknown4; - public uint Unknown8; - public uint UnknownC; + public uint Flags; + public Array2 FieldOrderCnt; + public uint FrameNum; #pragma warning restore CS0649 + + public uint OutputSurfaceIndex => (uint)Flags & 0x7f; } } diff --git a/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameStats.cs b/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameStats.cs index c360d378..26aab506 100644 --- a/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameStats.cs +++ b/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameStats.cs @@ -16,7 +16,6 @@ public uint FrameStatusBwdMvyCnt; public uint ErrorCtbPos; public uint ErrorSlicePos; - public uint Unknown34; #pragma warning restore CS0649 } } diff --git a/Ryujinx.Graphics.Nvdec/Vp9Decoder.cs b/Ryujinx.Graphics.Nvdec/Vp9Decoder.cs index f05555c6..b56dc56e 100644 --- a/Ryujinx.Graphics.Nvdec/Vp9Decoder.cs +++ b/Ryujinx.Graphics.Nvdec/Vp9Decoder.cs @@ -15,14 +15,14 @@ namespace Ryujinx.Graphics.Nvdec { private static Decoder _decoder = new Decoder(); - public unsafe static void Decode(NvdecDevice device, ResourceManager rm, ref NvdecRegisters state) + public unsafe static void Decode(ResourceManager rm, ref NvdecRegisters state) { PictureInfo pictureInfo = rm.Gmm.DeviceRead(state.SetPictureInfoOffset); EntropyProbs entropy = rm.Gmm.DeviceRead(state.SetVp9EntropyProbsOffset); ISurface Rent(uint lumaOffset, uint chromaOffset, FrameSize size) { - return rm.Cache.Get(_decoder, CodecId.Vp9, lumaOffset, chromaOffset, size.Width, size.Height); + return rm.Cache.Get(_decoder, lumaOffset, chromaOffset, size.Width, size.Height); } ISurface lastSurface = Rent(state.SetSurfaceLumaOffset[0], state.SetSurfaceChromaOffset[0], pictureInfo.LastFrameSize); @@ -60,8 +60,6 @@ namespace Ryujinx.Graphics.Nvdec if (_decoder.Decode(ref info, currentSurface, bitstream, mvsIn, mvsOut)) { SurfaceWriter.Write(rm.Gmm, currentSurface, lumaOffset, chromaOffset); - - device.OnFrameDecoded(CodecId.Vp9, lumaOffset, chromaOffset); } WriteBackwardUpdates(rm.Gmm, state.SetVp9BackwardUpdatesOffset, ref info.BackwardUpdateCounts); diff --git a/Ryujinx.Graphics.Vic/VicDevice.cs b/Ryujinx.Graphics.Vic/VicDevice.cs index db4fe513..21021c58 100644 --- a/Ryujinx.Graphics.Vic/VicDevice.cs +++ b/Ryujinx.Graphics.Vic/VicDevice.cs @@ -1,9 +1,7 @@ -using Ryujinx.Common.Logging; -using Ryujinx.Graphics.Device; +using Ryujinx.Graphics.Device; using Ryujinx.Graphics.Gpu.Memory; using Ryujinx.Graphics.Vic.Image; using Ryujinx.Graphics.Vic.Types; -using System; using System.Collections.Generic; namespace Ryujinx.Graphics.Vic @@ -14,9 +12,6 @@ namespace Ryujinx.Graphics.Vic private readonly ResourceManager _rm; private readonly DeviceState _state; - private PlaneOffsets _overrideOffsets; - private bool _hasOverride; - public VicDevice(MemoryManager gmm) { _gmm = gmm; @@ -27,32 +22,6 @@ namespace Ryujinx.Graphics.Vic }); } - /// - /// Overrides all input surfaces with a custom surface. - /// - /// Offset of the luma plane or packed data for this surface - /// Offset of the U chroma plane (for planar formats) or both chroma planes (for semiplanar formats) - /// Offset of the V chroma plane for planar formats - public void SetSurfaceOverride(uint lumaOffset, uint chromaUOffset, uint chromaVOffset) - { - _overrideOffsets.LumaOffset = lumaOffset; - _overrideOffsets.ChromaUOffset = chromaUOffset; - _overrideOffsets.ChromaVOffset = chromaVOffset; - _hasOverride = true; - } - - /// - /// Disables overriding input surfaces. - /// - /// - /// Surface overrides are disabled by default. - /// Call this if you previously called and which to disable it. - /// - public void DisableSurfaceOverride() - { - _hasOverride = false; - } - public int Read(int offset) => _state.Read(offset); public void Write(int offset, int data) => _state.Write(offset, data); @@ -76,11 +45,6 @@ namespace Ryujinx.Graphics.Vic var offsets = _state.State.SetSurfacexSlotx[i][0]; - if (_hasOverride) - { - offsets = _overrideOffsets; - } - using Surface src = SurfaceReader.Read(_rm, ref slot.SlotSurfaceConfig, ref offsets); Blender.BlendOne(output, src, ref slot); diff --git a/Ryujinx.Graphics.Vic/VicRegisters.cs b/Ryujinx.Graphics.Vic/VicRegisters.cs index 74d41ade..1c11b554 100644 --- a/Ryujinx.Graphics.Vic/VicRegisters.cs +++ b/Ryujinx.Graphics.Vic/VicRegisters.cs @@ -4,9 +4,11 @@ namespace Ryujinx.Graphics.Vic { struct PlaneOffsets { +#pragma warning disable CS0649 public uint LumaOffset; public uint ChromaUOffset; public uint ChromaVOffset; +#pragma warning restore CS0649 } struct VicRegisters diff --git a/Ryujinx.HLE/HOS/Services/Nv/Host1xContext.cs b/Ryujinx.HLE/HOS/Services/Nv/Host1xContext.cs index 024b0bb4..14f3389b 100644 --- a/Ryujinx.HLE/HOS/Services/Nv/Host1xContext.cs +++ b/Ryujinx.HLE/HOS/Services/Nv/Host1xContext.cs @@ -22,21 +22,6 @@ namespace Ryujinx.HLE.HOS.Services.Nv var vic = new VicDevice(Smmu); Host1x.RegisterDevice(ClassId.Nvdec, nvdec); Host1x.RegisterDevice(ClassId.Vic, vic); - - nvdec.FrameDecoded += (FrameDecodedEventArgs e) => - { - // FIXME: - // Figure out what is causing frame ordering issues on H264. - // For now this is needed as workaround. - if (e.CodecId == CodecId.H264) - { - vic.SetSurfaceOverride(e.LumaOffset, e.ChromaOffset, 0); - } - else - { - vic.DisableSurfaceOverride(); - } - }; } public void Dispose() diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostChannel/NvHostChannelDeviceFile.cs b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostChannel/NvHostChannelDeviceFile.cs index a85fd44c..e11949be 100644 --- a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostChannel/NvHostChannelDeviceFile.cs +++ b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostChannel/NvHostChannelDeviceFile.cs @@ -27,6 +27,7 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostChannel private readonly IVirtualMemoryManager _memory; private readonly Host1xContext _host1xContext; + private readonly long _contextId; public GpuChannel Channel { get; } @@ -52,6 +53,7 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostChannel _submitTimeout = 0; _timeslice = 0; _host1xContext = GetHost1XContext(context.Device.Gpu, owner); + _contextId = _host1xContext.Host1x.CreateContext(); Channel = _device.Gpu.CreateChannel(); ChannelInitialization.InitializeState(Channel); @@ -167,7 +169,7 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostChannel var data = _memory.GetSpan(map.Address + commandBuffer.Offset, commandBuffer.WordsCount * 4); - _host1xContext.Host1x.Submit(MemoryMarshal.Cast(data)); + _host1xContext.Host1x.Submit(MemoryMarshal.Cast(data), _contextId); } } @@ -177,7 +179,7 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostChannel tmpCmdBuff[0] = (4 << 28) | (int)fences[0].Id; - _host1xContext.Host1x.Submit(tmpCmdBuff); + _host1xContext.Host1x.Submit(tmpCmdBuff, _contextId); return NvInternalResult.Success; } @@ -548,6 +550,7 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostChannel public override void Close() { + _host1xContext.Host1x.DestroyContext(_contextId); Channel.Dispose(); }