From 1402d8391d84f912110104e3e6c1a50a8c000d59 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Wed, 23 Mar 2022 17:09:32 -0300 Subject: [PATCH] Support NVDEC H264 interlaced video decoding and VIC deinterlacing (#3225) * Support NVDEC H264 interlaced video decoding and VIC deinterlacing * Remove unused code --- Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs | 43 ++++ Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs | 10 + Ryujinx.Graphics.Nvdec.FFmpeg/Surface.cs | 6 +- Ryujinx.Graphics.Nvdec.Vp9/Dsp/InvTxfm.cs | 3 +- Ryujinx.Graphics.Nvdec.Vp9/Types/Surface.cs | 2 + Ryujinx.Graphics.Nvdec/H264Decoder.cs | 19 +- Ryujinx.Graphics.Nvdec/Image/SurfaceWriter.cs | 49 ++++ .../Types/H264/PictureInfo.cs | 6 +- Ryujinx.Graphics.Vic/Blender.cs | 36 +-- Ryujinx.Graphics.Vic/Image/BufferPool.cs | 2 +- Ryujinx.Graphics.Vic/Image/InputSurface.cs | 69 ++++++ Ryujinx.Graphics.Vic/Image/SurfaceReader.cs | 221 +++++++++++++++--- Ryujinx.Graphics.Vic/Image/SurfaceWriter.cs | 6 +- Ryujinx.Graphics.Vic/Scaler.cs | 124 ++++++++++ Ryujinx.Graphics.Vic/Types/DeinterlaceMode.cs | 12 + Ryujinx.Graphics.Vic/Types/FrameFormat.cs | 79 +++++++ Ryujinx.Graphics.Vic/Types/SlotConfig.cs | 4 +- Ryujinx.Graphics.Vic/VicDevice.cs | 4 +- Ryujinx.Graphics.Video/FrameField.cs | 8 + Ryujinx.Graphics.Video/ISurface.cs | 2 + 20 files changed, 623 insertions(+), 82 deletions(-) create mode 100644 Ryujinx.Graphics.Vic/Scaler.cs create mode 100644 Ryujinx.Graphics.Vic/Types/DeinterlaceMode.cs create mode 100644 Ryujinx.Graphics.Vic/Types/FrameFormat.cs create mode 100644 Ryujinx.Graphics.Video/FrameField.cs diff --git a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs index c57f1a6f..ae27c712 100644 --- a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs +++ b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs @@ -255,6 +255,49 @@ namespace Ryujinx.Graphics.Gpu.Memory } } + /// + /// Writes data to GPU mapped memory, stopping at the first unmapped page at the memory region, if any. + /// + /// GPU virtual address to write the data into + /// The data to be written + public void WriteMapped(ulong va, ReadOnlySpan data) + { + if (IsContiguous(va, data.Length)) + { + Physical.Write(Translate(va), data); + } + else + { + int offset = 0, size; + + if ((va & PageMask) != 0) + { + ulong pa = Translate(va); + + size = Math.Min(data.Length, (int)PageSize - (int)(va & PageMask)); + + if (pa != PteUnmapped && Physical.IsMapped(pa)) + { + Physical.Write(pa, data.Slice(0, size)); + } + + offset += size; + } + + for (; offset < data.Length; offset += size) + { + ulong pa = Translate(va + (ulong)offset); + + size = Math.Min(data.Length - offset, (int)PageSize); + + if (pa != PteUnmapped && Physical.IsMapped(pa)) + { + Physical.Write(pa, data.Slice(offset, size)); + } + } + } + } + /// /// Maps a given range of pages to the specified CPU virtual address. /// diff --git a/Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs b/Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs index bfd3c04f..57590fb3 100644 --- a/Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs +++ b/Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs @@ -340,6 +340,16 @@ namespace Ryujinx.Graphics.Gpu.Memory return _cpuMemory.BeginSmartGranularTracking(address, size, granularity); } + /// + /// Checks if the page at a given address is mapped on CPU memory. + /// + /// CPU virtual address of the page to check + /// True if mapped, false otherwise + public bool IsMapped(ulong address) + { + return _cpuMemory.IsMapped(address); + } + /// /// Release our reference to the CPU memory manager. /// diff --git a/Ryujinx.Graphics.Nvdec.FFmpeg/Surface.cs b/Ryujinx.Graphics.Nvdec.FFmpeg/Surface.cs index 20cee4a1..13d0df49 100644 --- a/Ryujinx.Graphics.Nvdec.FFmpeg/Surface.cs +++ b/Ryujinx.Graphics.Nvdec.FFmpeg/Surface.cs @@ -15,11 +15,13 @@ namespace Ryujinx.Graphics.Nvdec.FFmpeg public Plane UPlane => new Plane((IntPtr)Frame->data[1], UvStride * UvHeight); public Plane VPlane => new Plane((IntPtr)Frame->data[2], UvStride * UvHeight); + public FrameField Field => Frame->interlaced_frame != 0 ? FrameField.Interlaced : FrameField.Progressive; + public int Width => Frame->width; public int Height => Frame->height; public int Stride => Frame->linesize[0]; - public int UvWidth => (Frame->width + 1) >> 1; - public int UvHeight => (Frame->height + 1) >> 1; + public int UvWidth => (Width + 1) >> 1; + public int UvHeight => (Height + 1) >> 1; public int UvStride => Frame->linesize[1]; public Surface(int width, int height) diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Dsp/InvTxfm.cs b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/InvTxfm.cs index e41a31ca..873f667a 100644 --- a/Ryujinx.Graphics.Nvdec.Vp9/Dsp/InvTxfm.cs +++ b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/InvTxfm.cs @@ -486,8 +486,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp Idct8(tempIn, tempOut); for (j = 0; j < 8; ++j) { - dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], - BitUtils.RoundPowerOfTwo(tempOut[j], 5)); + dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 5)); } } } diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/Surface.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/Surface.cs index 2b2a173e..d5b51bc2 100644 --- a/Ryujinx.Graphics.Nvdec.Vp9/Types/Surface.cs +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/Surface.cs @@ -15,6 +15,8 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Types public unsafe Plane UPlane => new Plane((IntPtr)UBuffer.ToPointer(), UBuffer.Length); public unsafe Plane VPlane => new Plane((IntPtr)VBuffer.ToPointer(), VBuffer.Length); + public FrameField Field => FrameField.Progressive; + public int Width { get; } public int Height { get; } public int AlignedWidth { get; } diff --git a/Ryujinx.Graphics.Nvdec/H264Decoder.cs b/Ryujinx.Graphics.Nvdec/H264Decoder.cs index 69eeb494..6efeb899 100644 --- a/Ryujinx.Graphics.Nvdec/H264Decoder.cs +++ b/Ryujinx.Graphics.Nvdec/H264Decoder.cs @@ -31,7 +31,24 @@ namespace Ryujinx.Graphics.Nvdec if (decoder.Decode(ref info, outputSurface, bitstream)) { - SurfaceWriter.Write(rm.Gmm, outputSurface, lumaOffset, chromaOffset); + if (outputSurface.Field == FrameField.Progressive) + { + SurfaceWriter.Write( + rm.Gmm, + outputSurface, + lumaOffset + pictureInfo.LumaFrameOffset, + chromaOffset + pictureInfo.ChromaFrameOffset); + } + else + { + SurfaceWriter.WriteInterlaced( + rm.Gmm, + outputSurface, + lumaOffset + pictureInfo.LumaTopFieldOffset, + chromaOffset + pictureInfo.ChromaTopFieldOffset, + lumaOffset + pictureInfo.LumaBottomFieldOffset, + chromaOffset + pictureInfo.ChromaBottomFieldOffset); + } } rm.Cache.Put(outputSurface); diff --git a/Ryujinx.Graphics.Nvdec/Image/SurfaceWriter.cs b/Ryujinx.Graphics.Nvdec/Image/SurfaceWriter.cs index 5c294621..cc5c251b 100644 --- a/Ryujinx.Graphics.Nvdec/Image/SurfaceWriter.cs +++ b/Ryujinx.Graphics.Nvdec/Image/SurfaceWriter.cs @@ -38,6 +38,55 @@ namespace Ryujinx.Graphics.Nvdec.Image surface.UvHeight); } + public static void WriteInterlaced( + MemoryManager gmm, + ISurface surface, + uint lumaTopOffset, + uint chromaTopOffset, + uint lumaBottomOffset, + uint chromaBottomOffset) + { + int lumaSize = GetBlockLinearSize(surface.Width, surface.Height / 2, 1); + + using var lumaTop = gmm.GetWritableRegion(ExtendOffset(lumaTopOffset), lumaSize); + using var lumaBottom = gmm.GetWritableRegion(ExtendOffset(lumaBottomOffset), lumaSize); + + WriteLuma( + lumaTop.Memory.Span, + surface.YPlane.AsSpan(), + surface.Stride * 2, + surface.Width, + surface.Height / 2); + + WriteLuma( + lumaBottom.Memory.Span, + surface.YPlane.AsSpan().Slice(surface.Stride), + surface.Stride * 2, + surface.Width, + surface.Height / 2); + + int chromaSize = GetBlockLinearSize(surface.UvWidth, surface.UvHeight / 2, 2); + + using var chromaTop = gmm.GetWritableRegion(ExtendOffset(chromaTopOffset), chromaSize); + using var chromaBottom = gmm.GetWritableRegion(ExtendOffset(chromaBottomOffset), chromaSize); + + WriteChroma( + chromaTop.Memory.Span, + surface.UPlane.AsSpan(), + surface.VPlane.AsSpan(), + surface.UvStride * 2, + surface.UvWidth, + surface.UvHeight / 2); + + WriteChroma( + chromaBottom.Memory.Span, + surface.UPlane.AsSpan().Slice(surface.UvStride), + surface.VPlane.AsSpan().Slice(surface.UvStride), + surface.UvStride * 2, + surface.UvWidth, + surface.UvHeight / 2); + } + private static void WriteLuma(Span dst, ReadOnlySpan src, int srcStride, int width, int height) { LayoutConverter.ConvertLinearToBlockLinear(dst, width, height, srcStride, 1, 2, src); diff --git a/Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs b/Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs index 326c40ae..7c779dff 100644 --- a/Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs +++ b/Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs @@ -26,10 +26,10 @@ namespace Ryujinx.Graphics.Nvdec.Types.H264 public uint Transform8x8ModeFlag; public uint LumaPitch; public uint ChromaPitch; - public uint LumaTopOffset; - public uint LumaBottomOffset; + public uint LumaTopFieldOffset; + public uint LumaBottomFieldOffset; public uint LumaFrameOffset; - public uint ChromaTopOffset; + public uint ChromaTopFieldOffset; public uint ChromaBottomFieldOffset; public uint ChromaFrameOffset; public uint HistBufferSize; diff --git a/Ryujinx.Graphics.Vic/Blender.cs b/Ryujinx.Graphics.Vic/Blender.cs index 84171241..92b641d6 100644 --- a/Ryujinx.Graphics.Vic/Blender.cs +++ b/Ryujinx.Graphics.Vic/Blender.cs @@ -48,38 +48,10 @@ namespace Ryujinx.Graphics.Vic int one = 1 << (mtx.MatrixRShift + 8); - - // NOTE: This is buggy on .NET 5.0.100, we use a workaround for now (see https://github.com/dotnet/runtime/issues/44704) - // TODO: Uncomment this when fixed. - //Vector128 col1 = Vector128.Create(mtx.MatrixCoeff00, mtx.MatrixCoeff10, mtx.MatrixCoeff20, 0); - //Vector128 col2 = Vector128.Create(mtx.MatrixCoeff01, mtx.MatrixCoeff11, mtx.MatrixCoeff21, 0); - //Vector128 col3 = Vector128.Create(mtx.MatrixCoeff02, mtx.MatrixCoeff12, mtx.MatrixCoeff22, one); - //Vector128 col4 = Vector128.Create(mtx.MatrixCoeff03, mtx.MatrixCoeff13, mtx.MatrixCoeff23, 0); - - Vector128 col1 = new Vector128(); - Vector128 col2 = new Vector128(); - Vector128 col3 = new Vector128(); - Vector128 col4 = new Vector128(); - - col1 = Sse41.Insert(col1, mtx.MatrixCoeff00, 0); - col1 = Sse41.Insert(col1, mtx.MatrixCoeff10, 1); - col1 = Sse41.Insert(col1, mtx.MatrixCoeff20, 2); - col1 = Sse41.Insert(col1, 0, 3); - - col2 = Sse41.Insert(col2, mtx.MatrixCoeff01, 0); - col2 = Sse41.Insert(col2, mtx.MatrixCoeff11, 1); - col2 = Sse41.Insert(col2, mtx.MatrixCoeff21, 2); - col2 = Sse41.Insert(col2, 0, 3); - - col3 = Sse41.Insert(col3, mtx.MatrixCoeff02, 0); - col3 = Sse41.Insert(col3, mtx.MatrixCoeff12, 1); - col3 = Sse41.Insert(col3, mtx.MatrixCoeff22, 2); - col3 = Sse41.Insert(col3, one, 3); - - col4 = Sse41.Insert(col4, mtx.MatrixCoeff03, 0); - col4 = Sse41.Insert(col4, mtx.MatrixCoeff13, 1); - col4 = Sse41.Insert(col4, mtx.MatrixCoeff23, 2); - col4 = Sse41.Insert(col4, 0, 3); + Vector128 col1 = Vector128.Create(mtx.MatrixCoeff00, mtx.MatrixCoeff10, mtx.MatrixCoeff20, 0); + Vector128 col2 = Vector128.Create(mtx.MatrixCoeff01, mtx.MatrixCoeff11, mtx.MatrixCoeff21, 0); + Vector128 col3 = Vector128.Create(mtx.MatrixCoeff02, mtx.MatrixCoeff12, mtx.MatrixCoeff22, one); + Vector128 col4 = Vector128.Create(mtx.MatrixCoeff03, mtx.MatrixCoeff13, mtx.MatrixCoeff23, 0); Vector128 rShift = Vector128.CreateScalar(mtx.MatrixRShift); Vector128 clMin = Vector128.Create((ushort)slot.SlotConfig.SoftClampLow); diff --git a/Ryujinx.Graphics.Vic/Image/BufferPool.cs b/Ryujinx.Graphics.Vic/Image/BufferPool.cs index 932d3dc9..cde7e6eb 100644 --- a/Ryujinx.Graphics.Vic/Image/BufferPool.cs +++ b/Ryujinx.Graphics.Vic/Image/BufferPool.cs @@ -14,7 +14,7 @@ namespace Ryujinx.Graphics.Vic.Image /// If the required buffer is larger than this, it won't be /// added to the pool to avoid long term high memory usage. /// - private const int MaxBufferSize = 2048 * 1280; + private const int MaxBufferSize = 2048 * 2048; private struct PoolItem { diff --git a/Ryujinx.Graphics.Vic/Image/InputSurface.cs b/Ryujinx.Graphics.Vic/Image/InputSurface.cs index de003194..15ac0460 100644 --- a/Ryujinx.Graphics.Vic/Image/InputSurface.cs +++ b/Ryujinx.Graphics.Vic/Image/InputSurface.cs @@ -2,16 +2,85 @@ namespace Ryujinx.Graphics.Vic.Image { + ref struct RentedBuffer + { + public static RentedBuffer Empty => new RentedBuffer(Span.Empty, -1); + + public Span Data; + public int Index; + + public RentedBuffer(Span data, int index) + { + Data = data; + Index = index; + } + + public void Return(BufferPool pool) + { + if (Index != -1) + { + pool.Return(Index); + } + } + } + ref struct InputSurface { public ReadOnlySpan Buffer0; public ReadOnlySpan Buffer1; public ReadOnlySpan Buffer2; + public int Buffer0Index; + public int Buffer1Index; + public int Buffer2Index; + public int Width; public int Height; public int UvWidth; public int UvHeight; + + public void Initialize() + { + Buffer0Index = -1; + Buffer1Index = -1; + Buffer2Index = -1; + } + + public void SetBuffer0(RentedBuffer buffer) + { + Buffer0 = buffer.Data; + Buffer0Index = buffer.Index; + } + + public void SetBuffer1(RentedBuffer buffer) + { + Buffer1 = buffer.Data; + Buffer1Index = buffer.Index; + } + + public void SetBuffer2(RentedBuffer buffer) + { + Buffer2 = buffer.Data; + Buffer2Index = buffer.Index; + } + + public void Return(BufferPool pool) + { + if (Buffer0Index != -1) + { + pool.Return(Buffer0Index); + } + + if (Buffer1Index != -1) + { + pool.Return(Buffer1Index); + } + + if (Buffer2Index != -1) + { + pool.Return(Buffer2Index); + } + } } } diff --git a/Ryujinx.Graphics.Vic/Image/SurfaceReader.cs b/Ryujinx.Graphics.Vic/Image/SurfaceReader.cs index aa880916..dda766a5 100644 --- a/Ryujinx.Graphics.Vic/Image/SurfaceReader.cs +++ b/Ryujinx.Graphics.Vic/Image/SurfaceReader.cs @@ -1,5 +1,5 @@ using Ryujinx.Common.Logging; -using Ryujinx.Graphics.Gpu.Memory; +using Ryujinx.Common.Memory; using Ryujinx.Graphics.Texture; using Ryujinx.Graphics.Vic.Types; using System; @@ -12,24 +12,32 @@ namespace Ryujinx.Graphics.Vic.Image { static class SurfaceReader { - public static Surface Read(ResourceManager rm, ref SlotSurfaceConfig config, ref PlaneOffsets offsets) + public static Surface Read( + ResourceManager rm, + ref SlotConfig config, + ref SlotSurfaceConfig surfaceConfig, + ref Array8 offsets) { - switch (config.SlotPixelFormat) + switch (surfaceConfig.SlotPixelFormat) { - case PixelFormat.Y8___V8U8_N420: return ReadNv12(rm, ref config, ref offsets); + case PixelFormat.Y8___V8U8_N420: return ReadNv12(rm, ref config, ref surfaceConfig, ref offsets); } - Logger.Error?.Print(LogClass.Vic, $"Unsupported pixel format \"{config.SlotPixelFormat}\"."); + Logger.Error?.Print(LogClass.Vic, $"Unsupported pixel format \"{surfaceConfig.SlotPixelFormat}\"."); - int lw = config.SlotLumaWidth + 1; - int lh = config.SlotLumaHeight + 1; + int lw = surfaceConfig.SlotLumaWidth + 1; + int lh = surfaceConfig.SlotLumaHeight + 1; return new Surface(rm.SurfacePool, lw, lh); } - private unsafe static Surface ReadNv12(ResourceManager rm, ref SlotSurfaceConfig config, ref PlaneOffsets offsets) + private unsafe static Surface ReadNv12( + ResourceManager rm, + ref SlotConfig config, + ref SlotSurfaceConfig surfaceConfig, + ref Array8 offsets) { - InputSurface input = ReadSurface(rm.Gmm, ref config, ref offsets, 1, 2); + InputSurface input = ReadSurface(rm, ref config, ref surfaceConfig, ref offsets, 1, 2); int width = input.Width; int height = input.Height; @@ -160,6 +168,8 @@ namespace Ryujinx.Graphics.Vic.Image } } + input.Return(rm.BufferPool); + return output; } @@ -170,84 +180,227 @@ namespace Ryujinx.Graphics.Vic.Image } private static InputSurface ReadSurface( - MemoryManager gmm, - ref SlotSurfaceConfig config, - ref PlaneOffsets offsets, + ResourceManager rm, + ref SlotConfig config, + ref SlotSurfaceConfig surfaceConfig, + ref Array8 offsets, int bytesPerPixel, int planes) { InputSurface surface = new InputSurface(); - int gobBlocksInY = 1 << config.SlotBlkHeight; + surface.Initialize(); - bool linear = config.SlotBlkKind == 0; + int gobBlocksInY = 1 << surfaceConfig.SlotBlkHeight; - int lw = config.SlotLumaWidth + 1; - int lh = config.SlotLumaHeight + 1; + bool linear = surfaceConfig.SlotBlkKind == 0; - int cw = config.SlotChromaWidth + 1; - int ch = config.SlotChromaHeight + 1; + int lw = surfaceConfig.SlotLumaWidth + 1; + int lh = surfaceConfig.SlotLumaHeight + 1; + + int cw = surfaceConfig.SlotChromaWidth + 1; + int ch = surfaceConfig.SlotChromaHeight + 1; + + // Interlaced inputs have double the height when deinterlaced. + int heightShift = config.FrameFormat.IsField() ? 1 : 0; surface.Width = lw; - surface.Height = lh; + surface.Height = lh << heightShift; surface.UvWidth = cw; - surface.UvHeight = ch; + surface.UvHeight = ch << heightShift; if (planes > 0) { - surface.Buffer0 = ReadBuffer(gmm, offsets.LumaOffset, linear, lw, lh, bytesPerPixel, gobBlocksInY); + surface.SetBuffer0(ReadBuffer(rm, ref config, ref offsets, linear, 0, lw, lh, bytesPerPixel, gobBlocksInY)); } if (planes > 1) { - surface.Buffer1 = ReadBuffer(gmm, offsets.ChromaUOffset, linear, cw, ch, planes == 2 ? 2 : 1, gobBlocksInY); + surface.SetBuffer1(ReadBuffer(rm, ref config, ref offsets, linear, 1, cw, ch, planes == 2 ? 2 : 1, gobBlocksInY)); } if (planes > 2) { - surface.Buffer2 = ReadBuffer(gmm, offsets.ChromaVOffset, linear, cw, ch, 1, gobBlocksInY); + surface.SetBuffer2(ReadBuffer(rm, ref config, ref offsets, linear, 2, cw, ch, 1, gobBlocksInY)); } return surface; } - private static ReadOnlySpan ReadBuffer( - MemoryManager gmm, - uint offset, + private static RentedBuffer ReadBuffer( + ResourceManager rm, + ref SlotConfig config, + ref Array8 offsets, bool linear, + int plane, + int width, + int height, + int bytesPerPixel, + int gobBlocksInY) + { + FrameFormat frameFormat = config.FrameFormat; + bool isLuma = plane == 0; + bool isField = frameFormat.IsField(); + bool isTopField = frameFormat.IsTopField(isLuma); + int stride = GetPitch(width, bytesPerPixel); + uint offset = GetOffset(ref offsets[0], plane); + + int dstStart = 0; + int dstStride = stride; + + if (isField) + { + dstStart = isTopField ? 0 : stride; + dstStride = stride * 2; + } + + RentedBuffer buffer; + + if (linear) + { + buffer = ReadBufferLinear(rm, offset, width, height, dstStart, dstStride, bytesPerPixel); + } + else + { + buffer = ReadBufferBlockLinear(rm, offset, width, height, dstStart, dstStride, bytesPerPixel, gobBlocksInY); + } + + if (isField || frameFormat.IsInterlaced()) + { + RentedBuffer prevBuffer = RentedBuffer.Empty; + RentedBuffer nextBuffer = RentedBuffer.Empty; + + if (config.PrevFieldEnable) + { + prevBuffer = ReadBufferNoDeinterlace(rm, ref offsets[1], linear, plane, width, height, bytesPerPixel, gobBlocksInY); + } + + if (config.NextFieldEnable) + { + nextBuffer = ReadBufferNoDeinterlace(rm, ref offsets[2], linear, plane, width, height, bytesPerPixel, gobBlocksInY); + } + + int w = width * bytesPerPixel; + + switch (config.DeinterlaceMode) + { + case DeinterlaceMode.Weave: + Scaler.DeinterlaceWeave(buffer.Data, prevBuffer.Data, w, stride, isTopField); + break; + case DeinterlaceMode.BobField: + Scaler.DeinterlaceBob(buffer.Data, w, stride, isTopField); + break; + case DeinterlaceMode.Bob: + bool isCurrentTop = isLuma ? config.IsEven : config.ChromaEven; + Scaler.DeinterlaceBob(buffer.Data, w, stride, isCurrentTop ^ frameFormat.IsInterlacedBottomFirst()); + break; + case DeinterlaceMode.NewBob: + case DeinterlaceMode.Disi1: + Scaler.DeinterlaceMotionAdaptive(buffer.Data, prevBuffer.Data, nextBuffer.Data, w, stride, isTopField); + break; + case DeinterlaceMode.WeaveLumaBobFieldChroma: + if (isLuma) + { + Scaler.DeinterlaceWeave(buffer.Data, prevBuffer.Data, w, stride, isTopField); + } + else + { + Scaler.DeinterlaceBob(buffer.Data, w, stride, isTopField); + } + break; + default: + Logger.Error?.Print(LogClass.Vic, $"Unsupported deinterlace mode \"{config.DeinterlaceMode}\"."); + break; + } + + prevBuffer.Return(rm.BufferPool); + nextBuffer.Return(rm.BufferPool); + } + + return buffer; + } + + private static uint GetOffset(ref PlaneOffsets offsets, int plane) + { + return plane switch + { + 0 => offsets.LumaOffset, + 1 => offsets.ChromaUOffset, + 2 => offsets.ChromaVOffset, + _ => throw new ArgumentOutOfRangeException(nameof(plane)) + }; + } + + private static RentedBuffer ReadBufferNoDeinterlace( + ResourceManager rm, + ref PlaneOffsets offsets, + bool linear, + int plane, int width, int height, int bytesPerPixel, int gobBlocksInY) { int stride = GetPitch(width, bytesPerPixel); + uint offset = GetOffset(ref offsets, plane); if (linear) { - return gmm.GetSpan(ExtendOffset(offset), stride * height); + return ReadBufferLinear(rm, offset, width, height, 0, stride, bytesPerPixel); } - return ReadBuffer(gmm, offset, width, height, stride, bytesPerPixel, gobBlocksInY); + return ReadBufferBlockLinear(rm, offset, width, height, 0, stride, bytesPerPixel, gobBlocksInY); } - private static ReadOnlySpan ReadBuffer( - MemoryManager gmm, + private static RentedBuffer ReadBufferLinear( + ResourceManager rm, uint offset, int width, int height, + int dstStart, + int dstStride, + int bytesPerPixel) + { + int srcStride = GetPitch(width, bytesPerPixel); + int inSize = srcStride * height; + + ReadOnlySpan src = rm.Gmm.GetSpan(ExtendOffset(offset), inSize); + + int outSize = dstStride * height; + int bufferIndex = rm.BufferPool.RentMinimum(outSize, out byte[] buffer); + Span dst = buffer; + dst = dst.Slice(0, outSize); + + for (int y = 0; y < height; y++) + { + src.Slice(y * srcStride, srcStride).CopyTo(dst.Slice(dstStart + y * dstStride, srcStride)); + } + + return new RentedBuffer(dst, bufferIndex); + } + + private static RentedBuffer ReadBufferBlockLinear( + ResourceManager rm, + uint offset, + int width, + int height, + int dstStart, int dstStride, int bytesPerPixel, int gobBlocksInY) { int inSize = GetBlockLinearSize(width, height, bytesPerPixel, gobBlocksInY); - ReadOnlySpan src = gmm.GetSpan(ExtendOffset(offset), inSize); + ReadOnlySpan src = rm.Gmm.GetSpan(ExtendOffset(offset), inSize); - Span dst = new byte[dstStride * height]; + int outSize = dstStride * height; + int bufferIndex = rm.BufferPool.RentMinimum(outSize, out byte[] buffer); + Span dst = buffer; + dst = dst.Slice(0, outSize); - LayoutConverter.ConvertBlockLinearToLinear(dst, width, height, dstStride, bytesPerPixel, gobBlocksInY, src); + LayoutConverter.ConvertBlockLinearToLinear(dst.Slice(dstStart), width, height, dstStride, bytesPerPixel, gobBlocksInY, src); - return dst; + return new RentedBuffer(dst, bufferIndex); } } } diff --git a/Ryujinx.Graphics.Vic/Image/SurfaceWriter.cs b/Ryujinx.Graphics.Vic/Image/SurfaceWriter.cs index dd64720e..297a04b6 100644 --- a/Ryujinx.Graphics.Vic/Image/SurfaceWriter.cs +++ b/Ryujinx.Graphics.Vic/Image/SurfaceWriter.cs @@ -15,7 +15,7 @@ namespace Ryujinx.Graphics.Vic.Image switch (config.OutPixelFormat) { case PixelFormat.A8B8G8R8: - case PixelFormat.X8B8G8R8: + case PixelFormat.X8B8G8R8: WriteA8B8G8R8(rm, input, ref config, ref offsets); break; case PixelFormat.A8R8G8B8: @@ -433,7 +433,7 @@ namespace Ryujinx.Graphics.Vic.Image { if (linear) { - rm.Gmm.Write(ExtendOffset(offset), src); + rm.Gmm.WriteMapped(ExtendOffset(offset), src); return; } @@ -456,7 +456,7 @@ namespace Ryujinx.Graphics.Vic.Image LayoutConverter.ConvertLinearToBlockLinear(dst, width, height, dstStride, bytesPerPixel, gobBlocksInY, src); - rm.Gmm.Write(ExtendOffset(offset), dst); + rm.Gmm.WriteMapped(ExtendOffset(offset), dst); rm.BufferPool.Return(dstIndex); } diff --git a/Ryujinx.Graphics.Vic/Scaler.cs b/Ryujinx.Graphics.Vic/Scaler.cs new file mode 100644 index 00000000..18ae66c4 --- /dev/null +++ b/Ryujinx.Graphics.Vic/Scaler.cs @@ -0,0 +1,124 @@ +using System; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; + +namespace Ryujinx.Graphics.Vic +{ + static class Scaler + { + public static void DeinterlaceWeave(Span data, ReadOnlySpan prevData, int width, int fieldSize, bool isTopField) + { + // Prev I Curr I Curr P + // TTTTTTTT BBBBBBBB TTTTTTTT + // -------- -------- BBBBBBBB + + if (isTopField) + { + for (int offset = 0; offset < data.Length; offset += fieldSize * 2) + { + prevData.Slice(offset >> 1, width).CopyTo(data.Slice(offset + fieldSize, width)); + } + } + else + { + for (int offset = 0; offset < data.Length; offset += fieldSize * 2) + { + prevData.Slice(offset >> 1, width).CopyTo(data.Slice(offset, width)); + } + } + } + + public static void DeinterlaceBob(Span data, int width, int fieldSize, bool isTopField) + { + // Curr I Curr P + // TTTTTTTT TTTTTTTT + // -------- TTTTTTTT + + if (isTopField) + { + for (int offset = 0; offset < data.Length; offset += fieldSize * 2) + { + data.Slice(offset, width).CopyTo(data.Slice(offset + fieldSize, width)); + } + } + else + { + for (int offset = 0; offset < data.Length; offset += fieldSize * 2) + { + data.Slice(offset + fieldSize, width).CopyTo(data.Slice(offset, width)); + } + } + } + + public unsafe static void DeinterlaceMotionAdaptive( + Span data, + ReadOnlySpan prevData, + ReadOnlySpan nextData, + int width, + int fieldSize, + bool isTopField) + { + // Very simple motion adaptive algorithm. + // If the pixel changed between previous and next frame, use Bob, otherwise use Weave. + // + // Example pseudo code: + // C_even = (P_even == N_even) ? P_even : C_odd + // Where: C is current frame, P is previous frame and N is next frame, and even/odd are the fields. + // + // Note: This does not fully match the hardware algorithm. + // The motion adaptive deinterlacing implemented on hardware is considerably more complex, + // and hard to implement accurately without proper documentation as for example, the + // method used for motion estimation is unknown. + + int start = isTopField ? fieldSize : 0; + int otherFieldOffset = isTopField ? -fieldSize : fieldSize; + + fixed (byte* pData = data, pPrevData = prevData, pNextData = nextData) + { + for (int offset = start; offset < data.Length; offset += fieldSize * 2) + { + int refOffset = (offset - start) >> 1; + int x = 0; + + if (Avx2.IsSupported) + { + for (; x < (width & ~0x1f); x += 32) + { + Vector256 prevPixels = Avx.LoadVector256(pPrevData + refOffset + x); + Vector256 nextPixels = Avx.LoadVector256(pNextData + refOffset + x); + Vector256 bob = Avx.LoadVector256(pData + offset + otherFieldOffset + x); + Vector256 diff = Avx2.CompareEqual(prevPixels, nextPixels); + Avx.Store(pData + offset + x, Avx2.BlendVariable(bob, prevPixels, diff)); + } + } + else if (Sse41.IsSupported) + { + for (; x < (width & ~0xf); x += 16) + { + Vector128 prevPixels = Sse2.LoadVector128(pPrevData + refOffset + x); + Vector128 nextPixels = Sse2.LoadVector128(pNextData + refOffset + x); + Vector128 bob = Sse2.LoadVector128(pData + offset + otherFieldOffset + x); + Vector128 diff = Sse2.CompareEqual(prevPixels, nextPixels); + Sse2.Store(pData + offset + x, Sse41.BlendVariable(bob, prevPixels, diff)); + } + } + + for (; x < width; x++) + { + byte prevPixel = prevData[refOffset + x]; + byte nextPixel = nextData[refOffset + x]; + + if (nextPixel != prevPixel) + { + data[offset + x] = data[offset + otherFieldOffset + x]; + } + else + { + data[offset + x] = prevPixel; + } + } + } + } + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Vic/Types/DeinterlaceMode.cs b/Ryujinx.Graphics.Vic/Types/DeinterlaceMode.cs new file mode 100644 index 00000000..aa0654f0 --- /dev/null +++ b/Ryujinx.Graphics.Vic/Types/DeinterlaceMode.cs @@ -0,0 +1,12 @@ +namespace Ryujinx.Graphics.Vic.Types +{ + enum DeinterlaceMode + { + Weave, + BobField, + Bob, + NewBob, + Disi1, + WeaveLumaBobFieldChroma + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Vic/Types/FrameFormat.cs b/Ryujinx.Graphics.Vic/Types/FrameFormat.cs new file mode 100644 index 00000000..91f5751b --- /dev/null +++ b/Ryujinx.Graphics.Vic/Types/FrameFormat.cs @@ -0,0 +1,79 @@ +namespace Ryujinx.Graphics.Vic.Types +{ + enum FrameFormat + { + Progressive, + InterlacedTopFieldFirst, + InterlacedBottomFieldFirst, + TopField, + BottomField, + SubPicProgressive, + SubPicInterlacedTopFieldFirst, + SubPicInterlacedBottomFieldFirst, + SubPicTopField, + SubPicBottomField, + TopFieldChromaBottom, + BottomFieldChromaTop, + SubPicTopFieldChromaBottom, + SubPicBottomFieldChromaTop + } + + static class FrameFormatExtensions + { + public static bool IsField(this FrameFormat frameFormat) + { + switch (frameFormat) + { + case FrameFormat.TopField: + case FrameFormat.BottomField: + case FrameFormat.SubPicTopField: + case FrameFormat.SubPicBottomField: + case FrameFormat.TopFieldChromaBottom: + case FrameFormat.BottomFieldChromaTop: + case FrameFormat.SubPicTopFieldChromaBottom: + case FrameFormat.SubPicBottomFieldChromaTop: + return true; + } + + return false; + } + + public static bool IsInterlaced(this FrameFormat frameFormat) + { + switch (frameFormat) + { + case FrameFormat.InterlacedTopFieldFirst: + case FrameFormat.InterlacedBottomFieldFirst: + case FrameFormat.SubPicInterlacedTopFieldFirst: + case FrameFormat.SubPicInterlacedBottomFieldFirst: + return true; + } + + return false; + } + + public static bool IsInterlacedBottomFirst(this FrameFormat frameFormat) + { + return frameFormat == FrameFormat.InterlacedBottomFieldFirst || + frameFormat == FrameFormat.SubPicInterlacedBottomFieldFirst; + } + + public static bool IsTopField(this FrameFormat frameFormat, bool isLuma) + { + switch (frameFormat) + { + case FrameFormat.TopField: + case FrameFormat.SubPicTopField: + return true; + case FrameFormat.TopFieldChromaBottom: + case FrameFormat.SubPicTopFieldChromaBottom: + return isLuma; + case FrameFormat.BottomFieldChromaTop: + case FrameFormat.SubPicBottomFieldChromaTop: + return !isLuma; + } + + return false; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Vic/Types/SlotConfig.cs b/Ryujinx.Graphics.Vic/Types/SlotConfig.cs index 183ee4ac..373e76f6 100644 --- a/Ryujinx.Graphics.Vic/Types/SlotConfig.cs +++ b/Ryujinx.Graphics.Vic/Types/SlotConfig.cs @@ -27,7 +27,7 @@ public bool PrevMotionFieldEnable => _word0.Extract(13); public bool PpMotionFieldEnable => _word0.Extract(14); public bool CombMotionFieldEnable => _word0.Extract(15); - public int FrameFormat => _word0.Extract(16, 4); + public FrameFormat FrameFormat => (FrameFormat)_word0.Extract(16, 4); public int FilterLengthY => _word0.Extract(20, 2); public int FilterLengthX => _word0.Extract(22, 2); public int Panoramic => _word0.Extract(24, 12); @@ -36,7 +36,7 @@ public int FilterDetail => _word1.Extract(74, 10); public int ChromaNoise => _word1.Extract(84, 10); public int ChromaDetail => _word1.Extract(94, 10); - public int DeinterlaceMode => _word1.Extract(104, 4); + public DeinterlaceMode DeinterlaceMode => (DeinterlaceMode)_word1.Extract(104, 4); public int MotionAccumWeight => _word1.Extract(108, 3); public int NoiseIir => _word1.Extract(111, 11); public int LightLevel => _word1.Extract(122, 4); diff --git a/Ryujinx.Graphics.Vic/VicDevice.cs b/Ryujinx.Graphics.Vic/VicDevice.cs index 21021c58..537b8ba4 100644 --- a/Ryujinx.Graphics.Vic/VicDevice.cs +++ b/Ryujinx.Graphics.Vic/VicDevice.cs @@ -43,9 +43,9 @@ namespace Ryujinx.Graphics.Vic continue; } - var offsets = _state.State.SetSurfacexSlotx[i][0]; + ref var offsets = ref _state.State.SetSurfacexSlotx[i]; - using Surface src = SurfaceReader.Read(_rm, ref slot.SlotSurfaceConfig, ref offsets); + using Surface src = SurfaceReader.Read(_rm, ref slot.SlotConfig, ref slot.SlotSurfaceConfig, ref offsets); Blender.BlendOne(output, src, ref slot); } diff --git a/Ryujinx.Graphics.Video/FrameField.cs b/Ryujinx.Graphics.Video/FrameField.cs new file mode 100644 index 00000000..2bff0e75 --- /dev/null +++ b/Ryujinx.Graphics.Video/FrameField.cs @@ -0,0 +1,8 @@ +namespace Ryujinx.Graphics.Video +{ + public enum FrameField + { + Progressive, + Interlaced + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Video/ISurface.cs b/Ryujinx.Graphics.Video/ISurface.cs index fb66f31a..7c1661f1 100644 --- a/Ryujinx.Graphics.Video/ISurface.cs +++ b/Ryujinx.Graphics.Video/ISurface.cs @@ -8,6 +8,8 @@ namespace Ryujinx.Graphics.Video Plane UPlane { get; } Plane VPlane { get; } + FrameField Field { get; } + int Width { get; } int Height { get; } int Stride { get; }