From 4d02a2d2c0451b4de1f6de3bbce54c457cacebe2 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sun, 12 Jul 2020 00:07:01 -0300 Subject: [PATCH] New NVDEC and VIC implementation (#1384) * Initial NVDEC and VIC implementation * Update FFmpeg.AutoGen to 4.3.0 * Add nvdec dependencies for Windows * Unify some VP9 structures * Rename VP9 structure fields * Improvements to Video API * XML docs for Common.Memory * Remove now unused or redundant overloads from MemoryAccessor * NVDEC UV surface read/write scalar paths * Add FIXME comments about hacky things/stuff that will need to be fixed in the future * Cleaned up VP9 memory allocation * Remove some debug logs * Rename some VP9 structs * Remove unused struct * No need to compile Ryujinx.Graphics.Host1x with unsafe anymore * Name AsyncWorkQueue threads to make debugging easier * Make Vp9PictureInfo a ref struct * LayoutConverter no longer needs the depth argument (broken by rebase) * Pooling of VP9 buffers, plus fix a memory leak on VP9 * Really wish VS could rename projects properly... 
* Address feedback * Remove using * Catch OperationCanceledException * Add licensing informations * Add THIRDPARTY.md to release too Co-authored-by: Thog --- README.md | 6 + Ryujinx.Common/AsyncWorkQueue.cs | 100 + Ryujinx.Common/Logging/LogClass.cs | 5 +- Ryujinx.Common/Memory/ArrayPtr.cs | 123 + Ryujinx.Common/Memory/IArray.cs | 21 + Ryujinx.Common/Memory/Ptr.cs | 68 + Ryujinx.Common/Memory/StructArrayHelpers.cs | 518 +++ Ryujinx.Cpu/MemoryManager.cs | 32 + Ryujinx.Cpu/WritableRegion.cs | 29 + Ryujinx.Graphics.Device/AccessControl.cs | 10 + Ryujinx.Graphics.Device/DeviceState.cs | 124 + Ryujinx.Graphics.Device/IDeviceState.cs | 8 + Ryujinx.Graphics.Device/RegisterAttribute.cs | 15 + Ryujinx.Graphics.Device/RwCallback.cs | 16 + .../Ryujinx.Graphics.Device.csproj | 7 + Ryujinx.Graphics.Device/SizeCalculator.cs | 63 + Ryujinx.Graphics.Gpu/Engine/Compute.cs | 2 +- .../Engine/MethodConditionalRendering.cs | 10 +- Ryujinx.Graphics.Gpu/Engine/Methods.cs | 2 +- Ryujinx.Graphics.Gpu/GpuContext.cs | 2 +- Ryujinx.Graphics.Gpu/Image/TextureManager.cs | 2 + Ryujinx.Graphics.Gpu/Memory/MemoryAccessor.cs | 36 - Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs | 60 +- Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs | 12 + Ryujinx.Graphics.Host1x/ClassId.cs | 20 + Ryujinx.Graphics.Host1x/Devices.cs | 32 + Ryujinx.Graphics.Host1x/Host1xClass.cs | 33 + .../Host1xClassRegisters.cs | 41 + Ryujinx.Graphics.Host1x/Host1xDevice.cs | 123 + Ryujinx.Graphics.Host1x/OpCode.cs | 21 + .../Ryujinx.Graphics.Host1x.csproj | 20 + Ryujinx.Graphics.Host1x/SyncptIncrManager.cs | 99 + Ryujinx.Graphics.Host1x/ThiDevice.cs | 96 + Ryujinx.Graphics.Host1x/ThiRegisters.cs | 22 + Ryujinx.Graphics.Nvdec.H264/Decoder.cs | 40 + Ryujinx.Graphics.Nvdec.H264/FFmpegContext.cs | 51 + .../H264BitStreamWriter.cs | 121 + .../Ryujinx.Graphics.Nvdec.H264.csproj | 23 + .../SpsAndPpsReconstruction.cs | 159 + Ryujinx.Graphics.Nvdec.H264/Surface.cs | 33 + Ryujinx.Graphics.Nvdec.Vp9/BitDepth.cs | 9 + 
Ryujinx.Graphics.Nvdec.Vp9/CodecErr.cs | 56 + Ryujinx.Graphics.Nvdec.Vp9/Common/BitUtils.cs | 59 + .../Common/MemoryAllocator.cs | 94 + .../Common/MemoryUtil.cs | 25 + Ryujinx.Graphics.Nvdec.Vp9/Constants.cs | 71 + Ryujinx.Graphics.Nvdec.Vp9/DecodeFrame.cs | 1190 +++++++ Ryujinx.Graphics.Nvdec.Vp9/DecodeMv.cs | 1159 +++++++ Ryujinx.Graphics.Nvdec.Vp9/Decoder.cs | 164 + Ryujinx.Graphics.Nvdec.Vp9/Detokenize.cs | 325 ++ Ryujinx.Graphics.Nvdec.Vp9/Dsp/Convolve.cs | 949 ++++++ Ryujinx.Graphics.Nvdec.Vp9/Dsp/Filter.cs | 12 + Ryujinx.Graphics.Nvdec.Vp9/Dsp/IntraPred.cs | 1379 ++++++++ Ryujinx.Graphics.Nvdec.Vp9/Dsp/InvTxfm.cs | 2868 +++++++++++++++++ Ryujinx.Graphics.Nvdec.Vp9/Dsp/Prob.cs | 73 + Ryujinx.Graphics.Nvdec.Vp9/Dsp/Reader.cs | 237 ++ Ryujinx.Graphics.Nvdec.Vp9/Dsp/TxfmCommon.cs | 54 + Ryujinx.Graphics.Nvdec.Vp9/Idct.cs | 536 +++ .../InternalErrorException.cs | 15 + .../InternalErrorInfo.cs | 14 + Ryujinx.Graphics.Nvdec.Vp9/LoopFilter.cs | 418 +++ Ryujinx.Graphics.Nvdec.Vp9/Luts.cs | 1612 +++++++++ Ryujinx.Graphics.Nvdec.Vp9/PredCommon.cs | 389 +++ Ryujinx.Graphics.Nvdec.Vp9/QuantCommon.cs | 203 ++ Ryujinx.Graphics.Nvdec.Vp9/ReconInter.cs | 234 ++ Ryujinx.Graphics.Nvdec.Vp9/ReconIntra.cs | 761 +++++ .../Ryujinx.Graphics.Nvdec.Vp9.csproj | 20 + Ryujinx.Graphics.Nvdec.Vp9/TileBuffer.cs | 10 + Ryujinx.Graphics.Nvdec.Vp9/TileWorkerData.cs | 15 + Ryujinx.Graphics.Nvdec.Vp9/Types/BModeInfo.cs | 10 + Ryujinx.Graphics.Nvdec.Vp9/Types/BlockSize.cs | 21 + Ryujinx.Graphics.Nvdec.Vp9/Types/Buf2D.cs | 10 + Ryujinx.Graphics.Nvdec.Vp9/Types/FrameType.cs | 8 + .../Types/LoopFilter.cs | 27 + .../Types/LoopFilterInfoN.cs | 10 + .../Types/LoopFilterMask.cs | 24 + .../Types/LoopFilterThresh.cs | 13 + .../Types/MacroBlockD.cs | 179 + .../Types/MacroBlockDPlane.cs | 21 + Ryujinx.Graphics.Nvdec.Vp9/Types/ModeInfo.cs | 66 + .../Types/MotionVectorContext.cs | 14 + Ryujinx.Graphics.Nvdec.Vp9/Types/Mv.cs | 189 ++ Ryujinx.Graphics.Nvdec.Vp9/Types/Mv32.cs | 8 + .../Types/MvClassType.cs | 
17 + .../Types/MvJointType.cs | 10 + Ryujinx.Graphics.Nvdec.Vp9/Types/MvRef.cs | 10 + .../Types/PartitionType.cs | 12 + Ryujinx.Graphics.Nvdec.Vp9/Types/PlaneType.cs | 9 + Ryujinx.Graphics.Nvdec.Vp9/Types/Position.cs | 14 + .../Types/PredictionMode.cs | 21 + Ryujinx.Graphics.Nvdec.Vp9/Types/RefBuffer.cs | 8 + .../Types/ReferenceMode.cs | 10 + .../Types/ScaleFactors.cs | 451 +++ .../Types/SegLvlFeatures.cs | 11 + .../Types/Segmentation.cs | 71 + Ryujinx.Graphics.Nvdec.Vp9/Types/Surface.cs | 80 + Ryujinx.Graphics.Nvdec.Vp9/Types/TileInfo.cs | 85 + Ryujinx.Graphics.Nvdec.Vp9/Types/TxMode.cs | 12 + Ryujinx.Graphics.Nvdec.Vp9/Types/TxSize.cs | 11 + Ryujinx.Graphics.Nvdec.Vp9/Types/TxType.cs | 11 + Ryujinx.Graphics.Nvdec.Vp9/Types/Vp9Common.cs | 334 ++ Ryujinx.Graphics.Nvdec/CdmaProcessor.cs | 103 - Ryujinx.Graphics.Nvdec/ChClassId.cs | 20 - Ryujinx.Graphics.Nvdec/ChCommandEntry.cs | 18 - Ryujinx.Graphics.Nvdec/ChSubmissionMode.cs | 13 - Ryujinx.Graphics.Nvdec/CodecId.cs | 13 + .../FrameDecodedEventArgs.cs | 16 + Ryujinx.Graphics.Nvdec/H264Decoder.cs | 43 + Ryujinx.Graphics.Nvdec/Image/SurfaceCache.cs | 151 + Ryujinx.Graphics.Nvdec/Image/SurfaceCommon.cs | 26 + Ryujinx.Graphics.Nvdec/Image/SurfaceReader.cs | 133 + Ryujinx.Graphics.Nvdec/Image/SurfaceWriter.cs | 126 + Ryujinx.Graphics.Nvdec/MemoryExtensions.cs | 28 + Ryujinx.Graphics.Nvdec/NvdecDevice.cs | 55 + Ryujinx.Graphics.Nvdec/NvdecRegisters.cs | 41 + Ryujinx.Graphics.Nvdec/ResourceManager.cs | 17 + .../Ryujinx.Graphics.Nvdec.csproj | 12 +- .../Types/H264/PictureInfo.cs | 120 + .../Types/H264/ReferenceFrame.cs | 10 + .../Types/Vp9/BackwardUpdates.cs | 72 + .../Types/Vp9/EntropyProbs.cs | 139 + .../Types/Vp9/FrameFlags.cs | 12 + Ryujinx.Graphics.Nvdec/Types/Vp9/FrameSize.cs | 10 + .../Types/Vp9/FrameStats.cs | 20 + .../Types/Vp9/LoopFilter.cs | 11 + .../Types/Vp9/PictureInfo.cs | 85 + .../Types/Vp9/Segmentation.cs | 14 + .../VDec/BitStreamWriter.cs | 75 - Ryujinx.Graphics.Nvdec/VDec/DecoderHelper.cs | 17 - 
Ryujinx.Graphics.Nvdec/VDec/FFmpeg.cs | 168 - Ryujinx.Graphics.Nvdec/VDec/FFmpegFrame.cs | 14 - .../VDec/H264BitStreamWriter.cs | 79 - Ryujinx.Graphics.Nvdec/VDec/H264Decoder.cs | 238 -- Ryujinx.Graphics.Nvdec/VDec/H264Matrices.cs | 8 - .../VDec/H264ParameterSets.cs | 34 - Ryujinx.Graphics.Nvdec/VDec/VideoCodec.cs | 10 - Ryujinx.Graphics.Nvdec/VDec/VideoDecoder.cs | 281 -- .../VDec/VideoDecoderMeth.cs | 19 - Ryujinx.Graphics.Nvdec/VDec/Vp9Decoder.cs | 879 ----- Ryujinx.Graphics.Nvdec/VDec/Vp9FrameHeader.cs | 79 - Ryujinx.Graphics.Nvdec/VDec/Vp9FrameKeys.cs | 10 - .../VDec/Vp9ProbabilityTables.cs | 31 - .../VDec/VpxBitStreamWriter.cs | 38 - .../VDec/VpxRangeEncoder.cs | 134 - Ryujinx.Graphics.Nvdec/Vic/StructUnpacker.cs | 69 - .../Vic/SurfaceOutputConfig.cs | 33 - .../Vic/SurfacePixelFormat.cs | 8 - .../Vic/VideoImageComposer.cs | 94 - .../Vic/VideoImageComposerMeth.cs | 12 - Ryujinx.Graphics.Nvdec/Vp9Decoder.cs | 92 + Ryujinx.Graphics.Texture/LayoutConverter.cs | 168 + Ryujinx.Graphics.Texture/OffsetCalculator.cs | 13 + Ryujinx.Graphics.Vic/Blender.cs | 157 + Ryujinx.Graphics.Vic/Image/BufferPool.cs | 103 + Ryujinx.Graphics.Vic/Image/InputSurface.cs | 17 + Ryujinx.Graphics.Vic/Image/Pixel.cs | 10 + Ryujinx.Graphics.Vic/Image/Surface.cs | 46 + Ryujinx.Graphics.Vic/Image/SurfaceCommon.cs | 33 + Ryujinx.Graphics.Vic/Image/SurfaceReader.cs | 253 ++ Ryujinx.Graphics.Vic/Image/SurfaceWriter.cs | 361 +++ Ryujinx.Graphics.Vic/ResourceManager.cs | 19 + .../Ryujinx.Graphics.Vic.csproj | 23 + .../Types/BitfieldExtensions.cs | 39 + .../Types/BlendingSlotStruct.cs | 27 + Ryujinx.Graphics.Vic/Types/ClearRectStruct.cs | 17 + Ryujinx.Graphics.Vic/Types/ConfigStruct.cs | 14 + Ryujinx.Graphics.Vic/Types/LumaKeyStruct.cs | 17 + Ryujinx.Graphics.Vic/Types/MatrixStruct.cs | 25 + Ryujinx.Graphics.Vic/Types/OutputConfig.cs | 23 + .../Types/OutputSurfaceConfig.cs | 20 + Ryujinx.Graphics.Vic/Types/PipeConfig.cs | 11 + Ryujinx.Graphics.Vic/Types/PixelFormat.cs | 81 + 
Ryujinx.Graphics.Vic/Types/SlotConfig.cs | 63 + Ryujinx.Graphics.Vic/Types/SlotStruct.cs | 12 + .../Types/SlotSurfaceConfig.cs | 21 + Ryujinx.Graphics.Vic/VicDevice.cs | 97 + Ryujinx.Graphics.Vic/VicRegisters.cs | 47 + Ryujinx.Graphics.Video/H264PictureInfo.cs | 47 + Ryujinx.Graphics.Video/IDecoder.cs | 11 + Ryujinx.Graphics.Video/IH264Decoder.cs | 9 + Ryujinx.Graphics.Video/ISurface.cs | 18 + Ryujinx.Graphics.Video/IVp9Decoder.cs | 14 + Ryujinx.Graphics.Video/Plane.cs | 42 + .../Ryujinx.Graphics.Video.csproj | 11 + Ryujinx.Graphics.Video/Vp9BackwardUpdates.cs | 32 + Ryujinx.Graphics.Video/Vp9EntropyProbs.cs | 36 + Ryujinx.Graphics.Video/Vp9Mv.cs | 8 + Ryujinx.Graphics.Video/Vp9MvRef.cs | 11 + Ryujinx.Graphics.Video/Vp9PictureInfo.cs | 39 + Ryujinx.HLE/HOS/Horizon.cs | 2 - Ryujinx.HLE/HOS/Services/Nv/INvDrvServices.cs | 2 + .../Services/Nv/NvDrvServices/NvDeviceFile.cs | 2 + .../NvHostChannel/NvHostChannelDeviceFile.cs | 62 +- .../NvHostChannel/Types/SubmitArguments.cs | 29 +- .../NvHostCtrl/Types/NvHostSyncPt.cs | 6 +- Ryujinx.HLE/Ryujinx.HLE.csproj | 3 + Ryujinx.HLE/Switch.cs | 37 +- Ryujinx.Memory/MemoryBlock.cs | 20 +- Ryujinx.Memory/NativeMemoryManager.cs | 42 + Ryujinx.sln | 60 + Ryujinx/Ryujinx.csproj | 4 + Ryujinx/THIRDPARTY.md | 203 ++ 202 files changed, 20563 insertions(+), 2567 deletions(-) create mode 100644 Ryujinx.Common/AsyncWorkQueue.cs create mode 100644 Ryujinx.Common/Memory/ArrayPtr.cs create mode 100644 Ryujinx.Common/Memory/IArray.cs create mode 100644 Ryujinx.Common/Memory/Ptr.cs create mode 100644 Ryujinx.Common/Memory/StructArrayHelpers.cs create mode 100644 Ryujinx.Cpu/WritableRegion.cs create mode 100644 Ryujinx.Graphics.Device/AccessControl.cs create mode 100644 Ryujinx.Graphics.Device/DeviceState.cs create mode 100644 Ryujinx.Graphics.Device/IDeviceState.cs create mode 100644 Ryujinx.Graphics.Device/RegisterAttribute.cs create mode 100644 Ryujinx.Graphics.Device/RwCallback.cs create mode 100644 
Ryujinx.Graphics.Device/Ryujinx.Graphics.Device.csproj create mode 100644 Ryujinx.Graphics.Device/SizeCalculator.cs create mode 100644 Ryujinx.Graphics.Host1x/ClassId.cs create mode 100644 Ryujinx.Graphics.Host1x/Devices.cs create mode 100644 Ryujinx.Graphics.Host1x/Host1xClass.cs create mode 100644 Ryujinx.Graphics.Host1x/Host1xClassRegisters.cs create mode 100644 Ryujinx.Graphics.Host1x/Host1xDevice.cs create mode 100644 Ryujinx.Graphics.Host1x/OpCode.cs create mode 100644 Ryujinx.Graphics.Host1x/Ryujinx.Graphics.Host1x.csproj create mode 100644 Ryujinx.Graphics.Host1x/SyncptIncrManager.cs create mode 100644 Ryujinx.Graphics.Host1x/ThiDevice.cs create mode 100644 Ryujinx.Graphics.Host1x/ThiRegisters.cs create mode 100644 Ryujinx.Graphics.Nvdec.H264/Decoder.cs create mode 100644 Ryujinx.Graphics.Nvdec.H264/FFmpegContext.cs create mode 100644 Ryujinx.Graphics.Nvdec.H264/H264BitStreamWriter.cs create mode 100644 Ryujinx.Graphics.Nvdec.H264/Ryujinx.Graphics.Nvdec.H264.csproj create mode 100644 Ryujinx.Graphics.Nvdec.H264/SpsAndPpsReconstruction.cs create mode 100644 Ryujinx.Graphics.Nvdec.H264/Surface.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/BitDepth.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/CodecErr.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Common/BitUtils.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Common/MemoryAllocator.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Common/MemoryUtil.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Constants.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/DecodeFrame.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/DecodeMv.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Decoder.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Detokenize.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Dsp/Convolve.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Dsp/Filter.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Dsp/IntraPred.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Dsp/InvTxfm.cs create mode 100644 
Ryujinx.Graphics.Nvdec.Vp9/Dsp/Prob.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Dsp/Reader.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Dsp/TxfmCommon.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Idct.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/InternalErrorException.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/InternalErrorInfo.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/LoopFilter.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Luts.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/PredCommon.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/QuantCommon.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/ReconInter.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/ReconIntra.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Ryujinx.Graphics.Nvdec.Vp9.csproj create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/TileBuffer.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/TileWorkerData.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/BModeInfo.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/BlockSize.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/Buf2D.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/FrameType.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilter.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterInfoN.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterMask.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterThresh.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/MacroBlockD.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/MacroBlockDPlane.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/ModeInfo.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/MotionVectorContext.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/Mv.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/Mv32.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/MvClassType.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/MvJointType.cs create mode 100644 
Ryujinx.Graphics.Nvdec.Vp9/Types/MvRef.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/PartitionType.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/PlaneType.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/Position.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/PredictionMode.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/RefBuffer.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/ReferenceMode.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/ScaleFactors.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/SegLvlFeatures.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/Segmentation.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/Surface.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/TileInfo.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/TxMode.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/TxSize.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/TxType.cs create mode 100644 Ryujinx.Graphics.Nvdec.Vp9/Types/Vp9Common.cs delete mode 100644 Ryujinx.Graphics.Nvdec/CdmaProcessor.cs delete mode 100644 Ryujinx.Graphics.Nvdec/ChClassId.cs delete mode 100644 Ryujinx.Graphics.Nvdec/ChCommandEntry.cs delete mode 100644 Ryujinx.Graphics.Nvdec/ChSubmissionMode.cs create mode 100644 Ryujinx.Graphics.Nvdec/CodecId.cs create mode 100644 Ryujinx.Graphics.Nvdec/FrameDecodedEventArgs.cs create mode 100644 Ryujinx.Graphics.Nvdec/H264Decoder.cs create mode 100644 Ryujinx.Graphics.Nvdec/Image/SurfaceCache.cs create mode 100644 Ryujinx.Graphics.Nvdec/Image/SurfaceCommon.cs create mode 100644 Ryujinx.Graphics.Nvdec/Image/SurfaceReader.cs create mode 100644 Ryujinx.Graphics.Nvdec/Image/SurfaceWriter.cs create mode 100644 Ryujinx.Graphics.Nvdec/MemoryExtensions.cs create mode 100644 Ryujinx.Graphics.Nvdec/NvdecDevice.cs create mode 100644 Ryujinx.Graphics.Nvdec/NvdecRegisters.cs create mode 100644 Ryujinx.Graphics.Nvdec/ResourceManager.cs create mode 100644 
Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs create mode 100644 Ryujinx.Graphics.Nvdec/Types/H264/ReferenceFrame.cs create mode 100644 Ryujinx.Graphics.Nvdec/Types/Vp9/BackwardUpdates.cs create mode 100644 Ryujinx.Graphics.Nvdec/Types/Vp9/EntropyProbs.cs create mode 100644 Ryujinx.Graphics.Nvdec/Types/Vp9/FrameFlags.cs create mode 100644 Ryujinx.Graphics.Nvdec/Types/Vp9/FrameSize.cs create mode 100644 Ryujinx.Graphics.Nvdec/Types/Vp9/FrameStats.cs create mode 100644 Ryujinx.Graphics.Nvdec/Types/Vp9/LoopFilter.cs create mode 100644 Ryujinx.Graphics.Nvdec/Types/Vp9/PictureInfo.cs create mode 100644 Ryujinx.Graphics.Nvdec/Types/Vp9/Segmentation.cs delete mode 100644 Ryujinx.Graphics.Nvdec/VDec/BitStreamWriter.cs delete mode 100644 Ryujinx.Graphics.Nvdec/VDec/DecoderHelper.cs delete mode 100644 Ryujinx.Graphics.Nvdec/VDec/FFmpeg.cs delete mode 100644 Ryujinx.Graphics.Nvdec/VDec/FFmpegFrame.cs delete mode 100644 Ryujinx.Graphics.Nvdec/VDec/H264BitStreamWriter.cs delete mode 100644 Ryujinx.Graphics.Nvdec/VDec/H264Decoder.cs delete mode 100644 Ryujinx.Graphics.Nvdec/VDec/H264Matrices.cs delete mode 100644 Ryujinx.Graphics.Nvdec/VDec/H264ParameterSets.cs delete mode 100644 Ryujinx.Graphics.Nvdec/VDec/VideoCodec.cs delete mode 100644 Ryujinx.Graphics.Nvdec/VDec/VideoDecoder.cs delete mode 100644 Ryujinx.Graphics.Nvdec/VDec/VideoDecoderMeth.cs delete mode 100644 Ryujinx.Graphics.Nvdec/VDec/Vp9Decoder.cs delete mode 100644 Ryujinx.Graphics.Nvdec/VDec/Vp9FrameHeader.cs delete mode 100644 Ryujinx.Graphics.Nvdec/VDec/Vp9FrameKeys.cs delete mode 100644 Ryujinx.Graphics.Nvdec/VDec/Vp9ProbabilityTables.cs delete mode 100644 Ryujinx.Graphics.Nvdec/VDec/VpxBitStreamWriter.cs delete mode 100644 Ryujinx.Graphics.Nvdec/VDec/VpxRangeEncoder.cs delete mode 100644 Ryujinx.Graphics.Nvdec/Vic/StructUnpacker.cs delete mode 100644 Ryujinx.Graphics.Nvdec/Vic/SurfaceOutputConfig.cs delete mode 100644 Ryujinx.Graphics.Nvdec/Vic/SurfacePixelFormat.cs delete mode 100644 
Ryujinx.Graphics.Nvdec/Vic/VideoImageComposer.cs delete mode 100644 Ryujinx.Graphics.Nvdec/Vic/VideoImageComposerMeth.cs create mode 100644 Ryujinx.Graphics.Nvdec/Vp9Decoder.cs create mode 100644 Ryujinx.Graphics.Vic/Blender.cs create mode 100644 Ryujinx.Graphics.Vic/Image/BufferPool.cs create mode 100644 Ryujinx.Graphics.Vic/Image/InputSurface.cs create mode 100644 Ryujinx.Graphics.Vic/Image/Pixel.cs create mode 100644 Ryujinx.Graphics.Vic/Image/Surface.cs create mode 100644 Ryujinx.Graphics.Vic/Image/SurfaceCommon.cs create mode 100644 Ryujinx.Graphics.Vic/Image/SurfaceReader.cs create mode 100644 Ryujinx.Graphics.Vic/Image/SurfaceWriter.cs create mode 100644 Ryujinx.Graphics.Vic/ResourceManager.cs create mode 100644 Ryujinx.Graphics.Vic/Ryujinx.Graphics.Vic.csproj create mode 100644 Ryujinx.Graphics.Vic/Types/BitfieldExtensions.cs create mode 100644 Ryujinx.Graphics.Vic/Types/BlendingSlotStruct.cs create mode 100644 Ryujinx.Graphics.Vic/Types/ClearRectStruct.cs create mode 100644 Ryujinx.Graphics.Vic/Types/ConfigStruct.cs create mode 100644 Ryujinx.Graphics.Vic/Types/LumaKeyStruct.cs create mode 100644 Ryujinx.Graphics.Vic/Types/MatrixStruct.cs create mode 100644 Ryujinx.Graphics.Vic/Types/OutputConfig.cs create mode 100644 Ryujinx.Graphics.Vic/Types/OutputSurfaceConfig.cs create mode 100644 Ryujinx.Graphics.Vic/Types/PipeConfig.cs create mode 100644 Ryujinx.Graphics.Vic/Types/PixelFormat.cs create mode 100644 Ryujinx.Graphics.Vic/Types/SlotConfig.cs create mode 100644 Ryujinx.Graphics.Vic/Types/SlotStruct.cs create mode 100644 Ryujinx.Graphics.Vic/Types/SlotSurfaceConfig.cs create mode 100644 Ryujinx.Graphics.Vic/VicDevice.cs create mode 100644 Ryujinx.Graphics.Vic/VicRegisters.cs create mode 100644 Ryujinx.Graphics.Video/H264PictureInfo.cs create mode 100644 Ryujinx.Graphics.Video/IDecoder.cs create mode 100644 Ryujinx.Graphics.Video/IH264Decoder.cs create mode 100644 Ryujinx.Graphics.Video/ISurface.cs create mode 100644 Ryujinx.Graphics.Video/IVp9Decoder.cs 
create mode 100644 Ryujinx.Graphics.Video/Plane.cs create mode 100644 Ryujinx.Graphics.Video/Ryujinx.Graphics.Video.csproj create mode 100644 Ryujinx.Graphics.Video/Vp9BackwardUpdates.cs create mode 100644 Ryujinx.Graphics.Video/Vp9EntropyProbs.cs create mode 100644 Ryujinx.Graphics.Video/Vp9Mv.cs create mode 100644 Ryujinx.Graphics.Video/Vp9MvRef.cs create mode 100644 Ryujinx.Graphics.Video/Vp9PictureInfo.cs create mode 100644 Ryujinx.Memory/NativeMemoryManager.cs create mode 100644 Ryujinx/THIRDPARTY.md diff --git a/README.md b/README.md index 69041236..3400ced9 100644 --- a/README.md +++ b/README.md @@ -112,3 +112,9 @@ If you need help with setting up Ryujinx, you can ask questions in the #support If you have contributions, need support, have suggestions, or just want to get in touch with the team, join our [Discord server](https://discord.gg/N2FmfVc)! If you'd like to donate, please take a look at our [Patreon](https://www.patreon.com/ryujinx). + +## License + +This software is licensed under the terms of the MIT license. +This project makes use of code authored by the libvpx project, licensed under BSD and the ffmpeg project, licensed under LGPLv3. +See [LICENSE.txt](LICENSE.txt) and [THIRDPARTY.md](Ryujinx/THIRDPARTY.md) for more details. 
diff --git a/Ryujinx.Common/AsyncWorkQueue.cs b/Ryujinx.Common/AsyncWorkQueue.cs new file mode 100644 index 00000000..80f8dcfe --- /dev/null +++ b/Ryujinx.Common/AsyncWorkQueue.cs @@ -0,0 +1,100 @@ +using System; +using System.Collections.Concurrent; +using System.Threading; + +namespace Ryujinx.Common +{ + public sealed class AsyncWorkQueue : IDisposable + { + private readonly Thread _workerThread; + private readonly CancellationTokenSource _cts; + private readonly Action _workerAction; + private readonly BlockingCollection _queue; + + public bool IsCancellationRequested => _cts.IsCancellationRequested; + + public AsyncWorkQueue(Action callback, string name = null) : this(callback, name, new BlockingCollection()) + { + } + + public AsyncWorkQueue(Action callback, string name, BlockingCollection collection) + { + _cts = new CancellationTokenSource(); + _queue = collection; + _workerAction = callback; + _workerThread = new Thread(DoWork) { Name = name }; + + _workerThread.IsBackground = true; + _workerThread.Start(); + } + + private void DoWork() + { + try + { + foreach (var item in _queue.GetConsumingEnumerable(_cts.Token)) + { + _workerAction(item); + } + } + catch (OperationCanceledException) + { + } + } + + public void Cancel() + { + _cts.Cancel(); + } + + public void CancelAfter(int millisecondsDelay) + { + _cts.CancelAfter(millisecondsDelay); + } + + public void CancelAfter(TimeSpan delay) + { + _cts.CancelAfter(delay); + } + + public void Add(T workItem) + { + _queue.Add(workItem); + } + + public void Add(T workItem, CancellationToken cancellationToken) + { + _queue.Add(workItem, cancellationToken); + } + + public bool TryAdd(T workItem) + { + return _queue.TryAdd(workItem); + } + + public bool TryAdd(T workItem, int millisecondsDelay) + { + return _queue.TryAdd(workItem, millisecondsDelay); + } + + public bool TryAdd(T workItem, int millisecondsDelay, CancellationToken cancellationToken) + { + return _queue.TryAdd(workItem, millisecondsDelay, 
cancellationToken); + } + + public bool TryAdd(T workItem, TimeSpan timeout) + { + return _queue.TryAdd(workItem, timeout); + } + + public void Dispose() + { + _queue.CompleteAdding(); + _cts.Cancel(); + _workerThread.Join(); + + _queue.Dispose(); + _cts.Dispose(); + } + } +} diff --git a/Ryujinx.Common/Logging/LogClass.cs b/Ryujinx.Common/Logging/LogClass.cs index aad04891..a35d01a5 100644 --- a/Ryujinx.Common/Logging/LogClass.cs +++ b/Ryujinx.Common/Logging/LogClass.cs @@ -9,12 +9,14 @@ namespace Ryujinx.Common.Logging Emulation, Gpu, Hid, + Host1x, Kernel, KernelIpc, KernelScheduler, KernelSvc, Loader, ModLoader, + Nvdec, Ptc, Service, ServiceAcc, @@ -50,6 +52,7 @@ namespace Ryujinx.Common.Logging ServiceSss, ServiceTime, ServiceVi, - SurfaceFlinger + SurfaceFlinger, + Vic } } \ No newline at end of file diff --git a/Ryujinx.Common/Memory/ArrayPtr.cs b/Ryujinx.Common/Memory/ArrayPtr.cs new file mode 100644 index 00000000..7a145de2 --- /dev/null +++ b/Ryujinx.Common/Memory/ArrayPtr.cs @@ -0,0 +1,123 @@ +using System; +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace Ryujinx.Common.Memory +{ + /// + /// Represents an array of unmanaged resources. + /// + /// Array element type + public unsafe struct ArrayPtr : IEquatable>, IArray where T : unmanaged + { + private IntPtr _ptr; + + /// + /// Null pointer. + /// + public static ArrayPtr Null => new ArrayPtr() { _ptr = IntPtr.Zero }; + + /// + /// True if the pointer is null, false otherwise. + /// + public bool IsNull => _ptr == IntPtr.Zero; + + /// + /// Number of elements on the array. + /// + public int Length { get; } + + /// + /// Gets a reference to the item at the given index. + /// + /// + /// No bounds checks are performed, this allows negative indexing, + /// but care must be taken if the index may be out of bounds. 
+ /// + /// Index of the element + /// Reference to the element at the given index + public ref T this[int index] => ref Unsafe.AsRef((T*)_ptr + index); + + /// + /// Creates a new array from a given reference. + /// + /// + /// For data on the heap, proper pinning is necessary during + /// use. Failure to do so will result in memory corruption and crashes. + /// + /// Reference of the first array element + /// Number of elements on the array + public ArrayPtr(ref T value, int length) + { + _ptr = (IntPtr)Unsafe.AsPointer(ref value); + Length = length; + } + + /// + /// Creates a new array from a given pointer. + /// + /// Array base pointer + /// Number of elements on the array + public ArrayPtr(T* ptr, int length) + { + _ptr = (IntPtr)ptr; + Length = length; + } + + /// + /// Creates a new array from a given pointer. + /// + /// Array base pointer + /// Number of elements on the array + public ArrayPtr(IntPtr ptr, int length) + { + _ptr = ptr; + Length = length; + } + + /// + /// Splits the array starting at the specified position. + /// + /// Index where the new array should start + /// New array starting at the specified position + public ArrayPtr Slice(int start) => new ArrayPtr(ref this[start], Length - start); + + /// + /// Gets a span from the array. + /// + /// Span of the array + public Span ToSpan() => Length == 0 ? Span.Empty : MemoryMarshal.CreateSpan(ref this[0], Length); + + /// + /// Gets the array base pointer. 
+ /// + /// Base pointer + public T* ToPointer() => (T*)_ptr; + + public override bool Equals(object obj) + { + return obj is ArrayPtr other && Equals(other); + } + + public bool Equals([AllowNull] ArrayPtr other) + { + return _ptr == other._ptr && Length == other.Length; + } + + public override int GetHashCode() + { + return HashCode.Combine(_ptr, Length); + } + + public static bool operator ==(ArrayPtr left, ArrayPtr right) + { + return left.Equals(right); + } + + public static bool operator !=(ArrayPtr left, ArrayPtr right) + { + return !(left == right); + } + } +} diff --git a/Ryujinx.Common/Memory/IArray.cs b/Ryujinx.Common/Memory/IArray.cs new file mode 100644 index 00000000..8f17fade --- /dev/null +++ b/Ryujinx.Common/Memory/IArray.cs @@ -0,0 +1,21 @@ +namespace Ryujinx.Common.Memory +{ + /// + /// Array interface. + /// + /// Element type + public interface IArray where T : unmanaged + { + /// + /// Used to index the array. + /// + /// Element index + /// Element at the specified index + ref T this[int index] { get; } + + /// + /// Number of elements on the array. + /// + int Length { get; } + } +} diff --git a/Ryujinx.Common/Memory/Ptr.cs b/Ryujinx.Common/Memory/Ptr.cs new file mode 100644 index 00000000..66bcf569 --- /dev/null +++ b/Ryujinx.Common/Memory/Ptr.cs @@ -0,0 +1,68 @@ +using System; +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; + +namespace Ryujinx.Common.Memory +{ + /// + /// Represents a pointer to an unmanaged resource. + /// + /// Type of the unmanaged resource + public unsafe struct Ptr : IEquatable> where T : unmanaged + { + private IntPtr _ptr; + + /// + /// Null pointer. + /// + public static Ptr Null => new Ptr() { _ptr = IntPtr.Zero }; + + /// + /// True if the pointer is null, false otherwise. + /// + public bool IsNull => _ptr == IntPtr.Zero; + + /// + /// Gets a reference to the value. 
+ /// + public ref T Value => ref Unsafe.AsRef((void*)_ptr); + + /// + /// Creates a new pointer to an unmanaged resource. + /// + /// + /// For data on the heap, proper pinning is necessary during + /// use. Failure to do so will result in memory corruption and crashes. + /// + /// Reference to the unmanaged resource + public Ptr(ref T value) + { + _ptr = (IntPtr)Unsafe.AsPointer(ref value); + } + + public override bool Equals(object obj) + { + return obj is Ptr other && Equals(other); + } + + public bool Equals([AllowNull] Ptr other) + { + return _ptr == other._ptr; + } + + public override int GetHashCode() + { + return _ptr.GetHashCode(); + } + + public static bool operator ==(Ptr left, Ptr right) + { + return left.Equals(right); + } + + public static bool operator !=(Ptr left, Ptr right) + { + return !(left == right); + } + } +} diff --git a/Ryujinx.Common/Memory/StructArrayHelpers.cs b/Ryujinx.Common/Memory/StructArrayHelpers.cs new file mode 100644 index 00000000..eb8d3ce1 --- /dev/null +++ b/Ryujinx.Common/Memory/StructArrayHelpers.cs @@ -0,0 +1,518 @@ +using System; +using System.Runtime.InteropServices; + +namespace Ryujinx.Common.Memory +{ + public struct Array1 : IArray where T : unmanaged + { + T _e0; + public int Length => 1; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 1); + } + public struct Array2 : IArray where T : unmanaged + { + T _e0; + Array1 _other; + public int Length => 2; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 2); + } + public struct Array3 : IArray where T : unmanaged + { + T _e0; + Array2 _other; + public int Length => 3; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 3); + } + public struct Array4 : IArray where T : unmanaged + { + T _e0; + Array3 _other; + public int Length => 4; + public ref T this[int index] => ref 
ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 4); + } + public struct Array5 : IArray where T : unmanaged + { + T _e0; + Array4 _other; + public int Length => 5; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 5); + } + public struct Array6 : IArray where T : unmanaged + { + T _e0; + Array5 _other; + public int Length => 6; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 6); + } + public struct Array7 : IArray where T : unmanaged + { + T _e0; + Array6 _other; + public int Length => 7; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 7); + } + public struct Array8 : IArray where T : unmanaged + { + T _e0; + Array7 _other; + public int Length => 8; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 8); + } + public struct Array9 : IArray where T : unmanaged + { + T _e0; + Array8 _other; + public int Length => 9; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 9); + } + public struct Array10 : IArray where T : unmanaged + { + T _e0; + Array9 _other; + public int Length => 10; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 10); + } + public struct Array11 : IArray where T : unmanaged + { + T _e0; + Array10 _other; + public int Length => 11; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 11); + } + public struct Array12 : IArray where T : unmanaged + { + T _e0; + Array11 _other; + public int Length => 12; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 12); + } + public struct Array13 : IArray where T : unmanaged + 
{ + T _e0; + Array12 _other; + public int Length => 13; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 13); + } + public struct Array14 : IArray where T : unmanaged + { + T _e0; + Array13 _other; + public int Length => 14; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 14); + } + public struct Array15 : IArray where T : unmanaged + { + T _e0; + Array14 _other; + public int Length => 15; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 15); + } + public struct Array16 : IArray where T : unmanaged + { + T _e0; + Array15 _other; + public int Length => 16; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 16); + } + public struct Array17 : IArray where T : unmanaged + { + T _e0; + Array16 _other; + public int Length => 17; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 17); + } + public struct Array18 : IArray where T : unmanaged + { + T _e0; + Array17 _other; + public int Length => 18; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 18); + } + public struct Array19 : IArray where T : unmanaged + { + T _e0; + Array18 _other; + public int Length => 19; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 19); + } + public struct Array20 : IArray where T : unmanaged + { + T _e0; + Array19 _other; + public int Length => 20; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 20); + } + public struct Array21 : IArray where T : unmanaged + { + T _e0; + Array20 _other; + public int Length => 21; + public ref T this[int index] => ref ToSpan()[index]; + public 
Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 21); + } + public struct Array22 : IArray where T : unmanaged + { + T _e0; + Array21 _other; + public int Length => 22; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 22); + } + public struct Array23 : IArray where T : unmanaged + { + T _e0; + Array22 _other; + public int Length => 23; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 23); + } + public struct Array24 : IArray where T : unmanaged + { + T _e0; + Array23 _other; + public int Length => 24; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 24); + } + public struct Array25 : IArray where T : unmanaged + { + T _e0; + Array24 _other; + public int Length => 25; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 25); + } + public struct Array26 : IArray where T : unmanaged + { + T _e0; + Array25 _other; + public int Length => 26; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 26); + } + public struct Array27 : IArray where T : unmanaged + { + T _e0; + Array26 _other; + public int Length => 27; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 27); + } + public struct Array28 : IArray where T : unmanaged + { + T _e0; + Array27 _other; + public int Length => 28; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 28); + } + public struct Array29 : IArray where T : unmanaged + { + T _e0; + Array28 _other; + public int Length => 29; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 29); + } + public struct Array30 : IArray where T : unmanaged + { + 
T _e0; + Array29 _other; + public int Length => 30; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 30); + } + public struct Array31 : IArray where T : unmanaged + { + T _e0; + Array30 _other; + public int Length => 31; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 31); + } + public struct Array32 : IArray where T : unmanaged + { + T _e0; + Array31 _other; + public int Length => 32; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 32); + } + public struct Array33 : IArray where T : unmanaged + { + T _e0; + Array32 _other; + public int Length => 33; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 33); + } + public struct Array34 : IArray where T : unmanaged + { + T _e0; + Array33 _other; + public int Length => 34; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 34); + } + public struct Array35 : IArray where T : unmanaged + { + T _e0; + Array34 _other; + public int Length => 35; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 35); + } + public struct Array36 : IArray where T : unmanaged + { + T _e0; + Array35 _other; + public int Length => 36; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 36); + } + public struct Array37 : IArray where T : unmanaged + { + T _e0; + Array36 _other; + public int Length => 37; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 37); + } + public struct Array38 : IArray where T : unmanaged + { + T _e0; + Array37 _other; + public int Length => 38; + public ref T this[int index] => ref ToSpan()[index]; + public 
Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 38); + } + public struct Array39 : IArray where T : unmanaged + { + T _e0; + Array38 _other; + public int Length => 39; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 39); + } + public struct Array40 : IArray where T : unmanaged + { + T _e0; + Array39 _other; + public int Length => 40; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 40); + } + public struct Array41 : IArray where T : unmanaged + { + T _e0; + Array40 _other; + public int Length => 41; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 41); + } + public struct Array42 : IArray where T : unmanaged + { + T _e0; + Array41 _other; + public int Length => 42; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 42); + } + public struct Array43 : IArray where T : unmanaged + { + T _e0; + Array42 _other; + public int Length => 43; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 43); + } + public struct Array44 : IArray where T : unmanaged + { + T _e0; + Array43 _other; + public int Length => 44; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 44); + } + public struct Array45 : IArray where T : unmanaged + { + T _e0; + Array44 _other; + public int Length => 45; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 45); + } + public struct Array46 : IArray where T : unmanaged + { + T _e0; + Array45 _other; + public int Length => 46; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 46); + } + public struct Array47 : IArray where T : unmanaged + { + 
T _e0; + Array46 _other; + public int Length => 47; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 47); + } + public struct Array48 : IArray where T : unmanaged + { + T _e0; + Array47 _other; + public int Length => 48; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 48); + } + public struct Array49 : IArray where T : unmanaged + { + T _e0; + Array48 _other; + public int Length => 49; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 49); + } + public struct Array50 : IArray where T : unmanaged + { + T _e0; + Array49 _other; + public int Length => 50; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 50); + } + public struct Array51 : IArray where T : unmanaged + { + T _e0; + Array50 _other; + public int Length => 51; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 51); + } + public struct Array52 : IArray where T : unmanaged + { + T _e0; + Array51 _other; + public int Length => 52; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 52); + } + public struct Array53 : IArray where T : unmanaged + { + T _e0; + Array52 _other; + public int Length => 53; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 53); + } + public struct Array54 : IArray where T : unmanaged + { + T _e0; + Array53 _other; + public int Length => 54; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 54); + } + public struct Array55 : IArray where T : unmanaged + { + T _e0; + Array54 _other; + public int Length => 55; + public ref T this[int index] => ref ToSpan()[index]; + public 
Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 55); + } + public struct Array56 : IArray where T : unmanaged + { + T _e0; + Array55 _other; + public int Length => 56; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 56); + } + public struct Array57 : IArray where T : unmanaged + { + T _e0; + Array56 _other; + public int Length => 57; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 57); + } + public struct Array58 : IArray where T : unmanaged + { + T _e0; + Array57 _other; + public int Length => 58; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 58); + } + public struct Array59 : IArray where T : unmanaged + { + T _e0; + Array58 _other; + public int Length => 59; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 59); + } + public struct Array60 : IArray where T : unmanaged + { + T _e0; + Array59 _other; + public int Length => 60; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 60); + } + public struct Array61 : IArray where T : unmanaged + { + T _e0; + Array60 _other; + public int Length => 61; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 61); + } + public struct Array62 : IArray where T : unmanaged + { + T _e0; + Array61 _other; + public int Length => 62; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 62); + } + public struct Array63 : IArray where T : unmanaged + { + T _e0; + Array62 _other; + public int Length => 63; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 63); + } + public struct Array64 : IArray where T : unmanaged + { + 
T _e0; + Array63 _other; + public int Length => 64; + public ref T this[int index] => ref ToSpan()[index]; + public Span ToSpan() => MemoryMarshal.CreateSpan(ref _e0, 64); + } + +}
diff --git a/Ryujinx.Cpu/MemoryManager.cs b/Ryujinx.Cpu/MemoryManager.cs
index 001c41f8..211a8c0d 100644
--- a/Ryujinx.Cpu/MemoryManager.cs
+++ b/Ryujinx.Cpu/MemoryManager.cs
@@ -193,6 +193,38 @@ namespace Ryujinx.Cpu
             }
         }
 
+        /// <summary>
+        /// Gets a region of memory that can be written to.
+        /// </summary>
+        /// <remarks>
+        /// If the requested region is not contiguous in physical memory,
+        /// this will perform an allocation, and flush the data (writing it
+        /// back to guest memory) on disposal.
+        /// </remarks>
+        /// <param name="va">Virtual address of the data</param>
+        /// <param name="size">Size of the data</param>
+        /// <returns>A writable region of memory containing the data</returns>
+        public WritableRegion GetWritableRegion(ulong va, int size)
+        {
+            if (size == 0)
+            {
+                return new WritableRegion(null, va, Memory<byte>.Empty);
+            }
+
+            if (IsContiguous(va, size))
+            {
+                return new WritableRegion(null, va, _backingMemory.GetMemory(GetPhysicalAddressInternal(va), size));
+            }
+            else
+            {
+                Memory<byte> memory = new byte[size];
+
+                GetSpan(va, size).CopyTo(memory.Span);
+
+                return new WritableRegion(this, va, memory);
+            }
+        }
+
         /// <summary>
         /// Gets a reference for the given type at the specified virtual memory address.
/// </summary>
diff --git a/Ryujinx.Cpu/WritableRegion.cs b/Ryujinx.Cpu/WritableRegion.cs
new file mode 100644
index 00000000..5ea0a2d8
--- /dev/null
+++ b/Ryujinx.Cpu/WritableRegion.cs
@@ -0,0 +1,29 @@
+using System;
+
+namespace Ryujinx.Cpu
+{
+    public sealed class WritableRegion : IDisposable
+    {
+        private readonly MemoryManager _mm;
+        private readonly ulong _va;
+
+        private bool NeedsWriteback => _mm != null; // A null manager means Memory needs no copy-back on Dispose.
+
+        public Memory<byte> Memory { get; }
+
+        internal WritableRegion(MemoryManager mm, ulong va, Memory<byte> memory)
+        {
+            _mm = mm;
+            _va = va;
+            Memory = memory;
+        }
+
+        public void Dispose()
+        {
+            if (NeedsWriteback)
+            {
+                _mm.Write(_va, Memory.Span);
+            }
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Device/AccessControl.cs b/Ryujinx.Graphics.Device/AccessControl.cs
new file mode 100644
index 00000000..02203783
--- /dev/null
+++ b/Ryujinx.Graphics.Device/AccessControl.cs
@@ -0,0 +1,10 @@
+namespace Ryujinx.Graphics.Device
+{
+    public enum AccessControl
+    {
+        None = 0,
+        ReadOnly = 1 << 0,
+        WriteOnly = 1 << 1,
+        ReadWrite = ReadOnly | WriteOnly
+    }
+}
diff --git a/Ryujinx.Graphics.Device/DeviceState.cs b/Ryujinx.Graphics.Device/DeviceState.cs
new file mode 100644
index 00000000..ea6942ec
--- /dev/null
+++ b/Ryujinx.Graphics.Device/DeviceState.cs
@@ -0,0 +1,124 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Reflection;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Device
+{
+    public class DeviceState<TState> : IDeviceState where TState : unmanaged
+    {
+        private const int RegisterSize = sizeof(int);
+
+        public TState State;
+
+        private readonly BitArray _readableRegisters;
+        private readonly BitArray _writableRegisters;
+
+        private readonly Dictionary<int, Func<int>> _readCallbacks;
+        private readonly Dictionary<int, Action<int>> _writeCallbacks;
+
+        public DeviceState(IReadOnlyDictionary<string, RwCallback> callbacks = null)
+        {
+            int size = (Unsafe.SizeOf<TState>() + RegisterSize - 1) / RegisterSize;
+
+            _readableRegisters = new BitArray(size);
+            _writableRegisters = new BitArray(size);
+
+            _readCallbacks = new Dictionary<int, Func<int>>();
+            _writeCallbacks = new Dictionary<int, Action<int>>();
+
+            var fields = typeof(TState).GetFields();
+            int offset = 0;
+
+            for (int fieldIndex = 0; fieldIndex < fields.Length; fieldIndex++)
+            {
+                var field = fields[fieldIndex];
+                var regAttr = field.GetCustomAttributes<RegisterAttribute>(false).FirstOrDefault();
+
+                int sizeOfField = SizeCalculator.SizeOf(field.FieldType);
+
+                for (int i = 0; i < ((sizeOfField + 3) & ~3); i += 4)
+                {
+                    _readableRegisters[(offset + i) / RegisterSize] = regAttr?.AccessControl.HasFlag(AccessControl.ReadOnly) ?? true;
+                    _writableRegisters[(offset + i) / RegisterSize] = regAttr?.AccessControl.HasFlag(AccessControl.WriteOnly) ?? true;
+                }
+
+                if (callbacks != null && callbacks.TryGetValue(field.Name, out var cb))
+                {
+                    if (cb.Read != null)
+                    {
+                        _readCallbacks.Add(offset, cb.Read);
+                    }
+
+                    if (cb.Write != null)
+                    {
+                        _writeCallbacks.Add(offset, cb.Write);
+                    }
+                }
+
+                offset += sizeOfField;
+            }
+
+            Debug.Assert(offset == Unsafe.SizeOf<TState>());
+        }
+
+        public virtual int Read(int offset)
+        {
+            if (Check(offset) && _readableRegisters[offset / RegisterSize])
+            {
+                int alignedOffset = Align(offset);
+
+                if (_readCallbacks.TryGetValue(alignedOffset, out Func<int> read))
+                {
+                    return read();
+                }
+                else
+                {
+                    return GetRef<int>(alignedOffset);
+                }
+            }
+
+            return 0;
+        }
+
+        public virtual void Write(int offset, int data)
+        {
+            if (Check(offset) && _writableRegisters[offset / RegisterSize])
+            {
+                int alignedOffset = Align(offset);
+
+                if (_writeCallbacks.TryGetValue(alignedOffset, out Action<int> write))
+                {
+                    write(data);
+                }
+                else
+                {
+                    GetRef<int>(alignedOffset) = data;
+                }
+            }
+        }
+
+        private bool Check(int offset)
+        {
+            return (uint)Align(offset) < Unsafe.SizeOf<TState>();
+        }
+
+        public ref T GetRef<T>(int offset) where T : unmanaged
+        {
+            if (offset < 0 || offset > Unsafe.SizeOf<TState>() - Unsafe.SizeOf<T>()) // Also rejects negative offsets, which the old (uint)(offset + size) test let through.
+            {
+                throw new ArgumentOutOfRangeException(nameof(offset));
+            }
+
+            return ref Unsafe.As<TState, T>(ref Unsafe.AddByteOffset(ref State, (IntPtr)offset));
+        }
+
+        private static int Align(int offset)
+        {
+            return offset & ~(RegisterSize - 1);
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Device/IDeviceState.cs b/Ryujinx.Graphics.Device/IDeviceState.cs
new file mode 100644
index 00000000..077d69f2
--- /dev/null
+++ b/Ryujinx.Graphics.Device/IDeviceState.cs
@@ -0,0 +1,8 @@
+namespace Ryujinx.Graphics.Device
+{
+    public interface IDeviceState
+    {
+        int Read(int offset);
+        void Write(int offset, int data);
+    }
+}
diff --git a/Ryujinx.Graphics.Device/RegisterAttribute.cs b/Ryujinx.Graphics.Device/RegisterAttribute.cs
new file mode 100644
index 00000000..6e198963
--- /dev/null
+++ b/Ryujinx.Graphics.Device/RegisterAttribute.cs
@@ -0,0 +1,15 @@
+using System;
+
+namespace Ryujinx.Graphics.Device
+{
+    [AttributeUsage(AttributeTargets.Field, AllowMultiple = false)]
+    public sealed class RegisterAttribute : Attribute
+    {
+        public AccessControl AccessControl { get; }
+
+        public RegisterAttribute(AccessControl ac)
+        {
+            AccessControl = ac;
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Device/RwCallback.cs b/Ryujinx.Graphics.Device/RwCallback.cs
new file mode 100644
index 00000000..6f1c8898
--- /dev/null
+++ b/Ryujinx.Graphics.Device/RwCallback.cs
@@ -0,0 +1,16 @@
+using System;
+
+namespace Ryujinx.Graphics.Device
+{
+    public struct RwCallback
+    {
+        public Action<int> Write { get; }
+        public Func<int> Read { get; }
+
+        public RwCallback(Action<int> write, Func<int> read)
+        {
+            Write = write;
+            Read = read;
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Device/Ryujinx.Graphics.Device.csproj b/Ryujinx.Graphics.Device/Ryujinx.Graphics.Device.csproj
new file mode 100644
index 00000000..7c4ae4ca
--- /dev/null
+++ b/Ryujinx.Graphics.Device/Ryujinx.Graphics.Device.csproj
@@ -0,0 +1,7 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <TargetFramework>netcoreapp3.1</TargetFramework>
+  </PropertyGroup>
+
+</Project>
diff --git a/Ryujinx.Graphics.Device/SizeCalculator.cs b/Ryujinx.Graphics.Device/SizeCalculator.cs
new file mode 100644
index 00000000..7cc48915
--- /dev/null
+++ b/Ryujinx.Graphics.Device/SizeCalculator.cs
@@ -0,0 +1,63 @@
+using
System;
+using System.Reflection;
+
+namespace Ryujinx.Graphics.Device
+{
+    static class SizeCalculator
+    {
+        public static int SizeOf(Type type)
+        {
+            // An enum is sized by its underlying integral type.
+            if (type.IsEnum)
+            {
+                type = type.GetEnumUnderlyingType();
+            }
+
+            // Pointer types, IntPtr and UIntPtr all take IntPtr.Size bytes.
+            if (type.IsPointer || type == typeof(IntPtr) || type == typeof(UIntPtr))
+            {
+                return IntPtr.Size;
+            }
+
+            // Non-primitive value types, i.e. structs.
+            if (type.IsValueType && !type.IsPrimitive)
+            {
+                // If the struct declares an explicit StructLayout size, return that.
+                if (type.StructLayoutAttribute.Size != 0)
+                {
+                    return type.StructLayoutAttribute.Size;
+                }
+
+                // Otherwise, recursively sum the sizes of all instance fields (no padding is added).
+                int size = 0;
+                var fields = type.GetFields(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance);
+
+                for (int fieldIndex = 0; fieldIndex < fields.Length; fieldIndex++)
+                {
+                    size += SizeOf(fields[fieldIndex].FieldType);
+                }
+
+                return size;
+            }
+
+            // Primitive types; any other type code throws ArgumentException.
+            return (Type.GetTypeCode(type)) switch
+            {
+                TypeCode.SByte => sizeof(sbyte),
+                TypeCode.Byte => sizeof(byte),
+                TypeCode.Int16 => sizeof(short),
+                TypeCode.UInt16 => sizeof(ushort),
+                TypeCode.Int32 => sizeof(int),
+                TypeCode.UInt32 => sizeof(uint),
+                TypeCode.Int64 => sizeof(long),
+                TypeCode.UInt64 => sizeof(ulong),
+                TypeCode.Char => sizeof(char),
+                TypeCode.Single => sizeof(float),
+                TypeCode.Double => sizeof(double),
+                TypeCode.Decimal => sizeof(decimal),
+                TypeCode.Boolean => sizeof(bool),
+                _ => throw new ArgumentException($"Length for type \"{type.Name}\" is unknown.")
+            };
+        }
+    }
+}
diff --git a/Ryujinx.Graphics.Gpu/Engine/Compute.cs b/Ryujinx.Graphics.Gpu/Engine/Compute.cs index e40984af..60fba006 100644 --- a/Ryujinx.Graphics.Gpu/Engine/Compute.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Compute.cs @@ -67,7 +67,7 @@ namespace Ryujinx.Graphics.Gpu.Engine TextureManager.SetComputeTextureBufferIndex(state.Get(MethodOffset.TextureBufferIndex)); - ShaderProgramInfo info = cs.Shaders[0].Program.Info; +
ShaderProgramInfo info = cs.Shaders[0].Program.Info; for (int index = 0; index < info.CBuffers.Count; index++) { diff --git a/Ryujinx.Graphics.Gpu/Engine/MethodConditionalRendering.cs b/Ryujinx.Graphics.Gpu/Engine/MethodConditionalRendering.cs index c8d47b9f..225c732e 100644 --- a/Ryujinx.Graphics.Gpu/Engine/MethodConditionalRendering.cs +++ b/Ryujinx.Graphics.Gpu/Engine/MethodConditionalRendering.cs @@ -63,7 +63,7 @@ namespace Ryujinx.Graphics.Gpu.Engine else { evt.Flush(); - return (_context.MemoryAccessor.ReadUInt64(gpuVa) != 0) ? ConditionalRenderEnabled.True : ConditionalRenderEnabled.False; + return (_context.MemoryAccessor.Read(gpuVa) != 0) ? ConditionalRenderEnabled.True : ConditionalRenderEnabled.False; } } @@ -87,11 +87,11 @@ namespace Ryujinx.Graphics.Gpu.Engine if (evt != null && evt2 == null) { - useHost = _context.Renderer.Pipeline.TryHostConditionalRendering(evt, _context.MemoryAccessor.ReadUInt64(gpuVa + 16), isEqual); + useHost = _context.Renderer.Pipeline.TryHostConditionalRendering(evt, _context.MemoryAccessor.Read(gpuVa + 16), isEqual); } else if (evt == null && evt2 != null) { - useHost = _context.Renderer.Pipeline.TryHostConditionalRendering(evt2, _context.MemoryAccessor.ReadUInt64(gpuVa), isEqual); + useHost = _context.Renderer.Pipeline.TryHostConditionalRendering(evt2, _context.MemoryAccessor.Read(gpuVa), isEqual); } else { @@ -107,8 +107,8 @@ namespace Ryujinx.Graphics.Gpu.Engine evt?.Flush(); evt2?.Flush(); - ulong x = _context.MemoryAccessor.ReadUInt64(gpuVa); - ulong y = _context.MemoryAccessor.ReadUInt64(gpuVa + 16); + ulong x = _context.MemoryAccessor.Read(gpuVa); + ulong y = _context.MemoryAccessor.Read(gpuVa + 16); return (isEqual ? x == y : x != y) ? 
ConditionalRenderEnabled.True : ConditionalRenderEnabled.False; } diff --git a/Ryujinx.Graphics.Gpu/Engine/Methods.cs b/Ryujinx.Graphics.Gpu/Engine/Methods.cs index 06298cdf..d5b11c2c 100644 --- a/Ryujinx.Graphics.Gpu/Engine/Methods.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Methods.cs @@ -466,7 +466,7 @@ namespace Ryujinx.Graphics.Gpu.Engine bool flipY = yControl.HasFlag(YControl.NegateY); Origin origin = yControl.HasFlag(YControl.TriangleRastFlip) ? Origin.LowerLeft : Origin.UpperLeft; - + _context.Renderer.Pipeline.SetOrigin(origin); // The triangle rast flip flag only affects rasterization, the viewport is not flipped. diff --git a/Ryujinx.Graphics.Gpu/GpuContext.cs b/Ryujinx.Graphics.Gpu/GpuContext.cs index 5e117831..b07694b9 100644 --- a/Ryujinx.Graphics.Gpu/GpuContext.cs +++ b/Ryujinx.Graphics.Gpu/GpuContext.cs @@ -77,7 +77,7 @@ namespace Ryujinx.Graphics.Gpu { Renderer = renderer; - MemoryManager = new MemoryManager(); + MemoryManager = new MemoryManager(this); MemoryAccessor = new MemoryAccessor(this); diff --git a/Ryujinx.Graphics.Gpu/Image/TextureManager.cs b/Ryujinx.Graphics.Gpu/Image/TextureManager.cs index ccd56ae2..69bee541 100644 --- a/Ryujinx.Graphics.Gpu/Image/TextureManager.cs +++ b/Ryujinx.Graphics.Gpu/Image/TextureManager.cs @@ -643,6 +643,8 @@ namespace Ryujinx.Graphics.Gpu.Image overlap.ChangeSize(info.Width, info.Height, info.DepthOrLayers); } + overlap.SynchronizeMemory(); + return overlap; } } diff --git a/Ryujinx.Graphics.Gpu/Memory/MemoryAccessor.cs b/Ryujinx.Graphics.Gpu/Memory/MemoryAccessor.cs index 38f448d9..5cc8ec24 100644 --- a/Ryujinx.Graphics.Gpu/Memory/MemoryAccessor.cs +++ b/Ryujinx.Graphics.Gpu/Memory/MemoryAccessor.cs @@ -58,42 +58,6 @@ namespace Ryujinx.Graphics.Gpu.Memory return MemoryMarshal.Cast(_context.PhysicalMemory.GetSpan(processVa, Unsafe.SizeOf()))[0]; } - /// - /// Reads a 32-bits signed integer from GPU mapped memory. 
- /// - /// GPU virtual address where the value is located - /// The value at the specified memory location - public int ReadInt32(ulong gpuVa) - { - ulong processVa = _context.MemoryManager.Translate(gpuVa); - - return _context.PhysicalMemory.Read(processVa); - } - - /// - /// Reads a 64-bits unsigned integer from GPU mapped memory. - /// - /// GPU virtual address where the value is located - /// The value at the specified memory location - public ulong ReadUInt64(ulong gpuVa) - { - ulong processVa = _context.MemoryManager.Translate(gpuVa); - - return _context.PhysicalMemory.Read(processVa); - } - - /// - /// Reads a 8-bits unsigned integer from GPU mapped memory. - /// - /// GPU virtual address where the value is located - /// The value to be written - public void WriteByte(ulong gpuVa, byte value) - { - ulong processVa = _context.MemoryManager.Translate(gpuVa); - - _context.PhysicalMemory.Write(processVa, MemoryMarshal.CreateSpan(ref value, 1)); - } - /// /// Writes a 32-bits signed integer to GPU mapped memory. /// diff --git a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs index a9a8fbac..2d988f8d 100644 --- a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs +++ b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs @@ -1,4 +1,7 @@ +using Ryujinx.Cpu; using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; namespace Ryujinx.Graphics.Gpu.Memory { @@ -33,14 +36,69 @@ namespace Ryujinx.Graphics.Gpu.Memory public event EventHandler MemoryUnmapped; + private GpuContext _context; + /// /// Creates a new instance of the GPU memory manager. /// - public MemoryManager() + public MemoryManager(GpuContext context) { + _context = context; _pageTable = new ulong[PtLvl0Size][]; } + /// + /// Reads data from GPU mapped memory. 
+ /// + /// Type of the data + /// GPU virtual address where the data is located + /// The data at the specified memory location + public T Read(ulong gpuVa) where T : unmanaged + { + ulong processVa = Translate(gpuVa); + + return MemoryMarshal.Cast(_context.PhysicalMemory.GetSpan(processVa, Unsafe.SizeOf()))[0]; + } + + /// + /// Gets a read-only span of data from GPU mapped memory. + /// This reads as much data as possible, up to the specified maximum size. + /// + /// GPU virtual address where the data is located + /// Size of the data + /// The span of the data at the specified memory location + public ReadOnlySpan GetSpan(ulong gpuVa, int size) + { + ulong processVa = Translate(gpuVa); + + return _context.PhysicalMemory.GetSpan(processVa, size); + } + + /// + /// Gets a writable region from GPU mapped memory. + /// + /// Start address of the range + /// Size in bytes to be range + /// A writable region with the data at the specified memory location + public WritableRegion GetWritableRegion(ulong gpuVa, int size) + { + ulong processVa = Translate(gpuVa); + + return _context.PhysicalMemory.GetWritableRegion(processVa, size); + } + + /// + /// Writes data to GPU mapped memory. + /// + /// GPU virtual address to write the data into + /// The data to be written + public void Write(ulong gpuVa, ReadOnlySpan data) + { + ulong processVa = Translate(gpuVa); + + _context.PhysicalMemory.Write(processVa, data); + } + /// /// Maps a given range of pages to the specified CPU virtual address. 
/// diff --git a/Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs b/Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs index 4a80aa1a..88beab8f 100644 --- a/Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs +++ b/Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs @@ -1,3 +1,4 @@ +using Ryujinx.Cpu; using System; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -34,6 +35,17 @@ namespace Ryujinx.Graphics.Gpu.Memory return _cpuMemory.GetSpan(address, size); } + /// + /// Gets a writable region from the application process. + /// + /// Start address of the range + /// Size in bytes to be range + /// A writable region with the data at the specified memory location + public WritableRegion GetWritableRegion(ulong address, int size) + { + return _cpuMemory.GetWritableRegion(address, size); + } + /// /// Reads data from the application process. /// diff --git a/Ryujinx.Graphics.Host1x/ClassId.cs b/Ryujinx.Graphics.Host1x/ClassId.cs new file mode 100644 index 00000000..dfeadd4c --- /dev/null +++ b/Ryujinx.Graphics.Host1x/ClassId.cs @@ -0,0 +1,20 @@ +namespace Ryujinx.Graphics.Host1x +{ + public enum ClassId + { + Host1x = 0x1, + Mpeg = 0x20, + Nvenc = 0x21, + Vi = 0x30, + Isp = 0x32, + Ispb = 0x34, + Vii2c = 0x36, + Vic = 0x5d, + Gr3d = 0x60, + Gpu = 0x61, + Tsec = 0xe0, + Tsecb = 0xe1, + Nvjpg = 0xc0, + Nvdec = 0xf0 + } +} diff --git a/Ryujinx.Graphics.Host1x/Devices.cs b/Ryujinx.Graphics.Host1x/Devices.cs new file mode 100644 index 00000000..5b3bed6b --- /dev/null +++ b/Ryujinx.Graphics.Host1x/Devices.cs @@ -0,0 +1,32 @@ +using Ryujinx.Graphics.Device; +using System; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Host1x +{ + class Devices : IDisposable + { + private readonly Dictionary _devices = new Dictionary(); + + public void RegisterDevice(ClassId classId, IDeviceState device) + { + _devices[classId] = device; + } + + public IDeviceState GetDevice(ClassId classId) + { + return _devices.TryGetValue(classId, out IDeviceState device) ? 
device : null; + } + + public void Dispose() + { + foreach (var device in _devices.Values) + { + if (device is ThiDevice thi) + { + thi.Dispose(); + } + } + } + } +} diff --git a/Ryujinx.Graphics.Host1x/Host1xClass.cs b/Ryujinx.Graphics.Host1x/Host1xClass.cs new file mode 100644 index 00000000..1a1297f9 --- /dev/null +++ b/Ryujinx.Graphics.Host1x/Host1xClass.cs @@ -0,0 +1,33 @@ +using Ryujinx.Graphics.Device; +using Ryujinx.Graphics.Gpu.Synchronization; +using System.Collections.Generic; +using System.Threading; + +namespace Ryujinx.Graphics.Host1x +{ + public class Host1xClass : IDeviceState + { + private readonly SynchronizationManager _syncMgr; + private readonly DeviceState _state; + + public Host1xClass(SynchronizationManager syncMgr) + { + _syncMgr = syncMgr; + _state = new DeviceState(new Dictionary + { + { nameof(Host1xClassRegisters.WaitSyncpt32), new RwCallback(WaitSyncpt32, null) } + }); + } + + public int Read(int offset) => _state.Read(offset); + public void Write(int offset, int data) => _state.Write(offset, data); + + private void WaitSyncpt32(int data) + { + uint syncpointId = (uint)(data & 0xFF); + uint threshold = _state.State.LoadSyncptPayload32; + + _syncMgr.WaitOnSyncpoint(syncpointId, threshold, Timeout.InfiniteTimeSpan); + } + } +} diff --git a/Ryujinx.Graphics.Host1x/Host1xClassRegisters.cs b/Ryujinx.Graphics.Host1x/Host1xClassRegisters.cs new file mode 100644 index 00000000..e476bdfa --- /dev/null +++ b/Ryujinx.Graphics.Host1x/Host1xClassRegisters.cs @@ -0,0 +1,41 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Host1x +{ + struct Host1xClassRegisters + { + public uint IncrSyncpt; + public uint IncrSyncptCntrl; + public uint IncrSyncptError; + public Array5 ReservedC; + public uint WaitSyncpt; + public uint WaitSyncptBase; + public uint WaitSyncptIncr; + public uint LoadSyncptBase; + public uint IncrSyncptBase; + public uint Clear; + public uint Wait; + public uint WaitWithIntr; + public uint DelayUsec; + public uint 
TickcountHi; + public uint TickcountLo; + public uint Tickctrl; + public Array23 Reserved50; + public uint Indctrl; + public uint Indoff2; + public uint Indoff; + public Array31 Inddata; + public uint Reserved134; + public uint LoadSyncptPayload32; + public uint Stallctrl; + public uint WaitSyncpt32; + public uint WaitSyncptBase32; + public uint LoadSyncptBase32; + public uint IncrSyncptBase32; + public uint StallcountHi; + public uint StallcountLo; + public uint Xrefctrl; + public uint ChannelXrefHi; + public uint ChannelXrefLo; + } +} diff --git a/Ryujinx.Graphics.Host1x/Host1xDevice.cs b/Ryujinx.Graphics.Host1x/Host1xDevice.cs new file mode 100644 index 00000000..6406378f --- /dev/null +++ b/Ryujinx.Graphics.Host1x/Host1xDevice.cs @@ -0,0 +1,123 @@ +using Ryujinx.Common; +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.Device; +using Ryujinx.Graphics.Gpu.Synchronization; +using System; +using System.Numerics; + +namespace Ryujinx.Graphics.Host1x +{ + public sealed class Host1xDevice : IDisposable + { + private readonly SyncptIncrManager _syncptIncrMgr; + private readonly AsyncWorkQueue _commandQueue; + + private readonly Devices _devices = new Devices(); + + public Host1xClass Class { get; } + + private IDeviceState _device; + + private int _count; + private int _offset; + private int _mask; + private bool _incrementing; + + public Host1xDevice(SynchronizationManager syncMgr) + { + _syncptIncrMgr = new SyncptIncrManager(syncMgr); + _commandQueue = new AsyncWorkQueue(Process, "Ryujinx.Host1xProcessor"); + + Class = new Host1xClass(syncMgr); + + _devices.RegisterDevice(ClassId.Host1x, Class); + } + + public void RegisterDevice(ClassId classId, IDeviceState device) + { + var thi = new ThiDevice(classId, device ?? 
throw new ArgumentNullException(nameof(device)), _syncptIncrMgr); + _devices.RegisterDevice(classId, thi); + } + + public void Submit(ReadOnlySpan commandBuffer) + { + _commandQueue.Add(commandBuffer.ToArray()); + } + + private void Process(int[] commandBuffer) + { + for (int index = 0; index < commandBuffer.Length; index++) + { + Step(commandBuffer[index]); + } + } + + private void Step(int value) + { + if (_mask != 0) + { + int lbs = BitOperations.TrailingZeroCount(_mask); + + _mask &= ~(1 << lbs); + + DeviceWrite(_offset + lbs, value); + + return; + } + else if (_count != 0) + { + _count--; + + DeviceWrite(_offset, value); + + if (_incrementing) + { + _offset++; + } + + return; + } + + OpCode opCode = (OpCode)((value >> 28) & 0xf); + + switch (opCode) + { + case OpCode.SetClass: + _mask = value & 0x3f; + ClassId classId = (ClassId)((value >> 6) & 0x3ff); + _offset = (value >> 16) & 0xfff; + _device = _devices.GetDevice(classId); + break; + case OpCode.Incr: + case OpCode.NonIncr: + _count = value & 0xffff; + _offset = (value >> 16) & 0xfff; + _incrementing = opCode == OpCode.Incr; + break; + case OpCode.Mask: + _mask = value & 0xffff; + _offset = (value >> 16) & 0xfff; + break; + case OpCode.Imm: + int data = value & 0xfff; + _offset = (value >> 16) & 0xfff; + DeviceWrite(_offset, data); + break; + default: + Logger.PrintError(LogClass.Host1x, $"Unsupported opcode \"{opCode}\"."); + break; + } + } + + private void DeviceWrite(int offset, int data) + { + _device?.Write(offset * 4, data); + } + + public void Dispose() + { + _commandQueue.Dispose(); + _devices.Dispose(); + } + } +} diff --git a/Ryujinx.Graphics.Host1x/OpCode.cs b/Ryujinx.Graphics.Host1x/OpCode.cs new file mode 100644 index 00000000..2ec6034b --- /dev/null +++ b/Ryujinx.Graphics.Host1x/OpCode.cs @@ -0,0 +1,21 @@ +namespace Ryujinx.Graphics.Host1x +{ + enum OpCode + { + SetClass, + Incr, + NonIncr, + Mask, + Imm, + Restart, + Gather, + SetStrmId, + SetAppId, + SetPyld, + IncrW, + NonIncrW, + GatherW, 
+ RestartW, + Extend + } +} diff --git a/Ryujinx.Graphics.Host1x/Ryujinx.Graphics.Host1x.csproj b/Ryujinx.Graphics.Host1x/Ryujinx.Graphics.Host1x.csproj new file mode 100644 index 00000000..4c0736cf --- /dev/null +++ b/Ryujinx.Graphics.Host1x/Ryujinx.Graphics.Host1x.csproj @@ -0,0 +1,20 @@ + + + + netcoreapp3.1 + + + + false + + + + false + + + + + + + + diff --git a/Ryujinx.Graphics.Host1x/SyncptIncrManager.cs b/Ryujinx.Graphics.Host1x/SyncptIncrManager.cs new file mode 100644 index 00000000..82ac5e7d --- /dev/null +++ b/Ryujinx.Graphics.Host1x/SyncptIncrManager.cs @@ -0,0 +1,99 @@ +using Ryujinx.Graphics.Gpu.Synchronization; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Host1x +{ + class SyncptIncrManager + { + private readonly SynchronizationManager _syncMgr; + + private struct SyncptIncr + { + public uint Id { get; } + public ClassId ClassId { get; } + public uint SyncptId { get; } + public bool Done { get; } + + public SyncptIncr(uint id, ClassId classId, uint syncptId, bool done = false) + { + Id = id; + ClassId = classId; + SyncptId = syncptId; + Done = done; + } + } + + private readonly List _incrs = new List(); + + private uint _currentId; + + public SyncptIncrManager(SynchronizationManager syncMgr) + { + _syncMgr = syncMgr; + } + + public void Increment(uint id) + { + lock (_incrs) + { + _incrs.Add(new SyncptIncr(0, 0, id, true)); + + IncrementAllDone(); + } + } + + public uint IncrementWhenDone(ClassId classId, uint id) + { + lock (_incrs) + { + uint handle = _currentId++; + + _incrs.Add(new SyncptIncr(handle, classId, id)); + + return handle; + } + } + + public void SignalDone(uint handle) + { + lock (_incrs) + { + // Set pending increment with the given handle to "done". 
+ for (int i = 0; i < _incrs.Count; i++) + { + SyncptIncr incr = _incrs[i]; + + if (_incrs[i].Id == handle) + { + _incrs[i] = new SyncptIncr(incr.Id, incr.ClassId, incr.SyncptId, true); + + break; + } + } + + IncrementAllDone(); + } + } + + private void IncrementAllDone() + { + lock (_incrs) + { + // Increment all sequential pending increments that are already done. + int doneCount = 0; + + for (; doneCount < _incrs.Count; doneCount++) + { + if (!_incrs[doneCount].Done) + { + break; + } + + _syncMgr.IncrementSyncpoint(_incrs[doneCount].SyncptId); + } + + _incrs.RemoveRange(0, doneCount); + } + } + } +} diff --git a/Ryujinx.Graphics.Host1x/ThiDevice.cs b/Ryujinx.Graphics.Host1x/ThiDevice.cs new file mode 100644 index 00000000..8e3e11b0 --- /dev/null +++ b/Ryujinx.Graphics.Host1x/ThiDevice.cs @@ -0,0 +1,96 @@ +using Ryujinx.Common; +using Ryujinx.Graphics.Device; +using System; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Host1x +{ + class ThiDevice : IDeviceState, IDisposable + { + private readonly ClassId _classId; + private readonly IDeviceState _device; + + private readonly SyncptIncrManager _syncptIncrMgr; + + private class CommandAction + { + public int Data { get; } + + public CommandAction(int data) + { + Data = data; + } + } + + private class MethodCallAction : CommandAction + { + public int Method { get; } + + public MethodCallAction(int method, int data) : base(data) + { + Method = method; + } + } + + private class SyncptIncrAction : CommandAction + { + public SyncptIncrAction(uint syncptIncrHandle) : base((int)syncptIncrHandle) + { + } + } + + private readonly AsyncWorkQueue _commandQueue; + + private readonly DeviceState _state; + + public ThiDevice(ClassId classId, IDeviceState device, SyncptIncrManager syncptIncrMgr) + { + _classId = classId; + _device = device; + _syncptIncrMgr = syncptIncrMgr; + _commandQueue = new AsyncWorkQueue(Process, $"Ryujinx.{classId}Processor"); + _state = new DeviceState(new Dictionary + { + { 
nameof(ThiRegisters.IncrSyncpt), new RwCallback(IncrSyncpt, null) }, + { nameof(ThiRegisters.Method1), new RwCallback(Method1, null) } + }); + } + + public int Read(int offset) => _state.Read(offset); + public void Write(int offset, int data) => _state.Write(offset, data); + + private void IncrSyncpt(int data) + { + uint syncpointId = (uint)(data & 0xFF); + uint cond = (uint)((data >> 8) & 0xFF); // 0 = Immediate, 1 = Done + + if (cond == 0) + { + _syncptIncrMgr.Increment(syncpointId); + } + else + { + _commandQueue.Add(new SyncptIncrAction(_syncptIncrMgr.IncrementWhenDone(_classId, syncpointId))); + } + } + + private void Method1(int data) + { + _commandQueue.Add(new MethodCallAction((int)_state.State.Method0 * 4, data)); + } + + private void Process(CommandAction cmdAction) + { + if (cmdAction is SyncptIncrAction syncptIncrAction) + { + _syncptIncrMgr.SignalDone((uint)syncptIncrAction.Data); + } + else if (cmdAction is MethodCallAction methodCallAction) + { + _device.Write(methodCallAction.Method, methodCallAction.Data); + } + } + + public void Dispose() => _commandQueue.Dispose(); + } +} diff --git a/Ryujinx.Graphics.Host1x/ThiRegisters.cs b/Ryujinx.Graphics.Host1x/ThiRegisters.cs new file mode 100644 index 00000000..00c93182 --- /dev/null +++ b/Ryujinx.Graphics.Host1x/ThiRegisters.cs @@ -0,0 +1,22 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Host1x +{ + struct ThiRegisters + { + public uint IncrSyncpt; + public uint Reserved4; + public uint IncrSyncptErr; + public uint CtxswIncrSyncpt; + public Array4 Reserved10; + public uint Ctxsw; + public uint Reserved24; + public uint ContSyncptEof; + public Array5 Reserved2C; + public uint Method0; + public uint Method1; + public Array12 Reserved48; + public uint IntStatus; + public uint IntMask; + } +} diff --git a/Ryujinx.Graphics.Nvdec.H264/Decoder.cs b/Ryujinx.Graphics.Nvdec.H264/Decoder.cs new file mode 100644 index 00000000..7a7e184a --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.H264/Decoder.cs @@ 
-0,0 +1,40 @@ +using Ryujinx.Graphics.Video; +using System; + +namespace Ryujinx.Graphics.Nvdec.H264 +{ + public class Decoder : IH264Decoder + { + public bool IsHardwareAccelerated => false; + + private const int WorkBufferSize = 0x200; + + private readonly byte[] _workBuffer = new byte[WorkBufferSize]; + + private readonly FFmpegContext _context = new FFmpegContext(); + + public ISurface CreateSurface(int width, int height) + { + return new Surface(); + } + + public bool Decode(ref H264PictureInfo pictureInfo, ISurface output, ReadOnlySpan bitstream) + { + Span bs = Prepend(bitstream, SpsAndPpsReconstruction.Reconstruct(ref pictureInfo, _workBuffer)); + + return _context.DecodeFrame((Surface)output, bs) == 0; + } + + private static byte[] Prepend(ReadOnlySpan data, ReadOnlySpan prep) + { + byte[] output = new byte[data.Length + prep.Length]; + + prep.CopyTo(output); + data.CopyTo(new Span(output).Slice(prep.Length)); + + return output; + } + + public void Dispose() => _context.Dispose(); + } +} diff --git a/Ryujinx.Graphics.Nvdec.H264/FFmpegContext.cs b/Ryujinx.Graphics.Nvdec.H264/FFmpegContext.cs new file mode 100644 index 00000000..b4f9206b --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.H264/FFmpegContext.cs @@ -0,0 +1,51 @@ +using FFmpeg.AutoGen; +using System; + +namespace Ryujinx.Graphics.Nvdec.H264 +{ + unsafe class FFmpegContext : IDisposable + { + private readonly AVCodec* _codec; + private AVCodecContext* _context; + + public FFmpegContext() + { + _codec = ffmpeg.avcodec_find_decoder(AVCodecID.AV_CODEC_ID_H264); + _context = ffmpeg.avcodec_alloc_context3(_codec); + + ffmpeg.avcodec_open2(_context, _codec, null); + } + + public int DecodeFrame(Surface output, ReadOnlySpan bitstream) + { + AVPacket packet; + + ffmpeg.av_init_packet(&packet); + + fixed (byte* ptr = bitstream) + { + packet.data = ptr; + packet.size = bitstream.Length; + + int rc = ffmpeg.avcodec_send_packet(_context, &packet); + + if (rc != 0) + { + return rc; + } + } + + return 
ffmpeg.avcodec_receive_frame(_context, output.Frame); + } + + public void Dispose() + { + ffmpeg.avcodec_close(_context); + + fixed (AVCodecContext** ppContext = &_context) + { + ffmpeg.avcodec_free_context(ppContext); + } + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.H264/H264BitStreamWriter.cs b/Ryujinx.Graphics.Nvdec.H264/H264BitStreamWriter.cs new file mode 100644 index 00000000..c0e2357d --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.H264/H264BitStreamWriter.cs @@ -0,0 +1,121 @@ +using System; +using System.Numerics; + +namespace Ryujinx.Graphics.Nvdec.H264 +{ + struct H264BitStreamWriter + { + private const int BufferSize = 8; + + private readonly byte[] _workBuffer; + + private int _offset; + private int _buffer; + private int _bufferPos; + + public H264BitStreamWriter(byte[] workBuffer) + { + _workBuffer = workBuffer; + _offset = 0; + _buffer = 0; + _bufferPos = 0; + } + + public void WriteBit(bool value) + { + WriteBits(value ? 1 : 0, 1); + } + + public void WriteBits(int value, int valueSize) + { + int valuePos = 0; + + int remaining = valueSize; + + while (remaining > 0) + { + int copySize = remaining; + + int free = GetFreeBufferBits(); + + if (copySize > free) + { + copySize = free; + } + + int mask = (1 << copySize) - 1; + + int srcShift = (valueSize - valuePos) - copySize; + int dstShift = (BufferSize - _bufferPos) - copySize; + + _buffer |= ((value >> srcShift) & mask) << dstShift; + + valuePos += copySize; + _bufferPos += copySize; + remaining -= copySize; + } + } + + private int GetFreeBufferBits() + { + if (_bufferPos == BufferSize) + { + Flush(); + } + + return BufferSize - _bufferPos; + } + + public void Flush() + { + if (_bufferPos != 0) + { + _workBuffer[_offset++] = (byte)_buffer; + + _buffer = 0; + _bufferPos = 0; + } + } + + public void End() + { + WriteBit(true); + + Flush(); + } + + public Span AsSpan() + { + return new Span(_workBuffer).Slice(0, _offset); + } + + public void WriteU(uint value, int valueSize) => WriteBits((int)value, 
valueSize); + public void WriteSe(int value) => WriteExpGolombCodedInt(value); + public void WriteUe(uint value) => WriteExpGolombCodedUInt(value); + + private void WriteExpGolombCodedInt(int value) + { + int sign = value <= 0 ? 0 : 1; + + if (value < 0) + { + value = -value; + } + + value = (value << 1) - sign; + + WriteExpGolombCodedUInt((uint)value); + } + + private void WriteExpGolombCodedUInt(uint value) + { + int size = 32 - BitOperations.LeadingZeroCount(value + 1); + + WriteBits(1, size); + + value -= (1u << (size - 1)) - 1; + + WriteBits((int)value, size - 1); + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec.H264/Ryujinx.Graphics.Nvdec.H264.csproj b/Ryujinx.Graphics.Nvdec.H264/Ryujinx.Graphics.Nvdec.H264.csproj new file mode 100644 index 00000000..cda0d933 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.H264/Ryujinx.Graphics.Nvdec.H264.csproj @@ -0,0 +1,23 @@ + + + + netcoreapp3.1 + + + + true + + + + true + + + + + + + + + + + diff --git a/Ryujinx.Graphics.Nvdec.H264/SpsAndPpsReconstruction.cs b/Ryujinx.Graphics.Nvdec.H264/SpsAndPpsReconstruction.cs new file mode 100644 index 00000000..e75c555e --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.H264/SpsAndPpsReconstruction.cs @@ -0,0 +1,159 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Video; +using System; + +namespace Ryujinx.Graphics.Nvdec.H264 +{ + static class SpsAndPpsReconstruction + { + public static Span Reconstruct(ref H264PictureInfo pictureInfo, byte[] workBuffer) + { + H264BitStreamWriter writer = new H264BitStreamWriter(workBuffer); + + // Sequence Parameter Set. 
+ writer.WriteU(1, 24); + writer.WriteU(0, 1); + writer.WriteU(3, 2); + writer.WriteU(7, 5); + writer.WriteU(100, 8); // Profile idc + writer.WriteU(0, 8); // Reserved + writer.WriteU(31, 8); // Level idc + writer.WriteUe(0); // Seq parameter set id + writer.WriteUe(pictureInfo.ChromaFormatIdc); + + if (pictureInfo.ChromaFormatIdc == 3) + { + writer.WriteBit(false); // Separate colour plane flag + } + + writer.WriteUe(0); // Bit depth luma minus 8 + writer.WriteUe(0); // Bit depth chroma minus 8 + writer.WriteBit(pictureInfo.QpprimeYZeroTransformBypassFlag); + writer.WriteBit(false); // Scaling matrix present flag + + writer.WriteUe(pictureInfo.Log2MaxFrameNumMinus4); + writer.WriteUe(pictureInfo.PicOrderCntType); + + if (pictureInfo.PicOrderCntType == 0) + { + writer.WriteUe(pictureInfo.Log2MaxPicOrderCntLsbMinus4); + } + else if (pictureInfo.PicOrderCntType == 1) + { + writer.WriteBit(pictureInfo.DeltaPicOrderAlwaysZeroFlag); + + writer.WriteSe(0); // Offset for non-ref pic + writer.WriteSe(0); // Offset for top to bottom field + writer.WriteUe(0); // Num ref frames in pic order cnt cycle + } + + writer.WriteUe(16); // Max num ref frames + writer.WriteBit(false); // Gaps in frame num value allowed flag + writer.WriteUe(pictureInfo.PicWidthInMbsMinus1); + writer.WriteUe(pictureInfo.PicHeightInMapUnitsMinus1); + writer.WriteBit(pictureInfo.FrameMbsOnlyFlag); + + if (!pictureInfo.FrameMbsOnlyFlag) + { + writer.WriteBit(pictureInfo.MbAdaptiveFrameFieldFlag); + } + + writer.WriteBit(pictureInfo.Direct8x8InferenceFlag); + writer.WriteBit(false); // Frame cropping flag + writer.WriteBit(false); // VUI parameter present flag + + writer.End(); + + // Picture Parameter Set. 
+ writer.WriteU(1, 24); + writer.WriteU(0, 1); + writer.WriteU(3, 2); + writer.WriteU(8, 5); + + writer.WriteUe(0); // Pic parameter set id + writer.WriteUe(0); // Seq parameter set id + + writer.WriteBit(pictureInfo.EntropyCodingModeFlag); + writer.WriteBit(false); // Bottom field pic order in frame present flag + writer.WriteUe(0); // Num slice groups minus 1 + writer.WriteUe(pictureInfo.NumRefIdxL0ActiveMinus1); + writer.WriteUe(pictureInfo.NumRefIdxL1ActiveMinus1); + writer.WriteBit(pictureInfo.WeightedPredFlag); + writer.WriteU(pictureInfo.WeightedBipredIdc, 2); + writer.WriteSe(pictureInfo.PicInitQpMinus26); + writer.WriteSe(0); // Pic init qs minus 26 + writer.WriteSe(pictureInfo.ChromaQpIndexOffset); + writer.WriteBit(pictureInfo.DeblockingFilterControlPresentFlag); + writer.WriteBit(pictureInfo.ConstrainedIntraPredFlag); + writer.WriteBit(pictureInfo.RedundantPicCntPresentFlag); + writer.WriteBit(pictureInfo.Transform8x8ModeFlag); + + writer.WriteBit(pictureInfo.ScalingMatrixPresent); + + if (pictureInfo.ScalingMatrixPresent) + { + for (int index = 0; index < 6; index++) + { + writer.WriteBit(true); + + WriteScalingList(ref writer, pictureInfo.ScalingLists4x4[index]); + } + + if (pictureInfo.Transform8x8ModeFlag) + { + for (int index = 0; index < 2; index++) + { + writer.WriteBit(true); + + WriteScalingList(ref writer, pictureInfo.ScalingLists8x8[index]); + } + } + } + + writer.WriteSe(pictureInfo.SecondChromaQpIndexOffset); + + writer.End(); + + return writer.AsSpan(); + } + + // ZigZag LUTs from libavcodec. 
+ private static readonly byte[] ZigZagDirect = new byte[] + { + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63 + }; + + private static readonly byte[] ZigZagScan = new byte[] + { + 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, + 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4, + 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, + 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4 + }; + + private static void WriteScalingList(ref H264BitStreamWriter writer, IArray list) + { + byte[] scan = list.Length == 16 ? ZigZagScan : ZigZagDirect; + + int lastScale = 8; + + for (int index = 0; index < list.Length; index++) + { + byte value = list[scan[index]]; + + int deltaScale = value - lastScale; + + writer.WriteSe(deltaScale); + + lastScale = value; + } + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.H264/Surface.cs b/Ryujinx.Graphics.Nvdec.H264/Surface.cs new file mode 100644 index 00000000..a6c16ba3 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.H264/Surface.cs @@ -0,0 +1,33 @@ +using FFmpeg.AutoGen; +using Ryujinx.Graphics.Video; +using System; + +namespace Ryujinx.Graphics.Nvdec.H264 +{ + unsafe class Surface : ISurface + { + public AVFrame* Frame { get; } + + public Plane YPlane => new Plane((IntPtr)Frame->data[0], Stride * Height); + public Plane UPlane => new Plane((IntPtr)Frame->data[1], UvStride * UvHeight); + public Plane VPlane => new Plane((IntPtr)Frame->data[2], UvStride * UvHeight); + + public int Width => Frame->width; + public int Height => Frame->height; + public int Stride => Frame->linesize[0]; + public int UvWidth => (Frame->width + 1) >> 1; + public int UvHeight => (Frame->height + 1) >> 1; + public int UvStride => Frame->linesize[1]; + + public Surface() + { + Frame = ffmpeg.av_frame_alloc(); + } + + public void Dispose() + { + ffmpeg.av_frame_unref(Frame); + 
ffmpeg.av_free(Frame); + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/BitDepth.cs b/Ryujinx.Graphics.Nvdec.Vp9/BitDepth.cs new file mode 100644 index 00000000..b7b70953 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/BitDepth.cs @@ -0,0 +1,9 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9 +{ + internal enum BitDepth + { + Bits8 = 8, /**< 8 bits */ + Bits10 = 10, /**< 10 bits */ + Bits12 = 12, /**< 12 bits */ + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/CodecErr.cs b/Ryujinx.Graphics.Nvdec.Vp9/CodecErr.cs new file mode 100644 index 00000000..b695fed5 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/CodecErr.cs @@ -0,0 +1,56 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9 +{ + internal enum CodecErr + { + /*!\brief Operation completed without error */ + CodecOk, + + /*!\brief Unspecified error */ + CodecError, + + /*!\brief Memory operation failed */ + CodecMemError, + + /*!\brief ABI version mismatch */ + CodecAbiMismatch, + + /*!\brief Algorithm does not have required capability */ + CodecIncapable, + + /*!\brief The given bitstream is not supported. + * + * The bitstream was unable to be parsed at the highest level. The decoder + * is unable to proceed. This error \ref SHOULD be treated as fatal to the + * stream. */ + CodecUnsupBitstream, + + /*!\brief Encoded bitstream uses an unsupported feature + * + * The decoder does not implement a feature required by the encoder. This + * return code should only be used for features that prevent future + * pictures from being properly decoded. This error \ref MAY be treated as + * fatal to the stream or \ref MAY be treated as fatal to the current GOP. + */ + CodecUnsupFeature, + + /*!\brief The coded data for this stream is corrupt or incomplete + * + * There was a problem decoding the current frame. This return code + * should only be used for failures that prevent future pictures from + * being properly decoded. This error \ref MAY be treated as fatal to the + * stream or \ref MAY be treated as fatal to the current GOP. 
If decoding + * is continued for the current GOP, artifacts may be present. + */ + CodecCorruptFrame, + + /*!\brief An application-supplied parameter is not valid. + * + */ + CodecInvalidParam, + + /*!\brief An iterator reached the end of list. + * + */ + CodecListEnd + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Common/BitUtils.cs b/Ryujinx.Graphics.Nvdec.Vp9/Common/BitUtils.cs new file mode 100644 index 00000000..a7c6d148 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Common/BitUtils.cs @@ -0,0 +1,59 @@ +using System; +using System.Diagnostics; +using System.Numerics; +using System.Runtime.CompilerServices; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Common +{ + internal static class BitUtils + { + // FIXME: Enable inlining here after AVX2 gather bug is fixed. + // [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static byte ClipPixel(int val) + { + return (byte)((val > 255) ? 255 : (val < 0) ? 0 : val); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static ushort ClipPixelHighbd(int val, int bd) + { + return bd switch + { + 10 => (ushort)Math.Clamp(val, 0, 1023), + 12 => (ushort)Math.Clamp(val, 0, 4095), + _ => (ushort)Math.Clamp(val, 0, 255) + }; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int RoundPowerOfTwo(int value, int n) + { + return (value + (1 << (n - 1))) >> n; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static long RoundPowerOfTwo(long value, int n) + { + return (value + (1L << (n - 1))) >> n; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int AlignPowerOfTwo(int value, int n) + { + return (value + ((1 << n) - 1)) & ~((1 << n) - 1); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int GetMsb(uint n) + { + Debug.Assert(n != 0); + return 31 ^ BitOperations.LeadingZeroCount(n); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int GetUnsignedBits(uint numValues) + { + return numValues > 0 ? 
GetMsb(numValues) + 1 : 0; + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Common/MemoryAllocator.cs b/Ryujinx.Graphics.Nvdec.Vp9/Common/MemoryAllocator.cs new file mode 100644 index 00000000..473dd904 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Common/MemoryAllocator.cs @@ -0,0 +1,94 @@ +using Ryujinx.Common.Memory; +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Common +{ + internal class MemoryAllocator : IDisposable + { + private const int PoolEntries = 10; + + private struct PoolItem + { + public IntPtr Pointer; + public int Length; + public bool InUse; + } + + private PoolItem[] _pool = new PoolItem[PoolEntries]; + + public ArrayPtr Allocate(int length) where T : unmanaged + { + int lengthInBytes = Unsafe.SizeOf() * length; + + IntPtr ptr = IntPtr.Zero; + + for (int i = 0; i < PoolEntries; i++) + { + ref PoolItem item = ref _pool[i]; + + if (!item.InUse && item.Length == lengthInBytes) + { + item.InUse = true; + ptr = item.Pointer; + break; + } + } + + if (ptr == IntPtr.Zero) + { + ptr = Marshal.AllocHGlobal(lengthInBytes); + + for (int i = 0; i < PoolEntries; i++) + { + ref PoolItem item = ref _pool[i]; + + if (!item.InUse) + { + item.InUse = true; + if (item.Pointer != IntPtr.Zero) + { + Marshal.FreeHGlobal(item.Pointer); + } + item.Pointer = ptr; + item.Length = lengthInBytes; + break; + } + } + } + + return new ArrayPtr(ptr, length); + } + + public unsafe void Free(ArrayPtr arr) where T : unmanaged + { + IntPtr ptr = (IntPtr)arr.ToPointer(); + + for (int i = 0; i < PoolEntries; i++) + { + ref PoolItem item = ref _pool[i]; + + if (item.Pointer == ptr) + { + item.InUse = false; + break; + } + } + } + + public void Dispose() + { + for (int i = 0; i < PoolEntries; i++) + { + ref PoolItem item = ref _pool[i]; + + if (item.Pointer != IntPtr.Zero) + { + Marshal.FreeHGlobal(item.Pointer); + item.Pointer = IntPtr.Zero; + } + } + } + } +} \ No newline at end of file diff 
--git a/Ryujinx.Graphics.Nvdec.Vp9/Common/MemoryUtil.cs b/Ryujinx.Graphics.Nvdec.Vp9/Common/MemoryUtil.cs new file mode 100644 index 00000000..e53ec9bd --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Common/MemoryUtil.cs @@ -0,0 +1,25 @@ +using Ryujinx.Common.Memory; +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Common +{ + internal static class MemoryUtil + { + public static unsafe void Copy(T* dest, T* source, int length) where T : unmanaged + { + new Span(source, length).CopyTo(new Span(dest, length)); + } + + public static void Copy(ref T dest, ref T source) where T : unmanaged + { + MemoryMarshal.CreateSpan(ref source, 1).CopyTo(MemoryMarshal.CreateSpan(ref dest, 1)); + } + + public static unsafe void Fill(T* ptr, T value, int length) where T : unmanaged + { + new Span(ptr, length).Fill(value); + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Constants.cs b/Ryujinx.Graphics.Nvdec.Vp9/Constants.cs new file mode 100644 index 00000000..407e6f42 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Constants.cs @@ -0,0 +1,71 @@ +using Ryujinx.Graphics.Nvdec.Vp9.Types; + +namespace Ryujinx.Graphics.Nvdec.Vp9 +{ + internal static class Constants + { + public const int Vp9InterpExtend = 4; + + public const int MaxMbPlane = 3; + + public const int None = -1; + public const int IntraFrame = 0; + public const int LastFrame = 1; + public const int GoldenFrame = 2; + public const int AltRefFrame = 3; + public const int MaxRefFrames = 4; + + public const int MiSizeLog2 = 3; + public const int MiBlockSizeLog2 = 6 - MiSizeLog2; // 64 = 2^6 + + public const int MiSize = 1 << MiSizeLog2; // pixels per mi-unit + public const int MiBlockSize = 1 << MiBlockSizeLog2; // mi-units per max block + public const int MiMask = MiBlockSize - 1; + + public const int PartitionPloffset = 4; // number of probability models per block size + + /* Segment Feature Masks */ + public const int MaxMvRefCandidates = 2; 
+ + public const int CompInterContexts = 5; + public const int RefContexts = 5; + + public const int EightTap = 0; + public const int EightTapSmooth = 1; + public const int EightTapSharp = 2; + public const int SwitchableFilters = 3; /* Number of switchable filters */ + public const int Bilinear = 3; + public const int Switchable = 4; /* should be the last one */ + + // Frame + public const int RefsPerFrame = 3; + + public const int NumPingPongBuffers = 2; + + public const int Class0Bits = 1; /* bits at integer precision for class 0 */ + public const int Class0Size = 1 << Class0Bits; + + public const int MvInUseBits = 14; + public const int MvUpp = (1 << MvInUseBits) - 1; + public const int MvLow = -(1 << MvInUseBits); + + // Coefficient token alphabet + public const int ZeroToken = 0; // 0 Extra Bits 0+0 + public const int OneToken = 1; // 1 Extra Bits 0+1 + public const int TwoToken = 2; // 2 Extra Bits 0+1 + + public const int PivotNode = 2; + + public const int Cat1MinVal = 5; + public const int Cat2MinVal = 7; + public const int Cat3MinVal = 11; + public const int Cat4MinVal = 19; + public const int Cat5MinVal = 35; + public const int Cat6MinVal = 67; + + public const int EobModelToken = 3; + + public const int SegmentAbsData = 1; + public const int MaxSegments = 8; + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/DecodeFrame.cs b/Ryujinx.Graphics.Nvdec.Vp9/DecodeFrame.cs new file mode 100644 index 00000000..81c187e1 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/DecodeFrame.cs @@ -0,0 +1,1190 @@ +using Ryujinx.Common.Memory; +using System; +using System.Buffers.Binary; +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using Ryujinx.Graphics.Nvdec.Vp9.Common; +using Ryujinx.Graphics.Nvdec.Vp9.Dsp; +using Ryujinx.Graphics.Nvdec.Vp9.Types; +using Ryujinx.Graphics.Video; +using Mv = Ryujinx.Graphics.Nvdec.Vp9.Types.Mv; + +namespace Ryujinx.Graphics.Nvdec.Vp9 +{ + static class DecodeFrame + { + private static 
bool ReadIsValid(ArrayPtr<byte> start, int len)
+        {
+            // A read is valid only when it covers at least one byte and fits in the buffer.
+            // Lengths reach this check straight from the bitstream, so a negative value
+            // (a 32-bit size with the top bit set, read as a signed int) has to be rejected
+            // as well: "len != 0" alone would accept it, since it is also <= start.Length.
+            return len > 0 && len <= start.Length;
+        }
+
+        // Inverse transform for an inter block: runs the transform selected by txSize (or the
+        // 4x4 WHT when xd.Lossless is set) over the plane's dequantized coefficients, adding
+        // the result into dst, and then zeroes the coefficients again.
+        // dst is reinterpreted as 16-bit samples when the current buffer is high bit depth.
+        // eob must be greater than zero.
+        private static void InverseTransformBlockInter(ref MacroBlockD xd, int plane, TxSize txSize, Span<byte> dst, int stride, int eob)
+        {
+            ref MacroBlockDPlane pd = ref xd.Plane[plane];
+            ArrayPtr<int> dqcoeff = pd.DqCoeff;
+            Debug.Assert(eob > 0);
+            if (xd.CurBuf.HighBd)
+            {
+                Span<ushort> dst16 = MemoryMarshal.Cast<byte, ushort>(dst);
+                if (xd.Lossless)
+                {
+                    Idct.HighbdIwht4x4Add(dqcoeff.ToSpan(), dst16, stride, eob, xd.Bd);
+                }
+                else
+                {
+                    switch (txSize)
+                    {
+                        case TxSize.Tx4x4:
+                            Idct.HighbdIdct4x4Add(dqcoeff.ToSpan(), dst16, stride, eob, xd.Bd);
+                            break;
+                        case TxSize.Tx8x8:
+                            Idct.HighbdIdct8x8Add(dqcoeff.ToSpan(), dst16, stride, eob, xd.Bd);
+                            break;
+                        case TxSize.Tx16x16:
+                            Idct.HighbdIdct16x16Add(dqcoeff.ToSpan(), dst16, stride, eob, xd.Bd);
+                            break;
+                        case TxSize.Tx32x32:
+                            Idct.HighbdIdct32x32Add(dqcoeff.ToSpan(), dst16, stride, eob, xd.Bd);
+                            break;
+                        default: Debug.Assert(false, "Invalid transform size"); break;
+                    }
+                }
+            }
+            else
+            {
+                if (xd.Lossless)
+                {
+                    Idct.Iwht4x4Add(dqcoeff.ToSpan(), dst, stride, eob);
+                }
+                else
+                {
+                    switch (txSize)
+                    {
+                        case TxSize.Tx4x4:
+                            Idct.Idct4x4Add(dqcoeff.ToSpan(), dst, stride, eob);
+                            break;
+                        case TxSize.Tx8x8:
+                            Idct.Idct8x8Add(dqcoeff.ToSpan(), dst, stride, eob);
+                            break;
+                        case TxSize.Tx16x16:
+                            Idct.Idct16x16Add(dqcoeff.ToSpan(), dst, stride, eob);
+                            break;
+                        case TxSize.Tx32x32:
+                            Idct.Idct32x32Add(dqcoeff.ToSpan(), dst, stride, eob);
+                            break;
+                        // Unlike the high bit depth path above, an invalid size returns here
+                        // without clearing the coefficients; kept as in the original.
+                        default: Debug.Assert(false, "Invalid transform size"); return;
+                    }
+                }
+            }
+
+            // Reset the coefficient buffer. Only a prefix is cleared when eob is small:
+            // one entry for eob == 1, a reduced range for low eob counts, and otherwise
+            // 16 << (2 * txSize) entries.
+            if (eob == 1)
+            {
+                dqcoeff.ToSpan()[0] = 0;
+            }
+            else
+            {
+                if (txSize <= TxSize.Tx16x16 && eob <= 10)
+                {
+                    dqcoeff.ToSpan().Slice(0, 4 * (4 << (int)txSize)).Fill(0);
+                }
+                else if (txSize == TxSize.Tx32x32 && eob <= 34)
+                {
+                    dqcoeff.ToSpan().Slice(0, 256).Fill(0);
+                }
+                else
+                {
+                    dqcoeff.ToSpan().Slice(0, 16 << ((int)txSize << 1)).Fill(0);
+                }
+            }
+        }
+
+        private static void InverseTransformBlockIntra(
+            ref MacroBlockD xd,
+            int
plane, + TxType txType, + TxSize txSize, + Span dst, + int stride, + int eob) + { + ref MacroBlockDPlane pd = ref xd.Plane[plane]; + ArrayPtr dqcoeff = pd.DqCoeff; + Debug.Assert(eob > 0); + if (xd.CurBuf.HighBd) + { + Span dst16 = MemoryMarshal.Cast(dst); + if (xd.Lossless) + { + Idct.HighbdIwht4x4Add(dqcoeff.ToSpan(), dst16, stride, eob, xd.Bd); + } + else + { + switch (txSize) + { + case TxSize.Tx4x4: + Idct.HighbdIht4x4Add(txType, dqcoeff.ToSpan(), dst16, stride, eob, xd.Bd); + break; + case TxSize.Tx8x8: + Idct.HighbdIht8x8Add(txType, dqcoeff.ToSpan(), dst16, stride, eob, xd.Bd); + break; + case TxSize.Tx16x16: + Idct.HighbdIht16x16Add(txType, dqcoeff.ToSpan(), dst16, stride, eob, xd.Bd); + break; + case TxSize.Tx32x32: + Idct.HighbdIdct32x32Add(dqcoeff.ToSpan(), dst16, stride, eob, xd.Bd); + break; + default: Debug.Assert(false, "Invalid transform size"); break; + } + } + } + else + { + if (xd.Lossless) + { + Idct.Iwht4x4Add(dqcoeff.ToSpan(), dst, stride, eob); + } + else + { + switch (txSize) + { + case TxSize.Tx4x4: Idct.Iht4x4Add(txType, dqcoeff.ToSpan(), dst, stride, eob); break; + case TxSize.Tx8x8: Idct.Iht8x8Add(txType, dqcoeff.ToSpan(), dst, stride, eob); break; + case TxSize.Tx16x16: Idct.Iht16x16Add(txType, dqcoeff.ToSpan(), dst, stride, eob); break; + case TxSize.Tx32x32: Idct.Idct32x32Add(dqcoeff.ToSpan(), dst, stride, eob); break; + default: Debug.Assert(false, "Invalid transform size"); return; + } + } + } + + if (eob == 1) + { + dqcoeff.ToSpan()[0] = 0; + } + else + { + if (txType == TxType.DctDct && txSize <= TxSize.Tx16x16 && eob <= 10) + { + dqcoeff.ToSpan().Slice(0, 4 * (4 << (int)txSize)).Fill(0); + } + else if (txSize == TxSize.Tx32x32 && eob <= 34) + { + dqcoeff.ToSpan().Slice(0, 256).Fill(0); + } + else + { + dqcoeff.ToSpan().Slice(0, 16 << ((int)txSize << 1)).Fill(0); + } + } + } + + private static unsafe void PredictAndReconstructIntraBlock( + ref TileWorkerData twd, + ref ModeInfo mi, + int plane, + int row, + int col, + TxSize 
txSize) + { + ref MacroBlockD xd = ref twd.Xd; + ref MacroBlockDPlane pd = ref xd.Plane[plane]; + PredictionMode mode = (plane == 0) ? mi.Mode : mi.UvMode; + int dstOffset = 4 * row * pd.Dst.Stride + 4 * col; + byte* dst = &pd.Dst.Buf.ToPointer()[dstOffset]; + Span dstSpan = pd.Dst.Buf.ToSpan().Slice(dstOffset); + + if (mi.SbType < BlockSize.Block8x8) + { + if (plane == 0) + { + mode = xd.Mi[0].Value.Bmi[(row << 1) + col].Mode; + } + } + + ReconIntra.PredictIntraBlock(ref xd, pd.N4Wl, txSize, mode, dst, pd.Dst.Stride, dst, pd.Dst.Stride, col, row, plane); + + if (mi.Skip == 0) + { + TxType txType = + (plane != 0 || xd.Lossless) ? TxType.DctDct : ReconIntra.IntraModeToTxTypeLookup[(int)mode]; + var sc = (plane != 0 || xd.Lossless) + ? Luts.Vp9DefaultScanOrders[(int)txSize] + : Luts.Vp9ScanOrders[(int)txSize][(int)txType]; + int eob = Detokenize.DecodeBlockTokens(ref twd, plane, sc, col, row, txSize, mi.SegmentId); + if (eob > 0) + { + InverseTransformBlockIntra(ref xd, plane, txType, txSize, dstSpan, pd.Dst.Stride, eob); + } + } + } + + private static int ReconstructInterBlock( + ref TileWorkerData twd, + ref ModeInfo mi, + int plane, + int row, + int col, + TxSize txSize) + { + ref MacroBlockD xd = ref twd.Xd; + ref MacroBlockDPlane pd = ref xd.Plane[plane]; + var sc = Luts.Vp9DefaultScanOrders[(int)txSize]; + int eob = Detokenize.DecodeBlockTokens(ref twd, plane, sc, col, row, txSize, mi.SegmentId); + Span dst = pd.Dst.Buf.ToSpan().Slice(4 * row * pd.Dst.Stride + 4 * col); + + if (eob > 0) + { + InverseTransformBlockInter(ref xd, plane, txSize, dst, pd.Dst.Stride, eob); + } + return eob; + } + + private static unsafe void BuildMcBorder( + byte* src, + int srcStride, + byte* dst, + int dstStride, + int x, + int y, + int bW, + int bH, + int w, + int h) + { + // Get a pointer to the start of the real data for this row. 
+ byte* refRow = src - x - y * srcStride; + + if (y >= h) + { + refRow += (h - 1) * srcStride; + } + else if (y > 0) + { + refRow += y * srcStride; + } + + do + { + int right = 0, copy; + int left = x < 0 ? -x : 0; + + if (left > bW) + { + left = bW; + } + + if (x + bW > w) + { + right = x + bW - w; + } + + if (right > bW) + { + right = bW; + } + + copy = bW - left - right; + + if (left != 0) + { + MemoryUtil.Fill(dst, refRow[0], left); + } + + if (copy != 0) + { + MemoryUtil.Copy(dst + left, refRow + x + left, copy); + } + + if (right != 0) + { + MemoryUtil.Fill(dst + left + copy, refRow[w - 1], right); + } + + dst += dstStride; + ++y; + + if (y > 0 && y < h) + { + refRow += srcStride; + } + } while (--bH != 0); + } + + private static unsafe void HighBuildMcBorder( + byte* src8, + int srcStride, + ushort* dst, + int dstStride, + int x, + int y, + int bW, + int bH, + int w, + int h) + { + // Get a pointer to the start of the real data for this row. + ushort* src = (ushort*)src8; + ushort* refRow = src - x - y * srcStride; + + if (y >= h) + { + refRow += (h - 1) * srcStride; + } + else if (y > 0) + { + refRow += y * srcStride; + } + + do + { + int right = 0, copy; + int left = x < 0 ? 
-x : 0; + + if (left > bW) + { + left = bW; + } + + if (x + bW > w) + { + right = x + bW - w; + } + + if (right > bW) + { + right = bW; + } + + copy = bW - left - right; + + if (left != 0) + { + MemoryUtil.Fill(dst, refRow[0], left); + } + + if (copy != 0) + { + MemoryUtil.Copy(dst + left, refRow + x + left, copy); + } + + if (right != 0) + { + MemoryUtil.Fill(dst + left + copy, refRow[w - 1], right); + } + + dst += dstStride; + ++y; + + if (y > 0 && y < h) + { + refRow += srcStride; + } + } while (--bH != 0); + } + + [StructLayout(LayoutKind.Sequential, Size = 80 * 2 * 80 * 2)] + struct McBufHigh + { + } + + private static unsafe void ExtendAndPredict( + byte* bufPtr1, + int preBufStride, + int x0, + int y0, + int bW, + int bH, + int frameWidth, + int frameHeight, + int borderOffset, + byte* dst, + int dstBufStride, + int subpelX, + int subpelY, + Array8[] kernel, + ref ScaleFactors sf, + ref MacroBlockD xd, + int w, + int h, + int refr, + int xs, + int ys) + { + McBufHigh mcBufHighStruct; + ushort* mcBufHigh = (ushort*)Unsafe.AsPointer(ref mcBufHighStruct); // Avoid zero initialization. 
+ if (xd.CurBuf.HighBd) + { + HighBuildMcBorder(bufPtr1, preBufStride, mcBufHigh, bW, x0, y0, bW, bH, frameWidth, frameHeight); + ReconInter.HighbdInterPredictor( + mcBufHigh + borderOffset, + bW, + (ushort*)dst, + dstBufStride, + subpelX, + subpelY, + ref sf, + w, + h, + refr, + kernel, + xs, + ys, + xd.Bd); + } + else + { + BuildMcBorder(bufPtr1, preBufStride, (byte*)mcBufHigh, bW, x0, y0, bW, bH, frameWidth, frameHeight); + ReconInter.InterPredictor( + (byte*)mcBufHigh + borderOffset, + bW, + dst, + dstBufStride, + subpelX, + subpelY, + ref sf, + w, + h, + refr, + kernel, + xs, + ys); + } + } + + private static unsafe void DecBuildInterPredictors( + ref MacroBlockD xd, + int plane, + int bw, + int bh, + int x, + int y, + int w, + int h, + int miX, + int miY, + Array8[] kernel, + ref ScaleFactors sf, + ref Buf2D preBuf, + ref Buf2D dstBuf, + ref Mv mv, + ref Surface refFrameBuf, + bool isScaled, + int refr) + { + ref MacroBlockDPlane pd = ref xd.Plane[plane]; + byte* dst = dstBuf.Buf.ToPointer() + dstBuf.Stride * y + x; + Mv32 scaledMv; + int xs, ys, x0, y0, x0_16, y0_16, frameWidth, frameHeight, bufStride, subpelX, subpelY; + byte* refFrame; + byte* bufPtr; + + // Get reference frame pointer, width and height. + if (plane == 0) + { + frameWidth = refFrameBuf.Width; + frameHeight = refFrameBuf.Height; + refFrame = refFrameBuf.YBuffer.ToPointer(); + } + else + { + frameWidth = refFrameBuf.UvWidth; + frameHeight = refFrameBuf.UvHeight; + refFrame = plane == 1 ? refFrameBuf.UBuffer.ToPointer() : refFrameBuf.VBuffer.ToPointer(); + } + + if (isScaled) + { + Mv mvQ4 = ReconInter.ClampMvToUmvBorderSb(ref xd, ref mv, bw, bh, pd.SubsamplingX, pd.SubsamplingY); + // Co-ordinate of containing block to pixel precision. + int xStart = (-xd.MbToLeftEdge >> (3 + pd.SubsamplingX)); + int yStart = (-xd.MbToTopEdge >> (3 + pd.SubsamplingY)); + // Co-ordinate of the block to 1/16th pixel precision. 
+ x0_16 = (xStart + x) << Filter.SubpelBits; + y0_16 = (yStart + y) << Filter.SubpelBits; + + // Co-ordinate of current block in reference frame + // to 1/16th pixel precision. + x0_16 = sf.ScaleValueX(x0_16); + y0_16 = sf.ScaleValueY(y0_16); + + // Map the top left corner of the block into the reference frame. + x0 = sf.ScaleValueX(xStart + x); + y0 = sf.ScaleValueY(yStart + y); + + // Scale the MV and incorporate the sub-pixel offset of the block + // in the reference frame. + scaledMv = sf.ScaleMv(ref mvQ4, miX + x, miY + y); + xs = sf.XStepQ4; + ys = sf.YStepQ4; + } + else + { + // Co-ordinate of containing block to pixel precision. + x0 = (-xd.MbToLeftEdge >> (3 + pd.SubsamplingX)) + x; + y0 = (-xd.MbToTopEdge >> (3 + pd.SubsamplingY)) + y; + + // Co-ordinate of the block to 1/16th pixel precision. + x0_16 = x0 << Filter.SubpelBits; + y0_16 = y0 << Filter.SubpelBits; + + scaledMv.Row = mv.Row * (1 << (1 - pd.SubsamplingY)); + scaledMv.Col = mv.Col * (1 << (1 - pd.SubsamplingX)); + xs = ys = 16; + } + subpelX = scaledMv.Col & Filter.SubpelMask; + subpelY = scaledMv.Row & Filter.SubpelMask; + + // Calculate the top left corner of the best matching block in the + // reference frame. + x0 += scaledMv.Col >> Filter.SubpelBits; + y0 += scaledMv.Row >> Filter.SubpelBits; + x0_16 += scaledMv.Col; + y0_16 += scaledMv.Row; + + // Get reference block pointer. + bufPtr = refFrame + y0 * preBuf.Stride + x0; + bufStride = preBuf.Stride; + + // Do border extension if there is motion or the + // width/height is not a multiple of 8 pixels. + if (isScaled || scaledMv.Col != 0 || scaledMv.Row != 0 || (frameWidth & 0x7) != 0 || (frameHeight & 0x7) != 0) + { + int y1 = ((y0_16 + (h - 1) * ys) >> Filter.SubpelBits) + 1; + + // Get reference block bottom right horizontal coordinate. 
+ int x1 = ((x0_16 + (w - 1) * xs) >> Filter.SubpelBits) + 1; + int xPad = 0, yPad = 0; + + if (subpelX != 0 || (sf.XStepQ4 != Filter.SubpelShifts)) + { + x0 -= Constants.Vp9InterpExtend - 1; + x1 += Constants.Vp9InterpExtend; + xPad = 1; + } + + if (subpelY != 0 || (sf.YStepQ4 != Filter.SubpelShifts)) + { + y0 -= Constants.Vp9InterpExtend - 1; + y1 += Constants.Vp9InterpExtend; + yPad = 1; + } + + // Skip border extension if block is inside the frame. + if (x0 < 0 || x0 > frameWidth - 1 || x1 < 0 || x1 > frameWidth - 1 || + y0 < 0 || y0 > frameHeight - 1 || y1 < 0 || y1 > frameHeight - 1) + { + // Extend the border. + byte* bufPtr1 = refFrame + y0 * bufStride + x0; + int bW = x1 - x0 + 1; + int bH = y1 - y0 + 1; + int borderOffset = yPad * 3 * bW + xPad * 3; + + ExtendAndPredict( + bufPtr1, + bufStride, + x0, + y0, + bW, + bH, + frameWidth, + frameHeight, + borderOffset, + dst, + dstBuf.Stride, + subpelX, + subpelY, + kernel, + ref sf, + ref xd, + w, + h, + refr, + xs, + ys); + return; + } + } + if (xd.CurBuf.HighBd) + { + ReconInter.HighbdInterPredictor( + (ushort*)bufPtr, + bufStride, + (ushort*)dst, + dstBuf.Stride, + subpelX, + subpelY, + ref sf, + w, + h, + refr, + kernel, + xs, + ys, + xd.Bd); + } + else + { + ReconInter.InterPredictor( + bufPtr, + bufStride, + dst, + dstBuf.Stride, + subpelX, + subpelY, + ref sf, + w, + h, + refr, + kernel, + xs, + ys); + } + } + + private static void DecBuildInterPredictorsSb(ref Vp9Common cm, ref MacroBlockD xd, int miRow, int miCol) + { + int plane; + int miX = miCol * Constants.MiSize; + int miY = miRow * Constants.MiSize; + ref ModeInfo mi = ref xd.Mi[0].Value; + Array8[] kernel = Luts.Vp9FilterKernels[mi.InterpFilter]; + BlockSize sbType = mi.SbType; + int isCompound = mi.HasSecondRef() ? 
1 : 0; + int refr; + bool isScaled; + + for (refr = 0; refr < 1 + isCompound; ++refr) + { + int frame = mi.RefFrame[refr]; + ref RefBuffer refBuf = ref cm.FrameRefs[frame - Constants.LastFrame]; + ref ScaleFactors sf = ref refBuf.Sf; + ref Surface refFrameBuf = ref refBuf.Buf; + + if (!sf.IsValidScale()) + { + xd.ErrorInfo.Value.InternalError(CodecErr.CodecUnsupBitstream, "Reference frame has invalid dimensions"); + } + + isScaled = sf.IsScaled(); + ReconInter.SetupPrePlanes(ref xd, refr, ref refFrameBuf, miRow, miCol, isScaled ? new Ptr(ref sf) : Ptr.Null); + xd.BlockRefs[refr] = new Ptr(ref refBuf); + + if (sbType < BlockSize.Block8x8) + { + for (plane = 0; plane < Constants.MaxMbPlane; ++plane) + { + ref MacroBlockDPlane pd = ref xd.Plane[plane]; + ref Buf2D dstBuf = ref pd.Dst; + int num4x4W = pd.N4W; + int num4x4H = pd.N4H; + int n4Wx4 = 4 * num4x4W; + int n4Hx4 = 4 * num4x4H; + ref Buf2D preBuf = ref pd.Pre[refr]; + int i = 0, x, y; + for (y = 0; y < num4x4H; ++y) + { + for (x = 0; x < num4x4W; ++x) + { + Mv mv = ReconInter.AverageSplitMvs(ref pd, ref mi, refr, i++); + DecBuildInterPredictors( + ref xd, + plane, + n4Wx4, + n4Hx4, + 4 * x, + 4 * y, + 4, + 4, + miX, + miY, + kernel, + ref sf, + ref preBuf, + ref dstBuf, + ref mv, + ref refFrameBuf, + isScaled, + refr); + } + } + } + } + else + { + Mv mv = mi.Mv[refr]; + for (plane = 0; plane < Constants.MaxMbPlane; ++plane) + { + ref MacroBlockDPlane pd = ref xd.Plane[plane]; + ref Buf2D dstBuf = ref pd.Dst; + int num4x4W = pd.N4W; + int num4x4H = pd.N4H; + int n4Wx4 = 4 * num4x4W; + int n4Hx4 = 4 * num4x4H; + ref Buf2D preBuf = ref pd.Pre[refr]; + DecBuildInterPredictors( + ref xd, + plane, + n4Wx4, + n4Hx4, + 0, + 0, + n4Wx4, + n4Hx4, + miX, + miY, + kernel, + ref sf, + ref preBuf, + ref dstBuf, + ref mv, + ref refFrameBuf, + isScaled, + refr); + } + } + } + } + + private static unsafe void DecResetSkipContext(ref MacroBlockD xd) + { + int i; + for (i = 0; i < Constants.MaxMbPlane; i++) + { + ref 
MacroBlockDPlane pd = ref xd.Plane[i]; + MemoryUtil.Fill(pd.AboveContext.ToPointer(), (sbyte)0, pd.N4W); + MemoryUtil.Fill(pd.LeftContext.ToPointer(), (sbyte)0, pd.N4H); + } + } + + private static void SetPlaneN4(ref MacroBlockD xd, int bw, int bh, int bwl, int bhl) + { + int i; + for (i = 0; i < Constants.MaxMbPlane; i++) + { + xd.Plane[i].N4W = (ushort)((bw << 1) >> xd.Plane[i].SubsamplingX); + xd.Plane[i].N4H = (ushort)((bh << 1) >> xd.Plane[i].SubsamplingY); + xd.Plane[i].N4Wl = (byte)(bwl - xd.Plane[i].SubsamplingX); + xd.Plane[i].N4Hl = (byte)(bhl - xd.Plane[i].SubsamplingY); + } + } + + private static ref ModeInfo SetOffsets( + ref Vp9Common cm, + ref MacroBlockD xd, + BlockSize bsize, + int miRow, + int miCol, + int bw, + int bh, + int xMis, + int yMis, + int bwl, + int bhl) + { + int offset = miRow * cm.MiStride + miCol; + int x, y; + ref TileInfo tile = ref xd.Tile; + + xd.Mi = cm.MiGridVisible.Slice(offset); + xd.Mi[0] = new Ptr(ref cm.Mi[offset]); + xd.Mi[0].Value.SbType = bsize; + for (y = 0; y < yMis; ++y) + { + for (x = y == 0 ? 1 : 0; x < xMis; ++x) + { + xd.Mi[y * cm.MiStride + x] = xd.Mi[0]; + } + } + + SetPlaneN4(ref xd, bw, bh, bwl, bhl); + + xd.SetSkipContext(miRow, miCol); + + // Distance of Mb to the various image edges. 
These are specified to 8th pel + // as they are always compared to values that are in 1/8th pel units + xd.SetMiRowCol(ref tile, miRow, bh, miCol, bw, cm.MiRows, cm.MiCols); + + ReconInter.SetupDstPlanes(ref xd.Plane, ref xd.CurBuf, miRow, miCol); + return ref xd.Mi[0].Value; + } + + private static void DecodeBlock( + ref TileWorkerData twd, + ref Vp9Common cm, + int miRow, + int miCol, + BlockSize bsize, + int bwl, + int bhl) + { + bool less8x8 = bsize < BlockSize.Block8x8; + int bw = 1 << (bwl - 1); + int bh = 1 << (bhl - 1); + int xMis = Math.Min(bw, cm.MiCols - miCol); + int yMis = Math.Min(bh, cm.MiRows - miRow); + ref Reader r = ref twd.BitReader; + ref MacroBlockD xd = ref twd.Xd; + + ref ModeInfo mi = ref SetOffsets(ref cm, ref xd, bsize, miRow, miCol, bw, bh, xMis, yMis, bwl, bhl); + + if (bsize >= BlockSize.Block8x8 && (cm.SubsamplingX != 0 || cm.SubsamplingY != 0)) + { + BlockSize uvSubsize = Luts.SsSizeLookup[(int)bsize][cm.SubsamplingX][cm.SubsamplingY]; + if (uvSubsize == BlockSize.BlockInvalid) + { + xd.ErrorInfo.Value.InternalError(CodecErr.CodecCorruptFrame, "Invalid block size."); + } + } + + DecodeMv.ReadModeInfo(ref twd, ref cm, miRow, miCol, xMis, yMis); + + if (mi.Skip != 0) + { + DecResetSkipContext(ref xd); + } + + if (!mi.IsInterBlock()) + { + int plane; + for (plane = 0; plane < Constants.MaxMbPlane; ++plane) + { + ref MacroBlockDPlane pd = ref xd.Plane[plane]; + TxSize txSize = plane != 0 ? mi.GetUvTxSize(ref pd) : mi.TxSize; + int num4x4W = pd.N4W; + int num4x4H = pd.N4H; + int step = 1 << (int)txSize; + int row, col; + int maxBlocksWide = num4x4W + (xd.MbToRightEdge >= 0 ? 0 : xd.MbToRightEdge >> (5 + pd.SubsamplingX)); + int maxBlocksHigh = num4x4H + (xd.MbToBottomEdge >= 0 ? 0 : xd.MbToBottomEdge >> (5 + pd.SubsamplingY)); + + xd.MaxBlocksWide = (uint)(xd.MbToRightEdge >= 0 ? 0 : maxBlocksWide); + xd.MaxBlocksHigh = (uint)(xd.MbToBottomEdge >= 0 ? 
0 : maxBlocksHigh); + + for (row = 0; row < maxBlocksHigh; row += step) + { + for (col = 0; col < maxBlocksWide; col += step) + { + PredictAndReconstructIntraBlock(ref twd, ref mi, plane, row, col, txSize); + } + } + } + } + else + { + // Prediction + DecBuildInterPredictorsSb(ref cm, ref xd, miRow, miCol); + + // Reconstruction + if (mi.Skip == 0) + { + int eobtotal = 0; + int plane; + + for (plane = 0; plane < Constants.MaxMbPlane; ++plane) + { + ref MacroBlockDPlane pd = ref xd.Plane[plane]; + TxSize txSize = plane != 0 ? mi.GetUvTxSize(ref pd) : mi.TxSize; + int num4x4W = pd.N4W; + int num4x4H = pd.N4H; + int step = 1 << (int)txSize; + int row, col; + int maxBlocksWide = num4x4W + (xd.MbToRightEdge >= 0 ? 0 : xd.MbToRightEdge >> (5 + pd.SubsamplingX)); + int maxBlocksHigh = num4x4H + (xd.MbToBottomEdge >= 0 ? 0 : xd.MbToBottomEdge >> (5 + pd.SubsamplingY)); + + xd.MaxBlocksWide = (uint)(xd.MbToRightEdge >= 0 ? 0 : maxBlocksWide); + xd.MaxBlocksHigh = (uint)(xd.MbToBottomEdge >= 0 ? 0 : maxBlocksHigh); + + for (row = 0; row < maxBlocksHigh; row += step) + { + for (col = 0; col < maxBlocksWide; col += step) + { + eobtotal += ReconstructInterBlock(ref twd, ref mi, plane, row, col, txSize); + } + } + } + + if (!less8x8 && eobtotal == 0) + { + mi.Skip = 1; // Skip loopfilter + } + } + } + + xd.Corrupted |= r.HasError(); + + if (cm.Lf.FilterLevel != 0) + { + LoopFilter.BuildMask(ref cm, ref mi, miRow, miCol, bw, bh); + } + } + + private static int DecPartitionPlaneContext(ref TileWorkerData twd, int miRow, int miCol, int bsl) + { + ref sbyte aboveCtx = ref twd.Xd.AboveSegContext[miCol]; + ref sbyte leftCtx = ref twd.Xd.LeftSegContext[miRow & Constants.MiMask]; + int above = (aboveCtx >> bsl) & 1, left = (leftCtx >> bsl) & 1; + + return (left * 2 + above) + bsl * Constants.PartitionPloffset; + } + + private static void DecUpdatePartitionContext( + ref TileWorkerData twd, + int miRow, + int miCol, + BlockSize subsize, + int bw) + { + Span aboveCtx = 
twd.Xd.AboveSegContext.Slice(miCol).ToSpan(); + Span leftCtx = MemoryMarshal.CreateSpan(ref twd.Xd.LeftSegContext[miRow & Constants.MiMask], 8 - (miRow & Constants.MiMask)); + + // Update the partition context at the end notes. Set partition bits + // of block sizes larger than the current one to be one, and partition + // bits of smaller block sizes to be zero. + aboveCtx.Slice(0, bw).Fill(Luts.PartitionContextLookup[(int)subsize].Above); + leftCtx.Slice(0, bw).Fill(Luts.PartitionContextLookup[(int)subsize].Left); + } + + private static PartitionType ReadPartition( + ref TileWorkerData twd, + int miRow, + int miCol, + int hasRows, + int hasCols, + int bsl) + { + int ctx = DecPartitionPlaneContext(ref twd, miRow, miCol, bsl); + ReadOnlySpan probs = MemoryMarshal.CreateReadOnlySpan(ref twd.Xd.PartitionProbs[ctx][0], 3); + PartitionType p; + ref Reader r = ref twd.BitReader; + + if (hasRows != 0 && hasCols != 0) + { + p = (PartitionType)r.ReadTree(Luts.Vp9PartitionTree, probs); + } + else if (hasRows == 0 && hasCols != 0) + { + p = r.Read(probs[1]) != 0 ? PartitionType.PartitionSplit : PartitionType.PartitionHorz; + } + else if (hasRows != 0 && hasCols == 0) + { + p = r.Read(probs[2]) != 0 ? PartitionType.PartitionSplit : PartitionType.PartitionVert; + } + else + { + p = PartitionType.PartitionSplit; + } + + if (!twd.Xd.Counts.IsNull) + { + ++twd.Xd.Counts.Value.Partition[ctx][(int)p]; + } + + return p; + } + + private static void DecodePartition( + ref TileWorkerData twd, + ref Vp9Common cm, + int miRow, + int miCol, + BlockSize bsize, + int n4x4L2) + { + int n8x8L2 = n4x4L2 - 1; + int num8x8Wh = 1 << n8x8L2; + int hbs = num8x8Wh >> 1; + PartitionType partition; + BlockSize subsize; + bool hasRows = (miRow + hbs) < cm.MiRows; + bool hasCols = (miCol + hbs) < cm.MiCols; + ref MacroBlockD xd = ref twd.Xd; + + if (miRow >= cm.MiRows || miCol >= cm.MiCols) + { + return; + } + + partition = ReadPartition(ref twd, miRow, miCol, hasRows ? 1 : 0, hasCols ? 
1 : 0, n8x8L2); + subsize = Luts.SubsizeLookup[(int)partition][(int)bsize]; + if (hbs == 0) + { + // Calculate bmode block dimensions (log 2) + xd.BmodeBlocksWl = (byte)(1 >> ((partition & PartitionType.PartitionVert) != 0 ? 1 : 0)); + xd.BmodeBlocksHl = (byte)(1 >> ((partition & PartitionType.PartitionHorz) != 0 ? 1 : 0)); + DecodeBlock(ref twd, ref cm, miRow, miCol, subsize, 1, 1); + } + else + { + switch (partition) + { + case PartitionType.PartitionNone: + DecodeBlock(ref twd, ref cm, miRow, miCol, subsize, n4x4L2, n4x4L2); + break; + case PartitionType.PartitionHorz: + DecodeBlock(ref twd, ref cm, miRow, miCol, subsize, n4x4L2, n8x8L2); + if (hasRows) + { + DecodeBlock(ref twd, ref cm, miRow + hbs, miCol, subsize, n4x4L2, n8x8L2); + } + + break; + case PartitionType.PartitionVert: + DecodeBlock(ref twd, ref cm, miRow, miCol, subsize, n8x8L2, n4x4L2); + if (hasCols) + { + DecodeBlock(ref twd, ref cm, miRow, miCol + hbs, subsize, n8x8L2, n4x4L2); + } + + break; + case PartitionType.PartitionSplit: + DecodePartition(ref twd, ref cm, miRow, miCol, subsize, n8x8L2); + DecodePartition(ref twd, ref cm, miRow, miCol + hbs, subsize, n8x8L2); + DecodePartition(ref twd, ref cm, miRow + hbs, miCol, subsize, n8x8L2); + DecodePartition(ref twd, ref cm, miRow + hbs, miCol + hbs, subsize, n8x8L2); + break; + default: Debug.Assert(false, "Invalid partition type"); break; + } + } + + // Update partition context + if (bsize >= BlockSize.Block8x8 && (bsize == BlockSize.Block8x8 || partition != PartitionType.PartitionSplit)) + { + DecUpdatePartitionContext(ref twd, miRow, miCol, subsize, num8x8Wh); + } + } + + private static void SetupTokenDecoder( + ArrayPtr data, + int readSize, + ref InternalErrorInfo errorInfo, + ref Reader r) + { + // Validate the calculated partition length. If the buffer described by the + // partition can't be fully read then throw an error. 
+            if (!ReadIsValid(data, readSize))
+            {
+                errorInfo.InternalError(CodecErr.CodecCorruptFrame, "Truncated packet or corrupt tile length");
+            }
+
+            if (r.Init(data, readSize))
+            {
+                errorInfo.InternalError(CodecErr.CodecMemError, "Failed to allocate bool decoder 1");
+            }
+        }
+
+        // Reads the next tile returning its size and adjusting 'data' accordingly
+        // based on 'isLast'.
+        // Every tile except the last one is prefixed by a 4-byte big-endian size; the last
+        // tile takes whatever is left in 'data'. On return 'buf' describes the tile and
+        // 'data' has been advanced past it.
+        // NOTE(review): the code keeps reading after reporting an error, so it presumably
+        // relies on InternalError not returning - confirm.
+        private static void GetTileBuffer(
+            bool isLast,
+            ref InternalErrorInfo errorInfo,
+            ref ArrayPtr<byte> data,
+            ref TileBuffer buf)
+        {
+            int size;
+
+            if (!isLast)
+            {
+                if (!ReadIsValid(data, 4))
+                {
+                    errorInfo.InternalError(CodecErr.CodecCorruptFrame, "Truncated packet or corrupt tile length");
+                }
+
+                size = BinaryPrimitives.ReadInt32BigEndian(data.ToSpan());
+                data = data.Slice(4);
+
+                // The size field is untrusted. Read as a signed int, a value with the top bit
+                // set is negative: it would pass a plain "size > data.Length" test and then be
+                // handed to Slice below as a negative offset.
+                if (size < 0 || size > data.Length)
+                {
+                    errorInfo.InternalError(CodecErr.CodecCorruptFrame, "Truncated packet or corrupt tile size");
+                }
+            }
+            else
+            {
+                size = data.Length;
+            }
+
+            buf.Data = data;
+            buf.Size = size;
+
+            data = data.Slice(size);
+        }
+
+        // Splits 'data' into one TileBuffer per tile, walking the tiles row by row.
+        // Only the very last tile (last row, last column) has no size prefix.
+        private static void GetTileBuffers(
+            ref Vp9Common cm,
+            ArrayPtr<byte> data,
+            int tileCols,
+            int tileRows,
+            ref Array4<Array64<TileBuffer>> tileBuffers)
+        {
+            int r, c;
+
+            for (r = 0; r < tileRows; ++r)
+            {
+                for (c = 0; c < tileCols; ++c)
+                {
+                    bool isLast = (r == tileRows - 1) && (c == tileCols - 1);
+                    ref TileBuffer buf = ref tileBuffers[r][c];
+                    GetTileBuffer(isLast, ref cm.Error, ref data, ref buf);
+                }
+            }
+        }
+
+        // Decodes all tiles of the current frame from 'data'.
+        // The tile buffers are located first, then a TileWorkerData is set up for each tile,
+        // and finally every tile row is decoded in steps of Constants.MiBlockSize rows,
+        // visiting each tile column in turn. The return value is what the bit reader of the
+        // last tile reports from FindEnd().
+        public static unsafe ArrayPtr<byte> DecodeTiles(ref Vp9Common cm, ArrayPtr<byte> data)
+        {
+            int alignedCols = TileInfo.MiColsAlignedToSb(cm.MiCols);
+            int tileCols = 1 << cm.Log2TileCols;
+            int tileRows = 1 << cm.Log2TileRows;
+            Array4<Array64<TileBuffer>> tileBuffers = new Array4<Array64<TileBuffer>>();
+            int tileRow, tileCol;
+            int miRow, miCol;
+
+            Debug.Assert(tileRows <= 4);
+            Debug.Assert(tileCols <= (1 << 6));
+
+            // Note: this memset assumes above_context[0], [1] and [2]
+            // are allocated as part of the same buffer.
+            MemoryUtil.Fill(cm.AboveContext.ToPointer(), (sbyte)0, Constants.MaxMbPlane * 2 * alignedCols);
+            MemoryUtil.Fill(cm.AboveSegContext.ToPointer(), (sbyte)0, alignedCols);
+
+            LoopFilter.ResetLfm(ref cm);
+
+            GetTileBuffers(ref cm, data, tileCols, tileRows, ref tileBuffers);
+
+            // Load all tile information into tile_data.
+            for (tileRow = 0; tileRow < tileRows; ++tileRow)
+            {
+                for (tileCol = 0; tileCol < tileCols; ++tileCol)
+                {
+                    ref TileBuffer buf = ref tileBuffers[tileRow][tileCol];
+                    ref TileWorkerData tileData = ref cm.TileWorkerData[tileCols * tileRow + tileCol];
+                    tileData.Xd = cm.Mb;
+                    tileData.Xd.Corrupted = false;
+                    // No backward-update counts are collected in frame parallel decoding mode.
+                    tileData.Xd.Counts = cm.FrameParallelDecodingMode ? Ptr<Vp9BackwardUpdates>.Null : cm.Counts;
+                    tileData.Dqcoeff = new Array32<Array32<int>>();
+                    tileData.Xd.Tile.Init(ref cm, tileRow, tileCol);
+                    SetupTokenDecoder(buf.Data, buf.Size, ref cm.Error, ref tileData.BitReader);
+                    cm.InitMacroBlockD(ref tileData.Xd, new ArrayPtr<int>(ref tileData.Dqcoeff[0][0], 32 * 32));
+                }
+            }
+
+            for (tileRow = 0; tileRow < tileRows; ++tileRow)
+            {
+                TileInfo tile = new TileInfo();
+                tile.SetRow(ref cm, tileRow);
+                for (miRow = tile.MiRowStart; miRow < tile.MiRowEnd; miRow += Constants.MiBlockSize)
+                {
+                    for (tileCol = 0; tileCol < tileCols; ++tileCol)
+                    {
+                        int col = tileCol;
+                        ref TileWorkerData tileData = ref cm.TileWorkerData[tileCols * tileRow + col];
+                        tile.SetCol(ref cm, col);
+                        // The left contexts start from zero for each tile column on every pass.
+                        tileData.Xd.LeftContext = new Array3<Array16<sbyte>>();
+                        tileData.Xd.LeftSegContext = new Array8<sbyte>();
+                        for (miCol = tile.MiColStart; miCol < tile.MiColEnd; miCol += Constants.MiBlockSize)
+                        {
+                            DecodePartition(ref tileData, ref cm, miRow, miCol, BlockSize.Block64x64, 4);
+                        }
+                        cm.Mb.Corrupted |= tileData.Xd.Corrupted;
+                        if (cm.Mb.Corrupted)
+                        {
+                            cm.Error.InternalError(CodecErr.CodecCorruptFrame, "Failed to decode tile data");
+                        }
+                    }
+                }
+            }
+
+            // Get last tile data.
+ return cm.TileWorkerData[tileCols * tileRows - 1].BitReader.FindEnd(); + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/DecodeMv.cs b/Ryujinx.Graphics.Nvdec.Vp9/DecodeMv.cs new file mode 100644 index 00000000..96cdd574 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/DecodeMv.cs @@ -0,0 +1,1159 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Nvdec.Vp9.Dsp; +using Ryujinx.Graphics.Nvdec.Vp9.Types; +using Ryujinx.Graphics.Video; +using System; +using System.Diagnostics; +using System.Runtime.CompilerServices; +using Mv = Ryujinx.Graphics.Nvdec.Vp9.Types.Mv; +using MvRef = Ryujinx.Graphics.Nvdec.Vp9.Types.MvRef; + +namespace Ryujinx.Graphics.Nvdec.Vp9 +{ + internal static class DecodeMv + { + private const int MvrefNeighbours = 8; + + private static PredictionMode ReadIntraMode(ref Reader r, ReadOnlySpan p) + { + return (PredictionMode)r.ReadTree(Luts.Vp9IntraModeTree, p); + } + + private static PredictionMode ReadIntraModeY(ref Vp9Common cm, ref MacroBlockD xd, ref Reader r, int sizeGroup) + { + PredictionMode yMode = ReadIntraMode(ref r, cm.Fc.Value.YModeProb[sizeGroup].ToSpan()); + if (!xd.Counts.IsNull) + { + ++xd.Counts.Value.YMode[sizeGroup][(int)yMode]; + } + + return yMode; + } + + private static PredictionMode ReadIntraModeUv(ref Vp9Common cm, ref MacroBlockD xd, ref Reader r, byte yMode) + { + PredictionMode uvMode = ReadIntraMode(ref r, cm.Fc.Value.UvModeProb[yMode].ToSpan()); + if (!xd.Counts.IsNull) + { + ++xd.Counts.Value.UvMode[yMode][(int)uvMode]; + } + + return uvMode; + } + + private static PredictionMode ReadInterMode(ref Vp9Common cm, ref MacroBlockD xd, ref Reader r, int ctx) + { + int mode = r.ReadTree(Luts.Vp9InterModeTree, cm.Fc.Value.InterModeProb[ctx].ToSpan()); + if (!xd.Counts.IsNull) + { + ++xd.Counts.Value.InterMode[ctx][mode]; + } + + return PredictionMode.NearestMv + mode; + } + + private static int ReadSegmentId(ref Reader r, ref Array7 segTreeProbs) + { + return r.ReadTree(Luts.Vp9SegmentTree, segTreeProbs.ToSpan()); + } + 
+ /* Picks the transform-size probability row in fc for the given maxTxSize (8x8, 16x16 or 32x32) and ctx; any other maxTxSize trips a debug assert and yields an empty span. */ + private static ReadOnlySpan GetTxProbs(ref Vp9EntropyProbs fc, TxSize maxTxSize, int ctx) + { + switch (maxTxSize) + { + case TxSize.Tx8x8: return fc.Tx8x8Prob[ctx].ToSpan(); + case TxSize.Tx16x16: return fc.Tx16x16Prob[ctx].ToSpan(); + case TxSize.Tx32x32: return fc.Tx32x32Prob[ctx].ToSpan(); + default: Debug.Assert(false, "Invalid maxTxSize."); return ReadOnlySpan.Empty; + } + } + + /* Same selection as GetTxProbs, but over the counters held in counts. */ + private static Span GetTxCounts(ref Vp9BackwardUpdates counts, TxSize maxTxSize, int ctx) + { + switch (maxTxSize) + { + case TxSize.Tx8x8: return counts.Tx8x8[ctx].ToSpan(); + case TxSize.Tx16x16: return counts.Tx16x16[ctx].ToSpan(); + case TxSize.Tx32x32: return counts.Tx32x32[ctx].ToSpan(); + default: Debug.Assert(false, "Invalid maxTxSize."); return Span.Empty; + } + } + + /* Decodes the transform size with up to three reads: a second read happens only if the first gave something other than Tx4x4 and maxTxSize is at least 16x16, a third only if the result is still not Tx8x8 and maxTxSize is at least 32x32. Bumps the matching counter when xd.Counts is not null. */ + private static TxSize ReadSelectedTxSize(ref Vp9Common cm, ref MacroBlockD xd, TxSize maxTxSize, ref Reader r) + { + int ctx = xd.GetTxSizeContext(); + ReadOnlySpan txProbs = GetTxProbs(ref cm.Fc.Value, maxTxSize, ctx); + TxSize txSize = (TxSize)r.Read(txProbs[0]); + if (txSize != TxSize.Tx4x4 && maxTxSize >= TxSize.Tx16x16) + { + txSize += r.Read(txProbs[1]); + if (txSize != TxSize.Tx8x8 && maxTxSize >= TxSize.Tx32x32) + { + txSize += r.Read(txProbs[2]); + } + } + + if (!xd.Counts.IsNull) + { + ++GetTxCounts(ref xd.Counts.Value, maxTxSize, ctx)[(int)txSize]; + } + + return txSize; + } + + /* Returns the transform size of the current block: decoded via ReadSelectedTxSize when allowSelect is true, the frame uses TxModeSelect and the block is at least 8x8; otherwise the smaller of the block-size limit and the limit looked up for cm.TxMode. */ + private static TxSize ReadTxSize(ref Vp9Common cm, ref MacroBlockD xd, bool allowSelect, ref Reader r) + { + TxMode txMode = cm.TxMode; + BlockSize bsize = xd.Mi[0].Value.SbType; + TxSize maxTxSize = Luts.MaxTxSizeLookup[(int)bsize]; + if (allowSelect && txMode == TxMode.TxModeSelect && bsize >= BlockSize.Block8x8) + { + return ReadSelectedTxSize(ref cm, ref xd, maxTxSize, ref r); + } + else + { + return (TxSize)Math.Min((int)maxTxSize, (int)Luts.TxModeToBiggestTxSize[(int)txMode]); + } + } + + /* Returns the smallest id found in segmentIds over the window of yMis rows by xMis columns that starts at miOffset (row pitch cm.MiCols). */ + private static int DecGetSegmentId(ref Vp9Common cm, ArrayPtr segmentIds, int miOffset, int xMis, int yMis) + { + int x, y, 
segmentId = int.MaxValue; + + for (y = 0; y < yMis; y++) + { + for (x = 0; x < xMis; x++) + { + segmentId = Math.Min(segmentId, segmentIds[miOffset + y * cm.MiCols + x]); + } + } + + Debug.Assert(segmentId >= 0 && segmentId < Constants.MaxSegments); + return segmentId; + } + + /* Stores segmentId in every cell of the yMis by xMis window of cm.CurrentFrameSegMap that starts at miOffset. */ + private static void SetSegmentId(ref Vp9Common cm, int miOffset, int xMis, int yMis, int segmentId) + { + int x, y; + + Debug.Assert(segmentId >= 0 && segmentId < Constants.MaxSegments); + + for (y = 0; y < yMis; y++) + { + for (x = 0; x < xMis; x++) + { + cm.CurrentFrameSegMap[miOffset + y * cm.MiCols + x] = (byte)segmentId; + } + } + } + + /* Fills the yMis by xMis window of currentSegmentIds from the same cells of lastSegmentIds, or with 0 when lastSegmentIds is null. */ + private static void CopySegmentId( + ref Vp9Common cm, + ArrayPtr lastSegmentIds, + ArrayPtr currentSegmentIds, + int miOffset, + int xMis, + int yMis) + { + int x, y; + + for (y = 0; y < yMis; y++) + { + for (x = 0; x < xMis; x++) + { + currentSegmentIds[miOffset + y * cm.MiCols + x] = (byte)(!lastSegmentIds.IsNull ? lastSegmentIds[miOffset + y * cm.MiCols + x] : 0); + } + } + } + + /* Segment id for ReadIntraFrameModeInfo: 0 when segmentation is disabled; 0 after carrying the previous map over when the map is not being updated; otherwise the id decoded from the bitstream, which is also written to the current map. */ + private static int ReadIntraSegmentId(ref Vp9Common cm, int miOffset, int xMis, int yMis, ref Reader r) + { + ref Segmentation seg = ref cm.Seg; + int segmentId; + + if (!seg.Enabled) + { + return 0; // Default for disabled segmentation + } + + if (!seg.UpdateMap) + { + CopySegmentId(ref cm, cm.LastFrameSegMap, cm.CurrentFrameSegMap, miOffset, xMis, yMis); + return 0; + } + + segmentId = ReadSegmentId(ref r, ref cm.Fc.Value.SegTreeProb); + SetSegmentId(ref cm, miOffset, xMis, yMis, segmentId); + return segmentId; + } + + /* Segment id for ReadInterFrameModeInfo. Returns 0 when segmentation is disabled. The prediction is the minimum id of the previous map over the block (0 without a previous map); it is returned as is, after copying the previous map, when the map is not updated. With temporal update one decoded flag (kept in mi.SegIdPredicted) chooses between the prediction and an explicitly coded id; otherwise the id is always coded. The chosen id is written to the current map. */ + private static int ReadInterSegmentId( + ref Vp9Common cm, + ref MacroBlockD xd, + int miRow, + int miCol, + ref Reader r, + int xMis, + int yMis) + { + ref Segmentation seg = ref cm.Seg; + ref ModeInfo mi = ref xd.Mi[0].Value; + int predictedSegmentId, segmentId; + int miOffset = miRow * cm.MiCols + miCol; + + if (!seg.Enabled) + { + return 0; // Default for disabled segmentation + } + + predictedSegmentId = !cm.LastFrameSegMap.IsNull + ? 
DecGetSegmentId(ref cm, cm.LastFrameSegMap, miOffset, xMis, yMis) + : 0; + + if (!seg.UpdateMap) + { + CopySegmentId(ref cm, cm.LastFrameSegMap, cm.CurrentFrameSegMap, miOffset, xMis, yMis); + return predictedSegmentId; + } + + if (seg.TemporalUpdate) + { + byte predProb = Segmentation.GetPredProbSegId(ref cm.Fc.Value.SegPredProb, ref xd); + mi.SegIdPredicted = (sbyte)r.Read(predProb); + segmentId = mi.SegIdPredicted != 0 ? predictedSegmentId : ReadSegmentId(ref r, ref cm.Fc.Value.SegTreeProb); + } + else + { + segmentId = ReadSegmentId(ref r, ref cm.Fc.Value.SegTreeProb); + } + SetSegmentId(ref cm, miOffset, xMis, yMis, segmentId); + return segmentId; + } + + /* Returns 1 if the SegLvlSkip feature is active for segmentId; otherwise decodes the skip flag with the context taken from xd and counts it when xd.Counts is not null. */ + private static int ReadSkip(ref Vp9Common cm, ref MacroBlockD xd, int segmentId, ref Reader r) + { + if (cm.Seg.IsSegFeatureActive(segmentId, SegLvlFeatures.SegLvlSkip) != 0) + { + return 1; + } + else + { + int ctx = xd.GetSkipContext(); + int skip = r.Read(cm.Fc.Value.SkipProb[ctx]); + if (!xd.Counts.IsNull) + { + ++xd.Counts.Value.Skip[ctx][skip]; + } + + return skip; + } + } + + /* Decodes one signed motion vector difference component; mvcomp selects the per-component tables in fc and usehp tells whether the high precision bit is coded. */ + private static int ReadMvComponent(ref Reader r, ref Vp9EntropyProbs fc, int mvcomp, bool usehp) + { + int mag, d, fr, hp; + bool sign = r.Read(fc.Sign[mvcomp]) != 0; + MvClassType mvClass = (MvClassType)r.ReadTree(Luts.Vp9MvClassTree, fc.Classes[mvcomp].ToSpan()); + bool class0 = mvClass == MvClassType.MvClass0; + + // Integer part + if (class0) + { + d = r.Read(fc.Class0[mvcomp][0]); + mag = 0; + } + else + { + int i; + int n = (int)mvClass + Constants.Class0Bits - 1; // Number of bits + + d = 0; + for (i = 0; i < n; ++i) + { + d |= r.Read(fc.Bits[mvcomp][i]) << i; + } + + mag = Constants.Class0Size << ((int)mvClass + 2); + } + + // Fractional part + fr = r.ReadTree(Luts.Vp9MvFPTree, class0 ? fc.Class0Fp[mvcomp][d].ToSpan() : fc.Fp[mvcomp].ToSpan()); + + // High precision part (if hp is not used, the default value of the hp is 1) + hp = usehp ? r.Read(class0 ? 
fc.Class0Hp[mvcomp] : fc.Hp[mvcomp]) : 1; + + // Result + mag += ((d << 3) | (fr << 1) | hp) + 1; + return sign ? -mag : mag; + } + + /* Decodes a motion vector difference (row and/or column, as indicated by the decoded joint type), passes it to IncMv together with counts, and stores refr plus the difference in mv. */ + private static void ReadMv( + ref Reader r, + ref Mv mv, + ref Mv refr, + ref Vp9EntropyProbs fc, + Ptr counts, + bool allowHP) + { + MvJointType jointType = (MvJointType)r.ReadTree(Luts.Vp9MvJointTree, fc.Joints.ToSpan()); + bool useHP = allowHP && refr.UseMvHp(); + Mv diff = new Mv(); + + if (Mv.MvJointVertical(jointType)) + { + diff.Row = (short)ReadMvComponent(ref r, ref fc, 0, useHP); + } + + if (Mv.MvJointHorizontal(jointType)) + { + diff.Col = (short)ReadMvComponent(ref r, ref fc, 1, useHP); + } + + diff.IncMv(counts); + + mv.Row = (short)(refr.Row + diff.Row); + mv.Col = (short)(refr.Col + diff.Col); + } + + /* Returns cm.ReferenceMode, except under ReferenceModeSelect, where the mode is decoded per block and counted. */ + private static ReferenceMode ReadBlockReferenceMode(ref Vp9Common cm, ref MacroBlockD xd, ref Reader r) + { + if (cm.ReferenceMode == ReferenceMode.ReferenceModeSelect) + { + int ctx = PredCommon.GetReferenceModeContext(ref cm, ref xd); + ReferenceMode mode = (ReferenceMode)r.Read(cm.Fc.Value.CompInterProb[ctx]); + if (!xd.Counts.IsNull) + { + ++xd.Counts.Value.CompInter[ctx][(int)mode]; + } + + return mode; // SingleReference or CompoundReference + } + else + { + return cm.ReferenceMode; + } + } + + // Read the reference frame(s) into refFrame: taken from the segment feature when it is active, otherwise decoded from the bitstream + private static void ReadRefFrames( + ref Vp9Common cm, + ref MacroBlockD xd, + ref Reader r, + int segmentId, + ref Array2 refFrame) + { + ref Vp9EntropyProbs fc = ref cm.Fc.Value; + + if (cm.Seg.IsSegFeatureActive(segmentId, SegLvlFeatures.SegLvlRefFrame) != 0) + { + refFrame[0] = (sbyte)cm.Seg.GetSegData(segmentId, SegLvlFeatures.SegLvlRefFrame); + refFrame[1] = Constants.None; + } + else + { + ReferenceMode mode = ReadBlockReferenceMode(ref cm, ref xd, ref r); + if (mode == ReferenceMode.CompoundReference) + { + int idx = cm.RefFrameSignBias[cm.CompFixedRef]; + int ctx = PredCommon.GetPredContextCompRefP(ref cm, ref xd); + int bit = r.Read(fc.CompRefProb[ctx]); + if (!xd.Counts.IsNull) + { + 
++xd.Counts.Value.CompRef[ctx][bit]; + } + + refFrame[idx] = cm.CompFixedRef; + refFrame[idx == 0 ? 1 : 0] = cm.CompVarRef[bit]; + } + else if (mode == ReferenceMode.SingleReference) + { + int ctx0 = PredCommon.GetPredContextSingleRefP1(ref xd); + int bit0 = r.Read(fc.SingleRefProb[ctx0][0]); + if (!xd.Counts.IsNull) + { + ++xd.Counts.Value.SingleRef[ctx0][0][bit0]; + } + + if (bit0 != 0) + { + int ctx1 = PredCommon.GetPredContextSingleRefP2(ref xd); + int bit1 = r.Read(fc.SingleRefProb[ctx1][1]); + if (!xd.Counts.IsNull) + { + ++xd.Counts.Value.SingleRef[ctx1][1][bit1]; + } + + refFrame[0] = (sbyte)(bit1 != 0 ? Constants.AltRefFrame : Constants.GoldenFrame); + } + else + { + refFrame[0] = Constants.LastFrame; + } + + refFrame[1] = Constants.None; + } + else + { + Debug.Assert(false, "Invalid prediction mode."); + } + } + } + + /* Decodes the interpolation filter type using the context taken from xd and counts it when xd.Counts is not null. */ + private static byte ReadSwitchableInterpFilter(ref Vp9Common cm, ref MacroBlockD xd, ref Reader r) + { + int ctx = xd.GetPredContextSwitchableInterp(); + byte type = (byte)r.ReadTree(Luts.Vp9SwitchableInterpTree, cm.Fc.Value.SwitchableInterpProb[ctx].ToSpan()); + if (!xd.Counts.IsNull) + { + ++xd.Counts.Value.SwitchableInterp[ctx][type]; + } + + return type; + } + + /* Called by ReadInterFrameModeInfo for blocks that are not inter coded: decodes the luma modes (per sub-block for sizes below 8x8) and the chroma mode into mi, then sets the fixed InterpFilter and RefFrame values. */ + private static void ReadIntraBlockModeInfo(ref Vp9Common cm, ref MacroBlockD xd, ref ModeInfo mi, ref Reader r) + { + BlockSize bsize = mi.SbType; + int i; + + switch (bsize) + { + case BlockSize.Block4x4: + for (i = 0; i < 4; ++i) + { + mi.Bmi[i].Mode = ReadIntraModeY(ref cm, ref xd, ref r, 0); + } + + mi.Mode = mi.Bmi[3].Mode; + break; + case BlockSize.Block4x8: + mi.Bmi[0].Mode = mi.Bmi[2].Mode = ReadIntraModeY(ref cm, ref xd, ref r, 0); + mi.Bmi[1].Mode = mi.Bmi[3].Mode = mi.Mode = ReadIntraModeY(ref cm, ref xd, ref r, 0); + break; + case BlockSize.Block8x4: + mi.Bmi[0].Mode = mi.Bmi[1].Mode = ReadIntraModeY(ref cm, ref xd, ref r, 0); + mi.Bmi[2].Mode = mi.Bmi[3].Mode = mi.Mode = ReadIntraModeY(ref cm, ref xd, ref r, 0); + break; + default: mi.Mode = ReadIntraModeY(ref cm, 
ref xd, ref r, Luts.SizeGroupLookup[(int)bsize]); break; + } + + mi.UvMode = ReadIntraModeUv(ref cm, ref xd, ref r, (byte)mi.Mode); + + // Initialize interp_filter here so we do not have to check for inter block + // modes in GetPredContextSwitchableInterp() + mi.InterpFilter = Constants.SwitchableFilters; + + mi.RefFrame[0] = Constants.IntraFrame; + mi.RefFrame[1] = Constants.None; + } + + /* True when both components of mv lie strictly between Constants.MvLow and Constants.MvUpp. */ + private static bool IsMvValid(ref Mv mv) + { + return mv.Row > Constants.MvLow && + mv.Row < Constants.MvUpp && + mv.Col > Constants.MvLow && + mv.Col < Constants.MvUpp; + } + + /* Copies both entries of src into dst. */ + private static void CopyMvPair(ref Array2 dst, ref Array2 src) + { + dst[0] = src[0]; + dst[1] = src[1]; + } + + /* Resets both entries of dst to a default Mv. */ + private static void ZeroMvPair(ref Array2 dst) + { + dst[0] = new Mv(); + dst[1] = new Mv(); + } + + /* Fills mv according to mode: NewMv decodes one vector, or two when isCompound is 1, relative to refMv and validates each with IsMvValid; NearMv and NearestMv copy nearNearestMv; ZeroMv clears the pair. Returns false for an invalid decoded vector or for any other mode. */ + private static bool AssignMv( + ref Vp9Common cm, + ref MacroBlockD xd, + PredictionMode mode, + ref Array2 mv, + ref Array2 refMv, + ref Array2 nearNearestMv, + int isCompound, + bool allowHP, + ref Reader r) + { + int i; + bool ret = true; + + switch (mode) + { + case PredictionMode.NewMv: + { + for (i = 0; i < 1 + isCompound; ++i) + { + ReadMv(ref r, ref mv[i], ref refMv[i], ref cm.Fc.Value, xd.Counts, allowHP); + ret = ret && IsMvValid(ref mv[i]); + } + break; + } + case PredictionMode.NearMv: + case PredictionMode.NearestMv: + { + CopyMvPair(ref mv, ref nearNearestMv); + break; + } + case PredictionMode.ZeroMv: + { + ZeroMvPair(ref mv); + break; + } + default: return false; + } + return ret; + } + + /* Tells whether the block is inter coded: dictated by the SegLvlRefFrame segment feature when it is active, otherwise decoded from the bitstream and counted. */ + private static bool ReadIsInterBlock(ref Vp9Common cm, ref MacroBlockD xd, int segmentId, ref Reader r) + { + if (cm.Seg.IsSegFeatureActive(segmentId, SegLvlFeatures.SegLvlRefFrame) != 0) + { + return cm.Seg.GetSegData(segmentId, SegLvlFeatures.SegLvlRefFrame) != Constants.IntraFrame; + } + else + { + int ctx = xd.GetIntraInterContext(); + bool isInter = r.Read(cm.Fc.Value.IntraInterProb[ctx]) != 0; + if (!xd.Counts.IsNull) + { + ++xd.Counts.Value.IntraInter[ctx][isInter ? 
1 : 0]; + } + + return isInter; + } + } + + /* Lowers the precision of the first refmvCount entries of mvlist according to allowHP; bestMv ends up equal to the last of them and is left untouched when refmvCount is 0. */ + private static void DecFindBestRefMvs(bool allowHP, Span mvlist, ref Mv bestMv, int refmvCount) + { + int i; + + // Make sure all the candidates are properly clamped etc + for (i = 0; i < refmvCount; ++i) + { + mvlist[i].LowerMvPrecision(allowHP); + bestMv = mvlist[i]; + } + } + + /* Offers mv to mvRefList. Empty list: mv is stored and the result is earlyBreak. Non-empty list: mv is appended only if it compares different from entry 0, and the result is then true. Callers stop searching when true is returned. */ + private static bool AddMvRefListEb(Mv mv, ref int refMvCount, Span mvRefList, bool earlyBreak) + { + if (refMvCount != 0) + { + if (Unsafe.As(ref mv) != Unsafe.As(ref mvRefList[0])) + { + mvRefList[refMvCount] = mv; + refMvCount++; + return true; + } + } + else + { + mvRefList[refMvCount++] = mv; + if (earlyBreak) + { + return true; + } + } + + return false; + } + + // Performs mv sign inversion if indicated by the reference frame combination. + private static Mv ScaleMv(ref ModeInfo mi, int refr, sbyte thisRefFrame, ref Array4 refSignBias) + { + Mv mv = mi.Mv[refr]; + if (refSignBias[mi.RefFrame[refr]] != refSignBias[thisRefFrame]) + { + mv.Row *= -1; + mv.Col *= -1; + } + return mv; + } + + /* For an inter block, offers each of its vectors whose reference frame differs from refFrame to AddMvRefListEb after ScaleMv; the second vector is skipped when it equals the first. Returns true as soon as AddMvRefListEb does. */ + private static bool IsDiffRefFrameAddMvEb( + ref ModeInfo mbmi, + sbyte refFrame, + ref Array4 refSignBias, + ref int refmvCount, + Span mvRefList, + bool earlyBreak) + { + if (mbmi.IsInterBlock()) + { + if (mbmi.RefFrame[0] != refFrame) + { + if (AddMvRefListEb(ScaleMv(ref mbmi, 0, refFrame, ref refSignBias), ref refmvCount, mvRefList, earlyBreak)) + { + return true; + } + } + if (mbmi.HasSecondRef() && mbmi.RefFrame[1] != refFrame && Unsafe.As(ref mbmi.Mv[1]) != Unsafe.As(ref mbmi.Mv[0])) + { + if (AddMvRefListEb(ScaleMv(ref mbmi, 1, refFrame, ref refSignBias), ref refmvCount, mvRefList, earlyBreak)) + { + return true; + } + } + + } + return false; + } + + // This function searches the neighborhood of a given MB/SB + // to try and find candidate reference vectors. 
+ private static unsafe int DecFindMvRefs( + ref Vp9Common cm, + ref MacroBlockD xd, + PredictionMode mode, + sbyte refFrame, + Span mvRefSearch, + Span mvRefList, + int miRow, + int miCol, + int block, + int isSub8X8) + { + ref Array4 refSignBias = ref cm.RefFrameSignBias; + int i, refmvCount = 0; + bool differentRefFound = false; + Ptr prevFrameMvs = cm.UsePrevFrameMvs ? new Ptr(ref cm.PrevFrameMvs[miRow * cm.MiCols + miCol]) : Ptr.Null; + ref TileInfo tile = ref xd.Tile; + // If mode is nearestmv or newmv (uses nearestmv as a reference) then stop + // searching after the first mv is found. + bool earlyBreak = mode != PredictionMode.NearMv; + + // Blank the reference vector list + mvRefList.Slice(0, Constants.MaxMvRefCandidates).Fill(new Mv()); + + i = 0; + if (isSub8X8 != 0) + { + // If the size < 8x8 we get the mv from the bmi substructure for the + // nearest two blocks. + for (i = 0; i < 2; ++i) + { + ref Position mvRef = ref mvRefSearch[i]; + if (tile.IsInside(miCol, miRow, cm.MiRows, ref mvRef)) + { + ref ModeInfo candidateMi = ref xd.Mi[mvRef.Col + mvRef.Row * xd.MiStride].Value; + differentRefFound = true; + + if (candidateMi.RefFrame[0] == refFrame) + { + if (AddMvRefListEb(candidateMi.GetSubBlockMv(0, mvRef.Col, block), ref refmvCount, mvRefList, earlyBreak)) + { + goto Done; + } + } + else if (candidateMi.RefFrame[1] == refFrame) + { + if (AddMvRefListEb(candidateMi.GetSubBlockMv(1, mvRef.Col, block), ref refmvCount, mvRefList, earlyBreak)) + { + goto Done; + } + } + } + } + } + + // Check the rest of the neighbors in much the same way + // as before except we don't need to keep track of sub blocks or + // mode counts. 
+ for (; i < MvrefNeighbours; ++i) + { + ref Position mvRef = ref mvRefSearch[i]; + if (tile.IsInside(miCol, miRow, cm.MiRows, ref mvRef)) + { + ref ModeInfo candidate = ref xd.Mi[mvRef.Col + mvRef.Row * xd.MiStride].Value; + differentRefFound = true; + + if (candidate.RefFrame[0] == refFrame) + { + if (AddMvRefListEb(candidate.Mv[0], ref refmvCount, mvRefList, earlyBreak)) + { + goto Done; + } + } + else if (candidate.RefFrame[1] == refFrame) + { + if (AddMvRefListEb(candidate.Mv[1], ref refmvCount, mvRefList, earlyBreak)) + { + goto Done; + } + } + } + } + + // Check the last frame's mode and mv info. + if (!prevFrameMvs.IsNull) + { + if (prevFrameMvs.Value.RefFrame[0] == refFrame) + { + if (AddMvRefListEb(prevFrameMvs.Value.Mv[0], ref refmvCount, mvRefList, earlyBreak)) + { + goto Done; + } + } + else if (prevFrameMvs.Value.RefFrame[1] == refFrame) + { + if (AddMvRefListEb(prevFrameMvs.Value.Mv[1], ref refmvCount, mvRefList, earlyBreak)) + { + goto Done; + } + } + } + + // Since we couldn't find 2 mvs from the same reference frame + // go back through the neighbors and find motion vectors from + // different reference frames. + if (differentRefFound) + { + for (i = 0; i < MvrefNeighbours; ++i) + { + ref Position mvRef = ref mvRefSearch[i]; + if (tile.IsInside(miCol, miRow, cm.MiRows, ref mvRef)) + { + ref ModeInfo candidate = ref xd.Mi[mvRef.Col + mvRef.Row * xd.MiStride].Value; + + // If the candidate is Intra we don't want to consider its mv. + if (IsDiffRefFrameAddMvEb(ref candidate, refFrame, ref refSignBias, ref refmvCount, mvRefList, earlyBreak)) + { + goto Done; + } + } + } + } + + // Since we still don't have a candidate we'll try the last frame. 
+ if (!prevFrameMvs.IsNull) + { + if (prevFrameMvs.Value.RefFrame[0] != refFrame && prevFrameMvs.Value.RefFrame[0] > Constants.IntraFrame) + { + Mv mv = prevFrameMvs.Value.Mv[0]; + if (refSignBias[prevFrameMvs.Value.RefFrame[0]] != refSignBias[refFrame]) + { + mv.Row *= -1; + mv.Col *= -1; + } + if (AddMvRefListEb(mv, ref refmvCount, mvRefList, earlyBreak)) + { + goto Done; + } + } + + if (prevFrameMvs.Value.RefFrame[1] > Constants.IntraFrame && + prevFrameMvs.Value.RefFrame[1] != refFrame && + Unsafe.As(ref prevFrameMvs.Value.Mv[1]) != Unsafe.As(ref prevFrameMvs.Value.Mv[0])) + { + Mv mv = prevFrameMvs.Value.Mv[1]; + if (refSignBias[prevFrameMvs.Value.RefFrame[1]] != refSignBias[refFrame]) + { + mv.Row *= -1; + mv.Col *= -1; + } + if (AddMvRefListEb(mv, ref refmvCount, mvRefList, earlyBreak)) + { + goto Done; + } + } + } + + if (mode == PredictionMode.NearMv) + { + refmvCount = Constants.MaxMvRefCandidates; + } + else + { + // We only care about the nearestmv for the remaining modes + refmvCount = 1; + } + + Done: + // Clamp vectors + for (i = 0; i < refmvCount; ++i) + { + mvRefList[i].ClampMvRef(ref xd); + } + + return refmvCount; + } + + private static void AppendSub8x8MvsForIdx( + ref Vp9Common cm, + ref MacroBlockD xd, + Span mvRefSearch, + PredictionMode bMode, + int block, + int refr, + int miRow, + int miCol, + ref Mv bestSub8x8) + { + Span mvList = stackalloc Mv[Constants.MaxMvRefCandidates]; + ref ModeInfo mi = ref xd.Mi[0].Value; + ref Array4 bmi = ref mi.Bmi; + int n; + int refmvCount; + + Debug.Assert(Constants.MaxMvRefCandidates == 2); + + refmvCount = DecFindMvRefs(ref cm, ref xd, bMode, mi.RefFrame[refr], mvRefSearch, mvList, miRow, miCol, block, 1); + + switch (block) + { + case 0: bestSub8x8 = mvList[refmvCount - 1]; break; + case 1: + case 2: + if (bMode == PredictionMode.NearestMv) + { + bestSub8x8 = bmi[0].Mv[refr]; + } + else + { + bestSub8x8 = new Mv(); + for (n = 0; n < refmvCount; ++n) + { + if (Unsafe.As(ref bmi[0].Mv[refr]) != 
Unsafe.As(ref mvList[n])) + { + bestSub8x8 = mvList[n]; + break; + } + } + } + break; + case 3: + if (bMode == PredictionMode.NearestMv) + { + bestSub8x8 = bmi[2].Mv[refr]; + } + else + { + Span candidates = stackalloc Mv[2 + Constants.MaxMvRefCandidates]; + candidates[0] = bmi[1].Mv[refr]; + candidates[1] = bmi[0].Mv[refr]; + candidates[2] = mvList[0]; + candidates[3] = mvList[1]; + bestSub8x8 = new Mv(); + for (n = 0; n < 2 + Constants.MaxMvRefCandidates; ++n) + { + if (Unsafe.As(ref bmi[2].Mv[refr]) != Unsafe.As(ref candidates[n])) + { + bestSub8x8 = candidates[n]; + break; + } + } + } + break; + default: Debug.Assert(false, "Invalid block index."); break; + } + } + + private static byte GetModeContext(ref Vp9Common cm, ref MacroBlockD xd, Span mvRefSearch, int miRow, int miCol) + { + int i; + int contextCounter = 0; + ref TileInfo tile = ref xd.Tile; + + // Get mode count from nearest 2 blocks + for (i = 0; i < 2; ++i) + { + ref Position mvRef = ref mvRefSearch[i]; + if (tile.IsInside(miCol, miRow, cm.MiRows, ref mvRef)) + { + ref ModeInfo candidate = ref xd.Mi[mvRef.Col + mvRef.Row * xd.MiStride].Value; + // Keep counts for entropy encoding. + contextCounter += Luts.Mode2Counter[(int)candidate.Mode]; + } + } + + return (byte)Luts.CounterToContext[contextCounter]; + } + + private static void ReadInterBlockModeInfo( + ref Vp9Common cm, + ref MacroBlockD xd, + ref ModeInfo mi, + int miRow, + int miCol, + ref Reader r) + { + BlockSize bsize = mi.SbType; + bool allowHP = cm.AllowHighPrecisionMv; + Array2 bestRefMvs = new Array2(); + int refr, isCompound; + byte interModeCtx; + Span mvRefSearch = Luts.MvRefBlocks[(int)bsize]; + + ReadRefFrames(ref cm, ref xd, ref r, mi.SegmentId, ref mi.RefFrame); + isCompound = mi.HasSecondRef() ? 
1 : 0; + interModeCtx = GetModeContext(ref cm, ref xd, mvRefSearch, miRow, miCol); + + if (cm.Seg.IsSegFeatureActive(mi.SegmentId, SegLvlFeatures.SegLvlSkip) != 0) + { + mi.Mode = PredictionMode.ZeroMv; + if (bsize < BlockSize.Block8x8) + { + xd.ErrorInfo.Value.InternalError(CodecErr.CodecUnsupBitstream, "Invalid usage of segement feature on small blocks"); + return; + } + } + else + { + if (bsize >= BlockSize.Block8x8) + { + mi.Mode = ReadInterMode(ref cm, ref xd, ref r, interModeCtx); + } + else + { + // Sub 8x8 blocks use the nearestmv as a ref_mv if the bMode is NewMv. + // Setting mode to NearestMv forces the search to stop after the nearestmv + // has been found. After bModes have been read, mode will be overwritten + // by the last bMode. + mi.Mode = PredictionMode.NearestMv; + } + + if (mi.Mode != PredictionMode.ZeroMv) + { + for (refr = 0; refr < 1 + isCompound; ++refr) + { + Span tmpMvs = stackalloc Mv[Constants.MaxMvRefCandidates]; + sbyte frame = mi.RefFrame[refr]; + int refmvCount; + + refmvCount = DecFindMvRefs(ref cm, ref xd, mi.Mode, frame, mvRefSearch, tmpMvs, miRow, miCol, -1, 0); + + DecFindBestRefMvs(allowHP, tmpMvs, ref bestRefMvs[refr], refmvCount); + } + } + } + + mi.InterpFilter = (cm.InterpFilter == Constants.Switchable) ? ReadSwitchableInterpFilter(ref cm, ref xd, ref r) : cm.InterpFilter; + + if (bsize < BlockSize.Block8x8) + { + int num4X4W = 1 << xd.BmodeBlocksWl; + int num4X4H = 1 << xd.BmodeBlocksHl; + int idx, idy; + PredictionMode bMode = 0; + Array2 bestSub8x8 = new Array2(); + const uint invalidMv = 0x80008000; + // Initialize the 2nd element as even though it won't be used meaningfully + // if isCompound is false. 
+ Unsafe.As(ref bestSub8x8[1]) = invalidMv; + for (idy = 0; idy < 2; idy += num4X4H) + { + for (idx = 0; idx < 2; idx += num4X4W) + { + int j = idy * 2 + idx; + bMode = ReadInterMode(ref cm, ref xd, ref r, interModeCtx); + + if (bMode == PredictionMode.NearestMv || bMode == PredictionMode.NearMv) + { + for (refr = 0; refr < 1 + isCompound; ++refr) + { + AppendSub8x8MvsForIdx(ref cm, ref xd, mvRefSearch, bMode, j, refr, miRow, miCol, ref bestSub8x8[refr]); + } + } + + if (!AssignMv(ref cm, ref xd, bMode, ref mi.Bmi[j].Mv, ref bestRefMvs, ref bestSub8x8, isCompound, allowHP, ref r)) + { + xd.Corrupted |= true; + break; + } + + if (num4X4H == 2) + { + mi.Bmi[j + 2] = mi.Bmi[j]; + } + + if (num4X4W == 2) + { + mi.Bmi[j + 1] = mi.Bmi[j]; + } + } + } + + mi.Mode = bMode; + + CopyMvPair(ref mi.Mv, ref mi.Bmi[3].Mv); + } + else + { + xd.Corrupted |= !AssignMv(ref cm, ref xd, mi.Mode, ref mi.Mv, ref bestRefMvs, ref bestRefMvs, isCompound, allowHP, ref r); + } + } + + private static void ReadInterFrameModeInfo( + ref Vp9Common cm, + ref MacroBlockD xd, + int miRow, + int miCol, + ref Reader r, + int xMis, + int yMis) + { + ref ModeInfo mi = ref xd.Mi[0].Value; + bool interBlock; + + mi.SegmentId = (sbyte)ReadInterSegmentId(ref cm, ref xd, miRow, miCol, ref r, xMis, yMis); + mi.Skip = (sbyte)ReadSkip(ref cm, ref xd, mi.SegmentId, ref r); + interBlock = ReadIsInterBlock(ref cm, ref xd, mi.SegmentId, ref r); + mi.TxSize = ReadTxSize(ref cm, ref xd, mi.Skip == 0 || !interBlock, ref r); + + if (interBlock) + { + ReadInterBlockModeInfo(ref cm, ref xd, ref mi, miRow, miCol, ref r); + } + else + { + ReadIntraBlockModeInfo(ref cm, ref xd, ref mi, ref r); + } + } + + private static PredictionMode LeftBlockMode(Ptr curMi, Ptr leftMi, int b) + { + if (b == 0 || b == 2) + { + if (leftMi.IsNull || leftMi.Value.IsInterBlock()) + { + return PredictionMode.DcPred; + } + + return leftMi.Value.GetYMode(b + 1); + } + else + { + Debug.Assert(b == 1 || b == 3); + return curMi.Value.Bmi[b - 
1].Mode; + } + } + + private static PredictionMode AboveBlockMode(Ptr curMi, Ptr aboveMi, int b) + { + if (b == 0 || b == 1) + { + if (aboveMi.IsNull || aboveMi.Value.IsInterBlock()) + { + return PredictionMode.DcPred; + } + + return aboveMi.Value.GetYMode(b + 2); + } + else + { + Debug.Assert(b == 2 || b == 3); + return curMi.Value.Bmi[b - 2].Mode; + } + } + + private static ReadOnlySpan GetYModeProbs( + ref Vp9EntropyProbs fc, + Ptr mi, + Ptr aboveMi, + Ptr leftMi, + int block) + { + PredictionMode above = AboveBlockMode(mi, aboveMi, block); + PredictionMode left = LeftBlockMode(mi, leftMi, block); + return fc.KfYModeProb[(int)above][(int)left].ToSpan(); + } + + private static void ReadIntraFrameModeInfo( + ref Vp9Common cm, + ref MacroBlockD xd, + int miRow, + int miCol, + ref Reader r, + int xMis, + int yMis) + { + Ptr mi = xd.Mi[0]; + Ptr aboveMi = xd.AboveMi; + Ptr leftMi = xd.LeftMi; + BlockSize bsize = mi.Value.SbType; + int i; + int miOffset = miRow * cm.MiCols + miCol; + + mi.Value.SegmentId = (sbyte)ReadIntraSegmentId(ref cm, miOffset, xMis, yMis, ref r); + mi.Value.Skip = (sbyte)ReadSkip(ref cm, ref xd, mi.Value.SegmentId, ref r); + mi.Value.TxSize = ReadTxSize(ref cm, ref xd, true, ref r); + mi.Value.RefFrame[0] = Constants.IntraFrame; + mi.Value.RefFrame[1] = Constants.None; + + switch (bsize) + { + case BlockSize.Block4x4: + for (i = 0; i < 4; ++i) + { + mi.Value.Bmi[i].Mode = + ReadIntraMode(ref r, GetYModeProbs(ref cm.Fc.Value, mi, aboveMi, leftMi, i)); + } + + mi.Value.Mode = mi.Value.Bmi[3].Mode; + break; + case BlockSize.Block4x8: + mi.Value.Bmi[0].Mode = mi.Value.Bmi[2].Mode = + ReadIntraMode(ref r, GetYModeProbs(ref cm.Fc.Value, mi, aboveMi, leftMi, 0)); + mi.Value.Bmi[1].Mode = mi.Value.Bmi[3].Mode = mi.Value.Mode = + ReadIntraMode(ref r, GetYModeProbs(ref cm.Fc.Value, mi, aboveMi, leftMi, 1)); + break; + case BlockSize.Block8x4: + mi.Value.Bmi[0].Mode = mi.Value.Bmi[1].Mode = + ReadIntraMode(ref r, GetYModeProbs(ref cm.Fc.Value, mi, aboveMi, 
leftMi, 0)); + mi.Value.Bmi[2].Mode = mi.Value.Bmi[3].Mode = mi.Value.Mode = + ReadIntraMode(ref r, GetYModeProbs(ref cm.Fc.Value, mi, aboveMi, leftMi, 2)); + break; + default: + mi.Value.Mode = ReadIntraMode(ref r, GetYModeProbs(ref cm.Fc.Value, mi, aboveMi, leftMi, 0)); + break; + } + + mi.Value.UvMode = ReadIntraMode(ref r, cm.Fc.Value.KfUvModeProb[(int)mi.Value.Mode].ToSpan()); + } + + private static void CopyRefFramePair(ref Array2 dst, ref Array2 src) + { + dst[0] = src[0]; + dst[1] = src[1]; + } + + public static void ReadModeInfo( + ref TileWorkerData twd, + ref Vp9Common cm, + int miRow, + int miCol, + int xMis, + int yMis) + { + ref Reader r = ref twd.BitReader; + ref MacroBlockD xd = ref twd.Xd; + ref ModeInfo mi = ref xd.Mi[0].Value; + ArrayPtr frameMvs = cm.CurFrameMvs.Slice(miRow * cm.MiCols + miCol); + int w, h; + + if (cm.FrameIsIntraOnly()) + { + ReadIntraFrameModeInfo(ref cm, ref xd, miRow, miCol, ref r, xMis, yMis); + } + else + { + ReadInterFrameModeInfo(ref cm, ref xd, miRow, miCol, ref r, xMis, yMis); + + for (h = 0; h < yMis; ++h) + { + for (w = 0; w < xMis; ++w) + { + ref MvRef mv = ref frameMvs[w]; + CopyRefFramePair(ref mv.RefFrame, ref mi.RefFrame); + CopyMvPair(ref mv.Mv, ref mi.Mv); + } + frameMvs = frameMvs.Slice(cm.MiCols); + } + } + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Decoder.cs b/Ryujinx.Graphics.Nvdec.Vp9/Decoder.cs new file mode 100644 index 00000000..df3199cf --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Decoder.cs @@ -0,0 +1,164 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Nvdec.Vp9.Common; +using Ryujinx.Graphics.Nvdec.Vp9.Types; +using Ryujinx.Graphics.Video; +using System; +using Vp9MvRef = Ryujinx.Graphics.Video.Vp9MvRef; + +namespace Ryujinx.Graphics.Nvdec.Vp9 +{ + public class Decoder : IVp9Decoder + { + public bool IsHardwareAccelerated => false; + + private readonly MemoryAllocator _allocator = new MemoryAllocator(); + + public ISurface CreateSurface(int width, int height) => new Surface(width, 
height);
+
+        // Maps the filter index carried by the picture info (when it is not Constants.Switchable)
+        // to the decoder's own filter constant; see its use in Decode.
+        private static readonly byte[] LiteralToFilter = new byte[]
+        {
+            Constants.EightTapSmooth,
+            Constants.EightTap,
+            Constants.EightTapSharp,
+            Constants.Bilinear
+        };
+
+        // Decodes one VP9 frame in software.
+        //
+        // pictureInfo: frame header state; its Entropy and BackwardUpdateCounts members are
+        //              referenced through cm.Fc / cm.Counts while decoding.
+        // output:      destination surface; also supplies the frame width and height.
+        // bitstream:   compressed data handed to DecodeFrame.DecodeTiles.
+        // mvsIn:       motion vectors loaded into cm.PrevFrameMvs before decoding.
+        // mvsOut:      receives cm.CurFrameMvs after a successful decode.
+        //
+        // Returns true on success, false when DecodeTiles throws InternalErrorException.
+        // Throws ArgumentException (from SetMvs / GetMvs) when a motion vector span is longer
+        // than the matching internal buffer.
+        public unsafe bool Decode(
+            ref Vp9PictureInfo pictureInfo,
+            ISurface output,
+            ReadOnlySpan bitstream,
+            ReadOnlySpan mvsIn,
+            Span mvsOut)
+        {
+            Vp9Common cm = new Vp9Common();
+
+            cm.FrameType = pictureInfo.IsKeyFrame ? FrameType.KeyFrame : FrameType.InterFrame;
+            cm.IntraOnly = pictureInfo.IntraOnly;
+
+            cm.Width = output.Width;
+            cm.Height = output.Height;
+
+            cm.UsePrevFrameMvs = pictureInfo.UsePrevInFindMvRefs;
+
+            cm.RefFrameSignBias = pictureInfo.RefFrameSignBias;
+
+            cm.BaseQindex = pictureInfo.BaseQIndex;
+            cm.YDcDeltaQ = pictureInfo.YDcDeltaQ;
+            cm.UvAcDeltaQ = pictureInfo.UvAcDeltaQ;
+            cm.UvDcDeltaQ = pictureInfo.UvDcDeltaQ;
+
+            cm.Mb.Lossless = pictureInfo.Lossless;
+
+            cm.TxMode = (TxMode)pictureInfo.TransformMode;
+
+            cm.AllowHighPrecisionMv = pictureInfo.AllowHighPrecisionMv;
+
+            cm.InterpFilter = (byte)pictureInfo.InterpFilter;
+
+            if (cm.InterpFilter != Constants.Switchable)
+            {
+                cm.InterpFilter = LiteralToFilter[cm.InterpFilter];
+            }
+
+            cm.ReferenceMode = (ReferenceMode)pictureInfo.ReferenceMode;
+
+            cm.CompFixedRef = pictureInfo.CompFixedRef;
+            cm.CompVarRef = pictureInfo.CompVarRef;
+
+            cm.Log2TileCols = pictureInfo.Log2TileCols;
+            cm.Log2TileRows = pictureInfo.Log2TileRows;
+
+            cm.Seg.Enabled = pictureInfo.SegmentEnabled;
+            cm.Seg.UpdateMap = pictureInfo.SegmentMapUpdate;
+            cm.Seg.TemporalUpdate = pictureInfo.SegmentMapTemporalUpdate;
+            cm.Seg.AbsDelta = (byte)pictureInfo.SegmentAbsDelta;
+            cm.Seg.FeatureMask = pictureInfo.SegmentFeatureEnable;
+            cm.Seg.FeatureData = pictureInfo.SegmentFeatureData;
+
+            cm.Lf.ModeRefDeltaEnabled = pictureInfo.ModeRefDeltaEnabled;
+            cm.Lf.RefDeltas = pictureInfo.RefDeltas;
+            cm.Lf.ModeDeltas = pictureInfo.ModeDeltas;
+
+            cm.Fc = new Ptr(ref pictureInfo.Entropy);
+            cm.Counts = new Ptr(ref pictureInfo.BackwardUpdateCounts);
+
+            cm.FrameRefs[0].Buf = (Surface)pictureInfo.LastReference;
+            cm.FrameRefs[1].Buf = (Surface)pictureInfo.GoldenReference;
+            cm.FrameRefs[2].Buf = (Surface)pictureInfo.AltReference;
+            cm.Mb.CurBuf = (Surface)output;
+
+            cm.Mb.SetupBlockPlanes(1, 1);
+
+            cm.AllocTileWorkerData(_allocator, 1 << pictureInfo.Log2TileCols, 1 << pictureInfo.Log2TileRows);
+            cm.AllocContextBuffers(_allocator, output.Width, output.Height);
+
+            // Everything after the two allocations runs under try/finally so the buffers are
+            // handed back to _allocator on every exit path. Previously the "return false"
+            // below (and any exception from SetMvs / GetMvs) skipped both Free calls.
+            try
+            {
+                cm.InitContextBuffers();
+                cm.SetupSegmentationDequant();
+                cm.SetupScaleFactors();
+
+                SetMvs(ref cm, mvsIn);
+
+                fixed (byte* dataPtr = bitstream)
+                {
+                    try
+                    {
+                        DecodeFrame.DecodeTiles(ref cm, new ArrayPtr(dataPtr, bitstream.Length));
+                    }
+                    catch (InternalErrorException)
+                    {
+                        // A decode failure is reported through the return value.
+                        return false;
+                    }
+                }
+
+                GetMvs(ref cm, mvsOut);
+
+                return true;
+            }
+            finally
+            {
+                cm.FreeTileWorkerData(_allocator);
+                cm.FreeContextBuffers(_allocator);
+            }
+        }
+
+        // Copies the caller's motion vectors and reference frames into cm.PrevFrameMvs.
+        // Throws ArgumentException when mvs holds more entries than cm.PrevFrameMvs.
+        private static void SetMvs(ref Vp9Common cm, ReadOnlySpan mvs)
+        {
+            if (mvs.Length > cm.PrevFrameMvs.Length)
+            {
+                throw new ArgumentException($"Size mismatch, expected: {cm.PrevFrameMvs.Length}, but got: {mvs.Length}.");
+            }
+
+            for (int i = 0; i < mvs.Length; i++)
+            {
+                ref var mv = ref cm.PrevFrameMvs[i];
+
+                mv.Mv[0].Row = mvs[i].Mvs[0].Row;
+                mv.Mv[0].Col = mvs[i].Mvs[0].Col;
+                mv.Mv[1].Row = mvs[i].Mvs[1].Row;
+                mv.Mv[1].Col = mvs[i].Mvs[1].Col;
+
+                mv.RefFrame[0] = (sbyte)mvs[i].RefFrames[0];
+                mv.RefFrame[1] = (sbyte)mvs[i].RefFrames[1];
+            }
+        }
+
+        // Copies cm.CurFrameMvs (vectors and reference frames) out to the caller's span.
+        // Throws ArgumentException when mvs holds more entries than cm.CurFrameMvs.
+        private static void GetMvs(ref Vp9Common cm, Span mvs)
+        {
+            if (mvs.Length > cm.CurFrameMvs.Length)
+            {
+                throw new ArgumentException($"Size mismatch, expected: {cm.CurFrameMvs.Length}, but got: {mvs.Length}.");
+            }
+
+            for (int i = 0; i < mvs.Length; i++)
+            {
+                ref var mv = ref cm.CurFrameMvs[i];
+
+                mvs[i].Mvs[0].Row = mv.Mv[0].Row;
+                mvs[i].Mvs[0].Col = mv.Mv[0].Col;
+                mvs[i].Mvs[1].Row = mv.Mv[1].Row;
+                mvs[i].Mvs[1].Col = mv.Mv[1].Col;
+
+                mvs[i].RefFrames[0] = mv.RefFrame[0];
+                mvs[i].RefFrames[1] = mv.RefFrame[1];
+            }
+        }
+
+        public void Dispose() => 
_allocator.Dispose(); + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Detokenize.cs b/Ryujinx.Graphics.Nvdec.Vp9/Detokenize.cs new file mode 100644 index 00000000..7ede6d34 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Detokenize.cs @@ -0,0 +1,325 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Nvdec.Vp9.Dsp; +using Ryujinx.Graphics.Nvdec.Vp9.Types; +using Ryujinx.Graphics.Video; +using System; +using System.Diagnostics; +using System.Runtime.InteropServices; +using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.InvTxfm; + +namespace Ryujinx.Graphics.Nvdec.Vp9 +{ + internal static class Detokenize + { + private const int EobContextNode = 0; + private const int ZeroContextNode = 1; + private const int OneContextNode = 2; + + private static int GetCoefContext(ReadOnlySpan neighbors, ReadOnlySpan tokenCache, int c) + { + const int maxNeighbors = 2; + + return (1 + tokenCache[neighbors[maxNeighbors * c + 0]] + tokenCache[neighbors[maxNeighbors * c + 1]]) >> 1; + } + + private static int ReadCoeff( + ref Reader r, + ReadOnlySpan probs, + int n, + ref ulong value, + ref int count, + ref uint range) + { + int i, val = 0; + for (i = 0; i < n; ++i) + { + val = (val << 1) | r.ReadBool(probs[i], ref value, ref count, ref range); + } + + return val; + } + + private static int DecodeCoefs( + ref MacroBlockD xd, + PlaneType type, + Span dqcoeff, + TxSize txSize, + ref Array2 dq, + int ctx, + ReadOnlySpan scan, + ReadOnlySpan nb, + ref Reader r) + { + ref Vp9BackwardUpdates counts = ref xd.Counts.Value; + int maxEob = 16 << ((int)txSize << 1); + ref Vp9EntropyProbs fc = ref xd.Fc.Value; + int refr = xd.Mi[0].Value.IsInterBlock() ? 1 : 0; + int band, c = 0; + ref Array6>> coefProbs = ref fc.CoefProbs[(int)txSize][(int)type][refr]; + Span tokenCache = stackalloc byte[32 * 32]; + ReadOnlySpan bandTranslate = Luts.get_band_translate(txSize); + int dqShift = (txSize == TxSize.Tx32x32) ? 1 : 0; + int v; + short dqv = dq[0]; + ReadOnlySpan cat6Prob = (xd.Bd == 12) + ? 
Luts.Vp9Cat6ProbHigh12 + : (xd.Bd == 10) ? new ReadOnlySpan(Luts.Vp9Cat6ProbHigh12).Slice(2) : Luts.Vp9Cat6Prob; + int cat6Bits = (xd.Bd == 12) ? 18 : (xd.Bd == 10) ? 16 : 14; + // Keep value, range, and count as locals. The compiler produces better + // results with the locals than using r directly. + ulong value = r.Value; + uint range = r.Range; + int count = r.Count; + + while (c < maxEob) + { + int val = -1; + band = bandTranslate[0]; + bandTranslate = bandTranslate.Slice(1); + ref Array3 prob = ref coefProbs[band][ctx]; + if (!xd.Counts.IsNull) + { + ++counts.EobBranch[(int)txSize][(int)type][refr][band][ctx]; + } + + if (r.ReadBool(prob[EobContextNode], ref value, ref count, ref range) == 0) + { + if (!xd.Counts.IsNull) + { + ++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.EobModelToken]; + } + + break; + } + + while (r.ReadBool(prob[ZeroContextNode], ref value, ref count, ref range) == 0) + { + if (!xd.Counts.IsNull) + { + ++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.ZeroToken]; + } + + dqv = dq[1]; + tokenCache[scan[c]] = 0; + ++c; + if (c >= maxEob) + { + r.Value = value; + r.Range = range; + r.Count = count; + return c; // Zero tokens at the end (no eob token) + } + ctx = GetCoefContext(nb, tokenCache, c); + band = bandTranslate[0]; + bandTranslate = bandTranslate.Slice(1); + prob = ref coefProbs[band][ctx]; + } + + if (r.ReadBool(prob[OneContextNode], ref value, ref count, ref range) != 0) + { + ReadOnlySpan p = Luts.Vp9Pareto8Full[prob[Constants.PivotNode] - 1]; + if (!xd.Counts.IsNull) + { + ++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.TwoToken]; + } + + if (r.ReadBool(p[0], ref value, ref count, ref range) != 0) + { + if (r.ReadBool(p[3], ref value, ref count, ref range) != 0) + { + tokenCache[scan[c]] = 5; + if (r.ReadBool(p[5], ref value, ref count, ref range) != 0) + { + if (r.ReadBool(p[7], ref value, ref count, ref range) != 0) + { + val = Constants.Cat6MinVal + ReadCoeff(ref r, cat6Prob, 
cat6Bits, ref value, ref count, ref range); + } + else + { + val = Constants.Cat5MinVal + ReadCoeff(ref r, Luts.Vp9Cat5Prob, 5, ref value, ref count, ref range); + } + } + else if (r.ReadBool(p[6], ref value, ref count, ref range) != 0) + { + val = Constants.Cat4MinVal + ReadCoeff(ref r, Luts.Vp9Cat4Prob, 4, ref value, ref count, ref range); + } + else + { + val = Constants.Cat3MinVal + ReadCoeff(ref r, Luts.Vp9Cat3Prob, 3, ref value, ref count, ref range); + } + } + else + { + tokenCache[scan[c]] = 4; + if (r.ReadBool(p[4], ref value, ref count, ref range) != 0) + { + val = Constants.Cat2MinVal + ReadCoeff(ref r, Luts.Vp9Cat2Prob, 2, ref value, ref count, ref range); + } + else + { + val = Constants.Cat1MinVal + ReadCoeff(ref r, Luts.Vp9Cat1Prob, 1, ref value, ref count, ref range); + } + } + // Val may use 18-bits + v = (int)(((long)val * dqv) >> dqShift); + } + else + { + if (r.ReadBool(p[1], ref value, ref count, ref range) != 0) + { + tokenCache[scan[c]] = 3; + v = ((3 + r.ReadBool(p[2], ref value, ref count, ref range)) * dqv) >> dqShift; + } + else + { + tokenCache[scan[c]] = 2; + v = (2 * dqv) >> dqShift; + } + } + } + else + { + if (!xd.Counts.IsNull) + { + ++counts.Coef[(int)txSize][(int)type][refr][band][ctx][Constants.OneToken]; + } + + tokenCache[scan[c]] = 1; + v = dqv >> dqShift; + } + dqcoeff[scan[c]] = (int)HighbdCheckRange(r.ReadBool(128, ref value, ref count, ref range) != 0 ? 
-v : v, xd.Bd); + ++c; + ctx = GetCoefContext(nb, tokenCache, c); + dqv = dq[1]; + } + + r.Value = value; + r.Range = range; + r.Count = count; + return c; + } + + private static void GetCtxShift(ref MacroBlockD xd, ref int ctxShiftA, ref int ctxShiftL, int x, int y, uint txSizeInBlocks) + { + if (xd.MaxBlocksWide != 0) + { + if (txSizeInBlocks + x > xd.MaxBlocksWide) + { + ctxShiftA = (int)(txSizeInBlocks - (xd.MaxBlocksWide - x)) * 8; + } + } + if (xd.MaxBlocksHigh != 0) + { + if (txSizeInBlocks + y > xd.MaxBlocksHigh) + { + ctxShiftL = (int)(txSizeInBlocks - (xd.MaxBlocksHigh - y)) * 8; + } + } + } + + private static PlaneType GetPlaneType(int plane) + { + return (PlaneType)(plane > 0 ? 1 : 0); + } + + public static int DecodeBlockTokens( + ref TileWorkerData twd, + int plane, + Luts.ScanOrder sc, + int x, + int y, + TxSize txSize, + int segId) + { + ref Reader r = ref twd.BitReader; + ref MacroBlockD xd = ref twd.Xd; + ref MacroBlockDPlane pd = ref xd.Plane[plane]; + ref Array2 dequant = ref pd.SegDequant[segId]; + int eob; + Span a = pd.AboveContext.ToSpan().Slice(x); + Span l = pd.LeftContext.ToSpan().Slice(y); + int ctx; + int ctxShiftA = 0; + int ctxShiftL = 0; + + switch (txSize) + { + case TxSize.Tx4x4: + ctx = a[0] != 0 ? 1 : 0; + ctx += l[0] != 0 ? 1 : 0; + eob = DecodeCoefs( + ref xd, + GetPlaneType(plane), + pd.DqCoeff.ToSpan(), + txSize, + ref dequant, + ctx, + sc.Scan, + sc.Neighbors, + ref r); + a[0] = l[0] = (sbyte)(eob > 0 ? 1 : 0); + break; + case TxSize.Tx8x8: + GetCtxShift(ref xd, ref ctxShiftA, ref ctxShiftL, x, y, 1 << (int)TxSize.Tx8x8); + ctx = MemoryMarshal.Cast(a)[0] != 0 ? 1 : 0; + ctx += MemoryMarshal.Cast(l)[0] != 0 ? 1 : 0; + eob = DecodeCoefs( + ref xd, + GetPlaneType(plane), + pd.DqCoeff.ToSpan(), + txSize, + ref dequant, + ctx, + sc.Scan, + sc.Neighbors, + ref r); + MemoryMarshal.Cast(a)[0] = (ushort)((eob > 0 ? 0x0101 : 0) >> ctxShiftA); + MemoryMarshal.Cast(l)[0] = (ushort)((eob > 0 ? 
0x0101 : 0) >> ctxShiftL); + break; + case TxSize.Tx16x16: + GetCtxShift(ref xd, ref ctxShiftA, ref ctxShiftL, x, y, 1 << (int)TxSize.Tx16x16); + ctx = MemoryMarshal.Cast(a)[0] != 0 ? 1 : 0; + ctx += MemoryMarshal.Cast(l)[0] != 0 ? 1 : 0; + eob = DecodeCoefs( + ref xd, + GetPlaneType(plane), + pd.DqCoeff.ToSpan(), + txSize, + ref dequant, + ctx, + sc.Scan, + sc.Neighbors, + ref r); + MemoryMarshal.Cast(a)[0] = (uint)((eob > 0 ? 0x01010101 : 0) >> ctxShiftA); + MemoryMarshal.Cast(l)[0] = (uint)((eob > 0 ? 0x01010101 : 0) >> ctxShiftL); + break; + case TxSize.Tx32x32: + GetCtxShift(ref xd, ref ctxShiftA, ref ctxShiftL, x, y, 1 << (int)TxSize.Tx32x32); + // NOTE: Casting to ulong here is safe because the default memory + // alignment is at least 8 bytes and the Tx32x32 is aligned on 8 byte + // boundaries. + ctx = MemoryMarshal.Cast(a)[0] != 0 ? 1 : 0; + ctx += MemoryMarshal.Cast(l)[0] != 0 ? 1 : 0; + eob = DecodeCoefs( + ref xd, + GetPlaneType(plane), + pd.DqCoeff.ToSpan(), + txSize, + ref dequant, + ctx, + sc.Scan, + sc.Neighbors, + ref r); + MemoryMarshal.Cast(a)[0] = (eob > 0 ? 0x0101010101010101UL : 0) >> ctxShiftA; + MemoryMarshal.Cast(l)[0] = (eob > 0 ? 
0x0101010101010101UL : 0) >> ctxShiftL; + break; + default: + Debug.Assert(false, "Invalid transform size."); + eob = 0; + break; + } + + return eob; + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Dsp/Convolve.cs b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/Convolve.cs new file mode 100644 index 00000000..b74c33dc --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/Convolve.cs @@ -0,0 +1,949 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Nvdec.Vp9.Common; +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.Filter; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp +{ + internal static class Convolve + { + private const bool UseIntrinsics = true; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector128 MultiplyAddAdjacent( + Vector128 vsrc0, + Vector128 vsrc1, + Vector128 vsrc2, + Vector128 vsrc3, + Vector128 vfilter, + Vector128 zero) + { + // < sumN, sumN, sumN, sumN > + Vector128 sum0 = Sse2.MultiplyAddAdjacent(vsrc0, vfilter); + Vector128 sum1 = Sse2.MultiplyAddAdjacent(vsrc1, vfilter); + Vector128 sum2 = Sse2.MultiplyAddAdjacent(vsrc2, vfilter); + Vector128 sum3 = Sse2.MultiplyAddAdjacent(vsrc3, vfilter); + + // < 0, 0, sumN, sumN > + sum0 = Ssse3.HorizontalAdd(sum0, zero); + sum1 = Ssse3.HorizontalAdd(sum1, zero); + sum2 = Ssse3.HorizontalAdd(sum2, zero); + sum3 = Ssse3.HorizontalAdd(sum3, zero); + + // < 0, 0, 0, sumN > + sum0 = Ssse3.HorizontalAdd(sum0, zero); + sum1 = Ssse3.HorizontalAdd(sum1, zero); + sum2 = Ssse3.HorizontalAdd(sum2, zero); + sum3 = Ssse3.HorizontalAdd(sum3, zero); + + // < 0, 0, sum1, sum0 > + Vector128 sum01 = Sse2.UnpackLow(sum0, sum1); + + // < 0, 0, sum3, sum2 > + Vector128 sum23 = Sse2.UnpackLow(sum2, sum3); + + // < sum3, sum2, sum1, sum0 > + return Sse.MoveLowToHigh(sum01.AsSingle(), sum23.AsSingle()).AsInt32(); + } + + 
// Adds the rounding bias to every 32-bit lane and arithmetic-shifts it right by FilterBits.
// Callers pass a vector of 64s as const64, which is half of (1 << FilterBits).
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<int> RoundShift(Vector128<int> value, Vector128<int> const64)
{
    return Sse2.ShiftRightArithmetic(Sse2.Add(value, const64), FilterBits);
}

// Narrows four 32-bit lanes to bytes with unsigned saturation in two steps
// (int -> ushort -> byte). The four results end up in the low four bytes of the
// returned vector; the remaining bytes are produced from the zero argument.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<byte> PackUnsignedSaturate(Vector128<int> value, Vector128<int> zero)
{
    return Sse2.PackUnsignedSaturate(Sse41.PackUnsignedSaturate(value, zero).AsInt16(), zero.AsInt16());
}

// SSE4.1 horizontal 8-tap filter for the unscaled case (one source pixel per output pixel).
//
// src/srcStride: source pixels and row pitch in bytes.
// dst/dstStride: destination pixels and row pitch in bytes.
// xFilters:      filter bank; the kernel is selected once from the sub-pixel bits of x0Q4.
// x0Q4:          start position in 1/16 pixel units (integer part above SubpelBits).
// w, h:          size of the output block in pixels.
//
// NOTE: four output pixels are produced and stored per iteration, so when w is not a
// multiple of 4 the last store of each row writes past column w - 1.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void ConvolveHorizSse41(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] xFilters,
    int x0Q4,
    int w,
    int h)
{
    Vector128<int> zero = Vector128<int>.Zero;
    Vector128<int> const64 = Vector128.Create(64);

    // Move back so that the SubpelTaps-wide window starts (SubpelTaps / 2 - 1) pixels
    // to the left of the output position.
    src -= SubpelTaps / 2 - 1;

    // The step is exactly one pixel, so the integer source offset and the kernel are
    // the same for every row and every column.
    ulong srcOffset = (uint)x0Q4 >> SubpelBits;

    fixed (Array8<short>* xFilter = xFilters)
    {
        // Each Array8<short> holds 8 coefficients; index the bank in units of shorts.
        Vector128<short> vfilter = Sse2.LoadVector128((short*)xFilter + (uint)(x0Q4 & SubpelMask) * 8);

        for (ulong y = 0; y < (uint)h; ++y)
        {
            for (ulong x = 0; x < (uint)w; x += 4)
            {
                // One 8-pixel window (zero-extended to 16 bits) per output pixel.
                Vector128<short> vsrc0 = Sse41.ConvertToVector128Int16(&src[srcOffset + x]);
                Vector128<short> vsrc1 = Sse41.ConvertToVector128Int16(&src[srcOffset + x + 1]);
                Vector128<short> vsrc2 = Sse41.ConvertToVector128Int16(&src[srcOffset + x + 2]);
                Vector128<short> vsrc3 = Sse41.ConvertToVector128Int16(&src[srcOffset + x + 3]);

                Vector128<int> sum0123 = MultiplyAddAdjacent(vsrc0, vsrc1, vsrc2, vsrc3, vfilter, zero);

                // The low 32 bits of the packed vector are the four output pixels;
                // store them with a single scalar write.
                Sse.StoreScalar((float*)&dst[x], PackUnsignedSaturate(RoundShift(sum0123, const64), zero).AsSingle());
            }

            src += srcStride;
            dst += dstStride;
        }
    }
}

// Horizontal 8-tap filter.
//
// x0Q4 and xStepQ4 are the start position and per-pixel step in 1/16 pixel units:
// the bits above SubpelBits select the source pixel, the low bits select the kernel
// from xFilters. When the step is exactly one pixel and SSE4.1 is available the
// vectorized path is used instead of the scalar loop.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void ConvolveHoriz(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] xFilters,
    int x0Q4,
    int xStepQ4,
    int w,
    int h)
{
    if (Sse41.IsSupported && UseIntrinsics && xStepQ4 == (1 << SubpelBits))
    {
        ConvolveHorizSse41(src, srcStride, dst, dstStride, xFilters, x0Q4, w, h);
        return;
    }

    // Center the SubpelTaps-wide window on the output position.
    src -= SubpelTaps / 2 - 1;

    for (int y = 0; y < h; ++y)
    {
        int xQ4 = x0Q4;
        for (int x = 0; x < w; ++x)
        {
            byte* srcX = &src[xQ4 >> SubpelBits];
            ref Array8<short> xFilter = ref xFilters[xQ4 & SubpelMask];
            int sum = 0;
            for (int k = 0; k < SubpelTaps; ++k)
            {
                sum += srcX[k] * xFilter[k];
            }

            dst[x] = BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(sum, FilterBits));
            xQ4 += xStepQ4;
        }

        src += srcStride;
        dst += dstStride;
    }
}

// Horizontal 8-tap filter whose result is averaged (rounding up) with the pixel
// already present in dst. Parameters have the same meaning as in ConvolveHoriz.
private static unsafe void ConvolveAvgHoriz(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] xFilters,
    int x0Q4,
    int xStepQ4,
    int w,
    int h)
{
    // Center the SubpelTaps-wide window on the output position.
    src -= SubpelTaps / 2 - 1;

    for (int y = 0; y < h; ++y)
    {
        int xQ4 = x0Q4;
        for (int x = 0; x < w; ++x)
        {
            byte* srcX = &src[xQ4 >> SubpelBits];
            ref Array8<short> xFilter = ref xFilters[xQ4 & SubpelMask];
            int sum = 0;
            for (int k = 0; k < SubpelTaps; ++k)
            {
                sum += srcX[k] * xFilter[k];
            }

            dst[x] = (byte)BitUtils.RoundPowerOfTwo(dst[x] + BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(sum, FilterBits)), 1);
            xQ4 += xStepQ4;
        }

        src += srcStride;
        dst += dstStride;
    }
}

// AVX2 vertical 8-tap filter for the unscaled case (one source row per output row).
//
// For every group of four output pixels, eight consecutive source rows are gathered
// (four bytes per row) and transposed so that each vector holds the eight vertical
// taps of one column; the columns are then filtered like the horizontal SSE path.
//
// NOTE: four output pixels are produced and stored per iteration, so when w is not a
// multiple of 4 the last store of each row writes past column w - 1.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void ConvolveVertAvx2(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] yFilters,
    int y0Q4,
    int w,
    int h)
{
    Vector128<int> zero = Vector128<int>.Zero;
    Vector128<int> const64 = Vector128.Create(64);

    // Byte offsets of the eight rows covered by the filter window.
    Vector256<int> indices = Vector256.Create(
        0,
        srcStride,
        srcStride * 2,
        srcStride * 3,
        srcStride * 4,
        srcStride * 5,
        srcStride * 6,
        srcStride * 7);

    // Move up so that the window starts (SubpelTaps / 2 - 1) rows above the output row.
    src -= srcStride * (SubpelTaps / 2 - 1);

    fixed (Array8<short>* yFilter = yFilters)
    {
        // Each Array8<short> holds 8 coefficients; index the bank in units of shorts.
        Vector128<short> vfilter = Sse2.LoadVector128((short*)yFilter + (uint)(y0Q4 & SubpelMask) * 8);

        ulong srcBaseY = (uint)y0Q4 >> SubpelBits;
        for (ulong y = 0; y < (uint)h; ++y)
        {
            ulong srcOffset = (srcBaseY + y) * (uint)srcStride;
            for (ulong x = 0; x < (uint)w; x += 4)
            {
                // Element i holds columns x..x+3 of row i.
                Vector256<int> vsrc = Avx2.GatherVector256((uint*)&src[srcOffset + x], indices, 1).AsInt32();

                Vector128<int> vsrcL = vsrc.GetLower();
                Vector128<int> vsrcH = vsrc.GetUpper();

                // Three rounds of byte interleaving transpose the 8x4 byte tile:
                // vsrc01 = column 0 then column 1, vsrc23 = column 2 then column 3.
                Vector128<byte> vsrcUnpck11 = Sse2.UnpackLow(vsrcL.AsByte(), vsrcH.AsByte());
                Vector128<byte> vsrcUnpck12 = Sse2.UnpackHigh(vsrcL.AsByte(), vsrcH.AsByte());

                Vector128<byte> vsrcUnpck21 = Sse2.UnpackLow(vsrcUnpck11, vsrcUnpck12);
                Vector128<byte> vsrcUnpck22 = Sse2.UnpackHigh(vsrcUnpck11, vsrcUnpck12);

                Vector128<byte> vsrc01 = Sse2.UnpackLow(vsrcUnpck21, vsrcUnpck22);
                Vector128<byte> vsrc23 = Sse2.UnpackHigh(vsrcUnpck21, vsrcUnpck22);

                // Bring the odd columns down to the low 8 bytes so they can be widened.
                Vector128<byte> vsrc11 = Sse.MoveHighToLow(vsrc01.AsSingle(), vsrc01.AsSingle()).AsByte();
                Vector128<byte> vsrc33 = Sse.MoveHighToLow(vsrc23.AsSingle(), vsrc23.AsSingle()).AsByte();

                Vector128<short> vsrc0 = Sse41.ConvertToVector128Int16(vsrc01);
                Vector128<short> vsrc1 = Sse41.ConvertToVector128Int16(vsrc11);
                Vector128<short> vsrc2 = Sse41.ConvertToVector128Int16(vsrc23);
                Vector128<short> vsrc3 = Sse41.ConvertToVector128Int16(vsrc33);

                Vector128<int> sum0123 = MultiplyAddAdjacent(vsrc0, vsrc1, vsrc2, vsrc3, vfilter, zero);

                // The low 32 bits of the packed vector are the four output pixels.
                Sse.StoreScalar((float*)&dst[x], PackUnsignedSaturate(RoundShift(sum0123, const64), zero).AsSingle());
            }

            dst += dstStride;
        }
    }
}

// Vertical 8-tap filter.
//
// y0Q4 and yStepQ4 are the start position and per-row step in 1/16 pixel units:
// the bits above SubpelBits select the source row, the low bits select the kernel
// from yFilters. When the step is exactly one row and AVX2 is available the
// vectorized path is used instead of the scalar loop.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void ConvolveVert(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] yFilters,
    int y0Q4,
    int yStepQ4,
    int w,
    int h)
{
    if (Avx2.IsSupported && UseIntrinsics && yStepQ4 == (1 << SubpelBits))
    {
        ConvolveVertAvx2(src, srcStride, dst, dstStride, yFilters, y0Q4, w, h);
        return;
    }

    // Center the SubpelTaps-tall window on the output row.
    src -= srcStride * (SubpelTaps / 2 - 1);

    // Column-major traversal: src and dst advance by one pixel per column.
    for (int x = 0; x < w; ++x)
    {
        int yQ4 = y0Q4;
        for (int y = 0; y < h; ++y)
        {
            byte* srcY = &src[(yQ4 >> SubpelBits) * srcStride];
            ref Array8<short> yFilter = ref yFilters[yQ4 & SubpelMask];
            int sum = 0;
            for (int k = 0; k < SubpelTaps; ++k)
            {
                sum += srcY[k * srcStride] * yFilter[k];
            }

            dst[y * dstStride] = BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(sum, FilterBits));
            yQ4 += yStepQ4;
        }

        ++src;
        ++dst;
    }
}

// Vertical 8-tap filter whose result is averaged (rounding up) with the pixel
// already present in dst. Parameters have the same meaning as in ConvolveVert.
private static unsafe void ConvolveAvgVert(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] yFilters,
    int y0Q4,
    int yStepQ4,
    int w,
    int h)
{
    // Center the SubpelTaps-tall window on the output row.
    src -= srcStride * (SubpelTaps / 2 - 1);

    // Column-major traversal: src and dst advance by one pixel per column.
    for (int x = 0; x < w; ++x)
    {
        int yQ4 = y0Q4;
        for (int y = 0; y < h; ++y)
        {
            byte* srcY = &src[(yQ4 >> SubpelBits) * srcStride];
            ref Array8<short> yFilter = ref yFilters[yQ4 & SubpelMask];
            int sum = 0;
            for (int k = 0; k < SubpelTaps; ++k)
            {
                sum += srcY[k * srcStride] * yFilter[k];
            }

            dst[y * dstStride] = (byte)BitUtils.RoundPowerOfTwo(
                dst[y * dstStride] + BitUtils.ClipPixel(BitUtils.RoundPowerOfTwo(sum, FilterBits)), 1);
            yQ4 += yStepQ4;
        }

        ++src;
        ++dst;
    }
}

// Public entry point for the horizontal 8-tap filter.
// y0Q4 and yStepQ4 are ignored; they are part of the parameter list shared by the
// public convolve functions of this class.
public static unsafe void Convolve8Horiz(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h)
{
    ConvolveHoriz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, w, h);
}

// Public entry point for the horizontal 8-tap filter averaged with dst.
// y0Q4 and yStepQ4 are ignored.
public static unsafe void Convolve8AvgHoriz(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h)
{
    ConvolveAvgHoriz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, w, h);
}

// Public entry point for the vertical 8-tap filter.
// x0Q4 and xStepQ4 are ignored.
public static unsafe void Convolve8Vert(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h)
{
    ConvolveVert(src, srcStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h);
}

// Public entry point for the vertical 8-tap filter averaged with dst.
// x0Q4 and xStepQ4 are ignored.
public static unsafe void Convolve8AvgVert(
    byte* src,
    int srcStride,
    byte* dst,
    int dstStride,
    Array8<short>[] filter,
    int x0Q4,
    int xStepQ4,
    int y0Q4,
    int yStepQ4,
    int w,
    int h)
{
    ConvolveAvgVert(src, srcStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h);
}

[StructLayout(LayoutKind.Sequential, Size = 64 * 135)] + struct Temp + { + } + + public static unsafe void Convolve8( + byte* src, + int srcStride, + byte* dst, + int dstStride, + Array8[] filter, + int x0Q4, + int xStepQ4, + int y0Q4, + int yStepQ4, + int w, + int h) + { + // Note: Fixed size intermediate buffer, temp, places limits on parameters. + // 2d filtering proceeds in 2 steps: + // (1) Interpolate horizontally into an intermediate buffer, temp. + // (2) Interpolate temp vertically to derive the sub-pixel result. + // Deriving the maximum number of rows in the temp buffer (135): + // --Smallest scaling factor is x1/2 ==> yStepQ4 = 32 (Normative). + // --Largest block size is 64x64 pixels. + // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the + // original frame (in 1/16th pixel units). + // --Must round-up because block may be located at sub-pixel position. + // --Require an additional SubpelTaps rows for the 8-tap filter tails. + // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. + // When calling in frame scaling function, the smallest scaling factor is x1/4 + // ==> yStepQ4 = 64. Since w and h are at most 16, the temp buffer is still + // big enough. + Temp tempStruct; + byte* temp = (byte*)Unsafe.AsPointer(ref tempStruct); // Avoid zero initialization. 
+ int intermediateHeight = (((h - 1) * yStepQ4 + y0Q4) >> SubpelBits) + SubpelTaps; + + Debug.Assert(w <= 64); + Debug.Assert(h <= 64); + Debug.Assert(yStepQ4 <= 32 || (yStepQ4 <= 64 && h <= 32)); + Debug.Assert(xStepQ4 <= 64); + + ConvolveHoriz(src - srcStride * (SubpelTaps / 2 - 1), srcStride, temp, 64, filter, x0Q4, xStepQ4, w, intermediateHeight); + ConvolveVert(temp + 64 * (SubpelTaps / 2 - 1), 64, dst, dstStride, filter, y0Q4, yStepQ4, w, h); + } + + public static unsafe void Convolve8Avg( + byte* src, + int srcStride, + byte* dst, + int dstStride, + Array8[] filter, + int x0Q4, + int xStepQ4, + int y0Q4, + int yStepQ4, + int w, + int h) + { + // Fixed size intermediate buffer places limits on parameters. + byte* temp = stackalloc byte[64 * 64]; + Debug.Assert(w <= 64); + Debug.Assert(h <= 64); + + Convolve8(src, srcStride, temp, 64, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h); + ConvolveAvg(temp, 64, dst, dstStride, null, 0, 0, 0, 0, w, h); + } + + public static unsafe void ConvolveCopy( + byte* src, + int srcStride, + byte* dst, + int dstStride, + Array8[] filter, + int x0Q4, + int xStepQ4, + int y0Q4, + int yStepQ4, + int w, + int h) + { + int r; + + for (r = h; r > 0; --r) + { + MemoryUtil.Copy(dst, src, w); + src += srcStride; + dst += dstStride; + } + } + + public static unsafe void ConvolveAvg( + byte* src, + int srcStride, + byte* dst, + int dstStride, + Array8[] filter, + int x0Q4, + int xStepQ4, + int y0Q4, + int yStepQ4, + int w, + int h) + { + int x, y; + + for (y = 0; y < h; ++y) + { + for (x = 0; x < w; ++x) + { + dst[x] = (byte)BitUtils.RoundPowerOfTwo(dst[x] + src[x], 1); + } + + src += srcStride; + dst += dstStride; + } + } + + public static unsafe void ScaledHoriz( + byte* src, + int srcStride, + byte* dst, + int dstStride, + Array8[] filter, + int x0Q4, + int xStepQ4, + int y0Q4, + int yStepQ4, + int w, + int h) + { + Convolve8Horiz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h); + } + + public static unsafe 
void ScaledVert( + byte* src, + int srcStride, + byte* dst, + int dstStride, + Array8[] filter, + int x0Q4, + int xStepQ4, + int y0Q4, + int yStepQ4, + int w, + int h) + { + Convolve8Vert(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h); + } + + public static unsafe void Scaled2D( + byte* src, + int srcStride, + byte* dst, + int dstStride, + Array8[] filter, + int x0Q4, + int xStepQ4, + int y0Q4, + int yStepQ4, + int w, + int h) + { + Convolve8(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h); + } + + public static unsafe void ScaledAvgHoriz( + byte* src, + int srcStride, + byte* dst, + int dstStride, + Array8[] filter, + int x0Q4, + int xStepQ4, + int y0Q4, + int yStepQ4, + int w, + int h) + { + Convolve8AvgHoriz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h); + } + + public static unsafe void ScaledAvgVert( + byte* src, + int srcStride, + byte* dst, + int dstStride, + Array8[] filter, + int x0Q4, + int xStepQ4, + int y0Q4, + int yStepQ4, + int w, + int h) + { + Convolve8AvgVert(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h); + } + + public static unsafe void ScaledAvg2D( + byte* src, + int srcStride, + byte* dst, + int dstStride, + Array8[] filter, + int x0Q4, + int xStepQ4, + int y0Q4, + int yStepQ4, + int w, + int h) + { + Convolve8Avg(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h); + } + + private static unsafe void HighbdConvolveHoriz( + ushort* src, + int srcStride, + ushort* dst, + int dstStride, + Array8[] xFilters, + int x0Q4, + int xStepQ4, + int w, + int h, + int bd) + { + int x, y; + src -= SubpelTaps / 2 - 1; + + for (y = 0; y < h; ++y) + { + int xQ4 = x0Q4; + for (x = 0; x < w; ++x) + { + ushort* srcX = &src[xQ4 >> SubpelBits]; + ref Array8 xFilter = ref xFilters[xQ4 & SubpelMask]; + int k, sum = 0; + for (k = 0; k < SubpelTaps; ++k) + { + sum += srcX[k] * xFilter[k]; + } + + dst[x] = 
BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(sum, FilterBits), bd); + xQ4 += xStepQ4; + } + src += srcStride; + dst += dstStride; + } + } + + private static unsafe void HighbdConvolveAvgHoriz( + ushort* src, + int srcStride, + ushort* dst, + int dstStride, + Array8[] xFilters, + int x0Q4, + int xStepQ4, + int w, + int h, + int bd) + { + int x, y; + src -= SubpelTaps / 2 - 1; + + for (y = 0; y < h; ++y) + { + int xQ4 = x0Q4; + for (x = 0; x < w; ++x) + { + ushort* srcX = &src[xQ4 >> SubpelBits]; + ref Array8 xFilter = ref xFilters[xQ4 & SubpelMask]; + int k, sum = 0; + for (k = 0; k < SubpelTaps; ++k) + { + sum += srcX[k] * xFilter[k]; + } + + dst[x] = (ushort)BitUtils.RoundPowerOfTwo(dst[x] + BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(sum, FilterBits), bd), 1); + xQ4 += xStepQ4; + } + src += srcStride; + dst += dstStride; + } + } + + private static unsafe void HighbdConvolveVert( + ushort* src, + int srcStride, + ushort* dst, + int dstStride, + Array8[] yFilters, + int y0Q4, + int yStepQ4, + int w, + int h, + int bd) + { + int x, y; + src -= srcStride * (SubpelTaps / 2 - 1); + + for (x = 0; x < w; ++x) + { + int yQ4 = y0Q4; + for (y = 0; y < h; ++y) + { + ushort* srcY = &src[(yQ4 >> SubpelBits) * srcStride]; + ref Array8 yFilter = ref yFilters[yQ4 & SubpelMask]; + int k, sum = 0; + for (k = 0; k < SubpelTaps; ++k) + { + sum += srcY[k * srcStride] * yFilter[k]; + } + + dst[y * dstStride] = BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(sum, FilterBits), bd); + yQ4 += yStepQ4; + } + ++src; + ++dst; + } + } + + private static unsafe void HighConvolveAvgVert( + ushort* src, + int srcStride, + ushort* dst, + int dstStride, + Array8[] yFilters, + int y0Q4, + int yStepQ4, + int w, + int h, + int bd) + { + int x, y; + src -= srcStride * (SubpelTaps / 2 - 1); + + for (x = 0; x < w; ++x) + { + int yQ4 = y0Q4; + for (y = 0; y < h; ++y) + { + ushort* srcY = &src[(yQ4 >> SubpelBits) * srcStride]; + ref Array8 yFilter = ref yFilters[yQ4 & SubpelMask]; + int k, sum 
= 0; + for (k = 0; k < SubpelTaps; ++k) + { + sum += srcY[k * srcStride] * yFilter[k]; + } + + dst[y * dstStride] = (ushort)BitUtils.RoundPowerOfTwo( + dst[y * dstStride] + BitUtils.ClipPixelHighbd(BitUtils.RoundPowerOfTwo(sum, FilterBits), bd), 1); + yQ4 += yStepQ4; + } + ++src; + ++dst; + } + } + + private static unsafe void HighbdConvolve( + ushort* src, + int srcStride, + ushort* dst, + int dstStride, + Array8[] filter, + int x0Q4, + int xStepQ4, + int y0Q4, + int yStepQ4, + int w, + int h, + int bd) + { + // Note: Fixed size intermediate buffer, temp, places limits on parameters. + // 2d filtering proceeds in 2 steps: + // (1) Interpolate horizontally into an intermediate buffer, temp. + // (2) Interpolate temp vertically to derive the sub-pixel result. + // Deriving the maximum number of rows in the temp buffer (135): + // --Smallest scaling factor is x1/2 ==> yStepQ4 = 32 (Normative). + // --Largest block size is 64x64 pixels. + // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the + // original frame (in 1/16th pixel units). + // --Must round-up because block may be located at sub-pixel position. + // --Require an additional SubpelTaps rows for the 8-tap filter tails. + // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. 
+ ushort* temp = stackalloc ushort[64 * 135]; + int intermediateHeight = (((h - 1) * yStepQ4 + y0Q4) >> SubpelBits) + SubpelTaps; + + Debug.Assert(w <= 64); + Debug.Assert(h <= 64); + Debug.Assert(yStepQ4 <= 32); + Debug.Assert(xStepQ4 <= 32); + + HighbdConvolveHoriz(src - srcStride * (SubpelTaps / 2 - 1), srcStride, temp, 64, filter, x0Q4, xStepQ4, w, intermediateHeight, bd); + HighbdConvolveVert(temp + 64 * (SubpelTaps / 2 - 1), 64, dst, dstStride, filter, y0Q4, yStepQ4, w, h, bd); + } + + public static unsafe void HighbdConvolve8Horiz( + ushort* src, + int srcStride, + ushort* dst, + int dstStride, + Array8[] filter, + int x0Q4, + int xStepQ4, + int y0Q4, + int yStepQ4, + int w, + int h, + int bd) + { + HighbdConvolveHoriz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, w, h, bd); + } + + public static unsafe void HighbdConvolve8AvgHoriz( + ushort* src, + int srcStride, + ushort* dst, + int dstStride, + Array8[] filter, + int x0Q4, + int xStepQ4, + int y0Q4, + int yStepQ4, + int w, + int h, + int bd) + { + HighbdConvolveAvgHoriz(src, srcStride, dst, dstStride, filter, x0Q4, xStepQ4, w, h, bd); + } + + public static unsafe void HighbdConvolve8Vert( + ushort* src, + int srcStride, + ushort* dst, + int dstStride, + Array8[] filter, + int x0Q4, + int xStepQ4, + int y0Q4, + int yStepQ4, + int w, + int h, + int bd) + { + HighbdConvolveVert(src, srcStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h, bd); + } + + public static unsafe void HighbdConvolve8AvgVert( + ushort* src, + int srcStride, + ushort* dst, + int dstStride, + Array8[] filter, + int x0Q4, + int xStepQ4, + int y0Q4, + int yStepQ4, + int w, + int h, + int bd) + { + HighConvolveAvgVert(src, srcStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h, bd); + } + + public static unsafe void HighbdConvolve8( + ushort* src, + int srcStride, + ushort* dst, + int dstStride, + Array8[] filter, + int x0Q4, + int xStepQ4, + int y0Q4, + int yStepQ4, + int w, + int h, + int bd) + { + HighbdConvolve(src, srcStride, 
dst, dstStride, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h, bd); + } + + public static unsafe void HighbdConvolve8Avg( + ushort* src, + int srcStride, + ushort* dst, + int dstStride, + Array8[] filter, + int x0Q4, + int xStepQ4, + int y0Q4, + int yStepQ4, + int w, + int h, + int bd) + { + // Fixed size intermediate buffer places limits on parameters. + ushort* temp = stackalloc ushort[64 * 64]; + Debug.Assert(w <= 64); + Debug.Assert(h <= 64); + + HighbdConvolve8(src, srcStride, temp, 64, filter, x0Q4, xStepQ4, y0Q4, yStepQ4, w, h, bd); + HighbdConvolveAvg(temp, 64, dst, dstStride, null, 0, 0, 0, 0, w, h, bd); + } + + public static unsafe void HighbdConvolveCopy( + ushort* src, + int srcStride, + ushort* dst, + int dstStride, + Array8[] filter, + int x0Q4, + int xStepQ4, + int y0Q4, + int yStepQ4, + int w, + int h, + int bd) + { + int r; + + for (r = h; r > 0; --r) + { + MemoryUtil.Copy(dst, src, w); + src += srcStride; + dst += dstStride; + } + } + + public static unsafe void HighbdConvolveAvg( + ushort* src, + int srcStride, + ushort* dst, + int dstStride, + Array8[] filter, + int x0Q4, + int xStepQ4, + int y0Q4, + int yStepQ4, + int w, + int h, + int bd) + { + int x, y; + + for (y = 0; y < h; ++y) + { + for (x = 0; x < w; ++x) + { + dst[x] = (ushort)BitUtils.RoundPowerOfTwo(dst[x] + src[x], 1); + } + + src += srcStride; + dst += dstStride; + } + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Dsp/Filter.cs b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/Filter.cs new file mode 100644 index 00000000..16962897 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/Filter.cs @@ -0,0 +1,12 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp +{ + internal static class Filter + { + public const int FilterBits = 7; + + public const int SubpelBits = 4; + public const int SubpelMask = (1 << SubpelBits) - 1; + public const int SubpelShifts = 1 << SubpelBits; + public const int SubpelTaps = 8; + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Dsp/IntraPred.cs 
b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/IntraPred.cs
new file mode 100644
index 00000000..62b3a9b1
--- /dev/null
+++ b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/IntraPred.cs
@@ -0,0 +1,1379 @@
using Ryujinx.Graphics.Nvdec.Vp9.Common;

namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp
{
    /// <summary>
    /// Intra prediction kernels. Every predictor fills a square block at <c>dst</c> (consecutive rows are
    /// <c>stride</c> elements apart) from the <c>above</c> row and/or the <c>left</c> column.
    /// The <c>byte*</c> predictors come first, followed by the <c>ushort*</c> ("Highbd") ones, which take
    /// an extra <c>bd</c> argument (presumably the sample bit depth).
    /// </summary>
    internal static class IntraPred
    {
        /// <summary>Returns a reference to the sample at column <paramref name="x"/>, row <paramref name="y"/>.</summary>
        private static unsafe ref byte Dst(byte* dst, int stride, int x, int y) => ref dst[x + y * stride];

        /// <summary>Returns a reference to the sample at column <paramref name="x"/>, row <paramref name="y"/>.</summary>
        private static unsafe ref ushort Dst(ushort* dst, int stride, int x, int y) => ref dst[x + y * stride];

        /// <summary>Rounded 3-tap average with weights 1, 2, 1.</summary>
        private static byte Avg3(byte a, byte b, byte c) => (byte)((a + 2 * b + c + 2) >> 2);

        /// <summary>Rounded 3-tap average with weights 1, 2, 1.</summary>
        private static ushort Avg3(ushort a, ushort b, ushort c) => (ushort)((a + 2 * b + c + 2) >> 2);

        /// <summary>Rounded 2-tap average.</summary>
        private static byte Avg2(byte a, byte b) => (byte)((a + b + 1) >> 1);

        /// <summary>Rounded 2-tap average.</summary>
        private static ushort Avg2(ushort a, ushort b) => (ushort)((a + b + 1) >> 1);

        // D207 entry points for 8x8, 16x16 and 32x32 blocks.
        public static unsafe void D207Predictor8x8(byte* dst, int stride, byte* above, byte* left)
            => D207Predictor(dst, stride, 8, above, left);

        public static unsafe void D207Predictor16x16(byte* dst, int stride, byte* above, byte* left)
            => D207Predictor(dst, stride, 16, above, left);

        public static unsafe void D207Predictor32x32(byte* dst, int stride, byte* above, byte* left)
            => D207Predictor(dst, stride, 32, above, left);

        /// <summary>
        /// Fills a bs x bs block from the left column only (<paramref name="above"/> is not read).
        /// Column 0 gets 2-tap averages, column 1 gets 3-tap averages, the bottom row is left[bs - 1],
        /// and every other sample copies the one two columns to its left in the row below.
        /// </summary>
        private static unsafe void D207Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
        {
            int lastRow = bs - 1;

            // Column 0.
            for (int row = 0; row < lastRow; row++)
            {
                dst[row * stride] = Avg2(left[row], left[row + 1]);
            }

            dst[lastRow * stride] = left[lastRow];
            dst++;

            // Column 1.
            for (int row = 0; row < bs - 2; row++)
            {
                dst[row * stride] = Avg3(left[row], left[row + 1], left[row + 2]);
            }

            dst[(bs - 2) * stride] = Avg3(left[bs - 2], left[lastRow], left[lastRow]);
            dst[lastRow * stride] = left[lastRow];
            dst++;

            // Remaining columns of the bottom row.
            byte* bottom = dst + lastRow * stride;
            for (int col = 0; col < bs - 2; col++)
            {
                bottom[col] = left[lastRow];
            }

            // Remaining rows, bottom-up, because each row is derived from the row below it.
            for (int row = bs - 2; row >= 0; row--)
            {
                byte* current = dst + row * stride;
                byte* below = current + stride;

                for (int col = 0; col < bs - 2; col++)
                {
                    current[col] = below[col - 2];
                }
            }
        }

        // D63 entry points for 8x8, 16x16 and 32x32 blocks.
        public static unsafe void D63Predictor8x8(byte* dst, int stride, byte* above, byte* left)
            => D63Predictor(dst, stride, 8, above, left);

        public static unsafe void D63Predictor16x16(byte* dst, int stride, byte* above, byte* left)
            => D63Predictor(dst, stride, 16, above, left);

        public static unsafe void D63Predictor32x32(byte* dst, int stride, byte* above, byte* left)
            => D63Predictor(dst, stride, 32, above, left);

        /// <summary>
        /// Fills a bs x bs block from the above row only (<paramref name="left"/> is not read).
        /// Row 0 holds 2-tap and row 1 holds 3-tap averages; each following pair of rows is the first pair
        /// shifted left by one more sample, padded on the right with above[bs - 1].
        /// </summary>
        private static unsafe void D63Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
        {
            for (int col = 0; col < bs; col++)
            {
                dst[col] = Avg2(above[col], above[col + 1]);
                dst[stride + col] = Avg3(above[col], above[col + 1], above[col + 2]);
            }

            int size = bs - 2;
            for (int row = 2; row < bs; row += 2)
            {
                int shift = row >> 1;
                byte* evenRow = dst + row * stride;
                byte* oddRow = evenRow + stride;

                MemoryUtil.Copy(evenRow, dst + shift, size);
                MemoryUtil.Fill(evenRow + size, above[bs - 1], bs - size);
                MemoryUtil.Copy(oddRow, dst + stride + shift, size);
                MemoryUtil.Fill(oddRow + size, above[bs - 1], bs - size);

                size--;
            }
        }

        // D45 entry points for 8x8, 16x16 and 32x32 blocks.
        public static unsafe void D45Predictor8x8(byte* dst, int stride, byte* above, byte* left)
            => D45Predictor(dst, stride, 8, above, left);

        public static unsafe void D45Predictor16x16(byte* dst, int stride, byte* above, byte* left)
            => D45Predictor(dst, stride, 16, above, left);

        public static unsafe void D45Predictor32x32(byte* dst, int stride, byte* above, byte* left)
            => D45Predictor(dst, stride, 32, above, left);

        /// <summary>
        /// Fills a bs x bs block from the above row only (<paramref name="left"/> is not read).
        /// Row 0 holds 3-tap averages ending in above[bs - 1]; each later row is row 0 shifted left by the
        /// row index and padded on the right with above[bs - 1].
        /// </summary>
        private static unsafe void D45Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
        {
            byte aboveRight = above[bs - 1];
            byte* firstRow = dst;

            for (int col = 0; col < bs - 1; col++)
            {
                dst[col] = Avg3(above[col], above[col + 1], above[col + 2]);
            }

            dst[bs - 1] = aboveRight;

            int size = bs - 2;
            for (int row = 1; row < bs; row++)
            {
                dst += stride;

                MemoryUtil.Copy(dst, firstRow + row, size);
                MemoryUtil.Fill(dst + size, aboveRight, row + 1);

                size--;
            }
        }

        // D117 entry points for 8x8, 16x16 and 32x32 blocks.
        public static unsafe void D117Predictor8x8(byte* dst, int stride, byte* above, byte* left)
            => D117Predictor(dst, stride, 8, above, left);

        public static unsafe void D117Predictor16x16(byte* dst, int stride, byte* above, byte* left)
            => D117Predictor(dst, stride, 16, above, left);

        public static unsafe void D117Predictor32x32(byte* dst, int stride, byte* above, byte* left)
            => D117Predictor(dst, stride, 32, above, left);

        /// <summary>
        /// Fills a bs x bs block from above[-1 .. bs - 1] and the left column.
        /// Rows 0 and 1 are 2-tap and 3-tap averages of the above row, column 0 below them is built from
        /// the left column, and every other sample copies the one a column to the left, two rows up.
        /// </summary>
        private static unsafe void D117Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
        {
            // Row 0.
            for (int col = 0; col < bs; col++)
            {
                dst[col] = Avg2(above[col - 1], above[col]);
            }

            // Row 1.
            byte* row1 = dst + stride;
            row1[0] = Avg3(left[0], above[-1], above[0]);
            for (int col = 1; col < bs; col++)
            {
                row1[col] = Avg3(above[col - 2], above[col - 1], above[col]);
            }

            // Column 0 from row 2 downwards.
            byte* row2 = row1 + stride;
            row2[0] = Avg3(above[-1], left[0], left[1]);
            for (int offset = 1; offset < bs - 2; offset++)
            {
                row2[offset * stride] = Avg3(left[offset - 1], left[offset], left[offset + 1]);
            }

            // Everything else, top-down, because each row is derived from the row two above it.
            byte* current = row2;
            for (int row = 2; row < bs; row++)
            {
                byte* twoUp = current - 2 * stride;

                for (int col = 1; col < bs; col++)
                {
                    current[col] = twoUp[col - 1];
                }

                current += stride;
            }
        }

        // D135 entry points for 8x8, 16x16 and 32x32 blocks.
        public static unsafe void D135Predictor8x8(byte* dst, int stride, byte* above, byte* left)
            => D135Predictor(dst, stride, 8, above, left);

        public static unsafe void D135Predictor16x16(byte* dst, int stride, byte* above, byte* left)
            => D135Predictor(dst, stride, 16, above, left);

        public static unsafe void D135Predictor32x32(byte* dst, int stride, byte* above, byte* left)
            => D135Predictor(dst, stride, 32, above, left);

        /// <summary>
        /// Fills a bs x bs block from above[-1 .. bs - 1] and the left column.
        /// A filtered border running from the bottom-left to the top-right is built on the stack, then each
        /// row is copied from a bs-wide window of that border which moves one sample back per row.
        /// </summary>
        private static unsafe void D135Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
        {
            // Sized for the largest block: 2 * 32 - 1 samples.
            const int BorderCapacity = 32 + 32 - 1;
            byte* border = stackalloc byte[BorderCapacity];

            // Left part of the border, starting at the bottom-left.
            for (int i = 0; i < bs - 2; i++)
            {
                int src = bs - 1 - i;
                border[i] = Avg3(left[src - 2], left[src - 1], left[src]);
            }

            // The three samples around the top-left corner.
            border[bs - 2] = Avg3(above[-1], left[0], left[1]);
            border[bs - 1] = Avg3(left[0], above[-1], above[0]);
            border[bs] = Avg3(above[-1], above[0], above[1]);

            // Remaining top border, ascending.
            byte* top = border + bs + 1;
            for (int i = 0; i < bs - 2; i++)
            {
                top[i] = Avg3(above[i], above[i + 1], above[i + 2]);
            }

            byte* window = border + bs - 1;
            for (int row = 0; row < bs; row++)
            {
                MemoryUtil.Copy(dst, window, bs);
                dst += stride;
                window--;
            }
        }

        // D153 entry points for 8x8, 16x16 and 32x32 blocks.
        public static unsafe void D153Predictor8x8(byte* dst, int stride, byte* above, byte* left)
            => D153Predictor(dst, stride, 8, above, left);

        public static unsafe void D153Predictor16x16(byte* dst, int stride, byte* above, byte* left)
            => D153Predictor(dst, stride, 16, above, left);

        public static unsafe void D153Predictor32x32(byte* dst, int stride, byte* above, byte* left)
            => D153Predictor(dst, stride, 32, above, left);

        /// <summary>
        /// Fills a bs x bs block from above[-1 .. bs - 2] and the left column.
        /// Columns 0 and 1 are 2-tap and 3-tap averages along the left column, the rest of row 0 is filtered
        /// from the above row, and every other sample copies the one two columns to the left, one row up.
        /// </summary>
        private static unsafe void D153Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
        {
            // Column 0.
            byte* col0 = dst;
            col0[0] = Avg2(above[-1], left[0]);
            for (int row = 1; row < bs; row++)
            {
                col0[row * stride] = Avg2(left[row - 1], left[row]);
            }

            // Column 1.
            byte* col1 = dst + 1;
            col1[0] = Avg3(left[0], above[-1], above[0]);
            col1[stride] = Avg3(above[-1], left[0], left[1]);
            for (int row = 2; row < bs; row++)
            {
                col1[row * stride] = Avg3(left[row - 2], left[row - 1], left[row]);
            }

            // Rest of row 0.
            byte* current = dst + 2;
            for (int col = 0; col < bs - 2; col++)
            {
                current[col] = Avg3(above[col - 1], above[col], above[col + 1]);
            }

            // Rest of the block, top-down, because each row is derived from the row above it.
            for (int row = 1; row < bs; row++)
            {
                byte* previous = current;
                current += stride;

                for (int col = 0; col < bs - 2; col++)
                {
                    current[col] = previous[col - 2];
                }
            }
        }

        // Vertical entry points for 4x4, 8x8, 16x16 and 32x32 blocks.
        public static unsafe void VPredictor4x4(byte* dst, int stride, byte* above, byte* left)
            => VPredictor(dst, stride, 4, above, left);

        public static unsafe void VPredictor8x8(byte* dst, int stride, byte* above, byte* left)
            => VPredictor(dst, stride, 8, above, left);

        public static unsafe void VPredictor16x16(byte* dst, int stride, byte* above, byte* left)
            => VPredictor(dst, stride, 16, above, left);

        public static unsafe void VPredictor32x32(byte* dst, int stride, byte* above, byte* left)
            => VPredictor(dst, stride, 32, above, left);

        /// <summary>Copies the first bs samples of <paramref name="above"/> into every row of the block.</summary>
        private static unsafe void VPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
        {
            for (int row = 0; row < bs; row++, dst += stride)
            {
                MemoryUtil.Copy(dst, above, bs);
            }
        }

        // Horizontal entry points for 4x4, 8x8, 16x16 and 32x32 blocks.
        public static unsafe void HPredictor4x4(byte* dst, int stride, byte* above, byte* left)
            => HPredictor(dst, stride, 4, above, left);

        public static unsafe void HPredictor8x8(byte* dst, int stride, byte* above, byte* left)
            => HPredictor(dst, stride, 8, above, left);

        public static unsafe void HPredictor16x16(byte* dst, int stride, byte* above, byte* left)
            => HPredictor(dst, stride, 16, above, left);

        public static unsafe void HPredictor32x32(byte* dst, int stride, byte* above, byte* left)
            => HPredictor(dst, stride, 32, above, left);

        /// <summary>Fills each row of the block with the <paramref name="left"/> sample of that row.</summary>
        private static unsafe void HPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
        {
            for (int row = 0; row < bs; row++, dst += stride)
            {
                MemoryUtil.Fill(dst, left[row], bs);
            }
        }

        // TM entry points for 4x4, 8x8, 16x16 and 32x32 blocks.
        public static unsafe void TMPredictor4x4(byte* dst, int stride, byte* above, byte* left)
            => TMPredictor(dst, stride, 4, above, left);

        public static unsafe void TMPredictor8x8(byte* dst, int stride, byte* above, byte* left)
            => TMPredictor(dst, stride, 8, above, left);

        public static unsafe void TMPredictor16x16(byte* dst, int stride, byte* above, byte* left)
            => TMPredictor(dst, stride, 16, above, left);

        public static unsafe void TMPredictor32x32(byte* dst, int stride, byte* above, byte* left)
            => TMPredictor(dst, stride, 32, above, left);

        /// <summary>
        /// Sets every sample to left[row] + above[col] - above[-1], passed through <c>BitUtils.ClipPixel</c>.
        /// </summary>
        private static unsafe void TMPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
        {
            int topLeft = above[-1];

            for (int row = 0; row < bs; row++, dst += stride)
            {
                for (int col = 0; col < bs; col++)
                {
                    dst[col] = BitUtils.ClipPixel(left[row] + above[col] - topLeft);
                }
            }
        }

        // DC-128 entry points for 4x4, 8x8, 16x16 and 32x32 blocks.
        public static unsafe void Dc128Predictor4x4(byte* dst, int stride, byte* above, byte* left)
            => Dc128Predictor(dst, stride, 4, above, left);

        public static unsafe void Dc128Predictor8x8(byte* dst, int stride, byte* above, byte* left)
            => Dc128Predictor(dst, stride, 8, above, left);

        public static unsafe void Dc128Predictor16x16(byte* dst, int stride, byte* above, byte* left)
            => Dc128Predictor(dst, stride, 16, above, left);

        public static unsafe void Dc128Predictor32x32(byte* dst, int stride, byte* above, byte* left)
            => Dc128Predictor(dst, stride, 32, above, left);

        /// <summary>Fills the block with the constant 128; neither border is read.</summary>
        private static unsafe void Dc128Predictor(byte* dst, int stride, int bs, byte* above, byte* left)
        {
            for (int row = 0; row < bs; row++, dst += stride)
            {
                MemoryUtil.Fill(dst, (byte)128, bs);
            }
        }

        // DC-left entry points for 4x4, 8x8, 16x16 and 32x32 blocks.
        public static unsafe void DcLeftPredictor4x4(byte* dst, int stride, byte* above, byte* left)
            => DcLeftPredictor(dst, stride, 4, above, left);

        public static unsafe void DcLeftPredictor8x8(byte* dst, int stride, byte* above, byte* left)
            => DcLeftPredictor(dst, stride, 8, above, left);

        public static unsafe void DcLeftPredictor16x16(byte* dst, int stride, byte* above, byte* left)
            => DcLeftPredictor(dst, stride, 16, above, left);

        public static unsafe void DcLeftPredictor32x32(byte* dst, int stride, byte* above, byte* left)
            => DcLeftPredictor(dst, stride, 32, above, left);

        /// <summary>Fills the block with the rounded mean of the first bs <paramref name="left"/> samples.</summary>
        private static unsafe void DcLeftPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
        {
            int sum = 0;
            for (int i = 0; i < bs; i++)
            {
                sum += left[i];
            }

            // Adding half the divisor first rounds the quotient to nearest.
            int expectedDc = (sum + (bs >> 1)) / bs;

            for (int row = 0; row < bs; row++, dst += stride)
            {
                MemoryUtil.Fill(dst, (byte)expectedDc, bs);
            }
        }

        // DC-top entry points for 4x4, 8x8, 16x16 and 32x32 blocks.
        public static unsafe void DcTopPredictor4x4(byte* dst, int stride, byte* above, byte* left)
            => DcTopPredictor(dst, stride, 4, above, left);

        public static unsafe void DcTopPredictor8x8(byte* dst, int stride, byte* above, byte* left)
            => DcTopPredictor(dst, stride, 8, above, left);

        public static unsafe void DcTopPredictor16x16(byte* dst, int stride, byte* above, byte* left)
            => DcTopPredictor(dst, stride, 16, above, left);

        public static unsafe void DcTopPredictor32x32(byte* dst, int stride, byte* above, byte* left)
            => DcTopPredictor(dst, stride, 32, above, left);

        /// <summary>Fills the block with the rounded mean of the first bs <paramref name="above"/> samples.</summary>
        private static unsafe void DcTopPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
        {
            int sum = 0;
            for (int i = 0; i < bs; i++)
            {
                sum += above[i];
            }

            // Adding half the divisor first rounds the quotient to nearest.
            int expectedDc = (sum + (bs >> 1)) / bs;

            for (int row = 0; row < bs; row++, dst += stride)
            {
                MemoryUtil.Fill(dst, (byte)expectedDc, bs);
            }
        }

        // DC entry points for 4x4, 8x8, 16x16 and 32x32 blocks.
        public static unsafe void DcPredictor4x4(byte* dst, int stride, byte* above, byte* left)
            => DcPredictor(dst, stride, 4, above, left);

        public static unsafe void DcPredictor8x8(byte* dst, int stride, byte* above, byte* left)
            => DcPredictor(dst, stride, 8, above, left);

        public static unsafe void DcPredictor16x16(byte* dst, int stride, byte* above, byte* left)
            => DcPredictor(dst, stride, 16, above, left);

        public static unsafe void DcPredictor32x32(byte* dst, int stride, byte* above, byte* left)
            => DcPredictor(dst, stride, 32, above, left);

        /// <summary>
        /// Fills the block with the rounded mean of the first bs samples of both
        /// <paramref name="above"/> and <paramref name="left"/>.
        /// </summary>
        private static unsafe void DcPredictor(byte* dst, int stride, int bs, byte* above, byte* left)
        {
            int count = 2 * bs;
            int sum = 0;

            for (int i = 0; i < bs; i++)
            {
                sum += above[i] + left[i];
            }

            // Adding half the divisor first rounds the quotient to nearest.
            int expectedDc = (sum + (count >> 1)) / count;

            for (int row = 0; row < bs; row++, dst += stride)
            {
                MemoryUtil.Fill(dst, (byte)expectedDc, bs);
            }
        }

        /// <summary>
        /// 4x4: fills each row with the 3-tap average of the left sample of that row and its vertical
        /// neighbours (above[-1] before the first one, left[3] repeated after the last one).
        /// </summary>
        public static unsafe void HePredictor4x4(byte* dst, int stride, byte* above, byte* left)
        {
            byte tl = above[-1];
            byte l0 = left[0];
            byte l1 = left[1];
            byte l2 = left[2];
            byte l3 = left[3];

            MemoryUtil.Fill(dst, Avg3(tl, l0, l1), 4);
            MemoryUtil.Fill(dst + stride, Avg3(l0, l1, l2), 4);
            MemoryUtil.Fill(dst + stride * 2, Avg3(l1, l2, l3), 4);
            MemoryUtil.Fill(dst + stride * 3, Avg3(l2, l3, l3), 4);
        }

        /// <summary>
        /// 4x4: row 0 is the 3-tap filtered above row (using above[-1 .. 4]); rows 1 to 3 are copies of row 0.
        /// </summary>
        public static unsafe void VePredictor4x4(byte* dst, int stride, byte* above, byte* left)
        {
            byte tl = above[-1];
            byte a0 = above[0];
            byte a1 = above[1];
            byte a2 = above[2];
            byte a3 = above[3];
            byte a4 = above[4];

            dst[0] = Avg3(tl, a0, a1);
            dst[1] = Avg3(a0, a1, a2);
            dst[2] = Avg3(a1, a2, a3);
            dst[3] = Avg3(a2, a3, a4);

            for (int row = 1; row < 4; row++)
            {
                MemoryUtil.Copy(dst + stride * row, dst, 4);
            }
        }

        /// <summary>4x4 variant of the D207 predictor, fully unrolled; reads left[0 .. 3] only.</summary>
        public static unsafe void D207Predictor4x4(byte* dst, int stride, byte* above, byte* left)
        {
            byte l0 = left[0];
            byte l1 = left[1];
            byte l2 = left[2];
            byte l3 = left[3];

            byte* row0 = dst;
            byte* row1 = row0 + stride;
            byte* row2 = row1 + stride;
            byte* row3 = row2 + stride;

            row0[0] = Avg2(l0, l1);
            row0[2] = row1[0] = Avg2(l1, l2);
            row1[2] = row2[0] = Avg2(l2, l3);
            row0[1] = Avg3(l0, l1, l2);
            row0[3] = row1[1] = Avg3(l1, l2, l3);
            row1[3] = row2[1] = Avg3(l2, l3, l3);
            row2[3] = row2[2] = row3[0] = row3[1] = row3[2] = row3[3] = l3;
        }

        /// <summary>4x4 variant of the D63 predictor, fully unrolled; reads above[0 .. 6].</summary>
        public static unsafe void D63Predictor4x4(byte* dst, int stride, byte* above, byte* left)
        {
            byte a0 = above[0];
            byte a1 = above[1];
            byte a2 = above[2];
            byte a3 = above[3];
            byte a4 = above[4];
            byte a5 = above[5];
            byte a6 = above[6];

            byte* row0 = dst;
            byte* row1 = row0 + stride;
            byte* row2 = row1 + stride;
            byte* row3 = row2 + stride;

            row0[0] = Avg2(a0, a1);
            row0[1] = row2[0] = Avg2(a1, a2);
            row0[2] = row2[1] = Avg2(a2, a3);
            row0[3] = row2[2] = Avg2(a3, a4);
            row2[3] = Avg2(a4, a5); // Differs from vp8

            row1[0] = Avg3(a0, a1, a2);
            row1[1] = row3[0] = Avg3(a1, a2, a3);
            row1[2] = row3[1] = Avg3(a2, a3, a4);
            row1[3] = row3[2] = Avg3(a3, a4, a5);
            row3[3] = Avg3(a4, a5, a6); // Differs from vp8
        }

        /// <summary>
        /// Same as <see cref="D63Predictor4x4"/> except for the last sample of rows 2 and 3, which use
        /// 3-tap averages reaching up to above[7].
        /// </summary>
        public static unsafe void D63ePredictor4x4(byte* dst, int stride, byte* above, byte* left)
        {
            byte a0 = above[0];
            byte a1 = above[1];
            byte a2 = above[2];
            byte a3 = above[3];
            byte a4 = above[4];
            byte a5 = above[5];
            byte a6 = above[6];
            byte a7 = above[7];

            byte* row0 = dst;
            byte* row1 = row0 + stride;
            byte* row2 = row1 + stride;
            byte* row3 = row2 + stride;

            row0[0] = Avg2(a0, a1);
            row0[1] = row2[0] = Avg2(a1, a2);
            row0[2] = row2[1] = Avg2(a2, a3);
            row0[3] = row2[2] = Avg2(a3, a4);
            row2[3] = Avg3(a4, a5, a6);

            row1[0] = Avg3(a0, a1, a2);
            row1[1] = row3[0] = Avg3(a1, a2, a3);
            row1[2] = row3[1] = Avg3(a2, a3, a4);
            row1[3] = row3[2] = Avg3(a3, a4, a5);
            row3[3] = Avg3(a5, a6, a7);
        }

        /// <summary>
        /// 4x4 variant of the D45 predictor, fully unrolled; reads above[0 .. 7]. Samples on the same
        /// anti-diagonal share one 3-tap average; the bottom-right sample is above[7] unfiltered.
        /// </summary>
        public static unsafe void D45Predictor4x4(byte* dst, int stride, byte* above, byte* left)
        {
            byte a0 = above[0];
            byte a1 = above[1];
            byte a2 = above[2];
            byte a3 = above[3];
            byte a4 = above[4];
            byte a5 = above[5];
            byte a6 = above[6];
            byte a7 = above[7];

            byte* row0 = dst;
            byte* row1 = row0 + stride;
            byte* row2 = row1 + stride;
            byte* row3 = row2 + stride;

            row0[0] = Avg3(a0, a1, a2);
            row0[1] = row1[0] = Avg3(a1, a2, a3);
            row0[2] = row1[1] = row2[0] = Avg3(a2, a3, a4);
            row0[3] = row1[2] = row2[1] = row3[0] = Avg3(a3, a4, a5);
            row1[3] = row2[2] = row3[1] = Avg3(a4, a5, a6);
            row2[3] = row3[2] = Avg3(a5, a6, a7);
            row3[3] = a7; // differs from vp8
        }

        /// <summary>
        /// Same as <see cref="D45Predictor4x4"/> except that the bottom-right sample is the 3-tap average
        /// of above[6], above[7], above[7].
        /// </summary>
        public static unsafe void D45ePredictor4x4(byte* dst, int stride, byte* above, byte* left)
        {
            byte a0 = above[0];
            byte a1 = above[1];
            byte a2 = above[2];
            byte a3 = above[3];
            byte a4 = above[4];
            byte a5 = above[5];
            byte a6 = above[6];
            byte a7 = above[7];

            byte* row0 = dst;
            byte* row1 = row0 + stride;
            byte* row2 = row1 + stride;
            byte* row3 = row2 + stride;

            row0[0] = Avg3(a0, a1, a2);
            row0[1] = row1[0] = Avg3(a1, a2, a3);
            row0[2] = row1[1] = row2[0] = Avg3(a2, a3, a4);
            row0[3] = row1[2] = row2[1] = row3[0] = Avg3(a3, a4, a5);
            row1[3] = row2[2] = row3[1] = Avg3(a4, a5, a6);
            row2[3] = row3[2] = Avg3(a5, a6, a7);
            row3[3] = Avg3(a6, a7, a7);
        }

        /// <summary>4x4 variant of the D117 predictor, fully unrolled; reads left[0 .. 2] and above[-1 .. 3].</summary>
        public static unsafe void D117Predictor4x4(byte* dst, int stride, byte* above, byte* left)
        {
            byte l0 = left[0];
            byte l1 = left[1];
            byte l2 = left[2];
            byte tl = above[-1];
            byte a0 = above[0];
            byte a1 = above[1];
            byte a2 = above[2];
            byte a3 = above[3];

            byte* row0 = dst;
            byte* row1 = row0 + stride;
            byte* row2 = row1 + stride;
            byte* row3 = row2 + stride;

            row0[0] = row2[1] = Avg2(tl, a0);
            row0[1] = row2[2] = Avg2(a0, a1);
            row0[2] = row2[3] = Avg2(a1, a2);
            row0[3] = Avg2(a2, a3);

            row3[0] = Avg3(l2, l1, l0);
            row2[0] = Avg3(l1, l0, tl);
            row1[0] = row3[1] = Avg3(l0, tl, a0);
            row1[1] = row3[2] = Avg3(tl, a0, a1);
            row1[2] = row3[3] = Avg3(a0, a1, a2);
            row1[3] = Avg3(a1, a2, a3);
        }

        /// <summary>
        /// 4x4 variant of the D135 predictor, fully unrolled; reads left[0 .. 3] and above[-1 .. 3].
        /// Samples on the same diagonal share one 3-tap average.
        /// </summary>
        public static unsafe void D135Predictor4x4(byte* dst, int stride, byte* above, byte* left)
        {
            byte l0 = left[0];
            byte l1 = left[1];
            byte l2 = left[2];
            byte l3 = left[3];
            byte tl = above[-1];
            byte a0 = above[0];
            byte a1 = above[1];
            byte a2 = above[2];
            byte a3 = above[3];

            byte* row0 = dst;
            byte* row1 = row0 + stride;
            byte* row2 = row1 + stride;
            byte* row3 = row2 + stride;

            row3[0] = Avg3(l1, l2, l3);
            row3[1] = row2[0] = Avg3(l0, l1, l2);
            row3[2] = row2[1] = row1[0] = Avg3(tl, l0, l1);
            row3[3] = row2[2] = row1[1] = row0[0] = Avg3(a0, tl, l0);
            row2[3] = row1[2] = row0[1] = Avg3(a1, a0, tl);
            row1[3] = row0[2] = Avg3(a2, a1, a0);
            row0[3] = Avg3(a3, a2, a1);
        }

        /// <summary>4x4 variant of the D153 predictor, fully unrolled; reads left[0 .. 3] and above[-1 .. 2].</summary>
        public static unsafe void D153Predictor4x4(byte* dst, int stride, byte* above, byte* left)
        {
            byte l0 = left[0];
            byte l1 = left[1];
            byte l2 = left[2];
            byte l3 = left[3];
            byte tl = above[-1];
            byte a0 = above[0];
            byte a1 = above[1];
            byte a2 = above[2];

            byte* row0 = dst;
            byte* row1 = row0 + stride;
            byte* row2 = row1 + stride;
            byte* row3 = row2 + stride;

            row0[0] = row1[2] = Avg2(l0, tl);
            row1[0] = row2[2] = Avg2(l1, l0);
            row2[0] = row3[2] = Avg2(l2, l1);
            row3[0] = Avg2(l3, l2);

            row0[3] = Avg3(a0, a1, a2);
            row0[2] = Avg3(tl, a0, a1);
            row0[1] = row1[3] = Avg3(l0, tl, a0);
            row1[1] = row2[3] = Avg3(l1, l0, tl);
            row2[1] = row3[3] = Avg3(l2, l1, l0);
            row3[1] = Avg3(l3, l2, l1);
        }

        // Highbd D207 entry points for 8x8, 16x16 and 32x32 blocks.
        public static unsafe void HighbdD207Predictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd)
            => HighbdD207Predictor(dst, stride, 8, above, left, bd);

        public static unsafe void HighbdD207Predictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
            => HighbdD207Predictor(dst, stride, 16, above, left, bd);

        public static unsafe void HighbdD207Predictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
            => HighbdD207Predictor(dst, stride, 32, above, left, bd);

        /// <summary>
        /// 16-bit sample version of <see cref="D207Predictor"/>; <paramref name="above"/> and
        /// <paramref name="bd"/> are not read.
        /// </summary>
        private static unsafe void HighbdD207Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
        {
            int lastRow = bs - 1;

            // Column 0.
            for (int row = 0; row < lastRow; row++)
            {
                dst[row * stride] = Avg2(left[row], left[row + 1]);
            }

            dst[lastRow * stride] = left[lastRow];
            dst++;

            // Column 1.
            for (int row = 0; row < bs - 2; row++)
            {
                dst[row * stride] = Avg3(left[row], left[row + 1], left[row + 2]);
            }

            dst[(bs - 2) * stride] = Avg3(left[bs - 2], left[lastRow], left[lastRow]);
            dst[lastRow * stride] = left[lastRow];
            dst++;

            // Remaining columns of the bottom row.
            ushort* bottom = dst + lastRow * stride;
            for (int col = 0; col < bs - 2; col++)
            {
                bottom[col] = left[lastRow];
            }

            // Remaining rows, bottom-up, because each row is derived from the row below it.
            for (int row = bs - 2; row >= 0; row--)
            {
                ushort* current = dst + row * stride;
                ushort* below = current + stride;

                for (int col = 0; col < bs - 2; col++)
                {
                    current[col] = below[col - 2];
                }
            }
        }

        // Highbd D63 entry points for 8x8, 16x16 and 32x32 blocks.
        public static unsafe void HighbdD63Predictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd)
            => HighbdD63Predictor(dst, stride, 8, above, left, bd);

        public static unsafe void HighbdD63Predictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd)
            => HighbdD63Predictor(dst, stride, 16, above, left, bd);

        public static unsafe void HighbdD63Predictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd)
            => HighbdD63Predictor(dst, stride, 32, above, left, bd);

        /// <summary>
        /// 16-bit sample version of <see cref="D63Predictor"/>; <paramref name="left"/> and
        /// <paramref name="bd"/> are not read.
        /// </summary>
        private static unsafe void HighbdD63Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd)
        {
            for (int col = 0; col < bs; col++)
            {
                dst[col] = Avg2(above[col], above[col + 1]);
                dst[stride + col] = Avg3(above[col], above[col + 1], above[col + 2]);
            }

            int size = bs - 2;
            for (int row = 2; row < bs; row += 2)
            {
                int shift = row >> 1;
                ushort* evenRow = dst + row * stride;
                ushort* oddRow = evenRow + stride;

                MemoryUtil.Copy(evenRow, dst + shift, size);
                MemoryUtil.Fill(evenRow + size, above[bs - 1], bs - size);
                MemoryUtil.Copy(oddRow, dst + stride + shift, size);
                MemoryUtil.Fill(oddRow + size, above[bs - 1], bs - size);

                size--;
            }
        }

        // Highbd D45 entry point for 8x8 blocks.
        public static unsafe void HighbdD45Predictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd)
            => HighbdD45Predictor(dst, stride, 8, above, left, bd);

public static unsafe void HighbdD45Predictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdD45Predictor(dst, stride, 16, above, left, bd); + } + + public static unsafe void HighbdD45Predictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdD45Predictor(dst, stride, 32, above, left, bd); + } + + private static unsafe void HighbdD45Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd) + { + ushort aboveRight = above[bs - 1]; + ushort* dstRow0 = dst; + int x, size; + + for (x = 0; x < bs - 1; ++x) + { + dst[x] = Avg3(above[x], above[x + 1], above[x + 2]); + } + dst[bs - 1] = aboveRight; + dst += stride; + for (x = 1, size = bs - 2; x < bs; ++x, --size) + { + MemoryUtil.Copy(dst, dstRow0 + x, size); + MemoryUtil.Fill(dst + size, aboveRight, x + 1); + dst += stride; + } + } + + public static unsafe void HighbdD117Predictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdD117Predictor(dst, stride, 8, above, left, bd); + } + + public static unsafe void HighbdD117Predictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdD117Predictor(dst, stride, 16, above, left, bd); + } + + public static unsafe void HighbdD117Predictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdD117Predictor(dst, stride, 32, above, left, bd); + } + + private static unsafe void HighbdD117Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd) + { + int r, c; + + // First row + for (c = 0; c < bs; c++) + { + dst[c] = Avg2(above[c - 1], above[c]); + } + + dst += stride; + + // Second row + dst[0] = Avg3(left[0], above[-1], above[0]); + for (c = 1; c < bs; c++) + { + dst[c] = Avg3(above[c - 2], above[c - 1], above[c]); + } + + dst += stride; + + // The rest of first col + dst[0] = Avg3(above[-1], left[0], left[1]); + for (r = 3; r < bs; ++r) + { + dst[(r - 2) * stride] = Avg3(left[r - 3], left[r - 2], 
left[r - 1]); + } + + // The rest of the block + for (r = 2; r < bs; ++r) + { + for (c = 1; c < bs; c++) + { + dst[c] = dst[-2 * stride + c - 1]; + } + + dst += stride; + } + } + + public static unsafe void HighbdD135Predictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdD135Predictor(dst, stride, 8, above, left, bd); + } + + public static unsafe void HighbdD135Predictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdD135Predictor(dst, stride, 16, above, left, bd); + } + + public static unsafe void HighbdD135Predictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdD135Predictor(dst, stride, 32, above, left, bd); + } + + private static unsafe void HighbdD135Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd) + { + int i; + ushort* border = stackalloc ushort[32 + 32 - 1]; // Outer border from bottom-left to top-right + + // Dst(dst, stride, bs, bs - 2)[0], i.e., border starting at bottom-left + for (i = 0; i < bs - 2; ++i) + { + border[i] = Avg3(left[bs - 3 - i], left[bs - 2 - i], left[bs - 1 - i]); + } + border[bs - 2] = Avg3(above[-1], left[0], left[1]); + border[bs - 1] = Avg3(left[0], above[-1], above[0]); + border[bs - 0] = Avg3(above[-1], above[0], above[1]); + // dst[0][2, size), i.e., remaining top border ascending + for (i = 0; i < bs - 2; ++i) + { + border[bs + 1 + i] = Avg3(above[i], above[i + 1], above[i + 2]); + } + + for (i = 0; i < bs; ++i) + { + MemoryUtil.Copy(dst + i * stride, border + bs - 1 - i, bs); + } + } + + public static unsafe void HighbdD153Predictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdD153Predictor(dst, stride, 8, above, left, bd); + } + + public static unsafe void HighbdD153Predictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdD153Predictor(dst, stride, 16, above, left, bd); + } + + public static unsafe void HighbdD153Predictor32x32(ushort* 
dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdD153Predictor(dst, stride, 32, above, left, bd); + } + + private static unsafe void HighbdD153Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd) + { + int r, c; + dst[0] = Avg2(above[-1], left[0]); + for (r = 1; r < bs; r++) + { + dst[r * stride] = Avg2(left[r - 1], left[r]); + } + + dst++; + + dst[0] = Avg3(left[0], above[-1], above[0]); + dst[stride] = Avg3(above[-1], left[0], left[1]); + for (r = 2; r < bs; r++) + { + dst[r * stride] = Avg3(left[r - 2], left[r - 1], left[r]); + } + + dst++; + + for (c = 0; c < bs - 2; c++) + { + dst[c] = Avg3(above[c - 1], above[c], above[c + 1]); + } + + dst += stride; + + for (r = 1; r < bs; ++r) + { + for (c = 0; c < bs - 2; c++) + { + dst[c] = dst[-stride + c - 2]; + } + + dst += stride; + } + } + + public static unsafe void HighbdVPredictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdVPredictor(dst, stride, 4, above, left, bd); + } + + public static unsafe void HighbdVPredictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdVPredictor(dst, stride, 8, above, left, bd); + } + + public static unsafe void HighbdVPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdVPredictor(dst, stride, 16, above, left, bd); + } + + public static unsafe void HighbdVPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdVPredictor(dst, stride, 32, above, left, bd); + } + + private static unsafe void HighbdVPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd) + { + int r; + for (r = 0; r < bs; r++) + { + MemoryUtil.Copy(dst, above, bs); + dst += stride; + } + } + + public static unsafe void HighbdHPredictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdHPredictor(dst, stride, 4, above, left, bd); + } + + public static unsafe void HighbdHPredictor8x8(ushort* dst, 
int stride, ushort* above, ushort* left, int bd) + { + HighbdHPredictor(dst, stride, 8, above, left, bd); + } + + public static unsafe void HighbdHPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdHPredictor(dst, stride, 16, above, left, bd); + } + + public static unsafe void HighbdHPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdHPredictor(dst, stride, 32, above, left, bd); + } + + private static unsafe void HighbdHPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd) + { + int r; + for (r = 0; r < bs; r++) + { + MemoryUtil.Fill(dst, left[r], bs); + dst += stride; + } + } + + public static unsafe void HighbdTMPredictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdTMPredictor(dst, stride, 4, above, left, bd); + } + + public static unsafe void HighbdTMPredictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdTMPredictor(dst, stride, 8, above, left, bd); + } + + public static unsafe void HighbdTMPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdTMPredictor(dst, stride, 16, above, left, bd); + } + + public static unsafe void HighbdTMPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdTMPredictor(dst, stride, 32, above, left, bd); + } + + private static unsafe void HighbdTMPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd) + { + int r, c; + int yTopLeft = above[-1]; + + for (r = 0; r < bs; r++) + { + for (c = 0; c < bs; c++) + { + dst[c] = BitUtils.ClipPixelHighbd(left[r] + above[c] - yTopLeft, bd); + } + + dst += stride; + } + } + + public static unsafe void HighbdDc128Predictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdDc128Predictor(dst, stride, 4, above, left, bd); + } + + public static unsafe void HighbdDc128Predictor8x8(ushort* dst, int stride, ushort* above, 
ushort* left, int bd) + { + HighbdDc128Predictor(dst, stride, 8, above, left, bd); + } + + public static unsafe void HighbdDc128Predictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdDc128Predictor(dst, stride, 16, above, left, bd); + } + + public static unsafe void HighbdDc128Predictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdDc128Predictor(dst, stride, 32, above, left, bd); + } + + private static unsafe void HighbdDc128Predictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd) + { + int r; + + for (r = 0; r < bs; r++) + { + MemoryUtil.Fill(dst, (ushort)(128 << (bd - 8)), bs); + dst += stride; + } + } + + public static unsafe void HighbdDcLeftPredictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdDcLeftPredictor(dst, stride, 4, above, left, bd); + } + + public static unsafe void HighbdDcLeftPredictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdDcLeftPredictor(dst, stride, 8, above, left, bd); + } + + public static unsafe void HighbdDcLeftPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdDcLeftPredictor(dst, stride, 16, above, left, bd); + } + + public static unsafe void HighbdDcLeftPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdDcLeftPredictor(dst, stride, 32, above, left, bd); + } + + private static unsafe void HighbdDcLeftPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd) + { + int i, r, expectedDc, sum = 0; + + for (i = 0; i < bs; i++) + { + sum += left[i]; + } + + expectedDc = (sum + (bs >> 1)) / bs; + + for (r = 0; r < bs; r++) + { + MemoryUtil.Fill(dst, (ushort)expectedDc, bs); + dst += stride; + } + } + + public static unsafe void HighbdDcTopPredictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdDcTopPredictor(dst, stride, 4, above, left, bd); + } + + public 
static unsafe void HighbdDcTopPredictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdDcTopPredictor(dst, stride, 8, above, left, bd); + } + + public static unsafe void HighbdDcTopPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdDcTopPredictor(dst, stride, 16, above, left, bd); + } + + public static unsafe void HighbdDcTopPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdDcTopPredictor(dst, stride, 32, above, left, bd); + } + + /* Fills the bs x bs block with the rounded mean of above[0..bs-1]. Only above is read; left and bd are unused. */ private static unsafe void HighbdDcTopPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd) + { + int i, r, expectedDc, sum = 0; + + for (i = 0; i < bs; i++) + { + sum += above[i]; + } + + /* Adding bs / 2 before the integer division rounds to nearest instead of truncating. */ expectedDc = (sum + (bs >> 1)) / bs; + + for (r = 0; r < bs; r++) + { + MemoryUtil.Fill(dst, (ushort)expectedDc, bs); + dst += stride; + } + } + + /* Fixed-size entry points; each forwards to HighbdDcPredictor with bs = 4, 8, 16 or 32. */ public static unsafe void HighbdDcPredictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdDcPredictor(dst, stride, 4, above, left, bd); + } + + public static unsafe void HighbdDcPredictor8x8(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdDcPredictor(dst, stride, 8, above, left, bd); + } + + public static unsafe void HighbdDcPredictor16x16(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdDcPredictor(dst, stride, 16, above, left, bd); + } + + public static unsafe void HighbdDcPredictor32x32(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + HighbdDcPredictor(dst, stride, 32, above, left, bd); + } + + /* Fills the bs x bs block with the rounded mean of the 2 * bs neighbours above[0..bs-1] and left[0..bs-1]. bd is unused. */ private static unsafe void HighbdDcPredictor(ushort* dst, int stride, int bs, ushort* above, ushort* left, int bd) + { + int i, r, expectedDc, sum = 0; + int count = 2 * bs; + + for (i = 0; i < bs; i++) + { + sum += above[i]; + sum += left[i]; + } + + /* Adding count / 2 before the integer division rounds to nearest. */ expectedDc = (sum + (count >> 1)) / count; + + for (r = 0; r < bs; r++) + { + MemoryUtil.Fill(dst, (ushort)expectedDc, bs); + dst += 
stride; + } + } + + /* 4x4 D207 directional predictor. Reads left[0..3] only; above and bd are unused. Every pixel is an Avg2/Avg3 of neighbouring left samples or left[3] itself. (Dst, Avg2 and Avg3 are defined outside this chunk; Dst(dst, stride, x, y) presumably addresses column x of row y - confirm.) */ public static unsafe void HighbdD207Predictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + ushort I = left[0]; + ushort j = left[1]; + ushort k = left[2]; + ushort l = left[3]; + Dst(dst, stride, 0, 0) = Avg2(I, j); + Dst(dst, stride, 2, 0) = Dst(dst, stride, 0, 1) = Avg2(j, k); + Dst(dst, stride, 2, 1) = Dst(dst, stride, 0, 2) = Avg2(k, l); + Dst(dst, stride, 1, 0) = Avg3(I, j, k); + Dst(dst, stride, 3, 0) = Dst(dst, stride, 1, 1) = Avg3(j, k, l); + Dst(dst, stride, 3, 1) = Dst(dst, stride, 1, 2) = Avg3(k, l, l); + Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 0, 3) = Dst(dst, stride, 1, 3) = Dst(dst, stride, 2, 3) = Dst(dst, stride, 3, 3) = l; + } + + /* 4x4 D63 directional predictor. Reads above[0..6] only; left and bd are unused. The first group of assignments uses Avg2 and the second uses Avg3. */ public static unsafe void HighbdD63Predictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + ushort a = above[0]; + ushort b = above[1]; + ushort c = above[2]; + ushort d = above[3]; + ushort e = above[4]; + ushort f = above[5]; + ushort g = above[6]; + Dst(dst, stride, 0, 0) = Avg2(a, b); + Dst(dst, stride, 1, 0) = Dst(dst, stride, 0, 2) = Avg2(b, c); + Dst(dst, stride, 2, 0) = Dst(dst, stride, 1, 2) = Avg2(c, d); + Dst(dst, stride, 3, 0) = Dst(dst, stride, 2, 2) = Avg2(d, e); + Dst(dst, stride, 3, 2) = Avg2(e, f); // Differs from vp8 + + Dst(dst, stride, 0, 1) = Avg3(a, b, c); + Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 3) = Avg3(b, c, d); + Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 3) = Avg3(c, d, e); + Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 3) = Avg3(d, e, f); + Dst(dst, stride, 3, 3) = Avg3(e, f, g); // Differs from vp8 + } + + /* 4x4 D45 directional predictor. Reads above[0..7] only; left and bd are unused. Each anti-diagonal shares one Avg3 value, and the last pixel (3, 3) is above[7] unfiltered. */ public static unsafe void HighbdD45Predictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + ushort a = above[0]; + ushort b = above[1]; + ushort c = above[2]; + ushort d = above[3]; + ushort e = above[4]; + ushort f = above[5]; + ushort g = above[6]; + ushort h = above[7]; + Dst(dst, stride, 0, 0) = Avg3(a, b, c); + Dst(dst, stride, 1, 0) = Dst(dst, stride, 0, 1) 
= Avg3(b, c, d); + Dst(dst, stride, 2, 0) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 2) = Avg3(c, d, e); + Dst(dst, stride, 3, 0) = Dst(dst, stride, 2, 1) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 3) = Avg3(d, e, f); + Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 3) = Avg3(e, f, g); + Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 3) = Avg3(f, g, h); + Dst(dst, stride, 3, 3) = h; // Differs from vp8 + } + + /* 4x4 D117 directional predictor. Reads left[0..2] and above[-1..3]; bd is unused. above[-1] is the top-left corner sample, so the caller must pass a pointer with a valid element before above[0]. */ public static unsafe void HighbdD117Predictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + ushort I = left[0]; + ushort j = left[1]; + ushort k = left[2]; + ushort x = above[-1]; + ushort a = above[0]; + ushort b = above[1]; + ushort c = above[2]; + ushort d = above[3]; + Dst(dst, stride, 0, 0) = Dst(dst, stride, 1, 2) = Avg2(x, a); + Dst(dst, stride, 1, 0) = Dst(dst, stride, 2, 2) = Avg2(a, b); + Dst(dst, stride, 2, 0) = Dst(dst, stride, 3, 2) = Avg2(b, c); + Dst(dst, stride, 3, 0) = Avg2(c, d); + + Dst(dst, stride, 0, 3) = Avg3(k, j, I); + Dst(dst, stride, 0, 2) = Avg3(j, I, x); + Dst(dst, stride, 0, 1) = Dst(dst, stride, 1, 3) = Avg3(I, x, a); + Dst(dst, stride, 1, 1) = Dst(dst, stride, 2, 3) = Avg3(x, a, b); + Dst(dst, stride, 2, 1) = Dst(dst, stride, 3, 3) = Avg3(a, b, c); + Dst(dst, stride, 3, 1) = Avg3(b, c, d); + } + + /* 4x4 D135 directional predictor. Reads left[0..3] and above[-1..3]; bd is unused. Every pixel is an Avg3 of three consecutive samples along the left / top-left / above border, and pixels on the same diagonal share one value. */ public static unsafe void HighbdD135Predictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + ushort I = left[0]; + ushort j = left[1]; + ushort k = left[2]; + ushort l = left[3]; + ushort x = above[-1]; + ushort a = above[0]; + ushort b = above[1]; + ushort c = above[2]; + ushort d = above[3]; + Dst(dst, stride, 0, 3) = Avg3(j, k, l); + Dst(dst, stride, 1, 3) = Dst(dst, stride, 0, 2) = Avg3(I, j, k); + Dst(dst, stride, 2, 3) = Dst(dst, stride, 1, 2) = Dst(dst, stride, 0, 1) = Avg3(x, I, j); + Dst(dst, stride, 3, 3) = Dst(dst, stride, 2, 2) = Dst(dst, stride, 1, 1) = Dst(dst, stride, 0, 0) = Avg3(a, x, I); + Dst(dst, stride, 3, 2) = Dst(dst, stride, 2, 1) = 
Dst(dst, stride, 1, 0) = Avg3(b, a, x); + Dst(dst, stride, 3, 1) = Dst(dst, stride, 2, 0) = Avg3(c, b, a); + Dst(dst, stride, 3, 0) = Avg3(d, c, b); + } + + /* 4x4 D153 directional predictor. Reads left[0..3] and above[-1..2]; bd is unused. above[-1] is the top-left corner sample, so the caller must pass a pointer with a valid element before above[0]. */ public static unsafe void HighbdD153Predictor4x4(ushort* dst, int stride, ushort* above, ushort* left, int bd) + { + ushort I = left[0]; + ushort j = left[1]; + ushort k = left[2]; + ushort l = left[3]; + ushort x = above[-1]; + ushort a = above[0]; + ushort b = above[1]; + ushort c = above[2]; + + Dst(dst, stride, 0, 0) = Dst(dst, stride, 2, 1) = Avg2(I, x); + Dst(dst, stride, 0, 1) = Dst(dst, stride, 2, 2) = Avg2(j, I); + Dst(dst, stride, 0, 2) = Dst(dst, stride, 2, 3) = Avg2(k, j); + Dst(dst, stride, 0, 3) = Avg2(l, k); + + Dst(dst, stride, 3, 0) = Avg3(a, b, c); + Dst(dst, stride, 2, 0) = Avg3(x, a, b); + Dst(dst, stride, 1, 0) = Dst(dst, stride, 3, 1) = Avg3(I, x, a); + Dst(dst, stride, 1, 1) = Dst(dst, stride, 3, 2) = Avg3(j, I, x); + Dst(dst, stride, 1, 2) = Dst(dst, stride, 3, 3) = Avg3(k, j, I); + Dst(dst, stride, 1, 3) = Avg3(l, k, j); + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Dsp/InvTxfm.cs b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/InvTxfm.cs new file mode 100644 index 00000000..b4ad4344 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/InvTxfm.cs @@ -0,0 +1,2868 @@ +using System; +using System.Diagnostics; +using System.Runtime.CompilerServices; +using Ryujinx.Graphics.Nvdec.Vp9.Common; +using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.TxfmCommon; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp +{ + /* Inverse transform routines (Walsh-Hadamard, DCT and ADST) used by the VP9 decoder. The 1-D kernels transform coefficient spans; the NxN "Add" variants run the kernel over rows and then columns and add the clamped result into the destination pixels. */ internal static class InvTxfm + { + // 12 signal input bits + 7 2D forward transform amplify bits + 5 1D inverse + // transform amplify bits + 1 bit for contingency in rounding and quantizing + private const int HighbdValidTxfmMagnitudeRange = (1 << 25); + + /* Returns 1 if any of the first size coefficients has a magnitude >= HighbdValidTxfmMagnitudeRange, otherwise 0. NOTE(review): if the span element type is int, Math.Abs throws OverflowException for int.MinValue - confirm such a coefficient cannot reach this check, or compare against +/- the range without Math.Abs. */ [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int DetectInvalidHighbdInput(ReadOnlySpan input, int size) + { + int i; + for (i = 0; i < size; ++i) + { + if (Math.Abs(input[i]) >= HighbdValidTxfmMagnitudeRange) + { + return 1; + } + } + 
+ return 0; + } + + /* Debug-only range check: asserts that input fits in a signed 16-bit integer and returns it unchanged. Debug.Assert is compiled out of builds that do not define DEBUG, so this is then a pass-through. */ [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static long CheckRange(long input) + { + // For valid VP9 input streams, intermediate stage coefficients should always + // stay within the range of a signed 16 bit integer. Coefficients can go out + // of this range for invalid/corrupt VP9 streams. + Debug.Assert(short.MinValue <= input); + Debug.Assert(input <= short.MaxValue); + return input; + } + + /* High bit depth counterpart of CheckRange: asserts that input fits in a signed (8 + bd)-bit integer and returns it unchanged. */ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static long HighbdCheckRange(long input, int bd) + { + // For valid highbitdepth VP9 streams, intermediate stage coefficients will + // stay within the ranges: + // - 8 bit: signed 16 bit integer + // - 10 bit: signed 18 bit integer + // - 12 bit: signed 20 bit integer + int intMax = (1 << (7 + bd)) - 1; + int intMin = -intMax - 1; + Debug.Assert(intMin <= input); + Debug.Assert(input <= intMax); + + return input; + } + + /* Wraps x to 16 bits: the (short) cast keeps the low 16 bits and sign-extends them back to int. */ [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int WrapLow(long x) + { + return (short)CheckRange(x); + } + + /* Wraps x to (8 + bd) bits: after truncating to int, shifting left and then arithmetically right by (24 - bd) keeps the low (8 + bd) bits and sign-extends them. */ [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int HighbdWrapLow(long x, int bd) + { + return ((int)HighbdCheckRange(x, bd) << (24 - bd)) >> (24 - bd); + } + + /* Adds the 16-bit-wrapped residual trans to the pixel dest and clamps the sum with BitUtils.ClipPixel. */ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static byte ClipPixelAdd(byte dest, long trans) + { + trans = WrapLow(trans); + return BitUtils.ClipPixel(dest + (int)trans); + } + + /* High bit depth counterpart of ClipPixelAdd: wraps trans with HighbdWrapLow and clamps with BitUtils.ClipPixelHighbd for bit depth bd. */ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static ushort HighbdClipPixelAdd(ushort dest, long trans, int bd) + { + trans = HighbdWrapLow(trans, bd); + return BitUtils.ClipPixelHighbd(dest + (int)trans, bd); + } + + /* Removes the DctConstBits scaling introduced by multiplying with a CosPi / SinPi constant, via BitUtils.RoundPowerOfTwo. */ [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static long DctConstRoundShift(long input) + { + long rv = BitUtils.RoundPowerOfTwo(input, DctConstBits); + return rv; + } + + /* 4x4 inverse Walsh-Hadamard transform of 16 coefficients; the result is added into the 4x4 pixel block at dest (rows stride apart). */ public static void Iwht4x416Add(ReadOnlySpan input, Span dest, int stride) + { + /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 
adds, + 0.5 shifts per pixel. */ + int i; + Span output = stackalloc int[16]; + long a1, b1, c1, d1, e1; + ReadOnlySpan ip = input; + Span op = output; + + /* Pass 1: transform each group of 4 consecutive input coefficients (after removing UnitQuantShift) into the matching 4 entries of output. */ for (i = 0; i < 4; i++) + { + a1 = ip[0] >> UnitQuantShift; + c1 = ip[1] >> UnitQuantShift; + d1 = ip[2] >> UnitQuantShift; + b1 = ip[3] >> UnitQuantShift; + a1 += c1; + d1 -= b1; + e1 = (a1 - d1) >> 1; + b1 = e1 - b1; + c1 = e1 - c1; + a1 -= b1; + d1 += c1; + op[0] = WrapLow(a1); + op[1] = WrapLow(b1); + op[2] = WrapLow(c1); + op[3] = WrapLow(d1); + ip = ip.Slice(4); + op = op.Slice(4); + } + + /* Pass 2: same butterfly over the 4 columns of output (elements 4 apart); each result is added to the destination column with clamping. */ Span ip2 = output; + for (i = 0; i < 4; i++) + { + a1 = ip2[4 * 0]; + c1 = ip2[4 * 1]; + d1 = ip2[4 * 2]; + b1 = ip2[4 * 3]; + a1 += c1; + d1 -= b1; + e1 = (a1 - d1) >> 1; + b1 = e1 - b1; + c1 = e1 - c1; + a1 -= b1; + d1 += c1; + dest[stride * 0] = ClipPixelAdd(dest[stride * 0], WrapLow(a1)); + dest[stride * 1] = ClipPixelAdd(dest[stride * 1], WrapLow(b1)); + dest[stride * 2] = ClipPixelAdd(dest[stride * 2], WrapLow(c1)); + dest[stride * 3] = ClipPixelAdd(dest[stride * 3], WrapLow(d1)); + + ip2 = ip2.Slice(1); + dest = dest.Slice(1); + } + } + + /* DC-only variant of the 4x4 inverse Walsh-Hadamard: only input[0] is read. It is split into tmp[0] and a shared value for tmp[1..3], then each of the 4 columns adds its own split of tmp[i] into dest. */ public static void Iwht4x41Add(ReadOnlySpan input, Span dest, int stride) + { + int i; + long a1, e1; + Span tmp = stackalloc int[4]; + ReadOnlySpan ip = input; + Span op = tmp; + + a1 = ip[0] >> UnitQuantShift; + e1 = a1 >> 1; + a1 -= e1; + op[0] = WrapLow(a1); + op[1] = op[2] = op[3] = WrapLow(e1); + + Span ip2 = tmp; + for (i = 0; i < 4; i++) + { + e1 = ip2[0] >> 1; + a1 = ip2[0] - e1; + dest[stride * 0] = ClipPixelAdd(dest[stride * 0], a1); + dest[stride * 1] = ClipPixelAdd(dest[stride * 1], e1); + dest[stride * 2] = ClipPixelAdd(dest[stride * 2], e1); + dest[stride * 3] = ClipPixelAdd(dest[stride * 3], e1); + ip2 = ip2.Slice(1); + dest = dest.Slice(1); + } + } + + /* 4-point 1-D inverse ADST: reads input[0..3] and writes output[0..3]. An all-zero input short-circuits to an all-zero output. */ public static void Iadst4(ReadOnlySpan input, Span output) + { + long s0, s1, s2, s3, s4, s5, s6, s7; + int x0 = input[0]; + int x1 = input[1]; + int x2 = input[2]; + int x3 = input[3]; + + if ((x0 | x1 | x2 | x3) 
== 0) + { + output.Slice(0, 4).Fill(0); + return; + } + + // 32-bit result is enough for the following multiplications. + s0 = SinPi1_9 * x0; + s1 = SinPi2_9 * x0; + s2 = SinPi3_9 * x1; + s3 = SinPi4_9 * x2; + s4 = SinPi1_9 * x2; + s5 = SinPi2_9 * x3; + s6 = SinPi4_9 * x3; + s7 = WrapLow(x0 - x2 + x3); + + s0 = s0 + s3 + s5; + s1 = s1 - s4 - s6; + s3 = s2; + s2 = SinPi3_9 * s7; + + // 1-D transform scaling factor is sqrt(2). + // The overall dynamic range is 14b (input) + 14b (multiplication scaling) + // + 1b (addition) = 29b. + // Hence the output bit depth is 15b. + output[0] = WrapLow(DctConstRoundShift(s0 + s3)); + output[1] = WrapLow(DctConstRoundShift(s1 + s3)); + output[2] = WrapLow(DctConstRoundShift(s2)); + output[3] = WrapLow(DctConstRoundShift(s0 + s1 - s3)); + } + + /* 4-point 1-D inverse DCT: reads input[0..3] and writes output[0..3]. Inputs are truncated to short before multiplying, and every intermediate is wrapped to 16 bits by WrapLow. */ public static void Idct4(ReadOnlySpan input, Span output) + { + Span step = stackalloc short[4]; + long temp1, temp2; + + // stage 1 + temp1 = ((short)input[0] + (short)input[2]) * CosPi16_64; + temp2 = ((short)input[0] - (short)input[2]) * CosPi16_64; + step[0] = (short)WrapLow(DctConstRoundShift(temp1)); + step[1] = (short)WrapLow(DctConstRoundShift(temp2)); + temp1 = (short)input[1] * CosPi24_64 - (short)input[3] * CosPi8_64; + temp2 = (short)input[1] * CosPi8_64 + (short)input[3] * CosPi24_64; + step[2] = (short)WrapLow(DctConstRoundShift(temp1)); + step[3] = (short)WrapLow(DctConstRoundShift(temp2)); + + // stage 2 + output[0] = WrapLow(step[0] + step[3]); + output[1] = WrapLow(step[1] + step[2]); + output[2] = WrapLow(step[1] - step[2]); + output[3] = WrapLow(step[0] - step[3]); + } + + /* Full 4x4 inverse DCT: runs Idct4 over the 4 rows of input and then over the 4 columns of the intermediate. Each result is rounded by 4 bits and added to the 4x4 pixel block at dest (rows stride apart) with clamping. */ public static void Idct4x416Add(ReadOnlySpan input, Span dest, int stride) + { + int i, j; + Span output = stackalloc int[4 * 4]; + Span outptr = output; + Span tempIn = stackalloc int[4]; + Span tempOut = stackalloc int[4]; + + // Rows + for (i = 0; i < 4; ++i) + { + Idct4(input, outptr); + input = input.Slice(4); + outptr = outptr.Slice(4); + } + + // Columns + for (i = 0; i < 4; ++i) + { + for (j = 
0; j < 4; ++j) + { + tempIn[j] = output[j * 4 + i]; + } + + Idct4(tempIn, tempOut); + for (j = 0; j < 4; ++j) + { + dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 4)); + } + } + } + + /* DC-only variant of the 4x4 inverse DCT: only input[0] is read. It is scaled by CosPi16_64 twice (once per dimension) and rounded by 4 bits, and the single resulting value a1 is added to all 16 pixels. */ public static void Idct4x41Add(ReadOnlySpan input, Span dest, int stride) + { + int i; + long a1; + int output = WrapLow(DctConstRoundShift((short)input[0] * CosPi16_64)); + + output = WrapLow(DctConstRoundShift(output * CosPi16_64)); + a1 = BitUtils.RoundPowerOfTwo(output, 4); + + for (i = 0; i < 4; i++) + { + dest[0] = ClipPixelAdd(dest[0], a1); + dest[1] = ClipPixelAdd(dest[1], a1); + dest[2] = ClipPixelAdd(dest[2], a1); + dest[3] = ClipPixelAdd(dest[3], a1); + dest = dest.Slice(stride); + } + } + + /* 8-point 1-D inverse ADST: reads input[0..7] in a permuted order and writes output[0..7]. An all-zero input short-circuits to an all-zero output. */ public static void Iadst8(ReadOnlySpan input, Span output) + { + int s0, s1, s2, s3, s4, s5, s6, s7; + long x0 = input[7]; + long x1 = input[0]; + long x2 = input[5]; + long x3 = input[2]; + long x4 = input[3]; + long x5 = input[4]; + long x6 = input[1]; + long x7 = input[6]; + + if ((x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7) == 0) + { + output.Slice(0, 8).Fill(0); + return; + } + + // stage 1 + s0 = (int)(CosPi2_64 * x0 + CosPi30_64 * x1); + s1 = (int)(CosPi30_64 * x0 - CosPi2_64 * x1); + s2 = (int)(CosPi10_64 * x2 + CosPi22_64 * x3); + s3 = (int)(CosPi22_64 * x2 - CosPi10_64 * x3); + s4 = (int)(CosPi18_64 * x4 + CosPi14_64 * x5); + s5 = (int)(CosPi14_64 * x4 - CosPi18_64 * x5); + s6 = (int)(CosPi26_64 * x6 + CosPi6_64 * x7); + s7 = (int)(CosPi6_64 * x6 - CosPi26_64 * x7); + + x0 = WrapLow(DctConstRoundShift(s0 + s4)); + x1 = WrapLow(DctConstRoundShift(s1 + s5)); + x2 = WrapLow(DctConstRoundShift(s2 + s6)); + x3 = WrapLow(DctConstRoundShift(s3 + s7)); + x4 = WrapLow(DctConstRoundShift(s0 - s4)); + x5 = WrapLow(DctConstRoundShift(s1 - s5)); + x6 = WrapLow(DctConstRoundShift(s2 - s6)); + x7 = WrapLow(DctConstRoundShift(s3 - s7)); + + // stage 2 + s0 = (int)x0; + s1 = (int)x1; + s2 = (int)x2; + s3 = (int)x3; + s4 = (int)(CosPi8_64 * x4 + 
CosPi24_64 * x5); + s5 = (int)(CosPi24_64 * x4 - CosPi8_64 * x5); + s6 = (int)(-CosPi24_64 * x6 + CosPi8_64 * x7); + s7 = (int)(CosPi8_64 * x6 + CosPi24_64 * x7); + + x0 = WrapLow(s0 + s2); + x1 = WrapLow(s1 + s3); + x2 = WrapLow(s0 - s2); + x3 = WrapLow(s1 - s3); + x4 = WrapLow(DctConstRoundShift(s4 + s6)); + x5 = WrapLow(DctConstRoundShift(s5 + s7)); + x6 = WrapLow(DctConstRoundShift(s4 - s6)); + x7 = WrapLow(DctConstRoundShift(s5 - s7)); + + // stage 3 + s2 = (int)(CosPi16_64 * (x2 + x3)); + s3 = (int)(CosPi16_64 * (x2 - x3)); + s6 = (int)(CosPi16_64 * (x6 + x7)); + s7 = (int)(CosPi16_64 * (x6 - x7)); + + x2 = WrapLow(DctConstRoundShift(s2)); + x3 = WrapLow(DctConstRoundShift(s3)); + x6 = WrapLow(DctConstRoundShift(s6)); + x7 = WrapLow(DctConstRoundShift(s7)); + + output[0] = WrapLow(x0); + output[1] = WrapLow(-x4); + output[2] = WrapLow(x6); + output[3] = WrapLow(-x2); + output[4] = WrapLow(x3); + output[5] = WrapLow(-x7); + output[6] = WrapLow(x5); + output[7] = WrapLow(-x1); + } + + /* 8-point 1-D inverse DCT in four butterfly stages: reads input[0..7] and writes output[0..7]. Two short scratch buffers (step1, step2) alternate as source and destination between stages, and every intermediate is wrapped to 16 bits by WrapLow. */ public static void Idct8(ReadOnlySpan input, Span output) + { + Span step1 = stackalloc short[8]; + Span step2 = stackalloc short[8]; + long temp1, temp2; + + // stage 1 + step1[0] = (short)input[0]; + step1[2] = (short)input[4]; + step1[1] = (short)input[2]; + step1[3] = (short)input[6]; + temp1 = (short)input[1] * CosPi28_64 - (short)input[7] * CosPi4_64; + temp2 = (short)input[1] * CosPi4_64 + (short)input[7] * CosPi28_64; + step1[4] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[7] = (short)WrapLow(DctConstRoundShift(temp2)); + temp1 = (short)input[5] * CosPi12_64 - (short)input[3] * CosPi20_64; + temp2 = (short)input[5] * CosPi20_64 + (short)input[3] * CosPi12_64; + step1[5] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[6] = (short)WrapLow(DctConstRoundShift(temp2)); + + // stage 2 + temp1 = (step1[0] + step1[2]) * CosPi16_64; + temp2 = (step1[0] - step1[2]) * CosPi16_64; + step2[0] = (short)WrapLow(DctConstRoundShift(temp1)); + step2[1] = 
(short)WrapLow(DctConstRoundShift(temp2)); + temp1 = step1[1] * CosPi24_64 - step1[3] * CosPi8_64; + temp2 = step1[1] * CosPi8_64 + step1[3] * CosPi24_64; + step2[2] = (short)WrapLow(DctConstRoundShift(temp1)); + step2[3] = (short)WrapLow(DctConstRoundShift(temp2)); + step2[4] = (short)WrapLow(step1[4] + step1[5]); + step2[5] = (short)WrapLow(step1[4] - step1[5]); + step2[6] = (short)WrapLow(-step1[6] + step1[7]); + step2[7] = (short)WrapLow(step1[6] + step1[7]); + + // stage 3 + step1[0] = (short)WrapLow(step2[0] + step2[3]); + step1[1] = (short)WrapLow(step2[1] + step2[2]); + step1[2] = (short)WrapLow(step2[1] - step2[2]); + step1[3] = (short)WrapLow(step2[0] - step2[3]); + step1[4] = step2[4]; + temp1 = (step2[6] - step2[5]) * CosPi16_64; + temp2 = (step2[5] + step2[6]) * CosPi16_64; + step1[5] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[6] = (short)WrapLow(DctConstRoundShift(temp2)); + step1[7] = step2[7]; + + // stage 4 + output[0] = WrapLow(step1[0] + step1[7]); + output[1] = WrapLow(step1[1] + step1[6]); + output[2] = WrapLow(step1[2] + step1[5]); + output[3] = WrapLow(step1[3] + step1[4]); + output[4] = WrapLow(step1[3] - step1[4]); + output[5] = WrapLow(step1[2] - step1[5]); + output[6] = WrapLow(step1[1] - step1[6]); + output[7] = WrapLow(step1[0] - step1[7]); + } + + /* Full 8x8 inverse DCT: runs Idct8 over all 8 rows of input and then over the 8 columns of the intermediate. Each result is rounded by 5 bits and added to the 8x8 pixel block at dest (rows stride apart) with clamping. */ public static void Idct8x864Add(ReadOnlySpan input, Span dest, int stride) + { + int i, j; + Span output = stackalloc int[8 * 8]; + Span outptr = output; + Span tempIn = stackalloc int[8]; + Span tempOut = stackalloc int[8]; + + // First transform rows + for (i = 0; i < 8; ++i) + { + Idct8(input, outptr); + input = input.Slice(8); + outptr = outptr.Slice(8); + } + + // Then transform columns + for (i = 0; i < 8; ++i) + { + for (j = 0; j < 8; ++j) + { + /* Gather column i of the row-pass result so the 1-D kernel can run on contiguous data. */ tempIn[j] = output[j * 8 + i]; + } + + Idct8(tempIn, tempOut); + for (j = 0; j < 8; ++j) + { + dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], + BitUtils.RoundPowerOfTwo(tempOut[j], 5)); + } + } + } + + public static void 
Idct8x812Add(ReadOnlySpan input, Span dest, int stride) + { + /* Reduced 8x8 inverse DCT for blocks whose non-zero coefficients all lie in the first 4 rows: only those rows go through the row pass, while the column pass still covers the full 8x8 intermediate. */ int i, j; + Span output = stackalloc int[8 * 8]; + Span outptr = output; + Span tempIn = stackalloc int[8]; + Span tempOut = stackalloc int[8]; + + /* Rows 4..7 of output are never written by the row pass below but are read by the column pass. The contents of stackalloc memory are not guaranteed to be zero, so clear the buffer explicitly. */ + output.Clear(); + + // First transform rows + // Only the first 4 rows have non-zero coefficients + for (i = 0; i < 4; ++i) + { + Idct8(input, outptr); + input = input.Slice(8); + outptr = outptr.Slice(8); + } + + // Then transform columns + for (i = 0; i < 8; ++i) + { + for (j = 0; j < 8; ++j) + { + tempIn[j] = output[j * 8 + i]; + } + + Idct8(tempIn, tempOut); + for (j = 0; j < 8; ++j) + { + dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], + BitUtils.RoundPowerOfTwo(tempOut[j], 5)); + } + } + } + + /* DC-only variant of the 8x8 inverse DCT: only input[0] is read. It is scaled by CosPi16_64 twice and rounded by 5 bits, and the single resulting value a1 is added to all 64 pixels. */ public static void Idct8x81Add(ReadOnlySpan input, Span dest, int stride) + { + int i, j; + long a1; + int output = WrapLow(DctConstRoundShift((short)input[0] * CosPi16_64)); + + output = WrapLow(DctConstRoundShift(output * CosPi16_64)); + a1 = BitUtils.RoundPowerOfTwo(output, 5); + for (j = 0; j < 8; ++j) + { + for (i = 0; i < 8; ++i) + { + dest[i] = ClipPixelAdd(dest[i], a1); + } + + dest = dest.Slice(stride); + } + } + + /* 16-point 1-D inverse ADST: reads input[0..15] in a permuted order and writes output[0..15]. An all-zero input short-circuits to an all-zero output. */ public static void Iadst16(ReadOnlySpan input, Span output) + { + long s0, s1, s2, s3, s4, s5, s6, s7, s8; + long s9, s10, s11, s12, s13, s14, s15; + long x0 = input[15]; + long x1 = input[0]; + long x2 = input[13]; + long x3 = input[2]; + long x4 = input[11]; + long x5 = input[4]; + long x6 = input[9]; + long x7 = input[6]; + long x8 = input[7]; + long x9 = input[8]; + long x10 = input[5]; + long x11 = input[10]; + long x12 = input[3]; + long x13 = input[12]; + long x14 = input[1]; + long x15 = input[14]; + + if ((x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | x10 | x11 | x12 | x13 | x14 | x15) == 0) + { + output.Slice(0, 16).Fill(0); + return; + } + + // stage 1 + s0 = x0 * CosPi1_64 + x1 * CosPi31_64; + s1 = x0 * CosPi31_64 - x1 * CosPi1_64; + s2 = x2 * CosPi5_64 + x3 * CosPi27_64; + s3 = x2 * CosPi27_64 - x3 * CosPi5_64; + s4 = x4 * CosPi9_64 + x5 * 
CosPi23_64; + s5 = x4 * CosPi23_64 - x5 * CosPi9_64; + s6 = x6 * CosPi13_64 + x7 * CosPi19_64; + s7 = x6 * CosPi19_64 - x7 * CosPi13_64; + s8 = x8 * CosPi17_64 + x9 * CosPi15_64; + s9 = x8 * CosPi15_64 - x9 * CosPi17_64; + s10 = x10 * CosPi21_64 + x11 * CosPi11_64; + s11 = x10 * CosPi11_64 - x11 * CosPi21_64; + s12 = x12 * CosPi25_64 + x13 * CosPi7_64; + s13 = x12 * CosPi7_64 - x13 * CosPi25_64; + s14 = x14 * CosPi29_64 + x15 * CosPi3_64; + s15 = x14 * CosPi3_64 - x15 * CosPi29_64; + + x0 = WrapLow(DctConstRoundShift(s0 + s8)); + x1 = WrapLow(DctConstRoundShift(s1 + s9)); + x2 = WrapLow(DctConstRoundShift(s2 + s10)); + x3 = WrapLow(DctConstRoundShift(s3 + s11)); + x4 = WrapLow(DctConstRoundShift(s4 + s12)); + x5 = WrapLow(DctConstRoundShift(s5 + s13)); + x6 = WrapLow(DctConstRoundShift(s6 + s14)); + x7 = WrapLow(DctConstRoundShift(s7 + s15)); + x8 = WrapLow(DctConstRoundShift(s0 - s8)); + x9 = WrapLow(DctConstRoundShift(s1 - s9)); + x10 = WrapLow(DctConstRoundShift(s2 - s10)); + x11 = WrapLow(DctConstRoundShift(s3 - s11)); + x12 = WrapLow(DctConstRoundShift(s4 - s12)); + x13 = WrapLow(DctConstRoundShift(s5 - s13)); + x14 = WrapLow(DctConstRoundShift(s6 - s14)); + x15 = WrapLow(DctConstRoundShift(s7 - s15)); + + // stage 2 + s0 = x0; + s1 = x1; + s2 = x2; + s3 = x3; + s4 = x4; + s5 = x5; + s6 = x6; + s7 = x7; + s8 = x8 * CosPi4_64 + x9 * CosPi28_64; + s9 = x8 * CosPi28_64 - x9 * CosPi4_64; + s10 = x10 * CosPi20_64 + x11 * CosPi12_64; + s11 = x10 * CosPi12_64 - x11 * CosPi20_64; + s12 = -x12 * CosPi28_64 + x13 * CosPi4_64; + s13 = x12 * CosPi4_64 + x13 * CosPi28_64; + s14 = -x14 * CosPi12_64 + x15 * CosPi20_64; + s15 = x14 * CosPi20_64 + x15 * CosPi12_64; + + x0 = WrapLow(s0 + s4); + x1 = WrapLow(s1 + s5); + x2 = WrapLow(s2 + s6); + x3 = WrapLow(s3 + s7); + x4 = WrapLow(s0 - s4); + x5 = WrapLow(s1 - s5); + x6 = WrapLow(s2 - s6); + x7 = WrapLow(s3 - s7); + x8 = WrapLow(DctConstRoundShift(s8 + s12)); + x9 = WrapLow(DctConstRoundShift(s9 + s13)); + x10 = 
WrapLow(DctConstRoundShift(s10 + s14)); + x11 = WrapLow(DctConstRoundShift(s11 + s15)); + x12 = WrapLow(DctConstRoundShift(s8 - s12)); + x13 = WrapLow(DctConstRoundShift(s9 - s13)); + x14 = WrapLow(DctConstRoundShift(s10 - s14)); + x15 = WrapLow(DctConstRoundShift(s11 - s15)); + + // stage 3 + s0 = x0; + s1 = x1; + s2 = x2; + s3 = x3; + s4 = x4 * CosPi8_64 + x5 * CosPi24_64; + s5 = x4 * CosPi24_64 - x5 * CosPi8_64; + s6 = -x6 * CosPi24_64 + x7 * CosPi8_64; + s7 = x6 * CosPi8_64 + x7 * CosPi24_64; + s8 = x8; + s9 = x9; + s10 = x10; + s11 = x11; + s12 = x12 * CosPi8_64 + x13 * CosPi24_64; + s13 = x12 * CosPi24_64 - x13 * CosPi8_64; + s14 = -x14 * CosPi24_64 + x15 * CosPi8_64; + s15 = x14 * CosPi8_64 + x15 * CosPi24_64; + + x0 = WrapLow(s0 + s2); + x1 = WrapLow(s1 + s3); + x2 = WrapLow(s0 - s2); + x3 = WrapLow(s1 - s3); + x4 = WrapLow(DctConstRoundShift(s4 + s6)); + x5 = WrapLow(DctConstRoundShift(s5 + s7)); + x6 = WrapLow(DctConstRoundShift(s4 - s6)); + x7 = WrapLow(DctConstRoundShift(s5 - s7)); + x8 = WrapLow(s8 + s10); + x9 = WrapLow(s9 + s11); + x10 = WrapLow(s8 - s10); + x11 = WrapLow(s9 - s11); + x12 = WrapLow(DctConstRoundShift(s12 + s14)); + x13 = WrapLow(DctConstRoundShift(s13 + s15)); + x14 = WrapLow(DctConstRoundShift(s12 - s14)); + x15 = WrapLow(DctConstRoundShift(s13 - s15)); + + // stage 4 + s2 = (-CosPi16_64) * (x2 + x3); + s3 = CosPi16_64 * (x2 - x3); + s6 = CosPi16_64 * (x6 + x7); + s7 = CosPi16_64 * (-x6 + x7); + s10 = CosPi16_64 * (x10 + x11); + s11 = CosPi16_64 * (-x10 + x11); + s14 = (-CosPi16_64) * (x14 + x15); + s15 = CosPi16_64 * (x14 - x15); + + x2 = WrapLow(DctConstRoundShift(s2)); + x3 = WrapLow(DctConstRoundShift(s3)); + x6 = WrapLow(DctConstRoundShift(s6)); + x7 = WrapLow(DctConstRoundShift(s7)); + x10 = WrapLow(DctConstRoundShift(s10)); + x11 = WrapLow(DctConstRoundShift(s11)); + x14 = WrapLow(DctConstRoundShift(s14)); + x15 = WrapLow(DctConstRoundShift(s15)); + + output[0] = WrapLow(x0); + output[1] = WrapLow(-x8); + output[2] = 
WrapLow(x12); + output[3] = WrapLow(-x4); + output[4] = WrapLow(x6); + output[5] = WrapLow(x14); + output[6] = WrapLow(x10); + output[7] = WrapLow(x2); + output[8] = WrapLow(x3); + output[9] = WrapLow(x11); + output[10] = WrapLow(x15); + output[11] = WrapLow(x7); + output[12] = WrapLow(x5); + output[13] = WrapLow(-x13); + output[14] = WrapLow(x9); + output[15] = WrapLow(-x1); + } + + /* 16-point 1-D inverse DCT in seven butterfly stages: reads input[0..15] and writes output[0..15]. Two short scratch buffers (step1, step2) alternate as source and destination between stages, and every intermediate is wrapped to 16 bits by WrapLow. */ public static void Idct16(ReadOnlySpan input, Span output) + { + Span step1 = stackalloc short[16]; + Span step2 = stackalloc short[16]; + long temp1, temp2; + + // stage 1 + /* The indices are written as k / 2 with even literals, so they evaluate to 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 - a reordering of the 16 inputs. */ step1[0] = (short)input[0 / 2]; + step1[1] = (short)input[16 / 2]; + step1[2] = (short)input[8 / 2]; + step1[3] = (short)input[24 / 2]; + step1[4] = (short)input[4 / 2]; + step1[5] = (short)input[20 / 2]; + step1[6] = (short)input[12 / 2]; + step1[7] = (short)input[28 / 2]; + step1[8] = (short)input[2 / 2]; + step1[9] = (short)input[18 / 2]; + step1[10] = (short)input[10 / 2]; + step1[11] = (short)input[26 / 2]; + step1[12] = (short)input[6 / 2]; + step1[13] = (short)input[22 / 2]; + step1[14] = (short)input[14 / 2]; + step1[15] = (short)input[30 / 2]; + + // stage 2 + step2[0] = step1[0]; + step2[1] = step1[1]; + step2[2] = step1[2]; + step2[3] = step1[3]; + step2[4] = step1[4]; + step2[5] = step1[5]; + step2[6] = step1[6]; + step2[7] = step1[7]; + + temp1 = step1[8] * CosPi30_64 - step1[15] * CosPi2_64; + temp2 = step1[8] * CosPi2_64 + step1[15] * CosPi30_64; + step2[8] = (short)WrapLow(DctConstRoundShift(temp1)); + step2[15] = (short)WrapLow(DctConstRoundShift(temp2)); + + temp1 = step1[9] * CosPi14_64 - step1[14] * CosPi18_64; + temp2 = step1[9] * CosPi18_64 + step1[14] * CosPi14_64; + step2[9] = (short)WrapLow(DctConstRoundShift(temp1)); + step2[14] = (short)WrapLow(DctConstRoundShift(temp2)); + + temp1 = step1[10] * CosPi22_64 - step1[13] * CosPi10_64; + temp2 = step1[10] * CosPi10_64 + step1[13] * CosPi22_64; + step2[10] = (short)WrapLow(DctConstRoundShift(temp1)); + step2[13] = 
(short)WrapLow(DctConstRoundShift(temp2)); + + temp1 = step1[11] * CosPi6_64 - step1[12] * CosPi26_64; + temp2 = step1[11] * CosPi26_64 + step1[12] * CosPi6_64; + step2[11] = (short)WrapLow(DctConstRoundShift(temp1)); + step2[12] = (short)WrapLow(DctConstRoundShift(temp2)); + + // stage 3 + step1[0] = step2[0]; + step1[1] = step2[1]; + step1[2] = step2[2]; + step1[3] = step2[3]; + + temp1 = step2[4] * CosPi28_64 - step2[7] * CosPi4_64; + temp2 = step2[4] * CosPi4_64 + step2[7] * CosPi28_64; + step1[4] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[7] = (short)WrapLow(DctConstRoundShift(temp2)); + temp1 = step2[5] * CosPi12_64 - step2[6] * CosPi20_64; + temp2 = step2[5] * CosPi20_64 + step2[6] * CosPi12_64; + step1[5] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[6] = (short)WrapLow(DctConstRoundShift(temp2)); + + step1[8] = (short)WrapLow(step2[8] + step2[9]); + step1[9] = (short)WrapLow(step2[8] - step2[9]); + step1[10] = (short)WrapLow(-step2[10] + step2[11]); + step1[11] = (short)WrapLow(step2[10] + step2[11]); + step1[12] = (short)WrapLow(step2[12] + step2[13]); + step1[13] = (short)WrapLow(step2[12] - step2[13]); + step1[14] = (short)WrapLow(-step2[14] + step2[15]); + step1[15] = (short)WrapLow(step2[14] + step2[15]); + + // stage 4 + temp1 = (step1[0] + step1[1]) * CosPi16_64; + temp2 = (step1[0] - step1[1]) * CosPi16_64; + step2[0] = (short)WrapLow(DctConstRoundShift(temp1)); + step2[1] = (short)WrapLow(DctConstRoundShift(temp2)); + temp1 = step1[2] * CosPi24_64 - step1[3] * CosPi8_64; + temp2 = step1[2] * CosPi8_64 + step1[3] * CosPi24_64; + step2[2] = (short)WrapLow(DctConstRoundShift(temp1)); + step2[3] = (short)WrapLow(DctConstRoundShift(temp2)); + step2[4] = (short)WrapLow(step1[4] + step1[5]); + step2[5] = (short)WrapLow(step1[4] - step1[5]); + step2[6] = (short)WrapLow(-step1[6] + step1[7]); + step2[7] = (short)WrapLow(step1[6] + step1[7]); + + step2[8] = step1[8]; + step2[15] = step1[15]; + temp1 = -step1[9] * CosPi8_64 + step1[14] * 
CosPi24_64; + temp2 = step1[9] * CosPi24_64 + step1[14] * CosPi8_64; + step2[9] = (short)WrapLow(DctConstRoundShift(temp1)); + step2[14] = (short)WrapLow(DctConstRoundShift(temp2)); + temp1 = -step1[10] * CosPi24_64 - step1[13] * CosPi8_64; + temp2 = -step1[10] * CosPi8_64 + step1[13] * CosPi24_64; + step2[10] = (short)WrapLow(DctConstRoundShift(temp1)); + step2[13] = (short)WrapLow(DctConstRoundShift(temp2)); + step2[11] = step1[11]; + step2[12] = step1[12]; + + // stage 5 + step1[0] = (short)WrapLow(step2[0] + step2[3]); + step1[1] = (short)WrapLow(step2[1] + step2[2]); + step1[2] = (short)WrapLow(step2[1] - step2[2]); + step1[3] = (short)WrapLow(step2[0] - step2[3]); + step1[4] = step2[4]; + temp1 = (step2[6] - step2[5]) * CosPi16_64; + temp2 = (step2[5] + step2[6]) * CosPi16_64; + step1[5] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[6] = (short)WrapLow(DctConstRoundShift(temp2)); + step1[7] = step2[7]; + + step1[8] = (short)WrapLow(step2[8] + step2[11]); + step1[9] = (short)WrapLow(step2[9] + step2[10]); + step1[10] = (short)WrapLow(step2[9] - step2[10]); + step1[11] = (short)WrapLow(step2[8] - step2[11]); + step1[12] = (short)WrapLow(-step2[12] + step2[15]); + step1[13] = (short)WrapLow(-step2[13] + step2[14]); + step1[14] = (short)WrapLow(step2[13] + step2[14]); + step1[15] = (short)WrapLow(step2[12] + step2[15]); + + // stage 6 + step2[0] = (short)WrapLow(step1[0] + step1[7]); + step2[1] = (short)WrapLow(step1[1] + step1[6]); + step2[2] = (short)WrapLow(step1[2] + step1[5]); + step2[3] = (short)WrapLow(step1[3] + step1[4]); + step2[4] = (short)WrapLow(step1[3] - step1[4]); + step2[5] = (short)WrapLow(step1[2] - step1[5]); + step2[6] = (short)WrapLow(step1[1] - step1[6]); + step2[7] = (short)WrapLow(step1[0] - step1[7]); + step2[8] = step1[8]; + step2[9] = step1[9]; + temp1 = (-step1[10] + step1[13]) * CosPi16_64; + temp2 = (step1[10] + step1[13]) * CosPi16_64; + step2[10] = (short)WrapLow(DctConstRoundShift(temp1)); + step2[13] = 
(short)WrapLow(DctConstRoundShift(temp2)); + temp1 = (-step1[11] + step1[12]) * CosPi16_64; + temp2 = (step1[11] + step1[12]) * CosPi16_64; + step2[11] = (short)WrapLow(DctConstRoundShift(temp1)); + step2[12] = (short)WrapLow(DctConstRoundShift(temp2)); + step2[14] = step1[14]; + step2[15] = step1[15]; + + // stage 7 + output[0] = WrapLow(step2[0] + step2[15]); + output[1] = WrapLow(step2[1] + step2[14]); + output[2] = WrapLow(step2[2] + step2[13]); + output[3] = WrapLow(step2[3] + step2[12]); + output[4] = WrapLow(step2[4] + step2[11]); + output[5] = WrapLow(step2[5] + step2[10]); + output[6] = WrapLow(step2[6] + step2[9]); + output[7] = WrapLow(step2[7] + step2[8]); + output[8] = WrapLow(step2[7] - step2[8]); + output[9] = WrapLow(step2[6] - step2[9]); + output[10] = WrapLow(step2[5] - step2[10]); + output[11] = WrapLow(step2[4] - step2[11]); + output[12] = WrapLow(step2[3] - step2[12]); + output[13] = WrapLow(step2[2] - step2[13]); + output[14] = WrapLow(step2[1] - step2[14]); + output[15] = WrapLow(step2[0] - step2[15]); + } + + /* Full 16x16 inverse DCT: runs Idct16 over all 16 rows of input and then over the 16 columns of the intermediate. Each result is rounded by 6 bits and added to the 16x16 pixel block at dest (rows stride apart) with clamping. */ public static void Idct16x16256Add(ReadOnlySpan input, Span dest, int stride) + { + int i, j; + Span output = stackalloc int[16 * 16]; + Span outptr = output; + Span tempIn = stackalloc int[16]; + Span tempOut = stackalloc int[16]; + + // First transform rows + for (i = 0; i < 16; ++i) + { + Idct16(input, outptr); + input = input.Slice(16); + outptr = outptr.Slice(16); + } + + // Then transform columns + for (i = 0; i < 16; ++i) + { + for (j = 0; j < 16; ++j) + { + tempIn[j] = output[j * 16 + i]; + } + + Idct16(tempIn, tempOut); + for (j = 0; j < 16; ++j) + { + dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 6)); + } + } + } + + /* Reduced 16x16 inverse DCT for blocks whose non-zero coefficients all lie in the upper-left 8x8 area: only the first 8 rows go through the row pass. */ public static void Idct16x1638Add(ReadOnlySpan input, Span dest, int stride) + { + int i, j; + Span output = stackalloc int[16 * 16]; + Span outptr = output; + Span tempIn = stackalloc int[16]; + Span tempOut = stackalloc int[16]; + + /* Rows 8..15 of output are never written by the row pass but are read by the column pass. The contents of stackalloc memory are not guaranteed to be zero, so clear the buffer explicitly. */ + output.Clear(); + + // First transform rows. 
Since all non-zero dct coefficients are in + // upper-left 8x8 area, we only need to calculate first 8 rows here. + for (i = 0; i < 8; ++i) + { + Idct16(input, outptr); + input = input.Slice(16); + outptr = outptr.Slice(16); + } + + // Then transform columns + for (i = 0; i < 16; ++i) + { + for (j = 0; j < 16; ++j) + { + tempIn[j] = output[j * 16 + i]; + } + + Idct16(tempIn, tempOut); + for (j = 0; j < 16; ++j) + { + dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 6)); + } + } + } + + public static void Idct16x1610Add(ReadOnlySpan input, Span dest, int stride) + { + int i, j; + Span output = stackalloc int[16 * 16]; + Span outptr = output; + Span tempIn = stackalloc int[16]; + Span tempOut = stackalloc int[16]; + + // First transform rows. Since all non-zero dct coefficients are in + // upper-left 4x4 area, we only need to calculate first 4 rows here. + for (i = 0; i < 4; ++i) + { + Idct16(input, outptr); + input = input.Slice(16); + outptr = outptr.Slice(16); + } + + // Then transform columns + for (i = 0; i < 16; ++i) + { + for (j = 0; j < 16; ++j) + { + tempIn[j] = output[j * 16 + i]; + } + + Idct16(tempIn, tempOut); + for (j = 0; j < 16; ++j) + { + dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 6)); + } + } + } + + public static void Idct16x161Add(ReadOnlySpan input, Span dest, int stride) + { + int i, j; + long a1; + int output = WrapLow(DctConstRoundShift((short)input[0] * CosPi16_64)); + + output = WrapLow(DctConstRoundShift(output * CosPi16_64)); + a1 = BitUtils.RoundPowerOfTwo(output, 6); + for (j = 0; j < 16; ++j) + { + for (i = 0; i < 16; ++i) + { + dest[i] = ClipPixelAdd(dest[i], a1); + } + + dest = dest.Slice(stride); + } + } + + public static void Idct32(ReadOnlySpan input, Span output) + { + Span step1 = stackalloc short[32]; + Span step2 = stackalloc short[32]; + long temp1, temp2; + + // stage 1 + step1[0] = (short)input[0]; + step1[1] = (short)input[16]; 
+ step1[2] = (short)input[8]; + step1[3] = (short)input[24]; + step1[4] = (short)input[4]; + step1[5] = (short)input[20]; + step1[6] = (short)input[12]; + step1[7] = (short)input[28]; + step1[8] = (short)input[2]; + step1[9] = (short)input[18]; + step1[10] = (short)input[10]; + step1[11] = (short)input[26]; + step1[12] = (short)input[6]; + step1[13] = (short)input[22]; + step1[14] = (short)input[14]; + step1[15] = (short)input[30]; + + temp1 = (short)input[1] * CosPi31_64 - (short)input[31] * CosPi1_64; + temp2 = (short)input[1] * CosPi1_64 + (short)input[31] * CosPi31_64; + step1[16] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[31] = (short)WrapLow(DctConstRoundShift(temp2)); + + temp1 = (short)input[17] * CosPi15_64 - (short)input[15] * CosPi17_64; + temp2 = (short)input[17] * CosPi17_64 + (short)input[15] * CosPi15_64; + step1[17] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[30] = (short)WrapLow(DctConstRoundShift(temp2)); + + temp1 = (short)input[9] * CosPi23_64 - (short)input[23] * CosPi9_64; + temp2 = (short)input[9] * CosPi9_64 + (short)input[23] * CosPi23_64; + step1[18] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[29] = (short)WrapLow(DctConstRoundShift(temp2)); + + temp1 = (short)input[25] * CosPi7_64 - (short)input[7] * CosPi25_64; + temp2 = (short)input[25] * CosPi25_64 + (short)input[7] * CosPi7_64; + step1[19] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[28] = (short)WrapLow(DctConstRoundShift(temp2)); + + temp1 = (short)input[5] * CosPi27_64 - (short)input[27] * CosPi5_64; + temp2 = (short)input[5] * CosPi5_64 + (short)input[27] * CosPi27_64; + step1[20] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[27] = (short)WrapLow(DctConstRoundShift(temp2)); + + temp1 = (short)input[21] * CosPi11_64 - (short)input[11] * CosPi21_64; + temp2 = (short)input[21] * CosPi21_64 + (short)input[11] * CosPi11_64; + step1[21] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[26] = (short)WrapLow(DctConstRoundShift(temp2)); + + 
temp1 = (short)input[13] * CosPi19_64 - (short)input[19] * CosPi13_64; + temp2 = (short)input[13] * CosPi13_64 + (short)input[19] * CosPi19_64; + step1[22] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[25] = (short)WrapLow(DctConstRoundShift(temp2)); + + temp1 = (short)input[29] * CosPi3_64 - (short)input[3] * CosPi29_64; + temp2 = (short)input[29] * CosPi29_64 + (short)input[3] * CosPi3_64; + step1[23] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[24] = (short)WrapLow(DctConstRoundShift(temp2)); + + // stage 2 + step2[0] = step1[0]; + step2[1] = step1[1]; + step2[2] = step1[2]; + step2[3] = step1[3]; + step2[4] = step1[4]; + step2[5] = step1[5]; + step2[6] = step1[6]; + step2[7] = step1[7]; + + temp1 = step1[8] * CosPi30_64 - step1[15] * CosPi2_64; + temp2 = step1[8] * CosPi2_64 + step1[15] * CosPi30_64; + step2[8] = (short)WrapLow(DctConstRoundShift(temp1)); + step2[15] = (short)WrapLow(DctConstRoundShift(temp2)); + + temp1 = step1[9] * CosPi14_64 - step1[14] * CosPi18_64; + temp2 = step1[9] * CosPi18_64 + step1[14] * CosPi14_64; + step2[9] = (short)WrapLow(DctConstRoundShift(temp1)); + step2[14] = (short)WrapLow(DctConstRoundShift(temp2)); + + temp1 = step1[10] * CosPi22_64 - step1[13] * CosPi10_64; + temp2 = step1[10] * CosPi10_64 + step1[13] * CosPi22_64; + step2[10] = (short)WrapLow(DctConstRoundShift(temp1)); + step2[13] = (short)WrapLow(DctConstRoundShift(temp2)); + + temp1 = step1[11] * CosPi6_64 - step1[12] * CosPi26_64; + temp2 = step1[11] * CosPi26_64 + step1[12] * CosPi6_64; + step2[11] = (short)WrapLow(DctConstRoundShift(temp1)); + step2[12] = (short)WrapLow(DctConstRoundShift(temp2)); + + step2[16] = (short)WrapLow(step1[16] + step1[17]); + step2[17] = (short)WrapLow(step1[16] - step1[17]); + step2[18] = (short)WrapLow(-step1[18] + step1[19]); + step2[19] = (short)WrapLow(step1[18] + step1[19]); + step2[20] = (short)WrapLow(step1[20] + step1[21]); + step2[21] = (short)WrapLow(step1[20] - step1[21]); + step2[22] = 
(short)WrapLow(-step1[22] + step1[23]); + step2[23] = (short)WrapLow(step1[22] + step1[23]); + step2[24] = (short)WrapLow(step1[24] + step1[25]); + step2[25] = (short)WrapLow(step1[24] - step1[25]); + step2[26] = (short)WrapLow(-step1[26] + step1[27]); + step2[27] = (short)WrapLow(step1[26] + step1[27]); + step2[28] = (short)WrapLow(step1[28] + step1[29]); + step2[29] = (short)WrapLow(step1[28] - step1[29]); + step2[30] = (short)WrapLow(-step1[30] + step1[31]); + step2[31] = (short)WrapLow(step1[30] + step1[31]); + + // stage 3 + step1[0] = step2[0]; + step1[1] = step2[1]; + step1[2] = step2[2]; + step1[3] = step2[3]; + + temp1 = step2[4] * CosPi28_64 - step2[7] * CosPi4_64; + temp2 = step2[4] * CosPi4_64 + step2[7] * CosPi28_64; + step1[4] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[7] = (short)WrapLow(DctConstRoundShift(temp2)); + temp1 = step2[5] * CosPi12_64 - step2[6] * CosPi20_64; + temp2 = step2[5] * CosPi20_64 + step2[6] * CosPi12_64; + step1[5] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[6] = (short)WrapLow(DctConstRoundShift(temp2)); + + step1[8] = (short)WrapLow(step2[8] + step2[9]); + step1[9] = (short)WrapLow(step2[8] - step2[9]); + step1[10] = (short)WrapLow(-step2[10] + step2[11]); + step1[11] = (short)WrapLow(step2[10] + step2[11]); + step1[12] = (short)WrapLow(step2[12] + step2[13]); + step1[13] = (short)WrapLow(step2[12] - step2[13]); + step1[14] = (short)WrapLow(-step2[14] + step2[15]); + step1[15] = (short)WrapLow(step2[14] + step2[15]); + + step1[16] = step2[16]; + step1[31] = step2[31]; + temp1 = -step2[17] * CosPi4_64 + step2[30] * CosPi28_64; + temp2 = step2[17] * CosPi28_64 + step2[30] * CosPi4_64; + step1[17] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[30] = (short)WrapLow(DctConstRoundShift(temp2)); + temp1 = -step2[18] * CosPi28_64 - step2[29] * CosPi4_64; + temp2 = -step2[18] * CosPi4_64 + step2[29] * CosPi28_64; + step1[18] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[29] = 
(short)WrapLow(DctConstRoundShift(temp2)); + step1[19] = step2[19]; + step1[20] = step2[20]; + temp1 = -step2[21] * CosPi20_64 + step2[26] * CosPi12_64; + temp2 = step2[21] * CosPi12_64 + step2[26] * CosPi20_64; + step1[21] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[26] = (short)WrapLow(DctConstRoundShift(temp2)); + temp1 = -step2[22] * CosPi12_64 - step2[25] * CosPi20_64; + temp2 = -step2[22] * CosPi20_64 + step2[25] * CosPi12_64; + step1[22] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[25] = (short)WrapLow(DctConstRoundShift(temp2)); + step1[23] = step2[23]; + step1[24] = step2[24]; + step1[27] = step2[27]; + step1[28] = step2[28]; + + // stage 4 + temp1 = (step1[0] + step1[1]) * CosPi16_64; + temp2 = (step1[0] - step1[1]) * CosPi16_64; + step2[0] = (short)WrapLow(DctConstRoundShift(temp1)); + step2[1] = (short)WrapLow(DctConstRoundShift(temp2)); + temp1 = step1[2] * CosPi24_64 - step1[3] * CosPi8_64; + temp2 = step1[2] * CosPi8_64 + step1[3] * CosPi24_64; + step2[2] = (short)WrapLow(DctConstRoundShift(temp1)); + step2[3] = (short)WrapLow(DctConstRoundShift(temp2)); + step2[4] = (short)WrapLow(step1[4] + step1[5]); + step2[5] = (short)WrapLow(step1[4] - step1[5]); + step2[6] = (short)WrapLow(-step1[6] + step1[7]); + step2[7] = (short)WrapLow(step1[6] + step1[7]); + + step2[8] = step1[8]; + step2[15] = step1[15]; + temp1 = -step1[9] * CosPi8_64 + step1[14] * CosPi24_64; + temp2 = step1[9] * CosPi24_64 + step1[14] * CosPi8_64; + step2[9] = (short)WrapLow(DctConstRoundShift(temp1)); + step2[14] = (short)WrapLow(DctConstRoundShift(temp2)); + temp1 = -step1[10] * CosPi24_64 - step1[13] * CosPi8_64; + temp2 = -step1[10] * CosPi8_64 + step1[13] * CosPi24_64; + step2[10] = (short)WrapLow(DctConstRoundShift(temp1)); + step2[13] = (short)WrapLow(DctConstRoundShift(temp2)); + step2[11] = step1[11]; + step2[12] = step1[12]; + + step2[16] = (short)WrapLow(step1[16] + step1[19]); + step2[17] = (short)WrapLow(step1[17] + step1[18]); + step2[18] = 
(short)WrapLow(step1[17] - step1[18]); + step2[19] = (short)WrapLow(step1[16] - step1[19]); + step2[20] = (short)WrapLow(-step1[20] + step1[23]); + step2[21] = (short)WrapLow(-step1[21] + step1[22]); + step2[22] = (short)WrapLow(step1[21] + step1[22]); + step2[23] = (short)WrapLow(step1[20] + step1[23]); + + step2[24] = (short)WrapLow(step1[24] + step1[27]); + step2[25] = (short)WrapLow(step1[25] + step1[26]); + step2[26] = (short)WrapLow(step1[25] - step1[26]); + step2[27] = (short)WrapLow(step1[24] - step1[27]); + step2[28] = (short)WrapLow(-step1[28] + step1[31]); + step2[29] = (short)WrapLow(-step1[29] + step1[30]); + step2[30] = (short)WrapLow(step1[29] + step1[30]); + step2[31] = (short)WrapLow(step1[28] + step1[31]); + + // stage 5 + step1[0] = (short)WrapLow(step2[0] + step2[3]); + step1[1] = (short)WrapLow(step2[1] + step2[2]); + step1[2] = (short)WrapLow(step2[1] - step2[2]); + step1[3] = (short)WrapLow(step2[0] - step2[3]); + step1[4] = step2[4]; + temp1 = (step2[6] - step2[5]) * CosPi16_64; + temp2 = (step2[5] + step2[6]) * CosPi16_64; + step1[5] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[6] = (short)WrapLow(DctConstRoundShift(temp2)); + step1[7] = step2[7]; + + step1[8] = (short)WrapLow(step2[8] + step2[11]); + step1[9] = (short)WrapLow(step2[9] + step2[10]); + step1[10] = (short)WrapLow(step2[9] - step2[10]); + step1[11] = (short)WrapLow(step2[8] - step2[11]); + step1[12] = (short)WrapLow(-step2[12] + step2[15]); + step1[13] = (short)WrapLow(-step2[13] + step2[14]); + step1[14] = (short)WrapLow(step2[13] + step2[14]); + step1[15] = (short)WrapLow(step2[12] + step2[15]); + + step1[16] = step2[16]; + step1[17] = step2[17]; + temp1 = -step2[18] * CosPi8_64 + step2[29] * CosPi24_64; + temp2 = step2[18] * CosPi24_64 + step2[29] * CosPi8_64; + step1[18] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[29] = (short)WrapLow(DctConstRoundShift(temp2)); + temp1 = -step2[19] * CosPi8_64 + step2[28] * CosPi24_64; + temp2 = step2[19] * CosPi24_64 + 
step2[28] * CosPi8_64; + step1[19] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[28] = (short)WrapLow(DctConstRoundShift(temp2)); + temp1 = -step2[20] * CosPi24_64 - step2[27] * CosPi8_64; + temp2 = -step2[20] * CosPi8_64 + step2[27] * CosPi24_64; + step1[20] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[27] = (short)WrapLow(DctConstRoundShift(temp2)); + temp1 = -step2[21] * CosPi24_64 - step2[26] * CosPi8_64; + temp2 = -step2[21] * CosPi8_64 + step2[26] * CosPi24_64; + step1[21] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[26] = (short)WrapLow(DctConstRoundShift(temp2)); + step1[22] = step2[22]; + step1[23] = step2[23]; + step1[24] = step2[24]; + step1[25] = step2[25]; + step1[30] = step2[30]; + step1[31] = step2[31]; + + // stage 6 + step2[0] = (short)WrapLow(step1[0] + step1[7]); + step2[1] = (short)WrapLow(step1[1] + step1[6]); + step2[2] = (short)WrapLow(step1[2] + step1[5]); + step2[3] = (short)WrapLow(step1[3] + step1[4]); + step2[4] = (short)WrapLow(step1[3] - step1[4]); + step2[5] = (short)WrapLow(step1[2] - step1[5]); + step2[6] = (short)WrapLow(step1[1] - step1[6]); + step2[7] = (short)WrapLow(step1[0] - step1[7]); + step2[8] = step1[8]; + step2[9] = step1[9]; + temp1 = (-step1[10] + step1[13]) * CosPi16_64; + temp2 = (step1[10] + step1[13]) * CosPi16_64; + step2[10] = (short)WrapLow(DctConstRoundShift(temp1)); + step2[13] = (short)WrapLow(DctConstRoundShift(temp2)); + temp1 = (-step1[11] + step1[12]) * CosPi16_64; + temp2 = (step1[11] + step1[12]) * CosPi16_64; + step2[11] = (short)WrapLow(DctConstRoundShift(temp1)); + step2[12] = (short)WrapLow(DctConstRoundShift(temp2)); + step2[14] = step1[14]; + step2[15] = step1[15]; + + step2[16] = (short)WrapLow(step1[16] + step1[23]); + step2[17] = (short)WrapLow(step1[17] + step1[22]); + step2[18] = (short)WrapLow(step1[18] + step1[21]); + step2[19] = (short)WrapLow(step1[19] + step1[20]); + step2[20] = (short)WrapLow(step1[19] - step1[20]); + step2[21] = (short)WrapLow(step1[18] - 
step1[21]); + step2[22] = (short)WrapLow(step1[17] - step1[22]); + step2[23] = (short)WrapLow(step1[16] - step1[23]); + + step2[24] = (short)WrapLow(-step1[24] + step1[31]); + step2[25] = (short)WrapLow(-step1[25] + step1[30]); + step2[26] = (short)WrapLow(-step1[26] + step1[29]); + step2[27] = (short)WrapLow(-step1[27] + step1[28]); + step2[28] = (short)WrapLow(step1[27] + step1[28]); + step2[29] = (short)WrapLow(step1[26] + step1[29]); + step2[30] = (short)WrapLow(step1[25] + step1[30]); + step2[31] = (short)WrapLow(step1[24] + step1[31]); + + // stage 7 + step1[0] = (short)WrapLow(step2[0] + step2[15]); + step1[1] = (short)WrapLow(step2[1] + step2[14]); + step1[2] = (short)WrapLow(step2[2] + step2[13]); + step1[3] = (short)WrapLow(step2[3] + step2[12]); + step1[4] = (short)WrapLow(step2[4] + step2[11]); + step1[5] = (short)WrapLow(step2[5] + step2[10]); + step1[6] = (short)WrapLow(step2[6] + step2[9]); + step1[7] = (short)WrapLow(step2[7] + step2[8]); + step1[8] = (short)WrapLow(step2[7] - step2[8]); + step1[9] = (short)WrapLow(step2[6] - step2[9]); + step1[10] = (short)WrapLow(step2[5] - step2[10]); + step1[11] = (short)WrapLow(step2[4] - step2[11]); + step1[12] = (short)WrapLow(step2[3] - step2[12]); + step1[13] = (short)WrapLow(step2[2] - step2[13]); + step1[14] = (short)WrapLow(step2[1] - step2[14]); + step1[15] = (short)WrapLow(step2[0] - step2[15]); + + step1[16] = step2[16]; + step1[17] = step2[17]; + step1[18] = step2[18]; + step1[19] = step2[19]; + temp1 = (-step2[20] + step2[27]) * CosPi16_64; + temp2 = (step2[20] + step2[27]) * CosPi16_64; + step1[20] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[27] = (short)WrapLow(DctConstRoundShift(temp2)); + temp1 = (-step2[21] + step2[26]) * CosPi16_64; + temp2 = (step2[21] + step2[26]) * CosPi16_64; + step1[21] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[26] = (short)WrapLow(DctConstRoundShift(temp2)); + temp1 = (-step2[22] + step2[25]) * CosPi16_64; + temp2 = (step2[22] + step2[25]) * 
CosPi16_64; + step1[22] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[25] = (short)WrapLow(DctConstRoundShift(temp2)); + temp1 = (-step2[23] + step2[24]) * CosPi16_64; + temp2 = (step2[23] + step2[24]) * CosPi16_64; + step1[23] = (short)WrapLow(DctConstRoundShift(temp1)); + step1[24] = (short)WrapLow(DctConstRoundShift(temp2)); + step1[28] = step2[28]; + step1[29] = step2[29]; + step1[30] = step2[30]; + step1[31] = step2[31]; + + // final stage + output[0] = WrapLow(step1[0] + step1[31]); + output[1] = WrapLow(step1[1] + step1[30]); + output[2] = WrapLow(step1[2] + step1[29]); + output[3] = WrapLow(step1[3] + step1[28]); + output[4] = WrapLow(step1[4] + step1[27]); + output[5] = WrapLow(step1[5] + step1[26]); + output[6] = WrapLow(step1[6] + step1[25]); + output[7] = WrapLow(step1[7] + step1[24]); + output[8] = WrapLow(step1[8] + step1[23]); + output[9] = WrapLow(step1[9] + step1[22]); + output[10] = WrapLow(step1[10] + step1[21]); + output[11] = WrapLow(step1[11] + step1[20]); + output[12] = WrapLow(step1[12] + step1[19]); + output[13] = WrapLow(step1[13] + step1[18]); + output[14] = WrapLow(step1[14] + step1[17]); + output[15] = WrapLow(step1[15] + step1[16]); + output[16] = WrapLow(step1[15] - step1[16]); + output[17] = WrapLow(step1[14] - step1[17]); + output[18] = WrapLow(step1[13] - step1[18]); + output[19] = WrapLow(step1[12] - step1[19]); + output[20] = WrapLow(step1[11] - step1[20]); + output[21] = WrapLow(step1[10] - step1[21]); + output[22] = WrapLow(step1[9] - step1[22]); + output[23] = WrapLow(step1[8] - step1[23]); + output[24] = WrapLow(step1[7] - step1[24]); + output[25] = WrapLow(step1[6] - step1[25]); + output[26] = WrapLow(step1[5] - step1[26]); + output[27] = WrapLow(step1[4] - step1[27]); + output[28] = WrapLow(step1[3] - step1[28]); + output[29] = WrapLow(step1[2] - step1[29]); + output[30] = WrapLow(step1[1] - step1[30]); + output[31] = WrapLow(step1[0] - step1[31]); + } + + public static void Idct32x321024Add(ReadOnlySpan input, Span 
dest, int stride) + { + int i, j; + Span output = stackalloc int[32 * 32]; + Span outptr = output; + Span tempIn = stackalloc int[32]; + Span tempOut = stackalloc int[32]; + + // Rows + for (i = 0; i < 32; ++i) + { + short zeroCoeff = 0; + for (j = 0; j < 32; ++j) + { + zeroCoeff |= (short)input[j]; + } + + if (zeroCoeff != 0) + { + Idct32(input, outptr); + } + else + { + outptr.Slice(0, 32).Fill(0); + } + + input = input.Slice(32); + outptr = outptr.Slice(32); + } + + // Columns + for (i = 0; i < 32; ++i) + { + for (j = 0; j < 32; ++j) + { + tempIn[j] = output[j * 32 + i]; + } + + Idct32(tempIn, tempOut); + for (j = 0; j < 32; ++j) + { + dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 6)); + } + } + } + + public static void Idct32x32135Add(ReadOnlySpan input, Span dest, int stride) + { + int i, j; + Span output = stackalloc int[32 * 32]; + Span outptr = output; + Span tempIn = stackalloc int[32]; + Span tempOut = stackalloc int[32]; + + // Rows + // Only upper-left 16x16 has non-zero coeff + for (i = 0; i < 16; ++i) + { + Idct32(input, outptr); + input = input.Slice(32); + outptr = outptr.Slice(32); + } + + // Columns + for (i = 0; i < 32; ++i) + { + for (j = 0; j < 32; ++j) + { + tempIn[j] = output[j * 32 + i]; + } + + Idct32(tempIn, tempOut); + for (j = 0; j < 32; ++j) + { + dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 6)); + } + } + } + + public static void Idct32x3234Add(ReadOnlySpan input, Span dest, int stride) + { + int i, j; + Span output = stackalloc int[32 * 32]; + Span outptr = output; + Span tempIn = stackalloc int[32]; + Span tempOut = stackalloc int[32]; + + // Rows + // Only upper-left 8x8 has non-zero coeff + for (i = 0; i < 8; ++i) + { + Idct32(input, outptr); + input = input.Slice(32); + outptr = outptr.Slice(32); + } + + // Columns + for (i = 0; i < 32; ++i) + { + for (j = 0; j < 32; ++j) + { + tempIn[j] = output[j * 32 + i]; + } + + Idct32(tempIn, 
tempOut); + for (j = 0; j < 32; ++j) + { + dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 6)); + } + } + } + + public static void Idct32x321Add(ReadOnlySpan input, Span dest, int stride) + { + int i, j; + long a1; + int output = WrapLow(DctConstRoundShift((short)input[0] * CosPi16_64)); + + output = WrapLow(DctConstRoundShift(output * CosPi16_64)); + a1 = BitUtils.RoundPowerOfTwo(output, 6); + + for (j = 0; j < 32; ++j) + { + for (i = 0; i < 32; ++i) + { + dest[i] = ClipPixelAdd(dest[i], a1); + } + + dest = dest.Slice(stride); + } + } + + public static void HighbdIwht4x416Add(ReadOnlySpan input, Span dest, int stride, int bd) + { + /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, + 0.5 shifts per pixel. */ + int i; + Span output = stackalloc int[16]; + long a1, b1, c1, d1, e1; + ReadOnlySpan ip = input; + Span op = output; + + for (i = 0; i < 4; i++) + { + a1 = ip[0] >> UnitQuantShift; + c1 = ip[1] >> UnitQuantShift; + d1 = ip[2] >> UnitQuantShift; + b1 = ip[3] >> UnitQuantShift; + a1 += c1; + d1 -= b1; + e1 = (a1 - d1) >> 1; + b1 = e1 - b1; + c1 = e1 - c1; + a1 -= b1; + d1 += c1; + op[0] = HighbdWrapLow(a1, bd); + op[1] = HighbdWrapLow(b1, bd); + op[2] = HighbdWrapLow(c1, bd); + op[3] = HighbdWrapLow(d1, bd); + ip = ip.Slice(4); + op = op.Slice(4); + } + + ReadOnlySpan ip2 = output; + for (i = 0; i < 4; i++) + { + a1 = ip2[4 * 0]; + c1 = ip2[4 * 1]; + d1 = ip2[4 * 2]; + b1 = ip2[4 * 3]; + a1 += c1; + d1 -= b1; + e1 = (a1 - d1) >> 1; + b1 = e1 - b1; + c1 = e1 - c1; + a1 -= b1; + d1 += c1; + dest[stride * 0] = HighbdClipPixelAdd(dest[stride * 0], HighbdWrapLow(a1, bd), bd); + dest[stride * 1] = HighbdClipPixelAdd(dest[stride * 1], HighbdWrapLow(b1, bd), bd); + dest[stride * 2] = HighbdClipPixelAdd(dest[stride * 2], HighbdWrapLow(c1, bd), bd); + dest[stride * 3] = HighbdClipPixelAdd(dest[stride * 3], HighbdWrapLow(d1, bd), bd); + + ip2 = ip2.Slice(1); + dest = dest.Slice(1); + } + } + + public 
static void HighbdIwht4x41Add(ReadOnlySpan input, Span dest, int stride, int bd) + { + int i; + long a1, e1; + Span tmp = stackalloc int[4]; + ReadOnlySpan ip = input; + Span op = tmp; + + a1 = ip[0] >> UnitQuantShift; + e1 = a1 >> 1; + a1 -= e1; + op[0] = HighbdWrapLow(a1, bd); + op[1] = op[2] = op[3] = HighbdWrapLow(e1, bd); + + ReadOnlySpan ip2 = tmp; + for (i = 0; i < 4; i++) + { + e1 = ip2[0] >> 1; + a1 = ip2[0] - e1; + dest[stride * 0] = HighbdClipPixelAdd(dest[stride * 0], a1, bd); + dest[stride * 1] = HighbdClipPixelAdd(dest[stride * 1], e1, bd); + dest[stride * 2] = HighbdClipPixelAdd(dest[stride * 2], e1, bd); + dest[stride * 3] = HighbdClipPixelAdd(dest[stride * 3], e1, bd); + ip2 = ip2.Slice(1); + dest = dest.Slice(1); + } + } + + public static void HighbdIadst4(ReadOnlySpan input, Span output, int bd) + { + long s0, s1, s2, s3, s4, s5, s6, s7; + int x0 = input[0]; + int x1 = input[1]; + int x2 = input[2]; + int x3 = input[3]; + + if (DetectInvalidHighbdInput(input, 4) != 0) + { + Debug.Assert(false, "invalid highbd txfm input"); + output.Slice(0, 4).Fill(0); + return; + } + + if ((x0 | x1 | x2 | x3) == 0) + { + output.Slice(0, 4).Fill(0); + return; + } + + s0 = (long)SinPi1_9 * x0; + s1 = (long)SinPi2_9 * x0; + s2 = (long)SinPi3_9 * x1; + s3 = (long)SinPi4_9 * x2; + s4 = (long)SinPi1_9 * x2; + s5 = (long)SinPi2_9 * x3; + s6 = (long)SinPi4_9 * x3; + s7 = HighbdWrapLow(x0 - x2 + x3, bd); + + s0 = s0 + s3 + s5; + s1 = s1 - s4 - s6; + s3 = s2; + s2 = SinPi3_9 * s7; + + // 1-D transform scaling factor is sqrt(2). + // The overall dynamic range is 14b (input) + 14b (multiplication scaling) + // + 1b (addition) = 29b. + // Hence the output bit depth is 15b. 
+ output[0] = HighbdWrapLow(DctConstRoundShift(s0 + s3), bd); + output[1] = HighbdWrapLow(DctConstRoundShift(s1 + s3), bd); + output[2] = HighbdWrapLow(DctConstRoundShift(s2), bd); + output[3] = HighbdWrapLow(DctConstRoundShift(s0 + s1 - s3), bd); + } + + public static void HighbdIdct4(ReadOnlySpan input, Span output, int bd) + { + Span step = stackalloc int[4]; + long temp1, temp2; + + if (DetectInvalidHighbdInput(input, 4) != 0) + { + Debug.Assert(false, "invalid highbd txfm input"); + output.Slice(0, 4).Fill(0); + return; + } + + // stage 1 + temp1 = (input[0] + input[2]) * (long)CosPi16_64; + temp2 = (input[0] - input[2]) * (long)CosPi16_64; + step[0] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step[1] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + temp1 = input[1] * (long)CosPi24_64 - input[3] * (long)CosPi8_64; + temp2 = input[1] * (long)CosPi8_64 + input[3] * (long)CosPi24_64; + step[2] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step[3] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + + // stage 2 + output[0] = HighbdWrapLow(step[0] + step[3], bd); + output[1] = HighbdWrapLow(step[1] + step[2], bd); + output[2] = HighbdWrapLow(step[1] - step[2], bd); + output[3] = HighbdWrapLow(step[0] - step[3], bd); + } + + public static void HighbdIdct4x416Add(ReadOnlySpan input, Span dest, int stride, int bd) + { + int i, j; + Span output = stackalloc int[4 * 4]; + Span outptr = output; + Span tempIn = stackalloc int[4]; + Span tempOut = stackalloc int[4]; + + // Rows + for (i = 0; i < 4; ++i) + { + HighbdIdct4(input, outptr, bd); + input = input.Slice(4); + outptr = outptr.Slice(4); + } + + // Columns + for (i = 0; i < 4; ++i) + { + for (j = 0; j < 4; ++j) + { + tempIn[j] = output[j * 4 + i]; + } + + HighbdIdct4(tempIn, tempOut, bd); + for (j = 0; j < 4; ++j) + { + dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 4), bd); + } + } + } + + public static void HighbdIdct4x41Add(ReadOnlySpan input, Span 
dest, int stride, int bd) + { + int i; + long a1; + int output = HighbdWrapLow(DctConstRoundShift(input[0] * (long)CosPi16_64), bd); + + output = HighbdWrapLow(DctConstRoundShift(output * (long)CosPi16_64), bd); + a1 = BitUtils.RoundPowerOfTwo(output, 4); + + for (i = 0; i < 4; i++) + { + dest[0] = HighbdClipPixelAdd(dest[0], a1, bd); + dest[1] = HighbdClipPixelAdd(dest[1], a1, bd); + dest[2] = HighbdClipPixelAdd(dest[2], a1, bd); + dest[3] = HighbdClipPixelAdd(dest[3], a1, bd); + dest = dest.Slice(stride); + } + } + + public static void HighbdIadst8(ReadOnlySpan input, Span output, int bd) + { + long s0, s1, s2, s3, s4, s5, s6, s7; + int x0 = input[7]; + int x1 = input[0]; + int x2 = input[5]; + int x3 = input[2]; + int x4 = input[3]; + int x5 = input[4]; + int x6 = input[1]; + int x7 = input[6]; + + if (DetectInvalidHighbdInput(input, 8) != 0) + { + Debug.Assert(false, "invalid highbd txfm input"); + output.Slice(0, 8).Fill(0); + return; + } + + if ((x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7) == 0) + { + output.Slice(0, 8).Fill(0); + return; + } + + // stage 1 + s0 = (long)CosPi2_64 * x0 + (long)CosPi30_64 * x1; + s1 = (long)CosPi30_64 * x0 - (long)CosPi2_64 * x1; + s2 = (long)CosPi10_64 * x2 + (long)CosPi22_64 * x3; + s3 = (long)CosPi22_64 * x2 - (long)CosPi10_64 * x3; + s4 = (long)CosPi18_64 * x4 + (long)CosPi14_64 * x5; + s5 = (long)CosPi14_64 * x4 - (long)CosPi18_64 * x5; + s6 = (long)CosPi26_64 * x6 + (long)CosPi6_64 * x7; + s7 = (long)CosPi6_64 * x6 - (long)CosPi26_64 * x7; + + x0 = HighbdWrapLow(DctConstRoundShift(s0 + s4), bd); + x1 = HighbdWrapLow(DctConstRoundShift(s1 + s5), bd); + x2 = HighbdWrapLow(DctConstRoundShift(s2 + s6), bd); + x3 = HighbdWrapLow(DctConstRoundShift(s3 + s7), bd); + x4 = HighbdWrapLow(DctConstRoundShift(s0 - s4), bd); + x5 = HighbdWrapLow(DctConstRoundShift(s1 - s5), bd); + x6 = HighbdWrapLow(DctConstRoundShift(s2 - s6), bd); + x7 = HighbdWrapLow(DctConstRoundShift(s3 - s7), bd); + + // stage 2 + s0 = x0; + s1 = x1; + s2 = x2; + s3 = 
x3; + s4 = (long)CosPi8_64 * x4 + (long)CosPi24_64 * x5; + s5 = (long)CosPi24_64 * x4 - (long)CosPi8_64 * x5; + s6 = (long)(-CosPi24_64) * x6 + (long)CosPi8_64 * x7; + s7 = (long)CosPi8_64 * x6 + (long)CosPi24_64 * x7; + + x0 = HighbdWrapLow(s0 + s2, bd); + x1 = HighbdWrapLow(s1 + s3, bd); + x2 = HighbdWrapLow(s0 - s2, bd); + x3 = HighbdWrapLow(s1 - s3, bd); + x4 = HighbdWrapLow(DctConstRoundShift(s4 + s6), bd); + x5 = HighbdWrapLow(DctConstRoundShift(s5 + s7), bd); + x6 = HighbdWrapLow(DctConstRoundShift(s4 - s6), bd); + x7 = HighbdWrapLow(DctConstRoundShift(s5 - s7), bd); + + // stage 3 + s2 = (long)CosPi16_64 * (x2 + x3); + s3 = (long)CosPi16_64 * (x2 - x3); + s6 = (long)CosPi16_64 * (x6 + x7); + s7 = (long)CosPi16_64 * (x6 - x7); + + x2 = HighbdWrapLow(DctConstRoundShift(s2), bd); + x3 = HighbdWrapLow(DctConstRoundShift(s3), bd); + x6 = HighbdWrapLow(DctConstRoundShift(s6), bd); + x7 = HighbdWrapLow(DctConstRoundShift(s7), bd); + + output[0] = HighbdWrapLow(x0, bd); + output[1] = HighbdWrapLow(-x4, bd); + output[2] = HighbdWrapLow(x6, bd); + output[3] = HighbdWrapLow(-x2, bd); + output[4] = HighbdWrapLow(x3, bd); + output[5] = HighbdWrapLow(-x7, bd); + output[6] = HighbdWrapLow(x5, bd); + output[7] = HighbdWrapLow(-x1, bd); + } + + public static void HighbdIdct8(ReadOnlySpan input, Span output, int bd) + { + Span step1 = stackalloc int[8]; + Span step2 = stackalloc int[8]; + long temp1, temp2; + + if (DetectInvalidHighbdInput(input, 8) != 0) + { + Debug.Assert(false, "invalid highbd txfm input"); + output.Slice(0, 8).Fill(0); + return; + } + + // stage 1 + step1[0] = input[0]; + step1[2] = input[4]; + step1[1] = input[2]; + step1[3] = input[6]; + temp1 = input[1] * (long)CosPi28_64 - input[7] * (long)CosPi4_64; + temp2 = input[1] * (long)CosPi4_64 + input[7] * (long)CosPi28_64; + step1[4] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[7] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + temp1 = input[5] * (long)CosPi12_64 - input[3] * (long)CosPi20_64; 
+ temp2 = input[5] * (long)CosPi20_64 + input[3] * (long)CosPi12_64; + step1[5] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[6] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + + // stage 2 & stage 3 - even half + HighbdIdct4(step1, step1, bd); + + // stage 2 - odd half + step2[4] = HighbdWrapLow(step1[4] + step1[5], bd); + step2[5] = HighbdWrapLow(step1[4] - step1[5], bd); + step2[6] = HighbdWrapLow(-step1[6] + step1[7], bd); + step2[7] = HighbdWrapLow(step1[6] + step1[7], bd); + + // stage 3 - odd half + step1[4] = step2[4]; + temp1 = (step2[6] - step2[5]) * (long)CosPi16_64; + temp2 = (step2[5] + step2[6]) * (long)CosPi16_64; + step1[5] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[6] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + step1[7] = step2[7]; + + // stage 4 + output[0] = HighbdWrapLow(step1[0] + step1[7], bd); + output[1] = HighbdWrapLow(step1[1] + step1[6], bd); + output[2] = HighbdWrapLow(step1[2] + step1[5], bd); + output[3] = HighbdWrapLow(step1[3] + step1[4], bd); + output[4] = HighbdWrapLow(step1[3] - step1[4], bd); + output[5] = HighbdWrapLow(step1[2] - step1[5], bd); + output[6] = HighbdWrapLow(step1[1] - step1[6], bd); + output[7] = HighbdWrapLow(step1[0] - step1[7], bd); + } + + public static void HighbdIdct8x864Add(ReadOnlySpan input, Span dest, int stride, int bd) + { + int i, j; + Span output = stackalloc int[8 * 8]; + Span outptr = output; + Span tempIn = stackalloc int[8]; + Span tempOut = stackalloc int[8]; + + // First transform rows + for (i = 0; i < 8; ++i) + { + HighbdIdct8(input, outptr, bd); + input = input.Slice(8); + outptr = outptr.Slice(8); + } + + // Then transform columns + for (i = 0; i < 8; ++i) + { + for (j = 0; j < 8; ++j) + { + tempIn[j] = output[j * 8 + i]; + } + + HighbdIdct8(tempIn, tempOut, bd); + for (j = 0; j < 8; ++j) + { + dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 5), bd); + } + } + } + + public static void 
HighbdIdct8x812Add(ReadOnlySpan input, Span dest, int stride, int bd) + { + int i, j; + Span output = stackalloc int[8 * 8]; + Span outptr = output; + Span tempIn = stackalloc int[8]; + Span tempOut = stackalloc int[8]; + + // First transform rows + // Only first 4 row has non-zero coefs + for (i = 0; i < 4; ++i) + { + HighbdIdct8(input, outptr, bd); + input = input.Slice(8); + outptr = outptr.Slice(8); + } + + // Then transform columns + for (i = 0; i < 8; ++i) + { + for (j = 0; j < 8; ++j) + { + tempIn[j] = output[j * 8 + i]; + } + + HighbdIdct8(tempIn, tempOut, bd); + for (j = 0; j < 8; ++j) + { + dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 5), bd); + } + } + } + + public static void vpx_Highbdidct8x8_1_add_c(ReadOnlySpan input, Span dest, int stride, int bd) + { + int i, j; + long a1; + int output = HighbdWrapLow(DctConstRoundShift(input[0] * (long)CosPi16_64), bd); + + output = HighbdWrapLow(DctConstRoundShift(output * (long)CosPi16_64), bd); + a1 = BitUtils.RoundPowerOfTwo(output, 5); + for (j = 0; j < 8; ++j) + { + for (i = 0; i < 8; ++i) + { + dest[i] = HighbdClipPixelAdd(dest[i], a1, bd); + } + + dest = dest.Slice(stride); + } + } + + public static void HighbdIadst16(ReadOnlySpan input, Span output, int bd) + { + long s0, s1, s2, s3, s4, s5, s6, s7, s8; + long s9, s10, s11, s12, s13, s14, s15; + int x0 = input[15]; + int x1 = input[0]; + int x2 = input[13]; + int x3 = input[2]; + int x4 = input[11]; + int x5 = input[4]; + int x6 = input[9]; + int x7 = input[6]; + int x8 = input[7]; + int x9 = input[8]; + int x10 = input[5]; + int x11 = input[10]; + int x12 = input[3]; + int x13 = input[12]; + int x14 = input[1]; + int x15 = input[14]; + if (DetectInvalidHighbdInput(input, 16) != 0) + { + Debug.Assert(false, "invalid highbd txfm input"); + output.Slice(0, 16).Fill(0); + return; + } + + if ((x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | x10 | x11 | x12 | x13 | x14 | x15) == 0) + { + output.Slice(0, 
16).Fill(0); + return; + } + + // stage 1 + s0 = x0 * (long)CosPi1_64 + x1 * (long)CosPi31_64; + s1 = x0 * (long)CosPi31_64 - x1 * (long)CosPi1_64; + s2 = x2 * (long)CosPi5_64 + x3 * (long)CosPi27_64; + s3 = x2 * (long)CosPi27_64 - x3 * (long)CosPi5_64; + s4 = x4 * (long)CosPi9_64 + x5 * (long)CosPi23_64; + s5 = x4 * (long)CosPi23_64 - x5 * (long)CosPi9_64; + s6 = x6 * (long)CosPi13_64 + x7 * (long)CosPi19_64; + s7 = x6 * (long)CosPi19_64 - x7 * (long)CosPi13_64; + s8 = x8 * (long)CosPi17_64 + x9 * (long)CosPi15_64; + s9 = x8 * (long)CosPi15_64 - x9 * (long)CosPi17_64; + s10 = x10 * (long)CosPi21_64 + x11 * (long)CosPi11_64; + s11 = x10 * (long)CosPi11_64 - x11 * (long)CosPi21_64; + s12 = x12 * (long)CosPi25_64 + x13 * (long)CosPi7_64; + s13 = x12 * (long)CosPi7_64 - x13 * (long)CosPi25_64; + s14 = x14 * (long)CosPi29_64 + x15 * (long)CosPi3_64; + s15 = x14 * (long)CosPi3_64 - x15 * (long)CosPi29_64; + + x0 = HighbdWrapLow(DctConstRoundShift(s0 + s8), bd); + x1 = HighbdWrapLow(DctConstRoundShift(s1 + s9), bd); + x2 = HighbdWrapLow(DctConstRoundShift(s2 + s10), bd); + x3 = HighbdWrapLow(DctConstRoundShift(s3 + s11), bd); + x4 = HighbdWrapLow(DctConstRoundShift(s4 + s12), bd); + x5 = HighbdWrapLow(DctConstRoundShift(s5 + s13), bd); + x6 = HighbdWrapLow(DctConstRoundShift(s6 + s14), bd); + x7 = HighbdWrapLow(DctConstRoundShift(s7 + s15), bd); + x8 = HighbdWrapLow(DctConstRoundShift(s0 - s8), bd); + x9 = HighbdWrapLow(DctConstRoundShift(s1 - s9), bd); + x10 = HighbdWrapLow(DctConstRoundShift(s2 - s10), bd); + x11 = HighbdWrapLow(DctConstRoundShift(s3 - s11), bd); + x12 = HighbdWrapLow(DctConstRoundShift(s4 - s12), bd); + x13 = HighbdWrapLow(DctConstRoundShift(s5 - s13), bd); + x14 = HighbdWrapLow(DctConstRoundShift(s6 - s14), bd); + x15 = HighbdWrapLow(DctConstRoundShift(s7 - s15), bd); + + // stage 2 + s0 = x0; + s1 = x1; + s2 = x2; + s3 = x3; + s4 = x4; + s5 = x5; + s6 = x6; + s7 = x7; + s8 = x8 * (long)CosPi4_64 + x9 * (long)CosPi28_64; + s9 = x8 * (long)CosPi28_64 
- x9 * (long)CosPi4_64; + s10 = x10 * (long)CosPi20_64 + x11 * (long)CosPi12_64; + s11 = x10 * (long)CosPi12_64 - x11 * (long)CosPi20_64; + s12 = -x12 * (long)CosPi28_64 + x13 * (long)CosPi4_64; + s13 = x12 * (long)CosPi4_64 + x13 * (long)CosPi28_64; + s14 = -x14 * (long)CosPi12_64 + x15 * (long)CosPi20_64; + s15 = x14 * (long)CosPi20_64 + x15 * (long)CosPi12_64; + + x0 = HighbdWrapLow(s0 + s4, bd); + x1 = HighbdWrapLow(s1 + s5, bd); + x2 = HighbdWrapLow(s2 + s6, bd); + x3 = HighbdWrapLow(s3 + s7, bd); + x4 = HighbdWrapLow(s0 - s4, bd); + x5 = HighbdWrapLow(s1 - s5, bd); + x6 = HighbdWrapLow(s2 - s6, bd); + x7 = HighbdWrapLow(s3 - s7, bd); + x8 = HighbdWrapLow(DctConstRoundShift(s8 + s12), bd); + x9 = HighbdWrapLow(DctConstRoundShift(s9 + s13), bd); + x10 = HighbdWrapLow(DctConstRoundShift(s10 + s14), bd); + x11 = HighbdWrapLow(DctConstRoundShift(s11 + s15), bd); + x12 = HighbdWrapLow(DctConstRoundShift(s8 - s12), bd); + x13 = HighbdWrapLow(DctConstRoundShift(s9 - s13), bd); + x14 = HighbdWrapLow(DctConstRoundShift(s10 - s14), bd); + x15 = HighbdWrapLow(DctConstRoundShift(s11 - s15), bd); + + // stage 3 + s0 = x0; + s1 = x1; + s2 = x2; + s3 = x3; + s4 = x4 * (long)CosPi8_64 + x5 * (long)CosPi24_64; + s5 = x4 * (long)CosPi24_64 - x5 * (long)CosPi8_64; + s6 = -x6 * (long)CosPi24_64 + x7 * (long)CosPi8_64; + s7 = x6 * (long)CosPi8_64 + x7 * (long)CosPi24_64; + s8 = x8; + s9 = x9; + s10 = x10; + s11 = x11; + s12 = x12 * (long)CosPi8_64 + x13 * (long)CosPi24_64; + s13 = x12 * (long)CosPi24_64 - x13 * (long)CosPi8_64; + s14 = -x14 * (long)CosPi24_64 + x15 * (long)CosPi8_64; + s15 = x14 * (long)CosPi8_64 + x15 * (long)CosPi24_64; + + x0 = HighbdWrapLow(s0 + s2, bd); + x1 = HighbdWrapLow(s1 + s3, bd); + x2 = HighbdWrapLow(s0 - s2, bd); + x3 = HighbdWrapLow(s1 - s3, bd); + x4 = HighbdWrapLow(DctConstRoundShift(s4 + s6), bd); + x5 = HighbdWrapLow(DctConstRoundShift(s5 + s7), bd); + x6 = HighbdWrapLow(DctConstRoundShift(s4 - s6), bd); + x7 = 
HighbdWrapLow(DctConstRoundShift(s5 - s7), bd); + x8 = HighbdWrapLow(s8 + s10, bd); + x9 = HighbdWrapLow(s9 + s11, bd); + x10 = HighbdWrapLow(s8 - s10, bd); + x11 = HighbdWrapLow(s9 - s11, bd); + x12 = HighbdWrapLow(DctConstRoundShift(s12 + s14), bd); + x13 = HighbdWrapLow(DctConstRoundShift(s13 + s15), bd); + x14 = HighbdWrapLow(DctConstRoundShift(s12 - s14), bd); + x15 = HighbdWrapLow(DctConstRoundShift(s13 - s15), bd); + + // stage 4 + s2 = (long)(-CosPi16_64) * (x2 + x3); + s3 = (long)CosPi16_64 * (x2 - x3); + s6 = (long)CosPi16_64 * (x6 + x7); + s7 = (long)CosPi16_64 * (-x6 + x7); + s10 = (long)CosPi16_64 * (x10 + x11); + s11 = (long)CosPi16_64 * (-x10 + x11); + s14 = (long)(-CosPi16_64) * (x14 + x15); + s15 = (long)CosPi16_64 * (x14 - x15); + + x2 = HighbdWrapLow(DctConstRoundShift(s2), bd); + x3 = HighbdWrapLow(DctConstRoundShift(s3), bd); + x6 = HighbdWrapLow(DctConstRoundShift(s6), bd); + x7 = HighbdWrapLow(DctConstRoundShift(s7), bd); + x10 = HighbdWrapLow(DctConstRoundShift(s10), bd); + x11 = HighbdWrapLow(DctConstRoundShift(s11), bd); + x14 = HighbdWrapLow(DctConstRoundShift(s14), bd); + x15 = HighbdWrapLow(DctConstRoundShift(s15), bd); + + output[0] = HighbdWrapLow(x0, bd); + output[1] = HighbdWrapLow(-x8, bd); + output[2] = HighbdWrapLow(x12, bd); + output[3] = HighbdWrapLow(-x4, bd); + output[4] = HighbdWrapLow(x6, bd); + output[5] = HighbdWrapLow(x14, bd); + output[6] = HighbdWrapLow(x10, bd); + output[7] = HighbdWrapLow(x2, bd); + output[8] = HighbdWrapLow(x3, bd); + output[9] = HighbdWrapLow(x11, bd); + output[10] = HighbdWrapLow(x15, bd); + output[11] = HighbdWrapLow(x7, bd); + output[12] = HighbdWrapLow(x5, bd); + output[13] = HighbdWrapLow(-x13, bd); + output[14] = HighbdWrapLow(x9, bd); + output[15] = HighbdWrapLow(-x1, bd); + } + + public static void HighbdIdct16(ReadOnlySpan input, Span output, int bd) + { + Span step1 = stackalloc int[16]; + Span step2 = stackalloc int[16]; + long temp1, temp2; + + if (DetectInvalidHighbdInput(input, 16) != 
0) + { + Debug.Assert(false, "invalid highbd txfm input"); + output.Slice(0, 16).Fill(0); + return; + } + + // stage 1 + step1[0] = input[0 / 2]; + step1[1] = input[16 / 2]; + step1[2] = input[8 / 2]; + step1[3] = input[24 / 2]; + step1[4] = input[4 / 2]; + step1[5] = input[20 / 2]; + step1[6] = input[12 / 2]; + step1[7] = input[28 / 2]; + step1[8] = input[2 / 2]; + step1[9] = input[18 / 2]; + step1[10] = input[10 / 2]; + step1[11] = input[26 / 2]; + step1[12] = input[6 / 2]; + step1[13] = input[22 / 2]; + step1[14] = input[14 / 2]; + step1[15] = input[30 / 2]; + + // stage 2 + step2[0] = step1[0]; + step2[1] = step1[1]; + step2[2] = step1[2]; + step2[3] = step1[3]; + step2[4] = step1[4]; + step2[5] = step1[5]; + step2[6] = step1[6]; + step2[7] = step1[7]; + + temp1 = step1[8] * (long)CosPi30_64 - step1[15] * (long)CosPi2_64; + temp2 = step1[8] * (long)CosPi2_64 + step1[15] * (long)CosPi30_64; + step2[8] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step2[15] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + + temp1 = step1[9] * (long)CosPi14_64 - step1[14] * (long)CosPi18_64; + temp2 = step1[9] * (long)CosPi18_64 + step1[14] * (long)CosPi14_64; + step2[9] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step2[14] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + + temp1 = step1[10] * (long)CosPi22_64 - step1[13] * (long)CosPi10_64; + temp2 = step1[10] * (long)CosPi10_64 + step1[13] * (long)CosPi22_64; + step2[10] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step2[13] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + + temp1 = step1[11] * (long)CosPi6_64 - step1[12] * (long)CosPi26_64; + temp2 = step1[11] * (long)CosPi26_64 + step1[12] * (long)CosPi6_64; + step2[11] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step2[12] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + + // stage 3 + step1[0] = step2[0]; + step1[1] = step2[1]; + step1[2] = step2[2]; + step1[3] = step2[3]; + + temp1 = step2[4] * (long)CosPi28_64 - step2[7] * (long)CosPi4_64; + temp2 = 
step2[4] * (long)CosPi4_64 + step2[7] * (long)CosPi28_64; + step1[4] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[7] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + temp1 = step2[5] * (long)CosPi12_64 - step2[6] * (long)CosPi20_64; + temp2 = step2[5] * (long)CosPi20_64 + step2[6] * (long)CosPi12_64; + step1[5] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[6] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + + step1[8] = HighbdWrapLow(step2[8] + step2[9], bd); + step1[9] = HighbdWrapLow(step2[8] - step2[9], bd); + step1[10] = HighbdWrapLow(-step2[10] + step2[11], bd); + step1[11] = HighbdWrapLow(step2[10] + step2[11], bd); + step1[12] = HighbdWrapLow(step2[12] + step2[13], bd); + step1[13] = HighbdWrapLow(step2[12] - step2[13], bd); + step1[14] = HighbdWrapLow(-step2[14] + step2[15], bd); + step1[15] = HighbdWrapLow(step2[14] + step2[15], bd); + + // stage 4 + temp1 = (step1[0] + step1[1]) * (long)CosPi16_64; + temp2 = (step1[0] - step1[1]) * (long)CosPi16_64; + step2[0] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step2[1] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + temp1 = step1[2] * (long)CosPi24_64 - step1[3] * (long)CosPi8_64; + temp2 = step1[2] * (long)CosPi8_64 + step1[3] * (long)CosPi24_64; + step2[2] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step2[3] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + step2[4] = HighbdWrapLow(step1[4] + step1[5], bd); + step2[5] = HighbdWrapLow(step1[4] - step1[5], bd); + step2[6] = HighbdWrapLow(-step1[6] + step1[7], bd); + step2[7] = HighbdWrapLow(step1[6] + step1[7], bd); + + step2[8] = step1[8]; + step2[15] = step1[15]; + temp1 = -step1[9] * (long)CosPi8_64 + step1[14] * (long)CosPi24_64; + temp2 = step1[9] * (long)CosPi24_64 + step1[14] * (long)CosPi8_64; + step2[9] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step2[14] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + temp1 = -step1[10] * (long)CosPi24_64 - step1[13] * (long)CosPi8_64; + temp2 = -step1[10] * (long)CosPi8_64 
+ step1[13] * (long)CosPi24_64; + step2[10] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step2[13] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + step2[11] = step1[11]; + step2[12] = step1[12]; + + // stage 5 + step1[0] = HighbdWrapLow(step2[0] + step2[3], bd); + step1[1] = HighbdWrapLow(step2[1] + step2[2], bd); + step1[2] = HighbdWrapLow(step2[1] - step2[2], bd); + step1[3] = HighbdWrapLow(step2[0] - step2[3], bd); + step1[4] = step2[4]; + temp1 = (step2[6] - step2[5]) * (long)CosPi16_64; + temp2 = (step2[5] + step2[6]) * (long)CosPi16_64; + step1[5] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[6] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + step1[7] = step2[7]; + + step1[8] = HighbdWrapLow(step2[8] + step2[11], bd); + step1[9] = HighbdWrapLow(step2[9] + step2[10], bd); + step1[10] = HighbdWrapLow(step2[9] - step2[10], bd); + step1[11] = HighbdWrapLow(step2[8] - step2[11], bd); + step1[12] = HighbdWrapLow(-step2[12] + step2[15], bd); + step1[13] = HighbdWrapLow(-step2[13] + step2[14], bd); + step1[14] = HighbdWrapLow(step2[13] + step2[14], bd); + step1[15] = HighbdWrapLow(step2[12] + step2[15], bd); + + // stage 6 + step2[0] = HighbdWrapLow(step1[0] + step1[7], bd); + step2[1] = HighbdWrapLow(step1[1] + step1[6], bd); + step2[2] = HighbdWrapLow(step1[2] + step1[5], bd); + step2[3] = HighbdWrapLow(step1[3] + step1[4], bd); + step2[4] = HighbdWrapLow(step1[3] - step1[4], bd); + step2[5] = HighbdWrapLow(step1[2] - step1[5], bd); + step2[6] = HighbdWrapLow(step1[1] - step1[6], bd); + step2[7] = HighbdWrapLow(step1[0] - step1[7], bd); + step2[8] = step1[8]; + step2[9] = step1[9]; + temp1 = (-step1[10] + step1[13]) * (long)CosPi16_64; + temp2 = (step1[10] + step1[13]) * (long)CosPi16_64; + step2[10] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step2[13] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + temp1 = (-step1[11] + step1[12]) * (long)CosPi16_64; + temp2 = (step1[11] + step1[12]) * (long)CosPi16_64; + step2[11] = 
HighbdWrapLow(DctConstRoundShift(temp1), bd);
+            step2[12] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+            step2[14] = step1[14];
+            step2[15] = step1[15];
+
+            // stage 7
+            output[0] = HighbdWrapLow(step2[0] + step2[15], bd);
+            output[1] = HighbdWrapLow(step2[1] + step2[14], bd);
+            output[2] = HighbdWrapLow(step2[2] + step2[13], bd);
+            output[3] = HighbdWrapLow(step2[3] + step2[12], bd);
+            output[4] = HighbdWrapLow(step2[4] + step2[11], bd);
+            output[5] = HighbdWrapLow(step2[5] + step2[10], bd);
+            output[6] = HighbdWrapLow(step2[6] + step2[9], bd);
+            output[7] = HighbdWrapLow(step2[7] + step2[8], bd);
+            output[8] = HighbdWrapLow(step2[7] - step2[8], bd);
+            output[9] = HighbdWrapLow(step2[6] - step2[9], bd);
+            output[10] = HighbdWrapLow(step2[5] - step2[10], bd);
+            output[11] = HighbdWrapLow(step2[4] - step2[11], bd);
+            output[12] = HighbdWrapLow(step2[3] - step2[12], bd);
+            output[13] = HighbdWrapLow(step2[2] - step2[13], bd);
+            output[14] = HighbdWrapLow(step2[1] - step2[14], bd);
+            output[15] = HighbdWrapLow(step2[0] - step2[15], bd);
+        }
+
+        /// <summary>
+        /// Full 16x16 inverse DCT: runs <c>HighbdIdct16</c> over all 16 coefficient rows, then over each column,
+        /// and adds the rounded result to <paramref name="dest"/> through <c>HighbdClipPixelAdd</c>.
+        /// </summary>
+        /// <param name="input">Coefficients, 16 per row; 16 rows are consumed</param>
+        /// <param name="dest">Destination pixels, updated in place; row <c>j</c> starts at offset <c>j * stride</c></param>
+        /// <param name="stride">Distance, in elements, between the starts of consecutive rows of <paramref name="dest"/></param>
+        /// <param name="bd">Bit depth, forwarded to the transform and clip helpers</param>
+        public static void HighbdIdct16x16256Add(ReadOnlySpan input, Span dest, int stride, int bd)
+        {
+            int i, j;
+            Span output = stackalloc int[16 * 16];
+            Span outptr = output;
+            Span tempIn = stackalloc int[16];
+            Span tempOut = stackalloc int[16];
+
+            // First transform rows
+            for (i = 0; i < 16; ++i)
+            {
+                HighbdIdct16(input, outptr, bd);
+                input = input.Slice(16);
+                outptr = outptr.Slice(16);
+            }
+
+            // Then transform columns
+            for (i = 0; i < 16; ++i)
+            {
+                for (j = 0; j < 16; ++j)
+                {
+                    tempIn[j] = output[j * 16 + i];
+                }
+
+                HighbdIdct16(tempIn, tempOut, bd);
+                for (j = 0; j < 16; ++j)
+                {
+                    dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 6), bd);
+                }
+            }
+        }
+
+        /// <summary>
+        /// 16x16 inverse DCT for blocks whose non-zero coefficients all lie in the upper-left 8x8 area.
+        /// Only the first 8 coefficient rows are transformed; the remaining rows are treated as zero.
+        /// The rounded result is added to <paramref name="dest"/> through <c>HighbdClipPixelAdd</c>.
+        /// </summary>
+        /// <param name="input">Coefficients, 16 per row; the first 8 rows are consumed</param>
+        /// <param name="dest">Destination pixels, updated in place; row <c>j</c> starts at offset <c>j * stride</c></param>
+        /// <param name="stride">Distance, in elements, between the starts of consecutive rows of <paramref name="dest"/></param>
+        /// <param name="bd">Bit depth, forwarded to the transform and clip helpers</param>
+        public static void HighbdIdct16x1638Add(ReadOnlySpan input, Span dest, int stride, int bd)
+        {
+            int i, j;
+            Span output = stackalloc int[16 * 16];
+            Span outptr = output;
+            Span tempIn = stackalloc int[16];
+            Span tempOut = stackalloc int[16];
+
+            // The row pass below writes rows 0-7 only, but the column pass reads all 16 rows.
+            // stackalloc memory has undefined content, so zero the rows that are never written.
+            output.Slice(8 * 16).Clear();
+
+            // First transform rows. Since all non-zero dct coefficients are in
+            // upper-left 8x8 area, we only need to calculate first 8 rows here.
+            for (i = 0; i < 8; ++i)
+            {
+                HighbdIdct16(input, outptr, bd);
+                input = input.Slice(16);
+                outptr = outptr.Slice(16);
+            }
+
+            // Then transform columns
+            for (i = 0; i < 16; ++i)
+            {
+                for (j = 0; j < 16; ++j)
+                {
+                    tempIn[j] = output[j * 16 + i];
+                }
+
+                HighbdIdct16(tempIn, tempOut, bd);
+                for (j = 0; j < 16; ++j)
+                {
+                    // Indexed by row offset instead of re-slicing dest per row: Slice(stride) after
+                    // the last row throws when fewer than stride elements remain.
+                    dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 6), bd);
+                }
+            }
+        }
+
+        /// <summary>
+        /// 16x16 inverse DCT for blocks whose non-zero coefficients all lie in the upper-left 4x4 area.
+        /// Only the first 4 coefficient rows are transformed; the remaining rows are treated as zero.
+        /// The rounded result is added to <paramref name="dest"/> through <c>HighbdClipPixelAdd</c>.
+        /// </summary>
+        /// <param name="input">Coefficients, 16 per row; the first 4 rows are consumed</param>
+        /// <param name="dest">Destination pixels, updated in place; row <c>j</c> starts at offset <c>j * stride</c></param>
+        /// <param name="stride">Distance, in elements, between the starts of consecutive rows of <paramref name="dest"/></param>
+        /// <param name="bd">Bit depth, forwarded to the transform and clip helpers</param>
+        public static void HighbdIdct16x1610Add(ReadOnlySpan input, Span dest, int stride, int bd)
+        {
+            int i, j;
+            Span output = stackalloc int[16 * 16];
+            Span outptr = output;
+            Span tempIn = stackalloc int[16];
+            Span tempOut = stackalloc int[16];
+
+            // The row pass below writes rows 0-3 only, but the column pass reads all 16 rows.
+            // stackalloc memory has undefined content, so zero the rows that are never written.
+            output.Slice(4 * 16).Clear();
+
+            // First transform rows. Since all non-zero dct coefficients are in
+            // upper-left 4x4 area, we only need to calculate first 4 rows here.
+            for (i = 0; i < 4; ++i)
+            {
+                HighbdIdct16(input, outptr, bd);
+                input = input.Slice(16);
+                outptr = outptr.Slice(16);
+            }
+
+            // Then transform columns
+            for (i = 0; i < 16; ++i)
+            {
+                for (j = 0; j < 16; ++j)
+                {
+                    tempIn[j] = output[j * 16 + i];
+                }
+
+                HighbdIdct16(tempIn, tempOut, bd);
+                for (j = 0; j < 16; ++j)
+                {
+                    dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 6), bd);
+                }
+            }
+        }
+
+        /// <summary>
+        /// DC-only 16x16 inverse transform: derives one residual value from <c>input[0]</c> and adds it,
+        /// clipped through <c>HighbdClipPixelAdd</c>, to every pixel of the 16x16 block in <paramref name="dest"/>.
+        /// </summary>
+        /// <param name="input">Transform coefficients; only element 0 is read</param>
+        /// <param name="dest">Destination pixels, updated in place; row <c>j</c> starts at offset <c>j * stride</c></param>
+        /// <param name="stride">Distance, in elements, between the starts of consecutive rows of <paramref name="dest"/></param>
+        /// <param name="bd">Bit depth, forwarded to the wrap and clip helpers</param>
+        public static void HighbdIdct16x161Add(ReadOnlySpan input, Span dest, int stride, int bd)
+        {
+            // The DC coefficient is scaled by CosPi16_64 twice, wrapping after each step
+            int dc = HighbdWrapLow(DctConstRoundShift(input[0] * (long)CosPi16_64), bd);
+            dc = HighbdWrapLow(DctConstRoundShift(dc * (long)CosPi16_64), bd);
+
+            long a1 = BitUtils.RoundPowerOfTwo(dc, 6);
+
+            for (int j = 0; j < 16; ++j)
+            {
+                // Index from the row offset instead of re-slicing dest after every row:
+                // Slice(stride) after the last row throws when fewer than stride elements remain.
+                int rowStart = j * stride;
+
+                for (int i = 0; i < 16; ++i)
+                {
+                    dest[rowStart + i] = HighbdClipPixelAdd(dest[rowStart + i], a1, bd);
+                }
+            }
+        }
+
+        public static void HighbdIdct32(ReadOnlySpan input, Span output, int bd)
+        {
+            Span step1 = stackalloc int[32];
+            Span step2 = stackalloc int[32];
+            long temp1, temp2;
+
+            if (DetectInvalidHighbdInput(input, 32) != 0)
+            {
+                Debug.Assert(false, "invalid highbd txfm input");
+                output.Slice(0, 32).Fill(0);
+                return;
+            }
+
+            // stage 1
+            step1[0] = input[0];
+            step1[1] = input[16];
+            step1[2] = input[8];
+            step1[3] = input[24];
+            step1[4] = input[4];
+            step1[5] = input[20];
+            step1[6] = input[12];
+            step1[7] = input[28];
+            step1[8] = input[2];
+            step1[9] = input[18];
+            step1[10] = input[10];
+            step1[11] = input[26];
+            step1[12] = input[6];
+            step1[13] = input[22];
+            step1[14] = input[14];
+            step1[15] = input[30];
+
+            temp1 = input[1] * (long)CosPi31_64 - input[31] * (long)CosPi1_64;
+            temp2 = input[1] * (long)CosPi1_64 + input[31] * (long)CosPi31_64;
+            step1[16] = HighbdWrapLow(DctConstRoundShift(temp1), bd);
+            step1[31] = HighbdWrapLow(DctConstRoundShift(temp2), bd);
+
+            temp1 = input[17] * (long)CosPi15_64 - input[15] * (long)CosPi17_64;
+            temp2 =
input[17] * (long)CosPi17_64 + input[15] * (long)CosPi15_64; + step1[17] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[30] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + + temp1 = input[9] * (long)CosPi23_64 - input[23] * (long)CosPi9_64; + temp2 = input[9] * (long)CosPi9_64 + input[23] * (long)CosPi23_64; + step1[18] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[29] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + + temp1 = input[25] * (long)CosPi7_64 - input[7] * (long)CosPi25_64; + temp2 = input[25] * (long)CosPi25_64 + input[7] * (long)CosPi7_64; + step1[19] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[28] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + + temp1 = input[5] * (long)CosPi27_64 - input[27] * (long)CosPi5_64; + temp2 = input[5] * (long)CosPi5_64 + input[27] * (long)CosPi27_64; + step1[20] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[27] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + + temp1 = input[21] * (long)CosPi11_64 - input[11] * (long)CosPi21_64; + temp2 = input[21] * (long)CosPi21_64 + input[11] * (long)CosPi11_64; + step1[21] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[26] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + + temp1 = input[13] * (long)CosPi19_64 - input[19] * (long)CosPi13_64; + temp2 = input[13] * (long)CosPi13_64 + input[19] * (long)CosPi19_64; + step1[22] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[25] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + + temp1 = input[29] * (long)CosPi3_64 - input[3] * (long)CosPi29_64; + temp2 = input[29] * (long)CosPi29_64 + input[3] * (long)CosPi3_64; + step1[23] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[24] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + + // stage 2 + step2[0] = step1[0]; + step2[1] = step1[1]; + step2[2] = step1[2]; + step2[3] = step1[3]; + step2[4] = step1[4]; + step2[5] = step1[5]; + step2[6] = step1[6]; + step2[7] = step1[7]; + + temp1 = step1[8] * (long)CosPi30_64 - step1[15] 
* (long)CosPi2_64; + temp2 = step1[8] * (long)CosPi2_64 + step1[15] * (long)CosPi30_64; + step2[8] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step2[15] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + + temp1 = step1[9] * (long)CosPi14_64 - step1[14] * (long)CosPi18_64; + temp2 = step1[9] * (long)CosPi18_64 + step1[14] * (long)CosPi14_64; + step2[9] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step2[14] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + + temp1 = step1[10] * (long)CosPi22_64 - step1[13] * (long)CosPi10_64; + temp2 = step1[10] * (long)CosPi10_64 + step1[13] * (long)CosPi22_64; + step2[10] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step2[13] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + + temp1 = step1[11] * (long)CosPi6_64 - step1[12] * (long)CosPi26_64; + temp2 = step1[11] * (long)CosPi26_64 + step1[12] * (long)CosPi6_64; + step2[11] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step2[12] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + + step2[16] = HighbdWrapLow(step1[16] + step1[17], bd); + step2[17] = HighbdWrapLow(step1[16] - step1[17], bd); + step2[18] = HighbdWrapLow(-step1[18] + step1[19], bd); + step2[19] = HighbdWrapLow(step1[18] + step1[19], bd); + step2[20] = HighbdWrapLow(step1[20] + step1[21], bd); + step2[21] = HighbdWrapLow(step1[20] - step1[21], bd); + step2[22] = HighbdWrapLow(-step1[22] + step1[23], bd); + step2[23] = HighbdWrapLow(step1[22] + step1[23], bd); + step2[24] = HighbdWrapLow(step1[24] + step1[25], bd); + step2[25] = HighbdWrapLow(step1[24] - step1[25], bd); + step2[26] = HighbdWrapLow(-step1[26] + step1[27], bd); + step2[27] = HighbdWrapLow(step1[26] + step1[27], bd); + step2[28] = HighbdWrapLow(step1[28] + step1[29], bd); + step2[29] = HighbdWrapLow(step1[28] - step1[29], bd); + step2[30] = HighbdWrapLow(-step1[30] + step1[31], bd); + step2[31] = HighbdWrapLow(step1[30] + step1[31], bd); + + // stage 3 + step1[0] = step2[0]; + step1[1] = step2[1]; + step1[2] = step2[2]; + step1[3] = 
step2[3]; + + temp1 = step2[4] * (long)CosPi28_64 - step2[7] * (long)CosPi4_64; + temp2 = step2[4] * (long)CosPi4_64 + step2[7] * (long)CosPi28_64; + step1[4] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[7] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + temp1 = step2[5] * (long)CosPi12_64 - step2[6] * (long)CosPi20_64; + temp2 = step2[5] * (long)CosPi20_64 + step2[6] * (long)CosPi12_64; + step1[5] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[6] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + + step1[8] = HighbdWrapLow(step2[8] + step2[9], bd); + step1[9] = HighbdWrapLow(step2[8] - step2[9], bd); + step1[10] = HighbdWrapLow(-step2[10] + step2[11], bd); + step1[11] = HighbdWrapLow(step2[10] + step2[11], bd); + step1[12] = HighbdWrapLow(step2[12] + step2[13], bd); + step1[13] = HighbdWrapLow(step2[12] - step2[13], bd); + step1[14] = HighbdWrapLow(-step2[14] + step2[15], bd); + step1[15] = HighbdWrapLow(step2[14] + step2[15], bd); + + step1[16] = step2[16]; + step1[31] = step2[31]; + temp1 = -step2[17] * (long)CosPi4_64 + step2[30] * (long)CosPi28_64; + temp2 = step2[17] * (long)CosPi28_64 + step2[30] * (long)CosPi4_64; + step1[17] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[30] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + temp1 = -step2[18] * (long)CosPi28_64 - step2[29] * (long)CosPi4_64; + temp2 = -step2[18] * (long)CosPi4_64 + step2[29] * (long)CosPi28_64; + step1[18] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[29] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + step1[19] = step2[19]; + step1[20] = step2[20]; + temp1 = -step2[21] * (long)CosPi20_64 + step2[26] * (long)CosPi12_64; + temp2 = step2[21] * (long)CosPi12_64 + step2[26] * (long)CosPi20_64; + step1[21] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[26] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + temp1 = -step2[22] * (long)CosPi12_64 - step2[25] * (long)CosPi20_64; + temp2 = -step2[22] * (long)CosPi20_64 + step2[25] * (long)CosPi12_64; 
+ step1[22] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[25] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + step1[23] = step2[23]; + step1[24] = step2[24]; + step1[27] = step2[27]; + step1[28] = step2[28]; + + // stage 4 + temp1 = (step1[0] + step1[1]) * (long)CosPi16_64; + temp2 = (step1[0] - step1[1]) * (long)CosPi16_64; + step2[0] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step2[1] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + temp1 = step1[2] * (long)CosPi24_64 - step1[3] * (long)CosPi8_64; + temp2 = step1[2] * (long)CosPi8_64 + step1[3] * (long)CosPi24_64; + step2[2] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step2[3] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + step2[4] = HighbdWrapLow(step1[4] + step1[5], bd); + step2[5] = HighbdWrapLow(step1[4] - step1[5], bd); + step2[6] = HighbdWrapLow(-step1[6] + step1[7], bd); + step2[7] = HighbdWrapLow(step1[6] + step1[7], bd); + + step2[8] = step1[8]; + step2[15] = step1[15]; + temp1 = -step1[9] * (long)CosPi8_64 + step1[14] * (long)CosPi24_64; + temp2 = step1[9] * (long)CosPi24_64 + step1[14] * (long)CosPi8_64; + step2[9] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step2[14] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + temp1 = -step1[10] * (long)CosPi24_64 - step1[13] * (long)CosPi8_64; + temp2 = -step1[10] * (long)CosPi8_64 + step1[13] * (long)CosPi24_64; + step2[10] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step2[13] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + step2[11] = step1[11]; + step2[12] = step1[12]; + + step2[16] = HighbdWrapLow(step1[16] + step1[19], bd); + step2[17] = HighbdWrapLow(step1[17] + step1[18], bd); + step2[18] = HighbdWrapLow(step1[17] - step1[18], bd); + step2[19] = HighbdWrapLow(step1[16] - step1[19], bd); + step2[20] = HighbdWrapLow(-step1[20] + step1[23], bd); + step2[21] = HighbdWrapLow(-step1[21] + step1[22], bd); + step2[22] = HighbdWrapLow(step1[21] + step1[22], bd); + step2[23] = HighbdWrapLow(step1[20] + step1[23], bd); + + 
step2[24] = HighbdWrapLow(step1[24] + step1[27], bd); + step2[25] = HighbdWrapLow(step1[25] + step1[26], bd); + step2[26] = HighbdWrapLow(step1[25] - step1[26], bd); + step2[27] = HighbdWrapLow(step1[24] - step1[27], bd); + step2[28] = HighbdWrapLow(-step1[28] + step1[31], bd); + step2[29] = HighbdWrapLow(-step1[29] + step1[30], bd); + step2[30] = HighbdWrapLow(step1[29] + step1[30], bd); + step2[31] = HighbdWrapLow(step1[28] + step1[31], bd); + + // stage 5 + step1[0] = HighbdWrapLow(step2[0] + step2[3], bd); + step1[1] = HighbdWrapLow(step2[1] + step2[2], bd); + step1[2] = HighbdWrapLow(step2[1] - step2[2], bd); + step1[3] = HighbdWrapLow(step2[0] - step2[3], bd); + step1[4] = step2[4]; + temp1 = (step2[6] - step2[5]) * (long)CosPi16_64; + temp2 = (step2[5] + step2[6]) * (long)CosPi16_64; + step1[5] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[6] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + step1[7] = step2[7]; + + step1[8] = HighbdWrapLow(step2[8] + step2[11], bd); + step1[9] = HighbdWrapLow(step2[9] + step2[10], bd); + step1[10] = HighbdWrapLow(step2[9] - step2[10], bd); + step1[11] = HighbdWrapLow(step2[8] - step2[11], bd); + step1[12] = HighbdWrapLow(-step2[12] + step2[15], bd); + step1[13] = HighbdWrapLow(-step2[13] + step2[14], bd); + step1[14] = HighbdWrapLow(step2[13] + step2[14], bd); + step1[15] = HighbdWrapLow(step2[12] + step2[15], bd); + + step1[16] = step2[16]; + step1[17] = step2[17]; + temp1 = -step2[18] * (long)CosPi8_64 + step2[29] * (long)CosPi24_64; + temp2 = step2[18] * (long)CosPi24_64 + step2[29] * (long)CosPi8_64; + step1[18] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[29] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + temp1 = -step2[19] * (long)CosPi8_64 + step2[28] * (long)CosPi24_64; + temp2 = step2[19] * (long)CosPi24_64 + step2[28] * (long)CosPi8_64; + step1[19] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[28] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + temp1 = -step2[20] * 
(long)CosPi24_64 - step2[27] * (long)CosPi8_64; + temp2 = -step2[20] * (long)CosPi8_64 + step2[27] * (long)CosPi24_64; + step1[20] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[27] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + temp1 = -step2[21] * (long)CosPi24_64 - step2[26] * (long)CosPi8_64; + temp2 = -step2[21] * (long)CosPi8_64 + step2[26] * (long)CosPi24_64; + step1[21] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[26] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + step1[22] = step2[22]; + step1[23] = step2[23]; + step1[24] = step2[24]; + step1[25] = step2[25]; + step1[30] = step2[30]; + step1[31] = step2[31]; + + // stage 6 + step2[0] = HighbdWrapLow(step1[0] + step1[7], bd); + step2[1] = HighbdWrapLow(step1[1] + step1[6], bd); + step2[2] = HighbdWrapLow(step1[2] + step1[5], bd); + step2[3] = HighbdWrapLow(step1[3] + step1[4], bd); + step2[4] = HighbdWrapLow(step1[3] - step1[4], bd); + step2[5] = HighbdWrapLow(step1[2] - step1[5], bd); + step2[6] = HighbdWrapLow(step1[1] - step1[6], bd); + step2[7] = HighbdWrapLow(step1[0] - step1[7], bd); + step2[8] = step1[8]; + step2[9] = step1[9]; + temp1 = (-step1[10] + step1[13]) * (long)CosPi16_64; + temp2 = (step1[10] + step1[13]) * (long)CosPi16_64; + step2[10] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step2[13] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + temp1 = (-step1[11] + step1[12]) * (long)CosPi16_64; + temp2 = (step1[11] + step1[12]) * (long)CosPi16_64; + step2[11] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step2[12] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + step2[14] = step1[14]; + step2[15] = step1[15]; + + step2[16] = HighbdWrapLow(step1[16] + step1[23], bd); + step2[17] = HighbdWrapLow(step1[17] + step1[22], bd); + step2[18] = HighbdWrapLow(step1[18] + step1[21], bd); + step2[19] = HighbdWrapLow(step1[19] + step1[20], bd); + step2[20] = HighbdWrapLow(step1[19] - step1[20], bd); + step2[21] = HighbdWrapLow(step1[18] - step1[21], bd); + step2[22] = 
HighbdWrapLow(step1[17] - step1[22], bd); + step2[23] = HighbdWrapLow(step1[16] - step1[23], bd); + + step2[24] = HighbdWrapLow(-step1[24] + step1[31], bd); + step2[25] = HighbdWrapLow(-step1[25] + step1[30], bd); + step2[26] = HighbdWrapLow(-step1[26] + step1[29], bd); + step2[27] = HighbdWrapLow(-step1[27] + step1[28], bd); + step2[28] = HighbdWrapLow(step1[27] + step1[28], bd); + step2[29] = HighbdWrapLow(step1[26] + step1[29], bd); + step2[30] = HighbdWrapLow(step1[25] + step1[30], bd); + step2[31] = HighbdWrapLow(step1[24] + step1[31], bd); + + // stage 7 + step1[0] = HighbdWrapLow(step2[0] + step2[15], bd); + step1[1] = HighbdWrapLow(step2[1] + step2[14], bd); + step1[2] = HighbdWrapLow(step2[2] + step2[13], bd); + step1[3] = HighbdWrapLow(step2[3] + step2[12], bd); + step1[4] = HighbdWrapLow(step2[4] + step2[11], bd); + step1[5] = HighbdWrapLow(step2[5] + step2[10], bd); + step1[6] = HighbdWrapLow(step2[6] + step2[9], bd); + step1[7] = HighbdWrapLow(step2[7] + step2[8], bd); + step1[8] = HighbdWrapLow(step2[7] - step2[8], bd); + step1[9] = HighbdWrapLow(step2[6] - step2[9], bd); + step1[10] = HighbdWrapLow(step2[5] - step2[10], bd); + step1[11] = HighbdWrapLow(step2[4] - step2[11], bd); + step1[12] = HighbdWrapLow(step2[3] - step2[12], bd); + step1[13] = HighbdWrapLow(step2[2] - step2[13], bd); + step1[14] = HighbdWrapLow(step2[1] - step2[14], bd); + step1[15] = HighbdWrapLow(step2[0] - step2[15], bd); + + step1[16] = step2[16]; + step1[17] = step2[17]; + step1[18] = step2[18]; + step1[19] = step2[19]; + temp1 = (-step2[20] + step2[27]) * (long)CosPi16_64; + temp2 = (step2[20] + step2[27]) * (long)CosPi16_64; + step1[20] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[27] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + temp1 = (-step2[21] + step2[26]) * (long)CosPi16_64; + temp2 = (step2[21] + step2[26]) * (long)CosPi16_64; + step1[21] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[26] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + 
temp1 = (-step2[22] + step2[25]) * (long)CosPi16_64; + temp2 = (step2[22] + step2[25]) * (long)CosPi16_64; + step1[22] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[25] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + temp1 = (-step2[23] + step2[24]) * (long)CosPi16_64; + temp2 = (step2[23] + step2[24]) * (long)CosPi16_64; + step1[23] = HighbdWrapLow(DctConstRoundShift(temp1), bd); + step1[24] = HighbdWrapLow(DctConstRoundShift(temp2), bd); + step1[28] = step2[28]; + step1[29] = step2[29]; + step1[30] = step2[30]; + step1[31] = step2[31]; + + // final stage + output[0] = HighbdWrapLow(step1[0] + step1[31], bd); + output[1] = HighbdWrapLow(step1[1] + step1[30], bd); + output[2] = HighbdWrapLow(step1[2] + step1[29], bd); + output[3] = HighbdWrapLow(step1[3] + step1[28], bd); + output[4] = HighbdWrapLow(step1[4] + step1[27], bd); + output[5] = HighbdWrapLow(step1[5] + step1[26], bd); + output[6] = HighbdWrapLow(step1[6] + step1[25], bd); + output[7] = HighbdWrapLow(step1[7] + step1[24], bd); + output[8] = HighbdWrapLow(step1[8] + step1[23], bd); + output[9] = HighbdWrapLow(step1[9] + step1[22], bd); + output[10] = HighbdWrapLow(step1[10] + step1[21], bd); + output[11] = HighbdWrapLow(step1[11] + step1[20], bd); + output[12] = HighbdWrapLow(step1[12] + step1[19], bd); + output[13] = HighbdWrapLow(step1[13] + step1[18], bd); + output[14] = HighbdWrapLow(step1[14] + step1[17], bd); + output[15] = HighbdWrapLow(step1[15] + step1[16], bd); + output[16] = HighbdWrapLow(step1[15] - step1[16], bd); + output[17] = HighbdWrapLow(step1[14] - step1[17], bd); + output[18] = HighbdWrapLow(step1[13] - step1[18], bd); + output[19] = HighbdWrapLow(step1[12] - step1[19], bd); + output[20] = HighbdWrapLow(step1[11] - step1[20], bd); + output[21] = HighbdWrapLow(step1[10] - step1[21], bd); + output[22] = HighbdWrapLow(step1[9] - step1[22], bd); + output[23] = HighbdWrapLow(step1[8] - step1[23], bd); + output[24] = HighbdWrapLow(step1[7] - step1[24], bd); + output[25] = 
/// <summary>
/// 32x32 high bit depth inverse DCT over all 32 coefficient rows; the rounded result is
/// added to <paramref name="dest"/>.
/// </summary>
/// <param name="input">Coefficients, 32 rows of 32 values.</param>
/// <param name="dest">Pixels updated in place through <c>HighbdClipPixelAdd</c>.</param>
/// <param name="stride">Distance, in elements, between consecutive rows of <paramref name="dest"/>.</param>
/// <param name="bd">Bit depth forwarded to <c>HighbdIdct32</c> and <c>HighbdClipPixelAdd</c>.</param>
public static void HighbdIdct32x321024Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int bd)
{
    Span<int> output = stackalloc int[32 * 32];

    // Rows
    for (int i = 0; i < 32; ++i)
    {
        ReadOnlySpan<int> row = input.Slice(i * 32);
        Span<int> outRow = output.Slice(i * 32);

        int zeroCoeff = 0;
        for (int j = 0; j < 32; ++j)
        {
            zeroCoeff |= row[j];
        }

        if (zeroCoeff != 0)
        {
            HighbdIdct32(row, outRow, bd);
        }
        else
        {
            // An all-zero row transforms to zero, so the 1D transform is skipped.
            outRow.Slice(0, 32).Fill(0);
        }
    }

    // Columns
    HighbdIdct32x32AddColumns(output, dest, stride, bd);
}

/// <summary>
/// 32x32 high bit depth inverse DCT for blocks where only the upper-left 16x16 has
/// non-zero coefficients; the rounded result is added to <paramref name="dest"/>.
/// </summary>
/// <param name="input">Coefficients; only the first 16 rows of 32 values are transformed.</param>
/// <param name="dest">Pixels updated in place through <c>HighbdClipPixelAdd</c>.</param>
/// <param name="stride">Distance, in elements, between consecutive rows of <paramref name="dest"/>.</param>
/// <param name="bd">Bit depth forwarded to <c>HighbdIdct32</c> and <c>HighbdClipPixelAdd</c>.</param>
public static void HighbdIdct32x32135Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int bd)
{
    Span<int> output = stackalloc int[32 * 32];

    // The column pass reads all 32 rows, but only 16 are written below. The language does
    // not define the initial contents of stackalloc memory, so zero the rest explicitly.
    output.Slice(16 * 32).Clear();

    // Rows
    // Only upper-left 16x16 has non-zero coeff
    for (int i = 0; i < 16; ++i)
    {
        HighbdIdct32(input.Slice(i * 32), output.Slice(i * 32), bd);
    }

    // Columns
    // Indexed by j * stride + i like the other variants: slicing dest by stride after the
    // last row would throw when dest ends inside that row.
    HighbdIdct32x32AddColumns(output, dest, stride, bd);
}

/// <summary>
/// 32x32 high bit depth inverse DCT for blocks where only the upper-left 8x8 has
/// non-zero coefficients; the rounded result is added to <paramref name="dest"/>.
/// </summary>
/// <param name="input">Coefficients; only the first 8 rows of 32 values are transformed.</param>
/// <param name="dest">Pixels updated in place through <c>HighbdClipPixelAdd</c>.</param>
/// <param name="stride">Distance, in elements, between consecutive rows of <paramref name="dest"/>.</param>
/// <param name="bd">Bit depth forwarded to <c>HighbdIdct32</c> and <c>HighbdClipPixelAdd</c>.</param>
public static void HighbdIdct32x3234Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int bd)
{
    Span<int> output = stackalloc int[32 * 32];

    // Rows 8..31 are never written by the row pass; see HighbdIdct32x32135Add for why they
    // are cleared explicitly.
    output.Slice(8 * 32).Clear();

    // Rows
    // Only upper-left 8x8 has non-zero coeff
    for (int i = 0; i < 8; ++i)
    {
        HighbdIdct32(input.Slice(i * 32), output.Slice(i * 32), bd);
    }

    // Columns
    HighbdIdct32x32AddColumns(output, dest, stride, bd);
}

/// <summary>
/// 32x32 high bit depth inverse DCT for a block that only has a DC coefficient: a single
/// value derived from <c>input[0]</c> is added to every pixel of the 32x32 area.
/// </summary>
/// <param name="input">Coefficients; only element 0 is read.</param>
/// <param name="dest">Pixels updated in place through <c>HighbdClipPixelAdd</c>.</param>
/// <param name="stride">Distance, in elements, between consecutive rows of <paramref name="dest"/>.</param>
/// <param name="bd">Bit depth forwarded to <c>HighbdWrapLow</c> and <c>HighbdClipPixelAdd</c>.</param>
public static void HighbdIdct32x321Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int bd)
{
    // The DC term is scaled by CosPi16_64 twice (once per 1D pass) and then rounded by 6 bits.
    int output = HighbdWrapLow(DctConstRoundShift(input[0] * (long)CosPi16_64), bd);
    output = HighbdWrapLow(DctConstRoundShift(output * (long)CosPi16_64), bd);
    int a1 = BitUtils.RoundPowerOfTwo(output, 6);

    for (int j = 0; j < 32; ++j)
    {
        // Index from the row start instead of re-slicing dest: a trailing Slice(stride)
        // after the last row would throw when dest ends inside that row.
        int rowStart = j * stride;

        for (int i = 0; i < 32; ++i)
        {
            dest[rowStart + i] = HighbdClipPixelAdd(dest[rowStart + i], a1, bd);
        }
    }
}

/// <summary>
/// Column pass shared by the 32x32 high bit depth inverse DCT variants: transforms each
/// column of <paramref name="rows"/> and adds the result, rounded by 6 bits, to
/// <paramref name="dest"/>.
/// </summary>
/// <param name="rows">Row pass output, 32 rows of 32 values.</param>
/// <param name="dest">Pixels updated in place through <c>HighbdClipPixelAdd</c>.</param>
/// <param name="stride">Distance, in elements, between consecutive rows of <paramref name="dest"/>.</param>
/// <param name="bd">Bit depth forwarded to <c>HighbdIdct32</c> and <c>HighbdClipPixelAdd</c>.</param>
private static void HighbdIdct32x32AddColumns(ReadOnlySpan<int> rows, Span<ushort> dest, int stride, int bd)
{
    Span<int> tempIn = stackalloc int[32];
    Span<int> tempOut = stackalloc int[32];

    for (int i = 0; i < 32; ++i)
    {
        for (int j = 0; j < 32; ++j)
        {
            tempIn[j] = rows[j * 32 + i];
        }

        HighbdIdct32(tempIn, tempOut, bd);

        for (int j = 0; j < 32; ++j)
        {
            int pos = j * stride + i;
            dest[pos] = HighbdClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(tempOut[j], 6), bd);
        }
    }
}
/// <summary>
/// Merges a previous probability with the probability derived from two counts.
/// </summary>
/// <param name="preProb">Probability returned unchanged when both counts are zero.</param>
/// <param name="ct0">First count; passed to <c>GetProb</c> as the numerator.</param>
/// <param name="ct1">Second count; only contributes to the total.</param>
/// <returns>
/// <paramref name="preProb"/> when the total count is zero, otherwise the result of
/// <c>WeightedProb</c> using the factor looked up in <c>CountToUpdateFactor</c>.
/// </returns>
public static byte ModeMvMergeProbs(byte preProb, uint ct0, uint ct1)
{
    uint total = ct0 + ct1;

    if (total == 0)
    {
        return preProb;
    }

    // The lookup index is capped at ModeMvCountSat before indexing CountToUpdateFactor.
    uint saturated = Math.Min(total, ModeMvCountSat);
    int updateFactor = (int)CountToUpdateFactor[(int)saturated];
    byte observed = GetProb(ct0, total);

    return WeightedProb(preProb, observed, updateFactor);
}

/// <summary>
/// Recursively merges the probabilities of the tree node pair starting at index
/// <paramref name="i"/> and returns the sum of the counts found below it.
/// </summary>
/// <param name="i">Index of the node's left entry in <paramref name="tree"/>; the right entry is at i + 1.</param>
/// <param name="tree">Tree entries: a value &lt;= 0 is negated and used as an index into <paramref name="counts"/>, a positive value is the index of a child node.</param>
/// <param name="preProbs">Previous probabilities, indexed by i / 2.</param>
/// <param name="counts">Counts, indexed by the negated non-positive tree entries.</param>
/// <param name="probs">Receives the merged probability at index i / 2.</param>
/// <returns>Left count plus right count of this node.</returns>
private static uint TreeMergeProbsImpl(
    uint i,
    sbyte[] tree,
    ReadOnlySpan<byte> preProbs,
    ReadOnlySpan<uint> counts,
    Span<byte> probs)
{
    int probIndex = (int)(i >> 1);

    int left = tree[i];
    uint leftCount;
    if (left > 0)
    {
        leftCount = TreeMergeProbsImpl((uint)left, tree, preProbs, counts, probs);
    }
    else
    {
        leftCount = counts[-left];
    }

    int right = tree[i + 1];
    uint rightCount;
    if (right > 0)
    {
        rightCount = TreeMergeProbsImpl((uint)right, tree, preProbs, counts, probs);
    }
    else
    {
        rightCount = counts[-right];
    }

    probs[probIndex] = ModeMvMergeProbs(preProbs[probIndex], leftCount, rightCount);

    return leftCount + rightCount;
}

/// <summary>
/// Merges the probabilities of a whole tree, starting at its root (index 0).
/// </summary>
/// <param name="tree">Tree entries, see <c>TreeMergeProbsImpl</c>.</param>
/// <param name="preProbs">Previous probabilities.</param>
/// <param name="counts">Counts.</param>
/// <param name="probs">Receives the merged probabilities.</param>
public static void TreeMergeProbs(sbyte[] tree, ReadOnlySpan<byte> preProbs, ReadOnlySpan<uint> counts, Span<byte> probs)
    => TreeMergeProbsImpl(0, tree, preProbs, counts, probs);
/// <summary>
/// Attaches the reader to <paramref name="buffer"/>, resets the decoding state, performs
/// the first fill and reads the marker bit.
/// </summary>
/// <param name="buffer">Start of the data to read.</param>
/// <param name="size">Length used for the internal view of <paramref name="buffer"/>.</param>
/// <returns>
/// <c>true</c> when <paramref name="size"/> is non-zero but <paramref name="buffer"/> is null,
/// or when the marker bit that was read is non-zero; <c>false</c> otherwise.
/// </returns>
public bool Init(ArrayPtr<byte> buffer, int size)
{
    bool missingBuffer = size != 0 && buffer.IsNull;
    if (missingBuffer)
    {
        return true;
    }

    _buffer = new ArrayPtr<byte>(ref buffer[0], size);

    // Initial state before the first fill.
    Value = 0;
    Count = -8;
    Range = 255;
    Fill();

    // Marker bit
    bool markerSet = ReadBit() != 0;

    return markerSet;
}
/// <summary>
/// Decodes one binary symbol using <paramref name="prob"/> and renormalizes the reader state.
/// </summary>
/// <param name="prob">Probability used to compute the split of the current range.</param>
/// <returns>1 when the current value is at or above the split, otherwise 0.</returns>
public int Read(int prob)
{
    uint split = (Range * (uint)prob + (256 - (uint)prob)) >> 8;

    // Refill before comparing whenever the bit count has gone negative.
    if (Count < 0)
    {
        Fill();
    }

    ulong bigSplit = (ulong)split << (BdValueSize - 8);
    bool isOne = Value >= bigSplit;

    uint newRange = isOne ? Range - split : split;
    ulong newValue = isOne ? Value - bigSplit : Value;

    // Norm gives the shift that renormalizes the range after the split.
    int shift = Norm[newRange];

    Value = newValue << shift;
    Count -= shift;
    Range = newRange << shift;

    return isOne ? 1 : 0;
}

/// <summary>
/// Decodes one symbol with probability 128.
/// </summary>
/// <returns>The decoded bit, 0 or 1.</returns>
public int ReadBit() => Read(128); // vpx_prob_half

/// <summary>
/// Reads <paramref name="bits"/> bits with <see cref="ReadBit"/>, first bit read being the
/// most significant one.
/// </summary>
/// <param name="bits">Number of bits to read.</param>
/// <returns>The assembled value; 0 when <paramref name="bits"/> is not positive.</returns>
public int ReadLiteral(int bits)
{
    int literal = 0;

    for (int remaining = bits; remaining > 0; remaining--)
    {
        literal |= ReadBit() << (remaining - 1);
    }

    return literal;
}

/// <summary>
/// Walks <paramref name="tree"/> from index 0, decoding one symbol per step, until an
/// entry that is not positive is reached.
/// </summary>
/// <param name="tree">Tree entries; positive entries are followed, the first non-positive entry ends the walk.</param>
/// <param name="probs">Probabilities, indexed by half of the current tree index.</param>
/// <returns>The negated value of the entry that ended the walk.</returns>
public int ReadTree(ReadOnlySpan<sbyte> tree, ReadOnlySpan<byte> probs)
{
    int node = 0;

    do
    {
        node = tree[node + Read(probs[node >> 1])];
    }
    while (node > 0);

    return -node;
}
/// <summary>
/// Finds the end of the coded buffer: while <c>Count</c> is above 8 and below
/// <c>BdValueSize</c>, 8 is removed from <c>Count</c> and the buffer position is moved
/// back by one element.
/// </summary>
/// <returns>The adjusted buffer position.</returns>
public ArrayPtr<byte> FindEnd()
{
    for (; Count > 8 && Count < BdValueSize; Count -= 8)
    {
        _buffer = _buffer.Slice(-1);
    }

    return _buffer;
}
CosPi24_64 = 6270; + public const short CosPi25_64 = 5520; + public const short CosPi26_64 = 4756; + public const short CosPi27_64 = 3981; + public const short CosPi28_64 = 3196; + public const short CosPi29_64 = 2404; + public const short CosPi30_64 = 1606; + public const short CosPi31_64 = 804; + + // 16384 * sqrt(2) * sin(kPi / 9) * 2 / 3 + public const short SinPi1_9 = 5283; + public const short SinPi2_9 = 9929; + public const short SinPi3_9 = 13377; + public const short SinPi4_9 = 15212; + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Idct.cs b/Ryujinx.Graphics.Nvdec.Vp9/Idct.cs new file mode 100644 index 00000000..9fa5842a --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Idct.cs @@ -0,0 +1,536 @@ +using Ryujinx.Graphics.Nvdec.Vp9.Common; +using Ryujinx.Graphics.Nvdec.Vp9.Types; +using System; +using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.InvTxfm; + +namespace Ryujinx.Graphics.Nvdec.Vp9 +{ + internal static class Idct + { + private delegate void Transform1D(ReadOnlySpan input, Span output); + private delegate void HighbdTransform1D(ReadOnlySpan input, Span output, int bd); + + private struct Transform2D + { + public Transform1D Cols, Rows; // Vertical and horizontal + + public Transform2D(Transform1D cols, Transform1D rows) + { + Cols = cols; + Rows = rows; + } + } + + private struct HighbdTransform2D + { + public HighbdTransform1D Cols, Rows; // Vertical and horizontal + + public HighbdTransform2D(HighbdTransform1D cols, HighbdTransform1D rows) + { + Cols = cols; + Rows = rows; + } + } + + private static readonly Transform2D[] Iht4 = new Transform2D[] + { + new Transform2D(Idct4, Idct4), // DCT_DCT = 0 + new Transform2D(Iadst4, Idct4), // ADST_DCT = 1 + new Transform2D(Idct4, Iadst4), // DCT_ADST = 2 + new Transform2D(Iadst4, Iadst4) // ADST_ADST = 3 + }; + + public static void Iht4x416Add(ReadOnlySpan input, Span dest, int stride, int txType) + { + int i, j; + Span output = stackalloc int[4 * 4]; + Span outptr = output; + Span tempIn = stackalloc int[4]; + Span 
/// <summary>
/// 8x8 inverse hybrid transform; the result, rounded by 5 bits, is added to
/// <paramref name="dest"/>.
/// </summary>
/// <param name="input">Coefficients, 8 rows of 8 values.</param>
/// <param name="dest">Pixels updated in place through <c>ClipPixelAdd</c>.</param>
/// <param name="stride">Distance, in elements, between consecutive rows of <paramref name="dest"/>.</param>
/// <param name="txType">Index into <c>Iht8</c> selecting the column/row 1D transforms.</param>
public static void Iht8x864Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType)
{
    const int Size = 8;

    Span<int> rows = stackalloc int[Size * Size];
    Span<int> column = stackalloc int[Size];
    Span<int> result = stackalloc int[Size];
    Transform2D transform = Iht8[txType];

    // Inverse transform row vectors
    ReadOnlySpan<int> remainingInput = input;
    Span<int> remainingRows = rows;

    for (int row = 0; row < Size; ++row)
    {
        transform.Rows(remainingInput, remainingRows);
        remainingInput = remainingInput.Slice(Size);
        remainingRows = remainingRows.Slice(Size);
    }

    // Inverse transform column vectors
    for (int col = 0; col < Size; ++col)
    {
        // Gather the column into a contiguous buffer for the 1D transform.
        for (int row = 0; row < Size; ++row)
        {
            column[row] = rows[row * Size + col];
        }

        transform.Cols(column, result);

        for (int row = 0; row < Size; ++row)
        {
            int pos = row * stride + col;
            dest[pos] = ClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(result[row], 5));
        }
    }
}
/// <summary>
/// Picks the 8x8 inverse DCT variant from the end-of-block position and adds the result
/// to <paramref name="dest"/>.
/// </summary>
/// <remarks>
/// If dc is 1, then input[0] is the reconstructed value and needs no dequantization.
/// When dc is 1 it is also counted in eobs, namely eobs >= 1.
/// The calculation can be simplified when there are not many non-zero DCT coefficients,
/// so <paramref name="eob"/> decides which variant runs.
/// </remarks>
/// <param name="input">Coefficients of the block.</param>
/// <param name="dest">Pixels updated in place by the selected variant.</param>
/// <param name="stride">Distance, in elements, between consecutive rows of <paramref name="dest"/>.</param>
/// <param name="eob">End-of-block value used to select the variant.</param>
public static void Idct8x8Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
{
    if (eob == 1)
    {
        // DC only DCT coefficient
        Idct8x81Add(input, dest, stride);
        return;
    }

    if (eob <= 12)
    {
        Idct8x812Add(input, dest, stride);
        return;
    }

    Idct8x864Add(input, dest, stride);
}
/// <summary>
/// Picks the 32x32 inverse DCT variant from the end-of-block position and adds the result
/// to <paramref name="dest"/>.
/// </summary>
/// <param name="input">Coefficients of the block.</param>
/// <param name="dest">Pixels updated in place by the selected variant.</param>
/// <param name="stride">Distance, in elements, between consecutive rows of <paramref name="dest"/>.</param>
/// <param name="eob">End-of-block value used to select the variant.</param>
public static void Idct32x32Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
{
    if (eob == 1)
    {
        Idct32x321Add(input, dest, stride);
        return;
    }

    if (eob <= 34)
    {
        // Non-zero coeff only in upper-left 8x8
        Idct32x3234Add(input, dest, stride);
        return;
    }

    if (eob <= 135)
    {
        // Non-zero coeff only in upper-left 16x16
        Idct32x32135Add(input, dest, stride);
        return;
    }

    Idct32x321024Add(input, dest, stride);
}
/// <summary>
/// 8x8 high bit depth inverse hybrid transform; the result, rounded by 5 bits, is added
/// to <paramref name="dest"/>.
/// </summary>
/// <param name="input">Coefficients, 8 rows of 8 values.</param>
/// <param name="dest">Pixels updated in place through <c>HighbdClipPixelAdd</c>.</param>
/// <param name="stride">Distance, in elements, between consecutive rows of <paramref name="dest"/>.</param>
/// <param name="txType">Index into <c>HighIht8</c> selecting the column/row 1D transforms.</param>
/// <param name="bd">Bit depth forwarded to the 1D transforms and to <c>HighbdClipPixelAdd</c>.</param>
public static void HighbdIht8x864Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd)
{
    int i, j;
    Span<int> output = stackalloc int[8 * 8];
    Span<int> outptr = output;
    Span<int> tempIn = stackalloc int[8];
    Span<int> tempOut = stackalloc int[8];
    HighbdTransform2D ht = HighIht8[txType];

    // Inverse transform row vectors.
    for (i = 0; i < 8; ++i)
    {
        ht.Rows(input, outptr, bd);
        input = input.Slice(8);

        // Advance from the current position. Slicing `output` here would pin every row
        // after the first onto row 1 and leave rows 2..7 unwritten, although the column
        // pass below reads output[j * 8 + i] for every j.
        outptr = outptr.Slice(8);
    }

    // Inverse transform column vectors.
    for (i = 0; i < 8; ++i)
    {
        for (j = 0; j < 8; ++j)
        {
            tempIn[j] = output[j * 8 + i];
        }

        ht.Cols(tempIn, tempOut, bd);
        for (j = 0; j < 8; ++j)
        {
            dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 5), bd);
        }
    }
}

// Column/row 1D transform pairs for the 16x16 high bit depth hybrid transform,
// indexed by transform type.
private static readonly HighbdTransform2D[] HighIht16 = new HighbdTransform2D[]
{
    new HighbdTransform2D(HighbdIdct16, HighbdIdct16),  // DCT_DCT = 0
    new HighbdTransform2D(HighbdIadst16, HighbdIdct16), // ADST_DCT = 1
    new HighbdTransform2D(HighbdIdct16, HighbdIadst16), // DCT_ADST = 2
    new HighbdTransform2D(HighbdIadst16, HighbdIadst16) // ADST_ADST = 3
};

/// <summary>
/// 16x16 high bit depth inverse hybrid transform; the result, rounded by 6 bits, is added
/// to <paramref name="dest"/>.
/// </summary>
/// <param name="input">Coefficients, 16 rows of 16 values.</param>
/// <param name="dest">Pixels updated in place through <c>HighbdClipPixelAdd</c>.</param>
/// <param name="stride">Distance, in elements, between consecutive rows of <paramref name="dest"/>.</param>
/// <param name="txType">Index into <c>HighIht16</c> selecting the column/row 1D transforms.</param>
/// <param name="bd">Bit depth forwarded to the 1D transforms and to <c>HighbdClipPixelAdd</c>.</param>
public static void HighbdIht16x16256Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd)
{
    int i, j;
    Span<int> output = stackalloc int[16 * 16];
    Span<int> outptr = output;
    Span<int> tempIn = stackalloc int[16];
    Span<int> tempOut = stackalloc int[16];
    HighbdTransform2D ht = HighIht16[txType];

    // Rows
    for (i = 0; i < 16; ++i)
    {
        ht.Rows(input, outptr, bd);
        input = input.Slice(16);

        // Advance from the current position. Slicing `output` here would pin every row
        // after the first onto row 1 and leave rows 2..15 unwritten, although the column
        // pass below reads output[j * 16 + i] for every j.
        outptr = outptr.Slice(16);
    }

    // Columns
    for (i = 0; i < 16; ++i)
    {
        for (j = 0; j < 16; ++j)
        {
            tempIn[j] = output[j * 16 + i];
        }

        ht.Cols(tempIn, tempOut, bd);
        for (j = 0; j < 16; ++j)
        {
            dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 6), bd);
        }
    }
}
/// <summary>
/// Picks the 32x32 high bit depth inverse DCT variant from the end-of-block position and
/// adds the result to <paramref name="dest"/>.
/// </summary>
/// <param name="input">Coefficients of the block.</param>
/// <param name="dest">Pixels updated in place by the selected variant.</param>
/// <param name="stride">Distance, in elements, between consecutive rows of <paramref name="dest"/>.</param>
/// <param name="eob">End-of-block value used to select the variant.</param>
/// <param name="bd">Bit depth forwarded to the selected variant.</param>
public static void HighbdIdct32x32Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
{
    if (eob == 1)
    {
        // DC only DCT coefficient
        HighbdIdct32x321Add(input, dest, stride, bd);
        return;
    }

    if (eob <= 34)
    {
        // Non-zero coeff only in upper-left 8x8
        HighbdIdct32x3234Add(input, dest, stride, bd);
        return;
    }

    if (eob <= 135)
    {
        // Non-zero coeff only in upper-left 16x16
        HighbdIdct32x32135Add(input, dest, stride, bd);
        return;
    }

    HighbdIdct32x321024Add(input, dest, stride, bd);
}
HighbdIdct8x8Add(input, dest, stride, eob, bd); + } + else + { + HighbdIht8x864Add(input, dest, stride, (int)txType, bd); + } + } + + public static void HighbdIht16x16Add(TxType txType, ReadOnlySpan input, Span dest, int stride, int eob, int bd) + { + if (txType == TxType.DctDct) + { + HighbdIdct16x16Add(input, dest, stride, eob, bd); + } + else + { + HighbdIht16x16256Add(input, dest, stride, (int)txType, bd); + } + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/InternalErrorException.cs b/Ryujinx.Graphics.Nvdec.Vp9/InternalErrorException.cs new file mode 100644 index 00000000..baa0ab99 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/InternalErrorException.cs @@ -0,0 +1,15 @@ +using System; + +namespace Ryujinx.Graphics.Nvdec.Vp9 +{ + class InternalErrorException : Exception + { + public InternalErrorException(string message) : base(message) + { + } + + public InternalErrorException(string message, Exception innerException) : base(message, innerException) + { + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/InternalErrorInfo.cs b/Ryujinx.Graphics.Nvdec.Vp9/InternalErrorInfo.cs new file mode 100644 index 00000000..68e9cb4b --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/InternalErrorInfo.cs @@ -0,0 +1,14 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9 +{ + internal struct InternalErrorInfo + { + public CodecErr ErrorCode; + + public void InternalError(CodecErr error, string message) + { + ErrorCode = error; + + throw new InternalErrorException(message); + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/LoopFilter.cs b/Ryujinx.Graphics.Nvdec.Vp9/LoopFilter.cs new file mode 100644 index 00000000..13006934 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/LoopFilter.cs @@ -0,0 +1,418 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Nvdec.Vp9.Common; +using Ryujinx.Graphics.Nvdec.Vp9.Types; +using System; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Nvdec.Vp9 +{ + internal static class LoopFilter + { + public const int MaxLoopFilter = 63; + + 
public const int MaxRefLfDeltas = 4; + public const int MaxModeLfDeltas = 2; + + // 64 bit masks for left transform size. Each 1 represents a position where + // we should apply a loop filter across the left border of an 8x8 block + // boundary. + // + // In the case of TX_16X16 (low order byte first) we end up with + // a mask that looks like this: + // + // 10101010 + // 10101010 + // 10101010 + // 10101010 + // 10101010 + // 10101010 + // 10101010 + // 10101010 + // + // A loop filter should be applied to every other 8x8 horizontally. + private static readonly ulong[] Left64X64TxformMask = new ulong[] + { + 0xffffffffffffffffUL, // TX_4X4 + 0xffffffffffffffffUL, // TX_8x8 + 0x5555555555555555UL, // TX_16x16 + 0x1111111111111111UL, // TX_32x32 + }; + + // 64 bit masks for above transform size. Each 1 represents a position where + // we should apply a loop filter across the top border of an 8x8 block + // boundary. + // + // In the case of TX_32x32 (low order byte first) we end up with + // a mask that looks like this: + // + // 11111111 + // 00000000 + // 00000000 + // 00000000 + // 11111111 + // 00000000 + // 00000000 + // 00000000 + // + // A loop filter should be applied to every 4th row vertically. + private static readonly ulong[] Above64X64TxformMask = new ulong[] + { + 0xffffffffffffffffUL, // TX_4X4 + 0xffffffffffffffffUL, // TX_8x8 + 0x00ff00ff00ff00ffUL, // TX_16x16 + 0x000000ff000000ffUL, // TX_32x32 + }; + + // 64 bit masks for prediction sizes (left). Each 1 represents a position + // where there is a left border of an 8x8 block. These are aligned to the right most + // appropriate bit, and then shifted into place. 
+ // + // In the case of BLOCK_16X32 (low order byte first) we end up with + // a mask that looks like this: + // + // 10000000 + // 10000000 + // 10000000 + // 10000000 + // 00000000 + // 00000000 + // 00000000 + // 00000000 + private static readonly ulong[] LeftPredictionMask = new ulong[] + { + 0x0000000000000001UL, // BLOCK_4X4, + 0x0000000000000001UL, // BLOCK_4X8, + 0x0000000000000001UL, // BLOCK_8X4, + 0x0000000000000001UL, // BLOCK_8X8, + 0x0000000000000101UL, // BLOCK_8X16, + 0x0000000000000001UL, // BLOCK_16X8, + 0x0000000000000101UL, // BLOCK_16X16, + 0x0000000001010101UL, // BLOCK_16X32, + 0x0000000000000101UL, // BLOCK_32X16, + 0x0000000001010101UL, // BLOCK_32X32, + 0x0101010101010101UL, // BLOCK_32X64, + 0x0000000001010101UL, // BLOCK_64X32, + 0x0101010101010101UL, // BLOCK_64X64 + }; + + // 64 bit above mask to shift and set for each prediction size. + private static readonly ulong[] AbovePredictionMask = new ulong[] + { + 0x0000000000000001UL, // BLOCK_4X4 + 0x0000000000000001UL, // BLOCK_4X8 + 0x0000000000000001UL, // BLOCK_8X4 + 0x0000000000000001UL, // BLOCK_8X8 + 0x0000000000000001UL, // BLOCK_8X16, + 0x0000000000000003UL, // BLOCK_16X8 + 0x0000000000000003UL, // BLOCK_16X16 + 0x0000000000000003UL, // BLOCK_16X32, + 0x000000000000000fUL, // BLOCK_32X16, + 0x000000000000000fUL, // BLOCK_32X32, + 0x000000000000000fUL, // BLOCK_32X64, + 0x00000000000000ffUL, // BLOCK_64X32, + 0x00000000000000ffUL, // BLOCK_64X64 + }; + + // 64 bit mask to shift and set for each prediction size. A bit is set for + // each 8x8 block covered by a block of the given size when that block is + // placed at the top-left corner of the 64x64 block. 
+ private static readonly ulong[] SizeMask = new ulong[] + { + 0x0000000000000001UL, // BLOCK_4X4 + 0x0000000000000001UL, // BLOCK_4X8 + 0x0000000000000001UL, // BLOCK_8X4 + 0x0000000000000001UL, // BLOCK_8X8 + 0x0000000000000101UL, // BLOCK_8X16, + 0x0000000000000003UL, // BLOCK_16X8 + 0x0000000000000303UL, // BLOCK_16X16 + 0x0000000003030303UL, // BLOCK_16X32, + 0x0000000000000f0fUL, // BLOCK_32X16, + 0x000000000f0f0f0fUL, // BLOCK_32X32, + 0x0f0f0f0f0f0f0f0fUL, // BLOCK_32X64, + 0x00000000ffffffffUL, // BLOCK_64X32, + 0xffffffffffffffffUL, // BLOCK_64X64 + }; + + // These are used for masking the left and above borders. + private const ulong LeftBorder = 0x1111111111111111UL; + private const ulong AboveBorder = 0x000000ff000000ffUL; + + // 16 bit masks for uv transform sizes (left table first, then above table). + private static readonly ushort[] Left64X64TxformMaskUv = new ushort[] + { + 0xffff, // TX_4X4 + 0xffff, // TX_8x8 + 0x5555, // TX_16x16 + 0x1111, // TX_32x32 + }; + + private static readonly ushort[] Above64X64TxformMaskUv = new ushort[] + { + 0xffff, // TX_4X4 + 0xffff, // TX_8x8 + 0x0f0f, // TX_16x16 + 0x000f, // TX_32x32 + }; + + // 16 bit left mask to shift and set for each uv prediction size. + private static readonly ushort[] LeftPredictionMaskUv = new ushort[] + { + 0x0001, // BLOCK_4X4, + 0x0001, // BLOCK_4X8, + 0x0001, // BLOCK_8X4, + 0x0001, // BLOCK_8X8, + 0x0001, // BLOCK_8X16, + 0x0001, // BLOCK_16X8, + 0x0001, // BLOCK_16X16, + 0x0011, // BLOCK_16X32, + 0x0001, // BLOCK_32X16, + 0x0011, // BLOCK_32X32, + 0x1111, // BLOCK_32X64 + 0x0011, // BLOCK_64X32, + 0x1111, // BLOCK_64X64 + }; + + // 16 bit above mask to shift and set for each uv prediction size. 
+ private static readonly ushort[] AbovePredictionMaskUv = new ushort[] + { + 0x0001, // BLOCK_4X4 + 0x0001, // BLOCK_4X8 + 0x0001, // BLOCK_8X4 + 0x0001, // BLOCK_8X8 + 0x0001, // BLOCK_8X16, + 0x0001, // BLOCK_16X8 + 0x0001, // BLOCK_16X16 + 0x0001, // BLOCK_16X32, + 0x0003, // BLOCK_32X16, + 0x0003, // BLOCK_32X32, + 0x0003, // BLOCK_32X64, + 0x000f, // BLOCK_64X32, + 0x000f, // BLOCK_64X64 + }; + + // 16 bit mask to shift and set for each uv prediction size. + private static readonly ushort[] SizeMaskUv = new ushort[] + { + 0x0001, // BLOCK_4X4 + 0x0001, // BLOCK_4X8 + 0x0001, // BLOCK_8X4 + 0x0001, // BLOCK_8X8 + 0x0001, // BLOCK_8X16, + 0x0001, // BLOCK_16X8 + 0x0001, // BLOCK_16X16 + 0x0011, // BLOCK_16X32, + 0x0003, // BLOCK_32X16, + 0x0033, // BLOCK_32X32, + 0x3333, // BLOCK_32X64, + 0x00ff, // BLOCK_64X32, + 0xffff, // BLOCK_64X64 + }; + + private const ushort LeftBorderUv = 0x1111; + private const ushort AboveBorderUv = 0x000f; + + private static readonly int[] ModeLfLut = new int[] + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // INTRA_MODES + 1, 1, 0, 1 // INTER_MODES (ZEROMV == 0) + }; + + private static byte GetFilterLevel(ref LoopFilterInfoN lfiN, ref ModeInfo mi) + { + return lfiN.Lvl[mi.SegmentId][mi.RefFrame[0]][ModeLfLut[(int)mi.Mode]]; + } + + private static ref LoopFilterMask GetLfm(ref Types.LoopFilter lf, int miRow, int miCol) + { + return ref lf.Lfm[(miCol >> 3) + ((miRow >> 3) * lf.LfmStride)]; + } + + // 8x8 blocks in a superblock. A "1" represents the first block in a 16x16 + // or greater area. 
+ private static readonly byte[][] FirstBlockIn16x16 = new byte[][] + { + new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 }, + new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 }, + new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 }, + new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 } + }; + + // This function sets up the bit masks for a block represented + // by miRow, miCol in a 64x64 region. + public static void BuildMask(ref Vp9Common cm, ref ModeInfo mi, int miRow, int miCol, int bw, int bh) + { + BlockSize blockSize = mi.SbType; + TxSize txSizeY = mi.TxSize; + ref LoopFilterInfoN lfiN = ref cm.LfInfo; + int filterLevel = GetFilterLevel(ref lfiN, ref mi); + TxSize txSizeUv = Luts.UvTxsizeLookup[(int)blockSize][(int)txSizeY][1][1]; + ref LoopFilterMask lfm = ref GetLfm(ref cm.Lf, miRow, miCol); + ref ulong leftY = ref lfm.LeftY[(int)txSizeY]; + ref ulong aboveY = ref lfm.AboveY[(int)txSizeY]; + ref ulong int4X4Y = ref lfm.Int4x4Y; + ref ushort leftUv = ref lfm.LeftUv[(int)txSizeUv]; + ref ushort aboveUv = ref lfm.AboveUv[(int)txSizeUv]; + ref ushort int4X4Uv = ref lfm.Int4x4Uv; + int rowInSb = (miRow & 7); + int colInSb = (miCol & 7); + int shiftY = colInSb + (rowInSb << 3); + int shiftUv = (colInSb >> 1) + ((rowInSb >> 1) << 2); + int buildUv = FirstBlockIn16x16[rowInSb][colInSb]; + + if (filterLevel == 0) + { + return; + } + else + { + int index = shiftY; + int i; + for (i = 0; i < bh; i++) + { + MemoryMarshal.CreateSpan(ref lfm.LflY[index], 64 - index).Slice(0, bw).Fill((byte)filterLevel); + index += 8; + } + } + + // These set 1 in the current block size for the block size edges. + // For instance if the block size is 32x16, we'll set: + // above = 1111 + // 0000 + // and + // left = 1000 + // = 1000 + // NOTE : In this example the low bit is left most ( 1000 ) is stored as + // 1, not 8... + // + // U and V set things on a 16 bit scale. 
+ // + aboveY |= AbovePredictionMask[(int)blockSize] << shiftY; + leftY |= LeftPredictionMask[(int)blockSize] << shiftY; + + if (buildUv != 0) + { + aboveUv |= (ushort)(AbovePredictionMaskUv[(int)blockSize] << shiftUv); + leftUv |= (ushort)(LeftPredictionMaskUv[(int)blockSize] << shiftUv); + } + + // If the block has no coefficients and is not intra we skip applying + // the loop filter on block edges. + if (mi.Skip != 0 && mi.IsInterBlock()) + { + return; + } + + // Add a mask for the transform size. The transform size mask is set to + // be correct for a 64x64 prediction block size. Mask to match the size of + // the block we are working on and then shift it into place. + aboveY |= (SizeMask[(int)blockSize] & Above64X64TxformMask[(int)txSizeY]) << shiftY; + leftY |= (SizeMask[(int)blockSize] & Left64X64TxformMask[(int)txSizeY]) << shiftY; + + if (buildUv != 0) + { + aboveUv |= (ushort)((SizeMaskUv[(int)blockSize] & Above64X64TxformMaskUv[(int)txSizeUv]) << shiftUv); + leftUv |= (ushort)((SizeMaskUv[(int)blockSize] & Left64X64TxformMaskUv[(int)txSizeUv]) << shiftUv); + } + + // Try to determine what to do with the internal 4x4 block boundaries. These + // differ from the 4x4 boundaries on the outside edge of an 8x8 in that the + // internal ones can be skipped and don't depend on the prediction block size. 
+ if (txSizeY == TxSize.Tx4x4) + { + int4X4Y |= SizeMask[(int)blockSize] << shiftY; + } + + if (buildUv != 0 && txSizeUv == TxSize.Tx4x4) + { + int4X4Uv |= (ushort)((SizeMaskUv[(int)blockSize] & 0xffff) << shiftUv); + } + } + + public static unsafe void ResetLfm(ref Vp9Common cm) + { + if (cm.Lf.FilterLevel != 0) + { + MemoryUtil.Fill(cm.Lf.Lfm.ToPointer(), new LoopFilterMask(), ((cm.MiRows + (Constants.MiBlockSize - 1)) >> 3) * cm.Lf.LfmStride); + } + } + + private static void UpdateSharpness(ref LoopFilterInfoN lfi, int sharpnessLvl) + { + int lvl; + + // For each possible value for the loop filter fill out limits + for (lvl = 0; lvl <= MaxLoopFilter; lvl++) + { + // Set loop filter parameters that control sharpness. + int blockInsideLimit = lvl >> ((sharpnessLvl > 0 ? 1 : 0) + (sharpnessLvl > 4 ? 1 : 0)); + + if (sharpnessLvl > 0) + { + if (blockInsideLimit > (9 - sharpnessLvl)) + { + blockInsideLimit = (9 - sharpnessLvl); + } + } + + if (blockInsideLimit < 1) + { + blockInsideLimit = 1; + } + + lfi.Lfthr[lvl].Lim.ToSpan().Fill((byte)blockInsideLimit); + lfi.Lfthr[lvl].Mblim.ToSpan().Fill((byte)(2 * (lvl + 2) + blockInsideLimit)); + } + } + + public static void LoopFilterFrameInit(ref Vp9Common cm, int defaultFiltLvl) + { + int segId; + // nShift is the multiplier for lfDeltas + // the multiplier is 1 for when filterLvl is between 0 and 31; + // 2 when filterLvl is between 32 and 63 + int scale = 1 << (defaultFiltLvl >> 5); + ref LoopFilterInfoN lfi = ref cm.LfInfo; + ref Types.LoopFilter lf = ref cm.Lf; + ref Segmentation seg = ref cm.Seg; + + // Update limits if sharpness has changed + if (lf.LastSharpnessLevel != lf.SharpnessLevel) + { + UpdateSharpness(ref lfi, lf.SharpnessLevel); + lf.LastSharpnessLevel = lf.SharpnessLevel; + } + + for (segId = 0; segId < Constants.MaxSegments; segId++) + { + int lvlSeg = defaultFiltLvl; + if (seg.IsSegFeatureActive(segId, SegLvlFeatures.SegLvlAltLf) != 0) + { + int data = seg.GetSegData(segId, SegLvlFeatures.SegLvlAltLf); 
+ lvlSeg = Math.Clamp(seg.AbsDelta == Constants.SegmentAbsData ? data : defaultFiltLvl + data, 0, MaxLoopFilter); + } + + if (!lf.ModeRefDeltaEnabled) + { + // We could get rid of this if we assume that deltas are set to + // zero when not in use; encoder always uses deltas + MemoryMarshal.Cast, byte>(lfi.Lvl[segId].ToSpan()).Fill((byte)lvlSeg); + } + else + { + int refr, mode; + int intraLvl = lvlSeg + lf.RefDeltas[Constants.IntraFrame] * scale; + lfi.Lvl[segId][Constants.IntraFrame][0] = (byte)Math.Clamp(intraLvl, 0, MaxLoopFilter); + + for (refr = Constants.LastFrame; refr < Constants.MaxRefFrames; ++refr) + { + for (mode = 0; mode < MaxModeLfDeltas; ++mode) + { + int interLvl = lvlSeg + lf.RefDeltas[refr] * scale + lf.ModeDeltas[mode] * scale; + lfi.Lvl[segId][refr][mode] = (byte)Math.Clamp(interLvl, 0, MaxLoopFilter); + } + } + } + } + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Luts.cs b/Ryujinx.Graphics.Nvdec.Vp9/Luts.cs new file mode 100644 index 00000000..f703d214 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Luts.cs @@ -0,0 +1,1612 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Nvdec.Vp9.Types; + +namespace Ryujinx.Graphics.Nvdec.Vp9 +{ + internal static class Luts + { + public static readonly byte[] SizeGroupLookup = new byte[] + { + 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 + }; + + public static readonly BlockSize[][] SubsizeLookup = new BlockSize[][] + { + new BlockSize[] + { // PARTITION_NONE + BlockSize.Block4x4, BlockSize.Block4x8, BlockSize.Block8x4, BlockSize.Block8x8, BlockSize.Block8x16, BlockSize.Block16x8, + BlockSize.Block16x16, BlockSize.Block16x32, BlockSize.Block32x16, BlockSize.Block32x32, BlockSize.Block32x64, + BlockSize.Block64x32, BlockSize.Block64x64 + }, + new BlockSize[] + { // PARTITION_HORZ + BlockSize.BlockInvalid, BlockSize.BlockInvalid, BlockSize.BlockInvalid, BlockSize.Block8x4, BlockSize.BlockInvalid, + BlockSize.BlockInvalid, BlockSize.Block16x8, BlockSize.BlockInvalid, BlockSize.BlockInvalid, 
BlockSize.Block32x16, + BlockSize.BlockInvalid, BlockSize.BlockInvalid, BlockSize.Block64x32 + }, + new BlockSize[] + { // PARTITION_VERT + BlockSize.BlockInvalid, BlockSize.BlockInvalid, BlockSize.BlockInvalid, BlockSize.Block4x8, BlockSize.BlockInvalid, + BlockSize.BlockInvalid, BlockSize.Block8x16, BlockSize.BlockInvalid, BlockSize.BlockInvalid, BlockSize.Block16x32, + BlockSize.BlockInvalid, BlockSize.BlockInvalid, BlockSize.Block32x64 + }, + new BlockSize[] + { // PARTITION_SPLIT + BlockSize.BlockInvalid, BlockSize.BlockInvalid, BlockSize.BlockInvalid, BlockSize.Block4x4, BlockSize.BlockInvalid, + BlockSize.BlockInvalid, BlockSize.Block8x8, BlockSize.BlockInvalid, BlockSize.BlockInvalid, BlockSize.Block16x16, + BlockSize.BlockInvalid, BlockSize.BlockInvalid, BlockSize.Block32x32 + } + }; + + public static readonly TxSize[] MaxTxSizeLookup = new TxSize[] + { + TxSize.Tx4x4, TxSize.Tx4x4, TxSize.Tx4x4, TxSize.Tx8x8, TxSize.Tx8x8, TxSize.Tx8x8, TxSize.Tx16x16, + TxSize.Tx16x16, TxSize.Tx16x16, TxSize.Tx32x32, TxSize.Tx32x32, TxSize.Tx32x32, TxSize.Tx32x32 + }; + + public static readonly TxSize[] TxModeToBiggestTxSize = new TxSize[] + { + TxSize.Tx4x4, // ONLY_4X4 + TxSize.Tx8x8, // ALLOW_8X8 + TxSize.Tx16x16, // ALLOW_16X16 + TxSize.Tx32x32, // ALLOW_32X32 + TxSize.Tx32x32, // TX_MODE_SELECT + }; + + public static readonly BlockSize[][][] SsSizeLookup = new BlockSize[][][] + { + // ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1 + // ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1 + new BlockSize[][] { new BlockSize[] { BlockSize.Block4x4, BlockSize.BlockInvalid }, new BlockSize[] { BlockSize.BlockInvalid, BlockSize.BlockInvalid } }, + new BlockSize[][] { new BlockSize[] { BlockSize.Block4x8, BlockSize.Block4x4 }, new BlockSize[] { BlockSize.BlockInvalid, BlockSize.BlockInvalid } }, + new BlockSize[][] { new BlockSize[] { BlockSize.Block8x4, BlockSize.BlockInvalid }, new BlockSize[] { BlockSize.Block4x4, BlockSize.BlockInvalid } }, + new BlockSize[][] { new BlockSize[] { 
BlockSize.Block8x8, BlockSize.Block8x4 }, new BlockSize[] { BlockSize.Block4x8, BlockSize.Block4x4 } }, + new BlockSize[][] { new BlockSize[] { BlockSize.Block8x16, BlockSize.Block8x8 }, new BlockSize[] { BlockSize.BlockInvalid, BlockSize.Block4x8 } }, + new BlockSize[][] { new BlockSize[] { BlockSize.Block16x8, BlockSize.BlockInvalid }, new BlockSize[] { BlockSize.Block8x8, BlockSize.Block8x4 } }, + new BlockSize[][] { new BlockSize[] { BlockSize.Block16x16, BlockSize.Block16x8 }, new BlockSize[] { BlockSize.Block8x16, BlockSize.Block8x8 } }, + new BlockSize[][] { new BlockSize[] { BlockSize.Block16x32, BlockSize.Block16x16 }, new BlockSize[] { BlockSize.BlockInvalid, BlockSize.Block8x16 } }, + new BlockSize[][] { new BlockSize[] { BlockSize.Block32x16, BlockSize.BlockInvalid }, new BlockSize[] { BlockSize.Block16x16, BlockSize.Block16x8 } }, + new BlockSize[][] { new BlockSize[] { BlockSize.Block32x32, BlockSize.Block32x16 }, new BlockSize[] { BlockSize.Block16x32, BlockSize.Block16x16 } }, + new BlockSize[][] { new BlockSize[] { BlockSize.Block32x64, BlockSize.Block32x32 }, new BlockSize[] { BlockSize.BlockInvalid, BlockSize.Block16x32 } }, + new BlockSize[][] { new BlockSize[] { BlockSize.Block64x32, BlockSize.BlockInvalid }, new BlockSize[] { BlockSize.Block32x32, BlockSize.Block32x16 } }, + new BlockSize[][] { new BlockSize[] { BlockSize.Block64x64, BlockSize.Block64x32 }, new BlockSize[] { BlockSize.Block32x64, BlockSize.Block32x32 } }, + }; + + public static readonly TxSize[][][][] UvTxsizeLookup = new TxSize[][][][] + { + // ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1 + // ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1 + new TxSize[][][] + { + // BLOCK_4X4 + new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, 
TxSize.Tx4x4 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + }, + new TxSize[][][] + { + // BLOCK_4X8 + new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + }, + new TxSize[][][] + { + // BLOCK_8X4 + new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + }, + new TxSize[][][] + { + // BLOCK_8X8 + new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + }, + new TxSize[][][] + { + // BLOCK_8X16 + new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + new TxSize[][] { new TxSize[] 
{ TxSize.Tx8x8, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + }, + new TxSize[][][] + { + // BLOCK_16X8 + new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx4x4 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 } }, + }, + new TxSize[][][] + { + // BLOCK_16X16 + new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx16x16, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx16x16, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 } }, + }, + new TxSize[][][] + { + // BLOCK_16X32 + new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx16x16, TxSize.Tx16x16 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx16x16, TxSize.Tx16x16 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 } }, + }, + new TxSize[][][] + { + // BLOCK_32X16 + new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx16x16, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx16x16, TxSize.Tx8x8 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx16x16, 
TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx16x16, TxSize.Tx8x8 } }, + }, + new TxSize[][][] + { + // BLOCK_32X32 + new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx16x16, TxSize.Tx16x16 }, new TxSize[] { TxSize.Tx16x16, TxSize.Tx16x16 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx32x32, TxSize.Tx16x16 }, new TxSize[] { TxSize.Tx16x16, TxSize.Tx16x16 } }, + }, + new TxSize[][][] + { + // BLOCK_32X64 + new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx16x16, TxSize.Tx16x16 }, new TxSize[] { TxSize.Tx16x16, TxSize.Tx16x16 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx32x32, TxSize.Tx32x32 }, new TxSize[] { TxSize.Tx16x16, TxSize.Tx16x16 } }, + }, + new TxSize[][][] + { + // BLOCK_64X32 + new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx16x16, TxSize.Tx16x16 }, new TxSize[] { TxSize.Tx16x16, TxSize.Tx16x16 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx32x32, TxSize.Tx16x16 }, new TxSize[] { TxSize.Tx32x32, TxSize.Tx16x16 } }, + }, + new TxSize[][][] + { + // BLOCK_64X64 + new TxSize[][] { new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 }, new TxSize[] { TxSize.Tx4x4, TxSize.Tx4x4 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 }, new TxSize[] { TxSize.Tx8x8, TxSize.Tx8x8 } }, + new TxSize[][] { new TxSize[] { TxSize.Tx16x16, TxSize.Tx16x16 }, new TxSize[] { TxSize.Tx16x16, TxSize.Tx16x16 } }, + new TxSize[][] { new TxSize[] { 
TxSize.Tx32x32, TxSize.Tx32x32 }, new TxSize[] { TxSize.Tx32x32, TxSize.Tx32x32 } }, + }, + }; + + public struct PartitionContextPair + { + public sbyte Above; + public sbyte Left; + + public PartitionContextPair(sbyte above, sbyte left) + { + Above = above; + Left = left; + } + } + + // Generates 4 bit field in which each bit set to 1 represents + // a blocksize partition 1111 means we split 64x64, 32x32, 16x16 + // and 8x8. 1000 means we just split the 64x64 to 32x32 + public static readonly PartitionContextPair[] PartitionContextLookup = new PartitionContextPair[] + { + new PartitionContextPair(15, 15), // 4X4 - {0b1111, 0b1111} + new PartitionContextPair(15, 14), // 4X8 - {0b1111, 0b1110} + new PartitionContextPair(14, 15), // 8X4 - {0b1110, 0b1111} + new PartitionContextPair(14, 14), // 8X8 - {0b1110, 0b1110} + new PartitionContextPair(14, 12), // 8X16 - {0b1110, 0b1100} + new PartitionContextPair(12, 14), // 16X8 - {0b1100, 0b1110} + new PartitionContextPair(12, 12), // 16X16 - {0b1100, 0b1100} + new PartitionContextPair(12, 8), // 16X32 - {0b1100, 0b1000} + new PartitionContextPair(8, 12), // 32X16 - {0b1000, 0b1100} + new PartitionContextPair(8, 8), // 32X32 - {0b1000, 0b1000} + new PartitionContextPair(8, 0), // 32X64 - {0b1000, 0b0000} + new PartitionContextPair(0, 8), // 64X32 - {0b0000, 0b1000} + new PartitionContextPair(0, 0), // 64X64 - {0b0000, 0b0000} + }; + + // Filter + + private static readonly Array8[] BilinearFilters = new Array8[] + { + NewArray8Short(0, 0, 0, 128, 0, 0, 0, 0), NewArray8Short(0, 0, 0, 120, 8, 0, 0, 0), + NewArray8Short(0, 0, 0, 112, 16, 0, 0, 0), NewArray8Short(0, 0, 0, 104, 24, 0, 0, 0), + NewArray8Short(0, 0, 0, 96, 32, 0, 0, 0), NewArray8Short(0, 0, 0, 88, 40, 0, 0, 0), + NewArray8Short(0, 0, 0, 80, 48, 0, 0, 0), NewArray8Short(0, 0, 0, 72, 56, 0, 0, 0), + NewArray8Short(0, 0, 0, 64, 64, 0, 0, 0), NewArray8Short(0, 0, 0, 56, 72, 0, 0, 0), + NewArray8Short(0, 0, 0, 48, 80, 0, 0, 0), NewArray8Short(0, 0, 0, 40, 88, 0, 0, 0), 
+ NewArray8Short(0, 0, 0, 32, 96, 0, 0, 0), NewArray8Short(0, 0, 0, 24, 104, 0, 0, 0), + NewArray8Short(0, 0, 0, 16, 112, 0, 0, 0), NewArray8Short(0, 0, 0, 8, 120, 0, 0, 0) + }; + + // Lagrangian interpolation filter + private static readonly Array8[] SubPelFilters8 = new Array8[] + { + NewArray8Short(0, 0, 0, 128, 0, 0, 0, 0), NewArray8Short(0, 1, -5, 126, 8, -3, 1, 0), + NewArray8Short(-1, 3, -10, 122, 18, -6, 2, 0), NewArray8Short(-1, 4, -13, 118, 27, -9, 3, -1), + NewArray8Short(-1, 4, -16, 112, 37, -11, 4, -1), NewArray8Short(-1, 5, -18, 105, 48, -14, 4, -1), + NewArray8Short(-1, 5, -19, 97, 58, -16, 5, -1), NewArray8Short(-1, 6, -19, 88, 68, -18, 5, -1), + NewArray8Short(-1, 6, -19, 78, 78, -19, 6, -1), NewArray8Short(-1, 5, -18, 68, 88, -19, 6, -1), + NewArray8Short(-1, 5, -16, 58, 97, -19, 5, -1), NewArray8Short(-1, 4, -14, 48, 105, -18, 5, -1), + NewArray8Short(-1, 4, -11, 37, 112, -16, 4, -1), NewArray8Short(-1, 3, -9, 27, 118, -13, 4, -1), + NewArray8Short(0, 2, -6, 18, 122, -10, 3, -1), NewArray8Short(0, 1, -3, 8, 126, -5, 1, 0) + }; + + // DCT based filter + private static readonly Array8[] SubPelFilters8S = new Array8[] + { + NewArray8Short(0, 0, 0, 128, 0, 0, 0, 0), NewArray8Short(-1, 3, -7, 127, 8, -3, 1, 0), + NewArray8Short(-2, 5, -13, 125, 17, -6, 3, -1), NewArray8Short(-3, 7, -17, 121, 27, -10, 5, -2), + NewArray8Short(-4, 9, -20, 115, 37, -13, 6, -2), NewArray8Short(-4, 10, -23, 108, 48, -16, 8, -3), + NewArray8Short(-4, 10, -24, 100, 59, -19, 9, -3), NewArray8Short(-4, 11, -24, 90, 70, -21, 10, -4), + NewArray8Short(-4, 11, -23, 80, 80, -23, 11, -4), NewArray8Short(-4, 10, -21, 70, 90, -24, 11, -4), + NewArray8Short(-3, 9, -19, 59, 100, -24, 10, -4), NewArray8Short(-3, 8, -16, 48, 108, -23, 10, -4), + NewArray8Short(-2, 6, -13, 37, 115, -20, 9, -4), NewArray8Short(-2, 5, -10, 27, 121, -17, 7, -3), + NewArray8Short(-1, 3, -6, 17, 125, -13, 5, -2), NewArray8Short(0, 1, -3, 8, 127, -7, 3, -1) + }; + + // freqmultiplier = 0.5 + private static 
readonly Array8[] SubPelFilters8Lp = new Array8[] + { + NewArray8Short(0, 0, 0, 128, 0, 0, 0, 0), NewArray8Short(-3, -1, 32, 64, 38, 1, -3, 0), + NewArray8Short(-2, -2, 29, 63, 41, 2, -3, 0), NewArray8Short(-2, -2, 26, 63, 43, 4, -4, 0), + NewArray8Short(-2, -3, 24, 62, 46, 5, -4, 0), NewArray8Short(-2, -3, 21, 60, 49, 7, -4, 0), + NewArray8Short(-1, -4, 18, 59, 51, 9, -4, 0), NewArray8Short(-1, -4, 16, 57, 53, 12, -4, -1), + NewArray8Short(-1, -4, 14, 55, 55, 14, -4, -1), NewArray8Short(-1, -4, 12, 53, 57, 16, -4, -1), + NewArray8Short(0, -4, 9, 51, 59, 18, -4, -1), NewArray8Short(0, -4, 7, 49, 60, 21, -3, -2), + NewArray8Short(0, -4, 5, 46, 62, 24, -3, -2), NewArray8Short(0, -4, 4, 43, 63, 26, -2, -2), + NewArray8Short(0, -3, 2, 41, 63, 29, -2, -2), NewArray8Short(0, -3, 1, 38, 64, 32, -1, -3) + }; + + private static Array8 NewArray8Short(short e0, short e1, short e2, short e3, short e4, short e5, short e6, short e7) + { + Array8 output = new Array8(); + + output[0] = e0; + output[1] = e1; + output[2] = e2; + output[3] = e3; + output[4] = e4; + output[5] = e5; + output[6] = e6; + output[7] = e7; + + return output; + } + + public static readonly Array8[][] Vp9FilterKernels = new Array8[][] + { + SubPelFilters8, SubPelFilters8Lp, SubPelFilters8S, BilinearFilters + }; + + // Scan + + private static readonly short[] DefaultScan4X4 = new short[] + { + 0, 4, 1, 5, 8, 2, 12, 9, 3, 6, 13, 10, 7, 14, 11, 15, + }; + + private static readonly short[] ColScan4X4 = new short[] + { + 0, 4, 8, 1, 12, 5, 9, 2, 13, 6, 10, 3, 7, 14, 11, 15, + }; + + private static readonly short[] RowScan4X4 = new short[] + { + 0, 1, 4, 2, 5, 3, 6, 8, 9, 7, 12, 10, 13, 11, 14, 15, + }; + + private static readonly short[] DefaultScan8X8 = new short[] + { + 0, 8, 1, 16, 9, 2, 17, 24, 10, 3, 18, 25, 32, 11, 4, 26, + 33, 19, 40, 12, 34, 27, 5, 41, 20, 48, 13, 35, 42, 28, 21, 6, + 49, 56, 36, 43, 29, 7, 14, 50, 57, 44, 22, 37, 15, 51, 58, 30, + 45, 23, 52, 59, 38, 31, 60, 53, 46, 39, 61, 54, 47, 62, 
55, 63, + }; + + private static readonly short[] ColScan8X8 = new short[] + { + 0, 8, 16, 1, 24, 9, 32, 17, 2, 40, 25, 10, 33, 18, 48, 3, + 26, 41, 11, 56, 19, 34, 4, 49, 27, 42, 12, 35, 20, 57, 50, 28, + 5, 43, 13, 36, 58, 51, 21, 44, 6, 29, 59, 37, 14, 52, 22, 7, + 45, 60, 30, 15, 38, 53, 23, 46, 31, 61, 39, 54, 47, 62, 55, 63, + }; + + private static readonly short[] RowScan8X8 = new short[] + { + 0, 1, 2, 8, 9, 3, 16, 10, 4, 17, 11, 24, 5, 18, 25, 12, + 19, 26, 32, 6, 13, 20, 33, 27, 7, 34, 40, 21, 28, 41, 14, 35, + 48, 42, 29, 36, 49, 22, 43, 15, 56, 37, 50, 44, 30, 57, 23, 51, + 58, 45, 38, 52, 31, 59, 53, 46, 60, 39, 61, 47, 54, 55, 62, 63, + }; + + private static readonly short[] DefaultScan16X16 = new short[] + { + 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 34, 49, 19, 65, + 80, 50, 4, 35, 66, 20, 81, 96, 51, 5, 36, 82, 97, 67, 112, + 21, 52, 98, 37, 83, 113, 6, 68, 128, 53, 22, 99, 114, 84, 7, + 129, 38, 69, 100, 115, 144, 130, 85, 54, 23, 8, 145, 39, 70, 116, + 101, 131, 160, 146, 55, 86, 24, 71, 132, 117, 161, 40, 9, 102, 147, + 176, 162, 87, 56, 25, 133, 118, 177, 148, 72, 103, 41, 163, 10, 192, + 178, 88, 57, 134, 149, 119, 26, 164, 73, 104, 193, 42, 179, 208, 11, + 135, 89, 165, 120, 150, 58, 194, 180, 27, 74, 209, 105, 151, 136, 43, + 90, 224, 166, 195, 181, 121, 210, 59, 12, 152, 106, 167, 196, 75, 137, + 225, 211, 240, 182, 122, 91, 28, 197, 13, 226, 168, 183, 153, 44, 212, + 138, 107, 241, 60, 29, 123, 198, 184, 227, 169, 242, 76, 213, 154, 45, + 92, 14, 199, 139, 61, 228, 214, 170, 185, 243, 108, 77, 155, 30, 15, + 200, 229, 124, 215, 244, 93, 46, 186, 171, 201, 109, 140, 230, 62, 216, + 245, 31, 125, 78, 156, 231, 47, 187, 202, 217, 94, 246, 141, 63, 232, + 172, 110, 247, 157, 79, 218, 203, 126, 233, 188, 248, 95, 173, 142, 219, + 111, 249, 234, 158, 127, 189, 204, 250, 235, 143, 174, 220, 205, 159, 251, + 190, 221, 175, 236, 237, 191, 206, 252, 222, 253, 207, 238, 223, 254, 239, + 255, + }; + + private static readonly short[] ColScan16X16 = new 
short[] + { + 0, 16, 32, 48, 1, 64, 17, 80, 33, 96, 49, 2, 65, 112, 18, + 81, 34, 128, 50, 97, 3, 66, 144, 19, 113, 35, 82, 160, 98, 51, + 129, 4, 67, 176, 20, 114, 145, 83, 36, 99, 130, 52, 192, 5, 161, + 68, 115, 21, 146, 84, 208, 177, 37, 131, 100, 53, 162, 224, 69, 6, + 116, 193, 147, 85, 22, 240, 132, 38, 178, 101, 163, 54, 209, 117, 70, + 7, 148, 194, 86, 179, 225, 23, 133, 39, 164, 8, 102, 210, 241, 55, + 195, 118, 149, 71, 180, 24, 87, 226, 134, 165, 211, 40, 103, 56, 72, + 150, 196, 242, 119, 9, 181, 227, 88, 166, 25, 135, 41, 104, 212, 57, + 151, 197, 120, 73, 243, 182, 136, 167, 213, 89, 10, 228, 105, 152, 198, + 26, 42, 121, 183, 244, 168, 58, 137, 229, 74, 214, 90, 153, 199, 184, + 11, 106, 245, 27, 122, 230, 169, 43, 215, 59, 200, 138, 185, 246, 75, + 12, 91, 154, 216, 231, 107, 28, 44, 201, 123, 170, 60, 247, 232, 76, + 139, 13, 92, 217, 186, 248, 155, 108, 29, 124, 45, 202, 233, 171, 61, + 14, 77, 140, 15, 249, 93, 30, 187, 156, 218, 46, 109, 125, 62, 172, + 78, 203, 31, 141, 234, 94, 47, 188, 63, 157, 110, 250, 219, 79, 126, + 204, 173, 142, 95, 189, 111, 235, 158, 220, 251, 127, 174, 143, 205, 236, + 159, 190, 221, 252, 175, 206, 237, 191, 253, 222, 238, 207, 254, 223, 239, + 255, + }; + + private static readonly short[] RowScan16X16 = new short[] + { + 0, 1, 2, 16, 3, 17, 4, 18, 32, 5, 33, 19, 6, 34, 48, + 20, 49, 7, 35, 21, 50, 64, 8, 36, 65, 22, 51, 37, 80, 9, + 66, 52, 23, 38, 81, 67, 10, 53, 24, 82, 68, 96, 39, 11, 54, + 83, 97, 69, 25, 98, 84, 40, 112, 55, 12, 70, 99, 113, 85, 26, + 41, 56, 114, 100, 13, 71, 128, 86, 27, 115, 101, 129, 42, 57, 72, + 116, 14, 87, 130, 102, 144, 73, 131, 117, 28, 58, 15, 88, 43, 145, + 103, 132, 146, 118, 74, 160, 89, 133, 104, 29, 59, 147, 119, 44, 161, + 148, 90, 105, 134, 162, 120, 176, 75, 135, 149, 30, 60, 163, 177, 45, + 121, 91, 106, 164, 178, 150, 192, 136, 165, 179, 31, 151, 193, 76, 122, + 61, 137, 194, 107, 152, 180, 208, 46, 166, 167, 195, 92, 181, 138, 209, + 123, 153, 224, 196, 77, 168, 210, 182, 
240, 108, 197, 62, 154, 225, 183, + 169, 211, 47, 139, 93, 184, 226, 212, 241, 198, 170, 124, 155, 199, 78, + 213, 185, 109, 227, 200, 63, 228, 242, 140, 214, 171, 186, 156, 229, 243, + 125, 94, 201, 244, 215, 216, 230, 141, 187, 202, 79, 172, 110, 157, 245, + 217, 231, 95, 246, 232, 126, 203, 247, 233, 173, 218, 142, 111, 158, 188, + 248, 127, 234, 219, 249, 189, 204, 143, 174, 159, 250, 235, 205, 220, 175, + 190, 251, 221, 191, 206, 236, 207, 237, 252, 222, 253, 223, 238, 239, 254, + 255, + }; + + private static readonly short[] DefaultScan32X32 = new short[] + { + 0, 32, 1, 64, 33, 2, 96, 65, 34, 128, 3, 97, 66, + 160, 129, 35, 98, 4, 67, 130, 161, 192, 36, 99, 224, 5, + 162, 193, 68, 131, 37, 100, 225, 194, 256, 163, 69, 132, 6, + 226, 257, 288, 195, 101, 164, 38, 258, 7, 227, 289, 133, 320, + 70, 196, 165, 290, 259, 228, 39, 321, 102, 352, 8, 197, 71, + 134, 322, 291, 260, 353, 384, 229, 166, 103, 40, 354, 323, 292, + 135, 385, 198, 261, 72, 9, 416, 167, 386, 355, 230, 324, 104, + 293, 41, 417, 199, 136, 262, 387, 448, 325, 356, 10, 73, 418, + 231, 168, 449, 294, 388, 105, 419, 263, 42, 200, 357, 450, 137, + 480, 74, 326, 232, 11, 389, 169, 295, 420, 106, 451, 481, 358, + 264, 327, 201, 43, 138, 512, 482, 390, 296, 233, 170, 421, 75, + 452, 359, 12, 513, 265, 483, 328, 107, 202, 514, 544, 422, 391, + 453, 139, 44, 234, 484, 297, 360, 171, 76, 515, 545, 266, 329, + 454, 13, 423, 203, 108, 546, 485, 576, 298, 235, 140, 361, 330, + 172, 547, 45, 455, 267, 577, 486, 77, 204, 362, 608, 14, 299, + 578, 109, 236, 487, 609, 331, 141, 579, 46, 15, 173, 610, 363, + 78, 205, 16, 110, 237, 611, 142, 47, 174, 79, 206, 17, 111, + 238, 48, 143, 80, 175, 112, 207, 49, 18, 239, 81, 113, 19, + 50, 82, 114, 51, 83, 115, 640, 516, 392, 268, 144, 20, 672, + 641, 548, 517, 424, 393, 300, 269, 176, 145, 52, 21, 704, 673, + 642, 580, 549, 518, 456, 425, 394, 332, 301, 270, 208, 177, 146, + 84, 53, 22, 736, 705, 674, 643, 612, 581, 550, 519, 488, 457, + 426, 395, 364, 333, 302, 271, 
240, 209, 178, 147, 116, 85, 54, + 23, 737, 706, 675, 613, 582, 551, 489, 458, 427, 365, 334, 303, + 241, 210, 179, 117, 86, 55, 738, 707, 614, 583, 490, 459, 366, + 335, 242, 211, 118, 87, 739, 615, 491, 367, 243, 119, 768, 644, + 520, 396, 272, 148, 24, 800, 769, 676, 645, 552, 521, 428, 397, + 304, 273, 180, 149, 56, 25, 832, 801, 770, 708, 677, 646, 584, + 553, 522, 460, 429, 398, 336, 305, 274, 212, 181, 150, 88, 57, + 26, 864, 833, 802, 771, 740, 709, 678, 647, 616, 585, 554, 523, + 492, 461, 430, 399, 368, 337, 306, 275, 244, 213, 182, 151, 120, + 89, 58, 27, 865, 834, 803, 741, 710, 679, 617, 586, 555, 493, + 462, 431, 369, 338, 307, 245, 214, 183, 121, 90, 59, 866, 835, + 742, 711, 618, 587, 494, 463, 370, 339, 246, 215, 122, 91, 867, + 743, 619, 495, 371, 247, 123, 896, 772, 648, 524, 400, 276, 152, + 28, 928, 897, 804, 773, 680, 649, 556, 525, 432, 401, 308, 277, + 184, 153, 60, 29, 960, 929, 898, 836, 805, 774, 712, 681, 650, + 588, 557, 526, 464, 433, 402, 340, 309, 278, 216, 185, 154, 92, + 61, 30, 992, 961, 930, 899, 868, 837, 806, 775, 744, 713, 682, + 651, 620, 589, 558, 527, 496, 465, 434, 403, 372, 341, 310, 279, + 248, 217, 186, 155, 124, 93, 62, 31, 993, 962, 931, 869, 838, + 807, 745, 714, 683, 621, 590, 559, 497, 466, 435, 373, 342, 311, + 249, 218, 187, 125, 94, 63, 994, 963, 870, 839, 746, 715, 622, + 591, 498, 467, 374, 343, 250, 219, 126, 95, 995, 871, 747, 623, + 499, 375, 251, 127, 900, 776, 652, 528, 404, 280, 156, 932, 901, + 808, 777, 684, 653, 560, 529, 436, 405, 312, 281, 188, 157, 964, + 933, 902, 840, 809, 778, 716, 685, 654, 592, 561, 530, 468, 437, + 406, 344, 313, 282, 220, 189, 158, 996, 965, 934, 903, 872, 841, + 810, 779, 748, 717, 686, 655, 624, 593, 562, 531, 500, 469, 438, + 407, 376, 345, 314, 283, 252, 221, 190, 159, 997, 966, 935, 873, + 842, 811, 749, 718, 687, 625, 594, 563, 501, 470, 439, 377, 346, + 315, 253, 222, 191, 998, 967, 874, 843, 750, 719, 626, 595, 502, + 471, 378, 347, 254, 223, 999, 875, 751, 627, 503, 
379, 255, 904, + 780, 656, 532, 408, 284, 936, 905, 812, 781, 688, 657, 564, 533, + 440, 409, 316, 285, 968, 937, 906, 844, 813, 782, 720, 689, 658, + 596, 565, 534, 472, 441, 410, 348, 317, 286, 1000, 969, 938, 907, + 876, 845, 814, 783, 752, 721, 690, 659, 628, 597, 566, 535, 504, + 473, 442, 411, 380, 349, 318, 287, 1001, 970, 939, 877, 846, 815, + 753, 722, 691, 629, 598, 567, 505, 474, 443, 381, 350, 319, 1002, + 971, 878, 847, 754, 723, 630, 599, 506, 475, 382, 351, 1003, 879, + 755, 631, 507, 383, 908, 784, 660, 536, 412, 940, 909, 816, 785, + 692, 661, 568, 537, 444, 413, 972, 941, 910, 848, 817, 786, 724, + 693, 662, 600, 569, 538, 476, 445, 414, 1004, 973, 942, 911, 880, + 849, 818, 787, 756, 725, 694, 663, 632, 601, 570, 539, 508, 477, + 446, 415, 1005, 974, 943, 881, 850, 819, 757, 726, 695, 633, 602, + 571, 509, 478, 447, 1006, 975, 882, 851, 758, 727, 634, 603, 510, + 479, 1007, 883, 759, 635, 511, 912, 788, 664, 540, 944, 913, 820, + 789, 696, 665, 572, 541, 976, 945, 914, 852, 821, 790, 728, 697, + 666, 604, 573, 542, 1008, 977, 946, 915, 884, 853, 822, 791, 760, + 729, 698, 667, 636, 605, 574, 543, 1009, 978, 947, 885, 854, 823, + 761, 730, 699, 637, 606, 575, 1010, 979, 886, 855, 762, 731, 638, + 607, 1011, 887, 763, 639, 916, 792, 668, 948, 917, 824, 793, 700, + 669, 980, 949, 918, 856, 825, 794, 732, 701, 670, 1012, 981, 950, + 919, 888, 857, 826, 795, 764, 733, 702, 671, 1013, 982, 951, 889, + 858, 827, 765, 734, 703, 1014, 983, 890, 859, 766, 735, 1015, 891, + 767, 920, 796, 952, 921, 828, 797, 984, 953, 922, 860, 829, 798, + 1016, 985, 954, 923, 892, 861, 830, 799, 1017, 986, 955, 893, 862, + 831, 1018, 987, 894, 863, 1019, 895, 924, 956, 925, 988, 957, 926, + 1020, 989, 958, 927, 1021, 990, 959, 1022, 991, 1023, + }; + + // Neighborhood 2-tuples for various scans and blocksizes, + // in {top, left} order for each position in corresponding scan order. 
+ private static readonly short[] DefaultScan4X4Neighbors = new short[] + { + 0, 0, 0, 0, 0, 0, 1, 4, 4, 4, 1, 1, 8, 8, 5, 8, 2, + 2, 2, 5, 9, 12, 6, 9, 3, 6, 10, 13, 7, 10, 11, 14, 0, 0, + }; + + private static readonly short[] ColScan4X4Neighbors = new short[] + { + 0, 0, 0, 0, 4, 4, 0, 0, 8, 8, 1, 1, 5, 5, 1, 1, 9, + 9, 2, 2, 6, 6, 2, 2, 3, 3, 10, 10, 7, 7, 11, 11, 0, 0, + }; + + private static readonly short[] RowScan4X4Neighbors = new short[] + { + 0, 0, 0, 0, 0, 0, 1, 1, 4, 4, 2, 2, 5, 5, 4, 4, 8, + 8, 6, 6, 8, 8, 9, 9, 12, 12, 10, 10, 13, 13, 14, 14, 0, 0, + }; + + private static readonly short[] ColScan8X8Neighbors = new short[] + { + 0, 0, 0, 0, 8, 8, 0, 0, 16, 16, 1, 1, 24, 24, 9, 9, 1, 1, 32, + 32, 17, 17, 2, 2, 25, 25, 10, 10, 40, 40, 2, 2, 18, 18, 33, 33, 3, 3, + 48, 48, 11, 11, 26, 26, 3, 3, 41, 41, 19, 19, 34, 34, 4, 4, 27, 27, 12, + 12, 49, 49, 42, 42, 20, 20, 4, 4, 35, 35, 5, 5, 28, 28, 50, 50, 43, 43, + 13, 13, 36, 36, 5, 5, 21, 21, 51, 51, 29, 29, 6, 6, 44, 44, 14, 14, 6, + 6, 37, 37, 52, 52, 22, 22, 7, 7, 30, 30, 45, 45, 15, 15, 38, 38, 23, 23, + 53, 53, 31, 31, 46, 46, 39, 39, 54, 54, 47, 47, 55, 55, 0, 0, + }; + + private static readonly short[] RowScan8X8Neighbors = new short[] + { + 0, 0, 0, 0, 1, 1, 0, 0, 8, 8, 2, 2, 8, 8, 9, 9, 3, 3, 16, + 16, 10, 10, 16, 16, 4, 4, 17, 17, 24, 24, 11, 11, 18, 18, 25, 25, 24, 24, + 5, 5, 12, 12, 19, 19, 32, 32, 26, 26, 6, 6, 33, 33, 32, 32, 20, 20, 27, + 27, 40, 40, 13, 13, 34, 34, 40, 40, 41, 41, 28, 28, 35, 35, 48, 48, 21, 21, + 42, 42, 14, 14, 48, 48, 36, 36, 49, 49, 43, 43, 29, 29, 56, 56, 22, 22, 50, + 50, 57, 57, 44, 44, 37, 37, 51, 51, 30, 30, 58, 58, 52, 52, 45, 45, 59, 59, + 38, 38, 60, 60, 46, 46, 53, 53, 54, 54, 61, 61, 62, 62, 0, 0, + }; + + private static readonly short[] DefaultScan8X8Neighbors = new short[] + { + 0, 0, 0, 0, 0, 0, 8, 8, 1, 8, 1, 1, 9, 16, 16, 16, 2, 9, 2, + 2, 10, 17, 17, 24, 24, 24, 3, 10, 3, 3, 18, 25, 25, 32, 11, 18, 32, 32, + 4, 11, 26, 33, 19, 26, 4, 4, 33, 40, 12, 19, 
40, 40, 5, 12, 27, 34, 34, + 41, 20, 27, 13, 20, 5, 5, 41, 48, 48, 48, 28, 35, 35, 42, 21, 28, 6, 6, + 6, 13, 42, 49, 49, 56, 36, 43, 14, 21, 29, 36, 7, 14, 43, 50, 50, 57, 22, + 29, 37, 44, 15, 22, 44, 51, 51, 58, 30, 37, 23, 30, 52, 59, 45, 52, 38, 45, + 31, 38, 53, 60, 46, 53, 39, 46, 54, 61, 47, 54, 55, 62, 0, 0, + }; + + private static readonly short[] ColScan16X16Neighbors = new short[] + { + 0, 0, 0, 0, 16, 16, 32, 32, 0, 0, 48, 48, 1, 1, 64, + 64, 17, 17, 80, 80, 33, 33, 1, 1, 49, 49, 96, 96, 2, 2, + 65, 65, 18, 18, 112, 112, 34, 34, 81, 81, 2, 2, 50, 50, 128, + 128, 3, 3, 97, 97, 19, 19, 66, 66, 144, 144, 82, 82, 35, 35, + 113, 113, 3, 3, 51, 51, 160, 160, 4, 4, 98, 98, 129, 129, 67, + 67, 20, 20, 83, 83, 114, 114, 36, 36, 176, 176, 4, 4, 145, 145, + 52, 52, 99, 99, 5, 5, 130, 130, 68, 68, 192, 192, 161, 161, 21, + 21, 115, 115, 84, 84, 37, 37, 146, 146, 208, 208, 53, 53, 5, 5, + 100, 100, 177, 177, 131, 131, 69, 69, 6, 6, 224, 224, 116, 116, 22, + 22, 162, 162, 85, 85, 147, 147, 38, 38, 193, 193, 101, 101, 54, 54, + 6, 6, 132, 132, 178, 178, 70, 70, 163, 163, 209, 209, 7, 7, 117, + 117, 23, 23, 148, 148, 7, 7, 86, 86, 194, 194, 225, 225, 39, 39, + 179, 179, 102, 102, 133, 133, 55, 55, 164, 164, 8, 8, 71, 71, 210, + 210, 118, 118, 149, 149, 195, 195, 24, 24, 87, 87, 40, 40, 56, 56, + 134, 134, 180, 180, 226, 226, 103, 103, 8, 8, 165, 165, 211, 211, 72, + 72, 150, 150, 9, 9, 119, 119, 25, 25, 88, 88, 196, 196, 41, 41, + 135, 135, 181, 181, 104, 104, 57, 57, 227, 227, 166, 166, 120, 120, 151, + 151, 197, 197, 73, 73, 9, 9, 212, 212, 89, 89, 136, 136, 182, 182, + 10, 10, 26, 26, 105, 105, 167, 167, 228, 228, 152, 152, 42, 42, 121, + 121, 213, 213, 58, 58, 198, 198, 74, 74, 137, 137, 183, 183, 168, 168, + 10, 10, 90, 90, 229, 229, 11, 11, 106, 106, 214, 214, 153, 153, 27, + 27, 199, 199, 43, 43, 184, 184, 122, 122, 169, 169, 230, 230, 59, 59, + 11, 11, 75, 75, 138, 138, 200, 200, 215, 215, 91, 91, 12, 12, 28, + 28, 185, 185, 107, 107, 154, 154, 44, 44, 231, 
231, 216, 216, 60, 60, + 123, 123, 12, 12, 76, 76, 201, 201, 170, 170, 232, 232, 139, 139, 92, + 92, 13, 13, 108, 108, 29, 29, 186, 186, 217, 217, 155, 155, 45, 45, + 13, 13, 61, 61, 124, 124, 14, 14, 233, 233, 77, 77, 14, 14, 171, + 171, 140, 140, 202, 202, 30, 30, 93, 93, 109, 109, 46, 46, 156, 156, + 62, 62, 187, 187, 15, 15, 125, 125, 218, 218, 78, 78, 31, 31, 172, + 172, 47, 47, 141, 141, 94, 94, 234, 234, 203, 203, 63, 63, 110, 110, + 188, 188, 157, 157, 126, 126, 79, 79, 173, 173, 95, 95, 219, 219, 142, + 142, 204, 204, 235, 235, 111, 111, 158, 158, 127, 127, 189, 189, 220, 220, + 143, 143, 174, 174, 205, 205, 236, 236, 159, 159, 190, 190, 221, 221, 175, + 175, 237, 237, 206, 206, 222, 222, 191, 191, 238, 238, 207, 207, 223, 223, + 239, 239, 0, 0, + }; + + private static readonly short[] RowScan16X16Neighbors = new short[] + { + 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 16, 16, 3, 3, 17, + 17, 16, 16, 4, 4, 32, 32, 18, 18, 5, 5, 33, 33, 32, 32, + 19, 19, 48, 48, 6, 6, 34, 34, 20, 20, 49, 49, 48, 48, 7, + 7, 35, 35, 64, 64, 21, 21, 50, 50, 36, 36, 64, 64, 8, 8, + 65, 65, 51, 51, 22, 22, 37, 37, 80, 80, 66, 66, 9, 9, 52, + 52, 23, 23, 81, 81, 67, 67, 80, 80, 38, 38, 10, 10, 53, 53, + 82, 82, 96, 96, 68, 68, 24, 24, 97, 97, 83, 83, 39, 39, 96, + 96, 54, 54, 11, 11, 69, 69, 98, 98, 112, 112, 84, 84, 25, 25, + 40, 40, 55, 55, 113, 113, 99, 99, 12, 12, 70, 70, 112, 112, 85, + 85, 26, 26, 114, 114, 100, 100, 128, 128, 41, 41, 56, 56, 71, 71, + 115, 115, 13, 13, 86, 86, 129, 129, 101, 101, 128, 128, 72, 72, 130, + 130, 116, 116, 27, 27, 57, 57, 14, 14, 87, 87, 42, 42, 144, 144, + 102, 102, 131, 131, 145, 145, 117, 117, 73, 73, 144, 144, 88, 88, 132, + 132, 103, 103, 28, 28, 58, 58, 146, 146, 118, 118, 43, 43, 160, 160, + 147, 147, 89, 89, 104, 104, 133, 133, 161, 161, 119, 119, 160, 160, 74, + 74, 134, 134, 148, 148, 29, 29, 59, 59, 162, 162, 176, 176, 44, 44, + 120, 120, 90, 90, 105, 105, 163, 163, 177, 177, 149, 149, 176, 176, 135, + 135, 164, 164, 178, 178, 30, 30, 150, 150, 
192, 192, 75, 75, 121, 121, + 60, 60, 136, 136, 193, 193, 106, 106, 151, 151, 179, 179, 192, 192, 45, + 45, 165, 165, 166, 166, 194, 194, 91, 91, 180, 180, 137, 137, 208, 208, + 122, 122, 152, 152, 208, 208, 195, 195, 76, 76, 167, 167, 209, 209, 181, + 181, 224, 224, 107, 107, 196, 196, 61, 61, 153, 153, 224, 224, 182, 182, + 168, 168, 210, 210, 46, 46, 138, 138, 92, 92, 183, 183, 225, 225, 211, + 211, 240, 240, 197, 197, 169, 169, 123, 123, 154, 154, 198, 198, 77, 77, + 212, 212, 184, 184, 108, 108, 226, 226, 199, 199, 62, 62, 227, 227, 241, + 241, 139, 139, 213, 213, 170, 170, 185, 185, 155, 155, 228, 228, 242, 242, + 124, 124, 93, 93, 200, 200, 243, 243, 214, 214, 215, 215, 229, 229, 140, + 140, 186, 186, 201, 201, 78, 78, 171, 171, 109, 109, 156, 156, 244, 244, + 216, 216, 230, 230, 94, 94, 245, 245, 231, 231, 125, 125, 202, 202, 246, + 246, 232, 232, 172, 172, 217, 217, 141, 141, 110, 110, 157, 157, 187, 187, + 247, 247, 126, 126, 233, 233, 218, 218, 248, 248, 188, 188, 203, 203, 142, + 142, 173, 173, 158, 158, 249, 249, 234, 234, 204, 204, 219, 219, 174, 174, + 189, 189, 250, 250, 220, 220, 190, 190, 205, 205, 235, 235, 206, 206, 236, + 236, 251, 251, 221, 221, 252, 252, 222, 222, 237, 237, 238, 238, 253, 253, + 254, 254, 0, 0, + }; + + private static readonly short[] DefaultScan16X16Neighbors = new short[] + { + 0, 0, 0, 0, 0, 0, 16, 16, 1, 16, 1, 1, 32, 32, 17, + 32, 2, 17, 2, 2, 48, 48, 18, 33, 33, 48, 3, 18, 49, 64, + 64, 64, 34, 49, 3, 3, 19, 34, 50, 65, 4, 19, 65, 80, 80, + 80, 35, 50, 4, 4, 20, 35, 66, 81, 81, 96, 51, 66, 96, 96, + 5, 20, 36, 51, 82, 97, 21, 36, 67, 82, 97, 112, 5, 5, 52, + 67, 112, 112, 37, 52, 6, 21, 83, 98, 98, 113, 68, 83, 6, 6, + 113, 128, 22, 37, 53, 68, 84, 99, 99, 114, 128, 128, 114, 129, 69, + 84, 38, 53, 7, 22, 7, 7, 129, 144, 23, 38, 54, 69, 100, 115, + 85, 100, 115, 130, 144, 144, 130, 145, 39, 54, 70, 85, 8, 23, 55, + 70, 116, 131, 101, 116, 145, 160, 24, 39, 8, 8, 86, 101, 131, 146, + 160, 160, 146, 161, 71, 86, 40, 55, 
9, 24, 117, 132, 102, 117, 161, + 176, 132, 147, 56, 71, 87, 102, 25, 40, 147, 162, 9, 9, 176, 176, + 162, 177, 72, 87, 41, 56, 118, 133, 133, 148, 103, 118, 10, 25, 148, + 163, 57, 72, 88, 103, 177, 192, 26, 41, 163, 178, 192, 192, 10, 10, + 119, 134, 73, 88, 149, 164, 104, 119, 134, 149, 42, 57, 178, 193, 164, + 179, 11, 26, 58, 73, 193, 208, 89, 104, 135, 150, 120, 135, 27, 42, + 74, 89, 208, 208, 150, 165, 179, 194, 165, 180, 105, 120, 194, 209, 43, + 58, 11, 11, 136, 151, 90, 105, 151, 166, 180, 195, 59, 74, 121, 136, + 209, 224, 195, 210, 224, 224, 166, 181, 106, 121, 75, 90, 12, 27, 181, + 196, 12, 12, 210, 225, 152, 167, 167, 182, 137, 152, 28, 43, 196, 211, + 122, 137, 91, 106, 225, 240, 44, 59, 13, 28, 107, 122, 182, 197, 168, + 183, 211, 226, 153, 168, 226, 241, 60, 75, 197, 212, 138, 153, 29, 44, + 76, 91, 13, 13, 183, 198, 123, 138, 45, 60, 212, 227, 198, 213, 154, + 169, 169, 184, 227, 242, 92, 107, 61, 76, 139, 154, 14, 29, 14, 14, + 184, 199, 213, 228, 108, 123, 199, 214, 228, 243, 77, 92, 30, 45, 170, + 185, 155, 170, 185, 200, 93, 108, 124, 139, 214, 229, 46, 61, 200, 215, + 229, 244, 15, 30, 109, 124, 62, 77, 140, 155, 215, 230, 31, 46, 171, + 186, 186, 201, 201, 216, 78, 93, 230, 245, 125, 140, 47, 62, 216, 231, + 156, 171, 94, 109, 231, 246, 141, 156, 63, 78, 202, 217, 187, 202, 110, + 125, 217, 232, 172, 187, 232, 247, 79, 94, 157, 172, 126, 141, 203, 218, + 95, 110, 233, 248, 218, 233, 142, 157, 111, 126, 173, 188, 188, 203, 234, + 249, 219, 234, 127, 142, 158, 173, 204, 219, 189, 204, 143, 158, 235, 250, + 174, 189, 205, 220, 159, 174, 220, 235, 221, 236, 175, 190, 190, 205, 236, + 251, 206, 221, 237, 252, 191, 206, 222, 237, 207, 222, 238, 253, 223, 238, + 239, 254, 0, 0, + }; + + private static readonly short[] DefaultScan32X32Neighbors = new short[] + { + 0, 0, 0, 0, 0, 0, 32, 32, 1, 32, 1, 1, 64, 64, + 33, 64, 2, 33, 96, 96, 2, 2, 65, 96, 34, 65, 128, 128, + 97, 128, 3, 34, 66, 97, 3, 3, 35, 66, 98, 129, 129, 160, + 160, 160, 4, 35, 67, 
98, 192, 192, 4, 4, 130, 161, 161, 192, + 36, 67, 99, 130, 5, 36, 68, 99, 193, 224, 162, 193, 224, 224, + 131, 162, 37, 68, 100, 131, 5, 5, 194, 225, 225, 256, 256, 256, + 163, 194, 69, 100, 132, 163, 6, 37, 226, 257, 6, 6, 195, 226, + 257, 288, 101, 132, 288, 288, 38, 69, 164, 195, 133, 164, 258, 289, + 227, 258, 196, 227, 7, 38, 289, 320, 70, 101, 320, 320, 7, 7, + 165, 196, 39, 70, 102, 133, 290, 321, 259, 290, 228, 259, 321, 352, + 352, 352, 197, 228, 134, 165, 71, 102, 8, 39, 322, 353, 291, 322, + 260, 291, 103, 134, 353, 384, 166, 197, 229, 260, 40, 71, 8, 8, + 384, 384, 135, 166, 354, 385, 323, 354, 198, 229, 292, 323, 72, 103, + 261, 292, 9, 40, 385, 416, 167, 198, 104, 135, 230, 261, 355, 386, + 416, 416, 293, 324, 324, 355, 9, 9, 41, 72, 386, 417, 199, 230, + 136, 167, 417, 448, 262, 293, 356, 387, 73, 104, 387, 418, 231, 262, + 10, 41, 168, 199, 325, 356, 418, 449, 105, 136, 448, 448, 42, 73, + 294, 325, 200, 231, 10, 10, 357, 388, 137, 168, 263, 294, 388, 419, + 74, 105, 419, 450, 449, 480, 326, 357, 232, 263, 295, 326, 169, 200, + 11, 42, 106, 137, 480, 480, 450, 481, 358, 389, 264, 295, 201, 232, + 138, 169, 389, 420, 43, 74, 420, 451, 327, 358, 11, 11, 481, 512, + 233, 264, 451, 482, 296, 327, 75, 106, 170, 201, 482, 513, 512, 512, + 390, 421, 359, 390, 421, 452, 107, 138, 12, 43, 202, 233, 452, 483, + 265, 296, 328, 359, 139, 170, 44, 75, 483, 514, 513, 544, 234, 265, + 297, 328, 422, 453, 12, 12, 391, 422, 171, 202, 76, 107, 514, 545, + 453, 484, 544, 544, 266, 297, 203, 234, 108, 139, 329, 360, 298, 329, + 140, 171, 515, 546, 13, 44, 423, 454, 235, 266, 545, 576, 454, 485, + 45, 76, 172, 203, 330, 361, 576, 576, 13, 13, 267, 298, 546, 577, + 77, 108, 204, 235, 455, 486, 577, 608, 299, 330, 109, 140, 547, 578, + 14, 45, 14, 14, 141, 172, 578, 609, 331, 362, 46, 77, 173, 204, + 15, 15, 78, 109, 205, 236, 579, 610, 110, 141, 15, 46, 142, 173, + 47, 78, 174, 205, 16, 16, 79, 110, 206, 237, 16, 47, 111, 142, + 48, 79, 143, 174, 80, 111, 175, 206, 17, 
48, 17, 17, 207, 238, + 49, 80, 81, 112, 18, 18, 18, 49, 50, 81, 82, 113, 19, 50, + 51, 82, 83, 114, 608, 608, 484, 515, 360, 391, 236, 267, 112, 143, + 19, 19, 640, 640, 609, 640, 516, 547, 485, 516, 392, 423, 361, 392, + 268, 299, 237, 268, 144, 175, 113, 144, 20, 51, 20, 20, 672, 672, + 641, 672, 610, 641, 548, 579, 517, 548, 486, 517, 424, 455, 393, 424, + 362, 393, 300, 331, 269, 300, 238, 269, 176, 207, 145, 176, 114, 145, + 52, 83, 21, 52, 21, 21, 704, 704, 673, 704, 642, 673, 611, 642, + 580, 611, 549, 580, 518, 549, 487, 518, 456, 487, 425, 456, 394, 425, + 363, 394, 332, 363, 301, 332, 270, 301, 239, 270, 208, 239, 177, 208, + 146, 177, 115, 146, 84, 115, 53, 84, 22, 53, 22, 22, 705, 736, + 674, 705, 643, 674, 581, 612, 550, 581, 519, 550, 457, 488, 426, 457, + 395, 426, 333, 364, 302, 333, 271, 302, 209, 240, 178, 209, 147, 178, + 85, 116, 54, 85, 23, 54, 706, 737, 675, 706, 582, 613, 551, 582, + 458, 489, 427, 458, 334, 365, 303, 334, 210, 241, 179, 210, 86, 117, + 55, 86, 707, 738, 583, 614, 459, 490, 335, 366, 211, 242, 87, 118, + 736, 736, 612, 643, 488, 519, 364, 395, 240, 271, 116, 147, 23, 23, + 768, 768, 737, 768, 644, 675, 613, 644, 520, 551, 489, 520, 396, 427, + 365, 396, 272, 303, 241, 272, 148, 179, 117, 148, 24, 55, 24, 24, + 800, 800, 769, 800, 738, 769, 676, 707, 645, 676, 614, 645, 552, 583, + 521, 552, 490, 521, 428, 459, 397, 428, 366, 397, 304, 335, 273, 304, + 242, 273, 180, 211, 149, 180, 118, 149, 56, 87, 25, 56, 25, 25, + 832, 832, 801, 832, 770, 801, 739, 770, 708, 739, 677, 708, 646, 677, + 615, 646, 584, 615, 553, 584, 522, 553, 491, 522, 460, 491, 429, 460, + 398, 429, 367, 398, 336, 367, 305, 336, 274, 305, 243, 274, 212, 243, + 181, 212, 150, 181, 119, 150, 88, 119, 57, 88, 26, 57, 26, 26, + 833, 864, 802, 833, 771, 802, 709, 740, 678, 709, 647, 678, 585, 616, + 554, 585, 523, 554, 461, 492, 430, 461, 399, 430, 337, 368, 306, 337, + 275, 306, 213, 244, 182, 213, 151, 182, 89, 120, 58, 89, 27, 58, + 834, 865, 803, 834, 710, 
741, 679, 710, 586, 617, 555, 586, 462, 493, + 431, 462, 338, 369, 307, 338, 214, 245, 183, 214, 90, 121, 59, 90, + 835, 866, 711, 742, 587, 618, 463, 494, 339, 370, 215, 246, 91, 122, + 864, 864, 740, 771, 616, 647, 492, 523, 368, 399, 244, 275, 120, 151, + 27, 27, 896, 896, 865, 896, 772, 803, 741, 772, 648, 679, 617, 648, + 524, 555, 493, 524, 400, 431, 369, 400, 276, 307, 245, 276, 152, 183, + 121, 152, 28, 59, 28, 28, 928, 928, 897, 928, 866, 897, 804, 835, + 773, 804, 742, 773, 680, 711, 649, 680, 618, 649, 556, 587, 525, 556, + 494, 525, 432, 463, 401, 432, 370, 401, 308, 339, 277, 308, 246, 277, + 184, 215, 153, 184, 122, 153, 60, 91, 29, 60, 29, 29, 960, 960, + 929, 960, 898, 929, 867, 898, 836, 867, 805, 836, 774, 805, 743, 774, + 712, 743, 681, 712, 650, 681, 619, 650, 588, 619, 557, 588, 526, 557, + 495, 526, 464, 495, 433, 464, 402, 433, 371, 402, 340, 371, 309, 340, + 278, 309, 247, 278, 216, 247, 185, 216, 154, 185, 123, 154, 92, 123, + 61, 92, 30, 61, 30, 30, 961, 992, 930, 961, 899, 930, 837, 868, + 806, 837, 775, 806, 713, 744, 682, 713, 651, 682, 589, 620, 558, 589, + 527, 558, 465, 496, 434, 465, 403, 434, 341, 372, 310, 341, 279, 310, + 217, 248, 186, 217, 155, 186, 93, 124, 62, 93, 31, 62, 962, 993, + 931, 962, 838, 869, 807, 838, 714, 745, 683, 714, 590, 621, 559, 590, + 466, 497, 435, 466, 342, 373, 311, 342, 218, 249, 187, 218, 94, 125, + 63, 94, 963, 994, 839, 870, 715, 746, 591, 622, 467, 498, 343, 374, + 219, 250, 95, 126, 868, 899, 744, 775, 620, 651, 496, 527, 372, 403, + 248, 279, 124, 155, 900, 931, 869, 900, 776, 807, 745, 776, 652, 683, + 621, 652, 528, 559, 497, 528, 404, 435, 373, 404, 280, 311, 249, 280, + 156, 187, 125, 156, 932, 963, 901, 932, 870, 901, 808, 839, 777, 808, + 746, 777, 684, 715, 653, 684, 622, 653, 560, 591, 529, 560, 498, 529, + 436, 467, 405, 436, 374, 405, 312, 343, 281, 312, 250, 281, 188, 219, + 157, 188, 126, 157, 964, 995, 933, 964, 902, 933, 871, 902, 840, 871, + 809, 840, 778, 809, 747, 778, 716, 747, 
685, 716, 654, 685, 623, 654, + 592, 623, 561, 592, 530, 561, 499, 530, 468, 499, 437, 468, 406, 437, + 375, 406, 344, 375, 313, 344, 282, 313, 251, 282, 220, 251, 189, 220, + 158, 189, 127, 158, 965, 996, 934, 965, 903, 934, 841, 872, 810, 841, + 779, 810, 717, 748, 686, 717, 655, 686, 593, 624, 562, 593, 531, 562, + 469, 500, 438, 469, 407, 438, 345, 376, 314, 345, 283, 314, 221, 252, + 190, 221, 159, 190, 966, 997, 935, 966, 842, 873, 811, 842, 718, 749, + 687, 718, 594, 625, 563, 594, 470, 501, 439, 470, 346, 377, 315, 346, + 222, 253, 191, 222, 967, 998, 843, 874, 719, 750, 595, 626, 471, 502, + 347, 378, 223, 254, 872, 903, 748, 779, 624, 655, 500, 531, 376, 407, + 252, 283, 904, 935, 873, 904, 780, 811, 749, 780, 656, 687, 625, 656, + 532, 563, 501, 532, 408, 439, 377, 408, 284, 315, 253, 284, 936, 967, + 905, 936, 874, 905, 812, 843, 781, 812, 750, 781, 688, 719, 657, 688, + 626, 657, 564, 595, 533, 564, 502, 533, 440, 471, 409, 440, 378, 409, + 316, 347, 285, 316, 254, 285, 968, 999, 937, 968, 906, 937, 875, 906, + 844, 875, 813, 844, 782, 813, 751, 782, 720, 751, 689, 720, 658, 689, + 627, 658, 596, 627, 565, 596, 534, 565, 503, 534, 472, 503, 441, 472, + 410, 441, 379, 410, 348, 379, 317, 348, 286, 317, 255, 286, 969, 1000, + 938, 969, 907, 938, 845, 876, 814, 845, 783, 814, 721, 752, 690, 721, + 659, 690, 597, 628, 566, 597, 535, 566, 473, 504, 442, 473, 411, 442, + 349, 380, 318, 349, 287, 318, 970, 1001, 939, 970, 846, 877, 815, 846, + 722, 753, 691, 722, 598, 629, 567, 598, 474, 505, 443, 474, 350, 381, + 319, 350, 971, 1002, 847, 878, 723, 754, 599, 630, 475, 506, 351, 382, + 876, 907, 752, 783, 628, 659, 504, 535, 380, 411, 908, 939, 877, 908, + 784, 815, 753, 784, 660, 691, 629, 660, 536, 567, 505, 536, 412, 443, + 381, 412, 940, 971, 909, 940, 878, 909, 816, 847, 785, 816, 754, 785, + 692, 723, 661, 692, 630, 661, 568, 599, 537, 568, 506, 537, 444, 475, + 413, 444, 382, 413, 972, 1003, 941, 972, 910, 941, 879, 910, 848, 879, + 817, 848, 786, 817, 
755, 786, 724, 755, 693, 724, 662, 693, 631, 662, + 600, 631, 569, 600, 538, 569, 507, 538, 476, 507, 445, 476, 414, 445, + 383, 414, 973, 1004, 942, 973, 911, 942, 849, 880, 818, 849, 787, 818, + 725, 756, 694, 725, 663, 694, 601, 632, 570, 601, 539, 570, 477, 508, + 446, 477, 415, 446, 974, 1005, 943, 974, 850, 881, 819, 850, 726, 757, + 695, 726, 602, 633, 571, 602, 478, 509, 447, 478, 975, 1006, 851, 882, + 727, 758, 603, 634, 479, 510, 880, 911, 756, 787, 632, 663, 508, 539, + 912, 943, 881, 912, 788, 819, 757, 788, 664, 695, 633, 664, 540, 571, + 509, 540, 944, 975, 913, 944, 882, 913, 820, 851, 789, 820, 758, 789, + 696, 727, 665, 696, 634, 665, 572, 603, 541, 572, 510, 541, 976, 1007, + 945, 976, 914, 945, 883, 914, 852, 883, 821, 852, 790, 821, 759, 790, + 728, 759, 697, 728, 666, 697, 635, 666, 604, 635, 573, 604, 542, 573, + 511, 542, 977, 1008, 946, 977, 915, 946, 853, 884, 822, 853, 791, 822, + 729, 760, 698, 729, 667, 698, 605, 636, 574, 605, 543, 574, 978, 1009, + 947, 978, 854, 885, 823, 854, 730, 761, 699, 730, 606, 637, 575, 606, + 979, 1010, 855, 886, 731, 762, 607, 638, 884, 915, 760, 791, 636, 667, + 916, 947, 885, 916, 792, 823, 761, 792, 668, 699, 637, 668, 948, 979, + 917, 948, 886, 917, 824, 855, 793, 824, 762, 793, 700, 731, 669, 700, + 638, 669, 980, 1011, 949, 980, 918, 949, 887, 918, 856, 887, 825, 856, + 794, 825, 763, 794, 732, 763, 701, 732, 670, 701, 639, 670, 981, 1012, + 950, 981, 919, 950, 857, 888, 826, 857, 795, 826, 733, 764, 702, 733, + 671, 702, 982, 1013, 951, 982, 858, 889, 827, 858, 734, 765, 703, 734, + 983, 1014, 859, 890, 735, 766, 888, 919, 764, 795, 920, 951, 889, 920, + 796, 827, 765, 796, 952, 983, 921, 952, 890, 921, 828, 859, 797, 828, + 766, 797, 984, 1015, 953, 984, 922, 953, 891, 922, 860, 891, 829, 860, + 798, 829, 767, 798, 985, 1016, 954, 985, 923, 954, 861, 892, 830, 861, + 799, 830, 986, 1017, 955, 986, 862, 893, 831, 862, 987, 1018, 863, 894, + 892, 923, 924, 955, 893, 924, 956, 987, 925, 956, 894, 925, 
988, 1019, + 957, 988, 926, 957, 895, 926, 989, 1020, 958, 989, 927, 958, 990, 1021, + 959, 990, 991, 1022, 0, 0, + }; + + private static readonly short[] Vp9DefaultIscan4X4 = new short[] + { + 0, 2, 5, 8, 1, 3, 9, 12, 4, 7, 11, 14, 6, 10, 13, 15, + }; + + private static readonly short[] Vp9ColIscan4X4 = new short[] + { + 0, 3, 7, 11, 1, 5, 9, 12, 2, 6, 10, 14, 4, 8, 13, 15, + }; + + private static readonly short[] Vp9RowIscan4X4 = new short[] + { + 0, 1, 3, 5, 2, 4, 6, 9, 7, 8, 11, 13, 10, 12, 14, 15, + }; + + private static readonly short[] Vp9ColIscan8X8 = new short[] + { + 0, 3, 8, 15, 22, 32, 40, 47, 1, 5, 11, 18, 26, 34, 44, 51, + 2, 7, 13, 20, 28, 38, 46, 54, 4, 10, 16, 24, 31, 41, 50, 56, + 6, 12, 21, 27, 35, 43, 52, 58, 9, 17, 25, 33, 39, 48, 55, 60, + 14, 23, 30, 37, 45, 53, 59, 62, 19, 29, 36, 42, 49, 57, 61, 63, + }; + + private static readonly short[] Vp9RowIscan8X8 = new short[] + { + 0, 1, 2, 5, 8, 12, 19, 24, 3, 4, 7, 10, 15, 20, 30, 39, + 6, 9, 13, 16, 21, 27, 37, 46, 11, 14, 17, 23, 28, 34, 44, 52, + 18, 22, 25, 31, 35, 41, 50, 57, 26, 29, 33, 38, 43, 49, 55, 59, + 32, 36, 42, 47, 51, 54, 60, 61, 40, 45, 48, 53, 56, 58, 62, 63, + }; + + private static readonly short[] Vp9DefaultIscan8X8 = new short[] + { + 0, 2, 5, 9, 14, 22, 31, 37, 1, 4, 8, 13, 19, 26, 38, 44, + 3, 6, 10, 17, 24, 30, 42, 49, 7, 11, 15, 21, 29, 36, 47, 53, + 12, 16, 20, 27, 34, 43, 52, 57, 18, 23, 28, 35, 41, 48, 56, 60, + 25, 32, 39, 45, 50, 55, 59, 62, 33, 40, 46, 51, 54, 58, 61, 63, + }; + + private static readonly short[] Vp9ColIscan16X16 = new short[] + { + 0, 4, 11, 20, 31, 43, 59, 75, 85, 109, 130, 150, 165, 181, 195, 198, + 1, 6, 14, 23, 34, 47, 64, 81, 95, 114, 135, 153, 171, 188, 201, 212, + 2, 8, 16, 25, 38, 52, 67, 83, 101, 116, 136, 157, 172, 190, 205, 216, + 3, 10, 18, 29, 41, 55, 71, 89, 103, 119, 141, 159, 176, 194, 208, 218, + 5, 12, 21, 32, 45, 58, 74, 93, 104, 123, 144, 164, 179, 196, 210, 223, + 7, 15, 26, 37, 49, 63, 78, 96, 112, 129, 146, 166, 182, 200, 215, 
228, + 9, 19, 28, 39, 54, 69, 86, 102, 117, 132, 151, 170, 187, 206, 220, 230, + 13, 24, 35, 46, 60, 73, 91, 108, 122, 137, 154, 174, 189, 207, 224, 235, + 17, 30, 40, 53, 66, 82, 98, 115, 126, 142, 161, 180, 197, 213, 227, 237, + 22, 36, 48, 62, 76, 92, 105, 120, 133, 147, 167, 186, 203, 219, 232, 240, + 27, 44, 56, 70, 84, 99, 113, 127, 140, 156, 175, 193, 209, 226, 236, 244, + 33, 51, 68, 79, 94, 110, 125, 138, 149, 162, 184, 202, 217, 229, 241, 247, + 42, 61, 77, 90, 106, 121, 134, 148, 160, 173, 191, 211, 225, 238, 245, 251, + 50, 72, 87, 100, 118, 128, 145, 158, 168, 183, 204, 222, 233, 242, 249, 253, + 57, 80, 97, 111, 131, 143, 155, 169, 178, 192, 214, 231, 239, 246, 250, 254, + 65, 88, 107, 124, 139, 152, 163, 177, 185, 199, 221, 234, 243, 248, 252, 255, + }; + + private static readonly short[] Vp9RowIscan16X16 = new short[] + { + 0, 1, 2, 4, 6, 9, 12, 17, 22, 29, 36, 43, 54, 64, 76, + 86, 3, 5, 7, 11, 15, 19, 25, 32, 38, 48, 59, 68, 84, 99, + 115, 130, 8, 10, 13, 18, 23, 27, 33, 42, 51, 60, 72, 88, 103, + 119, 142, 167, 14, 16, 20, 26, 31, 37, 44, 53, 61, 73, 85, 100, + 116, 135, 161, 185, 21, 24, 30, 35, 40, 47, 55, 65, 74, 81, 94, + 112, 133, 154, 179, 205, 28, 34, 39, 45, 50, 58, 67, 77, 87, 96, + 106, 121, 146, 169, 196, 212, 41, 46, 49, 56, 63, 70, 79, 90, 98, + 107, 122, 138, 159, 182, 207, 222, 52, 57, 62, 69, 75, 83, 93, 102, + 110, 120, 134, 150, 176, 195, 215, 226, 66, 71, 78, 82, 91, 97, 108, + 113, 127, 136, 148, 168, 188, 202, 221, 232, 80, 89, 92, 101, 105, 114, + 125, 131, 139, 151, 162, 177, 192, 208, 223, 234, 95, 104, 109, 117, 123, + 128, 143, 144, 155, 165, 175, 190, 206, 219, 233, 239, 111, 118, 124, 129, + 140, 147, 157, 164, 170, 181, 191, 203, 224, 230, 240, 243, 126, 132, 137, + 145, 153, 160, 174, 178, 184, 197, 204, 216, 231, 237, 244, 246, 141, 149, + 156, 166, 172, 180, 189, 199, 200, 210, 220, 228, 238, 242, 249, 251, 152, + 163, 171, 183, 186, 193, 201, 211, 214, 218, 227, 236, 245, 247, 252, 253, + 158, 173, 187, 194, 198, 
209, 213, 217, 225, 229, 235, 241, 248, 250, 254, + 255, + }; + + private static readonly short[] Vp9DefaultIscan16X16 = new short[] + { + 0, 2, 5, 9, 17, 24, 36, 44, 55, 72, 88, 104, 128, 143, 166, + 179, 1, 4, 8, 13, 20, 30, 40, 54, 66, 79, 96, 113, 141, 154, + 178, 196, 3, 7, 11, 18, 25, 33, 46, 57, 71, 86, 101, 119, 148, + 164, 186, 201, 6, 12, 16, 23, 31, 39, 53, 64, 78, 92, 110, 127, + 153, 169, 193, 208, 10, 14, 19, 28, 37, 47, 58, 67, 84, 98, 114, + 133, 161, 176, 198, 214, 15, 21, 26, 34, 43, 52, 65, 77, 91, 106, + 120, 140, 165, 185, 205, 221, 22, 27, 32, 41, 48, 60, 73, 85, 99, + 116, 130, 151, 175, 190, 211, 225, 29, 35, 42, 49, 59, 69, 81, 95, + 108, 125, 139, 155, 182, 197, 217, 229, 38, 45, 51, 61, 68, 80, 93, + 105, 118, 134, 150, 168, 191, 207, 223, 234, 50, 56, 63, 74, 83, 94, + 109, 117, 129, 147, 163, 177, 199, 213, 228, 238, 62, 70, 76, 87, 97, + 107, 122, 131, 145, 159, 172, 188, 210, 222, 235, 242, 75, 82, 90, 102, + 112, 124, 138, 146, 157, 173, 187, 202, 219, 230, 240, 245, 89, 100, 111, + 123, 132, 142, 156, 167, 180, 189, 203, 216, 231, 237, 246, 250, 103, 115, + 126, 136, 149, 162, 171, 183, 194, 204, 215, 224, 236, 241, 248, 252, 121, + 135, 144, 158, 170, 181, 192, 200, 209, 218, 227, 233, 243, 244, 251, 254, + 137, 152, 160, 174, 184, 195, 206, 212, 220, 226, 232, 239, 247, 249, 253, + 255, + }; + + private static readonly short[] Vp9DefaultIscan32X32 = new short[] + { + 0, 2, 5, 10, 17, 25, 38, 47, 62, 83, 101, 121, 145, + 170, 193, 204, 210, 219, 229, 233, 245, 257, 275, 299, 342, 356, + 377, 405, 455, 471, 495, 527, 1, 4, 8, 15, 22, 30, 45, + 58, 74, 92, 112, 133, 158, 184, 203, 215, 222, 228, 234, 237, + 256, 274, 298, 317, 355, 376, 404, 426, 470, 494, 526, 551, 3, + 7, 12, 18, 28, 36, 52, 64, 82, 102, 118, 142, 164, 189, + 208, 217, 224, 231, 235, 238, 273, 297, 316, 329, 375, 403, 425, + 440, 493, 525, 550, 567, 6, 11, 16, 23, 31, 43, 60, 73, + 90, 109, 126, 150, 173, 196, 211, 220, 226, 232, 236, 239, 296, + 315, 328, 335, 
402, 424, 439, 447, 524, 549, 566, 575, 9, 14, + 19, 29, 37, 50, 65, 78, 95, 116, 134, 157, 179, 201, 214, + 223, 244, 255, 272, 295, 341, 354, 374, 401, 454, 469, 492, 523, + 582, 596, 617, 645, 13, 20, 26, 35, 44, 54, 72, 85, 105, + 123, 140, 163, 182, 205, 216, 225, 254, 271, 294, 314, 353, 373, + 400, 423, 468, 491, 522, 548, 595, 616, 644, 666, 21, 27, 33, + 42, 53, 63, 80, 94, 113, 132, 151, 172, 190, 209, 218, 227, + 270, 293, 313, 327, 372, 399, 422, 438, 490, 521, 547, 565, 615, + 643, 665, 680, 24, 32, 39, 48, 57, 71, 88, 104, 120, 139, + 159, 178, 197, 212, 221, 230, 292, 312, 326, 334, 398, 421, 437, + 446, 520, 546, 564, 574, 642, 664, 679, 687, 34, 40, 46, 56, + 68, 81, 96, 111, 130, 147, 167, 186, 243, 253, 269, 291, 340, + 352, 371, 397, 453, 467, 489, 519, 581, 594, 614, 641, 693, 705, + 723, 747, 41, 49, 55, 67, 77, 91, 107, 124, 138, 161, 177, + 194, 252, 268, 290, 311, 351, 370, 396, 420, 466, 488, 518, 545, + 593, 613, 640, 663, 704, 722, 746, 765, 51, 59, 66, 76, 89, + 99, 119, 131, 149, 168, 181, 200, 267, 289, 310, 325, 369, 395, + 419, 436, 487, 517, 544, 563, 612, 639, 662, 678, 721, 745, 764, + 777, 61, 69, 75, 87, 100, 114, 129, 144, 162, 180, 191, 207, + 288, 309, 324, 333, 394, 418, 435, 445, 516, 543, 562, 573, 638, + 661, 677, 686, 744, 763, 776, 783, 70, 79, 86, 97, 108, 122, + 137, 155, 242, 251, 266, 287, 339, 350, 368, 393, 452, 465, 486, + 515, 580, 592, 611, 637, 692, 703, 720, 743, 788, 798, 813, 833, + 84, 93, 103, 110, 125, 141, 154, 171, 250, 265, 286, 308, 349, + 367, 392, 417, 464, 485, 514, 542, 591, 610, 636, 660, 702, 719, + 742, 762, 797, 812, 832, 848, 98, 106, 115, 127, 143, 156, 169, + 185, 264, 285, 307, 323, 366, 391, 416, 434, 484, 513, 541, 561, + 609, 635, 659, 676, 718, 741, 761, 775, 811, 831, 847, 858, 117, + 128, 136, 148, 160, 175, 188, 198, 284, 306, 322, 332, 390, 415, + 433, 444, 512, 540, 560, 572, 634, 658, 675, 685, 740, 760, 774, + 782, 830, 846, 857, 863, 135, 146, 152, 165, 241, 249, 263, 283, + 
338, 348, 365, 389, 451, 463, 483, 511, 579, 590, 608, 633, 691, + 701, 717, 739, 787, 796, 810, 829, 867, 875, 887, 903, 153, 166, + 174, 183, 248, 262, 282, 305, 347, 364, 388, 414, 462, 482, 510, + 539, 589, 607, 632, 657, 700, 716, 738, 759, 795, 809, 828, 845, + 874, 886, 902, 915, 176, 187, 195, 202, 261, 281, 304, 321, 363, + 387, 413, 432, 481, 509, 538, 559, 606, 631, 656, 674, 715, 737, + 758, 773, 808, 827, 844, 856, 885, 901, 914, 923, 192, 199, 206, + 213, 280, 303, 320, 331, 386, 412, 431, 443, 508, 537, 558, 571, + 630, 655, 673, 684, 736, 757, 772, 781, 826, 843, 855, 862, 900, + 913, 922, 927, 240, 247, 260, 279, 337, 346, 362, 385, 450, 461, + 480, 507, 578, 588, 605, 629, 690, 699, 714, 735, 786, 794, 807, + 825, 866, 873, 884, 899, 930, 936, 945, 957, 246, 259, 278, 302, + 345, 361, 384, 411, 460, 479, 506, 536, 587, 604, 628, 654, 698, + 713, 734, 756, 793, 806, 824, 842, 872, 883, 898, 912, 935, 944, + 956, 966, 258, 277, 301, 319, 360, 383, 410, 430, 478, 505, 535, + 557, 603, 627, 653, 672, 712, 733, 755, 771, 805, 823, 841, 854, + 882, 897, 911, 921, 943, 955, 965, 972, 276, 300, 318, 330, 382, + 409, 429, 442, 504, 534, 556, 570, 626, 652, 671, 683, 732, 754, + 770, 780, 822, 840, 853, 861, 896, 910, 920, 926, 954, 964, 971, + 975, 336, 344, 359, 381, 449, 459, 477, 503, 577, 586, 602, 625, + 689, 697, 711, 731, 785, 792, 804, 821, 865, 871, 881, 895, 929, + 934, 942, 953, 977, 981, 987, 995, 343, 358, 380, 408, 458, 476, + 502, 533, 585, 601, 624, 651, 696, 710, 730, 753, 791, 803, 820, + 839, 870, 880, 894, 909, 933, 941, 952, 963, 980, 986, 994, 1001, + 357, 379, 407, 428, 475, 501, 532, 555, 600, 623, 650, 670, 709, + 729, 752, 769, 802, 819, 838, 852, 879, 893, 908, 919, 940, 951, + 962, 970, 985, 993, 1000, 1005, 378, 406, 427, 441, 500, 531, 554, + 569, 622, 649, 669, 682, 728, 751, 768, 779, 818, 837, 851, 860, + 892, 907, 918, 925, 950, 961, 969, 974, 992, 999, 1004, 1007, 448, + 457, 474, 499, 576, 584, 599, 621, 688, 695, 708, 
727, 784, 790, + 801, 817, 864, 869, 878, 891, 928, 932, 939, 949, 976, 979, 984, + 991, 1008, 1010, 1013, 1017, 456, 473, 498, 530, 583, 598, 620, 648, + 694, 707, 726, 750, 789, 800, 816, 836, 868, 877, 890, 906, 931, + 938, 948, 960, 978, 983, 990, 998, 1009, 1012, 1016, 1020, 472, 497, + 529, 553, 597, 619, 647, 668, 706, 725, 749, 767, 799, 815, 835, + 850, 876, 889, 905, 917, 937, 947, 959, 968, 982, 989, 997, 1003, + 1011, 1015, 1019, 1022, 496, 528, 552, 568, 618, 646, 667, 681, 724, + 748, 766, 778, 814, 834, 849, 859, 888, 904, 916, 924, 946, 958, + 967, 973, 988, 996, 1002, 1006, 1014, 1018, 1021, 1023, + }; + + /* Bundles three short[] lookup tables (Scan, IScan, Neighbors) that are handed around together. The constructor stores the array references as given, without copying, and the properties are get-only. */ public class ScanOrder + { + public short[] Scan { get; } + public short[] IScan { get; } + public short[] Neighbors { get; } + + public ScanOrder(short[] scan, short[] iScan, short[] neighbors) + { + Scan = scan; + IScan = iScan; + Neighbors = neighbors; + } + } + + /* One ScanOrder per transform size, in the order 4x4, 8x8, 16x16, 32x32, each built from the Default* scan, iscan and neighbors tables. */ public static readonly ScanOrder[] Vp9DefaultScanOrders = new ScanOrder[] + { + new ScanOrder(DefaultScan4X4, Vp9DefaultIscan4X4, DefaultScan4X4Neighbors), + new ScanOrder(DefaultScan8X8, Vp9DefaultIscan8X8, DefaultScan8X8Neighbors), + new ScanOrder(DefaultScan16X16, Vp9DefaultIscan16X16, DefaultScan16X16Neighbors), + new ScanOrder(DefaultScan32X32, Vp9DefaultIscan32X32, DefaultScan32X32Neighbors) + }; + + /* Scan orders grouped by transform size (outer index; see the TX_* markers). Every group holds four entries in the order default, row, column, default; the TX_32X32 group uses the default tables in all four slots. NOTE(review): the inner index presumably selects the transform type - confirm against the callers. */ public static readonly ScanOrder[][] Vp9ScanOrders = new ScanOrder[][] + { + new ScanOrder[] + { // TX_4X4 + new ScanOrder(DefaultScan4X4, Vp9DefaultIscan4X4, DefaultScan4X4Neighbors), + new ScanOrder(RowScan4X4, Vp9RowIscan4X4, RowScan4X4Neighbors), + new ScanOrder(ColScan4X4, Vp9ColIscan4X4, ColScan4X4Neighbors), + new ScanOrder(DefaultScan4X4, Vp9DefaultIscan4X4, DefaultScan4X4Neighbors) + }, + new ScanOrder[] + { // TX_8X8 + new ScanOrder(DefaultScan8X8, Vp9DefaultIscan8X8, DefaultScan8X8Neighbors), + new ScanOrder(RowScan8X8, Vp9RowIscan8X8, RowScan8X8Neighbors), + new ScanOrder(ColScan8X8, Vp9ColIscan8X8, ColScan8X8Neighbors), + new ScanOrder(DefaultScan8X8,
Vp9DefaultIscan8X8, DefaultScan8X8Neighbors) + }, + new ScanOrder[] + { // TX_16X16 + new ScanOrder(DefaultScan16X16, Vp9DefaultIscan16X16, DefaultScan16X16Neighbors), + new ScanOrder(RowScan16X16, Vp9RowIscan16X16, RowScan16X16Neighbors), + new ScanOrder(ColScan16X16, Vp9ColIscan16X16, ColScan16X16Neighbors), + new ScanOrder(DefaultScan16X16, Vp9DefaultIscan16X16, DefaultScan16X16Neighbors) + }, + new ScanOrder[] + { // TX_32X32 + new ScanOrder(DefaultScan32X32, Vp9DefaultIscan32X32, DefaultScan32X32Neighbors), + new ScanOrder(DefaultScan32X32, Vp9DefaultIscan32X32, DefaultScan32X32Neighbors), + new ScanOrder(DefaultScan32X32, Vp9DefaultIscan32X32, DefaultScan32X32Neighbors), + new ScanOrder(DefaultScan32X32, Vp9DefaultIscan32X32, DefaultScan32X32Neighbors) + } + }; + + // Entropy MV + + public static readonly sbyte[] Vp9MvJointTree = new sbyte[] + { + -(sbyte)MvJointType.MvJointZero, 2, -(sbyte)MvJointType.MvJointHnzvz, 4, -(sbyte)MvJointType.MvJointHzvnz, -(sbyte)MvJointType.MvJointHnzvnz + }; + + public static readonly sbyte[] Vp9MvClassTree = new sbyte[] + { + -(sbyte)MvClassType.MvClass0, + 2, + -(sbyte)MvClassType.MvClass1, + 4, + 6, + 8, + -(sbyte)MvClassType.MvClass2, + -(sbyte)MvClassType.MvClass3, + 10, + 12, + -(sbyte)MvClassType.MvClass4, + -(sbyte)MvClassType.MvClass5, + -(sbyte)MvClassType.MvClass6, + 14, + 16, + 18, + -(sbyte)MvClassType.MvClass7, + -(sbyte)MvClassType.MvClass8, + -(sbyte)MvClassType.MvClass9, + -(sbyte)MvClassType.MvClass10, + }; + + public static readonly sbyte[] Vp9MvFPTree = new sbyte[] { -0, 2, -1, 4, -2, -3 }; + + // Entropy + + public static readonly byte[] Vp9Cat1Prob = new byte[] { 159 }; + public static readonly byte[] Vp9Cat2Prob = new byte[] { 165, 145 }; + public static readonly byte[] Vp9Cat3Prob = new byte[] { 173, 148, 140 }; + public static readonly byte[] Vp9Cat4Prob = new byte[] { 176, 155, 140, 135 }; + public static readonly byte[] Vp9Cat5Prob = new byte[] { 180, 157, 141, 134, 130 }; + public static readonly 
byte[] Vp9Cat6Prob = new byte[] { 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129 }; + + /* Extended variant of Vp9Cat6Prob: four leading 255 entries followed by the same 14 values (18 in total). */ + public static readonly byte[] Vp9Cat6ProbHigh12 = new byte[] + { + 255, 255, 255, 255, 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129 + }; + + /* Band lookup returned by get_band_translate for every transform size other than 4x4: 1024 entries, the first 22 are listed in CreateCoefbandTrans8X8Plus and every later entry is 5. */ + private static readonly byte[] Vp9CoefbandTrans8X8Plus = CreateCoefbandTrans8X8Plus(); + + /* Builds the Vp9CoefbandTrans8X8Plus table instead of spelling out about a thousand identical literals. */ + private static byte[] CreateCoefbandTrans8X8Plus() + { + byte[] head = new byte[] + { + 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5 + }; + + byte[] table = new byte[1024]; + + for (int i = 0; i < table.Length; i++) + { + /* Beyond MAXBAND_INDEX+1 all values are filled as 5 */ + table[i] = i < head.Length ? head[i] : (byte)5; + } + + return table; + } + + /* Band lookup returned by get_band_translate for 4x4 transforms. */ + private static readonly byte[] Vp9CoefbandTrans4X4 = new byte[] + { + 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, + }; + + /* Returns Vp9CoefbandTrans4X4 for TxSize.Tx4x4 and Vp9CoefbandTrans8X8Plus for every other transform size. */ + public static byte[] get_band_translate(TxSize txSize) + { + return txSize == TxSize.Tx4x4 ? 
Vp9CoefbandTrans4X4 : Vp9CoefbandTrans8X8Plus; + } + + public static readonly byte[][] Vp9Pareto8Full = new byte[][] + { + new byte[] { 3, 86, 128, 6, 86, 23, 88, 29 }, + new byte[] { 6, 86, 128, 11, 87, 42, 91, 52 }, + new byte[] { 9, 86, 129, 17, 88, 61, 94, 76 }, + new byte[] { 12, 86, 129, 22, 88, 77, 97, 93 }, + new byte[] { 15, 87, 129, 28, 89, 93, 100, 110 }, + new byte[] { 17, 87, 129, 33, 90, 105, 103, 123 }, + new byte[] { 20, 88, 130, 38, 91, 118, 106, 136 }, + new byte[] { 23, 88, 130, 43, 91, 128, 108, 146 }, + new byte[] { 26, 89, 131, 48, 92, 139, 111, 156 }, + new byte[] { 28, 89, 131, 53, 93, 147, 114, 163 }, + new byte[] { 31, 90, 131, 58, 94, 156, 117, 171 }, + new byte[] { 34, 90, 131, 62, 94, 163, 119, 177 }, + new byte[] { 37, 90, 132, 66, 95, 171, 122, 184 }, + new byte[] { 39, 90, 132, 70, 96, 177, 124, 189 }, + new byte[] { 42, 91, 132, 75, 97, 183, 127, 194 }, + new byte[] { 44, 91, 132, 79, 97, 188, 129, 198 }, + new byte[] { 47, 92, 133, 83, 98, 193, 132, 202 }, + new byte[] { 49, 92, 133, 86, 99, 197, 134, 205 }, + new byte[] { 52, 93, 133, 90, 100, 201, 137, 208 }, + new byte[] { 54, 93, 133, 94, 100, 204, 139, 211 }, + new byte[] { 57, 94, 134, 98, 101, 208, 142, 214 }, + new byte[] { 59, 94, 134, 101, 102, 211, 144, 216 }, + new byte[] { 62, 94, 135, 105, 103, 214, 146, 218 }, + new byte[] { 64, 94, 135, 108, 103, 216, 148, 220 }, + new byte[] { 66, 95, 135, 111, 104, 219, 151, 222 }, + new byte[] { 68, 95, 135, 114, 105, 221, 153, 223 }, + new byte[] { 71, 96, 136, 117, 106, 224, 155, 225 }, + new byte[] { 73, 96, 136, 120, 106, 225, 157, 226 }, + new byte[] { 76, 97, 136, 123, 107, 227, 159, 228 }, + new byte[] { 78, 97, 136, 126, 108, 229, 160, 229 }, + new byte[] { 80, 98, 137, 129, 109, 231, 162, 231 }, + new byte[] { 82, 98, 137, 131, 109, 232, 164, 232 }, + new byte[] { 84, 98, 138, 134, 110, 234, 166, 233 }, + new byte[] { 86, 98, 138, 137, 111, 235, 168, 234 }, + new byte[] { 89, 99, 138, 140, 112, 236, 170, 235 }, + new 
byte[] { 91, 99, 138, 142, 112, 237, 171, 235 }, + new byte[] { 93, 100, 139, 145, 113, 238, 173, 236 }, + new byte[] { 95, 100, 139, 147, 114, 239, 174, 237 }, + new byte[] { 97, 101, 140, 149, 115, 240, 176, 238 }, + new byte[] { 99, 101, 140, 151, 115, 241, 177, 238 }, + new byte[] { 101, 102, 140, 154, 116, 242, 179, 239 }, + new byte[] { 103, 102, 140, 156, 117, 242, 180, 239 }, + new byte[] { 105, 103, 141, 158, 118, 243, 182, 240 }, + new byte[] { 107, 103, 141, 160, 118, 243, 183, 240 }, + new byte[] { 109, 104, 141, 162, 119, 244, 185, 241 }, + new byte[] { 111, 104, 141, 164, 119, 244, 186, 241 }, + new byte[] { 113, 104, 142, 166, 120, 245, 187, 242 }, + new byte[] { 114, 104, 142, 168, 121, 245, 188, 242 }, + new byte[] { 116, 105, 143, 170, 122, 246, 190, 243 }, + new byte[] { 118, 105, 143, 171, 122, 246, 191, 243 }, + new byte[] { 120, 106, 143, 173, 123, 247, 192, 244 }, + new byte[] { 121, 106, 143, 175, 124, 247, 193, 244 }, + new byte[] { 123, 107, 144, 177, 125, 248, 195, 244 }, + new byte[] { 125, 107, 144, 178, 125, 248, 196, 244 }, + new byte[] { 127, 108, 145, 180, 126, 249, 197, 245 }, + new byte[] { 128, 108, 145, 181, 127, 249, 198, 245 }, + new byte[] { 130, 109, 145, 183, 128, 249, 199, 245 }, + new byte[] { 132, 109, 145, 184, 128, 249, 200, 245 }, + new byte[] { 134, 110, 146, 186, 129, 250, 201, 246 }, + new byte[] { 135, 110, 146, 187, 130, 250, 202, 246 }, + new byte[] { 137, 111, 147, 189, 131, 251, 203, 246 }, + new byte[] { 138, 111, 147, 190, 131, 251, 204, 246 }, + new byte[] { 140, 112, 147, 192, 132, 251, 205, 247 }, + new byte[] { 141, 112, 147, 193, 132, 251, 206, 247 }, + new byte[] { 143, 113, 148, 194, 133, 251, 207, 247 }, + new byte[] { 144, 113, 148, 195, 134, 251, 207, 247 }, + new byte[] { 146, 114, 149, 197, 135, 252, 208, 248 }, + new byte[] { 147, 114, 149, 198, 135, 252, 209, 248 }, + new byte[] { 149, 115, 149, 199, 136, 252, 210, 248 }, + new byte[] { 150, 115, 149, 200, 137, 252, 210, 248 }, + new byte[] { 
152, 115, 150, 201, 138, 252, 211, 248 }, + new byte[] { 153, 115, 150, 202, 138, 252, 212, 248 }, + new byte[] { 155, 116, 151, 204, 139, 253, 213, 249 }, + new byte[] { 156, 116, 151, 205, 139, 253, 213, 249 }, + new byte[] { 158, 117, 151, 206, 140, 253, 214, 249 }, + new byte[] { 159, 117, 151, 207, 141, 253, 215, 249 }, + new byte[] { 161, 118, 152, 208, 142, 253, 216, 249 }, + new byte[] { 162, 118, 152, 209, 142, 253, 216, 249 }, + new byte[] { 163, 119, 153, 210, 143, 253, 217, 249 }, + new byte[] { 164, 119, 153, 211, 143, 253, 217, 249 }, + new byte[] { 166, 120, 153, 212, 144, 254, 218, 250 }, + new byte[] { 167, 120, 153, 212, 145, 254, 219, 250 }, + new byte[] { 168, 121, 154, 213, 146, 254, 220, 250 }, + new byte[] { 169, 121, 154, 214, 146, 254, 220, 250 }, + new byte[] { 171, 122, 155, 215, 147, 254, 221, 250 }, + new byte[] { 172, 122, 155, 216, 147, 254, 221, 250 }, + new byte[] { 173, 123, 155, 217, 148, 254, 222, 250 }, + new byte[] { 174, 123, 155, 217, 149, 254, 222, 250 }, + new byte[] { 176, 124, 156, 218, 150, 254, 223, 250 }, + new byte[] { 177, 124, 156, 219, 150, 254, 223, 250 }, + new byte[] { 178, 125, 157, 220, 151, 254, 224, 251 }, + new byte[] { 179, 125, 157, 220, 151, 254, 224, 251 }, + new byte[] { 180, 126, 157, 221, 152, 254, 225, 251 }, + new byte[] { 181, 126, 157, 221, 152, 254, 225, 251 }, + new byte[] { 183, 127, 158, 222, 153, 254, 226, 251 }, + new byte[] { 184, 127, 158, 223, 154, 254, 226, 251 }, + new byte[] { 185, 128, 159, 224, 155, 255, 227, 251 }, + new byte[] { 186, 128, 159, 224, 155, 255, 227, 251 }, + new byte[] { 187, 129, 160, 225, 156, 255, 228, 251 }, + new byte[] { 188, 130, 160, 225, 156, 255, 228, 251 }, + new byte[] { 189, 131, 160, 226, 157, 255, 228, 251 }, + new byte[] { 190, 131, 160, 226, 158, 255, 228, 251 }, + new byte[] { 191, 132, 161, 227, 159, 255, 229, 251 }, + new byte[] { 192, 132, 161, 227, 159, 255, 229, 251 }, + new byte[] { 193, 133, 162, 228, 160, 255, 230, 252 }, + new byte[] { 194, 
133, 162, 229, 160, 255, 230, 252 }, + new byte[] { 195, 134, 163, 230, 161, 255, 231, 252 }, + new byte[] { 196, 134, 163, 230, 161, 255, 231, 252 }, + new byte[] { 197, 135, 163, 231, 162, 255, 231, 252 }, + new byte[] { 198, 135, 163, 231, 162, 255, 231, 252 }, + new byte[] { 199, 136, 164, 232, 163, 255, 232, 252 }, + new byte[] { 200, 136, 164, 232, 164, 255, 232, 252 }, + new byte[] { 201, 137, 165, 233, 165, 255, 233, 252 }, + new byte[] { 201, 137, 165, 233, 165, 255, 233, 252 }, + new byte[] { 202, 138, 166, 233, 166, 255, 233, 252 }, + new byte[] { 203, 138, 166, 233, 166, 255, 233, 252 }, + new byte[] { 204, 139, 166, 234, 167, 255, 234, 252 }, + new byte[] { 205, 139, 166, 234, 167, 255, 234, 252 }, + new byte[] { 206, 140, 167, 235, 168, 255, 235, 252 }, + new byte[] { 206, 140, 167, 235, 168, 255, 235, 252 }, + new byte[] { 207, 141, 168, 236, 169, 255, 235, 252 }, + new byte[] { 208, 141, 168, 236, 170, 255, 235, 252 }, + new byte[] { 209, 142, 169, 237, 171, 255, 236, 252 }, + new byte[] { 209, 143, 169, 237, 171, 255, 236, 252 }, + new byte[] { 210, 144, 169, 237, 172, 255, 236, 252 }, + new byte[] { 211, 144, 169, 237, 172, 255, 236, 252 }, + new byte[] { 212, 145, 170, 238, 173, 255, 237, 252 }, + new byte[] { 213, 145, 170, 238, 173, 255, 237, 252 }, + new byte[] { 214, 146, 171, 239, 174, 255, 237, 253 }, + new byte[] { 214, 146, 171, 239, 174, 255, 237, 253 }, + new byte[] { 215, 147, 172, 240, 175, 255, 238, 253 }, + new byte[] { 215, 147, 172, 240, 175, 255, 238, 253 }, + new byte[] { 216, 148, 173, 240, 176, 255, 238, 253 }, + new byte[] { 217, 148, 173, 240, 176, 255, 238, 253 }, + new byte[] { 218, 149, 173, 241, 177, 255, 239, 253 }, + new byte[] { 218, 149, 173, 241, 178, 255, 239, 253 }, + new byte[] { 219, 150, 174, 241, 179, 255, 239, 253 }, + new byte[] { 219, 151, 174, 241, 179, 255, 239, 253 }, + new byte[] { 220, 152, 175, 242, 180, 255, 240, 253 }, + new byte[] { 221, 152, 175, 242, 180, 255, 240, 253 }, + new byte[] { 222, 153, 
176, 242, 181, 255, 240, 253 }, + new byte[] { 222, 153, 176, 242, 181, 255, 240, 253 }, + new byte[] { 223, 154, 177, 243, 182, 255, 240, 253 }, + new byte[] { 223, 154, 177, 243, 182, 255, 240, 253 }, + new byte[] { 224, 155, 178, 244, 183, 255, 241, 253 }, + new byte[] { 224, 155, 178, 244, 183, 255, 241, 253 }, + new byte[] { 225, 156, 178, 244, 184, 255, 241, 253 }, + new byte[] { 225, 157, 178, 244, 184, 255, 241, 253 }, + new byte[] { 226, 158, 179, 244, 185, 255, 242, 253 }, + new byte[] { 227, 158, 179, 244, 185, 255, 242, 253 }, + new byte[] { 228, 159, 180, 245, 186, 255, 242, 253 }, + new byte[] { 228, 159, 180, 245, 186, 255, 242, 253 }, + new byte[] { 229, 160, 181, 245, 187, 255, 242, 253 }, + new byte[] { 229, 160, 181, 245, 187, 255, 242, 253 }, + new byte[] { 230, 161, 182, 246, 188, 255, 243, 253 }, + new byte[] { 230, 162, 182, 246, 188, 255, 243, 253 }, + new byte[] { 231, 163, 183, 246, 189, 255, 243, 253 }, + new byte[] { 231, 163, 183, 246, 189, 255, 243, 253 }, + new byte[] { 232, 164, 184, 247, 190, 255, 243, 253 }, + new byte[] { 232, 164, 184, 247, 190, 255, 243, 253 }, + new byte[] { 233, 165, 185, 247, 191, 255, 244, 253 }, + new byte[] { 233, 165, 185, 247, 191, 255, 244, 253 }, + new byte[] { 234, 166, 185, 247, 192, 255, 244, 253 }, + new byte[] { 234, 167, 185, 247, 192, 255, 244, 253 }, + new byte[] { 235, 168, 186, 248, 193, 255, 244, 253 }, + new byte[] { 235, 168, 186, 248, 193, 255, 244, 253 }, + new byte[] { 236, 169, 187, 248, 194, 255, 244, 253 }, + new byte[] { 236, 169, 187, 248, 194, 255, 244, 253 }, + new byte[] { 236, 170, 188, 248, 195, 255, 245, 253 }, + new byte[] { 236, 170, 188, 248, 195, 255, 245, 253 }, + new byte[] { 237, 171, 189, 249, 196, 255, 245, 254 }, + new byte[] { 237, 172, 189, 249, 196, 255, 245, 254 }, + new byte[] { 238, 173, 190, 249, 197, 255, 245, 254 }, + new byte[] { 238, 173, 190, 249, 197, 255, 245, 254 }, + new byte[] { 239, 174, 191, 249, 198, 255, 245, 254 }, + new byte[] { 239, 174, 191, 
249, 198, 255, 245, 254 }, + new byte[] { 240, 175, 192, 249, 199, 255, 246, 254 }, + new byte[] { 240, 176, 192, 249, 199, 255, 246, 254 }, + new byte[] { 240, 177, 193, 250, 200, 255, 246, 254 }, + new byte[] { 240, 177, 193, 250, 200, 255, 246, 254 }, + new byte[] { 241, 178, 194, 250, 201, 255, 246, 254 }, + new byte[] { 241, 178, 194, 250, 201, 255, 246, 254 }, + new byte[] { 242, 179, 195, 250, 202, 255, 246, 254 }, + new byte[] { 242, 180, 195, 250, 202, 255, 246, 254 }, + new byte[] { 242, 181, 196, 250, 203, 255, 247, 254 }, + new byte[] { 242, 181, 196, 250, 203, 255, 247, 254 }, + new byte[] { 243, 182, 197, 251, 204, 255, 247, 254 }, + new byte[] { 243, 183, 197, 251, 204, 255, 247, 254 }, + new byte[] { 244, 184, 198, 251, 205, 255, 247, 254 }, + new byte[] { 244, 184, 198, 251, 205, 255, 247, 254 }, + new byte[] { 244, 185, 199, 251, 206, 255, 247, 254 }, + new byte[] { 244, 185, 199, 251, 206, 255, 247, 254 }, + new byte[] { 245, 186, 200, 251, 207, 255, 247, 254 }, + new byte[] { 245, 187, 200, 251, 207, 255, 247, 254 }, + new byte[] { 246, 188, 201, 252, 207, 255, 248, 254 }, + new byte[] { 246, 188, 201, 252, 207, 255, 248, 254 }, + new byte[] { 246, 189, 202, 252, 208, 255, 248, 254 }, + new byte[] { 246, 190, 202, 252, 208, 255, 248, 254 }, + new byte[] { 247, 191, 203, 252, 209, 255, 248, 254 }, + new byte[] { 247, 191, 203, 252, 209, 255, 248, 254 }, + new byte[] { 247, 192, 204, 252, 210, 255, 248, 254 }, + new byte[] { 247, 193, 204, 252, 210, 255, 248, 254 }, + new byte[] { 248, 194, 205, 252, 211, 255, 248, 254 }, + new byte[] { 248, 194, 205, 252, 211, 255, 248, 254 }, + new byte[] { 248, 195, 206, 252, 212, 255, 249, 254 }, + new byte[] { 248, 196, 206, 252, 212, 255, 249, 254 }, + new byte[] { 249, 197, 207, 253, 213, 255, 249, 254 }, + new byte[] { 249, 197, 207, 253, 213, 255, 249, 254 }, + new byte[] { 249, 198, 208, 253, 214, 255, 249, 254 }, + new byte[] { 249, 199, 209, 253, 214, 255, 249, 254 }, + new byte[] { 250, 200, 210, 253, 
215, 255, 249, 254 }, + new byte[] { 250, 200, 210, 253, 215, 255, 249, 254 }, + new byte[] { 250, 201, 211, 253, 215, 255, 249, 254 }, + new byte[] { 250, 202, 211, 253, 215, 255, 249, 254 }, + new byte[] { 250, 203, 212, 253, 216, 255, 249, 254 }, + new byte[] { 250, 203, 212, 253, 216, 255, 249, 254 }, + new byte[] { 251, 204, 213, 253, 217, 255, 250, 254 }, + new byte[] { 251, 205, 213, 253, 217, 255, 250, 254 }, + new byte[] { 251, 206, 214, 254, 218, 255, 250, 254 }, + new byte[] { 251, 206, 215, 254, 218, 255, 250, 254 }, + new byte[] { 252, 207, 216, 254, 219, 255, 250, 254 }, + new byte[] { 252, 208, 216, 254, 219, 255, 250, 254 }, + new byte[] { 252, 209, 217, 254, 220, 255, 250, 254 }, + new byte[] { 252, 210, 217, 254, 220, 255, 250, 254 }, + new byte[] { 252, 211, 218, 254, 221, 255, 250, 254 }, + new byte[] { 252, 212, 218, 254, 221, 255, 250, 254 }, + new byte[] { 253, 213, 219, 254, 222, 255, 250, 254 }, + new byte[] { 253, 213, 220, 254, 222, 255, 250, 254 }, + new byte[] { 253, 214, 221, 254, 223, 255, 250, 254 }, + new byte[] { 253, 215, 221, 254, 223, 255, 250, 254 }, + new byte[] { 253, 216, 222, 254, 224, 255, 251, 254 }, + new byte[] { 253, 217, 223, 254, 224, 255, 251, 254 }, + new byte[] { 253, 218, 224, 254, 225, 255, 251, 254 }, + new byte[] { 253, 219, 224, 254, 225, 255, 251, 254 }, + new byte[] { 254, 220, 225, 254, 225, 255, 251, 254 }, + new byte[] { 254, 221, 226, 254, 225, 255, 251, 254 }, + new byte[] { 254, 222, 227, 255, 226, 255, 251, 254 }, + new byte[] { 254, 223, 227, 255, 226, 255, 251, 254 }, + new byte[] { 254, 224, 228, 255, 227, 255, 251, 254 }, + new byte[] { 254, 225, 229, 255, 227, 255, 251, 254 }, + new byte[] { 254, 226, 230, 255, 228, 255, 251, 254 }, + new byte[] { 254, 227, 230, 255, 229, 255, 251, 254 }, + new byte[] { 255, 228, 231, 255, 230, 255, 251, 254 }, + new byte[] { 255, 229, 232, 255, 230, 255, 251, 254 }, + new byte[] { 255, 230, 233, 255, 231, 255, 252, 254 }, + new byte[] { 255, 231, 234, 255, 231, 
255, 252, 254 }, + new byte[] { 255, 232, 235, 255, 232, 255, 252, 254 }, + new byte[] { 255, 233, 236, 255, 232, 255, 252, 254 }, + new byte[] { 255, 235, 237, 255, 233, 255, 252, 254 }, + new byte[] { 255, 236, 238, 255, 234, 255, 252, 254 }, + new byte[] { 255, 238, 240, 255, 235, 255, 252, 255 }, + new byte[] { 255, 239, 241, 255, 235, 255, 252, 254 }, + new byte[] { 255, 241, 243, 255, 236, 255, 252, 254 }, + new byte[] { 255, 243, 245, 255, 237, 255, 252, 254 }, + new byte[] { 255, 246, 247, 255, 239, 255, 253, 255 }, + }; + + /* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */ + public static readonly sbyte[] Vp9IntraModeTree = new sbyte[] + { + -(sbyte)PredictionMode.DcPred, 2, /* 0 = DC_NODE */ + -(sbyte)PredictionMode.TmPred, 4, /* 1 = TM_NODE */ + -(sbyte)PredictionMode.VPred, 6, /* 2 = V_NODE */ + 8, 12, /* 3 = COM_NODE */ + -(sbyte)PredictionMode.HPred, 10, /* 4 = H_NODE */ + -(sbyte)PredictionMode.D135Pred, -(sbyte)PredictionMode.D117Pred, /* 5 = D135_NODE */ + -(sbyte)PredictionMode.D45Pred, 14, /* 6 = D45_NODE */ + -(sbyte)PredictionMode.D63Pred, 16, /* 7 = D63_NODE */ + -(sbyte)PredictionMode.D153Pred, -(sbyte)PredictionMode.D207Pred /* 8 = D153_NODE */ + }; + + public static readonly sbyte[] Vp9InterModeTree = new sbyte[] + { + -((sbyte)PredictionMode.ZeroMv - (sbyte)PredictionMode. 
NearestMv), 2, + -((sbyte)PredictionMode.NearestMv - (sbyte)PredictionMode.NearestMv), 4, + -((sbyte)PredictionMode.NearMv - (sbyte)PredictionMode.NearestMv), + -((sbyte)PredictionMode.NewMv - (sbyte)PredictionMode.NearestMv) + }; + + public static readonly sbyte[] Vp9PartitionTree = new sbyte[] + { + -(sbyte)PartitionType.PartitionNone, 2, -(sbyte)PartitionType.PartitionHorz, 4, -(sbyte)PartitionType.PartitionVert, -(sbyte)PartitionType.PartitionSplit + }; + + public static readonly sbyte[] Vp9SwitchableInterpTree = new sbyte[] + { + -Constants.EightTap, 2, -Constants.EightTapSmooth, -Constants.EightTapSharp + }; + + public static readonly sbyte[] Vp9SegmentTree = new sbyte[] + { + 2, 4, 6, 8, 10, 12, 0, -1, -2, -3, -4, -5, -6, -7 + }; + + // MV Ref + + // This is used to figure out a context for the ref blocks. The code flattens + // an array that would have 3 possible counts (0, 1 & 2) for 3 choices by + // adding 9 for each intra block, 3 for each zero mv and 1 for each new + // motion vector. This single number is then converted into a context + // with a single lookup ( CounterToContext ). + public static readonly int[] Mode2Counter = new int[] + { + 9, // DC_PRED + 9, // V_PRED + 9, // H_PRED + 9, // D45_PRED + 9, // D135_PRED + 9, // D117_PRED + 9, // D153_PRED + 9, // D207_PRED + 9, // D63_PRED + 9, // TM_PRED + 0, // NEARESTMV + 0, // NEARMV + 3, // ZEROMV + 1, // NEWMV + }; + + // There are 3^3 different combinations of 3 counts that can be either 0,1 or + // 2. However the actual count can never be greater than 2 so the highest + // counter we need is 18. 9 is an invalid counter that's never used. 
+ public static readonly MotionVectorContext[] CounterToContext = new MotionVectorContext[] + { + MotionVectorContext.BothPredicted, // 0 + MotionVectorContext.NewPlusNonIntra, // 1 + MotionVectorContext.BothNew, // 2 + MotionVectorContext.ZeroPlusPredicted, // 3 + MotionVectorContext.NewPlusNonIntra, // 4 + MotionVectorContext.InvalidCase, // 5 + MotionVectorContext.BothZero, // 6 + MotionVectorContext.InvalidCase, // 7 + MotionVectorContext.InvalidCase, // 8 + MotionVectorContext.IntraPlusNonIntra, // 9 + MotionVectorContext.IntraPlusNonIntra, // 10 + MotionVectorContext.InvalidCase, // 11 + MotionVectorContext.IntraPlusNonIntra, // 12 + MotionVectorContext.InvalidCase, // 13 + MotionVectorContext.InvalidCase, // 14 + MotionVectorContext.InvalidCase, // 15 + MotionVectorContext.InvalidCase, // 16 + MotionVectorContext.InvalidCase, // 17 + MotionVectorContext.BothIntra // 18 + }; + + public static readonly Position[][] MvRefBlocks = new Position[][] + { + // 4X4 + new Position[] { new Position( -1, 0 ), + new Position( 0, -1 ), + new Position( -1, -1 ), + new Position( -2, 0 ), + new Position( 0, -2 ), + new Position( -2, -1 ), + new Position( -1, -2 ), + new Position( -2, -2 ) }, + // 4X8 + new Position[] { new Position( -1, 0 ), + new Position( 0, -1 ), + new Position( -1, -1 ), + new Position( -2, 0 ), + new Position( 0, -2 ), + new Position( -2, -1 ), + new Position( -1, -2 ), + new Position( -2, -2 ) }, + // 8X4 + new Position[] { new Position( -1, 0 ), + new Position( 0, -1 ), + new Position( -1, -1 ), + new Position( -2, 0 ), + new Position( 0, -2 ), + new Position( -2, -1 ), + new Position( -1, -2 ), + new Position( -2, -2 ) }, + // 8X8 + new Position[] { new Position( -1, 0 ), + new Position( 0, -1 ), + new Position( -1, -1 ), + new Position( -2, 0 ), + new Position( 0, -2 ), + new Position( -2, -1 ), + new Position( -1, -2 ), + new Position( -2, -2 ) }, + // 8X16 + new Position[] { new Position( 0, -1 ), + new Position( -1, 0 ), + new Position( 1, -1 ), 
+ new Position( -1, -1 ), + new Position( 0, -2 ), + new Position( -2, 0 ), + new Position( -2, -1 ), + new Position( -1, -2 ) }, + // 16X8 + new Position[] { new Position( -1, 0 ), + new Position( 0, -1 ), + new Position( -1, 1 ), + new Position( -1, -1 ), + new Position( -2, 0 ), + new Position( 0, -2 ), + new Position( -1, -2 ), + new Position( -2, -1 ) }, + // 16X16 + new Position[] { new Position( -1, 0 ), + new Position( 0, -1 ), + new Position( -1, 1 ), + new Position( 1, -1 ), + new Position( -1, -1 ), + new Position( -3, 0 ), + new Position( 0, -3 ), + new Position( -3, -3 ) }, + // 16X32 + new Position[] { new Position( 0, -1 ), + new Position( -1, 0 ), + new Position( 2, -1 ), + new Position( -1, -1 ), + new Position( -1, 1 ), + new Position( 0, -3 ), + new Position( -3, 0 ), + new Position( -3, -3 ) }, + // 32X16 + new Position[] { new Position( -1, 0 ), + new Position( 0, -1 ), + new Position( -1, 2 ), + new Position( -1, -1 ), + new Position( 1, -1 ), + new Position( -3, 0 ), + new Position( 0, -3 ), + new Position( -3, -3 ) }, + // 32X32 + new Position[] { new Position( -1, 1 ), + new Position( 1, -1 ), + new Position( -1, 2 ), + new Position( 2, -1 ), + new Position( -1, -1 ), + new Position( -3, 0 ), + new Position( 0, -3 ), + new Position( -3, -3 ) }, + // 32X64 + new Position[] { new Position( 0, -1 ), + new Position( -1, 0 ), + new Position( 4, -1 ), + new Position( -1, 2 ), + new Position( -1, -1 ), + new Position( 0, -3 ), + new Position( -3, 0 ), + new Position( 2, -1 ) }, + // 64X32 + new Position[] { new Position( -1, 0 ), + new Position( 0, -1 ), + new Position( -1, 4 ), + new Position( 2, -1 ), + new Position( -1, -1 ), + new Position( -3, 0 ), + new Position( 0, -3 ), + new Position( -1, 2 ) }, + // 64X64 + new Position[] { new Position( -1, 3 ), + new Position( 3, -1 ), + new Position( -1, 4 ), + new Position( 4, -1 ), + new Position( -1, -1 ), + new Position( -1, 0 ), + new Position( 0, -1 ), + new Position( -1, 6 ) } + }; + } +} diff 
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using System.Diagnostics;

namespace Ryujinx.Graphics.Nvdec.Vp9
{
    // Computes context indices for reference-related syntax from the mode infos
    // located above and to the left of the current block.
    internal static class PredCommon
    {
        // Converts a condition into the 0/1 term used by the context arithmetic below.
        private static int Flag(bool condition)
        {
            return condition ? 1 : 0;
        }

        // Returns the reference mode context. The result depends on which neighbours are
        // available, whether they carry a second reference, and whether their first
        // reference equals cm.CompFixedRef. Debug builds assert the result is below
        // Constants.CompInterContexts.
        public static int GetReferenceModeContext(ref Vp9Common cm, ref MacroBlockD xd)
        {
            bool hasAbove = !xd.AboveMi.IsNull;
            bool hasLeft = !xd.LeftMi.IsNull;
            int ctx;

            if (hasAbove && hasLeft)
            {
                ref ModeInfo above = ref xd.AboveMi.Value;
                ref ModeInfo left = ref xd.LeftMi.Value;
                bool aboveComp = above.HasSecondRef();
                bool leftComp = left.HasSecondRef();

                if (aboveComp && leftComp)
                {
                    // Both neighbours use compound prediction (4).
                    ctx = 4;
                }
                else if (!aboveComp && !leftComp)
                {
                    // Neither neighbour uses compound prediction (0/1).
                    ctx = Flag(above.RefFrame[0] == cm.CompFixedRef) ^ Flag(left.RefFrame[0] == cm.CompFixedRef);
                }
                else
                {
                    // Exactly one neighbour uses compound prediction (2/3);
                    // the context is derived from the other one.
                    ref ModeInfo single = ref aboveComp ? ref left : ref above;

                    ctx = 2 + Flag(single.RefFrame[0] == cm.CompFixedRef || !single.IsInterBlock());
                }
            }
            else if (hasAbove || hasLeft)
            {
                // Only one neighbour exists: 3 when it is compound, otherwise 0/1.
                ref ModeInfo edge = ref hasAbove ? ref xd.AboveMi.Value : ref xd.LeftMi.Value;

                if (edge.HasSecondRef())
                {
                    ctx = 3;
                }
                else
                {
                    ctx = Flag(edge.RefFrame[0] == cm.CompFixedRef);
                }
            }
            else
            {
                // No neighbour exists (1).
                ctx = 1;
            }

            Debug.Assert(ctx >= 0 && ctx < Constants.CompInterContexts);

            return ctx;
        }

        // Returns a context number for the given MB prediction signal.
        // Debug builds assert the result is below Constants.RefContexts.
        public static int GetPredContextCompRefP(ref Vp9Common cm, ref MacroBlockD xd)
        {
            // Index of the variable reference inside RefFrame: the slot not selected
            // by the sign bias of the fixed reference.
            int fixRefIdx = cm.RefFrameSignBias[cm.CompFixedRef];
            int varRefIdx = fixRefIdx == 0 ? 1 : 0;

            bool hasAbove = !xd.AboveMi.IsNull;
            bool hasLeft = !xd.LeftMi.IsNull;
            int predContext;

            if (hasAbove && hasLeft)
            {
                ref ModeInfo above = ref xd.AboveMi.Value;
                ref ModeInfo left = ref xd.LeftMi.Value;
                bool aboveIntra = !above.IsInterBlock();
                bool leftIntra = !left.IsInterBlock();

                if (aboveIntra && leftIntra)
                {
                    // Intra/Intra (2).
                    predContext = 2;
                }
                else if (aboveIntra || leftIntra)
                {
                    // Intra/Inter (1/3): only the inter neighbour contributes. A single
                    // prediction is compared through slot 0, a compound one through the
                    // variable slot.
                    ref ModeInfo inter = ref aboveIntra ? ref left : ref above;
                    int slot = inter.HasSecondRef() ? varRefIdx : 0;

                    predContext = 1 + 2 * Flag(inter.RefFrame[slot] != cm.CompVarRef[1]);
                }
                else
                {
                    // Inter/Inter.
                    bool leftSingle = !left.HasSecondRef();
                    bool aboveSingle = !above.HasSecondRef();
                    sbyte aboveVar = above.RefFrame[aboveSingle ? 0 : varRefIdx];
                    sbyte leftVar = left.RefFrame[leftSingle ? 0 : varRefIdx];

                    if (aboveVar == leftVar && cm.CompVarRef[1] == aboveVar)
                    {
                        predContext = 0;
                    }
                    else if (leftSingle && aboveSingle)
                    {
                        // Single/Single.
                        bool fixedWithVar0 =
                            (aboveVar == cm.CompFixedRef && leftVar == cm.CompVarRef[0]) ||
                            (leftVar == cm.CompFixedRef && aboveVar == cm.CompVarRef[0]);

                        if (fixedWithVar0)
                        {
                            predContext = 4;
                        }
                        else
                        {
                            predContext = aboveVar == leftVar ? 3 : 1;
                        }
                    }
                    else if (leftSingle || aboveSingle)
                    {
                        // Single/Comp.
                        sbyte compVar = leftSingle ? aboveVar : leftVar;
                        sbyte singleRef = aboveSingle ? aboveVar : leftVar;
                        bool compIsVar1 = compVar == cm.CompVarRef[1];
                        bool singleIsVar1 = singleRef == cm.CompVarRef[1];

                        if (compIsVar1 && !singleIsVar1)
                        {
                            predContext = 1;
                        }
                        else if (singleIsVar1 && !compIsVar1)
                        {
                            predContext = 2;
                        }
                        else
                        {
                            predContext = 4;
                        }
                    }
                    else
                    {
                        // Comp/Comp.
                        predContext = aboveVar == leftVar ? 4 : 2;
                    }
                }
            }
            else if (hasAbove || hasLeft)
            {
                // Only one neighbour exists.
                ref ModeInfo edge = ref hasAbove ? ref xd.AboveMi.Value : ref xd.LeftMi.Value;

                if (!edge.IsInterBlock())
                {
                    predContext = 2;
                }
                else if (edge.HasSecondRef())
                {
                    predContext = 4 * Flag(edge.RefFrame[varRefIdx] != cm.CompVarRef[1]);
                }
                else
                {
                    predContext = 3 * Flag(edge.RefFrame[0] != cm.CompVarRef[1]);
                }
            }
            else
            {
                // No neighbour exists (2).
                predContext = 2;
            }

            Debug.Assert(predContext >= 0 && predContext < Constants.RefContexts);

            return predContext;
        }

        // Context contributed by one inter-coded neighbour in GetPredContextSingleRefP1.
        // The same formula applies when the other neighbour is intra and when it is missing.
        private static int SingleRefP1FromInterEdge(ref ModeInfo edge)
        {
            if (edge.HasSecondRef())
            {
                return 1 + Flag(edge.RefFrame[0] == Constants.LastFrame || edge.RefFrame[1] == Constants.LastFrame);
            }

            return 4 * Flag(edge.RefFrame[0] == Constants.LastFrame);
        }

        // Returns the first single-reference context, derived from how the neighbours
        // use Constants.LastFrame. Debug builds assert the result is below Constants.RefContexts.
        public static int GetPredContextSingleRefP1(ref MacroBlockD xd)
        {
            bool hasAbove = !xd.AboveMi.IsNull;
            bool hasLeft = !xd.LeftMi.IsNull;
            int predContext;

            if (hasAbove && hasLeft)
            {
                ref ModeInfo above = ref xd.AboveMi.Value;
                ref ModeInfo left = ref xd.LeftMi.Value;
                bool aboveIntra = !above.IsInterBlock();
                bool leftIntra = !left.IsInterBlock();

                if (aboveIntra && leftIntra)
                {
                    // Intra/Intra.
                    predContext = 2;
                }
                else if (aboveIntra || leftIntra)
                {
                    // Intra/Inter or Inter/Intra: use the inter neighbour.
                    ref ModeInfo inter = ref aboveIntra ? ref left : ref above;

                    predContext = SingleRefP1FromInterEdge(ref inter);
                }
                else
                {
                    // Inter/Inter.
                    bool aboveComp = above.HasSecondRef();
                    bool leftComp = left.HasSecondRef();
                    sbyte above0 = above.RefFrame[0];
                    sbyte above1 = above.RefFrame[1];
                    sbyte left0 = left.RefFrame[0];
                    sbyte left1 = left.RefFrame[1];

                    if (aboveComp && leftComp)
                    {
                        predContext = 1 + Flag(
                            above0 == Constants.LastFrame || above1 == Constants.LastFrame ||
                            left0 == Constants.LastFrame || left1 == Constants.LastFrame);
                    }
                    else if (aboveComp || leftComp)
                    {
                        // One single and one compound neighbour.
                        sbyte singleRef = !aboveComp ? above0 : left0;
                        sbyte comp0 = aboveComp ? above0 : left0;
                        sbyte comp1 = aboveComp ? above1 : left1;
                        int compUsesLast = Flag(comp0 == Constants.LastFrame || comp1 == Constants.LastFrame);

                        predContext = singleRef == Constants.LastFrame ? 3 + compUsesLast : compUsesLast;
                    }
                    else
                    {
                        predContext = 2 * Flag(above0 == Constants.LastFrame) + 2 * Flag(left0 == Constants.LastFrame);
                    }
                }
            }
            else if (hasAbove || hasLeft)
            {
                // Only one neighbour exists.
                ref ModeInfo edge = ref hasAbove ? ref xd.AboveMi.Value : ref xd.LeftMi.Value;

                if (!edge.IsInterBlock())
                {
                    predContext = 2;
                }
                else
                {
                    predContext = SingleRefP1FromInterEdge(ref edge);
                }
            }
            else
            {
                // No neighbour exists.
                predContext = 2;
            }

            Debug.Assert(predContext >= 0 && predContext < Constants.RefContexts);

            return predContext;
        }

        // Returns the second single-reference context, derived from how the neighbours
        // use Constants.LastFrame, Constants.GoldenFrame and Constants.AltRefFrame.
        // Debug builds assert the result is below Constants.RefContexts.
        public static int GetPredContextSingleRefP2(ref MacroBlockD xd)
        {
            bool hasAbove = !xd.AboveMi.IsNull;
            bool hasLeft = !xd.LeftMi.IsNull;
            int predContext;

            if (hasAbove && hasLeft)
            {
                ref ModeInfo above = ref xd.AboveMi.Value;
                ref ModeInfo left = ref xd.LeftMi.Value;
                bool aboveIntra = !above.IsInterBlock();
                bool leftIntra = !left.IsInterBlock();

                if (aboveIntra && leftIntra)
                {
                    // Intra/Intra.
                    predContext = 2;
                }
                else if (aboveIntra || leftIntra)
                {
                    // Intra/Inter or Inter/Intra: use the inter neighbour.
                    ref ModeInfo inter = ref aboveIntra ? ref left : ref above;

                    if (inter.HasSecondRef())
                    {
                        predContext = 1 + 2 * Flag(
                            inter.RefFrame[0] == Constants.GoldenFrame ||
                            inter.RefFrame[1] == Constants.GoldenFrame);
                    }
                    else if (inter.RefFrame[0] == Constants.LastFrame)
                    {
                        predContext = 3;
                    }
                    else
                    {
                        predContext = 4 * Flag(inter.RefFrame[0] == Constants.GoldenFrame);
                    }
                }
                else
                {
                    // Inter/Inter.
                    bool aboveComp = above.HasSecondRef();
                    bool leftComp = left.HasSecondRef();
                    sbyte above0 = above.RefFrame[0];
                    sbyte above1 = above.RefFrame[1];
                    sbyte left0 = left.RefFrame[0];
                    sbyte left1 = left.RefFrame[1];

                    if (aboveComp && leftComp)
                    {
                        if (above0 == left0 && above1 == left1)
                        {
                            predContext = 3 * Flag(
                                above0 == Constants.GoldenFrame || above1 == Constants.GoldenFrame ||
                                left0 == Constants.GoldenFrame || left1 == Constants.GoldenFrame);
                        }
                        else
                        {
                            predContext = 2;
                        }
                    }
                    else if (aboveComp || leftComp)
                    {
                        // One single and one compound neighbour.
                        sbyte singleRef = !aboveComp ? above0 : left0;
                        sbyte comp0 = aboveComp ? above0 : left0;
                        sbyte comp1 = aboveComp ? above1 : left1;
                        int compUsesGolden = Flag(comp0 == Constants.GoldenFrame || comp1 == Constants.GoldenFrame);

                        if (singleRef == Constants.GoldenFrame)
                        {
                            predContext = 3 + compUsesGolden;
                        }
                        else if (singleRef == Constants.AltRefFrame)
                        {
                            predContext = compUsesGolden;
                        }
                        else
                        {
                            predContext = 1 + 2 * compUsesGolden;
                        }
                    }
                    else
                    {
                        // Single/Single.
                        bool aboveLast = above0 == Constants.LastFrame;
                        bool leftLast = left0 == Constants.LastFrame;

                        if (aboveLast && leftLast)
                        {
                            predContext = 3;
                        }
                        else if (aboveLast || leftLast)
                        {
                            // Exactly one neighbour uses the last frame; look at the other one.
                            sbyte other = aboveLast ? left0 : above0;

                            predContext = 4 * Flag(other == Constants.GoldenFrame);
                        }
                        else
                        {
                            predContext = 2 * Flag(above0 == Constants.GoldenFrame) + 2 * Flag(left0 == Constants.GoldenFrame);
                        }
                    }
                }
            }
            else if (hasAbove || hasLeft)
            {
                // Only one neighbour exists.
                ref ModeInfo edge = ref hasAbove ? ref xd.AboveMi.Value : ref xd.LeftMi.Value;

                if (!edge.IsInterBlock())
                {
                    predContext = 2;
                }
                else if (edge.HasSecondRef())
                {
                    predContext = 3 * Flag(
                        edge.RefFrame[0] == Constants.GoldenFrame ||
                        edge.RefFrame[1] == Constants.GoldenFrame);
                }
                else if (edge.RefFrame[0] == Constants.LastFrame)
                {
                    predContext = 2;
                }
                else
                {
                    predContext = 4 * Flag(edge.RefFrame[0] == Constants.GoldenFrame);
                }
            }
            else
            {
                // No neighbour exists (2).
                predContext = 2;
            }

            Debug.Assert(predContext >= 0 && predContext < Constants.RefContexts);

            return predContext;
        }
    }
}
349, 354, 359, 364, + 369, 374, 379, 384, 389, 395, 400, 406, 411, 417, 423, 429, 435, 441, + 447, 454, 461, 467, 475, 482, 489, 497, 505, 513, 522, 530, 539, 549, + 559, 569, 579, 590, 602, 614, 626, 640, 654, 668, 684, 700, 717, 736, + 755, 775, 796, 819, 843, 869, 896, 925, 955, 988, 1022, 1058, 1098, 1139, + 1184, 1232, 1282, 1336, + }; + + private static readonly short[] DcQlookup10 = new short[] + { + 4, 9, 10, 13, 15, 17, 20, 22, 25, 28, 31, 34, 37, + 40, 43, 47, 50, 53, 57, 60, 64, 68, 71, 75, 78, 82, + 86, 90, 93, 97, 101, 105, 109, 113, 116, 120, 124, 128, 132, + 136, 140, 143, 147, 151, 155, 159, 163, 166, 170, 174, 178, 182, + 185, 189, 193, 197, 200, 204, 208, 212, 215, 219, 223, 226, 230, + 233, 237, 241, 244, 248, 251, 255, 259, 262, 266, 269, 273, 276, + 280, 283, 287, 290, 293, 297, 300, 304, 307, 310, 314, 317, 321, + 324, 327, 331, 334, 337, 343, 350, 356, 362, 369, 375, 381, 387, + 394, 400, 406, 412, 418, 424, 430, 436, 442, 448, 454, 460, 466, + 472, 478, 484, 490, 499, 507, 516, 525, 533, 542, 550, 559, 567, + 576, 584, 592, 601, 609, 617, 625, 634, 644, 655, 666, 676, 687, + 698, 708, 718, 729, 739, 749, 759, 770, 782, 795, 807, 819, 831, + 844, 856, 868, 880, 891, 906, 920, 933, 947, 961, 975, 988, 1001, + 1015, 1030, 1045, 1061, 1076, 1090, 1105, 1120, 1137, 1153, 1170, 1186, 1202, + 1218, 1236, 1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379, 1398, 1416, 1436, + 1456, 1476, 1496, 1516, 1537, 1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717, + 1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929, 1958, 1990, 2021, 2054, 2088, + 2123, 2159, 2197, 2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561, 2616, 2675, + 2737, 2802, 2871, 2944, 3020, 3102, 3188, 3280, 3375, 3478, 3586, 3702, 3823, + 3953, 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347, + }; + + private static readonly short[] DcQlookup12 = new short[] + { + 4, 12, 18, 25, 33, 41, 50, 60, 70, 80, 91, + 103, 115, 127, 140, 153, 166, 180, 194, 208, 222, 237, + 251, 266, 281, 296, 312, 327, 343, 358, 
374, 390, 405, + 421, 437, 453, 469, 484, 500, 516, 532, 548, 564, 580, + 596, 611, 627, 643, 659, 674, 690, 706, 721, 737, 752, + 768, 783, 798, 814, 829, 844, 859, 874, 889, 904, 919, + 934, 949, 964, 978, 993, 1008, 1022, 1037, 1051, 1065, 1080, + 1094, 1108, 1122, 1136, 1151, 1165, 1179, 1192, 1206, 1220, 1234, + 1248, 1261, 1275, 1288, 1302, 1315, 1329, 1342, 1368, 1393, 1419, + 1444, 1469, 1494, 1519, 1544, 1569, 1594, 1618, 1643, 1668, 1692, + 1717, 1741, 1765, 1789, 1814, 1838, 1862, 1885, 1909, 1933, 1957, + 1992, 2027, 2061, 2096, 2130, 2165, 2199, 2233, 2267, 2300, 2334, + 2367, 2400, 2434, 2467, 2499, 2532, 2575, 2618, 2661, 2704, 2746, + 2788, 2830, 2872, 2913, 2954, 2995, 3036, 3076, 3127, 3177, 3226, + 3275, 3324, 3373, 3421, 3469, 3517, 3565, 3621, 3677, 3733, 3788, + 3843, 3897, 3951, 4005, 4058, 4119, 4181, 4241, 4301, 4361, 4420, + 4479, 4546, 4612, 4677, 4742, 4807, 4871, 4942, 5013, 5083, 5153, + 5222, 5291, 5367, 5442, 5517, 5591, 5665, 5745, 5825, 5905, 5984, + 6063, 6149, 6234, 6319, 6404, 6495, 6587, 6678, 6769, 6867, 6966, + 7064, 7163, 7269, 7376, 7483, 7599, 7715, 7832, 7958, 8085, 8214, + 8352, 8492, 8635, 8788, 8945, 9104, 9275, 9450, 9639, 9832, 10031, + 10245, 10465, 10702, 10946, 11210, 11482, 11776, 12081, 12409, 12750, 13118, + 13501, 13913, 14343, 14807, 15290, 15812, 16356, 16943, 17575, 18237, 18949, + 19718, 20521, 21387, + }; + + private static readonly short[] AcQlookup = new short[] + { + 4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, + 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, + 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, + 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, + 98, 99, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118, + 120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, + 146, 148, 150, 152, 155, 158, 161, 
164, 167, 170, 173, 176, 179, + 182, 185, 188, 191, 194, 197, 200, 203, 207, 211, 215, 219, 223, + 227, 231, 235, 239, 243, 247, 251, 255, 260, 265, 270, 275, 280, + 285, 290, 295, 300, 305, 311, 317, 323, 329, 335, 341, 347, 353, + 359, 366, 373, 380, 387, 394, 401, 408, 416, 424, 432, 440, 448, + 456, 465, 474, 483, 492, 501, 510, 520, 530, 540, 550, 560, 571, + 582, 593, 604, 615, 627, 639, 651, 663, 676, 689, 702, 715, 729, + 743, 757, 771, 786, 801, 816, 832, 848, 864, 881, 898, 915, 933, + 951, 969, 988, 1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151, 1173, 1196, + 1219, 1243, 1267, 1292, 1317, 1343, 1369, 1396, 1423, 1451, 1479, 1508, 1537, + 1567, 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828, + }; + + private static readonly short[] AcQlookup10 = new short[] + { + 4, 9, 11, 13, 16, 18, 21, 24, 27, 30, 33, 37, 40, + 44, 48, 51, 55, 59, 63, 67, 71, 75, 79, 83, 88, 92, + 96, 100, 105, 109, 114, 118, 122, 127, 131, 136, 140, 145, 149, + 154, 158, 163, 168, 172, 177, 181, 186, 190, 195, 199, 204, 208, + 213, 217, 222, 226, 231, 235, 240, 244, 249, 253, 258, 262, 267, + 271, 275, 280, 284, 289, 293, 297, 302, 306, 311, 315, 319, 324, + 328, 332, 337, 341, 345, 349, 354, 358, 362, 367, 371, 375, 379, + 384, 388, 392, 396, 401, 409, 417, 425, 433, 441, 449, 458, 466, + 474, 482, 490, 498, 506, 514, 523, 531, 539, 547, 555, 563, 571, + 579, 588, 596, 604, 616, 628, 640, 652, 664, 676, 688, 700, 713, + 725, 737, 749, 761, 773, 785, 797, 809, 825, 841, 857, 873, 889, + 905, 922, 938, 954, 970, 986, 1002, 1018, 1038, 1058, 1078, 1098, 1118, + 1138, 1158, 1178, 1198, 1218, 1242, 1266, 1290, 1314, 1338, 1362, 1386, 1411, + 1435, 1463, 1491, 1519, 1547, 1575, 1603, 1631, 1663, 1695, 1727, 1759, 1791, + 1823, 1859, 1895, 1931, 1967, 2003, 2039, 2079, 2119, 2159, 2199, 2239, 2283, + 2327, 2371, 2415, 2459, 2507, 2555, 2603, 2651, 2703, 2755, 2807, 2859, 2915, + 2971, 3027, 3083, 3143, 3203, 3263, 3327, 3391, 3455, 3523, 3591, 3659, 3731, + 3803, 3876, 3952, 4028, 4104, 
// Returns the DC quantizer for qindex + delta, clamped to [0, MaxQ], from the table
// matching bitDepth. An unknown bit depth trips a debug assertion and returns -1.
public static short DcQuant(int qindex, int delta, BitDepth bitDepth)
{
    short[] table;

    if (bitDepth == BitDepth.Bits8)
    {
        table = DcQlookup;
    }
    else if (bitDepth == BitDepth.Bits10)
    {
        table = DcQlookup10;
    }
    else if (bitDepth == BitDepth.Bits12)
    {
        table = DcQlookup12;
    }
    else
    {
        Debug.Assert(false, "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");

        return -1;
    }

    return table[Math.Clamp(qindex + delta, 0, MaxQ)];
}

// Returns the AC quantizer for qindex + delta, clamped to [0, MaxQ], from the table
// matching bitDepth. An unknown bit depth trips a debug assertion and returns -1.
public static short AcQuant(int qindex, int delta, BitDepth bitDepth)
{
    short[] table;

    if (bitDepth == BitDepth.Bits8)
    {
        table = AcQlookup;
    }
    else if (bitDepth == BitDepth.Bits10)
    {
        table = AcQlookup10;
    }
    else if (bitDepth == BitDepth.Bits12)
    {
        table = AcQlookup12;
    }
    else
    {
        Debug.Assert(false, "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");

        return -1;
    }

    return table[Math.Clamp(qindex + delta, 0, MaxQ)];
}

// Returns the quantizer index to use for a segment. When the SegLvlAltQ feature is
// inactive the base index is returned unchanged; otherwise the segment data is used
// either as an absolute value or as an offset from the base index, clamped to [0, MaxQ].
public static int GetQIndex(ref Segmentation seg, int segmentId, int baseQIndex)
{
    if (seg.IsSegFeatureActive(segmentId, SegLvlFeatures.SegLvlAltQ) == 0)
    {
        return baseQIndex;
    }

    int data = seg.GetSegData(segmentId, SegLvlFeatures.SegLvlAltQ);
    int segQIndex = data;

    if (seg.AbsDelta != Constants.SegmentAbsData)
    {
        // Segment data is a delta relative to the base index.
        segQIndex = baseQIndex + data;
    }

    return Math.Clamp(segQIndex, 0, MaxQ);
}
// NOTE(review): the generic type arguments in this block (Array8<short>, ArrayPtr<byte>,
// Ptr<ScaleFactors>, Array3<MacroBlockDPlane>, Span<...>) were missing from the reviewed
// text. Array3<MacroBlockDPlane> and Span<int> follow from how the values are used below;
// <short>, <byte> and <ScaleFactors> are presumed - confirm against the type declarations.

// Forwards a high bit depth prediction request to sf.HighbdInterPredict, passing
// 0/1 flags that tell whether the horizontal/vertical sub-pixel offsets are non-zero.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe void HighbdInterPredictor(
    ushort* src,
    int srcStride,
    ushort* dst,
    int dstStride,
    int subpelX,
    int subpelY,
    ref ScaleFactors sf,
    int w,
    int h,
    int refr,
    Array8<short>[] kernel,
    int xs,
    int ys,
    int bd)
{
    int hasSubpelX = subpelX != 0 ? 1 : 0;
    int hasSubpelY = subpelY != 0 ? 1 : 0;

    sf.HighbdInterPredict(
        hasSubpelX,
        hasSubpelY,
        refr,
        src,
        srcStride,
        dst,
        dstStride,
        subpelX,
        subpelY,
        w,
        h,
        kernel,
        xs,
        ys,
        bd);
}

// Divides a sum of four components by 4, rounding to nearest with ties away from zero.
private static int RoundMvCompQ4(int value)
{
    int bias = value < 0 ? -2 : 2;

    return (value + bias) / 4;
}

// Averages the motion vectors of all four sub-blocks of mi for reference idx.
private static Mv MiMvPredQ4(ref ModeInfo mi, int idx)
{
    int rowSum = 0;
    int colSum = 0;

    for (int block = 0; block < 4; block++)
    {
        rowSum += mi.Bmi[block].Mv[idx].Row;
        colSum += mi.Bmi[block].Mv[idx].Col;
    }

    return new Mv()
    {
        Row = (short)RoundMvCompQ4(rowSum),
        Col = (short)RoundMvCompQ4(colSum)
    };
}

// Divides a sum of two components by 2, rounding to nearest with ties away from zero.
private static int RoundMvCompQ2(int value)
{
    int bias = value < 0 ? -1 : 1;

    return (value + bias) / 2;
}

// Averages the motion vectors of sub-blocks block0 and block1 of mi for reference idx.
private static Mv MiMvPredQ2(ref ModeInfo mi, int idx, int block0, int block1)
{
    int rowSum = mi.Bmi[block0].Mv[idx].Row + mi.Bmi[block1].Mv[idx].Row;
    int colSum = mi.Bmi[block0].Mv[idx].Col + mi.Bmi[block1].Mv[idx].Col;

    return new Mv()
    {
        Row = (short)RoundMvCompQ2(rowSum),
        Col = (short)RoundMvCompQ2(colSum)
    };
}

// Scales srcMv for the plane subsampling (ssX/ssY, each at most 1) and clamps it
// relative to the block edges stored in xd.
public static Mv ClampMvToUmvBorderSb(ref MacroBlockD xd, ref Mv srcMv, int bw, int bh, int ssX, int ssY)
{
    // Validate the subsampling shifts before they are used as shift counts below.
    Debug.Assert(ssX <= 1);
    Debug.Assert(ssY <= 1);

    int scaleX = 1 << (1 - ssX);
    int scaleY = 1 << (1 - ssY);

    // If the MV points so far into the UMV border that no visible pixels
    // are used for reconstruction, the subpel part of the MV can be
    // discarded and the MV limited to 16 pixels with equivalent results.
    int spelLeft = (Constants.Vp9InterpExtend + bw) << SubpelBits;
    int spelRight = spelLeft - SubpelShifts;
    int spelTop = (Constants.Vp9InterpExtend + bh) << SubpelBits;
    int spelBottom = spelTop - SubpelShifts;

    Mv clampedMv = new Mv()
    {
        Row = (short)(srcMv.Row * scaleY),
        Col = (short)(srcMv.Col * scaleX)
    };

    clampedMv.ClampMv(
        xd.MbToLeftEdge * scaleX - spelLeft,
        xd.MbToRightEdge * scaleX + spelRight,
        xd.MbToTopEdge * scaleY - spelTop,
        xd.MbToBottomEdge * scaleY + spelBottom);

    return clampedMv;
}

// Returns the motion vector to use for a sub-block of a split block on plane pd.
// Without subsampling the sub-block vector is used directly; with subsampling in one
// direction two sub-block vectors are averaged, and with both directions all four are.
public static Mv AverageSplitMvs(ref MacroBlockDPlane pd, ref ModeInfo mi, int refr, int block)
{
    bool subsampledX = pd.SubsamplingX > 0;
    bool subsampledY = pd.SubsamplingY > 0;

    if (subsampledX && subsampledY)
    {
        return MiMvPredQ4(ref mi, refr);
    }

    if (subsampledX)
    {
        return MiMvPredQ2(ref mi, refr, block, block + 1);
    }

    if (subsampledY)
    {
        return MiMvPredQ2(ref mi, refr, block, block + 2);
    }

    return mi.Bmi[block].Mv[refr];
}

// Converts an (x, y) position into a linear buffer offset, scaling both coordinates
// through sf first when a scale factor is supplied.
private static int ScaledBufferOffset(int xOffset, int yOffset, int stride, Ptr<ScaleFactors> sf)
{
    int x = xOffset;
    int y = yOffset;

    if (!sf.IsNull)
    {
        x = sf.Value.ScaleValueX(xOffset);
        y = sf.Value.ScaleValueY(yOffset);
    }

    return y * stride + x;
}

// Points dst at the position inside src that corresponds to mode info (miRow, miCol)
// for a plane with the given subsampling, and records the stride.
private static void SetupPredPlanes(
    ref Buf2D dst,
    ArrayPtr<byte> src,
    int stride,
    int miRow,
    int miCol,
    Ptr<ScaleFactors> scale,
    int subsamplingX,
    int subsamplingY)
{
    int x = (Constants.MiSize * miCol) >> subsamplingX;
    int y = (Constants.MiSize * miRow) >> subsamplingY;
    int offset = ScaledBufferOffset(x, y, stride, scale);

    dst.Buf = src.Slice(offset);
    dst.Stride = stride;
}

// Sets the destination buffer of every plane to the (miRow, miCol) position of src.
// No scale factor is applied.
public static void SetupDstPlanes(
    ref Array3<MacroBlockDPlane> planes,
    ref Surface src,
    int miRow,
    int miCol)
{
    Span<ArrayPtr<byte>> buffers = stackalloc ArrayPtr<byte>[Constants.MaxMbPlane];
    Span<int> strides = stackalloc int[Constants.MaxMbPlane];

    buffers[0] = src.YBuffer;
    buffers[1] = src.UBuffer;
    buffers[2] = src.VBuffer;

    // Both chroma planes share the UV stride.
    strides[0] = src.Stride;
    strides[1] = src.UvStride;
    strides[2] = src.UvStride;

    for (int plane = 0; plane < Constants.MaxMbPlane; ++plane)
    {
        ref MacroBlockDPlane pd = ref planes[plane];

        SetupPredPlanes(
            ref pd.Dst,
            buffers[plane],
            strides[plane],
            miRow,
            miCol,
            Ptr<ScaleFactors>.Null,
            pd.SubsamplingX,
            pd.SubsamplingY);
    }
}

// Sets prediction source buffer idx of every plane in xd to the (miRow, miCol) position
// of src, scaled through sf when one is supplied. Does nothing unless all three plane
// buffers of src are present.
public static void SetupPrePlanes(
    ref MacroBlockD xd,
    int idx,
    ref Surface src,
    int miRow,
    int miCol,
    Ptr<ScaleFactors> sf)
{
    if (src.YBuffer.IsNull || src.UBuffer.IsNull || src.VBuffer.IsNull)
    {
        return;
    }

    Span<ArrayPtr<byte>> buffers = stackalloc ArrayPtr<byte>[Constants.MaxMbPlane];
    Span<int> strides = stackalloc int[Constants.MaxMbPlane];

    buffers[0] = src.YBuffer;
    buffers[1] = src.UBuffer;
    buffers[2] = src.VBuffer;

    // Both chroma planes share the UV stride.
    strides[0] = src.Stride;
    strides[1] = src.UvStride;
    strides[2] = src.UvStride;

    for (int plane = 0; plane < Constants.MaxMbPlane; ++plane)
    {
        ref MacroBlockDPlane pd = ref xd.Plane[plane];

        SetupPredPlanes(
            ref pd.Pre[idx],
            buffers[plane],
            strides[plane],
            miRow,
            miCol,
            sf,
            pd.SubsamplingX,
            pd.SubsamplingY);
    }
}
+1,761 @@ +using Ryujinx.Graphics.Nvdec.Vp9.Common; +using Ryujinx.Graphics.Nvdec.Vp9.Types; +using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.IntraPred; + +namespace Ryujinx.Graphics.Nvdec.Vp9 +{ + internal static class ReconIntra + { + public static readonly TxType[] IntraModeToTxTypeLookup = new TxType[] + { + TxType.DctDct, // DC + TxType.AdstDct, // V + TxType.DctAdst, // H + TxType.DctDct, // D45 + TxType.AdstAdst, // D135 + TxType.AdstDct, // D117 + TxType.DctAdst, // D153 + TxType.DctAdst, // D207 + TxType.AdstDct, // D63 + TxType.AdstAdst // TM + }; + + private const int NeedLeft = 1 << 1; + private const int NeedAbove = 1 << 2; + private const int NeedAboveRight = 1 << 3; + + private static readonly byte[] ExtendModes = new byte[] + { + NeedAbove | NeedLeft, // DC + NeedAbove, // V + NeedLeft, // H + NeedAboveRight, // D45 + NeedLeft | NeedAbove, // D135 + NeedLeft | NeedAbove, // D117 + NeedLeft | NeedAbove, // D153 + NeedLeft, // D207 + NeedAboveRight, // D63 + NeedLeft | NeedAbove, // TM + }; + + private unsafe delegate void IntraPredFn(byte* dst, int stride, byte* above, byte* left); + + private static unsafe IntraPredFn[][] _pred = new IntraPredFn[][] + { + new IntraPredFn[] + { + null, + null, + null, + null + }, + new IntraPredFn[] + { + VPredictor4x4, + VPredictor8x8, + VPredictor16x16, + VPredictor32x32 + }, + new IntraPredFn[] + { + HPredictor4x4, + HPredictor8x8, + HPredictor16x16, + HPredictor32x32 + }, + new IntraPredFn[] + { + D45Predictor4x4, + D45Predictor8x8, + D45Predictor16x16, + D45Predictor32x32 + }, + new IntraPredFn[] + { + D135Predictor4x4, + D135Predictor8x8, + D135Predictor16x16, + D135Predictor32x32 + }, + new IntraPredFn[] + { + D117Predictor4x4, + D117Predictor8x8, + D117Predictor16x16, + D117Predictor32x32 + }, + new IntraPredFn[] + { + D153Predictor4x4, + D153Predictor8x8, + D153Predictor16x16, + D153Predictor32x32 + }, + new IntraPredFn[] + { + D207Predictor4x4, + D207Predictor8x8, + D207Predictor16x16, + D207Predictor32x32 + }, 
+ new IntraPredFn[] + { + D63Predictor4x4, + D63Predictor8x8, + D63Predictor16x16, + D63Predictor32x32 + }, + new IntraPredFn[] + { + TMPredictor4x4, + TMPredictor8x8, + TMPredictor16x16, + TMPredictor32x32 + } + }; + + private static unsafe IntraPredFn[][][] _dcPred = new IntraPredFn[][][] + { + new IntraPredFn[][] + { + new IntraPredFn[] + { + Dc128Predictor4x4, + Dc128Predictor8x8, + Dc128Predictor16x16, + Dc128Predictor32x32 + }, + new IntraPredFn[] + { + DcTopPredictor4x4, + DcTopPredictor8x8, + DcTopPredictor16x16, + DcTopPredictor32x32 + } + }, + new IntraPredFn[][] + { + new IntraPredFn[] + { + DcLeftPredictor4x4, + DcLeftPredictor8x8, + DcLeftPredictor16x16, + DcLeftPredictor32x32 + }, + new IntraPredFn[] + { + DcPredictor4x4, + DcPredictor8x8, + DcPredictor16x16, + DcPredictor32x32 + } + } + }; + + private unsafe delegate void IntraHighPredFn(ushort* dst, int stride, ushort* above, ushort* left, int bd); + + private static unsafe IntraHighPredFn[][] _predHigh = new IntraHighPredFn[][] + { + new IntraHighPredFn[] + { + null, + null, + null, + null + }, + new IntraHighPredFn[] + { + HighbdVPredictor4x4, + HighbdVPredictor8x8, + HighbdVPredictor16x16, + HighbdVPredictor32x32 + }, + new IntraHighPredFn[] + { + HighbdHPredictor4x4, + HighbdHPredictor8x8, + HighbdHPredictor16x16, + HighbdHPredictor32x32 + }, + new IntraHighPredFn[] + { + HighbdD45Predictor4x4, + HighbdD45Predictor8x8, + HighbdD45Predictor16x16, + HighbdD45Predictor32x32 + }, + new IntraHighPredFn[] + { + HighbdD135Predictor4x4, + HighbdD135Predictor8x8, + HighbdD135Predictor16x16, + HighbdD135Predictor32x32 + }, + new IntraHighPredFn[] + { + HighbdD117Predictor4x4, + HighbdD117Predictor8x8, + HighbdD117Predictor16x16, + HighbdD117Predictor32x32 + }, + new IntraHighPredFn[] + { + HighbdD153Predictor4x4, + HighbdD153Predictor8x8, + HighbdD153Predictor16x16, + HighbdD153Predictor32x32 + }, + new IntraHighPredFn[] + { + HighbdD207Predictor4x4, + HighbdD207Predictor8x8, + HighbdD207Predictor16x16, + 
// Prepares the left column and above row of 16-bit neighbour samples required by the
// prediction mode (per ExtendModes), substituting base values or replicated edge samples
// where neighbours are unavailable or lie beyond the frame, then runs the predictor
// selected by mode and txSize into dst8.
private static unsafe void BuildIntraPredictorsHigh(
    ref MacroBlockD xd,
    byte* ref8,
    int refStride,
    byte* dst8,
    int dstStride,
    PredictionMode mode,
    TxSize txSize,
    int upAvailable,
    int leftAvailable,
    int rightAvailable,
    int x,
    int y,
    int plane)
{
    ushort* dst = (ushort*)dst8;
    ushort* refr = (ushort*)ref8;
    ushort* leftCol = stackalloc ushort[32];
    ushort* aboveData = stackalloc ushort[64 + 16];
    // Leave room in front of the row so that aboveRow[-1] (the top-left sample) is addressable.
    ushort* aboveRow = aboveData + 16;
    // Row handed to the predictor; redirected to the reference itself when no copy is needed.
    ushort* predictorAbove = aboveRow;
    int bs = 4 << (int)txSize;
    ref MacroBlockDPlane pd = ref xd.Plane[plane];
    int extend = ExtendModes[(int)mode];
    int baseVal = 128 << (xd.Bd - 8);
    ushort basePlusOne = (ushort)(baseVal + 1);
    ushort baseMinusOne = (ushort)(baseVal - 1);
    // The two-block above-right read is only taken for 4x4 blocks with a right neighbour.
    bool useRight = bs == 4 && rightAvailable != 0;

    // 127 127 127 .. 127 127 127 127 127 127
    // 129  A   B  ..  Y   Z
    // 129  C   D  ..  W   X
    // 129  E   F  ..  U   V
    // 129  G   H  ..  S   T   T   T   T   T
    // For 10 bit and 12 bit, 127 and 129 are replaced by base - 1 and base + 1.

    // Dimensions of the plane being predicted.
    bool isFirstPlane = plane == 0;
    int frameWidth = isFirstPlane ? xd.CurBuf.Width : xd.CurBuf.UvWidth;
    int frameHeight = isFirstPlane ? xd.CurBuf.Height : xd.CurBuf.UvHeight;

    // Block position in the current frame.
    int x0 = (-xd.MbToLeftEdge >> (3 + pd.SubsamplingX)) + x;
    int y0 = (-xd.MbToTopEdge >> (3 + pd.SubsamplingY)) + y;

    // NEED_LEFT
    if ((extend & NeedLeft) != 0)
    {
        if (leftAvailable == 0)
        {
            MemoryUtil.Fill(leftCol, basePlusOne, bs);
        }
        else
        {
            // Rows that can be read directly; fewer than bs only when the block
            // extends below the frame.
            int rows = bs;

            if (xd.MbToBottomEdge < 0 && y0 + bs > frameHeight)
            {
                rows = frameHeight - y0;
            }

            int i;

            for (i = 0; i < rows; ++i)
            {
                leftCol[i] = refr[i * refStride - 1];
            }

            // Replicate the last readable row for the remainder.
            for (; i < bs; ++i)
            {
                leftCol[i] = refr[(rows - 1) * refStride - 1];
            }
        }
    }

    // NEED_ABOVE
    if ((extend & NeedAbove) != 0)
    {
        if (upAvailable == 0)
        {
            MemoryUtil.Fill(aboveRow, baseMinusOne, bs);
            aboveRow[-1] = baseMinusOne;
        }
        else
        {
            ushort* aboveRef = refr - refStride;

            if (xd.MbToRightEdge >= 0)
            {
                // Faster path: the block does not need extension.
                if (useRight && leftAvailable != 0)
                {
                    predictorAbove = aboveRef;
                }
                else
                {
                    MemoryUtil.Copy(aboveRow, aboveRef, bs);
                }
            }
            else if (x0 + bs <= frameWidth)
            {
                // Slower path, but the whole row is still inside the frame.
                MemoryUtil.Copy(aboveRow, aboveRef, bs);
            }
            else if (x0 <= frameWidth)
            {
                // Slower path: copy what is inside the frame and replicate the last sample.
                int r = frameWidth - x0;

                MemoryUtil.Copy(aboveRow, aboveRef, r);
                MemoryUtil.Fill(aboveRow + r, aboveRow[r - 1], x0 + bs - frameWidth);
            }

            aboveRow[-1] = leftAvailable != 0 ? aboveRef[-1] : basePlusOne;
        }
    }

    // NEED_ABOVERIGHT
    if ((extend & NeedAboveRight) != 0)
    {
        if (upAvailable == 0)
        {
            MemoryUtil.Fill(aboveRow, baseMinusOne, bs * 2);
            aboveRow[-1] = baseMinusOne;
        }
        else
        {
            ushort* aboveRef = refr - refStride;

            if (xd.MbToRightEdge < 0)
            {
                // Slower path: the block needs border extension.
                if (x0 + bs <= frameWidth)
                {
                    if (!useRight)
                    {
                        MemoryUtil.Copy(aboveRow, aboveRef, bs);
                        MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs);
                    }
                    else if (x0 + 2 * bs <= frameWidth)
                    {
                        MemoryUtil.Copy(aboveRow, aboveRef, 2 * bs);
                    }
                    else
                    {
                        int r = frameWidth - x0;

                        MemoryUtil.Copy(aboveRow, aboveRef, r);
                        MemoryUtil.Fill(aboveRow + r, aboveRow[r - 1], x0 + 2 * bs - frameWidth);
                    }
                }
                else if (x0 <= frameWidth)
                {
                    int r = frameWidth - x0;

                    MemoryUtil.Copy(aboveRow, aboveRef, r);
                    MemoryUtil.Fill(aboveRow + r, aboveRow[r - 1], x0 + 2 * bs - frameWidth);
                }

                aboveRow[-1] = leftAvailable != 0 ? aboveRef[-1] : basePlusOne;
            }
            else if (useRight && leftAvailable != 0)
            {
                // Faster path: read straight from the reference, no copy required.
                predictorAbove = aboveRef;
            }
            else
            {
                // Faster path: the block does not need extension.
                MemoryUtil.Copy(aboveRow, aboveRef, bs);

                if (useRight)
                {
                    MemoryUtil.Copy(aboveRow + bs, aboveRef + bs, bs);
                }
                else
                {
                    MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs);
                }

                aboveRow[-1] = leftAvailable != 0 ? aboveRef[-1] : basePlusOne;
            }
        }
    }

    // Predict. DC prediction has a dedicated table indexed by neighbour availability.
    IntraHighPredFn predictor = mode == PredictionMode.DcPred
        ? _dcPredHigh[leftAvailable][upAvailable][(int)txSize]
        : _predHigh[(int)mode][(int)txSize];

    predictor(dst, dstStride, predictorAbove, leftCol, xd.Bd);
}
+ x0 = (-xd.MbToLeftEdge >> (3 + pd.SubsamplingX)) + x; + y0 = (-xd.MbToTopEdge >> (3 + pd.SubsamplingY)) + y; + + // NEED_LEFT + if ((ExtendModes[(int)mode] & NeedLeft) != 0) + { + if (leftAvailable != 0) + { + if (xd.MbToBottomEdge < 0) + { + /* Slower path if the block needs border extension */ + if (y0 + bs <= frameHeight) + { + for (i = 0; i < bs; ++i) + { + leftCol[i] = refr[i * refStride - 1]; + } + } + else + { + int extendBottom = frameHeight - y0; + for (i = 0; i < extendBottom; ++i) + { + leftCol[i] = refr[i * refStride - 1]; + } + + for (; i < bs; ++i) + { + leftCol[i] = refr[(extendBottom - 1) * refStride - 1]; + } + } + } + else + { + /* Faster path if the block does not need extension */ + for (i = 0; i < bs; ++i) + { + leftCol[i] = refr[i * refStride - 1]; + } + } + } + else + { + MemoryUtil.Fill(leftCol, (byte)129, bs); + } + } + + // NEED_ABOVE + if ((ExtendModes[(int)mode] & NeedAbove) != 0) + { + if (upAvailable != 0) + { + byte* aboveRef = refr - refStride; + if (xd.MbToRightEdge < 0) + { + /* Slower path if the block needs border extension */ + if (x0 + bs <= frameWidth) + { + MemoryUtil.Copy(aboveRow, aboveRef, bs); + } + else if (x0 <= frameWidth) + { + int r = frameWidth - x0; + MemoryUtil.Copy(aboveRow, aboveRef, r); + MemoryUtil.Fill(aboveRow + r, aboveRow[r - 1], x0 + bs - frameWidth); + } + } + else + { + /* Faster path if the block does not need extension */ + if (bs == 4 && rightAvailable != 0 && leftAvailable != 0) + { + constAboveRow = aboveRef; + } + else + { + MemoryUtil.Copy(aboveRow, aboveRef, bs); + } + } + aboveRow[-1] = leftAvailable != 0 ? 
aboveRef[-1] : (byte)129; + } + else + { + MemoryUtil.Fill(aboveRow, (byte)127, bs); + aboveRow[-1] = 127; + } + } + + // NEED_ABOVERIGHT + if ((ExtendModes[(int)mode] & NeedAboveRight) != 0) + { + if (upAvailable != 0) + { + byte* aboveRef = refr - refStride; + if (xd.MbToRightEdge < 0) + { + /* Slower path if the block needs border extension */ + if (x0 + 2 * bs <= frameWidth) + { + if (rightAvailable != 0 && bs == 4) + { + MemoryUtil.Copy(aboveRow, aboveRef, 2 * bs); + } + else + { + MemoryUtil.Copy(aboveRow, aboveRef, bs); + MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs); + } + } + else if (x0 + bs <= frameWidth) + { + int r = frameWidth - x0; + if (rightAvailable != 0 && bs == 4) + { + MemoryUtil.Copy(aboveRow, aboveRef, r); + MemoryUtil.Fill(aboveRow + r, aboveRow[r - 1], x0 + 2 * bs - frameWidth); + } + else + { + MemoryUtil.Copy(aboveRow, aboveRef, bs); + MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs); + } + } + else if (x0 <= frameWidth) + { + int r = frameWidth - x0; + MemoryUtil.Copy(aboveRow, aboveRef, r); + MemoryUtil.Fill(aboveRow + r, aboveRow[r - 1], x0 + 2 * bs - frameWidth); + } + } + else + { + /* Faster path if the block does not need extension */ + if (bs == 4 && rightAvailable != 0 && leftAvailable != 0) + { + constAboveRow = aboveRef; + } + else + { + MemoryUtil.Copy(aboveRow, aboveRef, bs); + if (bs == 4 && rightAvailable != 0) + { + MemoryUtil.Copy(aboveRow + bs, aboveRef + bs, bs); + } + else + { + MemoryUtil.Fill(aboveRow + bs, aboveRow[bs - 1], bs); + } + } + } + aboveRow[-1] = leftAvailable != 0 ? 
aboveRef[-1] : (byte)129; + } + else + { + MemoryUtil.Fill(aboveRow, (byte)127, bs * 2); + aboveRow[-1] = 127; + } + } + + // Predict + if (mode == PredictionMode.DcPred) + { + _dcPred[leftAvailable][upAvailable][(int)txSize](dst, dstStride, constAboveRow, leftCol); + } + else + { + _pred[(int)mode][(int)txSize](dst, dstStride, constAboveRow, leftCol); + } + } + + public static unsafe void PredictIntraBlock( + ref MacroBlockD xd, + int bwlIn, + TxSize txSize, + PredictionMode mode, + byte* refr, + int refStride, + byte* dst, + int dstStride, + int aoff, + int loff, + int plane) + { + int bw = 1 << bwlIn; + int txw = 1 << (int)txSize; + int haveTop = loff != 0 || !xd.AboveMi.IsNull ? 1 : 0; + int haveLeft = aoff != 0 || !xd.LeftMi.IsNull ? 1 : 0; + int haveRight = (aoff + txw) < bw ? 1 : 0; + int x = aoff * 4; + int y = loff * 4; + + if (xd.CurBuf.HighBd) + { + BuildIntraPredictorsHigh( + ref xd, + refr, + refStride, + dst, + dstStride, + mode, + txSize, + haveTop, + haveLeft, + haveRight, + x, + y, + plane); + return; + } + BuildIntraPredictors( + ref xd, + refr, + refStride, + dst, + dstStride, + mode, + txSize, + haveTop, + haveLeft, + haveRight, + x, + y, + plane); + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Ryujinx.Graphics.Nvdec.Vp9.csproj b/Ryujinx.Graphics.Nvdec.Vp9/Ryujinx.Graphics.Nvdec.Vp9.csproj new file mode 100644 index 00000000..8fb9d435 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Ryujinx.Graphics.Nvdec.Vp9.csproj @@ -0,0 +1,20 @@ + + + + netcoreapp3.1 + + + + true + + + + true + + + + + + + + diff --git a/Ryujinx.Graphics.Nvdec.Vp9/TileBuffer.cs b/Ryujinx.Graphics.Nvdec.Vp9/TileBuffer.cs new file mode 100644 index 00000000..3b60889b --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/TileBuffer.cs @@ -0,0 +1,10 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Nvdec.Vp9 +{ + internal struct TileBuffer + { + public ArrayPtr Data; + public int Size; + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/TileWorkerData.cs 
b/Ryujinx.Graphics.Nvdec.Vp9/TileWorkerData.cs new file mode 100644 index 00000000..2a483702 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/TileWorkerData.cs @@ -0,0 +1,15 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Nvdec.Vp9.Dsp; +using Ryujinx.Graphics.Nvdec.Vp9.Types; +using Ryujinx.Graphics.Video; + +namespace Ryujinx.Graphics.Nvdec.Vp9 +{ + internal struct TileWorkerData + { + public Reader BitReader; + public MacroBlockD Xd; + /* dqcoeff are shared by all the planes. So planes must be decoded serially */ + public Array32> Dqcoeff; + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/BModeInfo.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/BModeInfo.cs new file mode 100644 index 00000000..9e1cd8b4 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/BModeInfo.cs @@ -0,0 +1,10 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct BModeInfo + { + public PredictionMode Mode; + public Array2 Mv; // First, second inter predictor motion vectors + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/BlockSize.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/BlockSize.cs new file mode 100644 index 00000000..22a48e20 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/BlockSize.cs @@ -0,0 +1,21 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal enum BlockSize + { + Block4x4 = 0, + Block4x8 = 1, + Block8x4 = 2, + Block8x8 = 3, + Block8x16 = 4, + Block16x8 = 5, + Block16x16 = 6, + Block16x32 = 7, + Block32x16 = 8, + Block32x32 = 9, + Block32x64 = 10, + Block64x32 = 11, + Block64x64 = 12, + BlockSizes = 13, + BlockInvalid = BlockSizes + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/Buf2D.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/Buf2D.cs new file mode 100644 index 00000000..180d5e34 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/Buf2D.cs @@ -0,0 +1,10 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct Buf2D + { + public ArrayPtr Buf; + public int Stride; + } +} diff 
--git a/Ryujinx.Graphics.Nvdec.Vp9/Types/FrameType.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/FrameType.cs new file mode 100644 index 00000000..a783999e --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/FrameType.cs @@ -0,0 +1,8 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal enum FrameType + { + KeyFrame = 0, + InterFrame = 1 + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilter.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilter.cs new file mode 100644 index 00000000..8dc33bda --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilter.cs @@ -0,0 +1,27 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct LoopFilter + { + public int FilterLevel; + public int LastFiltLevel; + + public int SharpnessLevel; + public int LastSharpnessLevel; + + public bool ModeRefDeltaEnabled; + public bool ModeRefDeltaUpdate; + + // 0 = Intra, Last, GF, ARF + public Array4 RefDeltas; + public Array4 LastRefDeltas; + + // 0 = ZERO_MV, MV + public Array2 ModeDeltas; + public Array2 LastModeDeltas; + + public ArrayPtr Lfm; + public int LfmStride; + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterInfoN.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterInfoN.cs new file mode 100644 index 00000000..0ac38a7b --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterInfoN.cs @@ -0,0 +1,10 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct LoopFilterInfoN + { + public Array64 Lfthr; + public Array8>> Lvl; + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterMask.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterMask.cs new file mode 100644 index 00000000..4aff843a --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterMask.cs @@ -0,0 +1,24 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + // This structure holds bit masks for all 8x8 blocks in a 64x64 region. 
+ // Each 1 bit represents a position in which we want to apply the loop filter. + // Left_ entries refer to whether we apply a filter on the border to the + // left of the block. Above_ entries refer to whether or not to apply a + // filter on the above border. Int_ entries refer to whether or not to + // apply borders on the 4x4 edges within the 8x8 block that each bit + // represents. + // Since each transform is accompanied by a potentially different type of + // loop filter there is a different entry in the array for each transform size. + internal struct LoopFilterMask + { + public Array4 LeftY; + public Array4 AboveY; + public ulong Int4x4Y; + public Array4 LeftUv; + public Array4 AboveUv; + public ushort Int4x4Uv; + public Array64 LflY; + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterThresh.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterThresh.cs new file mode 100644 index 00000000..bea1d115 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/LoopFilterThresh.cs @@ -0,0 +1,13 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + // Need to align this structure so when it is declared and + // passed it can be loaded into vector registers. + internal struct LoopFilterThresh + { + public Array16 Mblim; + public Array16 Lim; + public Array16 HevThr; + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/MacroBlockD.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/MacroBlockD.cs new file mode 100644 index 00000000..f1111528 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/MacroBlockD.cs @@ -0,0 +1,179 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Video; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct MacroBlockD + { + public Array3 Plane; + public byte BmodeBlocksWl; + public byte BmodeBlocksHl; + + public Ptr Counts; + public TileInfo Tile; + + public int MiStride; + + // Grid of 8x8 cells is placed over the block. 
+ // If some of them belong to the same mbtree-block + // they will just have same mi[i][j] value + public ArrayPtr> Mi; + public Ptr LeftMi; + public Ptr AboveMi; + + public uint MaxBlocksWide; + public uint MaxBlocksHigh; + + public ArrayPtr> PartitionProbs; + + /* Distance of MB away from frame edges */ + public int MbToLeftEdge; + public int MbToRightEdge; + public int MbToTopEdge; + public int MbToBottomEdge; + + public Ptr Fc; + + /* pointers to reference frames */ + public Array2> BlockRefs; + + /* pointer to current frame */ + public Surface CurBuf; + + public Array3> AboveContext; + public Array3> LeftContext; + + public ArrayPtr AboveSegContext; + public Array8 LeftSegContext; + + /* Bit depth: 8, 10, 12 */ + public int Bd; + + public bool Lossless; + public bool Corrupted; + + public Ptr ErrorInfo; + + public int GetPredContextSegId() + { + sbyte aboveSip = !AboveMi.IsNull ? AboveMi.Value.SegIdPredicted : (sbyte)0; + sbyte leftSip = !LeftMi.IsNull ? LeftMi.Value.SegIdPredicted : (sbyte)0; + + return aboveSip + leftSip; + } + + public int GetSkipContext() + { + int aboveSkip = !AboveMi.IsNull ? AboveMi.Value.Skip : 0; + int leftSkip = !LeftMi.IsNull ? LeftMi.Value.Skip : 0; + return aboveSkip + leftSkip; + } + + public int GetPredContextSwitchableInterp() + { + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. + int leftType = !LeftMi.IsNull ? LeftMi.Value.InterpFilter : Constants.SwitchableFilters; + int aboveType = !AboveMi.IsNull ? 
AboveMi.Value.InterpFilter : Constants.SwitchableFilters; + + if (leftType == aboveType) + { + return leftType; + } + else if (leftType == Constants.SwitchableFilters) + { + return aboveType; + } + else if (aboveType == Constants.SwitchableFilters) + { + return leftType; + } + else + { + return Constants.SwitchableFilters; + } + } + + // The mode info data structure has a one element border above and to the + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. + // 0 - inter/inter, inter/--, --/inter, --/-- + // 1 - intra/inter, inter/intra + // 2 - intra/--, --/intra + // 3 - intra/intra + public int GetIntraInterContext() + { + if (!AboveMi.IsNull && !LeftMi.IsNull) + { // Both edges available + bool aboveIntra = !AboveMi.Value.IsInterBlock(); + bool leftIntra = !LeftMi.Value.IsInterBlock(); + return leftIntra && aboveIntra ? 3 : (leftIntra || aboveIntra ? 1 : 0); + } + else if (!AboveMi.IsNull || !LeftMi.IsNull) + { // One edge available + return 2 * (!(!AboveMi.IsNull ? AboveMi.Value : LeftMi.Value).IsInterBlock() ? 1 : 0); + } + return 0; + } + + // Returns a context number for the given MB prediction signal + // The mode info data structure has a one element border above and to the + // left of the entries corresponding to real blocks. + // The prediction flags in these dummy entries are initialized to 0. + public int GetTxSizeContext() + { + int maxTxSize = (int)Luts.MaxTxSizeLookup[(int)Mi[0].Value.SbType]; + int aboveCtx = (!AboveMi.IsNull && AboveMi.Value.Skip == 0) ? (int)AboveMi.Value.TxSize : maxTxSize; + int leftCtx = (!LeftMi.IsNull && LeftMi.Value.Skip == 0) ? (int)LeftMi.Value.TxSize : maxTxSize; + if (LeftMi.IsNull) + { + leftCtx = aboveCtx; + } + + if (AboveMi.IsNull) + { + aboveCtx = leftCtx; + } + + return (aboveCtx + leftCtx) > maxTxSize ? 
1 : 0; + } + + public void SetupBlockPlanes(int ssX, int ssY) + { + int i; + + for (i = 0; i < Constants.MaxMbPlane; i++) + { + Plane[i].SubsamplingX = i != 0 ? ssX : 0; + Plane[i].SubsamplingY = i != 0 ? ssY : 0; + } + } + + public void SetSkipContext(int miRow, int miCol) + { + int aboveIdx = miCol * 2; + int leftIdx = (miRow * 2) & 15; + int i; + for (i = 0; i < Constants.MaxMbPlane; ++i) + { + ref MacroBlockDPlane pd = ref Plane[i]; + pd.AboveContext = AboveContext[i].Slice(aboveIdx >> pd.SubsamplingX); + pd.LeftContext = new ArrayPtr(ref LeftContext[i][leftIdx >> pd.SubsamplingY], 16 - (leftIdx >> pd.SubsamplingY)); + } + } + + internal void SetMiRowCol(ref TileInfo tile, int miRow, int bh, int miCol, int bw, int miRows, int miCols) + { + MbToTopEdge = -((miRow * Constants.MiSize) * 8); + MbToBottomEdge = ((miRows - bh - miRow) * Constants.MiSize) * 8; + MbToLeftEdge = -((miCol * Constants.MiSize) * 8); + MbToRightEdge = ((miCols - bw - miCol) * Constants.MiSize) * 8; + + // Are edges available for intra prediction? + AboveMi = (miRow != 0) ? Mi[-MiStride] : Ptr.Null; + LeftMi = (miCol > tile.MiColStart) ? 
Mi[-1] : Ptr.Null; + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/MacroBlockDPlane.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/MacroBlockDPlane.cs new file mode 100644 index 00000000..ae4ec6f4 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/MacroBlockDPlane.cs @@ -0,0 +1,21 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct MacroBlockDPlane + { + public ArrayPtr DqCoeff; + public int SubsamplingX; + public int SubsamplingY; + public Buf2D Dst; + public Array2 Pre; + public ArrayPtr AboveContext; + public ArrayPtr LeftContext; + public Array8> SegDequant; + + // Number of 4x4s in current block + public ushort N4W, N4H; + // Log2 of N4W, N4H + public byte N4Wl, N4Hl; + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/ModeInfo.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/ModeInfo.cs new file mode 100644 index 00000000..8ef281d8 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/ModeInfo.cs @@ -0,0 +1,66 @@ +using Ryujinx.Common.Memory; +using System.Diagnostics; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct ModeInfo + { + // Common for both Inter and Intra blocks + public BlockSize SbType; + public PredictionMode Mode; + public TxSize TxSize; + public sbyte Skip; + public sbyte SegmentId; + public sbyte SegIdPredicted; // Valid only when TemporalUpdate is enabled + + // Only for Intra blocks + public PredictionMode UvMode; + + // Only for Inter blocks + public byte InterpFilter; + + // if ref_frame[idx] is equal to AltRefFrame then + // MacroBlockD.BlockRef[idx] is an altref + public Array2 RefFrame; + + public Array2 Mv; + + public Array4 Bmi; + + public PredictionMode GetYMode(int block) + { + return SbType < BlockSize.Block8x8 ? 
Bmi[block].Mode : Mode; + } + + public TxSize GetUvTxSize(ref MacroBlockDPlane pd) + { + Debug.Assert(SbType < BlockSize.Block8x8 || + Luts.SsSizeLookup[(int)SbType][pd.SubsamplingX][pd.SubsamplingY] != BlockSize.BlockInvalid); + return Luts.UvTxsizeLookup[(int)SbType][(int)TxSize][pd.SubsamplingX][pd.SubsamplingY]; + } + + public bool IsInterBlock() + { + return RefFrame[0] > Constants.IntraFrame; + } + + public bool HasSecondRef() + { + return RefFrame[1] > Constants.IntraFrame; + } + + private static readonly int[][] IdxNColumnToSubblock = new int[][] + { + new int[] { 1, 2 }, new int[] { 1, 3 }, new int[] { 3, 2 }, new int[] { 3, 3 } + }; + + // This function returns either the appropriate sub block or block's mv + // on whether the block_size < 8x8 and we have check_sub_blocks set. + public Mv GetSubBlockMv(int whichMv, int searchCol, int blockIdx) + { + return blockIdx >= 0 && SbType < BlockSize.Block8x8 + ? Bmi[IdxNColumnToSubblock[blockIdx][searchCol == 0 ? 1 : 0]].Mv[whichMv] + : Mv[whichMv]; + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/MotionVectorContext.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/MotionVectorContext.cs new file mode 100644 index 00000000..319c8dba --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/MotionVectorContext.cs @@ -0,0 +1,14 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal enum MotionVectorContext + { + BothZero = 0, + ZeroPlusPredicted = 1, + BothPredicted = 2, + NewPlusNonIntra = 3, + BothNew = 4, + IntraPlusNonIntra = 5, + BothIntra = 6, + InvalidCase = 9 + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/Mv.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/Mv.cs new file mode 100644 index 00000000..c1f99ade --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/Mv.cs @@ -0,0 +1,189 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Video; +using System; +using System.Diagnostics; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct Mv + { + public short Row; + public short Col; + + private static 
readonly byte[] LogInBase2 = new byte[] + { + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10 + }; + + public bool UseMvHp() + { + const int kMvRefThresh = 64; // Threshold for use of high-precision 1/8 mv + return Math.Abs(Row) < kMvRefThresh && Math.Abs(Col) < kMvRefThresh; + } + + public static bool MvJointVertical(MvJointType type) + { + return type == MvJointType.MvJointHzvnz || type == MvJointType.MvJointHnzvnz; + } + + public static bool MvJointHorizontal(MvJointType type) + { + return type == MvJointType.MvJointHnzvz || type == MvJointType.MvJointHnzvnz; + } + + private static int MvClassBase(MvClassType c) + { + return c != 0 ? Constants.Class0Size << ((int)c + 2) : 0; + } + + private static MvClassType GetMvClass(int z, Ptr offset) + { + MvClassType c = (z >= Constants.Class0Size * 4096) ? 
MvClassType.MvClass10 : (MvClassType)LogInBase2[z >> 3]; + if (!offset.IsNull) + { + offset.Value = z - MvClassBase(c); + } + + return c; + } + + private static void IncMvComponent(int v, ref Vp9BackwardUpdates counts, int comp, int incr, int usehp) + { + int s, z, c, o = 0, d, e, f; + Debug.Assert(v != 0); /* Should not be zero */ + s = v < 0 ? 1 : 0; + counts.Sign[comp][s] += (uint)incr; + z = (s != 0 ? -v : v) - 1; /* Magnitude - 1 */ + + c = (int)GetMvClass(z, new Ptr(ref o)); + counts.Classes[comp][c] += (uint)incr; + + d = (o >> 3); /* Int mv data */ + f = (o >> 1) & 3; /* Fractional pel mv data */ + e = (o & 1); /* High precision mv data */ + + if (c == (int)MvClassType.MvClass0) + { + counts.Class0[comp][d] += (uint)incr; + counts.Class0Fp[comp][d][f] += (uint)incr; + counts.Class0Hp[comp][e] += (uint)(usehp * incr); + } + else + { + int i; + int b = c + Constants.Class0Bits - 1; // Number of bits + for (i = 0; i < b; ++i) + { + counts.Bits[comp][i][((d >> i) & 1)] += (uint)incr; + } + + counts.Fp[comp][f] += (uint)incr; + counts.Hp[comp][e] += (uint)(usehp * incr); + } + } + + private MvJointType GetMvJoint() + { + if (Row == 0) + { + return Col == 0 ? MvJointType.MvJointZero : MvJointType.MvJointHnzvz; + } + else + { + return Col == 0 ? 
MvJointType.MvJointHzvnz : MvJointType.MvJointHnzvnz; + } + } + + internal void IncMv(Ptr counts) + { + if (!counts.IsNull) + { + MvJointType j = GetMvJoint(); + ++counts.Value.Joints[(int)j]; + + if (MvJointVertical(j)) + { + IncMvComponent(Row, ref counts.Value, 0, 1, 1); + } + + if (MvJointHorizontal(j)) + { + IncMvComponent(Col, ref counts.Value, 1, 1, 1); + } + } + } + + public void ClampMv(int minCol, int maxCol, int minRow, int maxRow) + { + Col = (short)Math.Clamp(Col, minCol, maxCol); + Row = (short)Math.Clamp(Row, minRow, maxRow); + } + + private const int MvBorder = (16 << 3); // Allow 16 pels in 1/8th pel units + + public void ClampMvRef(ref MacroBlockD xd) + { + ClampMv( + xd.MbToLeftEdge - MvBorder, + xd.MbToRightEdge + MvBorder, + xd.MbToTopEdge - MvBorder, + xd.MbToBottomEdge + MvBorder); + } + + public void LowerMvPrecision(bool allowHP) + { + bool useHP = allowHP && UseMvHp(); + if (!useHP) + { + if ((Row & 1) != 0) + { + Row += (short)(Row > 0 ? -1 : 1); + } + + if ((Col & 1) != 0) + { + Col += (short)(Col > 0 ? 
-1 : 1); + } + } + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/Mv32.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/Mv32.cs new file mode 100644 index 00000000..fb25d18e --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/Mv32.cs @@ -0,0 +1,8 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct Mv32 + { + public int Row; + public int Col; + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/MvClassType.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/MvClassType.cs new file mode 100644 index 00000000..68a0b59a --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/MvClassType.cs @@ -0,0 +1,17 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal enum MvClassType + { + MvClass0 = 0, /* (0, 2] integer pel */ + MvClass1 = 1, /* (2, 4] integer pel */ + MvClass2 = 2, /* (4, 8] integer pel */ + MvClass3 = 3, /* (8, 16] integer pel */ + MvClass4 = 4, /* (16, 32] integer pel */ + MvClass5 = 5, /* (32, 64] integer pel */ + MvClass6 = 6, /* (64, 128] integer pel */ + MvClass7 = 7, /* (128, 256] integer pel */ + MvClass8 = 8, /* (256, 512] integer pel */ + MvClass9 = 9, /* (512, 1024] integer pel */ + MvClass10 = 10, /* (1024,2048] integer pel */ + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/MvJointType.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/MvJointType.cs new file mode 100644 index 00000000..a20cb6d0 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/MvJointType.cs @@ -0,0 +1,10 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal enum MvJointType + { + MvJointZero = 0, /* Zero vector */ + MvJointHnzvz = 1, /* Vert zero, hor nonzero */ + MvJointHzvnz = 2, /* Hor zero, vert nonzero */ + MvJointHnzvnz = 3, /* Both components nonzero */ + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/MvRef.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/MvRef.cs new file mode 100644 index 00000000..71949a09 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/MvRef.cs @@ -0,0 +1,10 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal 
struct MvRef + { + public Array2 Mv; + public Array2 RefFrame; + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/PartitionType.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/PartitionType.cs new file mode 100644 index 00000000..096f9818 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/PartitionType.cs @@ -0,0 +1,12 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal enum PartitionType + { + PartitionNone, + PartitionHorz, + PartitionVert, + PartitionSplit, + PartitionTypes, + PartitionInvalid = PartitionTypes + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/PlaneType.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/PlaneType.cs new file mode 100644 index 00000000..790aa2a0 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/PlaneType.cs @@ -0,0 +1,9 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal enum PlaneType + { + Y = 0, + Uv = 1, + PlaneTypes + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/Position.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/Position.cs new file mode 100644 index 00000000..0d3b56f6 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/Position.cs @@ -0,0 +1,14 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct Position + { + public int Row; + public int Col; + + public Position(int row, int col) + { + Row = row; + Col = col; + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/PredictionMode.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/PredictionMode.cs new file mode 100644 index 00000000..bbb9be9a --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/PredictionMode.cs @@ -0,0 +1,21 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal enum PredictionMode + { + DcPred = 0, // Average of above and left pixels + VPred = 1, // Vertical + HPred = 2, // Horizontal + D45Pred = 3, // Directional 45 deg = round(arctan(1 / 1) * 180 / pi) + D135Pred = 4, // Directional 135 deg = 180 - 45 + D117Pred = 5, // Directional 117 deg = 180 - 63 + D153Pred = 6, // Directional 153 deg = 180 - 27 + D207Pred = 7, // Directional 207 
deg = 180 + 27 + D63Pred = 8, // Directional 63 deg = round(arctan(2 / 1) * 180 / pi) + TmPred = 9, // True-motion + NearestMv = 10, + NearMv = 11, + ZeroMv = 12, + NewMv = 13, + MbModeCount = 14 + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/RefBuffer.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/RefBuffer.cs new file mode 100644 index 00000000..9942dd05 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/RefBuffer.cs @@ -0,0 +1,8 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct RefBuffer + { + public Surface Buf; + public ScaleFactors Sf; + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/ReferenceMode.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/ReferenceMode.cs new file mode 100644 index 00000000..7cbf9f4e --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/ReferenceMode.cs @@ -0,0 +1,10 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal enum ReferenceMode + { + SingleReference = 0, + CompoundReference = 1, + ReferenceModeSelect = 2, + ReferenceModes = 3 + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/ScaleFactors.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/ScaleFactors.cs new file mode 100644 index 00000000..970f9680 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/ScaleFactors.cs @@ -0,0 +1,451 @@ +using Ryujinx.Common.Memory; +using System.Runtime.CompilerServices; +using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.Convolve; +using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.Filter; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct ScaleFactors + { + private const int RefScaleShift = 14; + private const int RefNoScale = (1 << RefScaleShift); + private const int RefInvalidScale = -1; + + private unsafe delegate void ConvolveFn( + byte* src, + int srcStride, + byte* dst, + int dstStride, + Array8[] filter, + int x0Q4, + int xStepQ4, + int y0Q4, + int yStepQ4, + int w, + int h); + + private unsafe delegate void HighbdConvolveFn( + ushort* src, + int srcStride, + ushort* dst, + int dstStride, + Array8[] filter, + int x0Q4, + int 
xStepQ4, + int y0Q4, + int yStepQ4, + int w, + int h, + int bd); + + private static readonly unsafe ConvolveFn[][][] PredictX16Y16 = new ConvolveFn[][][] + { + new ConvolveFn[][] + { + new ConvolveFn[] + { + ConvolveCopy, + ConvolveAvg + }, + new ConvolveFn[] + { + Convolve8Vert, + Convolve8AvgVert + } + }, + new ConvolveFn[][] + { + new ConvolveFn[] + { + Convolve8Horiz, + Convolve8AvgHoriz + }, + new ConvolveFn[] + { + Convolve8, + Convolve8Avg + } + } + }; + + private static readonly unsafe ConvolveFn[][][] PredictX16 = new ConvolveFn[][][] + { + new ConvolveFn[][] + { + new ConvolveFn[] + { + ScaledVert, + ScaledAvgVert + }, + new ConvolveFn[] + { + ScaledVert, + ScaledAvgVert + } + }, + new ConvolveFn[][] + { + new ConvolveFn[] + { + Scaled2D, + ScaledAvg2D + }, + new ConvolveFn[] + { + Scaled2D, + ScaledAvg2D + } + } + }; + + private static readonly unsafe ConvolveFn[][][] PredictY16 = new ConvolveFn[][][] + { + new ConvolveFn[][] + { + new ConvolveFn[] + { + ScaledHoriz, + ScaledAvgHoriz + }, + new ConvolveFn[] + { + Scaled2D, + ScaledAvg2D + } + }, + new ConvolveFn[][] + { + new ConvolveFn[] + { + ScaledHoriz, + ScaledAvgHoriz + }, + new ConvolveFn[] + { + Scaled2D, + ScaledAvg2D + } + } + }; + + private static readonly unsafe ConvolveFn[][][] Predict = new ConvolveFn[][][] + { + new ConvolveFn[][] + { + new ConvolveFn[] + { + Scaled2D, + ScaledAvg2D + }, + new ConvolveFn[] + { + Scaled2D, + ScaledAvg2D + } + }, + new ConvolveFn[][] + { + new ConvolveFn[] + { + Scaled2D, + ScaledAvg2D + }, + new ConvolveFn[] + { + Scaled2D, + ScaledAvg2D + } + } + }; + + private static readonly unsafe HighbdConvolveFn[][][] HighbdPredictX16Y16 = new HighbdConvolveFn[][][] + { + new HighbdConvolveFn[][] + { + new HighbdConvolveFn[] + { + HighbdConvolveCopy, + HighbdConvolveAvg + }, + new HighbdConvolveFn[] + { + HighbdConvolve8Vert, + HighbdConvolve8AvgVert + } + }, + new HighbdConvolveFn[][] + { + new HighbdConvolveFn[] + { + HighbdConvolve8Horiz, + HighbdConvolve8AvgHoriz 
+ }, + new HighbdConvolveFn[] + { + HighbdConvolve8, + HighbdConvolve8Avg + } + } + }; + + private static readonly unsafe HighbdConvolveFn[][][] HighbdPredictX16 = new HighbdConvolveFn[][][] + { + new HighbdConvolveFn[][] + { + new HighbdConvolveFn[] + { + HighbdConvolve8Vert, + HighbdConvolve8AvgVert + }, + new HighbdConvolveFn[] + { + HighbdConvolve8Vert, + HighbdConvolve8AvgVert + } + }, + new HighbdConvolveFn[][] + { + new HighbdConvolveFn[] + { + HighbdConvolve8, + HighbdConvolve8Avg + }, + new HighbdConvolveFn[] + { + HighbdConvolve8, + HighbdConvolve8Avg + } + } + }; + + private static readonly unsafe HighbdConvolveFn[][][] HighbdPredictY16 = new HighbdConvolveFn[][][] + { + new HighbdConvolveFn[][] + { + new HighbdConvolveFn[] + { + HighbdConvolve8Horiz, + HighbdConvolve8AvgHoriz + }, + new HighbdConvolveFn[] + { + HighbdConvolve8, + HighbdConvolve8Avg + } + }, + new HighbdConvolveFn[][] + { + new HighbdConvolveFn[] + { + HighbdConvolve8Horiz, + HighbdConvolve8AvgHoriz + }, + new HighbdConvolveFn[] + { + HighbdConvolve8, + HighbdConvolve8Avg + } + } + }; + + private static readonly unsafe HighbdConvolveFn[][][] HighbdPredict = new HighbdConvolveFn[][][] + { + new HighbdConvolveFn[][] + { + new HighbdConvolveFn[] + { + HighbdConvolve8, + HighbdConvolve8Avg + }, + new HighbdConvolveFn[] + { + HighbdConvolve8, + HighbdConvolve8Avg + } + }, + new HighbdConvolveFn[][] + { + new HighbdConvolveFn[] + { + HighbdConvolve8, + HighbdConvolve8Avg + }, + new HighbdConvolveFn[] + { + HighbdConvolve8, + HighbdConvolve8Avg + } + } + }; + + public int XScaleFP; // Horizontal fixed point scale factor + public int YScaleFP; // Vertical fixed point scale factor + public int XStepQ4; + public int YStepQ4; + + public int ScaleValueX(int val) + { + return IsScaled() ? ScaledX(val) : val; + } + + public int ScaleValueY(int val) + { + return IsScaled() ? 
ScaledY(val) : val; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe void InterPredict( + int horiz, + int vert, + int avg, + byte* src, + int srcStride, + byte* dst, + int dstStride, + int subpelX, + int subpelY, + int w, + int h, + Array8[] kernel, + int xs, + int ys) + { + if (XStepQ4 == 16) + { + if (YStepQ4 == 16) + { + // No scaling in either direction. + PredictX16Y16[horiz][vert][avg](src, srcStride, dst, dstStride, kernel, subpelX, xs, subpelY, ys, w, h); + } + else + { + // No scaling in x direction. Must always scale in the y direction. + PredictX16[horiz][vert][avg](src, srcStride, dst, dstStride, kernel, subpelX, xs, subpelY, ys, w, h); + } + } + else + { + if (YStepQ4 == 16) + { + // No scaling in the y direction. Must always scale in the x direction. + PredictY16[horiz][vert][avg](src, srcStride, dst, dstStride, kernel, subpelX, xs, subpelY, ys, w, h); + } + else + { + // Must always scale in both directions. + Predict[horiz][vert][avg](src, srcStride, dst, dstStride, kernel, subpelX, xs, subpelY, ys, w, h); + } + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe void HighbdInterPredict( + int horiz, + int vert, + int avg, + ushort* src, + int srcStride, + ushort* dst, + int dstStride, + int subpelX, + int subpelY, + int w, + int h, + Array8[] kernel, + int xs, + int ys, + int bd) + { + if (XStepQ4 == 16) + { + if (YStepQ4 == 16) + { + // No scaling in either direction. + HighbdPredictX16Y16[horiz][vert][avg](src, srcStride, dst, dstStride, kernel, subpelX, xs, subpelY, ys, w, h, bd); + } + else + { + // No scaling in x direction. Must always scale in the y direction. + HighbdPredictX16[horiz][vert][avg](src, srcStride, dst, dstStride, kernel, subpelX, xs, subpelY, ys, w, h, bd); + } + } + else + { + if (YStepQ4 == 16) + { + // No scaling in the y direction. Must always scale in the x direction. 
+ HighbdPredictY16[horiz][vert][avg](src, srcStride, dst, dstStride, kernel, subpelX, xs, subpelY, ys, w, h, bd); + } + else + { + // Must always scale in both directions. + HighbdPredict[horiz][vert][avg](src, srcStride, dst, dstStride, kernel, subpelX, xs, subpelY, ys, w, h, bd); + } + } + } + + private int ScaledX(int val) + { + return (int)((long)val * XScaleFP >> RefScaleShift); + } + + private int ScaledY(int val) + { + return (int)((long)val * YScaleFP >> RefScaleShift); + } + + private static int GetFixedPointScaleFactor(int otherSize, int thisSize) + { + // Calculate scaling factor once for each reference frame + // and use fixed point scaling factors in decoding and encoding routines. + // Hardware implementations can calculate scale factor in device driver + // and use multiplication and shifting on hardware instead of division. + return (otherSize << RefScaleShift) / thisSize; + } + + public Mv32 ScaleMv(ref Mv mv, int x, int y) + { + int xOffQ4 = ScaledX(x << SubpelBits) & SubpelMask; + int yOffQ4 = ScaledY(y << SubpelBits) & SubpelMask; + Mv32 res = new Mv32() + { + Row = ScaledY(mv.Row) + yOffQ4, + Col = ScaledX(mv.Col) + xOffQ4 + }; + return res; + } + + public bool IsValidScale() + { + return XScaleFP != RefInvalidScale && YScaleFP != RefInvalidScale; + } + + public bool IsScaled() + { + return IsValidScale() && (XScaleFP != RefNoScale || YScaleFP != RefNoScale); + } + + public static bool ValidRefFrameSize(int refWidth, int refHeight, int thisWidth, int thisHeight) + { + return 2 * thisWidth >= refWidth && + 2 * thisHeight >= refHeight && + thisWidth <= 16 * refWidth && + thisHeight <= 16 * refHeight; + } + + public void SetupScaleFactorsForFrame(int otherW, int otherH, int thisW, int thisH) + { + if (!ValidRefFrameSize(otherW, otherH, thisW, thisH)) + { + XScaleFP = RefInvalidScale; + YScaleFP = RefInvalidScale; + return; + } + + XScaleFP = GetFixedPointScaleFactor(otherW, thisW); + YScaleFP = GetFixedPointScaleFactor(otherH, thisH); + XStepQ4 = 
ScaledX(16); + YStepQ4 = ScaledY(16); + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/SegLvlFeatures.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/SegLvlFeatures.cs new file mode 100644 index 00000000..c3ea3fd8 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/SegLvlFeatures.cs @@ -0,0 +1,11 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal enum SegLvlFeatures + { + SegLvlAltQ = 0, // Use alternate Quantizer .... + SegLvlAltLf = 1, // Use alternate loop filter value... + SegLvlRefFrame = 2, // Optional Segment reference frame + SegLvlSkip = 3, // Optional Segment (0,0) + skip mode + SegLvlMax = 4 // Number of features supported + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/Segmentation.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/Segmentation.cs new file mode 100644 index 00000000..53d1f2cc --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/Segmentation.cs @@ -0,0 +1,71 @@ +using Ryujinx.Common.Memory; +using System.Diagnostics; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct Segmentation + { + private static readonly int[] SegFeatureDataSigned = new int[] { 1, 1, 0, 0 }; + private static readonly int[] SegFeatureDataMax = new int[] { QuantCommon.MaxQ, Vp9.LoopFilter.MaxLoopFilter, 3, 0 }; + + public bool Enabled; + public bool UpdateMap; + public byte UpdateData; + public byte AbsDelta; + public bool TemporalUpdate; + + public Array8> FeatureData; + public Array8 FeatureMask; + public int AqAvOffset; + + public static byte GetPredProbSegId(ref Array3 segPredProbs, ref MacroBlockD xd) + { + return segPredProbs[xd.GetPredContextSegId()]; + } + + public void ClearAllSegFeatures() + { + MemoryMarshal.CreateSpan(ref FeatureData[0][0], 8 * 4).Fill(0); + MemoryMarshal.CreateSpan(ref FeatureMask[0], 8).Fill(0); + AqAvOffset = 0; + } + + internal void EnableSegFeature(int segmentId, SegLvlFeatures featureId) + { + FeatureMask[segmentId] |= 1u << (int)featureId; + } + + internal static int 
FeatureDataMax(SegLvlFeatures featureId) + { + return SegFeatureDataMax[(int)featureId]; + } + + internal static int IsSegFeatureSigned(SegLvlFeatures featureId) + { + return SegFeatureDataSigned[(int)featureId]; + } + + internal void SetSegData(int segmentId, SegLvlFeatures featureId, int segData) + { + Debug.Assert(segData <= SegFeatureDataMax[(int)featureId]); + if (segData < 0) + { + Debug.Assert(SegFeatureDataSigned[(int)featureId] != 0); + Debug.Assert(-segData <= SegFeatureDataMax[(int)featureId]); + } + + FeatureData[segmentId][(int)featureId] = (short)segData; + } + + internal int IsSegFeatureActive(int segmentId, SegLvlFeatures featureId) + { + return Enabled && (FeatureMask[segmentId] & (1 << (int)featureId)) != 0 ? 1 : 0; + } + + internal short GetSegData(int segmentId, SegLvlFeatures featureId) + { + return FeatureData[segmentId][(int)featureId]; + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/Surface.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/Surface.cs new file mode 100644 index 00000000..2b2a173e --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/Surface.cs @@ -0,0 +1,80 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Video; +using System; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct Surface : ISurface + { + public ArrayPtr YBuffer; + public ArrayPtr UBuffer; + public ArrayPtr VBuffer; + + public unsafe Plane YPlane => new Plane((IntPtr)YBuffer.ToPointer(), YBuffer.Length); + public unsafe Plane UPlane => new Plane((IntPtr)UBuffer.ToPointer(), UBuffer.Length); + public unsafe Plane VPlane => new Plane((IntPtr)VBuffer.ToPointer(), VBuffer.Length); + + public int Width { get; } + public int Height { get; } + public int AlignedWidth { get; } + public int AlignedHeight { get; } + public int Stride { get; } + public int UvWidth { get; } + public int UvHeight { get; } + public int UvAlignedWidth { get; } + public int UvAlignedHeight { get; } + public int UvStride { get; } + public 
bool HighBd => false; + + private readonly IntPtr _pointer; + + public Surface(int width, int height) + { + const int border = 32; + const int ssX = 1; + const int ssY = 1; + const bool highbd = false; + + int alignedWidth = (width + 7) & ~7; + int alignedHeight = (height + 7) & ~7; + int yStride = ((alignedWidth + 2 * border) + 31) & ~31; + int yplaneSize = (alignedHeight + 2 * border) * yStride; + int uvWidth = alignedWidth >> ssX; + int uvHeight = alignedHeight >> ssY; + int uvStride = yStride >> ssX; + int uvBorderW = border >> ssX; + int uvBorderH = border >> ssY; + int uvplaneSize = (uvHeight + 2 * uvBorderH) * uvStride; + + int frameSize = (highbd ? 2 : 1) * (yplaneSize + 2 * uvplaneSize); + + IntPtr pointer = Marshal.AllocHGlobal(frameSize); + _pointer = pointer; + Width = width; + Height = height; + AlignedWidth = alignedWidth; + AlignedHeight = alignedHeight; + Stride = yStride; + UvWidth = (width + ssX) >> ssX; + UvHeight = (height + ssY) >> ssY; + UvAlignedWidth = uvWidth; + UvAlignedHeight = uvHeight; + UvStride = uvStride; + + ArrayPtr NewPlane(int start, int size, int border) + { + return new ArrayPtr(pointer + start + border, size - border); + } + + YBuffer = NewPlane(0, yplaneSize, (border * yStride) + border); + UBuffer = NewPlane(yplaneSize, uvplaneSize, (uvBorderH * uvStride) + uvBorderW); + VBuffer = NewPlane(yplaneSize + uvplaneSize, uvplaneSize, (uvBorderH * uvStride) + uvBorderW); + } + + public void Dispose() + { + Marshal.FreeHGlobal(_pointer); + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/TileInfo.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/TileInfo.cs new file mode 100644 index 00000000..67289c47 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/TileInfo.cs @@ -0,0 +1,85 @@ +using Ryujinx.Graphics.Nvdec.Vp9.Common; +using System; +using System.Diagnostics; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct TileInfo + { + private const int MinTileWidthB64 = 4; + private const int MaxTileWidthB64 = 64; + + public 
int MiRowStart, MiRowEnd; + public int MiColStart, MiColEnd; + + public static int MiColsAlignedToSb(int nMis) + { + return BitUtils.AlignPowerOfTwo(nMis, Constants.MiBlockSizeLog2); + } + + private static int GetTileOffset(int idx, int mis, int log2) + { + int sbCols = MiColsAlignedToSb(mis) >> Constants.MiBlockSizeLog2; + int offset = ((idx * sbCols) >> log2) << Constants.MiBlockSizeLog2; + return Math.Min(offset, mis); + } + + public void SetRow(ref Vp9Common cm, int row) + { + MiRowStart = GetTileOffset(row, cm.MiRows, cm.Log2TileRows); + MiRowEnd = GetTileOffset(row + 1, cm.MiRows, cm.Log2TileRows); + } + + public void SetCol(ref Vp9Common cm, int col) + { + MiColStart = GetTileOffset(col, cm.MiCols, cm.Log2TileCols); + MiColEnd = GetTileOffset(col + 1, cm.MiCols, cm.Log2TileCols); + } + + public void Init(ref Vp9Common cm, int row, int col) + { + SetRow(ref cm, row); + SetCol(ref cm, col); + } + + // Checks that the given miRow, miCol and search point + // are inside the borders of the tile. 
+ public bool IsInside(int miCol, int miRow, int miRows, ref Position miPos) + { + return !(miRow + miPos.Row < 0 || + miCol + miPos.Col < MiColStart || + miRow + miPos.Row >= miRows || + miCol + miPos.Col >= MiColEnd); + } + + private static int GetMinLog2TileCols(int sb64Cols) + { + int minLog2 = 0; + while ((MaxTileWidthB64 << minLog2) < sb64Cols) + { + ++minLog2; + } + + return minLog2; + } + + private static int GetMaxLog2TileCols(int sb64Cols) + { + int maxLog2 = 1; + while ((sb64Cols >> maxLog2) >= MinTileWidthB64) + { + ++maxLog2; + } + + return maxLog2 - 1; + } + + public static void GetTileNBits(int miCols, ref int minLog2TileCols, ref int maxLog2TileCols) + { + int sb64Cols = MiColsAlignedToSb(miCols) >> Constants.MiBlockSizeLog2; + minLog2TileCols = GetMinLog2TileCols(sb64Cols); + maxLog2TileCols = GetMaxLog2TileCols(sb64Cols); + Debug.Assert(minLog2TileCols <= maxLog2TileCols); + } + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/TxMode.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/TxMode.cs new file mode 100644 index 00000000..db914525 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/TxMode.cs @@ -0,0 +1,12 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + public enum TxMode + { + Only4X4 = 0, // Only 4x4 transform used + Allow8X8 = 1, // Allow block transform size up to 8x8 + Allow16X16 = 2, // Allow block transform size up to 16x16 + Allow32X32 = 3, // Allow block transform size up to 32x32 + TxModeSelect = 4, // Transform specified for each block + TxModes = 5 + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/TxSize.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/TxSize.cs new file mode 100644 index 00000000..994deb2c --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/TxSize.cs @@ -0,0 +1,11 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + public enum TxSize + { + Tx4x4 = 0, // 4x4 transform + Tx8x8 = 1, // 8x8 transform + Tx16x16 = 2, // 16x16 transform + Tx32x32 = 3, // 32x32 transform + TxSizes = 4 + } +} diff --git 
a/Ryujinx.Graphics.Nvdec.Vp9/Types/TxType.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/TxType.cs new file mode 100644 index 00000000..dbf7251c --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/TxType.cs @@ -0,0 +1,11 @@ +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal enum TxType + { + DctDct = 0, // DCT in both horizontal and vertical + AdstDct = 1, // ADST in vertical, DCT in horizontal + DctAdst = 2, // DCT in vertical, ADST in horizontal + AdstAdst = 3, // ADST in both directions + TxTypes = 4 + } +} diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Types/Vp9Common.cs b/Ryujinx.Graphics.Nvdec.Vp9/Types/Vp9Common.cs new file mode 100644 index 00000000..0dafb820 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec.Vp9/Types/Vp9Common.cs @@ -0,0 +1,334 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Nvdec.Vp9.Common; +using Ryujinx.Graphics.Video; + +namespace Ryujinx.Graphics.Nvdec.Vp9.Types +{ + internal struct Vp9Common + { + public MacroBlockD Mb; + + public ArrayPtr TileWorkerData; + + public InternalErrorInfo Error; + + public int Width; + public int Height; + + public int SubsamplingX; + public int SubsamplingY; + + public ArrayPtr PrevFrameMvs; + public ArrayPtr CurFrameMvs; + + public Array3 FrameRefs; + + public FrameType FrameType; + + // Flag signaling that the frame is encoded using only Intra modes. + public bool IntraOnly; + + public bool AllowHighPrecisionMv; + + // MBs, MbRows/Cols is in 16-pixel units; MiRows/Cols is in + // ModeInfo (8-pixel) units. + public int MBs; + public int MbRows, MiRows; + public int MbCols, MiCols; + public int MiStride; + + /* Profile settings */ + public TxMode TxMode; + + public int BaseQindex; + public int YDcDeltaQ; + public int UvDcDeltaQ; + public int UvAcDeltaQ; + public Array8> YDequant; + public Array8> UvDequant; + + /* We allocate a ModeInfo struct for each macroblock, together with + an extra row on top and column on the left to simplify prediction. 
*/ + public ArrayPtr Mip; /* Base of allocated array */ + public ArrayPtr Mi; /* Corresponds to upper left visible macroblock */ + + public ArrayPtr> MiGridBase; + public ArrayPtr> MiGridVisible; + + // Whether to use previous frame's motion vectors for prediction. + public bool UsePrevFrameMvs; + + // Persistent mb segment id map used in prediction. + public int SegMapIdx; + public int PrevSegMapIdx; + + public Array2> SegMapArray; + public ArrayPtr LastFrameSegMap; + public ArrayPtr CurrentFrameSegMap; + + public byte InterpFilter; + + public LoopFilterInfoN LfInfo; + + public Array4 RefFrameSignBias; /* Two state 0, 1 */ + + public LoopFilter Lf; + public Segmentation Seg; + + // Context probabilities for reference frame prediction + public sbyte CompFixedRef; + public Array2 CompVarRef; + public ReferenceMode ReferenceMode; + + public Ptr Fc; + public Ptr Counts; + + public bool FrameParallelDecodingMode; + + public int Log2TileCols, Log2TileRows; + + public ArrayPtr AboveSegContext; + public ArrayPtr AboveContext; + public int AboveContextAllocCols; + + public bool FrameIsIntraOnly() + { + return FrameType == FrameType.KeyFrame || IntraOnly; + } + + public bool CompoundReferenceAllowed() + { + int i; + for (i = 1; i < Constants.RefsPerFrame; ++i) + { + if (RefFrameSignBias[i + 1] != RefFrameSignBias[1]) + { + return true; + } + } + + return false; + } + + private static int CalcMiSize(int len) + { + // Len is in mi units. 
+ return len + Constants.MiBlockSize; + } + + public void SetMbMi(int width, int height) + { + int alignedWidth = BitUtils.AlignPowerOfTwo(width, Constants.MiSizeLog2); + int alignedHeight = BitUtils.AlignPowerOfTwo(height, Constants.MiSizeLog2); + + MiCols = alignedWidth >> Constants.MiSizeLog2; + MiRows = alignedHeight >> Constants.MiSizeLog2; + MiStride = CalcMiSize(MiCols); + + MbCols = (MiCols + 1) >> 1; + MbRows = (MiRows + 1) >> 1; + MBs = MbRows * MbCols; + } + + public void AllocTileWorkerData(MemoryAllocator allocator, int tileCols, int tileRows) + { + TileWorkerData = allocator.Allocate(tileCols * tileRows); + } + + public void FreeTileWorkerData(MemoryAllocator allocator) + { + allocator.Free(TileWorkerData); + } + + private void AllocSegMap(MemoryAllocator allocator, int segMapSize) + { + int i; + + for (i = 0; i < Constants.NumPingPongBuffers; ++i) + { + SegMapArray[i] = allocator.Allocate(segMapSize); + } + + // Init the index. + SegMapIdx = 0; + PrevSegMapIdx = 1; + + CurrentFrameSegMap = SegMapArray[SegMapIdx]; + LastFrameSegMap = SegMapArray[PrevSegMapIdx]; + } + + private void FreeSegMap(MemoryAllocator allocator) + { + int i; + + for (i = 0; i < Constants.NumPingPongBuffers; ++i) + { + allocator.Free(SegMapArray[i]); + SegMapArray[i] = ArrayPtr.Null; + } + + CurrentFrameSegMap = ArrayPtr.Null; + LastFrameSegMap = ArrayPtr.Null; + } + + private void DecAllocMi(MemoryAllocator allocator, int miSize) + { + Mip = allocator.Allocate(miSize); + MiGridBase = allocator.Allocate>(miSize); + } + + private void DecFreeMi(MemoryAllocator allocator) + { + allocator.Free(Mip); + Mip = ArrayPtr.Null; + allocator.Free(MiGridBase); + MiGridBase = ArrayPtr>.Null; + } + + public void FreeContextBuffers(MemoryAllocator allocator) + { + DecFreeMi(allocator); + FreeSegMap(allocator); + allocator.Free(AboveContext); + AboveContext = ArrayPtr.Null; + allocator.Free(AboveSegContext); + AboveSegContext = ArrayPtr.Null; + allocator.Free(Lf.Lfm); + Lf.Lfm = ArrayPtr.Null; 
+ allocator.Free(CurFrameMvs); + CurFrameMvs = ArrayPtr.Null; + if (UsePrevFrameMvs) + { + allocator.Free(PrevFrameMvs); + PrevFrameMvs = ArrayPtr.Null; + } + } + + private void AllocLoopFilter(MemoryAllocator allocator) + { + // Each lfm holds bit masks for all the 8x8 blocks in a 64x64 region. The + // stride and rows are rounded up / truncated to a multiple of 8. + Lf.LfmStride = (MiCols + (Constants.MiBlockSize - 1)) >> 3; + Lf.Lfm = allocator.Allocate(((MiRows + (Constants.MiBlockSize - 1)) >> 3) * Lf.LfmStride); + } + + public void AllocContextBuffers(MemoryAllocator allocator, int width, int height) + { + SetMbMi(width, height); + int newMiSize = MiStride * CalcMiSize(MiRows); + if (newMiSize != 0) + { + DecAllocMi(allocator, newMiSize); + } + + if (MiRows * MiCols != 0) + { + // Create the segmentation map structure and set to 0. + AllocSegMap(allocator, MiRows * MiCols); + } + + if (MiCols != 0) + { + AboveContext = allocator.Allocate(2 * TileInfo.MiColsAlignedToSb(MiCols) * Constants.MaxMbPlane); + AboveSegContext = allocator.Allocate(TileInfo.MiColsAlignedToSb(MiCols)); + } + + AllocLoopFilter(allocator); + + CurFrameMvs = allocator.Allocate(MiRows * MiCols); + // Using the same size as the current frame is fine here, + // as this is never true when we have a resolution change. + if (UsePrevFrameMvs) + { + PrevFrameMvs = allocator.Allocate(MiRows * MiCols); + } + } + + private unsafe void DecSetupMi() + { + Mi = Mip.Slice(MiStride + 1); + MiGridVisible = MiGridBase.Slice(MiStride + 1); + MemoryUtil.Fill(MiGridBase.ToPointer(), Ptr.Null, MiStride * (MiRows + 1)); + } + + public unsafe void InitContextBuffers() + { + DecSetupMi(); + if (!LastFrameSegMap.IsNull) + { + MemoryUtil.Fill(LastFrameSegMap.ToPointer(), (byte)0, MiRows * MiCols); + } + } + + private void SetPartitionProbs(ref MacroBlockD xd) + { + xd.PartitionProbs = FrameIsIntraOnly() + ? 
new ArrayPtr>(ref Fc.Value.KfPartitionProb[0], 16) + : new ArrayPtr>(ref Fc.Value.PartitionProb[0], 16); + } + + internal void InitMacroBlockD(ref MacroBlockD xd, ArrayPtr dqcoeff) + { + int i; + + for (i = 0; i < Constants.MaxMbPlane; ++i) + { + xd.Plane[i].DqCoeff = dqcoeff; + xd.AboveContext[i] = AboveContext.Slice(i * 2 * TileInfo.MiColsAlignedToSb(MiCols)); + + if (i == 0) + { + MemoryUtil.Copy(ref xd.Plane[i].SegDequant, ref YDequant); + } + else + { + MemoryUtil.Copy(ref xd.Plane[i].SegDequant, ref UvDequant); + } + xd.Fc = new Ptr(ref Fc.Value); + } + + xd.AboveSegContext = AboveSegContext; + xd.MiStride = MiStride; + xd.ErrorInfo = new Ptr(ref Error); + + SetPartitionProbs(ref xd); + } + + public void SetupSegmentationDequant() + { + const BitDepth bitDepth = BitDepth.Bits8; // TODO: Configurable + // Build y/uv dequant values based on segmentation. + if (Seg.Enabled) + { + int i; + for (i = 0; i < Constants.MaxSegments; ++i) + { + int qIndex = QuantCommon.GetQIndex(ref Seg, i, BaseQindex); + YDequant[i][0] = QuantCommon.DcQuant(qIndex, YDcDeltaQ, bitDepth); + YDequant[i][1] = QuantCommon.AcQuant(qIndex, 0, bitDepth); + UvDequant[i][0] = QuantCommon.DcQuant(qIndex, UvDcDeltaQ, bitDepth); + UvDequant[i][1] = QuantCommon.AcQuant(qIndex, UvAcDeltaQ, bitDepth); + } + } + else + { + int qIndex = BaseQindex; + // When segmentation is disabled, only the first value is used. The + // remaining are don't cares. 
+ YDequant[0][0] = QuantCommon.DcQuant(qIndex, YDcDeltaQ, bitDepth); + YDequant[0][1] = QuantCommon.AcQuant(qIndex, 0, bitDepth); + UvDequant[0][0] = QuantCommon.DcQuant(qIndex, UvDcDeltaQ, bitDepth); + UvDequant[0][1] = QuantCommon.AcQuant(qIndex, UvAcDeltaQ, bitDepth); + } + } + + public void SetupScaleFactors() + { + for (int i = 0; i < Constants.RefsPerFrame; ++i) + { + ref RefBuffer refBuf = ref FrameRefs[i]; + refBuf.Sf.SetupScaleFactorsForFrame(refBuf.Buf.Width, refBuf.Buf.Height, Width, Height); + } + } + } +} diff --git a/Ryujinx.Graphics.Nvdec/CdmaProcessor.cs b/Ryujinx.Graphics.Nvdec/CdmaProcessor.cs deleted file mode 100644 index c54a95f9..00000000 --- a/Ryujinx.Graphics.Nvdec/CdmaProcessor.cs +++ /dev/null @@ -1,103 +0,0 @@ -using Ryujinx.Graphics.Gpu; -using Ryujinx.Graphics.VDec; -using Ryujinx.Graphics.Vic; -using System.Collections.Generic; - -namespace Ryujinx.Graphics -{ - public class CdmaProcessor - { - private const int MethSetMethod = 0x10; - private const int MethSetData = 0x11; - - private readonly VideoDecoder _videoDecoder; - private readonly VideoImageComposer _videoImageComposer; - - public CdmaProcessor() - { - _videoDecoder = new VideoDecoder(); - _videoImageComposer = new VideoImageComposer(_videoDecoder); - } - - public void PushCommands(GpuContext gpu, int[] cmdBuffer) - { - List commands = new List(); - - ChClassId currentClass = 0; - - for (int index = 0; index < cmdBuffer.Length; index++) - { - int cmd = cmdBuffer[index]; - - int value = (cmd >> 0) & 0xffff; - int methodOffset = (cmd >> 16) & 0xfff; - - ChSubmissionMode submissionMode = (ChSubmissionMode)((cmd >> 28) & 0xf); - - switch (submissionMode) - { - case ChSubmissionMode.SetClass: currentClass = (ChClassId)(value >> 6); break; - - case ChSubmissionMode.Incrementing: - { - int count = value; - - for (int argIdx = 0; argIdx < count; argIdx++) - { - int argument = cmdBuffer[++index]; - - commands.Add(new ChCommand(currentClass, methodOffset + argIdx, argument)); - } - - 
break; - } - - case ChSubmissionMode.NonIncrementing: - { - int count = value; - - int[] arguments = new int[count]; - - for (int argIdx = 0; argIdx < count; argIdx++) - { - arguments[argIdx] = cmdBuffer[++index]; - } - - commands.Add(new ChCommand(currentClass, methodOffset, arguments)); - - break; - } - } - } - - ProcessCommands(gpu, commands.ToArray()); - } - - private void ProcessCommands(GpuContext gpu, ChCommand[] commands) - { - int methodOffset = 0; - - foreach (ChCommand command in commands) - { - switch (command.MethodOffset) - { - case MethSetMethod: methodOffset = command.Arguments[0]; break; - - case MethSetData: - { - if (command.ClassId == ChClassId.NvDec) - { - _videoDecoder.Process(gpu, methodOffset, command.Arguments); - } - else if (command.ClassId == ChClassId.GraphicsVic) - { - _videoImageComposer.Process(gpu, methodOffset, command.Arguments); - } - - break; - } - } - } - } - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/ChClassId.cs b/Ryujinx.Graphics.Nvdec/ChClassId.cs deleted file mode 100644 index 115f0b89..00000000 --- a/Ryujinx.Graphics.Nvdec/ChClassId.cs +++ /dev/null @@ -1,20 +0,0 @@ -namespace Ryujinx.Graphics -{ - enum ChClassId - { - Host1X = 0x1, - VideoEncodeMpeg = 0x20, - VideoEncodeNvEnc = 0x21, - VideoStreamingVi = 0x30, - VideoStreamingIsp = 0x32, - VideoStreamingIspB = 0x34, - VideoStreamingViI2c = 0x36, - GraphicsVic = 0x5d, - Graphics3D = 0x60, - GraphicsGpu = 0x61, - Tsec = 0xe0, - TsecB = 0xe1, - NvJpg = 0xc0, - NvDec = 0xf0 - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/ChCommandEntry.cs b/Ryujinx.Graphics.Nvdec/ChCommandEntry.cs deleted file mode 100644 index b01b77ed..00000000 --- a/Ryujinx.Graphics.Nvdec/ChCommandEntry.cs +++ /dev/null @@ -1,18 +0,0 @@ -namespace Ryujinx.Graphics -{ - struct ChCommand - { - public ChClassId ClassId { get; private set; } - - public int MethodOffset { get; private set; } - - public int[] Arguments { get; private set; } - - public 
ChCommand(ChClassId classId, int methodOffset, params int[] arguments) - { - ClassId = classId; - MethodOffset = methodOffset; - Arguments = arguments; - } - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/ChSubmissionMode.cs b/Ryujinx.Graphics.Nvdec/ChSubmissionMode.cs deleted file mode 100644 index 5c653019..00000000 --- a/Ryujinx.Graphics.Nvdec/ChSubmissionMode.cs +++ /dev/null @@ -1,13 +0,0 @@ -namespace Ryujinx.Graphics -{ - enum ChSubmissionMode - { - SetClass = 0, - Incrementing = 1, - NonIncrementing = 2, - Mask = 3, - Immediate = 4, - Restart = 5, - Gather = 6 - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/CodecId.cs b/Ryujinx.Graphics.Nvdec/CodecId.cs new file mode 100644 index 00000000..9aaa3d02 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/CodecId.cs @@ -0,0 +1,13 @@ +namespace Ryujinx.Graphics.Nvdec +{ + public enum CodecId + { + Mpeg = 1, + Vc1 = 2, + H264 = 3, + Mpeg4 = 4, + Vp8 = 5, + Hevc = 7, + Vp9 = 9 + } +} diff --git a/Ryujinx.Graphics.Nvdec/FrameDecodedEventArgs.cs b/Ryujinx.Graphics.Nvdec/FrameDecodedEventArgs.cs new file mode 100644 index 00000000..f5074f48 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/FrameDecodedEventArgs.cs @@ -0,0 +1,16 @@ +namespace Ryujinx.Graphics.Nvdec +{ + public struct FrameDecodedEventArgs + { + public CodecId CodecId { get; } + public uint LumaOffset { get; } + public uint ChromaOffset { get; } + + internal FrameDecodedEventArgs(CodecId codecId, uint lumaOffset, uint chromaOffset) + { + CodecId = codecId; + LumaOffset = lumaOffset; + ChromaOffset = chromaOffset; + } + } +} diff --git a/Ryujinx.Graphics.Nvdec/H264Decoder.cs b/Ryujinx.Graphics.Nvdec/H264Decoder.cs new file mode 100644 index 00000000..57ce12d0 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/H264Decoder.cs @@ -0,0 +1,43 @@ +using Ryujinx.Graphics.Nvdec.H264; +using Ryujinx.Graphics.Nvdec.Image; +using Ryujinx.Graphics.Nvdec.Types.H264; +using Ryujinx.Graphics.Video; +using System; + +namespace Ryujinx.Graphics.Nvdec 
+{ + static class H264Decoder + { + private const int MbSizeInPixels = 16; + + private static readonly Decoder _decoder = new Decoder(); + + public unsafe static void Decode(NvdecDevice device, ResourceManager rm, ref NvdecRegisters state) + { + PictureInfo pictureInfo = rm.Gmm.DeviceRead(state.SetPictureInfoOffset); + H264PictureInfo info = pictureInfo.Convert(); + + ReadOnlySpan bitstream = rm.Gmm.DeviceGetSpan(state.SetBitstreamOffset, (int)pictureInfo.BitstreamSize); + + int width = (int)pictureInfo.PicWidthInMbs * MbSizeInPixels; + int height = (int)pictureInfo.PicHeightInMbs * MbSizeInPixels; + + ISurface outputSurface = rm.Cache.Get(_decoder, CodecId.H264, 0, 0, width, height); + + if (_decoder.Decode(ref info, outputSurface, bitstream)) + { + int li = (int)pictureInfo.LumaOutputSurfaceIndex; + int ci = (int)pictureInfo.ChromaOutputSurfaceIndex; + + uint lumaOffset = state.SetSurfaceLumaOffset[li]; + uint chromaOffset = state.SetSurfaceChromaOffset[ci]; + + SurfaceWriter.Write(rm.Gmm, outputSurface, lumaOffset, chromaOffset); + + device.OnFrameDecoded(CodecId.H264, lumaOffset, chromaOffset); + } + + rm.Cache.Put(outputSurface); + } + } +} diff --git a/Ryujinx.Graphics.Nvdec/Image/SurfaceCache.cs b/Ryujinx.Graphics.Nvdec/Image/SurfaceCache.cs new file mode 100644 index 00000000..c362185f --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/Image/SurfaceCache.cs @@ -0,0 +1,151 @@ +using Ryujinx.Graphics.Gpu.Memory; +using Ryujinx.Graphics.Video; +using System; +using System.Diagnostics; + +namespace Ryujinx.Graphics.Nvdec.Image +{ + class SurfaceCache + { + // Must be equal to at least the maximum number of surfaces + // that can be in use simultaneously (which is 17, since H264 + // can have up to 16 reference frames, and we need another one + // for the current frame). + // Realistically, most codecs won't ever use more than 4 simultaneously. 
+ private const int MaxItems = 17; + + private struct CacheItem + { + public int ReferenceCount; + public uint LumaOffset; + public uint ChromaOffset; + public int Width; + public int Height; + public CodecId CodecId; + public ISurface Surface; + } + + private readonly CacheItem[] _pool = new CacheItem[MaxItems]; + + private readonly MemoryManager _gmm; + + public SurfaceCache(MemoryManager gmm) + { + _gmm = gmm; + } + + public ISurface Get(IDecoder decoder, CodecId codecId, uint lumaOffset, uint chromaOffset, int width, int height) + { + ISurface surface = null; + + // Try to find a compatible surface with same parameters, and same offsets. + for (int i = 0; i < MaxItems; i++) + { + ref CacheItem item = ref _pool[i]; + + if (item.LumaOffset == lumaOffset && + item.ChromaOffset == chromaOffset && + item.CodecId == codecId && + item.Width == width && + item.Height == height) + { + item.ReferenceCount++; + surface = item.Surface; + MoveToFront(i); + break; + } + } + + // If we failed to find a perfect match, now ignore the offsets. + // Search backwards to replace the oldest compatible surface, + // this avoids thrashing frequently used surfaces. + // Now we need to ensure that the surface is not in use, as we'll change the data. + if (surface == null) + { + for (int i = MaxItems - 1; i >= 0; i--) + { + ref CacheItem item = ref _pool[i]; + + if (item.ReferenceCount == 0 && item.CodecId == codecId && item.Width == width && item.Height == height) + { + item.ReferenceCount = 1; + item.LumaOffset = lumaOffset; + item.ChromaOffset = chromaOffset; + surface = item.Surface; + + if ((lumaOffset | chromaOffset) != 0) + { + SurfaceReader.Read(_gmm, surface, lumaOffset, chromaOffset); + } + + MoveToFront(i); + break; + } + } + } + + // If everything else failed, we try to create a new surface, + // and insert it on the pool. We replace the oldest item on the + // pool to avoid thrashing frequently used surfaces. 
+ // If even the oldest item is in use, that means that the entire pool + // is in use, in that case we throw as there's no place to insert + // the new surface. + if (surface == null) + { + if (_pool[MaxItems - 1].ReferenceCount == 0) + { + surface = decoder.CreateSurface(width, height); + + if ((lumaOffset | chromaOffset) != 0) + { + SurfaceReader.Read(_gmm, surface, lumaOffset, chromaOffset); + } + + MoveToFront(MaxItems - 1); + ref CacheItem item = ref _pool[0]; + item.Surface?.Dispose(); + item.ReferenceCount = 1; + item.LumaOffset = lumaOffset; + item.ChromaOffset = chromaOffset; + item.Width = width; + item.Height = height; + item.CodecId = codecId; + item.Surface = surface; + } + else + { + throw new InvalidOperationException("No free slot on the surface pool."); + } + } + + return surface; + } + + public void Put(ISurface surface) + { + for (int i = 0; i < MaxItems; i++) + { + ref CacheItem item = ref _pool[i]; + + if (item.Surface == surface) + { + item.ReferenceCount--; + Debug.Assert(item.ReferenceCount >= 0); + break; + } + } + } + + private void MoveToFront(int index) + { + // If index is 0 we don't need to do anything, + // as it's already on the front. 
+ if (index != 0) + { + CacheItem temp = _pool[index]; + Array.Copy(_pool, 0, _pool, 1, index); + _pool[0] = temp; + } + } + } +} diff --git a/Ryujinx.Graphics.Nvdec/Image/SurfaceCommon.cs b/Ryujinx.Graphics.Nvdec/Image/SurfaceCommon.cs new file mode 100644 index 00000000..6087f5b1 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/Image/SurfaceCommon.cs @@ -0,0 +1,26 @@ +using Ryujinx.Graphics.Texture; +using Ryujinx.Graphics.Video; +using System; + +namespace Ryujinx.Graphics.Nvdec.Image +{ + static class SurfaceCommon + { + public static int GetBlockLinearSize(int width, int height, int bytesPerPixel) + { + return SizeCalculator.GetBlockLinearTextureSize(width, height, 1, 1, 1, 1, 1, bytesPerPixel, 2, 1, 1).TotalSize; + } + + public static void Copy(ISurface src, ISurface dst) + { + src.YPlane.AsSpan().CopyTo(dst.YPlane.AsSpan()); + src.UPlane.AsSpan().CopyTo(dst.UPlane.AsSpan()); + src.VPlane.AsSpan().CopyTo(dst.VPlane.AsSpan()); + } + + public unsafe static Span AsSpan(this Plane plane) + { + return new Span((void*)plane.Pointer, plane.Length); + } + } +} diff --git a/Ryujinx.Graphics.Nvdec/Image/SurfaceReader.cs b/Ryujinx.Graphics.Nvdec/Image/SurfaceReader.cs new file mode 100644 index 00000000..a8199932 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/Image/SurfaceReader.cs @@ -0,0 +1,133 @@ +using Ryujinx.Common; +using Ryujinx.Graphics.Gpu.Memory; +using Ryujinx.Graphics.Texture; +using Ryujinx.Graphics.Video; +using System; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static Ryujinx.Graphics.Nvdec.Image.SurfaceCommon; + +namespace Ryujinx.Graphics.Nvdec.Image +{ + static class SurfaceReader + { + public static void Read(MemoryManager gmm, ISurface surface, uint lumaOffset, uint chromaOffset) + { + int width = surface.Width; + int height = surface.Height; + int stride = surface.Stride; + + ReadOnlySpan luma = gmm.DeviceGetSpan(lumaOffset, GetBlockLinearSize(width, height, 1)); + + ReadLuma(surface.YPlane.AsSpan(), luma, stride, width, 
height); + + int uvWidth = surface.UvWidth; + int uvHeight = surface.UvHeight; + int uvStride = surface.UvStride; + + ReadOnlySpan chroma = gmm.DeviceGetSpan(chromaOffset, GetBlockLinearSize(uvWidth, uvHeight, 2)); + + ReadChroma(surface.UPlane.AsSpan(), surface.VPlane.AsSpan(), chroma, uvStride, uvWidth, uvHeight); + } + + private static void ReadLuma(Span dst, ReadOnlySpan src, int dstStride, int width, int height) + { + LayoutConverter.ConvertBlockLinearToLinear(dst, width, height, dstStride, 1, 2, src); + } + + private unsafe static void ReadChroma( + Span dstU, + Span dstV, + ReadOnlySpan src, + int dstStride, + int width, + int height) + { + OffsetCalculator calc = new OffsetCalculator(width, height, 0, false, 2, 2); + + if (Sse2.IsSupported) + { + int strideTrunc64 = BitUtils.AlignDown(width * 2, 64); + + int outStrideGap = dstStride - width; + + fixed (byte* dstUPtr = dstU, dstVPtr = dstV, dataPtr = src) + { + byte* uPtr = dstUPtr; + byte* vPtr = dstVPtr; + + for (int y = 0; y < height; y++) + { + calc.SetY(y); + + for (int x = 0; x < strideTrunc64; x += 64, uPtr += 32, vPtr += 32) + { + byte* offset = dataPtr + calc.GetOffsetWithLineOffset64(x); + byte* offset2 = offset + 0x20; + byte* offset3 = offset + 0x100; + byte* offset4 = offset + 0x120; + + Vector128 value = *(Vector128*)offset; + Vector128 value2 = *(Vector128*)offset2; + Vector128 value3 = *(Vector128*)offset3; + Vector128 value4 = *(Vector128*)offset4; + + Vector128 u00 = Sse2.UnpackLow(value, value2); + Vector128 v00 = Sse2.UnpackHigh(value, value2); + Vector128 u01 = Sse2.UnpackLow(value3, value4); + Vector128 v01 = Sse2.UnpackHigh(value3, value4); + + Vector128 u10 = Sse2.UnpackLow(u00, v00); + Vector128 v10 = Sse2.UnpackHigh(u00, v00); + Vector128 u11 = Sse2.UnpackLow(u01, v01); + Vector128 v11 = Sse2.UnpackHigh(u01, v01); + + Vector128 u20 = Sse2.UnpackLow(u10, v10); + Vector128 v20 = Sse2.UnpackHigh(u10, v10); + Vector128 u21 = Sse2.UnpackLow(u11, v11); + Vector128 v21 = 
Sse2.UnpackHigh(u11, v11); + + Vector128 u30 = Sse2.UnpackLow(u20, v20); + Vector128 v30 = Sse2.UnpackHigh(u20, v20); + Vector128 u31 = Sse2.UnpackLow(u21, v21); + Vector128 v31 = Sse2.UnpackHigh(u21, v21); + + *(Vector128*)uPtr = u30; + *(Vector128*)(uPtr + 16) = u31; + *(Vector128*)vPtr = v30; + *(Vector128*)(vPtr + 16) = v31; + } + + for (int x = strideTrunc64 / 2; x < width; x++, uPtr++, vPtr++) + { + byte* offset = dataPtr + calc.GetOffset(x); + + *uPtr = *offset; + *vPtr = *(offset + 1); + } + + uPtr += outStrideGap; + vPtr += outStrideGap; + } + } + } + else + { + for (int y = 0; y < height; y++) + { + int dstBaseOffset = y * dstStride; + + calc.SetY(y); + + for (int x = 0; x < width; x++) + { + int srcOffset = calc.GetOffset(x); + + dstU[dstBaseOffset + x] = src[srcOffset]; + dstV[dstBaseOffset + x] = src[srcOffset + 1]; + } + } + } + } + } +} diff --git a/Ryujinx.Graphics.Nvdec/Image/SurfaceWriter.cs b/Ryujinx.Graphics.Nvdec/Image/SurfaceWriter.cs new file mode 100644 index 00000000..5c294621 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/Image/SurfaceWriter.cs @@ -0,0 +1,126 @@ +using Ryujinx.Common; +using Ryujinx.Graphics.Gpu.Memory; +using Ryujinx.Graphics.Texture; +using Ryujinx.Graphics.Video; +using System; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static Ryujinx.Graphics.Nvdec.Image.SurfaceCommon; +using static Ryujinx.Graphics.Nvdec.MemoryExtensions; + +namespace Ryujinx.Graphics.Nvdec.Image +{ + static class SurfaceWriter + { + public static void Write(MemoryManager gmm, ISurface surface, uint lumaOffset, uint chromaOffset) + { + int lumaSize = GetBlockLinearSize(surface.Width, surface.Height, 1); + + using var luma = gmm.GetWritableRegion(ExtendOffset(lumaOffset), lumaSize); + + WriteLuma( + luma.Memory.Span, + surface.YPlane.AsSpan(), + surface.Stride, + surface.Width, + surface.Height); + + int chromaSize = GetBlockLinearSize(surface.UvWidth, surface.UvHeight, 2); + + using var chroma = 
gmm.GetWritableRegion(ExtendOffset(chromaOffset), chromaSize); + + WriteChroma( + chroma.Memory.Span, + surface.UPlane.AsSpan(), + surface.VPlane.AsSpan(), + surface.UvStride, + surface.UvWidth, + surface.UvHeight); + } + + private static void WriteLuma(Span dst, ReadOnlySpan src, int srcStride, int width, int height) + { + LayoutConverter.ConvertLinearToBlockLinear(dst, width, height, srcStride, 1, 2, src); + } + + private unsafe static void WriteChroma( + Span dst, + ReadOnlySpan srcU, + ReadOnlySpan srcV, + int srcStride, + int width, + int height) + { + OffsetCalculator calc = new OffsetCalculator(width, height, 0, false, 2, 2); + + if (Sse2.IsSupported) + { + int strideTrunc64 = BitUtils.AlignDown(width * 2, 64); + + int inStrideGap = srcStride - width; + + fixed (byte* outputPtr = dst, srcUPtr = srcU, srcVPtr = srcV) + { + byte* inUPtr = srcUPtr; + byte* inVPtr = srcVPtr; + + for (int y = 0; y < height; y++) + { + calc.SetY(y); + + for (int x = 0; x < strideTrunc64; x += 64, inUPtr += 32, inVPtr += 32) + { + byte* offset = outputPtr + calc.GetOffsetWithLineOffset64(x); + byte* offset2 = offset + 0x20; + byte* offset3 = offset + 0x100; + byte* offset4 = offset + 0x120; + + Vector128 value = *(Vector128*)inUPtr; + Vector128 value2 = *(Vector128*)inVPtr; + Vector128 value3 = *(Vector128*)(inUPtr + 16); + Vector128 value4 = *(Vector128*)(inVPtr + 16); + + Vector128 uv0 = Sse2.UnpackLow(value, value2); + Vector128 uv1 = Sse2.UnpackHigh(value, value2); + Vector128 uv2 = Sse2.UnpackLow(value3, value4); + Vector128 uv3 = Sse2.UnpackHigh(value3, value4); + + *(Vector128*)offset = uv0; + *(Vector128*)offset2 = uv1; + *(Vector128*)offset3 = uv2; + *(Vector128*)offset4 = uv3; + } + + for (int x = strideTrunc64 / 2; x < width; x++, inUPtr++, inVPtr++) + { + byte* offset = outputPtr + calc.GetOffset(x); + + *offset = *inUPtr; + *(offset + 1) = *inVPtr; + } + + inUPtr += inStrideGap; + inVPtr += inStrideGap; + } + } + } + else + { + for (int y = 0; y < height; y++) + { + 
int srcBaseOffset = y * srcStride; + + calc.SetY(y); + + for (int x = 0; x < width; x++) + { + int dstOffset = calc.GetOffset(x); + + dst[dstOffset + 0] = srcU[srcBaseOffset + x]; + dst[dstOffset + 1] = srcV[srcBaseOffset + x]; + } + } + } + } + } +} diff --git a/Ryujinx.Graphics.Nvdec/MemoryExtensions.cs b/Ryujinx.Graphics.Nvdec/MemoryExtensions.cs new file mode 100644 index 00000000..2855a8c7 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/MemoryExtensions.cs @@ -0,0 +1,28 @@ +using Ryujinx.Graphics.Gpu.Memory; +using System; + +namespace Ryujinx.Graphics.Nvdec +{ + static class MemoryExtensions + { + public static T DeviceRead(this MemoryManager gmm, uint offset) where T : unmanaged + { + return gmm.Read((ulong)offset << 8); + } + + public static ReadOnlySpan DeviceGetSpan(this MemoryManager gmm, uint offset, int size) + { + return gmm.GetSpan((ulong)offset << 8, size); + } + + public static void DeviceWrite(this MemoryManager gmm, uint offset, ReadOnlySpan data) + { + gmm.Write((ulong)offset << 8, data); + } + + public static ulong ExtendOffset(uint offset) + { + return (ulong)offset << 8; + } + } +} diff --git a/Ryujinx.Graphics.Nvdec/NvdecDevice.cs b/Ryujinx.Graphics.Nvdec/NvdecDevice.cs new file mode 100644 index 00000000..cc22cb2a --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/NvdecDevice.cs @@ -0,0 +1,55 @@ +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.Device; +using Ryujinx.Graphics.Gpu.Memory; +using Ryujinx.Graphics.Nvdec.Image; +using System; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Nvdec +{ + public class NvdecDevice : IDeviceState + { + private readonly ResourceManager _rm; + private readonly DeviceState _state; + + public event Action FrameDecoded; + + public NvdecDevice(MemoryManager gmm) + { + _rm = new ResourceManager(gmm, new SurfaceCache(gmm)); + _state = new DeviceState(new Dictionary + { + { nameof(NvdecRegisters.Execute), new RwCallback(Execute, null) } + }); + } + + public int Read(int offset) => _state.Read(offset); 
+ public void Write(int offset, int data) => _state.Write(offset, data); + + private void Execute(int data) + { + Decode((CodecId)_state.State.SetCodecID); + } + + private void Decode(CodecId codecId) + { + switch (codecId) + { + case CodecId.H264: + H264Decoder.Decode(this, _rm, ref _state.State); + break; + case CodecId.Vp9: + Vp9Decoder.Decode(this, _rm, ref _state.State); + break; + default: + Logger.PrintError(LogClass.Nvdec, $"Unsupported codec \"{codecId}\"."); + break; + } + } + + internal void OnFrameDecoded(CodecId codecId, uint lumaOffset, uint chromaOffset) + { + FrameDecoded?.Invoke(new FrameDecodedEventArgs(codecId, lumaOffset, chromaOffset)); + } + } +} diff --git a/Ryujinx.Graphics.Nvdec/NvdecRegisters.cs b/Ryujinx.Graphics.Nvdec/NvdecRegisters.cs new file mode 100644 index 00000000..b40e08b0 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/NvdecRegisters.cs @@ -0,0 +1,41 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Nvdec +{ + // Note: Most of those names are not official. 
+ unsafe struct NvdecRegisters + { + public fixed uint Reserved0[128]; + public uint SetCodecID; + public fixed uint Reserved204[63]; + public uint Execute; + public fixed uint Reserved304[63]; + public uint SetPlatformID; + public uint SetPictureInfoOffset; + public uint SetBitstreamOffset; + public uint SetFrameNumber; + public uint SetH264SliceDataOffsetsOffset; // Also used by VC1 + public uint SetH264MvDumpOffset; // Also used by VC1 + public uint Unknown418; // Used by VC1 + public uint Unknown41C; + public uint Unknown420; // Used by VC1 + public uint SetFrameStatsOffset; + public uint SetH264LastSurfaceLumaOffset; + public uint SetH264LastSurfaceChromaOffset; + public Array17 SetSurfaceLumaOffset; + public Array17 SetSurfaceChromaOffset; + public uint Unknown4B8; + public uint Unknown4BC; + public uint SetCryptoData0Offset; + public uint SetCryptoData1Offset; + public Array62 Unknown4C8; + public uint SetVp9EntropyProbsOffset; + public uint SetVp9BackwardUpdatesOffset; + public uint SetVp9LastFrameSegMapOffset; + public uint SetVp9CurrFrameSegMapOffset; + public uint Unknown5D0; + public uint SetVp9LastFrameMvsOffset; + public uint SetVp9CurrFrameMvsOffset; + public uint Unknown5DC; + } +} diff --git a/Ryujinx.Graphics.Nvdec/ResourceManager.cs b/Ryujinx.Graphics.Nvdec/ResourceManager.cs new file mode 100644 index 00000000..6e0d9ab2 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/ResourceManager.cs @@ -0,0 +1,17 @@ +using Ryujinx.Graphics.Gpu.Memory; +using Ryujinx.Graphics.Nvdec.Image; + +namespace Ryujinx.Graphics.Nvdec +{ + struct ResourceManager + { + public MemoryManager Gmm { get; } + public SurfaceCache Cache { get; } + + public ResourceManager(MemoryManager gmm, SurfaceCache cache) + { + Gmm = gmm; + Cache = cache; + } + } +} diff --git a/Ryujinx.Graphics.Nvdec/Ryujinx.Graphics.Nvdec.csproj b/Ryujinx.Graphics.Nvdec/Ryujinx.Graphics.Nvdec.csproj index ddc3a8af..3561cf80 100644 --- a/Ryujinx.Graphics.Nvdec/Ryujinx.Graphics.Nvdec.csproj +++ 
b/Ryujinx.Graphics.Nvdec/Ryujinx.Graphics.Nvdec.csproj @@ -1,4 +1,4 @@ - + netcoreapp3.1 @@ -15,11 +15,13 @@ - - - - + + + + + + diff --git a/Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs b/Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs new file mode 100644 index 00000000..92767e35 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs @@ -0,0 +1,120 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Video; + +namespace Ryujinx.Graphics.Nvdec.Types.H264 +{ + struct PictureInfo + { + Array18 Unknown0; + public uint BitstreamSize; + public uint NumSlices; + public uint Unknown50; + public uint Unknown54; + public uint Log2MaxPicOrderCntLsbMinus4; + public uint DeltaPicOrderAlwaysZeroFlag; + public uint FrameMbsOnlyFlag; + public uint PicWidthInMbs; + public uint PicHeightInMbs; + public uint BlockLayout; // Not supported on T210 + public uint EntropyCodingModeFlag; + public uint PicOrderPresentFlag; + public uint NumRefIdxL0ActiveMinus1; + public uint NumRefIdxL1ActiveMinus1; + public uint DeblockingFilterControlPresentFlag; + public uint RedundantPicCntPresentFlag; + public uint Transform8x8ModeFlag; + public uint LumaPitch; + public uint ChromaPitch; + public uint Unknown94; + public uint LumaSecondFieldOffset; + public uint Unknown9C; + public uint UnknownA0; + public uint ChromaSecondFieldOffset; + public uint UnknownA8; + public uint UnknownAC; + public ulong Flags; + public Array2 FieldOrderCnt; + public Array16 RefFrames; + public Array6> ScalingLists4x4; + public Array2> ScalingLists8x8; + public byte MvcextNumInterViewRefsL0; + public byte MvcextNumInterViewRefsL1; + public ushort Padding2A2; + public uint Unknown2A4; + public uint Unknown2A8; + public uint Unknown2AC; + public Array16 MvcextViewRefMasksL0; + public Array16 MvcextViewRefMasksL1; + public uint Flags2; + public Array10 Unknown2D4; + + public bool MbAdaptiveFrameFieldFlag => (Flags & (1 << 0)) != 0; + public bool Direct8x8InferenceFlag => (Flags & (1 << 1)) != 0; + 
public bool WeightedPredFlag => (Flags & (1 << 2)) != 0; + public bool ConstrainedIntraPredFlag => (Flags & (1 << 3)) != 0; + public bool IsReference => (Flags & (1 << 4)) != 0; + public bool FieldPicFlag => (Flags & (1 << 5)) != 0; + public bool BottomFieldFlag => (Flags & (1 << 6)) != 0; + public uint Log2MaxFrameNumMinus4 => (uint)(Flags >> 8) & 0xf; + public ushort ChromaFormatIdc => (ushort)((Flags >> 12) & 3); + public uint PicOrderCntType => (uint)(Flags >> 14) & 3; + public int PicInitQpMinus26 => ExtractSx(Flags, 16, 6); + public int ChromaQpIndexOffset => ExtractSx(Flags, 22, 5); + public int SecondChromaQpIndexOffset => ExtractSx(Flags, 27, 5); + public uint WeightedBipredIdc => (uint)(Flags >> 32) & 3; + public uint LumaOutputSurfaceIndex => (uint)(Flags >> 34) & 0x7f; + public uint ChromaOutputSurfaceIndex => (uint)(Flags >> 41) & 0x1f; + public ushort FrameNum => (ushort)(Flags >> 46); + public bool QpprimeYZeroTransformBypassFlag => (Flags2 & (1 << 1)) != 0; + + private static int ExtractSx(ulong packed, int lsb, int length) + { + return (int)((long)packed << (64 - (lsb + length)) >> (64 - length)); + } + + public H264PictureInfo Convert() + { + return new H264PictureInfo() + { + FieldOrderCnt = FieldOrderCnt, + IsReference = IsReference, + ChromaFormatIdc = ChromaFormatIdc, + FrameNum = FrameNum, + FieldPicFlag = FieldPicFlag, + BottomFieldFlag = BottomFieldFlag, + NumRefFrames = 0, + MbAdaptiveFrameFieldFlag = MbAdaptiveFrameFieldFlag, + ConstrainedIntraPredFlag = ConstrainedIntraPredFlag, + WeightedPredFlag = WeightedPredFlag, + WeightedBipredIdc = WeightedBipredIdc, + FrameMbsOnlyFlag = FrameMbsOnlyFlag != 0, + Transform8x8ModeFlag = Transform8x8ModeFlag != 0, + ChromaQpIndexOffset = ChromaQpIndexOffset, + SecondChromaQpIndexOffset = SecondChromaQpIndexOffset, + PicInitQpMinus26 = PicInitQpMinus26, + NumRefIdxL0ActiveMinus1 = NumRefIdxL0ActiveMinus1, + NumRefIdxL1ActiveMinus1 = NumRefIdxL1ActiveMinus1, + Log2MaxFrameNumMinus4 = 
Log2MaxFrameNumMinus4, + PicOrderCntType = PicOrderCntType, + Log2MaxPicOrderCntLsbMinus4 = Log2MaxPicOrderCntLsbMinus4, + DeltaPicOrderAlwaysZeroFlag = DeltaPicOrderAlwaysZeroFlag != 0, + Direct8x8InferenceFlag = Direct8x8InferenceFlag, + EntropyCodingModeFlag = EntropyCodingModeFlag != 0, + PicOrderPresentFlag = PicOrderPresentFlag != 0, + DeblockingFilterControlPresentFlag = DeblockingFilterControlPresentFlag != 0, + RedundantPicCntPresentFlag = RedundantPicCntPresentFlag != 0, + NumSliceGroupsMinus1 = 0, + SliceGroupMapType = 0, + SliceGroupChangeRateMinus1 = 0, + FmoAsoEnable = false, + ScalingMatrixPresent = true, + ScalingLists4x4 = ScalingLists4x4, + ScalingLists8x8 = ScalingLists8x8, + FrameType = 0, + PicWidthInMbsMinus1 = PicWidthInMbs - 1, + PicHeightInMapUnitsMinus1 = (PicHeightInMbs >> (FrameMbsOnlyFlag != 0 ? 0 : 1)) - 1, + QpprimeYZeroTransformBypassFlag = QpprimeYZeroTransformBypassFlag + }; + } + } +} diff --git a/Ryujinx.Graphics.Nvdec/Types/H264/ReferenceFrame.cs b/Ryujinx.Graphics.Nvdec/Types/H264/ReferenceFrame.cs new file mode 100644 index 00000000..5db311ae --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/Types/H264/ReferenceFrame.cs @@ -0,0 +1,10 @@ +namespace Ryujinx.Graphics.Nvdec.Types.H264 +{ + struct ReferenceFrame + { + public uint Unknown0; + public uint Unknown4; + public uint Unknown8; + public uint UnknownC; + } +} diff --git a/Ryujinx.Graphics.Nvdec/Types/Vp9/BackwardUpdates.cs b/Ryujinx.Graphics.Nvdec/Types/Vp9/BackwardUpdates.cs new file mode 100644 index 00000000..661e6cdd --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/Types/Vp9/BackwardUpdates.cs @@ -0,0 +1,72 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Video; + +namespace Ryujinx.Graphics.Nvdec.Types.Vp9 +{ + struct BackwardUpdates + { + public Array7>> InterModeCounts; + public Array4> YModeCounts; + public Array10> UvModeCounts; + public Array16> PartitionCounts; + public Array4> SwitchableInterpsCount; + public Array4> IntraInterCount; + public Array5> CompInterCount; 
+ public Array5>> SingleRefCount; + public Array5> CompRefCount; + public Array2> Tx32x32; + public Array2> Tx16x16; + public Array2> Tx8x8; + public Array3> MbSkipCount; + public Array4 Joints; + public Array2> Sign; + public Array2> Classes; + public Array2> Class0; + public Array2>> Bits; + public Array2>> Class0Fp; + public Array2> Fp; + public Array2> Class0Hp; + public Array2> Hp; + public Array4>>>>> CoefCounts; + public Array4>>>> EobCounts; + + public BackwardUpdates(ref Vp9BackwardUpdates counts) + { + InterModeCounts = new Array7>>(); + + for (int i = 0; i < 7; i++) + { + InterModeCounts[i][0][0] = counts.InterMode[i][2]; + InterModeCounts[i][0][1] = counts.InterMode[i][0] + counts.InterMode[i][1] + counts.InterMode[i][3]; + InterModeCounts[i][1][0] = counts.InterMode[i][0]; + InterModeCounts[i][1][1] = counts.InterMode[i][1] + counts.InterMode[i][3]; + InterModeCounts[i][2][0] = counts.InterMode[i][1]; + InterModeCounts[i][2][1] = counts.InterMode[i][3]; + } + + YModeCounts = counts.YMode; + UvModeCounts = counts.UvMode; + PartitionCounts = counts.Partition; + SwitchableInterpsCount = counts.SwitchableInterp; + IntraInterCount = counts.IntraInter; + CompInterCount = counts.CompInter; + SingleRefCount = counts.SingleRef; + CompRefCount = counts.CompRef; + Tx32x32 = counts.Tx32x32; + Tx16x16 = counts.Tx16x16; + Tx8x8 = counts.Tx8x8; + MbSkipCount = counts.Skip; + Joints = counts.Joints; + Sign = counts.Sign; + Classes = counts.Classes; + Class0 = counts.Class0; + Bits = counts.Bits; + Class0Fp = counts.Class0Fp; + Fp = counts.Fp; + Class0Hp = counts.Class0Hp; + Hp = counts.Hp; + CoefCounts = counts.Coef; + EobCounts = counts.EobBranch; + } + } +} diff --git a/Ryujinx.Graphics.Nvdec/Types/Vp9/EntropyProbs.cs b/Ryujinx.Graphics.Nvdec/Types/Vp9/EntropyProbs.cs new file mode 100644 index 00000000..bc848454 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/Types/Vp9/EntropyProbs.cs @@ -0,0 +1,139 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Video; + 
+namespace Ryujinx.Graphics.Nvdec.Types.Vp9 +{ + struct EntropyProbs + { + public Array10>> KfYModeProbE0ToE7; + public Array10> KfYModeProbE8; + public Array3 Padding384; + public Array7 SegTreeProbs; + public Array3 SegPredProbs; + public Array15 Padding391; + public Array10> KfUvModeProbE0ToE7; + public Array10 KfUvModeProbE8; + public Array6 Padding3FA; + public Array7> InterModeProb; + public Array4 IntraInterProb; + public Array10> UvModeProbE0ToE7; + public Array2> Tx8x8Prob; + public Array2> Tx16x16Prob; + public Array2> Tx32x32Prob; + public Array4 YModeProbE8; + public Array4> YModeProbE0ToE7; + public Array16> KfPartitionProb; + public Array16> PartitionProb; + public Array10 UvModeProbE8; + public Array4> SwitchableInterpProb; + public Array5 CompInterProb; + public Array4 SkipProbs; + public Array3 Joints; + public Array2 Sign; + public Array2> Class0; + public Array2> Fp; + public Array2 Class0Hp; + public Array2 Hp; + public Array2> Classes; + public Array2>> Class0Fp; + public Array2> Bits; + public Array5> SingleRefProb; + public Array5 CompRefProb; + public Array17 Padding58F; + public Array4>>>>> CoefProbs; + + public void Convert(ref Vp9EntropyProbs fc) + { + for (int i = 0; i < 10; i++) + { + for (int j = 0; j < 10; j++) + { + for (int k = 0; k < 9; k++) + { + fc.KfYModeProb[i][j][k] = k < 8 ? KfYModeProbE0ToE7[i][j][k] : KfYModeProbE8[i][j]; + } + } + } + + fc.SegTreeProb = SegTreeProbs; + fc.SegPredProb = SegPredProbs; + + for (int i = 0; i < 7; i++) + { + for (int j = 0; j < 3; j++) + { + fc.InterModeProb[i][j] = InterModeProb[i][j]; + } + } + + fc.IntraInterProb = IntraInterProb; + + for (int i = 0; i < 10; i++) + { + for (int j = 0; j < 9; j++) + { + fc.KfUvModeProb[i][j] = j < 8 ? KfUvModeProbE0ToE7[i][j] : KfUvModeProbE8[i]; + fc.UvModeProb[i][j] = j < 8 ? 
UvModeProbE0ToE7[i][j] : UvModeProbE8[i]; + } + } + + fc.Tx8x8Prob = Tx8x8Prob; + fc.Tx16x16Prob = Tx16x16Prob; + fc.Tx32x32Prob = Tx32x32Prob; + + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 9; j++) + { + fc.YModeProb[i][j] = j < 8 ? YModeProbE0ToE7[i][j] : YModeProbE8[i]; + } + } + + for (int i = 0; i < 16; i++) + { + for (int j = 0; j < 3; j++) + { + fc.KfPartitionProb[i][j] = KfPartitionProb[i][j]; + fc.PartitionProb[i][j] = PartitionProb[i][j]; + } + } + + fc.SwitchableInterpProb = SwitchableInterpProb; + fc.CompInterProb = CompInterProb; + fc.SkipProb[0] = SkipProbs[0]; + fc.SkipProb[1] = SkipProbs[1]; + fc.SkipProb[2] = SkipProbs[2]; + fc.Joints = Joints; + fc.Sign = Sign; + fc.Class0 = Class0; + fc.Fp = Fp; + fc.Class0Hp = Class0Hp; + fc.Hp = Hp; + fc.Classes = Classes; + fc.Class0Fp = Class0Fp; + fc.Bits = Bits; + fc.SingleRefProb = SingleRefProb; + fc.CompRefProb = CompRefProb; + + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 2; j++) + { + for (int k = 0; k < 2; k++) + { + for (int l = 0; l < 6; l++) + { + for (int m = 0; m < 6; m++) + { + for (int n = 0; n < 3; n++) + { + fc.CoefProbs[i][j][k][l][m][n] = CoefProbs[i][j][k][l][m][n]; + } + } + } + } + } + } + } + } +} diff --git a/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameFlags.cs b/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameFlags.cs new file mode 100644 index 00000000..88f1ac20 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameFlags.cs @@ -0,0 +1,12 @@ +namespace Ryujinx.Graphics.Nvdec.Types.Vp9 +{ + enum FrameFlags : uint + { + IsKeyFrame = 1 << 0, + LastFrameIsKeyFrame = 1 << 1, + FrameSizeChanged = 1 << 2, + ErrorResilientMode = 1 << 3, + LastShowFrame = 1 << 4, + IntraOnly = 1 << 5 + } +} diff --git a/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameSize.cs b/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameSize.cs new file mode 100644 index 00000000..70988b48 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameSize.cs @@ -0,0 +1,10 @@ +namespace Ryujinx.Graphics.Nvdec.Types.Vp9 +{ + struct 
FrameSize + { + public ushort Width; + public ushort Height; + public ushort LumaPitch; + public ushort ChromaPitch; + } +} diff --git a/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameStats.cs b/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameStats.cs new file mode 100644 index 00000000..3a3d4762 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameStats.cs @@ -0,0 +1,20 @@ +namespace Ryujinx.Graphics.Nvdec.Types.Vp9 +{ + struct FrameStats + { + public uint Unknown0; + public uint Unknown4; + public uint Pass2CycleCount; + public uint ErrorStatus; + public uint FrameStatusIntraCnt; + public uint FrameStatusInterCnt; + public uint FrameStatusSkipCtuCount; + public uint FrameStatusFwdMvxCnt; + public uint FrameStatusFwdMvyCnt; + public uint FrameStatusBwdMvxCnt; + public uint FrameStatusBwdMvyCnt; + public uint ErrorCtbPos; + public uint ErrorSlicePos; + public uint Unknown34; + } +} diff --git a/Ryujinx.Graphics.Nvdec/Types/Vp9/LoopFilter.cs b/Ryujinx.Graphics.Nvdec/Types/Vp9/LoopFilter.cs new file mode 100644 index 00000000..d8d5ab20 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/Types/Vp9/LoopFilter.cs @@ -0,0 +1,11 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Nvdec.Types.Vp9 +{ + struct LoopFilter + { + public byte ModeRefDeltaEnabled; + public Array4 RefDeltas; + public Array2 ModeDeltas; + } +} diff --git a/Ryujinx.Graphics.Nvdec/Types/Vp9/PictureInfo.cs b/Ryujinx.Graphics.Nvdec/Types/Vp9/PictureInfo.cs new file mode 100644 index 00000000..f1f9e2f1 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/Types/Vp9/PictureInfo.cs @@ -0,0 +1,85 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Video; + +namespace Ryujinx.Graphics.Nvdec.Types.Vp9 +{ + struct PictureInfo + { + public Array12 Unknown0; + public uint BitstreamSize; + public uint IsEncrypted; + public uint Unknown38; + public uint Reserved3C; + public uint BlockLayout; // Not supported on T210 + public uint WorkBufferSizeShr8; + public FrameSize LastFrameSize; + public FrameSize GoldenFrameSize; + public 
FrameSize AltFrameSize; + public FrameSize CurrentFrameSize; + public FrameFlags Flags; + public Array4 RefFrameSignBias; + public byte FirstLevel; + public byte SharpnessLevel; + public byte BaseQIndex; + public byte YDcDeltaQ; + public byte UvAcDeltaQ; + public byte UvDcDeltaQ; + public byte Lossless; + public byte TxMode; + public byte AllowHighPrecisionMv; + public byte InterpFilter; + public byte ReferenceMode; + public sbyte CompFixedRef; + public Array2 CompVarRef; + public byte Log2TileCols; + public byte Log2TileRows; + public Segmentation Seg; + public LoopFilter Lf; + public byte PaddingEB; + public uint WorkBufferSizeShr8New; // Not supported on T210 + public uint SurfaceParams; // Not supported on T210 + public uint UnknownF4; + public uint UnknownF8; + public uint UnknownFC; + + public uint BitDepth => (SurfaceParams >> 1) & 0xf; + + public Vp9PictureInfo Convert() + { + return new Vp9PictureInfo() + { + IsKeyFrame = Flags.HasFlag(FrameFlags.IsKeyFrame), + IntraOnly = Flags.HasFlag(FrameFlags.IntraOnly), + UsePrevInFindMvRefs = + !Flags.HasFlag(FrameFlags.ErrorResilientMode) && + !Flags.HasFlag(FrameFlags.FrameSizeChanged) && + !Flags.HasFlag(FrameFlags.IntraOnly) && + Flags.HasFlag(FrameFlags.LastShowFrame) && + !Flags.HasFlag(FrameFlags.LastFrameIsKeyFrame), + RefFrameSignBias = RefFrameSignBias, + BaseQIndex = BaseQIndex, + YDcDeltaQ = YDcDeltaQ, + UvDcDeltaQ = UvDcDeltaQ, + UvAcDeltaQ = UvAcDeltaQ, + Lossless = Lossless != 0, + TransformMode = TxMode, + AllowHighPrecisionMv = AllowHighPrecisionMv != 0, + InterpFilter = InterpFilter, + ReferenceMode = ReferenceMode, + CompFixedRef = CompFixedRef, + CompVarRef = CompVarRef, + Log2TileCols = Log2TileCols, + Log2TileRows = Log2TileRows, + SegmentEnabled = Seg.Enabled != 0, + SegmentMapUpdate = Seg.UpdateMap != 0, + SegmentMapTemporalUpdate = Seg.TemporalUpdate != 0, + SegmentAbsDelta = Seg.AbsDelta, + SegmentFeatureEnable = Seg.FeatureMask, + SegmentFeatureData = Seg.FeatureData, + ModeRefDeltaEnabled 
= Lf.ModeRefDeltaEnabled != 0, + RefDeltas = Lf.RefDeltas, + ModeDeltas = Lf.ModeDeltas + }; + } + } +} diff --git a/Ryujinx.Graphics.Nvdec/Types/Vp9/Segmentation.cs b/Ryujinx.Graphics.Nvdec/Types/Vp9/Segmentation.cs new file mode 100644 index 00000000..ed62293d --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/Types/Vp9/Segmentation.cs @@ -0,0 +1,14 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Nvdec.Types.Vp9 +{ + struct Segmentation + { + public byte Enabled; + public byte UpdateMap; + public byte TemporalUpdate; + public byte AbsDelta; + public Array8 FeatureMask; + public Array8> FeatureData; + } +} diff --git a/Ryujinx.Graphics.Nvdec/VDec/BitStreamWriter.cs b/Ryujinx.Graphics.Nvdec/VDec/BitStreamWriter.cs deleted file mode 100644 index db2d39e5..00000000 --- a/Ryujinx.Graphics.Nvdec/VDec/BitStreamWriter.cs +++ /dev/null @@ -1,75 +0,0 @@ -using System.IO; - -namespace Ryujinx.Graphics.VDec -{ - class BitStreamWriter - { - private const int BufferSize = 8; - - private Stream _baseStream; - - private int _buffer; - private int _bufferPos; - - public BitStreamWriter(Stream baseStream) - { - _baseStream = baseStream; - } - - public void WriteBit(bool value) - { - WriteBits(value ? 
1 : 0, 1); - } - - public void WriteBits(int value, int valueSize) - { - int valuePos = 0; - - int remaining = valueSize; - - while (remaining > 0) - { - int copySize = remaining; - - int free = GetFreeBufferBits(); - - if (copySize > free) - { - copySize = free; - } - - int mask = (1 << copySize) - 1; - - int srcShift = (valueSize - valuePos) - copySize; - int dstShift = (BufferSize - _bufferPos) - copySize; - - _buffer |= ((value >> srcShift) & mask) << dstShift; - - valuePos += copySize; - _bufferPos += copySize; - remaining -= copySize; - } - } - - private int GetFreeBufferBits() - { - if (_bufferPos == BufferSize) - { - Flush(); - } - - return BufferSize - _bufferPos; - } - - public void Flush() - { - if (_bufferPos != 0) - { - _baseStream.WriteByte((byte)_buffer); - - _buffer = 0; - _bufferPos = 0; - } - } - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/DecoderHelper.cs b/Ryujinx.Graphics.Nvdec/VDec/DecoderHelper.cs deleted file mode 100644 index 4f17d8d1..00000000 --- a/Ryujinx.Graphics.Nvdec/VDec/DecoderHelper.cs +++ /dev/null @@ -1,17 +0,0 @@ -using System; - -namespace Ryujinx.Graphics.VDec -{ - static class DecoderHelper - { - public static byte[] Combine(byte[] arr0, byte[] arr1) - { - byte[] output = new byte[arr0.Length + arr1.Length]; - - Buffer.BlockCopy(arr0, 0, output, 0, arr0.Length); - Buffer.BlockCopy(arr1, 0, output, arr0.Length, arr1.Length); - - return output; - } - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/FFmpeg.cs b/Ryujinx.Graphics.Nvdec/VDec/FFmpeg.cs deleted file mode 100644 index ccd01f0d..00000000 --- a/Ryujinx.Graphics.Nvdec/VDec/FFmpeg.cs +++ /dev/null @@ -1,168 +0,0 @@ -using FFmpeg.AutoGen; -using System; -using System.Runtime.InteropServices; - -namespace Ryujinx.Graphics.VDec -{ - static unsafe class FFmpegWrapper - { - private static AVCodec* _codec; - private static AVCodecContext* _context; - private static AVFrame* _frame; - private static SwsContext* _scalerCtx; - 
- private static int _scalerWidth; - private static int _scalerHeight; - - public static bool IsInitialized { get; private set; } - - public static void H264Initialize() - { - EnsureCodecInitialized(AVCodecID.AV_CODEC_ID_H264); - } - - public static void Vp9Initialize() - { - EnsureCodecInitialized(AVCodecID.AV_CODEC_ID_VP9); - } - - private static void EnsureCodecInitialized(AVCodecID codecId) - { - if (IsInitialized) - { - Uninitialize(); - } - - _codec = ffmpeg.avcodec_find_decoder(codecId); - _context = ffmpeg.avcodec_alloc_context3(_codec); - _frame = ffmpeg.av_frame_alloc(); - - ffmpeg.avcodec_open2(_context, _codec, null); - - IsInitialized = true; - } - - public static int DecodeFrame(byte[] data) - { - if (!IsInitialized) - { - throw new InvalidOperationException("Tried to use uninitialized codec!"); - } - - AVPacket packet; - - ffmpeg.av_init_packet(&packet); - - fixed (byte* ptr = data) - { - packet.data = ptr; - packet.size = data.Length; - - ffmpeg.avcodec_send_packet(_context, &packet); - } - - return ffmpeg.avcodec_receive_frame(_context, _frame); - } - - public static FFmpegFrame GetFrame() - { - if (!IsInitialized) - { - throw new InvalidOperationException("Tried to use uninitialized codec!"); - } - - AVFrame managedFrame = Marshal.PtrToStructure((IntPtr)_frame); - - byte*[] data = managedFrame.data.ToArray(); - - return new FFmpegFrame() - { - Width = managedFrame.width, - Height = managedFrame.height, - - LumaPtr = data[0], - ChromaBPtr = data[1], - ChromaRPtr = data[2] - }; - } - - public static FFmpegFrame GetFrameRgba() - { - if (!IsInitialized) - { - throw new InvalidOperationException("Tried to use uninitialized codec!"); - } - - AVFrame managedFrame = Marshal.PtrToStructure((IntPtr)_frame); - - EnsureScalerSetup(managedFrame.width, managedFrame.height); - - byte*[] data = managedFrame.data.ToArray(); - - int[] lineSizes = managedFrame.linesize.ToArray(); - - byte[] dst = new byte[managedFrame.width * managedFrame.height * 4]; - - fixed 
(byte* ptr = dst) - { - byte*[] dstData = new byte*[] { ptr }; - - int[] dstLineSizes = new int[] { managedFrame.width * 4 }; - - ffmpeg.sws_scale(_scalerCtx, data, lineSizes, 0, managedFrame.height, dstData, dstLineSizes); - } - - return new FFmpegFrame() - { - Width = managedFrame.width, - Height = managedFrame.height, - - Data = dst - }; - } - - private static void EnsureScalerSetup(int width, int height) - { - if (width == 0 || height == 0) - { - return; - } - - if (_scalerCtx == null || _scalerWidth != width || _scalerHeight != height) - { - FreeScaler(); - - _scalerCtx = ffmpeg.sws_getContext( - width, height, AVPixelFormat.AV_PIX_FMT_YUV420P, - width, height, AVPixelFormat.AV_PIX_FMT_RGBA, 0, null, null, null); - - _scalerWidth = width; - _scalerHeight = height; - } - } - - public static void Uninitialize() - { - if (IsInitialized) - { - ffmpeg.av_frame_unref(_frame); - ffmpeg.av_free(_frame); - ffmpeg.avcodec_close(_context); - - FreeScaler(); - - IsInitialized = false; - } - } - - private static void FreeScaler() - { - if (_scalerCtx != null) - { - ffmpeg.sws_freeContext(_scalerCtx); - - _scalerCtx = null; - } - } - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/FFmpegFrame.cs b/Ryujinx.Graphics.Nvdec/VDec/FFmpegFrame.cs deleted file mode 100644 index 535a70c9..00000000 --- a/Ryujinx.Graphics.Nvdec/VDec/FFmpegFrame.cs +++ /dev/null @@ -1,14 +0,0 @@ -namespace Ryujinx.Graphics.VDec -{ - unsafe struct FFmpegFrame - { - public int Width; - public int Height; - - public byte* LumaPtr; - public byte* ChromaBPtr; - public byte* ChromaRPtr; - - public byte[] Data; - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/H264BitStreamWriter.cs b/Ryujinx.Graphics.Nvdec/VDec/H264BitStreamWriter.cs deleted file mode 100644 index b4fad59b..00000000 --- a/Ryujinx.Graphics.Nvdec/VDec/H264BitStreamWriter.cs +++ /dev/null @@ -1,79 +0,0 @@ -using System.IO; - -namespace Ryujinx.Graphics.VDec -{ - class H264BitStreamWriter : 
BitStreamWriter - { - public H264BitStreamWriter(Stream baseStream) : base(baseStream) { } - - public void WriteU(int value, int valueSize) - { - WriteBits(value, valueSize); - } - - public void WriteSe(int value) - { - WriteExpGolombCodedInt(value); - } - - public void WriteUe(int value) - { - WriteExpGolombCodedUInt((uint)value); - } - - public void End() - { - WriteBit(true); - - Flush(); - } - - private void WriteExpGolombCodedInt(int value) - { - int sign = value <= 0 ? 0 : 1; - - if (value < 0) - { - value = -value; - } - - value = (value << 1) - sign; - - WriteExpGolombCodedUInt((uint)value); - } - - private void WriteExpGolombCodedUInt(uint value) - { - int size = 32 - CountLeadingZeros((int)value + 1); - - WriteBits(1, size); - - value -= (1u << (size - 1)) - 1; - - WriteBits((int)value, size - 1); - } - - private static readonly byte[] ClzNibbleTbl = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 }; - - private static int CountLeadingZeros(int value) - { - if (value == 0) - { - return 32; - } - - int nibbleIdx = 32; - int preCount, count = 0; - - do - { - nibbleIdx -= 4; - preCount = ClzNibbleTbl[(value >> nibbleIdx) & 0b1111]; - count += preCount; - } - while (preCount == 4); - - return count; - } - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/H264Decoder.cs b/Ryujinx.Graphics.Nvdec/VDec/H264Decoder.cs deleted file mode 100644 index 24c7e0b9..00000000 --- a/Ryujinx.Graphics.Nvdec/VDec/H264Decoder.cs +++ /dev/null @@ -1,238 +0,0 @@ -using System.IO; - -namespace Ryujinx.Graphics.VDec -{ - class H264Decoder - { - private int _log2MaxPicOrderCntLsbMinus4; - private bool _deltaPicOrderAlwaysZeroFlag; - private bool _frameMbsOnlyFlag; - private int _picWidthInMbs; - private int _picHeightInMapUnits; - private bool _entropyCodingModeFlag; - private bool _bottomFieldPicOrderInFramePresentFlag; - private int _numRefIdxL0DefaultActiveMinus1; - private int _numRefIdxL1DefaultActiveMinus1; - private bool 
_deblockingFilterControlPresentFlag; - private bool _redundantPicCntPresentFlag; - private bool _transform8x8ModeFlag; - private bool _mbAdaptiveFrameFieldFlag; - private bool _direct8x8InferenceFlag; - private bool _weightedPredFlag; - private bool _constrainedIntraPredFlag; - private bool _fieldPicFlag; - private bool _bottomFieldFlag; - private int _log2MaxFrameNumMinus4; - private int _chromaFormatIdc; - private int _picOrderCntType; - private int _picInitQpMinus26; - private int _chromaQpIndexOffset; - private int _chromaQpIndexOffset2; - private int _weightedBipredIdc; - private int _frameNumber; - private byte[] _scalingMatrix4; - private byte[] _scalingMatrix8; - - public void Decode(H264ParameterSets Params, H264Matrices matrices, byte[] frameData) - { - _log2MaxPicOrderCntLsbMinus4 = Params.Log2MaxPicOrderCntLsbMinus4; - _deltaPicOrderAlwaysZeroFlag = Params.DeltaPicOrderAlwaysZeroFlag; - _frameMbsOnlyFlag = Params.FrameMbsOnlyFlag; - _picWidthInMbs = Params.PicWidthInMbs; - _picHeightInMapUnits = Params.PicHeightInMapUnits; - _entropyCodingModeFlag = Params.EntropyCodingModeFlag; - _bottomFieldPicOrderInFramePresentFlag = Params.BottomFieldPicOrderInFramePresentFlag; - _numRefIdxL0DefaultActiveMinus1 = Params.NumRefIdxL0DefaultActiveMinus1; - _numRefIdxL1DefaultActiveMinus1 = Params.NumRefIdxL1DefaultActiveMinus1; - _deblockingFilterControlPresentFlag = Params.DeblockingFilterControlPresentFlag; - _redundantPicCntPresentFlag = Params.RedundantPicCntPresentFlag; - _transform8x8ModeFlag = Params.Transform8x8ModeFlag; - - _mbAdaptiveFrameFieldFlag = ((Params.Flags >> 0) & 1) != 0; - _direct8x8InferenceFlag = ((Params.Flags >> 1) & 1) != 0; - _weightedPredFlag = ((Params.Flags >> 2) & 1) != 0; - _constrainedIntraPredFlag = ((Params.Flags >> 3) & 1) != 0; - _fieldPicFlag = ((Params.Flags >> 5) & 1) != 0; - _bottomFieldFlag = ((Params.Flags >> 6) & 1) != 0; - - _log2MaxFrameNumMinus4 = (int)(Params.Flags >> 8) & 0xf; - _chromaFormatIdc = (int)(Params.Flags >> 
12) & 0x3; - _picOrderCntType = (int)(Params.Flags >> 14) & 0x3; - _picInitQpMinus26 = (int)(Params.Flags >> 16) & 0x3f; - _chromaQpIndexOffset = (int)(Params.Flags >> 22) & 0x1f; - _chromaQpIndexOffset2 = (int)(Params.Flags >> 27) & 0x1f; - _weightedBipredIdc = (int)(Params.Flags >> 32) & 0x3; - _frameNumber = (int)(Params.Flags >> 46) & 0x1ffff; - - _picInitQpMinus26 = (_picInitQpMinus26 << 26) >> 26; - _chromaQpIndexOffset = (_chromaQpIndexOffset << 27) >> 27; - _chromaQpIndexOffset2 = (_chromaQpIndexOffset2 << 27) >> 27; - - _scalingMatrix4 = matrices.ScalingMatrix4; - _scalingMatrix8 = matrices.ScalingMatrix8; - - if (FFmpegWrapper.IsInitialized) - { - FFmpegWrapper.DecodeFrame(frameData); - } - else - { - FFmpegWrapper.H264Initialize(); - - FFmpegWrapper.DecodeFrame(DecoderHelper.Combine(EncodeHeader(), frameData)); - } - } - - private byte[] EncodeHeader() - { - using (MemoryStream data = new MemoryStream()) - { - H264BitStreamWriter writer = new H264BitStreamWriter(data); - - // Sequence Parameter Set. - writer.WriteU(1, 24); - writer.WriteU(0, 1); - writer.WriteU(3, 2); - writer.WriteU(7, 5); - writer.WriteU(100, 8); - writer.WriteU(0, 8); - writer.WriteU(31, 8); - writer.WriteUe(0); - writer.WriteUe(_chromaFormatIdc); - - if (_chromaFormatIdc == 3) - { - writer.WriteBit(false); - } - - writer.WriteUe(0); - writer.WriteUe(0); - writer.WriteBit(false); - writer.WriteBit(false); //Scaling matrix present flag - - writer.WriteUe(_log2MaxFrameNumMinus4); - writer.WriteUe(_picOrderCntType); - - if (_picOrderCntType == 0) - { - writer.WriteUe(_log2MaxPicOrderCntLsbMinus4); - } - else if (_picOrderCntType == 1) - { - writer.WriteBit(_deltaPicOrderAlwaysZeroFlag); - - writer.WriteSe(0); - writer.WriteSe(0); - writer.WriteUe(0); - } - - int picHeightInMbs = _picHeightInMapUnits / (_frameMbsOnlyFlag ? 
1 : 2); - - writer.WriteUe(16); - writer.WriteBit(false); - writer.WriteUe(_picWidthInMbs - 1); - writer.WriteUe(picHeightInMbs - 1); - writer.WriteBit(_frameMbsOnlyFlag); - - if (!_frameMbsOnlyFlag) - { - writer.WriteBit(_mbAdaptiveFrameFieldFlag); - } - - writer.WriteBit(_direct8x8InferenceFlag); - writer.WriteBit(false); //Frame cropping flag - writer.WriteBit(false); //VUI parameter present flag - - writer.End(); - - // Picture Parameter Set. - writer.WriteU(1, 24); - writer.WriteU(0, 1); - writer.WriteU(3, 2); - writer.WriteU(8, 5); - - writer.WriteUe(0); - writer.WriteUe(0); - - writer.WriteBit(_entropyCodingModeFlag); - writer.WriteBit(false); - writer.WriteUe(0); - writer.WriteUe(_numRefIdxL0DefaultActiveMinus1); - writer.WriteUe(_numRefIdxL1DefaultActiveMinus1); - writer.WriteBit(_weightedPredFlag); - writer.WriteU(_weightedBipredIdc, 2); - writer.WriteSe(_picInitQpMinus26); - writer.WriteSe(0); - writer.WriteSe(_chromaQpIndexOffset); - writer.WriteBit(_deblockingFilterControlPresentFlag); - writer.WriteBit(_constrainedIntraPredFlag); - writer.WriteBit(_redundantPicCntPresentFlag); - writer.WriteBit(_transform8x8ModeFlag); - - writer.WriteBit(true); - - for (int index = 0; index < 6; index++) - { - writer.WriteBit(true); - - WriteScalingList(writer, _scalingMatrix4, index * 16, 16); - } - - if (_transform8x8ModeFlag) - { - for (int index = 0; index < 2; index++) - { - writer.WriteBit(true); - - WriteScalingList(writer, _scalingMatrix8, index * 64, 64); - } - } - - writer.WriteSe(_chromaQpIndexOffset2); - - writer.End(); - - return data.ToArray(); - } - } - - // ZigZag LUTs from libavcodec. 
- private static readonly byte[] ZigZagDirect = new byte[] - { - 0, 1, 8, 16, 9, 2, 3, 10, - 17, 24, 32, 25, 18, 11, 4, 5, - 12, 19, 26, 33, 40, 48, 41, 34, - 27, 20, 13, 6, 7, 14, 21, 28, - 35, 42, 49, 56, 57, 50, 43, 36, - 29, 22, 15, 23, 30, 37, 44, 51, - 58, 59, 52, 45, 38, 31, 39, 46, - 53, 60, 61, 54, 47, 55, 62, 63 - }; - - private static readonly byte[] ZigZagScan = new byte[] - { - 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, - 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4, - 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, - 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4 - }; - - private static void WriteScalingList(H264BitStreamWriter writer, byte[] list, int start, int count) - { - byte[] scan = count == 16 ? ZigZagScan : ZigZagDirect; - - int lastScale = 8; - - for (int index = 0; index < count; index++) - { - byte value = list[start + scan[index]]; - - int deltaScale = value - lastScale; - - writer.WriteSe(deltaScale); - - lastScale = value; - } - } - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/H264Matrices.cs b/Ryujinx.Graphics.Nvdec/VDec/H264Matrices.cs deleted file mode 100644 index a1524214..00000000 --- a/Ryujinx.Graphics.Nvdec/VDec/H264Matrices.cs +++ /dev/null @@ -1,8 +0,0 @@ -namespace Ryujinx.Graphics.VDec -{ - struct H264Matrices - { - public byte[] ScalingMatrix4; - public byte[] ScalingMatrix8; - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/H264ParameterSets.cs b/Ryujinx.Graphics.Nvdec/VDec/H264ParameterSets.cs deleted file mode 100644 index f242f0f2..00000000 --- a/Ryujinx.Graphics.Nvdec/VDec/H264ParameterSets.cs +++ /dev/null @@ -1,34 +0,0 @@ -using System.Runtime.InteropServices; - -namespace Ryujinx.Graphics.VDec -{ - [StructLayout(LayoutKind.Sequential, Pack = 4)] - struct H264ParameterSets - { - public int Log2MaxPicOrderCntLsbMinus4; - public bool DeltaPicOrderAlwaysZeroFlag; - public bool FrameMbsOnlyFlag; - public int PicWidthInMbs; - public int PicHeightInMapUnits; - public int 
Reserved6C; - public bool EntropyCodingModeFlag; - public bool BottomFieldPicOrderInFramePresentFlag; - public int NumRefIdxL0DefaultActiveMinus1; - public int NumRefIdxL1DefaultActiveMinus1; - public bool DeblockingFilterControlPresentFlag; - public bool RedundantPicCntPresentFlag; - public bool Transform8x8ModeFlag; - public int Unknown8C; - public int Unknown90; - public int Reserved94; - public int Unknown98; - public int Reserved9C; - public int ReservedA0; - public int UnknownA4; - public int ReservedA8; - public int UnknownAC; - public long Flags; - public int FrameNumber; - public int FrameNumber2; - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/VideoCodec.cs b/Ryujinx.Graphics.Nvdec/VDec/VideoCodec.cs deleted file mode 100644 index f031919d..00000000 --- a/Ryujinx.Graphics.Nvdec/VDec/VideoCodec.cs +++ /dev/null @@ -1,10 +0,0 @@ -namespace Ryujinx.Graphics.VDec -{ - enum VideoCodec - { - H264 = 3, - Vp8 = 5, - H265 = 7, - Vp9 = 9 - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/VideoDecoder.cs b/Ryujinx.Graphics.Nvdec/VDec/VideoDecoder.cs deleted file mode 100644 index 9afc9485..00000000 --- a/Ryujinx.Graphics.Nvdec/VDec/VideoDecoder.cs +++ /dev/null @@ -1,281 +0,0 @@ -using Ryujinx.Graphics.Gpu; -using Ryujinx.Graphics.Gpu.Memory; -using Ryujinx.Graphics.Vic; -using System; -using System.Runtime.InteropServices; - -namespace Ryujinx.Graphics.VDec -{ - unsafe class VideoDecoder - { - private H264Decoder _h264Decoder; - private Vp9Decoder _vp9Decoder; - - private VideoCodec _currentVideoCodec; - - private ulong _decoderContextAddress; - private ulong _frameDataAddress; - private ulong _vpxCurrLumaAddress; - private ulong _vpxRef0LumaAddress; - private ulong _vpxRef1LumaAddress; - private ulong _vpxRef2LumaAddress; - private ulong _vpxCurrChromaAddress; - private ulong _vpxRef0ChromaAddress; - private ulong _vpxRef1ChromaAddress; - private ulong _vpxRef2ChromaAddress; - private ulong 
_vpxProbTablesAddress; - - public VideoDecoder() - { - _h264Decoder = new H264Decoder(); - _vp9Decoder = new Vp9Decoder(); - } - - public void Process(GpuContext gpu, int methodOffset, int[] arguments) - { - VideoDecoderMeth method = (VideoDecoderMeth)methodOffset; - - switch (method) - { - case VideoDecoderMeth.SetVideoCodec: SetVideoCodec(arguments); break; - case VideoDecoderMeth.Execute: Execute(gpu); break; - case VideoDecoderMeth.SetDecoderCtxAddr: SetDecoderCtxAddr(arguments); break; - case VideoDecoderMeth.SetFrameDataAddr: SetFrameDataAddr(arguments); break; - case VideoDecoderMeth.SetVpxCurrLumaAddr: SetVpxCurrLumaAddr(arguments); break; - case VideoDecoderMeth.SetVpxRef0LumaAddr: SetVpxRef0LumaAddr(arguments); break; - case VideoDecoderMeth.SetVpxRef1LumaAddr: SetVpxRef1LumaAddr(arguments); break; - case VideoDecoderMeth.SetVpxRef2LumaAddr: SetVpxRef2LumaAddr(arguments); break; - case VideoDecoderMeth.SetVpxCurrChromaAddr: SetVpxCurrChromaAddr(arguments); break; - case VideoDecoderMeth.SetVpxRef0ChromaAddr: SetVpxRef0ChromaAddr(arguments); break; - case VideoDecoderMeth.SetVpxRef1ChromaAddr: SetVpxRef1ChromaAddr(arguments); break; - case VideoDecoderMeth.SetVpxRef2ChromaAddr: SetVpxRef2ChromaAddr(arguments); break; - case VideoDecoderMeth.SetVpxProbTablesAddr: SetVpxProbTablesAddr(arguments); break; - } - } - - private void SetVideoCodec(int[] arguments) - { - _currentVideoCodec = (VideoCodec)arguments[0]; - } - - private void Execute(GpuContext gpu) - { - if (_currentVideoCodec == VideoCodec.H264) - { - int frameDataSize = gpu.MemoryAccessor.ReadInt32(_decoderContextAddress + 0x48); - - H264ParameterSets Params = gpu.MemoryAccessor.Read(_decoderContextAddress + 0x58); - - H264Matrices matrices = new H264Matrices() - { - ScalingMatrix4 = gpu.MemoryAccessor.ReadBytes(_decoderContextAddress + 0x1c0, 6 * 16), - ScalingMatrix8 = gpu.MemoryAccessor.ReadBytes(_decoderContextAddress + 0x220, 2 * 64) - }; - - byte[] frameData = 
gpu.MemoryAccessor.ReadBytes(_frameDataAddress, frameDataSize); - - _h264Decoder.Decode(Params, matrices, frameData); - } - else if (_currentVideoCodec == VideoCodec.Vp9) - { - int frameDataSize = gpu.MemoryAccessor.ReadInt32(_decoderContextAddress + 0x30); - - Vp9FrameKeys keys = new Vp9FrameKeys() - { - CurrKey = (long)gpu.MemoryManager.Translate(_vpxCurrLumaAddress), - Ref0Key = (long)gpu.MemoryManager.Translate(_vpxRef0LumaAddress), - Ref1Key = (long)gpu.MemoryManager.Translate(_vpxRef1LumaAddress), - Ref2Key = (long)gpu.MemoryManager.Translate(_vpxRef2LumaAddress) - }; - - Vp9FrameHeader header = ReadStruct(gpu.MemoryAccessor, _decoderContextAddress + 0x48); - - Vp9ProbabilityTables probs = new Vp9ProbabilityTables() - { - SegmentationTreeProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x387, 0x7), - SegmentationPredProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x38e, 0x3), - Tx8x8Probs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x470, 0x2), - Tx16x16Probs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x472, 0x4), - Tx32x32Probs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x476, 0x6), - CoefProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x5a0, 0x900), - SkipProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x537, 0x3), - InterModeProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x400, 0x1c), - InterpFilterProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x52a, 0x8), - IsInterProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x41c, 0x4), - CompModeProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x532, 0x5), - SingleRefProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x580, 0xa), - CompRefProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x58a, 0x5), - YModeProbs0 = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x480, 0x20), - YModeProbs1 = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x47c, 
0x4), - PartitionProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x4e0, 0x40), - MvJointProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x53b, 0x3), - MvSignProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x53e, 0x3), - MvClassProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x54c, 0x14), - MvClass0BitProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x540, 0x3), - MvBitsProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x56c, 0x14), - MvClass0FrProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x560, 0xc), - MvFrProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x542, 0x6), - MvClass0HpProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x548, 0x2), - MvHpProbs = gpu.MemoryAccessor.ReadBytes(_vpxProbTablesAddress + 0x54a, 0x2) - }; - - byte[] frameData = gpu.MemoryAccessor.ReadBytes(_frameDataAddress, frameDataSize); - - _vp9Decoder.Decode(keys, header, probs, frameData); - } - else - { - ThrowUnimplementedCodec(); - } - } - - private T ReadStruct(MemoryAccessor accessor, ulong address) where T : struct - { - byte[] data = accessor.ReadBytes(address, Marshal.SizeOf()); - - unsafe - { - fixed (byte* ptr = data) - { - return Marshal.PtrToStructure((IntPtr)ptr); - } - } - } - - private void SetDecoderCtxAddr(int[] arguments) - { - _decoderContextAddress = GetAddress(arguments); - } - - private void SetFrameDataAddr(int[] arguments) - { - _frameDataAddress = GetAddress(arguments); - } - - private void SetVpxCurrLumaAddr(int[] arguments) - { - _vpxCurrLumaAddress = GetAddress(arguments); - } - - private void SetVpxRef0LumaAddr(int[] arguments) - { - _vpxRef0LumaAddress = GetAddress(arguments); - } - - private void SetVpxRef1LumaAddr(int[] arguments) - { - _vpxRef1LumaAddress = GetAddress(arguments); - } - - private void SetVpxRef2LumaAddr(int[] arguments) - { - _vpxRef2LumaAddress = GetAddress(arguments); - } - - private void SetVpxCurrChromaAddr(int[] arguments) - { 
- _vpxCurrChromaAddress = GetAddress(arguments); - } - - private void SetVpxRef0ChromaAddr(int[] arguments) - { - _vpxRef0ChromaAddress = GetAddress(arguments); - } - - private void SetVpxRef1ChromaAddr(int[] arguments) - { - _vpxRef1ChromaAddress = GetAddress(arguments); - } - - private void SetVpxRef2ChromaAddr(int[] arguments) - { - _vpxRef2ChromaAddress = GetAddress(arguments); - } - - private void SetVpxProbTablesAddr(int[] arguments) - { - _vpxProbTablesAddress = GetAddress(arguments); - } - - private static ulong GetAddress(int[] arguments) - { - return (ulong)(uint)arguments[0] << 8; - } - - internal void CopyPlanes(GpuContext gpu, SurfaceOutputConfig outputConfig) - { - switch (outputConfig.PixelFormat) - { - case SurfacePixelFormat.Rgba8: CopyPlanesRgba8 (gpu, outputConfig); break; - case SurfacePixelFormat.Yuv420P: CopyPlanesYuv420P(gpu, outputConfig); break; - - default: ThrowUnimplementedPixelFormat(outputConfig.PixelFormat); break; - } - } - - private void CopyPlanesRgba8(GpuContext gpu, SurfaceOutputConfig outputConfig) - { - FFmpegFrame frame = FFmpegWrapper.GetFrameRgba(); - - if ((frame.Width | frame.Height) == 0) - { - return; - } - - throw new NotImplementedException(); - } - - private void CopyPlanesYuv420P(GpuContext gpu, SurfaceOutputConfig outputConfig) - { - FFmpegFrame frame = FFmpegWrapper.GetFrame(); - - if ((frame.Width | frame.Height) == 0) - { - return; - } - - int halfSrcWidth = frame.Width / 2; - - int halfWidth = frame.Width / 2; - int halfHeight = frame.Height / 2; - - int alignedWidth = (outputConfig.SurfaceWidth + 0xff) & ~0xff; - - for (int y = 0; y < frame.Height; y++) - { - int src = y * frame.Width; - int dst = y * alignedWidth; - - int size = frame.Width; - - for (int offset = 0; offset < size; offset++) - { - gpu.MemoryAccessor.WriteByte(outputConfig.SurfaceLumaAddress + (ulong)dst + (ulong)offset, *(frame.LumaPtr + src + offset)); - } - } - - // Copy chroma data from both channels with interleaving. 
- for (int y = 0; y < halfHeight; y++) - { - int src = y * halfSrcWidth; - int dst = y * alignedWidth; - - for (int x = 0; x < halfWidth; x++) - { - gpu.MemoryAccessor.WriteByte(outputConfig.SurfaceChromaUAddress + (ulong)dst + (ulong)x * 2 + 0, *(frame.ChromaBPtr + src + x)); - gpu.MemoryAccessor.WriteByte(outputConfig.SurfaceChromaUAddress + (ulong)dst + (ulong)x * 2 + 1, *(frame.ChromaRPtr + src + x)); - } - } - } - - private void ThrowUnimplementedCodec() - { - throw new NotImplementedException($"Codec \"{_currentVideoCodec}\" is not supported!"); - } - - private void ThrowUnimplementedPixelFormat(SurfacePixelFormat pixelFormat) - { - throw new NotImplementedException($"Pixel format \"{pixelFormat}\" is not supported!"); - } - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/VideoDecoderMeth.cs b/Ryujinx.Graphics.Nvdec/VDec/VideoDecoderMeth.cs deleted file mode 100644 index 12286386..00000000 --- a/Ryujinx.Graphics.Nvdec/VDec/VideoDecoderMeth.cs +++ /dev/null @@ -1,19 +0,0 @@ -namespace Ryujinx.Graphics.VDec -{ - enum VideoDecoderMeth - { - SetVideoCodec = 0x80, - Execute = 0xc0, - SetDecoderCtxAddr = 0x101, - SetFrameDataAddr = 0x102, - SetVpxRef0LumaAddr = 0x10c, - SetVpxRef1LumaAddr = 0x10d, - SetVpxRef2LumaAddr = 0x10e, - SetVpxCurrLumaAddr = 0x10f, - SetVpxRef0ChromaAddr = 0x11d, - SetVpxRef1ChromaAddr = 0x11e, - SetVpxRef2ChromaAddr = 0x11f, - SetVpxCurrChromaAddr = 0x120, - SetVpxProbTablesAddr = 0x170 - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/Vp9Decoder.cs b/Ryujinx.Graphics.Nvdec/VDec/Vp9Decoder.cs deleted file mode 100644 index b20a40be..00000000 --- a/Ryujinx.Graphics.Nvdec/VDec/Vp9Decoder.cs +++ /dev/null @@ -1,879 +0,0 @@ -using System.Collections.Generic; -using System.IO; - -namespace Ryujinx.Graphics.VDec -{ - class Vp9Decoder - { - private const int DiffUpdateProbability = 252; - - private const int FrameSyncCode = 0x498342; - - private static readonly int[] MapLut = new int[] - { - 20, 
21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, - 35, 36, 37, 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, - 2, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 3, 62, 63, - 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 4, 74, 75, 76, 77, 78, - 79, 80, 81, 82, 83, 84, 85, 5, 86, 87, 88, 89, 90, 91, 92, 93, - 94, 95, 96, 97, 6, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, - 109, 7, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 8, 122, - 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 9, 134, 135, 136, 137, - 138, 139, 140, 141, 142, 143, 144, 145, 10, 146, 147, 148, 149, 150, 151, 152, - 153, 154, 155, 156, 157, 11, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, - 168, 169, 12, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 13, - 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 14, 194, 195, 196, - 197, 198, 199, 200, 201, 202, 203, 204, 205, 15, 206, 207, 208, 209, 210, 211, - 212, 213, 214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223, 224, 225, 226, - 227, 228, 229, 17, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, - 18, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 19 - }; - - private byte[] DefaultTx8x8Probs = new byte[] { 100, 66 }; - private byte[] DefaultTx16x16Probs = new byte[] { 20, 152, 15, 101 }; - private byte[] DefaultTx32x32Probs = new byte[] { 3, 136, 37, 5, 52, 13 }; - - private byte[] _defaultCoefProbs = new byte[] - { - 195, 29, 183, 0, 84, 49, 136, 0, 8, 42, 71, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 31, 107, 169, 0, 35, 99, 159, 0, - 17, 82, 140, 0, 8, 66, 114, 0, 2, 44, 76, 0, 1, 19, 32, 0, - 40, 132, 201, 0, 29, 114, 187, 0, 13, 91, 157, 0, 7, 75, 127, 0, - 3, 58, 95, 0, 1, 28, 47, 0, 69, 142, 221, 0, 42, 122, 201, 0, - 15, 91, 159, 0, 6, 67, 121, 0, 1, 42, 77, 0, 1, 17, 31, 0, - 102, 148, 228, 0, 67, 117, 204, 0, 17, 82, 154, 0, 6, 59, 114, 0, - 2, 39, 75, 0, 1, 15, 29, 0, 156, 57, 233, 0, 119, 57, 212, 0, - 58, 48, 163, 0, 29, 40, 124, 0, 12, 
30, 81, 0, 3, 12, 31, 0, - 191, 107, 226, 0, 124, 117, 204, 0, 25, 99, 155, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 29, 148, 210, 0, 37, 126, 194, 0, - 8, 93, 157, 0, 2, 68, 118, 0, 1, 39, 69, 0, 1, 17, 33, 0, - 41, 151, 213, 0, 27, 123, 193, 0, 3, 82, 144, 0, 1, 58, 105, 0, - 1, 32, 60, 0, 1, 13, 26, 0, 59, 159, 220, 0, 23, 126, 198, 0, - 4, 88, 151, 0, 1, 66, 114, 0, 1, 38, 71, 0, 1, 18, 34, 0, - 114, 136, 232, 0, 51, 114, 207, 0, 11, 83, 155, 0, 3, 56, 105, 0, - 1, 33, 65, 0, 1, 17, 34, 0, 149, 65, 234, 0, 121, 57, 215, 0, - 61, 49, 166, 0, 28, 36, 114, 0, 12, 25, 76, 0, 3, 16, 42, 0, - 214, 49, 220, 0, 132, 63, 188, 0, 42, 65, 137, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 85, 137, 221, 0, 104, 131, 216, 0, - 49, 111, 192, 0, 21, 87, 155, 0, 2, 49, 87, 0, 1, 16, 28, 0, - 89, 163, 230, 0, 90, 137, 220, 0, 29, 100, 183, 0, 10, 70, 135, 0, - 2, 42, 81, 0, 1, 17, 33, 0, 108, 167, 237, 0, 55, 133, 222, 0, - 15, 97, 179, 0, 4, 72, 135, 0, 1, 45, 85, 0, 1, 19, 38, 0, - 124, 146, 240, 0, 66, 124, 224, 0, 17, 88, 175, 0, 4, 58, 122, 0, - 1, 36, 75, 0, 1, 18, 37, 0, 141, 79, 241, 0, 126, 70, 227, 0, - 66, 58, 182, 0, 30, 44, 136, 0, 12, 34, 96, 0, 2, 20, 47, 0, - 229, 99, 249, 0, 143, 111, 235, 0, 46, 109, 192, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 82, 158, 236, 0, 94, 146, 224, 0, - 25, 117, 191, 0, 9, 87, 149, 0, 3, 56, 99, 0, 1, 33, 57, 0, - 83, 167, 237, 0, 68, 145, 222, 0, 10, 103, 177, 0, 2, 72, 131, 0, - 1, 41, 79, 0, 1, 20, 39, 0, 99, 167, 239, 0, 47, 141, 224, 0, - 10, 104, 178, 0, 2, 73, 133, 0, 1, 44, 85, 0, 1, 22, 47, 0, - 127, 145, 243, 0, 71, 129, 228, 0, 17, 93, 177, 0, 3, 61, 124, 0, - 1, 41, 84, 0, 1, 21, 52, 0, 157, 78, 244, 0, 140, 72, 231, 0, - 69, 58, 184, 0, 31, 44, 137, 0, 14, 38, 105, 0, 8, 23, 61, 0, - 125, 34, 187, 0, 52, 41, 133, 0, 6, 31, 56, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 37, 109, 153, 0, 51, 102, 147, 0, - 23, 87, 128, 0, 8, 67, 101, 0, 1, 41, 63, 0, 1, 19, 29, 0, - 31, 154, 185, 0, 17, 127, 175, 0, 6, 96, 145, 0, 2, 73, 114, 0, 
- 1, 51, 82, 0, 1, 28, 45, 0, 23, 163, 200, 0, 10, 131, 185, 0, - 2, 93, 148, 0, 1, 67, 111, 0, 1, 41, 69, 0, 1, 14, 24, 0, - 29, 176, 217, 0, 12, 145, 201, 0, 3, 101, 156, 0, 1, 69, 111, 0, - 1, 39, 63, 0, 1, 14, 23, 0, 57, 192, 233, 0, 25, 154, 215, 0, - 6, 109, 167, 0, 3, 78, 118, 0, 1, 48, 69, 0, 1, 21, 29, 0, - 202, 105, 245, 0, 108, 106, 216, 0, 18, 90, 144, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 33, 172, 219, 0, 64, 149, 206, 0, - 14, 117, 177, 0, 5, 90, 141, 0, 2, 61, 95, 0, 1, 37, 57, 0, - 33, 179, 220, 0, 11, 140, 198, 0, 1, 89, 148, 0, 1, 60, 104, 0, - 1, 33, 57, 0, 1, 12, 21, 0, 30, 181, 221, 0, 8, 141, 198, 0, - 1, 87, 145, 0, 1, 58, 100, 0, 1, 31, 55, 0, 1, 12, 20, 0, - 32, 186, 224, 0, 7, 142, 198, 0, 1, 86, 143, 0, 1, 58, 100, 0, - 1, 31, 55, 0, 1, 12, 22, 0, 57, 192, 227, 0, 20, 143, 204, 0, - 3, 96, 154, 0, 1, 68, 112, 0, 1, 42, 69, 0, 1, 19, 32, 0, - 212, 35, 215, 0, 113, 47, 169, 0, 29, 48, 105, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 74, 129, 203, 0, 106, 120, 203, 0, - 49, 107, 178, 0, 19, 84, 144, 0, 4, 50, 84, 0, 1, 15, 25, 0, - 71, 172, 217, 0, 44, 141, 209, 0, 15, 102, 173, 0, 6, 76, 133, 0, - 2, 51, 89, 0, 1, 24, 42, 0, 64, 185, 231, 0, 31, 148, 216, 0, - 8, 103, 175, 0, 3, 74, 131, 0, 1, 46, 81, 0, 1, 18, 30, 0, - 65, 196, 235, 0, 25, 157, 221, 0, 5, 105, 174, 0, 1, 67, 120, 0, - 1, 38, 69, 0, 1, 15, 30, 0, 65, 204, 238, 0, 30, 156, 224, 0, - 7, 107, 177, 0, 2, 70, 124, 0, 1, 42, 73, 0, 1, 18, 34, 0, - 225, 86, 251, 0, 144, 104, 235, 0, 42, 99, 181, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 85, 175, 239, 0, 112, 165, 229, 0, - 29, 136, 200, 0, 12, 103, 162, 0, 6, 77, 123, 0, 2, 53, 84, 0, - 75, 183, 239, 0, 30, 155, 221, 0, 3, 106, 171, 0, 1, 74, 128, 0, - 1, 44, 76, 0, 1, 17, 28, 0, 73, 185, 240, 0, 27, 159, 222, 0, - 2, 107, 172, 0, 1, 75, 127, 0, 1, 42, 73, 0, 1, 17, 29, 0, - 62, 190, 238, 0, 21, 159, 222, 0, 2, 107, 172, 0, 1, 72, 122, 0, - 1, 40, 71, 0, 1, 18, 32, 0, 61, 199, 240, 0, 27, 161, 226, 0, - 4, 113, 180, 0, 1, 76, 129, 
0, 1, 46, 80, 0, 1, 23, 41, 0, - 7, 27, 153, 0, 5, 30, 95, 0, 1, 16, 30, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 50, 75, 127, 0, 57, 75, 124, 0, - 27, 67, 108, 0, 10, 54, 86, 0, 1, 33, 52, 0, 1, 12, 18, 0, - 43, 125, 151, 0, 26, 108, 148, 0, 7, 83, 122, 0, 2, 59, 89, 0, - 1, 38, 60, 0, 1, 17, 27, 0, 23, 144, 163, 0, 13, 112, 154, 0, - 2, 75, 117, 0, 1, 50, 81, 0, 1, 31, 51, 0, 1, 14, 23, 0, - 18, 162, 185, 0, 6, 123, 171, 0, 1, 78, 125, 0, 1, 51, 86, 0, - 1, 31, 54, 0, 1, 14, 23, 0, 15, 199, 227, 0, 3, 150, 204, 0, - 1, 91, 146, 0, 1, 55, 95, 0, 1, 30, 53, 0, 1, 11, 20, 0, - 19, 55, 240, 0, 19, 59, 196, 0, 3, 52, 105, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 41, 166, 207, 0, 104, 153, 199, 0, - 31, 123, 181, 0, 14, 101, 152, 0, 5, 72, 106, 0, 1, 36, 52, 0, - 35, 176, 211, 0, 12, 131, 190, 0, 2, 88, 144, 0, 1, 60, 101, 0, - 1, 36, 60, 0, 1, 16, 28, 0, 28, 183, 213, 0, 8, 134, 191, 0, - 1, 86, 142, 0, 1, 56, 96, 0, 1, 30, 53, 0, 1, 12, 20, 0, - 20, 190, 215, 0, 4, 135, 192, 0, 1, 84, 139, 0, 1, 53, 91, 0, - 1, 28, 49, 0, 1, 11, 20, 0, 13, 196, 216, 0, 2, 137, 192, 0, - 1, 86, 143, 0, 1, 57, 99, 0, 1, 32, 56, 0, 1, 13, 24, 0, - 211, 29, 217, 0, 96, 47, 156, 0, 22, 43, 87, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 78, 120, 193, 0, 111, 116, 186, 0, - 46, 102, 164, 0, 15, 80, 128, 0, 2, 49, 76, 0, 1, 18, 28, 0, - 71, 161, 203, 0, 42, 132, 192, 0, 10, 98, 150, 0, 3, 69, 109, 0, - 1, 44, 70, 0, 1, 18, 29, 0, 57, 186, 211, 0, 30, 140, 196, 0, - 4, 93, 146, 0, 1, 62, 102, 0, 1, 38, 65, 0, 1, 16, 27, 0, - 47, 199, 217, 0, 14, 145, 196, 0, 1, 88, 142, 0, 1, 57, 98, 0, - 1, 36, 62, 0, 1, 15, 26, 0, 26, 219, 229, 0, 5, 155, 207, 0, - 1, 94, 151, 0, 1, 60, 104, 0, 1, 36, 62, 0, 1, 16, 28, 0, - 233, 29, 248, 0, 146, 47, 220, 0, 43, 52, 140, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 100, 163, 232, 0, 179, 161, 222, 0, - 63, 142, 204, 0, 37, 113, 174, 0, 26, 89, 137, 0, 18, 68, 97, 0, - 85, 181, 230, 0, 32, 146, 209, 0, 7, 100, 164, 0, 3, 71, 121, 0, - 1, 45, 77, 0, 1, 18, 30, 0, 
65, 187, 230, 0, 20, 148, 207, 0, - 2, 97, 159, 0, 1, 68, 116, 0, 1, 40, 70, 0, 1, 14, 29, 0, - 40, 194, 227, 0, 8, 147, 204, 0, 1, 94, 155, 0, 1, 65, 112, 0, - 1, 39, 66, 0, 1, 14, 26, 0, 16, 208, 228, 0, 3, 151, 207, 0, - 1, 98, 160, 0, 1, 67, 117, 0, 1, 41, 74, 0, 1, 17, 31, 0, - 17, 38, 140, 0, 7, 34, 80, 0, 1, 17, 29, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 37, 75, 128, 0, 41, 76, 128, 0, - 26, 66, 116, 0, 12, 52, 94, 0, 2, 32, 55, 0, 1, 10, 16, 0, - 50, 127, 154, 0, 37, 109, 152, 0, 16, 82, 121, 0, 5, 59, 85, 0, - 1, 35, 54, 0, 1, 13, 20, 0, 40, 142, 167, 0, 17, 110, 157, 0, - 2, 71, 112, 0, 1, 44, 72, 0, 1, 27, 45, 0, 1, 11, 17, 0, - 30, 175, 188, 0, 9, 124, 169, 0, 1, 74, 116, 0, 1, 48, 78, 0, - 1, 30, 49, 0, 1, 11, 18, 0, 10, 222, 223, 0, 2, 150, 194, 0, - 1, 83, 128, 0, 1, 48, 79, 0, 1, 27, 45, 0, 1, 11, 17, 0, - 36, 41, 235, 0, 29, 36, 193, 0, 10, 27, 111, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 85, 165, 222, 0, 177, 162, 215, 0, - 110, 135, 195, 0, 57, 113, 168, 0, 23, 83, 120, 0, 10, 49, 61, 0, - 85, 190, 223, 0, 36, 139, 200, 0, 5, 90, 146, 0, 1, 60, 103, 0, - 1, 38, 65, 0, 1, 18, 30, 0, 72, 202, 223, 0, 23, 141, 199, 0, - 2, 86, 140, 0, 1, 56, 97, 0, 1, 36, 61, 0, 1, 16, 27, 0, - 55, 218, 225, 0, 13, 145, 200, 0, 1, 86, 141, 0, 1, 57, 99, 0, - 1, 35, 61, 0, 1, 13, 22, 0, 15, 235, 212, 0, 1, 132, 184, 0, - 1, 84, 139, 0, 1, 57, 97, 0, 1, 34, 56, 0, 1, 14, 23, 0, - 181, 21, 201, 0, 61, 37, 123, 0, 10, 38, 71, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 47, 106, 172, 0, 95, 104, 173, 0, - 42, 93, 159, 0, 18, 77, 131, 0, 4, 50, 81, 0, 1, 17, 23, 0, - 62, 147, 199, 0, 44, 130, 189, 0, 28, 102, 154, 0, 18, 75, 115, 0, - 2, 44, 65, 0, 1, 12, 19, 0, 55, 153, 210, 0, 24, 130, 194, 0, - 3, 93, 146, 0, 1, 61, 97, 0, 1, 31, 50, 0, 1, 10, 16, 0, - 49, 186, 223, 0, 17, 148, 204, 0, 1, 96, 142, 0, 1, 53, 83, 0, - 1, 26, 44, 0, 1, 11, 17, 0, 13, 217, 212, 0, 2, 136, 180, 0, - 1, 78, 124, 0, 1, 50, 83, 0, 1, 29, 49, 0, 1, 14, 23, 0, - 197, 13, 247, 0, 82, 17, 222, 0, 
25, 17, 162, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 126, 186, 247, 0, 234, 191, 243, 0, - 176, 177, 234, 0, 104, 158, 220, 0, 66, 128, 186, 0, 55, 90, 137, 0, - 111, 197, 242, 0, 46, 158, 219, 0, 9, 104, 171, 0, 2, 65, 125, 0, - 1, 44, 80, 0, 1, 17, 91, 0, 104, 208, 245, 0, 39, 168, 224, 0, - 3, 109, 162, 0, 1, 79, 124, 0, 1, 50, 102, 0, 1, 43, 102, 0, - 84, 220, 246, 0, 31, 177, 231, 0, 2, 115, 180, 0, 1, 79, 134, 0, - 1, 55, 77, 0, 1, 60, 79, 0, 43, 243, 240, 0, 8, 180, 217, 0, - 1, 115, 166, 0, 1, 84, 121, 0, 1, 51, 67, 0, 1, 16, 6, 0 - }; - - private byte[] _defaultSkipProbs = new byte[] { 192, 128, 64 }; - - private byte[] _defaultInterModeProbs = new byte[] - { - 2, 173, 34, 0, 7, 145, 85, 0, 7, 166, 63, 0, 7, 94, 66, 0, - 8, 64, 46, 0, 17, 81, 31, 0, 25, 29, 30, 0 - }; - - private byte[] _defaultInterpFilterProbs = new byte[] - { - 235, 162, 36, 255, 34, 3, 149, 144 - }; - - private byte[] _defaultIsInterProbs = new byte[] { 9, 102, 187, 225 }; - - private byte[] _defaultCompModeProbs = new byte[] { 239, 183, 119, 96, 41 }; - - private byte[] _defaultSingleRefProbs = new byte[] - { - 33, 16, 77, 74, 142, 142, 172, 170, 238, 247 - }; - - private byte[] _defaultCompRefProbs = new byte[] { 50, 126, 123, 221, 226 }; - - private byte[] _defaultYModeProbs0 = new byte[] - { - 65, 32, 18, 144, 162, 194, 41, 51, 132, 68, 18, 165, 217, 196, 45, 40, - 173, 80, 19, 176, 240, 193, 64, 35, 221, 135, 38, 194, 248, 121, 96, 85 - }; - - private byte[] _defaultYModeProbs1 = new byte[] { 98, 78, 46, 29 }; - - private byte[] _defaultPartitionProbs = new byte[] - { - 199, 122, 141, 0, 147, 63, 159, 0, 148, 133, 118, 0, 121, 104, 114, 0, - 174, 73, 87, 0, 92, 41, 83, 0, 82, 99, 50, 0, 53, 39, 39, 0, - 177, 58, 59, 0, 68, 26, 63, 0, 52, 79, 25, 0, 17, 14, 12, 0, - 222, 34, 30, 0, 72, 16, 44, 0, 58, 32, 12, 0, 10, 7, 6, 0 - }; - - private byte[] _defaultMvJointProbs = new byte[] { 32, 64, 96 }; - - private byte[] _defaultMvSignProbs = new byte[] { 128, 128 }; - - private byte[] 
_defaultMvClassProbs = new byte[] - { - 224, 144, 192, 168, 192, 176, 192, 198, 198, 245, 216, 128, 176, 160, 176, 176, - 192, 198, 198, 208 - }; - - private byte[] _defaultMvClass0BitProbs = new byte[] { 216, 208 }; - - private byte[] _defaultMvBitsProbs = new byte[] - { - 136, 140, 148, 160, 176, 192, 224, 234, 234, 240, 136, 140, 148, 160, 176, 192, - 224, 234, 234, 240 - }; - - private byte[] _defaultMvClass0FrProbs = new byte[] - { - 128, 128, 64, 96, 112, 64, 128, 128, 64, 96, 112, 64 - }; - - private byte[] _defaultMvFrProbs = new byte[] { 64, 96, 64, 64, 96, 64 }; - - private byte[] _defaultMvClass0HpProbs = new byte[] { 160, 160 }; - - private byte[] _defaultMvHpProbs = new byte[] { 128, 128 }; - - private sbyte[] _loopFilterRefDeltas; - private sbyte[] _loopFilterModeDeltas; - - private LinkedList _frameSlotByLastUse; - - private Dictionary> _cachedRefFrames; - - public Vp9Decoder() - { - _loopFilterRefDeltas = new sbyte[4]; - _loopFilterModeDeltas = new sbyte[2]; - - _frameSlotByLastUse = new LinkedList(); - - for (int slot = 0; slot < 8; slot++) - { - _frameSlotByLastUse.AddFirst(slot); - } - - _cachedRefFrames = new Dictionary>(); - } - - public void Decode( - Vp9FrameKeys keys, - Vp9FrameHeader header, - Vp9ProbabilityTables probs, - byte[] frameData) - { - bool isKeyFrame = ((header.Flags >> 0) & 1) != 0; - bool lastIsKeyFrame = ((header.Flags >> 1) & 1) != 0; - bool frameSizeChanged = ((header.Flags >> 2) & 1) != 0; - bool errorResilientMode = ((header.Flags >> 3) & 1) != 0; - bool lastShowFrame = ((header.Flags >> 4) & 1) != 0; - bool isFrameIntra = ((header.Flags >> 5) & 1) != 0; - - bool showFrame = !isFrameIntra; - - // Write compressed header. 
- byte[] compressedHeaderData; - - using (MemoryStream compressedHeader = new MemoryStream()) - { - VpxRangeEncoder writer = new VpxRangeEncoder(compressedHeader); - - if (!header.Lossless) - { - if ((uint)header.TxMode >= 3) - { - writer.Write(3, 2); - writer.Write(header.TxMode == 4); - } - else - { - writer.Write(header.TxMode, 2); - } - } - - if (header.TxMode == 4) - { - WriteProbabilityUpdate(writer, probs.Tx8x8Probs, DefaultTx8x8Probs); - WriteProbabilityUpdate(writer, probs.Tx16x16Probs, DefaultTx16x16Probs); - WriteProbabilityUpdate(writer, probs.Tx32x32Probs, DefaultTx32x32Probs); - } - - WriteCoefProbabilityUpdate(writer, header.TxMode, probs.CoefProbs, _defaultCoefProbs); - - WriteProbabilityUpdate(writer, probs.SkipProbs, _defaultSkipProbs); - - if (!isFrameIntra) - { - WriteProbabilityUpdateAligned4(writer, probs.InterModeProbs, _defaultInterModeProbs); - - if (header.RawInterpolationFilter == 4) - { - WriteProbabilityUpdate(writer, probs.InterpFilterProbs, _defaultInterpFilterProbs); - } - - WriteProbabilityUpdate(writer, probs.IsInterProbs, _defaultIsInterProbs); - - if ((header.RefFrameSignBias[1] & 1) != (header.RefFrameSignBias[2] & 1) || - (header.RefFrameSignBias[1] & 1) != (header.RefFrameSignBias[3] & 1)) - { - if ((uint)header.CompPredMode >= 1) - { - writer.Write(1, 1); - writer.Write(header.CompPredMode == 2); - } - else - { - writer.Write(0, 1); - } - } - - if (header.CompPredMode == 2) - { - WriteProbabilityUpdate(writer, probs.CompModeProbs, _defaultCompModeProbs); - } - - if (header.CompPredMode != 1) - { - WriteProbabilityUpdate(writer, probs.SingleRefProbs, _defaultSingleRefProbs); - } - - if (header.CompPredMode != 0) - { - WriteProbabilityUpdate(writer, probs.CompRefProbs, _defaultCompRefProbs); - } - - for (int index = 0; index < 4; index++) - { - int i = index * 8; - int j = index; - - WriteProbabilityUpdate(writer, probs.YModeProbs0[i + 0], _defaultYModeProbs0[i + 0]); - WriteProbabilityUpdate(writer, probs.YModeProbs0[i + 1], 
_defaultYModeProbs0[i + 1]); - WriteProbabilityUpdate(writer, probs.YModeProbs0[i + 2], _defaultYModeProbs0[i + 2]); - WriteProbabilityUpdate(writer, probs.YModeProbs0[i + 3], _defaultYModeProbs0[i + 3]); - WriteProbabilityUpdate(writer, probs.YModeProbs0[i + 4], _defaultYModeProbs0[i + 4]); - WriteProbabilityUpdate(writer, probs.YModeProbs0[i + 5], _defaultYModeProbs0[i + 5]); - WriteProbabilityUpdate(writer, probs.YModeProbs0[i + 6], _defaultYModeProbs0[i + 6]); - WriteProbabilityUpdate(writer, probs.YModeProbs0[i + 7], _defaultYModeProbs0[i + 7]); - WriteProbabilityUpdate(writer, probs.YModeProbs1[j + 0], _defaultYModeProbs1[j + 0]); - } - - WriteProbabilityUpdateAligned4(writer, probs.PartitionProbs, _defaultPartitionProbs); - - for (int i = 0; i < 3; i++) - { - WriteMvProbabilityUpdate(writer, probs.MvJointProbs[i], _defaultMvJointProbs[i]); - } - - for (int i = 0; i < 2; i++) - { - WriteMvProbabilityUpdate(writer, probs.MvSignProbs[i], _defaultMvSignProbs[i]); - - for (int j = 0; j < 10; j++) - { - int index = i * 10 + j; - - WriteMvProbabilityUpdate(writer, probs.MvClassProbs[index], _defaultMvClassProbs[index]); - } - - WriteMvProbabilityUpdate(writer, probs.MvClass0BitProbs[i], _defaultMvClass0BitProbs[i]); - - for (int j = 0; j < 10; j++) - { - int index = i * 10 + j; - - WriteMvProbabilityUpdate(writer, probs.MvBitsProbs[index], _defaultMvBitsProbs[index]); - } - } - - for (int i = 0; i < 2; i++) - { - for (int j = 0; j < 2; j++) - { - for (int k = 0; k < 3; k++) - { - int index = i * 2 * 3 + j * 3 + k; - - WriteMvProbabilityUpdate(writer, probs.MvClass0FrProbs[index], _defaultMvClass0FrProbs[index]); - } - } - - for (int j = 0; j < 3; j++) - { - int index = i * 3 + j; - - WriteMvProbabilityUpdate(writer, probs.MvFrProbs[index], _defaultMvFrProbs[index]); - } - } - - if (header.AllowHighPrecisionMv) - { - for (int index = 0; index < 2; index++) - { - WriteMvProbabilityUpdate(writer, probs.MvClass0HpProbs[index], _defaultMvClass0HpProbs[index]); - 
WriteMvProbabilityUpdate(writer, probs.MvHpProbs[index], _defaultMvHpProbs[index]); - } - } - } - - writer.End(); - - compressedHeaderData = compressedHeader.ToArray(); - } - - // Write uncompressed header. - using (MemoryStream encodedHeader = new MemoryStream()) - { - VpxBitStreamWriter writer = new VpxBitStreamWriter(encodedHeader); - - writer.WriteU(2, 2); //Frame marker. - writer.WriteU(0, 2); //Profile. - writer.WriteBit(false); //Show existing frame. - writer.WriteBit(!isKeyFrame); - writer.WriteBit(showFrame); - writer.WriteBit(errorResilientMode); - - if (isKeyFrame) - { - writer.WriteU(FrameSyncCode, 24); - writer.WriteU(0, 3); //Color space. - writer.WriteU(0, 1); //Color range. - writer.WriteU(header.CurrentFrame.Width - 1, 16); - writer.WriteU(header.CurrentFrame.Height - 1, 16); - writer.WriteBit(false); //Render and frame size different. - - _cachedRefFrames.Clear(); - - // On key frames, all frame slots are set to the current frame, - // so the value of the selected slot doesn't really matter. - GetNewFrameSlot(keys.CurrKey); - } - else - { - if (!showFrame) - { - writer.WriteBit(isFrameIntra); - } - - if (!errorResilientMode) - { - writer.WriteU(0, 2); //Reset frame context. - } - - int refreshFrameFlags = 1 << GetNewFrameSlot(keys.CurrKey); - - if (isFrameIntra) - { - writer.WriteU(FrameSyncCode, 24); - writer.WriteU(refreshFrameFlags, 8); - writer.WriteU(header.CurrentFrame.Width - 1, 16); - writer.WriteU(header.CurrentFrame.Height - 1, 16); - writer.WriteBit(false); //Render and frame size different. - } - else - { - writer.WriteU(refreshFrameFlags, 8); - - int[] refFrameIndex = new int[] - { - GetFrameSlot(keys.Ref0Key), - GetFrameSlot(keys.Ref1Key), - GetFrameSlot(keys.Ref2Key) - }; - - byte[] refFrameSignBias = header.RefFrameSignBias; - - for (int index = 1; index < 4; index++) - { - writer.WriteU(refFrameIndex[index - 1], 3); - writer.WriteU(refFrameSignBias[index], 1); - } - - writer.WriteBit(true); //Frame size with refs. 
- writer.WriteBit(false); //Render and frame size different. - writer.WriteBit(header.AllowHighPrecisionMv); - writer.WriteBit(header.RawInterpolationFilter == 4); - - if (header.RawInterpolationFilter != 4) - { - writer.WriteU(header.RawInterpolationFilter, 2); - } - } - } - - if (!errorResilientMode) - { - writer.WriteBit(false); //Refresh frame context. - writer.WriteBit(true); //Frame parallel decoding mode. - } - - writer.WriteU(0, 2); //Frame context index. - - writer.WriteU(header.LoopFilterLevel, 6); - writer.WriteU(header.LoopFilterSharpness, 3); - writer.WriteBit(header.LoopFilterDeltaEnabled); - - if (header.LoopFilterDeltaEnabled) - { - bool[] updateLoopFilterRefDeltas = new bool[4]; - bool[] updateLoopFilterModeDeltas = new bool[2]; - - bool loopFilterDeltaUpdate = false; - - for (int index = 0; index < header.LoopFilterRefDeltas.Length; index++) - { - sbyte old = _loopFilterRefDeltas[index]; - sbyte New = header.LoopFilterRefDeltas[index]; - - loopFilterDeltaUpdate |= (updateLoopFilterRefDeltas[index] = old != New); - } - - for (int index = 0; index < header.LoopFilterModeDeltas.Length; index++) - { - sbyte old = _loopFilterModeDeltas[index]; - sbyte New = header.LoopFilterModeDeltas[index]; - - loopFilterDeltaUpdate |= (updateLoopFilterModeDeltas[index] = old != New); - } - - writer.WriteBit(loopFilterDeltaUpdate); - - if (loopFilterDeltaUpdate) - { - for (int index = 0; index < header.LoopFilterRefDeltas.Length; index++) - { - writer.WriteBit(updateLoopFilterRefDeltas[index]); - - if (updateLoopFilterRefDeltas[index]) - { - writer.WriteS(header.LoopFilterRefDeltas[index], 6); - } - } - - for (int index = 0; index < header.LoopFilterModeDeltas.Length; index++) - { - writer.WriteBit(updateLoopFilterModeDeltas[index]); - - if (updateLoopFilterModeDeltas[index]) - { - writer.WriteS(header.LoopFilterModeDeltas[index], 6); - } - } - } - } - - writer.WriteU(header.BaseQIndex, 8); - - writer.WriteDeltaQ(header.DeltaQYDc); - 
writer.WriteDeltaQ(header.DeltaQUvDc); - writer.WriteDeltaQ(header.DeltaQUvAc); - - writer.WriteBit(false); //Segmentation enabled (TODO). - - int minTileColsLog2 = CalcMinLog2TileCols(header.CurrentFrame.Width); - int maxTileColsLog2 = CalcMaxLog2TileCols(header.CurrentFrame.Width); - - int tileColsLog2Diff = header.TileColsLog2 - minTileColsLog2; - - int tileColsLog2IncMask = (1 << tileColsLog2Diff) - 1; - - // If it's less than the maximum, we need to add an extra 0 on the bitstream - // to indicate that it should stop reading. - if (header.TileColsLog2 < maxTileColsLog2) - { - writer.WriteU(tileColsLog2IncMask << 1, tileColsLog2Diff + 1); - } - else - { - writer.WriteU(tileColsLog2IncMask, tileColsLog2Diff); - } - - bool tileRowsLog2IsNonZero = header.TileRowsLog2 != 0; - - writer.WriteBit(tileRowsLog2IsNonZero); - - if (tileRowsLog2IsNonZero) - { - writer.WriteBit(header.TileRowsLog2 > 1); - } - - writer.WriteU(compressedHeaderData.Length, 16); - - writer.Flush(); - - encodedHeader.Write(compressedHeaderData, 0, compressedHeaderData.Length); - - if (!FFmpegWrapper.IsInitialized) - { - FFmpegWrapper.Vp9Initialize(); - } - - FFmpegWrapper.DecodeFrame(DecoderHelper.Combine(encodedHeader.ToArray(), frameData)); - } - - _loopFilterRefDeltas = header.LoopFilterRefDeltas; - _loopFilterModeDeltas = header.LoopFilterModeDeltas; - } - - private int GetNewFrameSlot(long key) - { - LinkedListNode node = _frameSlotByLastUse.Last; - - _frameSlotByLastUse.RemoveLast(); - _frameSlotByLastUse.AddFirst(node); - - _cachedRefFrames[key] = node; - - return node.Value; - } - - private int GetFrameSlot(long key) - { - if (_cachedRefFrames.TryGetValue(key, out LinkedListNode node)) - { - _frameSlotByLastUse.Remove(node); - _frameSlotByLastUse.AddFirst(node); - - return node.Value; - } - - // Reference frame was lost. - // What we should do in this case? 
- return 0; - } - - private void WriteProbabilityUpdate(VpxRangeEncoder writer, byte[] New, byte[] old) - { - for (int offset = 0; offset < New.Length; offset++) - { - WriteProbabilityUpdate(writer, New[offset], old[offset]); - } - } - - private void WriteCoefProbabilityUpdate(VpxRangeEncoder writer, int txMode, byte[] New, byte[] old) - { - // Note: There's 1 byte added on each packet for alignment, - // this byte is ignored when doing updates. - const int blockBytes = 2 * 2 * 6 * 6 * 4; - - bool NeedsUpdate(int baseIndex) - { - int index = baseIndex; - - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 6; k++) - for (int l = 0; l < 6; l++) - { - if (New[index + 0] != old[index + 0] || - New[index + 1] != old[index + 1] || - New[index + 2] != old[index + 2]) - { - return true; - } - - index += 4; - } - - return false; - } - - for (int blockIndex = 0; blockIndex < 4; blockIndex++) - { - int baseIndex = blockIndex * blockBytes; - - bool update = NeedsUpdate(baseIndex); - - writer.Write(update); - - if (update) - { - int index = baseIndex; - - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) - for (int k = 0; k < 6; k++) - for (int l = 0; l < 6; l++) - { - if (k != 0 || l < 3) - { - WriteProbabilityUpdate(writer, New[index + 0], old[index + 0]); - WriteProbabilityUpdate(writer, New[index + 1], old[index + 1]); - WriteProbabilityUpdate(writer, New[index + 2], old[index + 2]); - } - - index += 4; - } - } - - if (blockIndex == txMode) - { - break; - } - } - } - - private void WriteProbabilityUpdateAligned4(VpxRangeEncoder writer, byte[] New, byte[] old) - { - for (int offset = 0; offset < New.Length; offset += 4) - { - WriteProbabilityUpdate(writer, New[offset + 0], old[offset + 0]); - WriteProbabilityUpdate(writer, New[offset + 1], old[offset + 1]); - WriteProbabilityUpdate(writer, New[offset + 2], old[offset + 2]); - } - } - - private void WriteProbabilityUpdate(VpxRangeEncoder writer, byte New, byte old) - { - bool update = New 
!= old; - - writer.Write(update, DiffUpdateProbability); - - if (update) - { - WriteProbabilityDelta(writer, New, old); - } - } - - private void WriteProbabilityDelta(VpxRangeEncoder writer, int New, int old) - { - int delta = RemapProbability(New, old); - - EncodeTermSubExp(writer, delta); - } - - private int RemapProbability(int New, int old) - { - New--; - old--; - - int index; - - if (old * 2 <= 0xff) - { - index = RecenterNonNeg(New, old) - 1; - } - else - { - index = RecenterNonNeg(0xff - 1 - New, 0xff - 1 - old) - 1; - } - - return MapLut[index]; - } - - private int RecenterNonNeg(int New, int old) - { - if (New > old * 2) - { - return New; - } - else if (New >= old) - { - return (New - old) * 2; - } - else /* if (New < Old) */ - { - return (old - New) * 2 - 1; - } - } - - private void EncodeTermSubExp(VpxRangeEncoder writer, int value) - { - if (WriteLessThan(writer, value, 16)) - { - writer.Write(value, 4); - } - else if (WriteLessThan(writer, value, 32)) - { - writer.Write(value - 16, 4); - } - else if (WriteLessThan(writer, value, 64)) - { - writer.Write(value - 32, 5); - } - else - { - value -= 64; - - const int size = 8; - - int mask = (1 << size) - 191; - - int delta = value - mask; - - if (delta < 0) - { - writer.Write(value, size - 1); - } - else - { - writer.Write(delta / 2 + mask, size - 1); - writer.Write(delta & 1, 1); - } - } - } - - private bool WriteLessThan(VpxRangeEncoder writer, int value, int test) - { - bool isLessThan = value < test; - - writer.Write(!isLessThan); - - return isLessThan; - } - - private void WriteMvProbabilityUpdate(VpxRangeEncoder writer, byte New, byte old) - { - bool update = New != old; - - writer.Write(update, DiffUpdateProbability); - - if (update) - { - writer.Write(New >> 1, 7); - } - } - - private static int CalcMinLog2TileCols(int frameWidth) - { - int sb64Cols = (frameWidth + 63) / 64; - int minLog2 = 0; - - while ((64 << minLog2) < sb64Cols) - { - minLog2++; - } - - return minLog2; - } - - private static int 
CalcMaxLog2TileCols(int frameWidth) - { - int sb64Cols = (frameWidth + 63) / 64; - int maxLog2 = 1; - - while ((sb64Cols >> maxLog2) >= 4) - { - maxLog2++; - } - - return maxLog2 - 1; - } - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/Vp9FrameHeader.cs b/Ryujinx.Graphics.Nvdec/VDec/Vp9FrameHeader.cs deleted file mode 100644 index bdba6de5..00000000 --- a/Ryujinx.Graphics.Nvdec/VDec/Vp9FrameHeader.cs +++ /dev/null @@ -1,79 +0,0 @@ -using System.Runtime.InteropServices; - -namespace Ryujinx.Graphics.VDec -{ - [StructLayout(LayoutKind.Sequential, Pack = 2)] - struct Vp9FrameDimensions - { - public short Width; - public short Height; - public short SubsamplingX; //? - public short SubsamplingY; //? - } - - [StructLayout(LayoutKind.Sequential, Pack = 1)] - struct Vp9FrameHeader - { - [MarshalAs(UnmanagedType.ByValArray, SizeConst = 3)] - public Vp9FrameDimensions[] RefFrames; - - public Vp9FrameDimensions CurrentFrame; - - public int Flags; - - [MarshalAs(UnmanagedType.ByValArray, SizeConst = 4)] - public byte[] RefFrameSignBias; - - public byte LoopFilterLevel; - public byte LoopFilterSharpness; - - public byte BaseQIndex; - public sbyte DeltaQYDc; - public sbyte DeltaQUvDc; - public sbyte DeltaQUvAc; - - [MarshalAs(UnmanagedType.I1)] - public bool Lossless; - - public byte TxMode; - - [MarshalAs(UnmanagedType.I1)] - public bool AllowHighPrecisionMv; - - public byte RawInterpolationFilter; - public byte CompPredMode; - public byte FixCompRef; - public byte VarCompRef0; - public byte VarCompRef1; - - public byte TileColsLog2; - public byte TileRowsLog2; - - [MarshalAs(UnmanagedType.I1)] - public bool SegmentationEnabled; - - [MarshalAs(UnmanagedType.I1)] - public bool SegmentationUpdate; - - [MarshalAs(UnmanagedType.I1)] - public bool SegmentationTemporalUpdate; - - [MarshalAs(UnmanagedType.I1)] - public bool SegmentationAbsOrDeltaUpdate; - - [MarshalAs(UnmanagedType.ByValArray, SizeConst = 8 * 4, ArraySubType = UnmanagedType.I1)] - public 
bool[] FeatureEnabled; - - [MarshalAs(UnmanagedType.ByValArray, SizeConst = 8 * 4)] - public short[] FeatureData; - - [MarshalAs(UnmanagedType.I1)] - public bool LoopFilterDeltaEnabled; - - [MarshalAs(UnmanagedType.ByValArray, SizeConst = 4)] - public sbyte[] LoopFilterRefDeltas; - - [MarshalAs(UnmanagedType.ByValArray, SizeConst = 2)] - public sbyte[] LoopFilterModeDeltas; - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/Vp9FrameKeys.cs b/Ryujinx.Graphics.Nvdec/VDec/Vp9FrameKeys.cs deleted file mode 100644 index dfc31ea3..00000000 --- a/Ryujinx.Graphics.Nvdec/VDec/Vp9FrameKeys.cs +++ /dev/null @@ -1,10 +0,0 @@ -namespace Ryujinx.Graphics.VDec -{ - struct Vp9FrameKeys - { - public long CurrKey; - public long Ref0Key; - public long Ref1Key; - public long Ref2Key; - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/Vp9ProbabilityTables.cs b/Ryujinx.Graphics.Nvdec/VDec/Vp9ProbabilityTables.cs deleted file mode 100644 index 5a6dd0cf..00000000 --- a/Ryujinx.Graphics.Nvdec/VDec/Vp9ProbabilityTables.cs +++ /dev/null @@ -1,31 +0,0 @@ -namespace Ryujinx.Graphics.VDec -{ - struct Vp9ProbabilityTables - { - public byte[] SegmentationTreeProbs; - public byte[] SegmentationPredProbs; - public byte[] Tx8x8Probs; - public byte[] Tx16x16Probs; - public byte[] Tx32x32Probs; - public byte[] CoefProbs; - public byte[] SkipProbs; - public byte[] InterModeProbs; - public byte[] InterpFilterProbs; - public byte[] IsInterProbs; - public byte[] CompModeProbs; - public byte[] SingleRefProbs; - public byte[] CompRefProbs; - public byte[] YModeProbs0; - public byte[] YModeProbs1; - public byte[] PartitionProbs; - public byte[] MvJointProbs; - public byte[] MvSignProbs; - public byte[] MvClassProbs; - public byte[] MvClass0BitProbs; - public byte[] MvBitsProbs; - public byte[] MvClass0FrProbs; - public byte[] MvFrProbs; - public byte[] MvClass0HpProbs; - public byte[] MvHpProbs; - } -} \ No newline at end of file diff --git 
a/Ryujinx.Graphics.Nvdec/VDec/VpxBitStreamWriter.cs b/Ryujinx.Graphics.Nvdec/VDec/VpxBitStreamWriter.cs deleted file mode 100644 index 97ada333..00000000 --- a/Ryujinx.Graphics.Nvdec/VDec/VpxBitStreamWriter.cs +++ /dev/null @@ -1,38 +0,0 @@ -using System.IO; - -namespace Ryujinx.Graphics.VDec -{ - class VpxBitStreamWriter : BitStreamWriter - { - public VpxBitStreamWriter(Stream baseStream) : base(baseStream) { } - - public void WriteU(int value, int valueSize) - { - WriteBits(value, valueSize); - } - - public void WriteS(int value, int valueSize) - { - bool sign = value < 0; - - if (sign) - { - value = -value; - } - - WriteBits((value << 1) | (sign ? 1 : 0), valueSize + 1); - } - - public void WriteDeltaQ(int value) - { - bool deltaCoded = value != 0; - - WriteBit(deltaCoded); - - if (deltaCoded) - { - WriteBits(value, 4); - } - } - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/VDec/VpxRangeEncoder.cs b/Ryujinx.Graphics.Nvdec/VDec/VpxRangeEncoder.cs deleted file mode 100644 index c854c9d9..00000000 --- a/Ryujinx.Graphics.Nvdec/VDec/VpxRangeEncoder.cs +++ /dev/null @@ -1,134 +0,0 @@ -using System.IO; - -namespace Ryujinx.Graphics.VDec -{ - class VpxRangeEncoder - { - private const int HalfProbability = 128; - - private static readonly int[] NormLut = new int[] - { - 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }; - - private Stream _baseStream; - - private uint _lowValue; - private uint _range; - private int _count; - - public VpxRangeEncoder(Stream baseStream) - { - _baseStream = baseStream; - - _range = 0xff; - _count = -24; - - Write(false); - } - - public void WriteByte(byte value) - { - Write(value, 8); - } - - public void Write(int value, int valueSize) - { - for (int bit = valueSize - 1; bit >= 0; bit--) - { - Write(((value >> bit) & 1) != 0); - } - } - - public void Write(bool bit) - { - Write(bit, HalfProbability); - } - - public void Write(bool bit, int probability) - { - uint range = _range; - - uint split = 1 + (((range - 1) * (uint)probability) >> 8); - - range = split; - - if (bit) - { - _lowValue += split; - range = _range - split; - } - - int shift = NormLut[range]; - - range <<= shift; - _count += shift; - - if (_count >= 0) - { - int offset = shift - _count; - - if (((_lowValue << (offset - 1)) >> 31) != 0) - { - long currentPos = _baseStream.Position; - - _baseStream.Seek(-1, SeekOrigin.Current); - - while (_baseStream.Position >= 0 && PeekByte() == 0xff) - { - _baseStream.WriteByte(0); - - _baseStream.Seek(-2, SeekOrigin.Current); - } - - _baseStream.WriteByte((byte)(PeekByte() + 1)); - - _baseStream.Seek(currentPos, SeekOrigin.Begin); - } - - _baseStream.WriteByte((byte)(_lowValue >> (24 - offset))); - - _lowValue <<= offset; - shift = _count; - _lowValue &= 0xffffff; - _count -= 8; - } - - _lowValue <<= shift; - - _range = range; - } - - private byte PeekByte() - { - byte value = (byte)_baseStream.ReadByte(); - - _baseStream.Seek(-1, SeekOrigin.Current); - - return value; - } - - public void End() - { - for (int index = 0; index < 32; index++) - { - Write(false); - } - } - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/Vic/StructUnpacker.cs b/Ryujinx.Graphics.Nvdec/Vic/StructUnpacker.cs deleted file mode 100644 index 
4957e6b6..00000000 --- a/Ryujinx.Graphics.Nvdec/Vic/StructUnpacker.cs +++ /dev/null @@ -1,69 +0,0 @@ -using Ryujinx.Graphics.Gpu.Memory; -using System; - -namespace Ryujinx.Graphics.Vic -{ - class StructUnpacker - { - private MemoryAccessor _vmm; - - private ulong _position; - - private ulong _buffer; - private int _buffPos; - - public StructUnpacker(MemoryAccessor vmm, ulong position) - { - _vmm = vmm; - _position = position; - - _buffPos = 64; - } - - public int Read(int bits) - { - if ((uint)bits > 32) - { - throw new ArgumentOutOfRangeException(nameof(bits)); - } - - int value = 0; - - while (bits > 0) - { - RefillBufferIfNeeded(); - - int readBits = bits; - - int maxReadBits = 64 - _buffPos; - - if (readBits > maxReadBits) - { - readBits = maxReadBits; - } - - value <<= readBits; - - value |= (int)(_buffer >> _buffPos) & (int)(0xffffffff >> (32 - readBits)); - - _buffPos += readBits; - - bits -= readBits; - } - - return value; - } - - private void RefillBufferIfNeeded() - { - if (_buffPos >= 64) - { - _buffer = _vmm.ReadUInt64(_position); - - _position += 8; - - _buffPos = 0; - } - } - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/Vic/SurfaceOutputConfig.cs b/Ryujinx.Graphics.Nvdec/Vic/SurfaceOutputConfig.cs deleted file mode 100644 index bcb01e70..00000000 --- a/Ryujinx.Graphics.Nvdec/Vic/SurfaceOutputConfig.cs +++ /dev/null @@ -1,33 +0,0 @@ -namespace Ryujinx.Graphics.Vic -{ - struct SurfaceOutputConfig - { - public SurfacePixelFormat PixelFormat; - - public int SurfaceWidth; - public int SurfaceHeight; - public int GobBlockHeight; - - public ulong SurfaceLumaAddress; - public ulong SurfaceChromaUAddress; - public ulong SurfaceChromaVAddress; - - public SurfaceOutputConfig( - SurfacePixelFormat pixelFormat, - int surfaceWidth, - int surfaceHeight, - int gobBlockHeight, - ulong outputSurfaceLumaAddress, - ulong outputSurfaceChromaUAddress, - ulong outputSurfaceChromaVAddress) - { - PixelFormat = pixelFormat; - SurfaceWidth = 
surfaceWidth; - SurfaceHeight = surfaceHeight; - GobBlockHeight = gobBlockHeight; - SurfaceLumaAddress = outputSurfaceLumaAddress; - SurfaceChromaUAddress = outputSurfaceChromaUAddress; - SurfaceChromaVAddress = outputSurfaceChromaVAddress; - } - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/Vic/SurfacePixelFormat.cs b/Ryujinx.Graphics.Nvdec/Vic/SurfacePixelFormat.cs deleted file mode 100644 index 8dabd094..00000000 --- a/Ryujinx.Graphics.Nvdec/Vic/SurfacePixelFormat.cs +++ /dev/null @@ -1,8 +0,0 @@ -namespace Ryujinx.Graphics.Vic -{ - enum SurfacePixelFormat - { - Rgba8 = 0x1f, - Yuv420P = 0x44 - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/Vic/VideoImageComposer.cs b/Ryujinx.Graphics.Nvdec/Vic/VideoImageComposer.cs deleted file mode 100644 index 39e18fa6..00000000 --- a/Ryujinx.Graphics.Nvdec/Vic/VideoImageComposer.cs +++ /dev/null @@ -1,94 +0,0 @@ -using Ryujinx.Graphics.Gpu; -using Ryujinx.Graphics.VDec; - -namespace Ryujinx.Graphics.Vic -{ - class VideoImageComposer - { - private ulong _configStructAddress; - private ulong _outputSurfaceLumaAddress; - private ulong _outputSurfaceChromaUAddress; - private ulong _outputSurfaceChromaVAddress; - - private VideoDecoder _vdec; - - public VideoImageComposer(VideoDecoder vdec) - { - _vdec = vdec; - } - - public void Process(GpuContext gpu, int methodOffset, int[] arguments) - { - VideoImageComposerMeth method = (VideoImageComposerMeth)methodOffset; - - switch (method) - { - case VideoImageComposerMeth.Execute: Execute(gpu); break; - case VideoImageComposerMeth.SetConfigStructOffset: SetConfigStructOffset(arguments); break; - case VideoImageComposerMeth.SetOutputSurfaceLumaOffset: SetOutputSurfaceLumaOffset(arguments); break; - case VideoImageComposerMeth.SetOutputSurfaceChromaUOffset: SetOutputSurfaceChromaUOffset(arguments); break; - case VideoImageComposerMeth.SetOutputSurfaceChromaVOffset: SetOutputSurfaceChromaVOffset(arguments); break; - } - } - - private void 
Execute(GpuContext gpu) - { - StructUnpacker unpacker = new StructUnpacker(gpu.MemoryAccessor, _configStructAddress + 0x20); - - SurfacePixelFormat pixelFormat = (SurfacePixelFormat)unpacker.Read(7); - - int chromaLocHoriz = unpacker.Read(2); - int chromaLocVert = unpacker.Read(2); - - int blockLinearKind = unpacker.Read(4); - int blockLinearHeightLog2 = unpacker.Read(4); - - int reserved0 = unpacker.Read(3); - int reserved1 = unpacker.Read(10); - - int surfaceWidthMinus1 = unpacker.Read(14); - int surfaceHeightMinus1 = unpacker.Read(14); - - int gobBlockHeight = 1 << blockLinearHeightLog2; - - int surfaceWidth = surfaceWidthMinus1 + 1; - int surfaceHeight = surfaceHeightMinus1 + 1; - - SurfaceOutputConfig outputConfig = new SurfaceOutputConfig( - pixelFormat, - surfaceWidth, - surfaceHeight, - gobBlockHeight, - _outputSurfaceLumaAddress, - _outputSurfaceChromaUAddress, - _outputSurfaceChromaVAddress); - - _vdec.CopyPlanes(gpu, outputConfig); - } - - private void SetConfigStructOffset(int[] arguments) - { - _configStructAddress = GetAddress(arguments); - } - - private void SetOutputSurfaceLumaOffset(int[] arguments) - { - _outputSurfaceLumaAddress = GetAddress(arguments); - } - - private void SetOutputSurfaceChromaUOffset(int[] arguments) - { - _outputSurfaceChromaUAddress = GetAddress(arguments); - } - - private void SetOutputSurfaceChromaVOffset(int[] arguments) - { - _outputSurfaceChromaVAddress = GetAddress(arguments); - } - - private static ulong GetAddress(int[] arguments) - { - return (ulong)(uint)arguments[0] << 8; - } - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/Vic/VideoImageComposerMeth.cs b/Ryujinx.Graphics.Nvdec/Vic/VideoImageComposerMeth.cs deleted file mode 100644 index b30cabea..00000000 --- a/Ryujinx.Graphics.Nvdec/Vic/VideoImageComposerMeth.cs +++ /dev/null @@ -1,12 +0,0 @@ -namespace Ryujinx.Graphics.Vic -{ - enum VideoImageComposerMeth - { - Execute = 0xc0, - SetControlParams = 0x1c1, - SetConfigStructOffset = 0x1c2, - 
using Ryujinx.Common;
using Ryujinx.Graphics.Gpu.Memory;
using Ryujinx.Graphics.Nvdec.Image;
using Ryujinx.Graphics.Nvdec.Types.Vp9;
using Ryujinx.Graphics.Nvdec.Vp9;
using Ryujinx.Graphics.Video;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using static Ryujinx.Graphics.Nvdec.MemoryExtensions;

namespace Ryujinx.Graphics.Nvdec
{
    /// <summary>
    /// Decodes one VP9 frame described by the NVDEC register state and writes the
    /// decoded picture, motion vectors and backward update counts back to device memory.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the generic type arguments below (PictureInfo, EntropyProbs, byte, Vp9MvRef,
    /// BackwardUpdates) were reconstructed from the surrounding declarations - confirm against the tree.
    /// </remarks>
    static class Vp9Decoder
    {
        // The motion vector buffers are sized per 8x8 pixel block...
        private const int MiBlockSize = 8;

        // ...with 16 bytes reserved for each of those blocks.
        private const int MvsBytesPerMiBlock = 16;

        private static Decoder _decoder = new Decoder();

        /// <summary>
        /// Decodes the frame currently described by <paramref name="state"/>.
        /// </summary>
        /// <param name="device">Device that is notified when a frame was successfully decoded</param>
        /// <param name="rm">Resource manager providing the memory manager and the surface cache</param>
        /// <param name="state">NVDEC registers holding the offsets of all input and output buffers</param>
        public unsafe static void Decode(NvdecDevice device, ResourceManager rm, ref NvdecRegisters state)
        {
            PictureInfo pictureInfo = rm.Gmm.DeviceRead<PictureInfo>(state.SetPictureInfoOffset);
            EntropyProbs entropy = rm.Gmm.DeviceRead<EntropyProbs>(state.SetVp9EntropyProbsOffset);

            ISurface Rent(uint luma, uint chroma, FrameSize size)
            {
                return rm.Cache.Get(_decoder, CodecId.Vp9, luma, chroma, size.Width, size.Height);
            }

            // Slot 3 holds the surface being decoded; slots 0-2 hold the reference frames.
            uint lumaOffset = state.SetSurfaceLumaOffset[3];
            uint chromaOffset = state.SetSurfaceChromaOffset[3];

            ISurface lastSurface = Rent(state.SetSurfaceLumaOffset[0], state.SetSurfaceChromaOffset[0], pictureInfo.LastFrameSize);
            ISurface goldenSurface = Rent(state.SetSurfaceLumaOffset[1], state.SetSurfaceChromaOffset[1], pictureInfo.GoldenFrameSize);
            ISurface altSurface = Rent(state.SetSurfaceLumaOffset[2], state.SetSurfaceChromaOffset[2], pictureInfo.AltFrameSize);
            ISurface currentSurface = Rent(lumaOffset, chromaOffset, pictureInfo.CurrentFrameSize);

            try
            {
                Vp9PictureInfo info = pictureInfo.Convert();

                info.LastReference = lastSurface;
                info.GoldenReference = goldenSurface;
                info.AltReference = altSurface;

                entropy.Convert(ref info.Entropy);

                ReadOnlySpan<byte> bitstream = rm.Gmm.DeviceGetSpan(state.SetBitstreamOffset, (int)pictureInfo.BitstreamSize);

                ReadOnlySpan<Vp9MvRef> mvsIn = ReadOnlySpan<Vp9MvRef>.Empty;

                if (info.UsePrevInFindMvRefs)
                {
                    mvsIn = GetMvsInput(rm.Gmm, pictureInfo.CurrentFrameSize, state.SetVp9LastFrameMvsOffset);
                }

                // The region is written back when it is disposed, at the end of this try block.
                using var mvsRegion = rm.Gmm.GetWritableRegion(
                    ExtendOffset(state.SetVp9CurrFrameMvsOffset),
                    GetMvsBufferSize(pictureInfo.CurrentFrameSize));

                Span<Vp9MvRef> mvsOut = MemoryMarshal.Cast<byte, Vp9MvRef>(mvsRegion.Memory.Span);

                if (_decoder.Decode(ref info, currentSurface, bitstream, mvsIn, mvsOut))
                {
                    SurfaceWriter.Write(rm.Gmm, currentSurface, lumaOffset, chromaOffset);

                    device.OnFrameDecoded(CodecId.Vp9, lumaOffset, chromaOffset);
                }

                WriteBackwardUpdates(rm.Gmm, state.SetVp9BackwardUpdatesOffset, ref info.BackwardUpdateCounts);
            }
            finally
            {
                // Always hand the surfaces back, even when decoding or the write back throws,
                // otherwise they would stay rented from the cache.
                rm.Cache.Put(lastSurface);
                rm.Cache.Put(goldenSurface);
                rm.Cache.Put(altSurface);
                rm.Cache.Put(currentSurface);
            }
        }

        /// <summary>
        /// Computes the size in bytes of a motion vector buffer for a frame of the given size.
        /// </summary>
        /// <param name="size">Frame size in pixels</param>
        /// <returns>16 bytes for every (rounded up) 8x8 block of the frame</returns>
        private static int GetMvsBufferSize(FrameSize size)
        {
            int miCols = BitUtils.DivRoundUp(size.Width, MiBlockSize);
            int miRows = BitUtils.DivRoundUp(size.Height, MiBlockSize);

            return miRows * miCols * MvsBytesPerMiBlock;
        }

        /// <summary>
        /// Gets a read-only view of the motion vectors stored at <paramref name="offset"/>.
        /// </summary>
        private static ReadOnlySpan<Vp9MvRef> GetMvsInput(MemoryManager gmm, FrameSize size, uint offset)
        {
            return MemoryMarshal.Cast<byte, Vp9MvRef>(gmm.DeviceGetSpan(offset, GetMvsBufferSize(size)));
        }

        /// <summary>
        /// Writes the backward update counts produced by the decoder to device memory at <paramref name="offset"/>.
        /// </summary>
        private static void WriteBackwardUpdates(MemoryManager gmm, uint offset, ref Vp9BackwardUpdates counts)
        {
            using var backwardUpdatesRegion = gmm.GetWritableRegion(ExtendOffset(offset), Unsafe.SizeOf<BackwardUpdates>());

            ref var backwardUpdates = ref MemoryMarshal.Cast<byte, BackwardUpdates>(backwardUpdatesRegion.Memory.Span)[0];

            backwardUpdates = new BackwardUpdates(ref counts);
        }
    }
}
b/Ryujinx.Graphics.Texture/LayoutConverter.cs @@ -9,6 +9,90 @@ namespace Ryujinx.Graphics.Texture { private const int HostStrideAlignment = 4; + public static void ConvertBlockLinearToLinear( + Span dst, + int width, + int height, + int stride, + int bytesPerPixel, + int gobBlocksInY, + ReadOnlySpan data) + { + int gobHeight = gobBlocksInY * GobHeight; + + int strideTrunc = BitUtils.AlignDown(width * bytesPerPixel, 16); + int strideTrunc64 = BitUtils.AlignDown(width * bytesPerPixel, 64); + + int xStart = strideTrunc / bytesPerPixel; + + int outStrideGap = stride - width * bytesPerPixel; + + int alignment = GobStride / bytesPerPixel; + + int wAligned = BitUtils.AlignUp(width, alignment); + + BlockLinearLayout layoutConverter = new BlockLinearLayout(wAligned, height, gobBlocksInY, 1, bytesPerPixel); + + unsafe bool Convert(Span output, ReadOnlySpan data) where T : unmanaged + { + fixed (byte* outputPtr = output, dataPtr = data) + { + byte* outPtr = outputPtr; + + for (int y = 0; y < height; y++) + { + layoutConverter.SetY(y); + + for (int x = 0; x < strideTrunc64; x += 64, outPtr += 64) + { + byte* offset = dataPtr + layoutConverter.GetOffsetWithLineOffset64(x); + byte* offset2 = offset + 0x20; + byte* offset3 = offset + 0x100; + byte* offset4 = offset + 0x120; + + Vector128 value = *(Vector128*)offset; + Vector128 value2 = *(Vector128*)offset2; + Vector128 value3 = *(Vector128*)offset3; + Vector128 value4 = *(Vector128*)offset4; + + *(Vector128*)outPtr = value; + *(Vector128*)(outPtr + 16) = value2; + *(Vector128*)(outPtr + 32) = value3; + *(Vector128*)(outPtr + 48) = value4; + } + + for (int x = strideTrunc64; x < strideTrunc; x += 16, outPtr += 16) + { + byte* offset = dataPtr + layoutConverter.GetOffsetWithLineOffset16(x); + + *(Vector128*)outPtr = *(Vector128*)offset; + } + + for (int x = xStart; x < width; x++, outPtr += bytesPerPixel) + { + byte* offset = dataPtr + layoutConverter.GetOffset(x); + + *(T*)outPtr = *(T*)offset; + } + + outPtr += outStrideGap; + } 
+ } + return true; + } + + bool _ = bytesPerPixel switch + { + 1 => Convert(dst, data), + 2 => Convert(dst, data), + 4 => Convert(dst, data), + 8 => Convert(dst, data), + 12 => Convert(dst, data), + 16 => Convert>(dst, data), + _ => throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format.") + }; + } + public static Span ConvertBlockLinearToLinear( int width, int height, @@ -190,6 +274,90 @@ namespace Ryujinx.Graphics.Texture return output; } + public static void ConvertLinearToBlockLinear( + Span dst, + int width, + int height, + int stride, + int bytesPerPixel, + int gobBlocksInY, + ReadOnlySpan data) + { + int gobHeight = gobBlocksInY * GobHeight; + + int strideTrunc = BitUtils.AlignDown(width * bytesPerPixel, 16); + int strideTrunc64 = BitUtils.AlignDown(width * bytesPerPixel, 64); + + int xStart = strideTrunc / bytesPerPixel; + + int inStrideGap = stride - width * bytesPerPixel; + + int alignment = GobStride / bytesPerPixel; + + int wAligned = BitUtils.AlignUp(width, alignment); + + BlockLinearLayout layoutConverter = new BlockLinearLayout(wAligned, height, gobBlocksInY, 1, bytesPerPixel); + + unsafe bool Convert(Span output, ReadOnlySpan data) where T : unmanaged + { + fixed (byte* outputPtr = output, dataPtr = data) + { + byte* inPtr = dataPtr; + + for (int y = 0; y < height; y++) + { + layoutConverter.SetY(y); + + for (int x = 0; x < strideTrunc64; x += 64, inPtr += 64) + { + byte* offset = outputPtr + layoutConverter.GetOffsetWithLineOffset64(x); + byte* offset2 = offset + 0x20; + byte* offset3 = offset + 0x100; + byte* offset4 = offset + 0x120; + + Vector128 value = *(Vector128*)inPtr; + Vector128 value2 = *(Vector128*)(inPtr + 16); + Vector128 value3 = *(Vector128*)(inPtr + 32); + Vector128 value4 = *(Vector128*)(inPtr + 48); + + *(Vector128*)offset = value; + *(Vector128*)offset2 = value2; + *(Vector128*)offset3 = value3; + *(Vector128*)offset4 = value4; + } + + for (int x = strideTrunc64; x < strideTrunc; x += 16, inPtr += 
16) + { + byte* offset = outputPtr + layoutConverter.GetOffsetWithLineOffset16(x); + + *(Vector128*)offset = *(Vector128*)inPtr; + } + + for (int x = xStart; x < width; x++, inPtr += bytesPerPixel) + { + byte* offset = outputPtr + layoutConverter.GetOffset(x); + + *(T*)offset = *(T*)inPtr; + } + + inPtr += inStrideGap; + } + } + return true; + } + + bool _ = bytesPerPixel switch + { + 1 => Convert(dst, data), + 2 => Convert(dst, data), + 4 => Convert(dst, data), + 8 => Convert(dst, data), + 12 => Convert(dst, data), + 16 => Convert>(dst, data), + _ => throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format.") + }; + } + public static Span ConvertLinearToBlockLinear( int width, int height, diff --git a/Ryujinx.Graphics.Texture/OffsetCalculator.cs b/Ryujinx.Graphics.Texture/OffsetCalculator.cs index 6d283954..dd4b6e7f 100644 --- a/Ryujinx.Graphics.Texture/OffsetCalculator.cs +++ b/Ryujinx.Graphics.Texture/OffsetCalculator.cs @@ -94,6 +94,19 @@ namespace Ryujinx.Graphics.Texture } } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int GetOffsetWithLineOffset64(int x) + { + if (_isLinear) + { + return x + _yPart; + } + else + { + return _layoutConverter.GetOffsetWithLineOffset64(x); + } + } + public (int offset, int size) GetRectangleRange(int x, int y, int width, int height) { if (_isLinear) diff --git a/Ryujinx.Graphics.Vic/Blender.cs b/Ryujinx.Graphics.Vic/Blender.cs new file mode 100644 index 00000000..f00b9093 --- /dev/null +++ b/Ryujinx.Graphics.Vic/Blender.cs @@ -0,0 +1,157 @@ +using Ryujinx.Graphics.Vic.Image; +using Ryujinx.Graphics.Vic.Types; +using System; +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; + +namespace Ryujinx.Graphics.Vic +{ + static class Blender + { + public static void BlendOne(Surface dst, Surface src, ref SlotStruct slot) + { + if (Sse41.IsSupported && (dst.Width & 3) == 0) + { + BlendOneSse41(dst, src, ref 
slot); + return; + } + + for (int y = 0; y < dst.Height; y++) + { + for (int x = 0; x < dst.Width; x++) + { + int inR = src.GetR(x, y); + int inG = src.GetG(x, y); + int inB = src.GetB(x, y); + + MatrixMultiply(ref slot.ColorMatrixStruct, inR, inG, inB, out int r, out int g, out int b); + + r = Math.Clamp(r, slot.SlotConfig.SoftClampLow, slot.SlotConfig.SoftClampHigh); + g = Math.Clamp(g, slot.SlotConfig.SoftClampLow, slot.SlotConfig.SoftClampHigh); + b = Math.Clamp(b, slot.SlotConfig.SoftClampLow, slot.SlotConfig.SoftClampHigh); + + dst.SetR(x, y, (ushort)r); + dst.SetG(x, y, (ushort)g); + dst.SetB(x, y, (ushort)b); + dst.SetA(x, y, src.GetA(x, y)); + } + } + } + + private unsafe static void BlendOneSse41(Surface dst, Surface src, ref SlotStruct slot) + { + Debug.Assert((dst.Width & 3) == 0); + + ref MatrixStruct mtx = ref slot.ColorMatrixStruct; + + int one = 1 << (mtx.MatrixRShift + 8); + + Vector128 col1 = Vector128.Create(mtx.MatrixCoeff00, mtx.MatrixCoeff10, mtx.MatrixCoeff20, 0); + Vector128 col2 = Vector128.Create(mtx.MatrixCoeff01, mtx.MatrixCoeff11, mtx.MatrixCoeff21, 0); + Vector128 col3 = Vector128.Create(mtx.MatrixCoeff02, mtx.MatrixCoeff12, mtx.MatrixCoeff22, one); + Vector128 col4 = Vector128.Create(mtx.MatrixCoeff03, mtx.MatrixCoeff13, mtx.MatrixCoeff23, 0); + Vector128 rShift = Vector128.CreateScalar(mtx.MatrixRShift); + Vector128 clMin = Vector128.Create((ushort)slot.SlotConfig.SoftClampLow); + Vector128 clMax = Vector128.Create((ushort)slot.SlotConfig.SoftClampHigh); + + fixed (Pixel* srcPtr = src.Data, dstPtr = dst.Data) + { + Pixel* ip = srcPtr; + Pixel* op = dstPtr; + + for (int y = 0; y < dst.Height; y++, ip += src.Width, op += dst.Width) + { + for (int x = 0; x < dst.Width; x += 4) + { + Vector128 pixel1 = Sse41.ConvertToVector128Int32((ushort*)(ip + (uint)x)); + Vector128 pixel2 = Sse41.ConvertToVector128Int32((ushort*)(ip + (uint)x + 1)); + Vector128 pixel3 = Sse41.ConvertToVector128Int32((ushort*)(ip + (uint)x + 2)); + Vector128 pixel4 = 
Sse41.ConvertToVector128Int32((ushort*)(ip + (uint)x + 3)); + + Vector128 pixel12, pixel34; + + if (mtx.MatrixEnable) + { + pixel12 = Sse41.PackUnsignedSaturate( + MatrixMultiplySse41(pixel1, col1, col2, col3, col4, rShift), + MatrixMultiplySse41(pixel2, col1, col2, col3, col4, rShift)); + pixel34 = Sse41.PackUnsignedSaturate( + MatrixMultiplySse41(pixel3, col1, col2, col3, col4, rShift), + MatrixMultiplySse41(pixel4, col1, col2, col3, col4, rShift)); + } + else + { + pixel12 = Sse41.PackUnsignedSaturate(pixel1, pixel2); + pixel34 = Sse41.PackUnsignedSaturate(pixel3, pixel4); + } + + pixel12 = Sse41.Min(pixel12, clMax); + pixel34 = Sse41.Min(pixel34, clMax); + pixel12 = Sse41.Max(pixel12, clMin); + pixel34 = Sse41.Max(pixel34, clMin); + + Sse2.Store((ushort*)(op + (uint)x + 0), pixel12); + Sse2.Store((ushort*)(op + (uint)x + 2), pixel34); + } + } + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void MatrixMultiply(ref MatrixStruct mtx, int x, int y, int z, out int r, out int g, out int b) + { + if (mtx.MatrixEnable) + { + r = x * mtx.MatrixCoeff00 + y * mtx.MatrixCoeff01 + z * mtx.MatrixCoeff02; + g = x * mtx.MatrixCoeff10 + y * mtx.MatrixCoeff11 + z * mtx.MatrixCoeff12; + b = x * mtx.MatrixCoeff20 + y * mtx.MatrixCoeff21 + z * mtx.MatrixCoeff22; + + r >>= mtx.MatrixRShift; + g >>= mtx.MatrixRShift; + b >>= mtx.MatrixRShift; + + r += mtx.MatrixCoeff03; + g += mtx.MatrixCoeff13; + b += mtx.MatrixCoeff23; + + r >>= 8; + g >>= 8; + b >>= 8; + } + else + { + r = x; + g = y; + b = z; + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector128 MatrixMultiplySse41( + Vector128 pixel, + Vector128 col1, + Vector128 col2, + Vector128 col3, + Vector128 col4, + Vector128 rShift) + { + Vector128 x = Sse2.Shuffle(pixel, 0); + Vector128 y = Sse2.Shuffle(pixel, 0x55); + Vector128 z = Sse2.Shuffle(pixel, 0xea); + + col1 = Sse41.MultiplyLow(col1, x); + col2 = Sse41.MultiplyLow(col2, y); + col3 = Sse41.MultiplyLow(col3, z); 
using System;

namespace Ryujinx.Graphics.Vic.Image
{
    /// <summary>
    /// Small fixed size pool of reusable arrays.
    /// </summary>
    /// <typeparam name="T">Type of the elements of the pooled arrays</typeparam>
    class BufferPool<T>
    {
        /// <summary>
        /// Maximum number of buffers on the pool.
        /// </summary>
        private const int MaxBuffers = 4;

        /// <summary>
        /// Maximum size of a buffer that can be added on the pool.
        /// If the required buffer is larger than this, it won't be
        /// added to the pool to avoid long term high memory usage.
        /// </summary>
        private const int MaxBufferSize = 2048 * 1280;

        private struct PoolItem
        {
            public bool InUse;
            public T[] Buffer;
        }

        private readonly PoolItem[] _pool = new PoolItem[MaxBuffers];

        /// <summary>
        /// Rents a buffer with the exact size requested.
        /// </summary>
        /// <param name="length">Size of the buffer</param>
        /// <param name="buffer">Span of the requested size</param>
        /// <returns>The index of the buffer on the pool, or -1 if the buffer is not pooled</returns>
        public int Rent(int length, out Span<T> buffer)
        {
            int index = RentMinimum(length, out T[] bufferArray);

            buffer = new Span<T>(bufferArray).Slice(0, length);

            return index;
        }

        /// <summary>
        /// Rents a buffer with a size greater than or equal to the requested size.
        /// </summary>
        /// <param name="length">Size of the buffer</param>
        /// <param name="buffer">Array with a length greater than or equal to the requested length</param>
        /// <returns>The index of the buffer on the pool, or -1 if the buffer is not pooled</returns>
        public int RentMinimum(int length, out T[] buffer)
        {
            // The unsigned comparison also sends negative lengths down this path,
            // where the allocation below rejects them.
            if ((uint)length > MaxBufferSize)
            {
                buffer = new T[length];
                return -1;
            }

            int firstFreeSlot = -1;

            // Try to find a buffer that is larger or the same size of the requested one.
            // This will avoid an allocation. While scanning, remember the first slot
            // that is not in use in case a new buffer has to be allocated.
            for (int i = 0; i < MaxBuffers; i++)
            {
                ref PoolItem item = ref _pool[i];

                if (item.InUse)
                {
                    continue;
                }

                if (item.Buffer != null && item.Buffer.Length >= length)
                {
                    buffer = item.Buffer;
                    item.InUse = true;
                    return i;
                }

                if (firstFreeSlot < 0)
                {
                    firstFreeSlot = i;
                }
            }

            buffer = new T[length];

            // Try to add the new buffer to the pool, replacing whatever buffer
            // the first slot that is not in use currently holds.
            if (firstFreeSlot >= 0)
            {
                ref PoolItem item = ref _pool[firstFreeSlot];

                item.Buffer = buffer;
                item.InUse = true;
            }

            return firstFreeSlot;
        }

        /// <summary>
        /// Returns a buffer returned from <see cref="Rent"/> or <see cref="RentMinimum"/> to the pool.
        /// </summary>
        /// <param name="index">Index of the buffer on the pool; negative indices (non-pooled buffers) are ignored</param>
        public void Return(int index)
        {
            if (index < 0)
            {
                return;
            }

            _pool[index].InUse = false;
        }
    }
}
using Ryujinx.Common;
using Ryujinx.Graphics.Texture;

namespace Ryujinx.Graphics.Vic.Image
{
    /// <summary>
    /// Helpers shared by the surface reading and writing code.
    /// </summary>
    static class SurfaceCommon
    {
        /// <summary>
        /// Gets the size in bytes of one row, rounded up to a multiple of 256 bytes.
        /// </summary>
        public static int GetPitch(int width, int bytesPerPixel) => BitUtils.AlignUp(width * bytesPerPixel, 256);

        /// <summary>
        /// Gets the total size reported by the size calculator for a block linear
        /// texture with the given dimensions, pixel size and block height.
        /// </summary>
        public static int GetBlockLinearSize(int width, int height, int bytesPerPixel, int gobBlocksInY)
        {
            var sizeInfo = SizeCalculator.GetBlockLinearTextureSize(width, height, 1, 1, 1, 1, 1, bytesPerPixel, gobBlocksInY, 1, 1);

            return sizeInfo.TotalSize;
        }

        /// <summary>
        /// Widens a 32-bit offset and shifts it left by 8 bits.
        /// </summary>
        public static ulong ExtendOffset(uint offset) => (ulong)offset << 8;

        /// <summary>
        /// Shifts an 8-bit component left by 2 bits.
        /// </summary>
        public static ushort Upsample(byte value) => (ushort)(value << 2);

        /// <summary>
        /// Shifts a component right by 2 bits and truncates it to 8 bits.
        /// </summary>
        public static byte Downsample(ushort value) => (byte)(value >> 2);
    }
}
(byte)4, (byte)6, (byte)7, (byte)5, + (byte)8, (byte)10, (byte)11, (byte)9, + (byte)12, (byte)14, (byte)15, (byte)13); + Vector128 alphaMask = Vector128.Create(0xffUL << 48).AsInt16(); + + int yStrideGap = yStride - width; + int uvStrideGap = uvStride - input.UvWidth; + + int widthTrunc = width & ~0xf; + + fixed (Pixel* dstPtr = output.Data) + { + Pixel* op = dstPtr; + + fixed (byte* src0Ptr = input.Buffer0, src1Ptr = input.Buffer1) + { + byte* i0p = src0Ptr; + + for (int y = 0; y < height; y++) + { + byte* i1p = src1Ptr + (y >> 1) * uvStride; + + int x = 0; + + for (; x < widthTrunc; x += 16, i0p += 16, i1p += 16) + { + Vector128 ya0 = Sse41.ConvertToVector128Int16(i0p); + Vector128 ya1 = Sse41.ConvertToVector128Int16(i0p + 8); + + Vector128 uv = Sse2.LoadVector128(i1p); + + Vector128 uv0 = Sse2.UnpackLow(uv.AsInt16(), uv.AsInt16()); + Vector128 uv1 = Sse2.UnpackHigh(uv.AsInt16(), uv.AsInt16()); + + Vector128 rgba0 = Sse2.UnpackLow(ya0, uv0); + Vector128 rgba1 = Sse2.UnpackHigh(ya0, uv0); + Vector128 rgba2 = Sse2.UnpackLow(ya1, uv1); + Vector128 rgba3 = Sse2.UnpackHigh(ya1, uv1); + + rgba0 = Ssse3.Shuffle(rgba0.AsByte(), shufMask).AsInt16(); + rgba1 = Ssse3.Shuffle(rgba1.AsByte(), shufMask).AsInt16(); + rgba2 = Ssse3.Shuffle(rgba2.AsByte(), shufMask).AsInt16(); + rgba3 = Ssse3.Shuffle(rgba3.AsByte(), shufMask).AsInt16(); + + Vector128 rgba16_0 = Sse41.ConvertToVector128Int16(rgba0.AsByte()); + Vector128 rgba16_1 = Sse41.ConvertToVector128Int16(HighToLow(rgba0.AsByte())); + Vector128 rgba16_2 = Sse41.ConvertToVector128Int16(rgba1.AsByte()); + Vector128 rgba16_3 = Sse41.ConvertToVector128Int16(HighToLow(rgba1.AsByte())); + Vector128 rgba16_4 = Sse41.ConvertToVector128Int16(rgba2.AsByte()); + Vector128 rgba16_5 = Sse41.ConvertToVector128Int16(HighToLow(rgba2.AsByte())); + Vector128 rgba16_6 = Sse41.ConvertToVector128Int16(rgba3.AsByte()); + Vector128 rgba16_7 = Sse41.ConvertToVector128Int16(HighToLow(rgba3.AsByte())); + + rgba16_0 = Sse2.Or(rgba16_0, alphaMask); + 
rgba16_1 = Sse2.Or(rgba16_1, alphaMask); + rgba16_2 = Sse2.Or(rgba16_2, alphaMask); + rgba16_3 = Sse2.Or(rgba16_3, alphaMask); + rgba16_4 = Sse2.Or(rgba16_4, alphaMask); + rgba16_5 = Sse2.Or(rgba16_5, alphaMask); + rgba16_6 = Sse2.Or(rgba16_6, alphaMask); + rgba16_7 = Sse2.Or(rgba16_7, alphaMask); + + rgba16_0 = Sse2.ShiftLeftLogical(rgba16_0, 2); + rgba16_1 = Sse2.ShiftLeftLogical(rgba16_1, 2); + rgba16_2 = Sse2.ShiftLeftLogical(rgba16_2, 2); + rgba16_3 = Sse2.ShiftLeftLogical(rgba16_3, 2); + rgba16_4 = Sse2.ShiftLeftLogical(rgba16_4, 2); + rgba16_5 = Sse2.ShiftLeftLogical(rgba16_5, 2); + rgba16_6 = Sse2.ShiftLeftLogical(rgba16_6, 2); + rgba16_7 = Sse2.ShiftLeftLogical(rgba16_7, 2); + + Sse2.Store((short*)(op + (uint)x + 0), rgba16_0); + Sse2.Store((short*)(op + (uint)x + 2), rgba16_1); + Sse2.Store((short*)(op + (uint)x + 4), rgba16_2); + Sse2.Store((short*)(op + (uint)x + 6), rgba16_3); + Sse2.Store((short*)(op + (uint)x + 8), rgba16_4); + Sse2.Store((short*)(op + (uint)x + 10), rgba16_5); + Sse2.Store((short*)(op + (uint)x + 12), rgba16_6); + Sse2.Store((short*)(op + (uint)x + 14), rgba16_7); + } + + for (; x < width; x++, i1p += (x & 1) * 2) + { + Pixel* px = op + (uint)x; + + px->R = Upsample(*i0p++); + px->G = Upsample(*i1p); + px->B = Upsample(*(i1p + 1)); + px->A = 0x3ff; + } + + op += width; + i0p += yStrideGap; + i1p += uvStrideGap; + } + } + } + } + else + { + for (int y = 0; y < height; y++) + { + int uvBase = (y >> 1) * uvStride; + + for (int x = 0; x < width; x++) + { + output.SetR(x, y, Upsample(input.Buffer0[y * yStride + x])); + + int uvOffs = uvBase + (x & ~1); + + output.SetG(x, y, Upsample(input.Buffer1[uvOffs])); + output.SetB(x, y, Upsample(input.Buffer1[uvOffs + 1])); + output.SetA(x, y, 0x3ff); + } + } + } + + return output; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector128 HighToLow(Vector128 value) + { + return Sse.MoveHighToLow(value.AsSingle(), value.AsSingle()).AsByte(); + } + + private static 
InputSurface ReadSurface( + MemoryManager gmm, + ref SlotSurfaceConfig config, + ref PlaneOffsets offsets, + int bytesPerPixel, + int planes) + { + InputSurface surface = new InputSurface(); + + int gobBlocksInY = 1 << config.SlotBlkHeight; + + bool linear = config.SlotBlkKind == 0; + + int lw = config.SlotLumaWidth + 1; + int lh = config.SlotLumaHeight + 1; + + int cw = config.SlotChromaWidth + 1; + int ch = config.SlotChromaHeight + 1; + + surface.Width = lw; + surface.Height = lh; + surface.UvWidth = cw; + surface.UvHeight = ch; + + if (planes > 0) + { + surface.Buffer0 = ReadBuffer(gmm, offsets.LumaOffset, linear, lw, lh, bytesPerPixel, gobBlocksInY); + } + + if (planes > 1) + { + surface.Buffer1 = ReadBuffer(gmm, offsets.ChromaUOffset, linear, cw, ch, planes == 2 ? 2 : 1, gobBlocksInY); + } + + if (planes > 2) + { + surface.Buffer2 = ReadBuffer(gmm, offsets.ChromaVOffset, linear, cw, ch, 1, gobBlocksInY); + } + + return surface; + } + + private static ReadOnlySpan ReadBuffer( + MemoryManager gmm, + uint offset, + bool linear, + int width, + int height, + int bytesPerPixel, + int gobBlocksInY) + { + int stride = GetPitch(width, bytesPerPixel); + + if (linear) + { + return gmm.GetSpan(ExtendOffset(offset), stride * height); + } + + return ReadBuffer(gmm, offset, width, height, stride, bytesPerPixel, gobBlocksInY); + } + + private static ReadOnlySpan ReadBuffer( + MemoryManager gmm, + uint offset, + int width, + int height, + int dstStride, + int bytesPerPixel, + int gobBlocksInY) + { + int inSize = GetBlockLinearSize(width, height, bytesPerPixel, gobBlocksInY); + + ReadOnlySpan src = gmm.GetSpan(ExtendOffset(offset), inSize); + + Span dst = new byte[dstStride * height]; + + LayoutConverter.ConvertBlockLinearToLinear(dst, width, height, dstStride, bytesPerPixel, gobBlocksInY, src); + + return dst; + } + } +} diff --git a/Ryujinx.Graphics.Vic/Image/SurfaceWriter.cs b/Ryujinx.Graphics.Vic/Image/SurfaceWriter.cs new file mode 100644 index 00000000..cab1ec80 --- 
/dev/null +++ b/Ryujinx.Graphics.Vic/Image/SurfaceWriter.cs @@ -0,0 +1,361 @@ +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.Texture; +using Ryujinx.Graphics.Vic.Types; +using System; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static Ryujinx.Graphics.Vic.Image.SurfaceCommon; + +namespace Ryujinx.Graphics.Vic.Image +{ + class SurfaceWriter + { + public static void Write(ResourceManager rm, Surface input, ref OutputSurfaceConfig config, ref PlaneOffsets offsets) + { + switch (config.OutPixelFormat) + { + case PixelFormat.A8B8G8R8: + WriteA8B8G8R8(rm, input, ref config, ref offsets); + break; + case PixelFormat.Y8___V8U8_N420: + WriteNv12(rm, input, ref config, ref offsets); + break; + default: + Logger.PrintError(LogClass.Vic, $"Unsupported pixel format \"{config.OutPixelFormat}\"."); + break; + } + } + + private unsafe static void WriteA8B8G8R8(ResourceManager rm, Surface input, ref OutputSurfaceConfig config, ref PlaneOffsets offsets) + { + int width = input.Width; + int height = input.Height; + int stride = GetPitch(width, 4); + + int dstIndex = rm.BufferPool.Rent(height * stride, out Span dst); + + if (Sse2.IsSupported) + { + int widthTrunc = width & ~7; + int strideGap = stride - width * 4; + + fixed (Pixel* srcPtr = input.Data) + { + Pixel* ip = srcPtr; + + fixed (byte* dstPtr = dst) + { + byte* op = dstPtr; + + for (int y = 0; y < height; y++, ip += input.Width) + { + int x = 0; + + for (; x < widthTrunc; x += 8) + { + Vector128 pixel12 = Sse2.LoadVector128((ushort*)(ip + (uint)x)); + Vector128 pixel34 = Sse2.LoadVector128((ushort*)(ip + (uint)x + 2)); + Vector128 pixel56 = Sse2.LoadVector128((ushort*)(ip + (uint)x + 4)); + Vector128 pixel78 = Sse2.LoadVector128((ushort*)(ip + (uint)x + 6)); + + pixel12 = Sse2.ShiftRightLogical(pixel12, 2); + pixel34 = Sse2.ShiftRightLogical(pixel34, 2); + pixel56 = Sse2.ShiftRightLogical(pixel56, 2); + pixel78 = Sse2.ShiftRightLogical(pixel78, 2); + + Vector128 pixel1234 = 
Sse2.PackUnsignedSaturate(pixel12.AsInt16(), pixel34.AsInt16()); + Vector128 pixel5678 = Sse2.PackUnsignedSaturate(pixel56.AsInt16(), pixel78.AsInt16()); + + Sse2.Store(op + 0x00, pixel1234); + Sse2.Store(op + 0x10, pixel5678); + + op += 0x20; + } + + for (; x < width; x++) + { + Pixel* px = ip + (uint)x; + + *(op + 0) = Downsample(px->R); + *(op + 1) = Downsample(px->G); + *(op + 2) = Downsample(px->B); + *(op + 3) = Downsample(px->A); + + op += 4; + } + + op += strideGap; + } + } + } + } + else + { + for (int y = 0; y < height; y++) + { + int baseOffs = y * stride; + + for (int x = 0; x < width; x++) + { + int offs = baseOffs + x * 4; + + dst[offs + 0] = Downsample(input.GetR(x, y)); + dst[offs + 1] = Downsample(input.GetG(x, y)); + dst[offs + 2] = Downsample(input.GetB(x, y)); + dst[offs + 3] = Downsample(input.GetA(x, y)); + } + } + } + + bool outLinear = config.OutBlkKind == 0; + + int gobBlocksInY = 1 << config.OutBlkHeight; + + WriteBuffer(rm, dst, offsets.LumaOffset, outLinear, width, height, 4, gobBlocksInY); + + rm.BufferPool.Return(dstIndex); + } + + private unsafe static void WriteNv12(ResourceManager rm, Surface input, ref OutputSurfaceConfig config, ref PlaneOffsets offsets) + { + int gobBlocksInY = 1 << config.OutBlkHeight; + + bool outLinear = config.OutBlkKind == 0; + + int width = Math.Min(config.OutLumaWidth + 1, input.Width); + int height = Math.Min(config.OutLumaHeight + 1, input.Height); + int yStride = GetPitch(config.OutLumaWidth + 1, 1); + + int dstYIndex = rm.BufferPool.Rent((config.OutLumaHeight + 1) * yStride, out Span dstY); + + if (Sse41.IsSupported) + { + Vector128 mask = Vector128.Create(0xffffUL).AsUInt16(); + + int widthTrunc = width & ~0xf; + int strideGap = yStride - width; + + fixed (Pixel* srcPtr = input.Data) + { + Pixel* ip = srcPtr; + + fixed (byte* dstPtr = dstY) + { + byte* op = dstPtr; + + for (int y = 0; y < height; y++, ip += input.Width) + { + int x = 0; + + for (; x < widthTrunc; x += 16) + { + byte* baseOffset = 
(byte*)(ip + (ulong)(uint)x); + + Vector128 pixelp1 = Sse2.LoadVector128((ushort*)baseOffset); + Vector128 pixelp2 = Sse2.LoadVector128((ushort*)(baseOffset + 0x10)); + Vector128 pixelp3 = Sse2.LoadVector128((ushort*)(baseOffset + 0x20)); + Vector128 pixelp4 = Sse2.LoadVector128((ushort*)(baseOffset + 0x30)); + Vector128 pixelp5 = Sse2.LoadVector128((ushort*)(baseOffset + 0x40)); + Vector128 pixelp6 = Sse2.LoadVector128((ushort*)(baseOffset + 0x50)); + Vector128 pixelp7 = Sse2.LoadVector128((ushort*)(baseOffset + 0x60)); + Vector128 pixelp8 = Sse2.LoadVector128((ushort*)(baseOffset + 0x70)); + + pixelp1 = Sse2.And(pixelp1, mask); + pixelp2 = Sse2.And(pixelp2, mask); + pixelp3 = Sse2.And(pixelp3, mask); + pixelp4 = Sse2.And(pixelp4, mask); + pixelp5 = Sse2.And(pixelp5, mask); + pixelp6 = Sse2.And(pixelp6, mask); + pixelp7 = Sse2.And(pixelp7, mask); + pixelp8 = Sse2.And(pixelp8, mask); + + Vector128 pixelq1 = Sse41.PackUnsignedSaturate(pixelp1.AsInt32(), pixelp2.AsInt32()); + Vector128 pixelq2 = Sse41.PackUnsignedSaturate(pixelp3.AsInt32(), pixelp4.AsInt32()); + Vector128 pixelq3 = Sse41.PackUnsignedSaturate(pixelp5.AsInt32(), pixelp6.AsInt32()); + Vector128 pixelq4 = Sse41.PackUnsignedSaturate(pixelp7.AsInt32(), pixelp8.AsInt32()); + + pixelq1 = Sse41.PackUnsignedSaturate(pixelq1.AsInt32(), pixelq2.AsInt32()); + pixelq2 = Sse41.PackUnsignedSaturate(pixelq3.AsInt32(), pixelq4.AsInt32()); + + pixelq1 = Sse2.ShiftRightLogical(pixelq1, 2); + pixelq2 = Sse2.ShiftRightLogical(pixelq2, 2); + + Vector128 pixel = Sse2.PackUnsignedSaturate(pixelq1.AsInt16(), pixelq2.AsInt16()); + + Sse2.Store(op, pixel); + + op += 0x10; + } + + for (; x < width; x++) + { + Pixel* px = ip + (uint)x; + + *op++ = Downsample(px->R); + } + + op += strideGap; + } + } + } + } + else + { + for (int y = 0; y < height; y++) + { + for (int x = 0; x < width; x++) + { + dstY[y * yStride + x] = Downsample(input.GetR(x, y)); + } + } + } + + WriteBuffer( + rm, + dstY, + offsets.LumaOffset, + outLinear, + 
config.OutLumaWidth + 1, + config.OutLumaHeight + 1, + 1, + gobBlocksInY); + + rm.BufferPool.Return(dstYIndex); + + // Chroma plane: one interleaved G/B pair is written per 2x2 block of source pixels, taken from the block's top-left pixel. + int uvWidth = Math.Min(config.OutChromaWidth + 1, (width + 1) >> 1); + int uvHeight = Math.Min(config.OutChromaHeight + 1, (height + 1) >> 1); + int uvStride = GetPitch(config.OutChromaWidth + 1, 2); + + int dstUvIndex = rm.BufferPool.Rent((config.OutChromaHeight + 1) * uvStride, out Span<byte> dstUv); + + if (Sse2.IsSupported) + { + int widthTrunc = uvWidth & ~7; + int strideGap = uvStride - uvWidth * 2; + + fixed (Pixel* srcPtr = input.Data) + { + Pixel* ip = srcPtr; + + fixed (byte* dstPtr = dstUv) + { + byte* op = dstPtr; + + for (int y = 0; y < uvHeight; y++, ip += input.Width * 2) + { + int x = 0; + + for (; x < widthTrunc; x += 8) + { + // Each chroma sample covers two source pixels (16 bytes); the 4 bytes at +2 hold that pixel's G and B components. + byte* baseOffset = (byte*)ip + (ulong)(uint)x * 16; + + Vector128<uint> pixel1 = Sse2.LoadScalarVector128((uint*)(baseOffset + 0x02)); + Vector128<uint> pixel2 = Sse2.LoadScalarVector128((uint*)(baseOffset + 0x12)); + Vector128<uint> pixel3 = Sse2.LoadScalarVector128((uint*)(baseOffset + 0x22)); + Vector128<uint> pixel4 = Sse2.LoadScalarVector128((uint*)(baseOffset + 0x32)); + Vector128<uint> pixel5 = Sse2.LoadScalarVector128((uint*)(baseOffset + 0x42)); + Vector128<uint> pixel6 = Sse2.LoadScalarVector128((uint*)(baseOffset + 0x52)); + Vector128<uint> pixel7 = Sse2.LoadScalarVector128((uint*)(baseOffset + 0x62)); + Vector128<uint> pixel8 = Sse2.LoadScalarVector128((uint*)(baseOffset + 0x72)); + + Vector128<uint> pixel12 = Sse2.UnpackLow(pixel1, pixel2); + Vector128<uint> pixel34 = Sse2.UnpackLow(pixel3, pixel4); + Vector128<uint> pixel56 = Sse2.UnpackLow(pixel5, pixel6); + Vector128<uint> pixel78 = Sse2.UnpackLow(pixel7, pixel8); + + // Reinterpret as 16-bit lanes before shifting: a 64-bit lane shift would move the low 2 bits of each component into the top bits of its neighbour and make the saturating pack below produce 0 or 255. + Vector128<ushort> pixel1234 = Sse2.UnpackLow(pixel12.AsUInt64(), pixel34.AsUInt64()).AsUInt16(); + Vector128<ushort> pixel5678 = Sse2.UnpackLow(pixel56.AsUInt64(), pixel78.AsUInt64()).AsUInt16(); + + pixel1234 = Sse2.ShiftRightLogical(pixel1234, 2); + pixel5678 = Sse2.ShiftRightLogical(pixel5678, 2); + + Vector128<byte> pixel = Sse2.PackUnsignedSaturate(pixel1234.AsInt16(), pixel5678.AsInt16()); + + 
Sse2.Store(op, pixel); + + op += 0x10; + } + + for (; x < uvWidth; x++) + { + Pixel* px = ip + (uint)(x << 1); + + *op++ = Downsample(px->G); + *op++ = Downsample(px->B); + } + + op += strideGap; + } + } + } + } + else + { + for (int y = 0; y < uvHeight; y++) + { + for (int x = 0; x < uvWidth; x++) + { + int xx = x << 1; + int yy = y << 1; + + int uvOffs = y * uvStride + xx; + + dstUv[uvOffs + 0] = Downsample(input.GetG(xx, yy)); + dstUv[uvOffs + 1] = Downsample(input.GetB(xx, yy)); + } + } + } + + WriteBuffer( + rm, + dstUv, + offsets.ChromaUOffset, + outLinear, + config.OutChromaWidth + 1, + config.OutChromaHeight + 1, 2, + gobBlocksInY); + + rm.BufferPool.Return(dstUvIndex); + } + + private static void WriteBuffer( + ResourceManager rm, + ReadOnlySpan src, + uint offset, + bool linear, + int width, + int height, + int bytesPerPixel, + int gobBlocksInY) + { + if (linear) + { + rm.Gmm.Write(ExtendOffset(offset), src); + return; + } + + WriteBuffer(rm, src, offset, width, height, bytesPerPixel, gobBlocksInY); + } + + private static void WriteBuffer( + ResourceManager rm, + ReadOnlySpan src, + uint offset, + int width, + int height, + int bytesPerPixel, + int gobBlocksInY) + { + int outSize = GetBlockLinearSize(width, height, bytesPerPixel, gobBlocksInY); + int dstStride = GetPitch(width, bytesPerPixel); + + int dstIndex = rm.BufferPool.Rent(outSize, out Span dst); + + LayoutConverter.ConvertLinearToBlockLinear(dst, width, height, dstStride, bytesPerPixel, gobBlocksInY, src); + + rm.Gmm.Write(ExtendOffset(offset), dst); + + rm.BufferPool.Return(dstIndex); + } + } +} diff --git a/Ryujinx.Graphics.Vic/ResourceManager.cs b/Ryujinx.Graphics.Vic/ResourceManager.cs new file mode 100644 index 00000000..036b30b6 --- /dev/null +++ b/Ryujinx.Graphics.Vic/ResourceManager.cs @@ -0,0 +1,19 @@ +using Ryujinx.Graphics.Gpu.Memory; +using Ryujinx.Graphics.Vic.Image; + +namespace Ryujinx.Graphics.Vic +{ + struct ResourceManager + { + public MemoryManager Gmm { get; } + public 
BufferPool SurfacePool { get; } + public BufferPool BufferPool { get; } + + public ResourceManager(MemoryManager gmm, BufferPool surfacePool, BufferPool bufferPool) + { + Gmm = gmm; + SurfacePool = surfacePool; + BufferPool = bufferPool; + } + } +} diff --git a/Ryujinx.Graphics.Vic/Ryujinx.Graphics.Vic.csproj b/Ryujinx.Graphics.Vic/Ryujinx.Graphics.Vic.csproj new file mode 100644 index 00000000..f072fa36 --- /dev/null +++ b/Ryujinx.Graphics.Vic/Ryujinx.Graphics.Vic.csproj @@ -0,0 +1,23 @@ + + + + netcoreapp3.1 + + + + true + + + + true + + + + + + + + + + + diff --git a/Ryujinx.Graphics.Vic/Types/BitfieldExtensions.cs b/Ryujinx.Graphics.Vic/Types/BitfieldExtensions.cs new file mode 100644 index 00000000..06d0f006 --- /dev/null +++ b/Ryujinx.Graphics.Vic/Types/BitfieldExtensions.cs @@ -0,0 +1,39 @@ +using System.Runtime.CompilerServices; + +namespace Ryujinx.Graphics.Vic.Types +{ + /** <summary>Bit-field readers used by the packed structures in this namespace. Callers pass a field's bit position counted from the start of the whole structure (e.g. 66); the position is masked to the word size, so it selects the matching bit inside the single word the method is invoked on.</summary> */ + static class BitfieldExtensions + { + /** <summary>Returns whether one bit of a 32-bit word is set.</summary> <param name="value">Word to read</param> <param name="lsb">Bit position; only the low 5 bits are used</param> */ + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool Extract(this int value, int lsb) + { + return ((value >> (lsb & 0x1f)) & 1) != 0; + } + + /** <summary>Returns an unsigned (zero-extended) field from a 32-bit word.</summary> <param name="value">Word to read</param> <param name="lsb">Position of the field's lowest bit; only the low 5 bits are used</param> <param name="length">Field width in bits, 1 to 32. A width of 0 does not produce an empty mask, because C# masks the shift count (32 - 0) down to 0</param> */ + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int Extract(this int value, int lsb, int length) + { + return (value >> (lsb & 0x1f)) & (int)(uint.MaxValue >> (32 - length)); + } + + /** <summary>Returns whether one bit of a 64-bit word is set.</summary> <param name="value">Word to read</param> <param name="lsb">Bit position; only the low 6 bits are used</param> */ + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool Extract(this long value, int lsb) + { + return ((int)(value >> (lsb & 0x3f)) & 1) != 0; + } + + /** <summary>Returns an unsigned (zero-extended) field from a 64-bit word. The shifted word is truncated to 32 bits before masking, so the field must be at most 32 bits wide.</summary> <param name="value">Word to read</param> <param name="lsb">Position of the field's lowest bit; only the low 6 bits are used</param> <param name="length">Field width in bits, 1 to 32</param> */ + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int Extract(this long value, int lsb, int length) + { + return (int)(value >> (lsb & 0x3f)) & (int)(uint.MaxValue >> (32 - length)); + } + + /** <summary>Returns a signed (sign-extended) field from a 64-bit word.</summary> <param name="value">Word to read</param> <param name="lsb">Position of the field's lowest bit; only the low 6 bits are used</param> <param name="length">Field width in bits; the result is truncated to 32 bits</param> */ + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int ExtractSx(this long value, int lsb, int length) + { + int shift = lsb & 0x3f; + + /* Move the field's top bit up to bit 63, then let the arithmetic right shift replicate it while bringing the field back down to bit 0. */ + return (int)((value << (64 - (shift + length))) >> (64 - length)); + } + } +} diff --git a/Ryujinx.Graphics.Vic/Types/BlendingSlotStruct.cs 
b/Ryujinx.Graphics.Vic/Types/BlendingSlotStruct.cs new file mode 100644 index 00000000..fc5d315e --- /dev/null +++ b/Ryujinx.Graphics.Vic/Types/BlendingSlotStruct.cs @@ -0,0 +1,27 @@ +namespace Ryujinx.Graphics.Vic.Types +{ + struct BlendingSlotStruct + { + private long _word0; + private long _word1; + + public int AlphaK1 => _word0.Extract(0, 10); + public int AlphaK2 => _word0.Extract(16, 10); + public int SrcFactCMatchSelect => _word0.Extract(32, 3); + public int DstFactCMatchSelect => _word0.Extract(36, 3); + public int SrcFactAMatchSelect => _word0.Extract(40, 3); + public int DstFactAMatchSelect => _word0.Extract(44, 3); + public int OverrideR => _word1.Extract(66, 10); + public int OverrideG => _word1.Extract(76, 10); + public int OverrideB => _word1.Extract(86, 10); + public int OverrideA => _word1.Extract(96, 10); + public bool UseOverrideR => _word1.Extract(108); + public bool UseOverrideG => _word1.Extract(109); + public bool UseOverrideB => _word1.Extract(110); + public bool UseOverrideA => _word1.Extract(111); + public bool MaskR => _word1.Extract(112); + public bool MaskG => _word1.Extract(113); + public bool MaskB => _word1.Extract(114); + public bool MaskA => _word1.Extract(115); + } +} diff --git a/Ryujinx.Graphics.Vic/Types/ClearRectStruct.cs b/Ryujinx.Graphics.Vic/Types/ClearRectStruct.cs new file mode 100644 index 00000000..21b7b598 --- /dev/null +++ b/Ryujinx.Graphics.Vic/Types/ClearRectStruct.cs @@ -0,0 +1,17 @@ +namespace Ryujinx.Graphics.Vic.Types +{ + struct ClearRectStruct + { + private long _word0; + private long _word1; + + public int ClearRect0Left => _word0.Extract(0, 14); + public int ClearRect0Right => _word0.Extract(16, 14); + public int ClearRect0Top => _word0.Extract(32, 14); + public int ClearRect0Bottom => _word0.Extract(48, 14); + public int ClearRect1Left => _word1.Extract(64, 14); + public int ClearRect1Right => _word1.Extract(80, 14); + public int ClearRect1Top => _word1.Extract(96, 14); + public int ClearRect1Bottom => 
_word1.Extract(112, 14); + } +} diff --git a/Ryujinx.Graphics.Vic/Types/ConfigStruct.cs b/Ryujinx.Graphics.Vic/Types/ConfigStruct.cs new file mode 100644 index 00000000..f1e6287a --- /dev/null +++ b/Ryujinx.Graphics.Vic/Types/ConfigStruct.cs @@ -0,0 +1,14 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Vic.Types +{ + struct ConfigStruct + { + public PipeConfig PipeConfig; + public OutputConfig OutputConfig; + public OutputSurfaceConfig OutputSurfaceConfig; + public MatrixStruct OutColorMatrix; + public Array4 ClearRectStruct; + public Array8 SlotStruct; + } +} diff --git a/Ryujinx.Graphics.Vic/Types/LumaKeyStruct.cs b/Ryujinx.Graphics.Vic/Types/LumaKeyStruct.cs new file mode 100644 index 00000000..df5e647e --- /dev/null +++ b/Ryujinx.Graphics.Vic/Types/LumaKeyStruct.cs @@ -0,0 +1,17 @@ +namespace Ryujinx.Graphics.Vic.Types +{ + struct LumaKeyStruct + { + private long _word0; + private long _word1; + + public int LumaCoeff0 => _word0.Extract(0, 20); + public int LumaCoeff1 => _word0.Extract(20, 20); + public int LumaCoeff2 => _word0.Extract(40, 20); + public int LumaRShift => _word0.Extract(60, 4); + public int LumaCoeff3 => _word1.Extract(64, 20); + public int LumaKeyLower => _word1.Extract(84, 10); + public int LumaKeyUpper => _word1.Extract(94, 10); + public bool LumaKeyEnabled => _word1.Extract(104); + } +} diff --git a/Ryujinx.Graphics.Vic/Types/MatrixStruct.cs b/Ryujinx.Graphics.Vic/Types/MatrixStruct.cs new file mode 100644 index 00000000..b9dcd8ff --- /dev/null +++ b/Ryujinx.Graphics.Vic/Types/MatrixStruct.cs @@ -0,0 +1,25 @@ +namespace Ryujinx.Graphics.Vic.Types +{ + struct MatrixStruct + { + private long _word0; + private long _word1; + private long _word2; + private long _word3; + + public int MatrixCoeff00 => _word0.ExtractSx(0, 20); + public int MatrixCoeff10 => _word0.ExtractSx(20, 20); + public int MatrixCoeff20 => _word0.ExtractSx(40, 20); + public int MatrixRShift => _word0.Extract(60, 4); + public int MatrixCoeff01 => 
_word1.ExtractSx(64, 20); + public int MatrixCoeff11 => _word1.ExtractSx(84, 20); + public int MatrixCoeff21 => _word1.ExtractSx(104, 20); + public bool MatrixEnable => _word1.Extract(127); + public int MatrixCoeff02 => _word2.ExtractSx(128, 20); + public int MatrixCoeff12 => _word2.ExtractSx(148, 20); + public int MatrixCoeff22 => _word2.ExtractSx(168, 20); + public int MatrixCoeff03 => _word3.ExtractSx(192, 20); + public int MatrixCoeff13 => _word3.ExtractSx(212, 20); + public int MatrixCoeff23 => _word3.ExtractSx(232, 20); + } +} diff --git a/Ryujinx.Graphics.Vic/Types/OutputConfig.cs b/Ryujinx.Graphics.Vic/Types/OutputConfig.cs new file mode 100644 index 00000000..8ab46fe5 --- /dev/null +++ b/Ryujinx.Graphics.Vic/Types/OutputConfig.cs @@ -0,0 +1,23 @@ +namespace Ryujinx.Graphics.Vic.Types +{ + struct OutputConfig + { + private long _word0; + private long _word1; + + public int AlphaFillMode => _word0.Extract(0, 3); + public int AlphaFillSlot => _word0.Extract(3, 3); + public int BackgroundAlpha => _word0.Extract(6, 10); + public int BackgroundR => _word0.Extract(16, 10); + public int BackgroundG => _word0.Extract(26, 10); + public int BackgroundB => _word0.Extract(36, 10); + public int RegammaMode => _word0.Extract(46, 2); + public bool OutputFlipX => _word0.Extract(48); + public bool OutputFlipY => _word0.Extract(49); + public bool OutputTranspose => _word0.Extract(50); + public int TargetRectLeft => _word1.Extract(64, 14); + public int TargetRectRight => _word1.Extract(80, 14); + public int TargetRectTop => _word1.Extract(96, 14); + public int TargetRectBottom => _word1.Extract(112, 14); + } +} diff --git a/Ryujinx.Graphics.Vic/Types/OutputSurfaceConfig.cs b/Ryujinx.Graphics.Vic/Types/OutputSurfaceConfig.cs new file mode 100644 index 00000000..9068cbbe --- /dev/null +++ b/Ryujinx.Graphics.Vic/Types/OutputSurfaceConfig.cs @@ -0,0 +1,20 @@ +namespace Ryujinx.Graphics.Vic.Types +{ + struct OutputSurfaceConfig + { + private long _word0; + private long _word1; + + 
public PixelFormat OutPixelFormat => (PixelFormat)_word0.Extract(0, 7); + public int OutChromaLocHoriz => _word0.Extract(7, 2); + public int OutChromaLocVert => _word0.Extract(9, 2); + public int OutBlkKind => _word0.Extract(11, 4); + public int OutBlkHeight => _word0.Extract(15, 4); + public int OutSurfaceWidth => _word0.Extract(32, 14); + public int OutSurfaceHeight => _word0.Extract(46, 14); + public int OutLumaWidth => _word1.Extract(64, 14); + public int OutLumaHeight => _word1.Extract(78, 14); + public int OutChromaWidth => _word1.Extract(96, 14); + public int OutChromaHeight => _word1.Extract(110, 14); + } +} diff --git a/Ryujinx.Graphics.Vic/Types/PipeConfig.cs b/Ryujinx.Graphics.Vic/Types/PipeConfig.cs new file mode 100644 index 00000000..72d8cc99 --- /dev/null +++ b/Ryujinx.Graphics.Vic/Types/PipeConfig.cs @@ -0,0 +1,11 @@ +namespace Ryujinx.Graphics.Vic.Types +{ + struct PipeConfig + { + private long _word0; + private long _word1; + + public int DownsampleHoriz => _word0.Extract(0, 11); + public int DownsampleVert => _word0.Extract(16, 11); + } +} diff --git a/Ryujinx.Graphics.Vic/Types/PixelFormat.cs b/Ryujinx.Graphics.Vic/Types/PixelFormat.cs new file mode 100644 index 00000000..72dc7899 --- /dev/null +++ b/Ryujinx.Graphics.Vic/Types/PixelFormat.cs @@ -0,0 +1,81 @@ +namespace Ryujinx.Graphics.Vic.Types +{ + enum PixelFormat + { + A8, + L8, + A4L4, + L4A4, + R8, + A8L8, + L8A8, + R8G8, + G8R8, + B5G6R5, + R5G6B5, + B6G5R5, + R5G5B6, + A1B5G5R5, + A1R5G5B5, + B5G5R5A1, + R5G5B5A1, + A5B5G5R1, + A5R1G5B5, + B5G5R1A5, + R1G5B5A5, + X1B5G5R5, + X1R5G5B5, + B5G5R5X1, + R5G5B5X1, + A4B4G4R4, + A4R4G4B4, + B4G4R4A4, + R4G4B4A4, + B8_G8_R8, + R8_G8_B8, + A8B8G8R8, + A8R8G8B8, + B8G8R8A8, + R8G8B8A8, + X8B8G8R8, + X8R8G8B8, + B8G8R8X8, + R8G8B8X8, + A2B10G10R10, + A2R10G10B10, + B10G10R10A2, + R10G10B10A2, + A4P4, + P4A4, + P8A845, + A8P8, + P8, + P1, + U8V8, + V8U8, + A8Y8U8V8, + V8U8Y8A8, + Y8_U8_V8, + Y8_V8_U8, + U8_V8_Y8, + V8_U8_Y8, + Y8_U8__Y8_V8, + 
Y8_V8__Y8_U8, + U8_Y8__V8_Y8, + V8_Y8__U8_Y8, + Y8___U8V8_N444, + Y8___V8U8_N444, + Y8___U8V8_N422, + Y8___V8U8_N422, + Y8___U8V8_N422R, + Y8___V8U8_N422R, + Y8___U8V8_N420, + Y8___V8U8_N420, + Y8___U8___V8_N444, + Y8___U8___V8_N422, + Y8___U8___V8_N422R, + Y8___U8___V8_N420, + U8, + V8 + } +} diff --git a/Ryujinx.Graphics.Vic/Types/SlotConfig.cs b/Ryujinx.Graphics.Vic/Types/SlotConfig.cs new file mode 100644 index 00000000..183ee4ac --- /dev/null +++ b/Ryujinx.Graphics.Vic/Types/SlotConfig.cs @@ -0,0 +1,63 @@ +namespace Ryujinx.Graphics.Vic.Types +{ + struct SlotConfig + { + private long _word0; + private long _word1; + private long _word2; + private long _word3; + private long _word4; + private long _word5; + private long _word6; + private long _word7; + + public bool SlotEnable => _word0.Extract(0); + public bool DeNoise => _word0.Extract(1); + public bool AdvancedDenoise => _word0.Extract(2); + public bool CadenceDetect => _word0.Extract(3); + public bool MotionMap => _word0.Extract(4); + public bool MMapCombine => _word0.Extract(5); + public bool IsEven => _word0.Extract(6); + public bool ChromaEven => _word0.Extract(7); + public bool CurrentFieldEnable => _word0.Extract(8); + public bool PrevFieldEnable => _word0.Extract(9); + public bool NextFieldEnable => _word0.Extract(10); + public bool NextNrFieldEnable => _word0.Extract(11); + public bool CurMotionFieldEnable => _word0.Extract(12); + public bool PrevMotionFieldEnable => _word0.Extract(13); + public bool PpMotionFieldEnable => _word0.Extract(14); + public bool CombMotionFieldEnable => _word0.Extract(15); + public int FrameFormat => _word0.Extract(16, 4); + public int FilterLengthY => _word0.Extract(20, 2); + public int FilterLengthX => _word0.Extract(22, 2); + public int Panoramic => _word0.Extract(24, 12); + public int DetailFltClamp => _word0.Extract(58, 6); + public int FilterNoise => _word1.Extract(64, 10); + public int FilterDetail => _word1.Extract(74, 10); + public int ChromaNoise => 
_word1.Extract(84, 10); + public int ChromaDetail => _word1.Extract(94, 10); + public int DeinterlaceMode => _word1.Extract(104, 4); + public int MotionAccumWeight => _word1.Extract(108, 3); + public int NoiseIir => _word1.Extract(111, 11); + public int LightLevel => _word1.Extract(122, 4); + public int SoftClampLow => _word2.Extract(128, 10); + public int SoftClampHigh => _word2.Extract(138, 10); + public int PlanarAlpha => _word2.Extract(160, 10); + public bool ConstantAlpha => _word2.Extract(170); + public int StereoInterleave => _word2.Extract(171, 3); + public bool ClipEnabled => _word2.Extract(174); + public int ClearRectMask => _word2.Extract(175, 8); + public int DegammaMode => _word2.Extract(183, 2); + public bool DecompressEnable => _word2.Extract(186); + public int DecompressCtbCount => _word3.Extract(192, 8); + public int DecompressZbcColor => _word3.Extract(200, 32); + public int SourceRectLeft => _word4.Extract(256, 30); + public int SourceRectRight => _word4.Extract(288, 30); + public int SourceRectTop => _word5.Extract(320, 30); + public int SourceRectBottom => _word5.Extract(352, 30); + public int DstRectLeft => _word6.Extract(384, 14); + public int DstRectRight => _word6.Extract(400, 14); + public int DstRectTop => _word6.Extract(416, 14); + public int DstRectBottom => _word6.Extract(432, 14); + } +} diff --git a/Ryujinx.Graphics.Vic/Types/SlotStruct.cs b/Ryujinx.Graphics.Vic/Types/SlotStruct.cs new file mode 100644 index 00000000..96c6cce5 --- /dev/null +++ b/Ryujinx.Graphics.Vic/Types/SlotStruct.cs @@ -0,0 +1,12 @@ +namespace Ryujinx.Graphics.Vic.Types +{ + struct SlotStruct + { + public SlotConfig SlotConfig; + public SlotSurfaceConfig SlotSurfaceConfig; + public LumaKeyStruct LumaKeyStruct; + public MatrixStruct ColorMatrixStruct; + public MatrixStruct GamutMatrixStruct; + public BlendingSlotStruct BlendingSlotStruct; + } +} diff --git a/Ryujinx.Graphics.Vic/Types/SlotSurfaceConfig.cs b/Ryujinx.Graphics.Vic/Types/SlotSurfaceConfig.cs new file 
mode 100644 index 00000000..7396afa1 --- /dev/null +++ b/Ryujinx.Graphics.Vic/Types/SlotSurfaceConfig.cs @@ -0,0 +1,21 @@ +namespace Ryujinx.Graphics.Vic.Types +{ + struct SlotSurfaceConfig + { + private long _word0; + private long _word1; + + public PixelFormat SlotPixelFormat => (PixelFormat)_word0.Extract(0, 7); + public int SlotChromaLocHoriz => _word0.Extract(7, 2); + public int SlotChromaLocVert => _word0.Extract(9, 2); + public int SlotBlkKind => _word0.Extract(11, 4); + public int SlotBlkHeight => _word0.Extract(15, 4); + public int SlotCacheWidth => _word0.Extract(19, 3); + public int SlotSurfaceWidth => _word0.Extract(32, 14); + public int SlotSurfaceHeight => _word0.Extract(46, 14); + public int SlotLumaWidth => _word1.Extract(64, 14); + public int SlotLumaHeight => _word1.Extract(78, 14); + public int SlotChromaWidth => _word1.Extract(96, 14); + public int SlotChromaHeight => _word1.Extract(110, 14); + } +} diff --git a/Ryujinx.Graphics.Vic/VicDevice.cs b/Ryujinx.Graphics.Vic/VicDevice.cs new file mode 100644 index 00000000..db4fe513 --- /dev/null +++ b/Ryujinx.Graphics.Vic/VicDevice.cs @@ -0,0 +1,97 @@ +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.Device; +using Ryujinx.Graphics.Gpu.Memory; +using Ryujinx.Graphics.Vic.Image; +using Ryujinx.Graphics.Vic.Types; +using System; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Vic +{ + /// <summary> + /// VIC device: exposes the VIC register file and, when the Execute register is written, blends the enabled input slots into the output surface. + /// </summary> + public class VicDevice : IDeviceState + { + private readonly MemoryManager _gmm; + private readonly ResourceManager _rm; + private readonly DeviceState<VicRegisters> _state; + + private PlaneOffsets _overrideOffsets; + private bool _hasOverride; + + public VicDevice(MemoryManager gmm) + { + _gmm = gmm; + _rm = new ResourceManager(gmm, new BufferPool<Pixel>(), new BufferPool<byte>()); + _state = new DeviceState<VicRegisters>(new Dictionary<string, RwCallback> + { + { nameof(VicRegisters.Execute), new RwCallback(Execute, null) } + }); + } + + /// <summary> + /// Overrides all input surfaces with a custom surface. 
+ /// </summary> + /// <param name="lumaOffset">Offset of the luma plane or packed data for this surface</param> + /// <param name="chromaUOffset">Offset of the U chroma plane (for planar formats) or both chroma planes (for semiplanar formats)</param> + /// <param name="chromaVOffset">Offset of the V chroma plane for planar formats</param> + public void SetSurfaceOverride(uint lumaOffset, uint chromaUOffset, uint chromaVOffset) + { + _overrideOffsets.LumaOffset = lumaOffset; + _overrideOffsets.ChromaUOffset = chromaUOffset; + _overrideOffsets.ChromaVOffset = chromaVOffset; + _hasOverride = true; + } + + /// <summary> + /// Disables overriding input surfaces. + /// </summary> + /// <remarks> + /// Surface overrides are disabled by default. + /// Call this if you previously called <see cref="SetSurfaceOverride(uint, uint, uint)"/> and wish to disable it. + /// </remarks> + public void DisableSurfaceOverride() + { + _hasOverride = false; + } + + public int Read(int offset) => _state.Read(offset); + public void Write(int offset, int data) => _state.Write(offset, data); + + /// <summary> + /// Callback for the Execute register: reads the configuration structure, blends every enabled slot into a new output surface and writes that surface out. + /// </summary> + /// <param name="data">Value passed by the register callback (unused)</param> + private void Execute(int data) + { + ConfigStruct config = ReadIndirect<ConfigStruct>(_state.State.SetConfigStructOffset); + + using Surface output = new Surface( + _rm.SurfacePool, + config.OutputSurfaceConfig.OutSurfaceWidth + 1, + config.OutputSurfaceConfig.OutSurfaceHeight + 1); + + for (int i = 0; i < config.SlotStruct.Length; i++) + { + ref SlotStruct slot = ref config.SlotStruct[i]; + + if (!slot.SlotConfig.SlotEnable) + { + continue; + } + + var offsets = _state.State.SetSurfacexSlotx[i][0]; + + // The override replaces only the plane offsets; the slot's own surface configuration is still used. + if (_hasOverride) + { + offsets = _overrideOffsets; + } + + using Surface src = SurfaceReader.Read(_rm, ref slot.SlotSurfaceConfig, ref offsets); + + Blender.BlendOne(output, src, ref slot); + } + + SurfaceWriter.Write(_rm, output, ref config.OutputSurfaceConfig, ref _state.State.SetOutputSurface); + } + + /// <summary> + /// Reads a structure from GPU memory. + /// </summary> + /// <param name="offset">Register value holding the address in 256-byte units (it is shifted left by 8 to form the address)</param> + private T ReadIndirect<T>(uint offset) where T : unmanaged + { + return _gmm.Read<T>((ulong)offset << 8); + } + } +} diff --git a/Ryujinx.Graphics.Vic/VicRegisters.cs b/Ryujinx.Graphics.Vic/VicRegisters.cs new file mode 100644 index 00000000..71dc9eed --- /dev/null +++ b/Ryujinx.Graphics.Vic/VicRegisters.cs @@ -0,0 +1,47 
@@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Vic +{ + /// <summary> + /// Offsets of the planes of one surface, as held in the surface registers. + /// </summary> + struct PlaneOffsets + { + public uint LumaOffset; + public uint ChromaUOffset; + public uint ChromaVOffset; + } + + /// <summary> + /// VIC register file layout. All registers are 32 bits wide; the number in each Reserved field name is the byte offset at which that padding starts (for example, 64 words of Reserved0 plus Nop place Reserved104 at 0x104). + /// </summary> + struct VicRegisters + { + public Array64<uint> Reserved0; + public uint Nop; + public Array15<uint> Reserved104; + public uint PmTrigger; + public Array47<uint> Reserved144; + public uint SetApplicationID; + public uint SetWatchdogTimer; + public Array14<uint> Reserved208; + public uint SemaphoreA; + public uint SemaphoreB; + public uint SemaphoreC; + public uint CtxSaveArea; + public uint CtxSwitch; + public Array43<uint> Reserved254; + public uint Execute; + public uint SemaphoreD; + public Array62<uint> Reserved308; + public Array8<Array8<PlaneOffsets>> SetSurfacexSlotx; + public uint SetPictureIndex; + public uint SetControlParams; + public uint SetConfigStructOffset; + public uint SetFilterStructOffset; + public uint SetPaletteOffset; + public uint SetHistOffset; + public uint SetContextId; + public uint SetFceUcodeSize; + public PlaneOffsets SetOutputSurface; + public uint SetFceUcodeOffset; + public Array4<uint> Reserved730; + public Array8<uint> SetSlotContextId; + public Array8<uint> SetCompTagBufferOffset; + public Array8<uint> SetHistoryBufferOffset; + } +} diff --git a/Ryujinx.Graphics.Video/H264PictureInfo.cs b/Ryujinx.Graphics.Video/H264PictureInfo.cs new file mode 100644 index 00000000..3b2c2fff --- /dev/null +++ b/Ryujinx.Graphics.Video/H264PictureInfo.cs @@ -0,0 +1,47 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Video +{ + public struct H264PictureInfo + { + public Array2 FieldOrderCnt; + public bool IsReference; + public ushort ChromaFormatIdc; + public ushort FrameNum; + public bool FieldPicFlag; + public bool BottomFieldFlag; + public uint NumRefFrames; + public bool MbAdaptiveFrameFieldFlag; + public bool ConstrainedIntraPredFlag; + public bool WeightedPredFlag; + public uint WeightedBipredIdc; + public bool FrameMbsOnlyFlag; + public bool Transform8x8ModeFlag; + public int ChromaQpIndexOffset; + public int 
SecondChromaQpIndexOffset; + public int PicInitQpMinus26; + public uint NumRefIdxL0ActiveMinus1; + public uint NumRefIdxL1ActiveMinus1; + public uint Log2MaxFrameNumMinus4; + public uint PicOrderCntType; + public uint Log2MaxPicOrderCntLsbMinus4; + public bool DeltaPicOrderAlwaysZeroFlag; + public bool Direct8x8InferenceFlag; + public bool EntropyCodingModeFlag; + public bool PicOrderPresentFlag; + public bool DeblockingFilterControlPresentFlag; + public bool RedundantPicCntPresentFlag; + public uint NumSliceGroupsMinus1; + public uint SliceGroupMapType; + public uint SliceGroupChangeRateMinus1; + // TODO: Slice group map + public bool FmoAsoEnable; + public bool ScalingMatrixPresent; + public Array6> ScalingLists4x4; + public Array2> ScalingLists8x8; + public uint FrameType; + public uint PicWidthInMbsMinus1; + public uint PicHeightInMapUnitsMinus1; + public bool QpprimeYZeroTransformBypassFlag; + } +} diff --git a/Ryujinx.Graphics.Video/IDecoder.cs b/Ryujinx.Graphics.Video/IDecoder.cs new file mode 100644 index 00000000..5957af08 --- /dev/null +++ b/Ryujinx.Graphics.Video/IDecoder.cs @@ -0,0 +1,11 @@ +using System; + +namespace Ryujinx.Graphics.Video +{ + public interface IDecoder : IDisposable + { + bool IsHardwareAccelerated { get; } + + ISurface CreateSurface(int width, int height); + } +} diff --git a/Ryujinx.Graphics.Video/IH264Decoder.cs b/Ryujinx.Graphics.Video/IH264Decoder.cs new file mode 100644 index 00000000..127b9412 --- /dev/null +++ b/Ryujinx.Graphics.Video/IH264Decoder.cs @@ -0,0 +1,9 @@ +using System; + +namespace Ryujinx.Graphics.Video +{ + public interface IH264Decoder : IDecoder + { + bool Decode(ref H264PictureInfo pictureInfo, ISurface output, ReadOnlySpan bitstream); + } +} diff --git a/Ryujinx.Graphics.Video/ISurface.cs b/Ryujinx.Graphics.Video/ISurface.cs new file mode 100644 index 00000000..fb66f31a --- /dev/null +++ b/Ryujinx.Graphics.Video/ISurface.cs @@ -0,0 +1,18 @@ +using System; + +namespace Ryujinx.Graphics.Video +{ + public 
interface ISurface : IDisposable + { + Plane YPlane { get; } + Plane UPlane { get; } + Plane VPlane { get; } + + int Width { get; } + int Height { get; } + int Stride { get; } + int UvWidth { get; } + int UvHeight { get; } + int UvStride { get; } + } +} diff --git a/Ryujinx.Graphics.Video/IVp9Decoder.cs b/Ryujinx.Graphics.Video/IVp9Decoder.cs new file mode 100644 index 00000000..ac79bc42 --- /dev/null +++ b/Ryujinx.Graphics.Video/IVp9Decoder.cs @@ -0,0 +1,14 @@ +using System; + +namespace Ryujinx.Graphics.Video +{ + public interface IVp9Decoder : IDecoder + { + bool Decode( + ref Vp9PictureInfo pictureInfo, + ISurface output, + ReadOnlySpan bitstream, + ReadOnlySpan mvsIn, + Span mvsOut); + } +} diff --git a/Ryujinx.Graphics.Video/Plane.cs b/Ryujinx.Graphics.Video/Plane.cs new file mode 100644 index 00000000..c0aca59c --- /dev/null +++ b/Ryujinx.Graphics.Video/Plane.cs @@ -0,0 +1,42 @@ +using System; +using System.Diagnostics.CodeAnalysis; + +namespace Ryujinx.Graphics.Video +{ + public struct Plane : IEquatable + { + public IntPtr Pointer { get; } + public int Length { get; } + + public Plane(IntPtr pointer, int length) + { + Pointer = pointer; + Length = length; + } + + public override bool Equals(object obj) + { + return obj is Plane other && Equals(other); + } + + public bool Equals([AllowNull] Plane other) + { + return Pointer == other.Pointer && Length == other.Length; + } + + public override int GetHashCode() + { + return HashCode.Combine(Pointer, Length); + } + + public static bool operator ==(Plane left, Plane right) + { + return left.Equals(right); + } + + public static bool operator !=(Plane left, Plane right) + { + return !(left == right); + } + } +} diff --git a/Ryujinx.Graphics.Video/Ryujinx.Graphics.Video.csproj b/Ryujinx.Graphics.Video/Ryujinx.Graphics.Video.csproj new file mode 100644 index 00000000..6710726c --- /dev/null +++ b/Ryujinx.Graphics.Video/Ryujinx.Graphics.Video.csproj @@ -0,0 +1,11 @@ + + + + netcoreapp3.1 + + + + + + + diff --git 
a/Ryujinx.Graphics.Video/Vp9BackwardUpdates.cs b/Ryujinx.Graphics.Video/Vp9BackwardUpdates.cs new file mode 100644 index 00000000..a3aa4de7 --- /dev/null +++ b/Ryujinx.Graphics.Video/Vp9BackwardUpdates.cs @@ -0,0 +1,32 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Video +{ + public struct Vp9BackwardUpdates + { + public Array4> YMode; + public Array10> UvMode; + public Array16> Partition; + public Array4>>>>> Coef; + public Array4>>>> EobBranch; + public Array4> SwitchableInterp; + public Array7> InterMode; + public Array4> IntraInter; + public Array5> CompInter; + public Array5>> SingleRef; + public Array5> CompRef; + public Array2> Tx32x32; + public Array2> Tx16x16; + public Array2> Tx8x8; + public Array3> Skip; + public Array4 Joints; + public Array2> Sign; + public Array2> Classes; + public Array2> Class0; + public Array2>> Bits; + public Array2>> Class0Fp; + public Array2> Fp; + public Array2> Class0Hp; + public Array2> Hp; + } +} diff --git a/Ryujinx.Graphics.Video/Vp9EntropyProbs.cs b/Ryujinx.Graphics.Video/Vp9EntropyProbs.cs new file mode 100644 index 00000000..10b997a5 --- /dev/null +++ b/Ryujinx.Graphics.Video/Vp9EntropyProbs.cs @@ -0,0 +1,36 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Video +{ + public struct Vp9EntropyProbs + { + public Array10>> KfYModeProb; + public Array7 SegTreeProb; + public Array3 SegPredProb; + public Array10> KfUvModeProb; + public Array4> YModeProb; + public Array10> UvModeProb; + public Array16> KfPartitionProb; + public Array16> PartitionProb; + public Array4>>>>> CoefProbs; + public Array4> SwitchableInterpProb; + public Array7> InterModeProb; + public Array4 IntraInterProb; + public Array5 CompInterProb; + public Array5> SingleRefProb; + public Array5 CompRefProb; + public Array2> Tx32x32Prob; + public Array2> Tx16x16Prob; + public Array2> Tx8x8Prob; + public Array3 SkipProb; + public Array3 Joints; + public Array2 Sign; + public Array2> Classes; + public Array2> Class0; + public Array2> 
Bits; + public Array2>> Class0Fp; + public Array2> Fp; + public Array2 Class0Hp; + public Array2 Hp; + } +} diff --git a/Ryujinx.Graphics.Video/Vp9Mv.cs b/Ryujinx.Graphics.Video/Vp9Mv.cs new file mode 100644 index 00000000..9de41058 --- /dev/null +++ b/Ryujinx.Graphics.Video/Vp9Mv.cs @@ -0,0 +1,8 @@ +namespace Ryujinx.Graphics.Video +{ + public struct Vp9Mv + { + public short Row; + public short Col; + } +} diff --git a/Ryujinx.Graphics.Video/Vp9MvRef.cs b/Ryujinx.Graphics.Video/Vp9MvRef.cs new file mode 100644 index 00000000..6f2d8e81 --- /dev/null +++ b/Ryujinx.Graphics.Video/Vp9MvRef.cs @@ -0,0 +1,11 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Video +{ + // This must match the structure used by NVDEC, do not modify. + public struct Vp9MvRef + { + public Array2 Mvs; + public Array2 RefFrames; + } +} diff --git a/Ryujinx.Graphics.Video/Vp9PictureInfo.cs b/Ryujinx.Graphics.Video/Vp9PictureInfo.cs new file mode 100644 index 00000000..a5cc2b45 --- /dev/null +++ b/Ryujinx.Graphics.Video/Vp9PictureInfo.cs @@ -0,0 +1,39 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Video +{ + public ref struct Vp9PictureInfo + { + public ISurface LastReference; + public ISurface GoldenReference; + public ISurface AltReference; + public bool IsKeyFrame; + public bool IntraOnly; + public Array4 RefFrameSignBias; + public int BaseQIndex; + public int YDcDeltaQ; + public int UvDcDeltaQ; + public int UvAcDeltaQ; + public bool Lossless; + public int TransformMode; + public bool AllowHighPrecisionMv; + public int InterpFilter; + public int ReferenceMode; + public sbyte CompFixedRef; + public Array2 CompVarRef; + public int Log2TileCols; + public int Log2TileRows; + public bool SegmentEnabled; + public bool SegmentMapUpdate; + public bool SegmentMapTemporalUpdate; + public int SegmentAbsDelta; + public Array8 SegmentFeatureEnable; + public Array8> SegmentFeatureData; + public bool ModeRefDeltaEnabled; + public bool UsePrevInFindMvRefs; + public Array4 
RefDeltas; + public Array2 ModeDeltas; + public Vp9EntropyProbs Entropy; + public Vp9BackwardUpdates BackwardUpdateCounts; + } +} diff --git a/Ryujinx.HLE/HOS/Horizon.cs b/Ryujinx.HLE/HOS/Horizon.cs index b3af3290..f302e98a 100644 --- a/Ryujinx.HLE/HOS/Horizon.cs +++ b/Ryujinx.HLE/HOS/Horizon.cs @@ -293,8 +293,6 @@ namespace Ryujinx.HLE.HOS KernelContext.ThreadCounter.Wait(); KernelContext.Dispose(); - - Device.Unload(); } } } diff --git a/Ryujinx.HLE/HOS/Services/Nv/INvDrvServices.cs b/Ryujinx.HLE/HOS/Services/Nv/INvDrvServices.cs index ce7314f4..d6cc85e9 100644 --- a/Ryujinx.HLE/HOS/Services/Nv/INvDrvServices.cs +++ b/Ryujinx.HLE/HOS/Services/Nv/INvDrvServices.cs @@ -60,6 +60,8 @@ namespace Ryujinx.HLE.HOS.Services.Nv NvDeviceFile deviceFile = (NvDeviceFile)constructor.Invoke(new object[] { context }); + deviceFile.Path = path; + return _deviceFileIdRegistry.Add(deviceFile); } else diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvDeviceFile.cs b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvDeviceFile.cs index fe3ae652..e426945d 100644 --- a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvDeviceFile.cs +++ b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvDeviceFile.cs @@ -14,6 +14,8 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices public readonly ServiceCtx Context; public readonly KProcess Owner; + public string Path; + public NvDeviceFile(ServiceCtx context) { Context = context; diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostChannel/NvHostChannelDeviceFile.cs b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostChannel/NvHostChannelDeviceFile.cs index 208bec3b..70c9a47b 100644 --- a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostChannel/NvHostChannelDeviceFile.cs +++ b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostChannel/NvHostChannelDeviceFile.cs @@ -1,10 +1,10 @@ using Ryujinx.Common.Logging; using Ryujinx.Graphics.Gpu.Memory; -using Ryujinx.HLE.HOS.Services.Nv.Types; using Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostAsGpu; using 
Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostChannel.Types; using Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl; using Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvMap; +using Ryujinx.HLE.HOS.Services.Nv.Types; using System; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -130,28 +130,56 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostChannel private NvInternalResult Submit(Span arguments) { - int headerSize = Unsafe.SizeOf(); - SubmitArguments submitHeader = MemoryMarshal.Cast(arguments)[0]; - Span commandBufferEntries = MemoryMarshal.Cast(arguments.Slice(headerSize)).Slice(0, submitHeader.CmdBufsCount); - MemoryManager gmm = NvHostAsGpuDeviceFile.GetAddressSpaceContext(Context).Gmm; + SubmitArguments submitHeader = GetSpanAndSkip(ref arguments, 1)[0]; + Span commandBuffers = GetSpanAndSkip(ref arguments, submitHeader.CmdBufsCount); + Span relocs = GetSpanAndSkip(ref arguments, submitHeader.RelocsCount); + Span relocShifts = GetSpanAndSkip(ref arguments, submitHeader.RelocsCount); + Span syncptIncrs = GetSpanAndSkip(ref arguments, submitHeader.SyncptIncrsCount); + Span waitChecks = GetSpanAndSkip(ref arguments, submitHeader.SyncptIncrsCount); // ? 
+ Span fences = GetSpanAndSkip(ref arguments, submitHeader.FencesCount); - foreach (CommandBuffer commandBufferEntry in commandBufferEntries) + lock (_device) { - NvMapHandle map = NvMapDeviceFile.GetMapFromHandle(Owner, commandBufferEntry.MemoryId); - - int[] commandBufferData = new int[commandBufferEntry.WordsCount]; - - for (int offset = 0; offset < commandBufferData.Length; offset++) + for (int i = 0; i < syncptIncrs.Length; i++) { - commandBufferData[offset] = _memory.Read((ulong)(map.Address + commandBufferEntry.Offset + offset * 4)); + SyncptIncr syncptIncr = syncptIncrs[i]; + + uint id = syncptIncr.Id; + + fences[i].Id = id; + fences[i].Thresh = Context.Device.System.HostSyncpoint.IncrementSyncpointMax(id, syncptIncr.Incrs); } - // TODO: Submit command to engines. + foreach (CommandBuffer commandBuffer in commandBuffers) + { + NvMapHandle map = NvMapDeviceFile.GetMapFromHandle(Owner, commandBuffer.Mem); + + var data = _memory.GetSpan((ulong)map.Address + commandBuffer.Offset, commandBuffer.WordsCount * 4); + + _device.Host1x.Submit(MemoryMarshal.Cast(data)); + } } + fences[0].Thresh = Context.Device.System.HostSyncpoint.IncrementSyncpointMax(fences[0].Id, 1); + + Span tmpCmdBuff = stackalloc int[1]; + + tmpCmdBuff[0] = (4 << 28) | (int)fences[0].Id; + + _device.Host1x.Submit(tmpCmdBuff); + return NvInternalResult.Success; } + private Span GetSpanAndSkip(ref Span arguments, int count) where T : unmanaged + { + Span output = MemoryMarshal.Cast(arguments).Slice(0, count); + + arguments = arguments.Slice(Unsafe.SizeOf() * count); + + return output; + } + private NvInternalResult GetSyncpoint(ref GetParameterArguments arguments) { if (arguments.Parameter >= MaxModuleSyncpoint) @@ -248,9 +276,13 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostChannel { if (map.DmaMapAddress != 0) { - gmm.Free((ulong)map.DmaMapAddress, (uint)map.Size); + // FIXME: + // To make unmapping work, we need separate address space per channel. 
+ // Right now NVDEC and VIC share the GPU address space which is not correct at all. - map.DmaMapAddress = 0; + // gmm.Free((ulong)map.DmaMapAddress, (uint)map.Size); + + // map.DmaMapAddress = 0; } } } diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostChannel/Types/SubmitArguments.cs b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostChannel/Types/SubmitArguments.cs index bb2fd1cc..7ef7e39e 100644 --- a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostChannel/Types/SubmitArguments.cs +++ b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostChannel/Types/SubmitArguments.cs @@ -5,17 +5,40 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostChannel.Types [StructLayout(LayoutKind.Sequential)] struct CommandBuffer { - public int MemoryId; - public int Offset; + public int Mem; + public uint Offset; public int WordsCount; } + [StructLayout(LayoutKind.Sequential)] + struct Reloc + { + public int CmdbufMem; + public int CmdbufOffset; + public int Target; + public int TargetOffset; + } + + [StructLayout(LayoutKind.Sequential)] + struct SyncptIncr + { + public uint Id; + public uint Incrs; + } + + [StructLayout(LayoutKind.Sequential)] + struct Fence + { + public uint Id; + public uint Thresh; + } + [StructLayout(LayoutKind.Sequential)] struct SubmitArguments { public int CmdBufsCount; public int RelocsCount; public int SyncptIncrsCount; - public int WaitchecksCount; + public int FencesCount; } } diff --git a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostCtrl/Types/NvHostSyncPt.cs b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostCtrl/Types/NvHostSyncPt.cs index ff56fbf5..aa730b57 100644 --- a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostCtrl/Types/NvHostSyncPt.cs +++ b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostCtrl/Types/NvHostSyncPt.cs @@ -1,6 +1,5 @@ using Ryujinx.Common.Logging; using Ryujinx.Graphics.Gpu.Synchronization; -using Ryujinx.HLE.HOS.Kernel.Threading; using System; using System.Threading; @@ -172,6 +171,11 @@ namespace 
Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl return (uint)Interlocked.Increment(ref _counterMax[id]); } + public uint IncrementSyncpointMax(uint id, uint incrs) + { + return (uint)Interlocked.Add(ref _counterMax[id], (int)incrs); + } + public bool IsSyncpointExpired(uint id, uint threshold) { return MinCompare(id, _counterMin[id], _counterMax[id], (int)threshold); diff --git a/Ryujinx.HLE/Ryujinx.HLE.csproj b/Ryujinx.HLE/Ryujinx.HLE.csproj index b05a9087..01e20792 100644 --- a/Ryujinx.HLE/Ryujinx.HLE.csproj +++ b/Ryujinx.HLE/Ryujinx.HLE.csproj @@ -47,6 +47,9 @@ + + + diff --git a/Ryujinx.HLE/Switch.cs b/Ryujinx.HLE/Switch.cs index 5713bd9e..9defe25d 100644 --- a/Ryujinx.HLE/Switch.cs +++ b/Ryujinx.HLE/Switch.cs @@ -3,6 +3,9 @@ using Ryujinx.Audio; using Ryujinx.Configuration; using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Gpu; +using Ryujinx.Graphics.Host1x; +using Ryujinx.Graphics.Nvdec; +using Ryujinx.Graphics.Vic; using Ryujinx.HLE.FileSystem; using Ryujinx.HLE.FileSystem.Content; using Ryujinx.HLE.HOS; @@ -11,7 +14,6 @@ using Ryujinx.HLE.HOS.Services.Hid; using Ryujinx.HLE.HOS.SystemState; using Ryujinx.Memory; using System; -using System.Threading; namespace Ryujinx.HLE { @@ -23,6 +25,8 @@ namespace Ryujinx.HLE public GpuContext Gpu { get; private set; } + internal Host1xDevice Host1x { get; } + public VirtualFileSystem FileSystem { get; private set; } public Horizon System { get; private set; } @@ -53,6 +57,27 @@ namespace Ryujinx.HLE Gpu = new GpuContext(renderer); + Host1x = new Host1xDevice(Gpu.Synchronization); + var nvdec = new NvdecDevice(Gpu.MemoryManager); + var vic = new VicDevice(Gpu.MemoryManager); + Host1x.RegisterDevice(ClassId.Nvdec, nvdec); + Host1x.RegisterDevice(ClassId.Vic, vic); + + nvdec.FrameDecoded += (FrameDecodedEventArgs e) => + { + // FIXME: + // Figure out what is causing frame ordering issues on H264. + // For now this is needed as workaround. 
+ if (e.CodecId == CodecId.H264) + { + vic.SetSurfaceOverride(e.LumaOffset, e.ChromaOffset, 0); + } + else + { + vic.DisableSurfaceOverride(); + } + }; + FileSystem = fileSystem; System = new Horizon(this, contentManager); @@ -136,13 +161,6 @@ namespace Ryujinx.HLE Gpu.Window.Present(swapBuffersCallback); } - internal void Unload() - { - FileSystem.Unload(); - - Memory.Dispose(); - } - public void DisposeGpu() { Gpu.Dispose(); @@ -158,7 +176,10 @@ namespace Ryujinx.HLE if (disposing) { System.Dispose(); + Host1x.Dispose(); AudioOut.Dispose(); + FileSystem.Unload(); + Memory.Dispose(); } } } diff --git a/Ryujinx.Memory/MemoryBlock.cs b/Ryujinx.Memory/MemoryBlock.cs index 850fb115..37439a8a 100644 --- a/Ryujinx.Memory/MemoryBlock.cs +++ b/Ryujinx.Memory/MemoryBlock.cs @@ -135,13 +135,13 @@ namespace Ryujinx.Memory public void Copy(ulong srcOffset, ulong dstOffset, ulong size) { const int MaxChunkSize = 1 << 30; - + for (ulong offset = 0; offset < size; offset += MaxChunkSize) { int copySize = (int)Math.Min(MaxChunkSize, size - offset); Write(dstOffset + offset, GetSpan(srcOffset + offset, copySize)); - } + } } /// @@ -225,7 +225,7 @@ namespace Ryujinx.Memory } /// - /// Gets the span of a given memory block region. + /// Gets the of a given memory block region. /// /// Start offset of the memory region /// Size in bytes of the region @@ -238,6 +238,20 @@ namespace Ryujinx.Memory return new Span((void*)GetPointer(offset, size), size); } + /// + /// Gets the of a given memory block region. + /// + /// Start offset of the memory region + /// Size in bytes of the region + /// Memory of the memory region + /// Throw when the memory block has already been disposed + /// Throw when either or are out of range + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe Memory GetMemory(ulong offset, int size) + { + return new NativeMemoryManager((byte*)GetPointer(offset, size), size).Memory; + } + /// /// Adds a 64-bits offset to a native pointer. 
/// diff --git a/Ryujinx.Memory/NativeMemoryManager.cs b/Ryujinx.Memory/NativeMemoryManager.cs new file mode 100644 index 00000000..ef599dd4 --- /dev/null +++ b/Ryujinx.Memory/NativeMemoryManager.cs @@ -0,0 +1,42 @@ +using System; +using System.Buffers; + +namespace Ryujinx.Memory +{ + unsafe class NativeMemoryManager : MemoryManager where T : unmanaged + { + private readonly T* _pointer; + private readonly int _length; + + public NativeMemoryManager(T* pointer, int length) + { + _pointer = pointer; + _length = length; + } + + public override Span GetSpan() + { + return new Span((void*)_pointer, _length); + } + + public override MemoryHandle Pin(int elementIndex = 0) + { + if ((uint)elementIndex >= _length) + { + throw new ArgumentOutOfRangeException(nameof(elementIndex)); + } + + return new MemoryHandle((void*)(_pointer + elementIndex)); + } + + public override void Unpin() + { + // No need to do anything as pointer already points to native memory, not GC tracked. + } + + protected override void Dispose(bool disposing) + { + // Nothing to dispose, MemoryBlock still owns the memory. 
+ } + } +} diff --git a/Ryujinx.sln b/Ryujinx.sln index 5ea6934e..d5e85c2a 100644 --- a/Ryujinx.sln +++ b/Ryujinx.sln @@ -44,6 +44,18 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Memory.Tests", "Ryu EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Cpu", "Ryujinx.Cpu\Ryujinx.Cpu.csproj", "{3DF35E3D-D844-4399-A9A1-A9E923264C17}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Device", "Ryujinx.Graphics.Device\Ryujinx.Graphics.Device.csproj", "{C3002C3C-7B09-4FE7-894A-372EDA22FC6E}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Host1x", "Ryujinx.Graphics.Host1x\Ryujinx.Graphics.Host1x.csproj", "{C35F1536-7DE5-4F9D-9604-B5B4E1561947}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Nvdec.Vp9", "Ryujinx.Graphics.Nvdec.Vp9\Ryujinx.Graphics.Nvdec.Vp9.csproj", "{B9AECA11-E248-4886-A10B-81B631CAAF29}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Vic", "Ryujinx.Graphics.Vic\Ryujinx.Graphics.Vic.csproj", "{81BB2C11-9408-4EA3-822E-42987AF54429}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Nvdec.H264", "Ryujinx.Graphics.Nvdec.H264\Ryujinx.Graphics.Nvdec.H264.csproj", "{990F9601-343E-46CB-8529-B498FA761A92}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ryujinx.Graphics.Video", "Ryujinx.Graphics.Video\Ryujinx.Graphics.Video.csproj", "{FD4A2C14-8E3D-4957-ABBE-3C38897B3E2D}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -196,6 +208,54 @@ Global {3DF35E3D-D844-4399-A9A1-A9E923264C17}.Profile Release|Any CPU.Build.0 = Release|Any CPU {3DF35E3D-D844-4399-A9A1-A9E923264C17}.Release|Any CPU.ActiveCfg = Release|Any CPU {3DF35E3D-D844-4399-A9A1-A9E923264C17}.Release|Any CPU.Build.0 = Release|Any CPU + {C3002C3C-7B09-4FE7-894A-372EDA22FC6E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + 
{C3002C3C-7B09-4FE7-894A-372EDA22FC6E}.Debug|Any CPU.Build.0 = Debug|Any CPU + {C3002C3C-7B09-4FE7-894A-372EDA22FC6E}.Profile Debug|Any CPU.ActiveCfg = Debug|Any CPU + {C3002C3C-7B09-4FE7-894A-372EDA22FC6E}.Profile Debug|Any CPU.Build.0 = Debug|Any CPU + {C3002C3C-7B09-4FE7-894A-372EDA22FC6E}.Profile Release|Any CPU.ActiveCfg = Release|Any CPU + {C3002C3C-7B09-4FE7-894A-372EDA22FC6E}.Profile Release|Any CPU.Build.0 = Release|Any CPU + {C3002C3C-7B09-4FE7-894A-372EDA22FC6E}.Release|Any CPU.ActiveCfg = Release|Any CPU + {C3002C3C-7B09-4FE7-894A-372EDA22FC6E}.Release|Any CPU.Build.0 = Release|Any CPU + {C35F1536-7DE5-4F9D-9604-B5B4E1561947}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {C35F1536-7DE5-4F9D-9604-B5B4E1561947}.Debug|Any CPU.Build.0 = Debug|Any CPU + {C35F1536-7DE5-4F9D-9604-B5B4E1561947}.Profile Debug|Any CPU.ActiveCfg = Debug|Any CPU + {C35F1536-7DE5-4F9D-9604-B5B4E1561947}.Profile Debug|Any CPU.Build.0 = Debug|Any CPU + {C35F1536-7DE5-4F9D-9604-B5B4E1561947}.Profile Release|Any CPU.ActiveCfg = Release|Any CPU + {C35F1536-7DE5-4F9D-9604-B5B4E1561947}.Profile Release|Any CPU.Build.0 = Release|Any CPU + {C35F1536-7DE5-4F9D-9604-B5B4E1561947}.Release|Any CPU.ActiveCfg = Release|Any CPU + {C35F1536-7DE5-4F9D-9604-B5B4E1561947}.Release|Any CPU.Build.0 = Release|Any CPU + {B9AECA11-E248-4886-A10B-81B631CAAF29}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {B9AECA11-E248-4886-A10B-81B631CAAF29}.Debug|Any CPU.Build.0 = Debug|Any CPU + {B9AECA11-E248-4886-A10B-81B631CAAF29}.Profile Debug|Any CPU.ActiveCfg = Debug|Any CPU + {B9AECA11-E248-4886-A10B-81B631CAAF29}.Profile Debug|Any CPU.Build.0 = Debug|Any CPU + {B9AECA11-E248-4886-A10B-81B631CAAF29}.Profile Release|Any CPU.ActiveCfg = Release|Any CPU + {B9AECA11-E248-4886-A10B-81B631CAAF29}.Profile Release|Any CPU.Build.0 = Release|Any CPU + {B9AECA11-E248-4886-A10B-81B631CAAF29}.Release|Any CPU.ActiveCfg = Release|Any CPU + {B9AECA11-E248-4886-A10B-81B631CAAF29}.Release|Any CPU.Build.0 = Release|Any CPU + 
{81BB2C11-9408-4EA3-822E-42987AF54429}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {81BB2C11-9408-4EA3-822E-42987AF54429}.Debug|Any CPU.Build.0 = Debug|Any CPU + {81BB2C11-9408-4EA3-822E-42987AF54429}.Profile Debug|Any CPU.ActiveCfg = Debug|Any CPU + {81BB2C11-9408-4EA3-822E-42987AF54429}.Profile Debug|Any CPU.Build.0 = Debug|Any CPU + {81BB2C11-9408-4EA3-822E-42987AF54429}.Profile Release|Any CPU.ActiveCfg = Release|Any CPU + {81BB2C11-9408-4EA3-822E-42987AF54429}.Profile Release|Any CPU.Build.0 = Release|Any CPU + {81BB2C11-9408-4EA3-822E-42987AF54429}.Release|Any CPU.ActiveCfg = Release|Any CPU + {81BB2C11-9408-4EA3-822E-42987AF54429}.Release|Any CPU.Build.0 = Release|Any CPU + {990F9601-343E-46CB-8529-B498FA761A92}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {990F9601-343E-46CB-8529-B498FA761A92}.Debug|Any CPU.Build.0 = Debug|Any CPU + {990F9601-343E-46CB-8529-B498FA761A92}.Profile Debug|Any CPU.ActiveCfg = Debug|Any CPU + {990F9601-343E-46CB-8529-B498FA761A92}.Profile Debug|Any CPU.Build.0 = Debug|Any CPU + {990F9601-343E-46CB-8529-B498FA761A92}.Profile Release|Any CPU.ActiveCfg = Release|Any CPU + {990F9601-343E-46CB-8529-B498FA761A92}.Profile Release|Any CPU.Build.0 = Release|Any CPU + {990F9601-343E-46CB-8529-B498FA761A92}.Release|Any CPU.ActiveCfg = Release|Any CPU + {990F9601-343E-46CB-8529-B498FA761A92}.Release|Any CPU.Build.0 = Release|Any CPU + {FD4A2C14-8E3D-4957-ABBE-3C38897B3E2D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {FD4A2C14-8E3D-4957-ABBE-3C38897B3E2D}.Debug|Any CPU.Build.0 = Debug|Any CPU + {FD4A2C14-8E3D-4957-ABBE-3C38897B3E2D}.Profile Debug|Any CPU.ActiveCfg = Debug|Any CPU + {FD4A2C14-8E3D-4957-ABBE-3C38897B3E2D}.Profile Debug|Any CPU.Build.0 = Debug|Any CPU + {FD4A2C14-8E3D-4957-ABBE-3C38897B3E2D}.Profile Release|Any CPU.ActiveCfg = Release|Any CPU + {FD4A2C14-8E3D-4957-ABBE-3C38897B3E2D}.Profile Release|Any CPU.Build.0 = Release|Any CPU + {FD4A2C14-8E3D-4957-ABBE-3C38897B3E2D}.Release|Any CPU.ActiveCfg = Release|Any CPU + 
{FD4A2C14-8E3D-4957-ABBE-3C38897B3E2D}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/Ryujinx/Ryujinx.csproj b/Ryujinx/Ryujinx.csproj index cf672131..c6a83cf7 100644 --- a/Ryujinx/Ryujinx.csproj +++ b/Ryujinx/Ryujinx.csproj @@ -83,6 +83,7 @@ + @@ -100,6 +101,9 @@ PreserveNewest + + Always + diff --git a/Ryujinx/THIRDPARTY.md b/Ryujinx/THIRDPARTY.md new file mode 100644 index 00000000..94b7ec37 --- /dev/null +++ b/Ryujinx/THIRDPARTY.md @@ -0,0 +1,203 @@ +# ffmpeg (LGPLv3) +``` + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". 
+ + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. 
You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. 
A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. 
+ + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. + +``` + +# libvpx (BSD) +``` +Copyright (c) 2010, The WebM Project authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + * Neither the name of Google, nor the WebM Project, nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +``` \ No newline at end of file