diff --git a/Ryujinx.Common/XXHash128.cs b/Ryujinx.Common/XXHash128.cs index 827e4cb2..731a624e 100644 --- a/Ryujinx.Common/XXHash128.cs +++ b/Ryujinx.Common/XXHash128.cs @@ -66,33 +66,13 @@ namespace Ryujinx.Common } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private unsafe static Hash128 Mult64To128(ulong lhs, ulong rhs) + private static Hash128 Mult64To128(ulong lhs, ulong rhs) { - // TODO: Use BigMul once .NET 5 lands. - if (Bmi2.X64.IsSupported) - { - ulong low; - ulong high = Bmi2.X64.MultiplyNoFlags(lhs, rhs, &low); - return new Hash128 - { - Low = low, - High = high - }; - } - - ulong loLo = Mult32To64((uint)lhs, (uint)rhs); - ulong hiLo = Mult32To64(lhs >> 32, (uint)rhs); - ulong loHi = Mult32To64((uint)lhs, rhs >> 32); - ulong hiHi = Mult32To64(lhs >> 32, rhs >> 32); - - ulong cross = (loLo >> 32) + (uint)hiLo + loHi; - ulong upper = (hiLo >> 32) + (cross >> 32) + hiHi; - ulong lower = (cross << 32) | (uint)loLo; - + ulong high = Math.BigMul(lhs, rhs, out ulong low); return new Hash128 { - Low = lower, - High = upper + Low = low, + High = high }; } @@ -321,9 +301,10 @@ namespace Ryujinx.Common return Xxh3Avalanche(result64); } + [SkipLocalsInit] private static Hash128 Xxh3HashLong128bInternal(ReadOnlySpan<byte> input, ReadOnlySpan<byte> secret) { - Span<ulong> acc = stackalloc ulong[AccNb]; // TODO: Use SkipLocalsInit attribute once .NET 5 lands. + Span<ulong> acc = stackalloc ulong[AccNb]; Xxh3InitAcc.CopyTo(acc); Xxh3HashLongInternalLoop(acc, input, secret); diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Common/BitUtils.cs b/Ryujinx.Graphics.Nvdec.Vp9/Common/BitUtils.cs index a7c6d148..641188f8 100644 --- a/Ryujinx.Graphics.Nvdec.Vp9/Common/BitUtils.cs +++ b/Ryujinx.Graphics.Nvdec.Vp9/Common/BitUtils.cs @@ -7,8 +7,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Common { internal static class BitUtils { - // FIXME: Enable inlining here after AVX2 gather bug is fixed. 
- // [MethodImpl(MethodImplOptions.AggressiveInlining)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static byte ClipPixel(int val) { return (byte)((val > 255) ? 255 : (val < 0) ? 0 : val); diff --git a/Ryujinx.Graphics.Nvdec.Vp9/DecodeFrame.cs b/Ryujinx.Graphics.Nvdec.Vp9/DecodeFrame.cs index 012b0c60..9e267376 100644 --- a/Ryujinx.Graphics.Nvdec.Vp9/DecodeFrame.cs +++ b/Ryujinx.Graphics.Nvdec.Vp9/DecodeFrame.cs @@ -374,11 +374,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9 } while (--bH != 0); } - [StructLayout(LayoutKind.Sequential, Size = 80 * 2 * 80 * 2)] - struct McBufHigh - { - } - + [SkipLocalsInit] private static unsafe void ExtendAndPredict( byte* bufPtr1, int preBufStride, @@ -402,8 +398,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9 int xs, int ys) { - McBufHigh mcBufHighStruct; - ushort* mcBufHigh = (ushort*)Unsafe.AsPointer(ref mcBufHighStruct); // Avoid zero initialization. + ushort* mcBufHigh = stackalloc ushort[80 * 2 * 80 * 2]; if (xd.CurBuf.HighBd) { HighBuildMcBorder(bufPtr1, preBufStride, mcBufHigh, bW, x0, y0, bW, bH, frameWidth, frameHeight); diff --git a/Ryujinx.Graphics.Nvdec.Vp9/Dsp/Convolve.cs b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/Convolve.cs index b74c33dc..1a2969af 100644 --- a/Ryujinx.Graphics.Nvdec.Vp9/Dsp/Convolve.cs +++ b/Ryujinx.Graphics.Nvdec.Vp9/Dsp/Convolve.cs @@ -389,11 +389,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp ConvolveAvgVert(src, srcStride, dst, dstStride, filter, y0Q4, yStepQ4, w, h); } - [StructLayout(LayoutKind.Sequential, Size = 64 * 135)] - struct Temp - { - } - + [SkipLocalsInit] public static unsafe void Convolve8( byte* src, int srcStride, @@ -422,8 +418,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9.Dsp // When calling in frame scaling function, the smallest scaling factor is x1/4 // ==> yStepQ4 = 64. Since w and h are at most 16, the temp buffer is still // big enough. - Temp tempStruct; - byte* temp = (byte*)Unsafe.AsPointer(ref tempStruct); // Avoid zero initialization. 
+ byte* temp = stackalloc byte[64 * 135]; int intermediateHeight = (((h - 1) * yStepQ4 + y0Q4) >> SubpelBits) + SubpelTaps; Debug.Assert(w <= 64);