Amadeus: Add ARM SIMD fast path (#2069)
Add AArch64 (AdvSimd) fast paths to the audio renderer alongside every existing x86 SIMD fast path.
This commit is contained in:
parent
3d04d7ef08
commit
31fca432a7
3 changed files with 97 additions and 1 deletions
|
@ -19,6 +19,7 @@ using System;
|
||||||
using System.Runtime.CompilerServices;
|
using System.Runtime.CompilerServices;
|
||||||
using System.Runtime.InteropServices;
|
using System.Runtime.InteropServices;
|
||||||
using System.Runtime.Intrinsics;
|
using System.Runtime.Intrinsics;
|
||||||
|
using System.Runtime.Intrinsics.Arm;
|
||||||
using System.Runtime.Intrinsics.X86;
|
using System.Runtime.Intrinsics.X86;
|
||||||
|
|
||||||
namespace Ryujinx.Audio.Renderer.Dsp.Command
|
namespace Ryujinx.Audio.Renderer.Dsp.Command
|
||||||
|
@ -89,6 +90,27 @@ namespace Ryujinx.Audio.Renderer.Dsp.Command
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// AdvSimd (ARM) variant of the mix operation: scales <paramref name="inputMix"/> by
/// <c>Volume</c>, rounds each product with <see cref="AdvSimd.Ceiling(Vector128{float})"/>,
/// and accumulates the result into <paramref name="outputMix"/>.
/// </summary>
/// <param name="outputMix">Buffer that is accumulated into.</param>
/// <param name="inputMix">Samples to scale and add; its length drives both loops.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private void ProcessMixAdvSimd(Span<float> outputMix, ReadOnlySpan<float> inputMix)
{
    // Broadcast the scalar volume into every lane once, outside the loop.
    Vector128<float> gain = Vector128.Create(Volume);

    // Reinterpret both buffers as runs of four floats.
    ReadOnlySpan<Vector128<float>> sourceBlocks = MemoryMarshal.Cast<float, Vector128<float>>(inputMix);
    Span<Vector128<float>> targetBlocks = MemoryMarshal.Cast<float, Vector128<float>>(outputMix);

    int blockCount = sourceBlocks.Length;

    for (int block = 0; block < blockCount; block++)
    {
        Vector128<float> scaled = AdvSimd.Multiply(sourceBlocks[block], gain);
        Vector128<float> rounded = AdvSimd.Ceiling(scaled);

        targetBlocks[block] = AdvSimd.Add(targetBlocks[block], rounded);
    }

    // Samples that did not fill a whole vector are handled one at a time.
    // NOTE(review): presumably MultiplyRoundUp rounds the same way as AdvSimd.Ceiling — confirm.
    for (int sample = blockCount * Vector128<float>.Count; sample < inputMix.Length; sample++)
    {
        outputMix[sample] += FloatingPointHelper.MultiplyRoundUp(inputMix[sample], Volume);
    }
}
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
private void ProcessMixSlowPath(Span<float> outputMix, ReadOnlySpan<float> inputMix)
|
private void ProcessMixSlowPath(Span<float> outputMix, ReadOnlySpan<float> inputMix)
|
||||||
{
|
{
|
||||||
|
@ -108,6 +130,10 @@ namespace Ryujinx.Audio.Renderer.Dsp.Command
|
||||||
{
|
{
|
||||||
ProcessMixSse41(outputMix, inputMix);
|
ProcessMixSse41(outputMix, inputMix);
|
||||||
}
|
}
|
||||||
|
else if (AdvSimd.IsSupported)
|
||||||
|
{
|
||||||
|
ProcessMixAdvSimd(outputMix, inputMix);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ProcessMixSlowPath(outputMix, inputMix);
|
ProcessMixSlowPath(outputMix, inputMix);
|
||||||
|
|
|
@ -19,6 +19,7 @@ using System;
|
||||||
using System.Runtime.CompilerServices;
|
using System.Runtime.CompilerServices;
|
||||||
using System.Runtime.InteropServices;
|
using System.Runtime.InteropServices;
|
||||||
using System.Runtime.Intrinsics;
|
using System.Runtime.Intrinsics;
|
||||||
|
using System.Runtime.Intrinsics.Arm;
|
||||||
using System.Runtime.Intrinsics.X86;
|
using System.Runtime.Intrinsics.X86;
|
||||||
|
|
||||||
namespace Ryujinx.Audio.Renderer.Dsp.Command
|
namespace Ryujinx.Audio.Renderer.Dsp.Command
|
||||||
|
@ -89,6 +90,26 @@ namespace Ryujinx.Audio.Renderer.Dsp.Command
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// AdvSimd (ARM) variant of the volume operation: writes each sample of
/// <paramref name="inputBuffer"/> multiplied by <c>Volume</c> and rounded with
/// <see cref="AdvSimd.Ceiling(Vector128{float})"/> into <paramref name="outputBuffer"/>.
/// </summary>
/// <param name="outputBuffer">Destination buffer; existing contents are overwritten.</param>
/// <param name="inputBuffer">Samples to scale; its length drives both loops.</param>
private void ProcessVolumeAdvSimd(Span<float> outputBuffer, ReadOnlySpan<float> inputBuffer)
{
    // Broadcast the scalar volume into every lane once, outside the loop.
    Vector128<float> gain = Vector128.Create(Volume);

    // Reinterpret both buffers as runs of four floats.
    ReadOnlySpan<Vector128<float>> sourceBlocks = MemoryMarshal.Cast<float, Vector128<float>>(inputBuffer);
    Span<Vector128<float>> targetBlocks = MemoryMarshal.Cast<float, Vector128<float>>(outputBuffer);

    int blockCount = sourceBlocks.Length;

    for (int block = 0; block < blockCount; block++)
    {
        Vector128<float> scaled = AdvSimd.Multiply(sourceBlocks[block], gain);

        targetBlocks[block] = AdvSimd.Ceiling(scaled);
    }

    // Samples that did not fill a whole vector are handled one at a time.
    // NOTE(review): presumably MultiplyRoundUp rounds the same way as AdvSimd.Ceiling — confirm.
    for (int sample = blockCount * Vector128<float>.Count; sample < inputBuffer.Length; sample++)
    {
        outputBuffer[sample] = FloatingPointHelper.MultiplyRoundUp(inputBuffer[sample], Volume);
    }
}
|
||||||
|
|
||||||
private void ProcessVolume(Span<float> outputBuffer, ReadOnlySpan<float> inputBuffer)
|
private void ProcessVolume(Span<float> outputBuffer, ReadOnlySpan<float> inputBuffer)
|
||||||
{
|
{
|
||||||
if (Avx.IsSupported)
|
if (Avx.IsSupported)
|
||||||
|
@ -99,6 +120,10 @@ namespace Ryujinx.Audio.Renderer.Dsp.Command
|
||||||
{
|
{
|
||||||
ProcessVolumeSse41(outputBuffer, inputBuffer);
|
ProcessVolumeSse41(outputBuffer, inputBuffer);
|
||||||
}
|
}
|
||||||
|
else if (AdvSimd.IsSupported)
|
||||||
|
{
|
||||||
|
ProcessVolumeAdvSimd(outputBuffer, inputBuffer);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ProcessVolumeSlowPath(outputBuffer, inputBuffer);
|
ProcessVolumeSlowPath(outputBuffer, inputBuffer);
|
||||||
|
|
|
@ -26,6 +26,7 @@ using System.Diagnostics;
|
||||||
using System.Runtime.CompilerServices;
|
using System.Runtime.CompilerServices;
|
||||||
using System.Runtime.InteropServices;
|
using System.Runtime.InteropServices;
|
||||||
using System.Runtime.Intrinsics;
|
using System.Runtime.Intrinsics;
|
||||||
|
using System.Runtime.Intrinsics.Arm;
|
||||||
using System.Runtime.Intrinsics.X86;
|
using System.Runtime.Intrinsics.X86;
|
||||||
using static Ryujinx.Audio.Renderer.Parameter.VoiceInParameter;
|
using static Ryujinx.Audio.Renderer.Parameter.VoiceInParameter;
|
||||||
|
|
||||||
|
@ -320,6 +321,24 @@ namespace Ryujinx.Audio.Renderer.Dsp
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// AdvSimd (ARM) conversion of the first <paramref name="sampleCount"/> 32-bit integer
/// samples of <paramref name="input"/> to single-precision floats in <paramref name="output"/>.
/// </summary>
/// <param name="output">Destination buffer; only the first <paramref name="sampleCount"/> elements are written.</param>
/// <param name="input">Source samples; only the first <paramref name="sampleCount"/> elements are read.</param>
/// <param name="sampleCount">Number of samples to convert.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="sampleCount"/> is negative or exceeds the length of either span.
/// </exception>
private static void ToFloatAdvSimd(Span<float> output, ReadOnlySpan<int> input, int sampleCount)
{
    // Bound both spans by sampleCount before vectorising. Previously the vector loop was
    // sized from input.Length alone, so a longer input was converted past sampleCount
    // (and could overrun a shorter output) while only the scalar tail honoured the count.
    input = input.Slice(0, sampleCount);
    output = output.Slice(0, sampleCount);

    // Reinterpret both buffers as runs of four 32-bit lanes.
    ReadOnlySpan<Vector128<int>> inputVec = MemoryMarshal.Cast<int, Vector128<int>>(input);
    Span<Vector128<float>> outputVec = MemoryMarshal.Cast<float, Vector128<float>>(output);

    int sisdStart = inputVec.Length * Vector128<int>.Count;

    for (int i = 0; i < inputVec.Length; i++)
    {
        outputVec[i] = AdvSimd.ConvertToSingle(inputVec[i]);
    }

    // Remaining samples that did not fill a whole vector are converted one at a time.
    for (int i = sisdStart; i < sampleCount; i++)
    {
        output[i] = input[i];
    }
}
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
public static void ToFloatSlow(Span<float> output, ReadOnlySpan<int> input, int sampleCount)
|
public static void ToFloatSlow(Span<float> output, ReadOnlySpan<int> input, int sampleCount)
|
||||||
{
|
{
|
||||||
|
@ -339,6 +358,10 @@ namespace Ryujinx.Audio.Renderer.Dsp
|
||||||
{
|
{
|
||||||
ToFloatSse2(output, input, sampleCount);
|
ToFloatSse2(output, input, sampleCount);
|
||||||
}
|
}
|
||||||
|
else if (AdvSimd.IsSupported)
|
||||||
|
{
|
||||||
|
ToFloatAdvSimd(output, input, sampleCount);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ToFloatSlow(output, input, sampleCount);
|
ToFloatSlow(output, input, sampleCount);
|
||||||
|
@ -372,7 +395,25 @@ namespace Ryujinx.Audio.Renderer.Dsp
|
||||||
|
|
||||||
for (int i = 0; i < inputVec.Length; i++)
|
for (int i = 0; i < inputVec.Length; i++)
|
||||||
{
|
{
|
||||||
outputVec[i] = Avx.ConvertToVector128Int32(inputVec[i]);
|
outputVec[i] = Sse2.ConvertToVector128Int32(inputVec[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = sisdStart; i < sampleCount; i++)
|
||||||
|
{
|
||||||
|
output[i] = (int)input[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void ToIntAdvSimd(Span<int> output, ReadOnlySpan<float> input, int sampleCount)
|
||||||
|
{
|
||||||
|
ReadOnlySpan<Vector128<float>> inputVec = MemoryMarshal.Cast<float, Vector128<float>>(input);
|
||||||
|
Span<Vector128<int>> outputVec = MemoryMarshal.Cast<int, Vector128<int>>(output);
|
||||||
|
|
||||||
|
int sisdStart = inputVec.Length * 4;
|
||||||
|
|
||||||
|
for (int i = 0; i < inputVec.Length; i++)
|
||||||
|
{
|
||||||
|
outputVec[i] = AdvSimd.ConvertToInt32RoundToZero(inputVec[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = sisdStart; i < sampleCount; i++)
|
for (int i = sisdStart; i < sampleCount; i++)
|
||||||
|
@ -400,6 +441,10 @@ namespace Ryujinx.Audio.Renderer.Dsp
|
||||||
{
|
{
|
||||||
ToIntSse2(output, input, sampleCount);
|
ToIntSse2(output, input, sampleCount);
|
||||||
}
|
}
|
||||||
|
else if (AdvSimd.IsSupported)
|
||||||
|
{
|
||||||
|
ToIntAdvSimd(output, input, sampleCount);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ToIntSlow(output, input, sampleCount);
|
ToIntSlow(output, input, sampleCount);
|
||||||
|
|
Reference in a new issue