mirror of
https://github.com/GreemDev/Ryujinx.git
synced 2024-12-23 09:15:46 +00:00
5e724cf24e
* Delete DelegateTypes.cs * Delete DelegateCache.cs * Add files via upload * Update Horizon.cs * Update Program.cs * Update MainWindow.cs * Update Aot.cs * Update RelocEntry.cs * Update Translator.cs * Update MemoryManager.cs * Update InstEmitMemoryHelper.cs * Update Delegates.cs * Nit. * Nit. * Nit. * 10 fewer MSIL bytes for us * Add comment. Nits. * Update Translator.cs * Update Aot.cs * Nits. * Opt.. * Opt.. * Opt.. * Opt.. * Allow to change compression level. * Update MemoryManager.cs * Update Translator.cs * Manage corner cases during the save phase. Nits. * Update Aot.cs * Translator response tweak for Aot disabled. Nit. * Nit. * Nits. * Create DelegateHelpers.cs * Update Delegates.cs * Nit. * Nit. * Nits. * Fix due to #784. * Fixes due to #757 & #841. * Fix due to #846. * Fix due to #847. * Use MethodInfo for managed method calls. Use IR methods instead of managed methods about Max/Min (S/U). Follow-ups & Nits. * Add missing exception messages. Reintroduce slow path for Fmov_Vi. Implement slow path for Fmov_Si. * Switch to the new folder structure. Nits. * Impl. index-based relocation information. Impl. cache file version field. * Nit. * Address gdkchan comments. Mainly: - fixed cache file corruption issue on exit; - exposed a way to disable AOT on the GUI. * Address AcK77 comment. * Address Thealexbarney, jduncanator & emmauss comments. Header magic, CpuId (FI) & Aot -> Ptc. * Adaptation to the new application reloading system. Improvements to the call system of managed methods. Follow-ups. Nits. * Get the same boot times as on master when PTC is disabled. * Profiled Aot. * A32 support (#897). * #975 support (1 of 2). * #975 support (2 of 2). * Rebase fix & nits. * Some fixes and nits (still one bug left). * One fix & nits. * Tests fix (by gdk) & nits. * Support translations not only in high quality and rejit. Nits. * Added possibility to skip translations and continue execution, using `ESC` key. * Update SettingsWindow.cs * Update GLRenderer.cs * Update Ptc.cs * Disabled Profiled PTC by default as requested in the past by gdk. * Fix rejit bug. Increased number of parallel translations. Add stack unwinding stuffs support (1 of 2). Nits. * Add stack unwinding stuffs support (2 of 2). Tuned number of parallel translations. * Restored the ability to assemble jumps with 8-bit offset when Profiled PTC is disabled or during profiling. Modifications due to rebase. Nits. * Limited profiling of the functions to be translated to the addresses belonging to the range of static objects only. * Nits. * Nits. * Update Delegates.cs * Nit. * Update InstEmitSimdArithmetic.cs * Address riperiperi comments. * Fixed the issue of unjustifiably longer boot times at the second boot than at the first boot, measured at the same time or reference point and with the same number of translated functions. * Implemented a simple redundant load/save mechanism. Halved the value of Decoder.MaxInstsPerFunction more appropriate for the current performance of the Translator. Replaced by Logger.PrintError to Logger.PrintDebug in TexturePool.cs about the supposed invalid texture format to avoid the spawn of the log. Nits. * Nit. Improved Logger.PrintError in TexturePool.cs to avoid log spawn. Added missing code for FZ handling (in output) for fp max/min instructions (slow paths). * Add configuration migration for PTC Co-authored-by: Thog <me@thog.eu>
1567 lines
55 KiB
C#
1567 lines
55 KiB
C#
using ARMeilleure.Decoders;
|
|
using ARMeilleure.IntermediateRepresentation;
|
|
using ARMeilleure.State;
|
|
using ARMeilleure.Translation;
|
|
using System;
|
|
using System.Diagnostics;
|
|
using System.Reflection;
|
|
|
|
using static ARMeilleure.Instructions.InstEmitHelper;
|
|
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
|
|
|
|
namespace ARMeilleure.Instructions
|
|
{
|
|
using Func1I = Func<Operand, Operand>;
|
|
using Func2I = Func<Operand, Operand, Operand>;
|
|
using Func3I = Func<Operand, Operand, Operand, Operand>;
|
|
|
|
static class InstEmitSimdHelper
|
|
{
|
|
#region "Masks"
|
|
public static readonly long[] EvenMasks = new long[]
|
|
{
|
|
14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0, // B
|
|
13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0, // H
|
|
11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0 // S
|
|
};
|
|
|
|
public static readonly long[] OddMasks = new long[]
|
|
{
|
|
15L << 56 | 13L << 48 | 11L << 40 | 09L << 32 | 07L << 24 | 05L << 16 | 03L << 8 | 01L << 0, // B
|
|
15L << 56 | 14L << 48 | 11L << 40 | 10L << 32 | 07L << 24 | 06L << 16 | 03L << 8 | 02L << 0, // H
|
|
15L << 56 | 14L << 48 | 13L << 40 | 12L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0 // S
|
|
};
|
|
|
|
public static readonly long ZeroMask = 128L << 56 | 128L << 48 | 128L << 40 | 128L << 32 | 128L << 24 | 128L << 16 | 128L << 8 | 128L << 0;
|
|
#endregion
|
|
|
|
#region "X86 SSE Intrinsics"
|
|
public static readonly Intrinsic[] X86PaddInstruction = new Intrinsic[]
|
|
{
|
|
Intrinsic.X86Paddb,
|
|
Intrinsic.X86Paddw,
|
|
Intrinsic.X86Paddd,
|
|
Intrinsic.X86Paddq
|
|
};
|
|
|
|
public static readonly Intrinsic[] X86PcmpeqInstruction = new Intrinsic[]
|
|
{
|
|
Intrinsic.X86Pcmpeqb,
|
|
Intrinsic.X86Pcmpeqw,
|
|
Intrinsic.X86Pcmpeqd,
|
|
Intrinsic.X86Pcmpeqq
|
|
};
|
|
|
|
public static readonly Intrinsic[] X86PcmpgtInstruction = new Intrinsic[]
|
|
{
|
|
Intrinsic.X86Pcmpgtb,
|
|
Intrinsic.X86Pcmpgtw,
|
|
Intrinsic.X86Pcmpgtd,
|
|
Intrinsic.X86Pcmpgtq
|
|
};
|
|
|
|
public static readonly Intrinsic[] X86PmaxsInstruction = new Intrinsic[]
|
|
{
|
|
Intrinsic.X86Pmaxsb,
|
|
Intrinsic.X86Pmaxsw,
|
|
Intrinsic.X86Pmaxsd
|
|
};
|
|
|
|
public static readonly Intrinsic[] X86PmaxuInstruction = new Intrinsic[]
|
|
{
|
|
Intrinsic.X86Pmaxub,
|
|
Intrinsic.X86Pmaxuw,
|
|
Intrinsic.X86Pmaxud
|
|
};
|
|
|
|
public static readonly Intrinsic[] X86PminsInstruction = new Intrinsic[]
|
|
{
|
|
Intrinsic.X86Pminsb,
|
|
Intrinsic.X86Pminsw,
|
|
Intrinsic.X86Pminsd
|
|
};
|
|
|
|
public static readonly Intrinsic[] X86PminuInstruction = new Intrinsic[]
|
|
{
|
|
Intrinsic.X86Pminub,
|
|
Intrinsic.X86Pminuw,
|
|
Intrinsic.X86Pminud
|
|
};
|
|
|
|
public static readonly Intrinsic[] X86PmovsxInstruction = new Intrinsic[]
|
|
{
|
|
Intrinsic.X86Pmovsxbw,
|
|
Intrinsic.X86Pmovsxwd,
|
|
Intrinsic.X86Pmovsxdq
|
|
};
|
|
|
|
public static readonly Intrinsic[] X86PmovzxInstruction = new Intrinsic[]
|
|
{
|
|
Intrinsic.X86Pmovzxbw,
|
|
Intrinsic.X86Pmovzxwd,
|
|
Intrinsic.X86Pmovzxdq
|
|
};
|
|
|
|
public static readonly Intrinsic[] X86PsllInstruction = new Intrinsic[]
|
|
{
|
|
0,
|
|
Intrinsic.X86Psllw,
|
|
Intrinsic.X86Pslld,
|
|
Intrinsic.X86Psllq
|
|
};
|
|
|
|
public static readonly Intrinsic[] X86PsraInstruction = new Intrinsic[]
|
|
{
|
|
0,
|
|
Intrinsic.X86Psraw,
|
|
Intrinsic.X86Psrad
|
|
};
|
|
|
|
public static readonly Intrinsic[] X86PsrlInstruction = new Intrinsic[]
|
|
{
|
|
0,
|
|
Intrinsic.X86Psrlw,
|
|
Intrinsic.X86Psrld,
|
|
Intrinsic.X86Psrlq
|
|
};
|
|
|
|
public static readonly Intrinsic[] X86PsubInstruction = new Intrinsic[]
|
|
{
|
|
Intrinsic.X86Psubb,
|
|
Intrinsic.X86Psubw,
|
|
Intrinsic.X86Psubd,
|
|
Intrinsic.X86Psubq
|
|
};
|
|
|
|
public static readonly Intrinsic[] X86PunpckhInstruction = new Intrinsic[]
|
|
{
|
|
Intrinsic.X86Punpckhbw,
|
|
Intrinsic.X86Punpckhwd,
|
|
Intrinsic.X86Punpckhdq,
|
|
Intrinsic.X86Punpckhqdq
|
|
};
|
|
|
|
public static readonly Intrinsic[] X86PunpcklInstruction = new Intrinsic[]
|
|
{
|
|
Intrinsic.X86Punpcklbw,
|
|
Intrinsic.X86Punpcklwd,
|
|
Intrinsic.X86Punpckldq,
|
|
Intrinsic.X86Punpcklqdq
|
|
};
|
|
#endregion
|
|
|
|
public static int GetImmShl(OpCodeSimdShImm op)
|
|
{
|
|
return op.Imm - (8 << op.Size);
|
|
}
|
|
|
|
public static int GetImmShr(OpCodeSimdShImm op)
|
|
{
|
|
return (8 << (op.Size + 1)) - op.Imm;
|
|
}
|
|
|
|
public static Operand X86GetScalar(ArmEmitterContext context, float value)
|
|
{
|
|
return X86GetScalar(context, BitConverter.SingleToInt32Bits(value));
|
|
}
|
|
|
|
public static Operand X86GetScalar(ArmEmitterContext context, double value)
|
|
{
|
|
return X86GetScalar(context, BitConverter.DoubleToInt64Bits(value));
|
|
}
|
|
|
|
public static Operand X86GetScalar(ArmEmitterContext context, int value)
|
|
{
|
|
return context.VectorCreateScalar(Const(value));
|
|
}
|
|
|
|
public static Operand X86GetScalar(ArmEmitterContext context, long value)
|
|
{
|
|
return context.VectorCreateScalar(Const(value));
|
|
}
|
|
|
|
public static Operand X86GetAllElements(ArmEmitterContext context, float value)
|
|
{
|
|
return X86GetAllElements(context, BitConverter.SingleToInt32Bits(value));
|
|
}
|
|
|
|
public static Operand X86GetAllElements(ArmEmitterContext context, double value)
|
|
{
|
|
return X86GetAllElements(context, BitConverter.DoubleToInt64Bits(value));
|
|
}
|
|
|
|
public static Operand X86GetAllElements(ArmEmitterContext context, int value)
|
|
{
|
|
Operand vector = context.VectorCreateScalar(Const(value));
|
|
|
|
vector = context.AddIntrinsic(Intrinsic.X86Shufps, vector, vector, Const(0));
|
|
|
|
return vector;
|
|
}
|
|
|
|
public static Operand X86GetAllElements(ArmEmitterContext context, long value)
|
|
{
|
|
Operand vector = context.VectorCreateScalar(Const(value));
|
|
|
|
vector = context.AddIntrinsic(Intrinsic.X86Movlhps, vector, vector);
|
|
|
|
return vector;
|
|
}
|
|
|
|
public static Operand X86GetElements(ArmEmitterContext context, long e1, long e0)
|
|
{
|
|
Operand vector0 = context.VectorCreateScalar(Const(e0));
|
|
Operand vector1 = context.VectorCreateScalar(Const(e1));
|
|
|
|
return context.AddIntrinsic(Intrinsic.X86Punpcklqdq, vector0, vector1);
|
|
}
|
|
|
|
public static int X86GetRoundControl(FPRoundingMode roundMode)
|
|
{
|
|
switch (roundMode)
|
|
{
|
|
case FPRoundingMode.ToNearest: return 8 | 0; // even
|
|
case FPRoundingMode.TowardsPlusInfinity: return 8 | 2;
|
|
case FPRoundingMode.TowardsMinusInfinity: return 8 | 1;
|
|
case FPRoundingMode.TowardsZero: return 8 | 3;
|
|
}
|
|
|
|
throw new ArgumentException($"Invalid rounding mode \"{roundMode}\".");
|
|
}
|
|
|
|
public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
|
|
|
|
Operand res = context.AddIntrinsic(inst, n);
|
|
|
|
if ((op.Size & 1) != 0)
|
|
{
|
|
res = context.VectorZeroUpper64(res);
|
|
}
|
|
else
|
|
{
|
|
res = context.VectorZeroUpper96(res);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
|
|
{
|
|
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
Operand m = GetVec(op.Rm);
|
|
|
|
Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
|
|
|
|
Operand res = context.AddIntrinsic(inst, n, m);
|
|
|
|
if ((op.Size & 1) != 0)
|
|
{
|
|
res = context.VectorZeroUpper64(res);
|
|
}
|
|
else
|
|
{
|
|
res = context.VectorZeroUpper96(res);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
|
|
|
|
Operand res = context.AddIntrinsic(inst, n);
|
|
|
|
if (op.RegisterSize == RegisterSize.Simd64)
|
|
{
|
|
res = context.VectorZeroUpper64(res);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
|
|
{
|
|
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
Operand m = GetVec(op.Rm);
|
|
|
|
Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
|
|
|
|
Operand res = context.AddIntrinsic(inst, n, m);
|
|
|
|
if (op.RegisterSize == RegisterSize.Simd64)
|
|
{
|
|
res = context.VectorZeroUpper64(res);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static Operand EmitUnaryMathCall(ArmEmitterContext context, string name, Operand n)
|
|
{
|
|
IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
|
|
|
|
MethodInfo info = (op.Size & 1) == 0
|
|
? typeof(MathF).GetMethod(name, new Type[] { typeof(float) })
|
|
: typeof(Math). GetMethod(name, new Type[] { typeof(double) });
|
|
|
|
return context.Call(info, n);
|
|
}
|
|
|
|
public static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n)
|
|
{
|
|
IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
|
|
|
|
string name = nameof(Math.Round);
|
|
|
|
MethodInfo info = (op.Size & 1) == 0
|
|
? typeof(MathF).GetMethod(name, new Type[] { typeof(float), typeof(MidpointRounding) })
|
|
: typeof(Math). GetMethod(name, new Type[] { typeof(double), typeof(MidpointRounding) });
|
|
|
|
return context.Call(info, n, Const((int)roundMode));
|
|
}
|
|
|
|
public static Operand EmitSoftFloatCall(ArmEmitterContext context, string name, params Operand[] callArgs)
|
|
{
|
|
IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
|
|
|
|
MethodInfo info = (op.Size & 1) == 0
|
|
? typeof(SoftFloat32).GetMethod(name)
|
|
: typeof(SoftFloat64).GetMethod(name);
|
|
|
|
return context.Call(info, callArgs);
|
|
}
|
|
|
|
public static void EmitScalarBinaryOpByElemF(ArmEmitterContext context, Func2I emit)
|
|
{
|
|
OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
|
|
|
|
OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
|
|
|
|
Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
|
|
Operand m = context.VectorExtract(type, GetVec(op.Rm), op.Index);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n, m), 0));
|
|
}
|
|
|
|
public static void EmitScalarTernaryOpByElemF(ArmEmitterContext context, Func3I emit)
|
|
{
|
|
OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
|
|
|
|
OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
|
|
|
|
Operand d = context.VectorExtract(type, GetVec(op.Rd), 0);
|
|
Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
|
|
Operand m = context.VectorExtract(type, GetVec(op.Rm), op.Index);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(d, n, m), 0));
|
|
}
|
|
|
|
public static void EmitScalarUnaryOpSx(ArmEmitterContext context, Func1I emit)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand n = EmitVectorExtractSx(context, op.Rn, 0, op.Size);
|
|
|
|
Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n), 0, op.Size);
|
|
|
|
context.Copy(GetVec(op.Rd), d);
|
|
}
|
|
|
|
public static void EmitScalarBinaryOpSx(ArmEmitterContext context, Func2I emit)
|
|
{
|
|
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
|
|
|
Operand n = EmitVectorExtractSx(context, op.Rn, 0, op.Size);
|
|
Operand m = EmitVectorExtractSx(context, op.Rm, 0, op.Size);
|
|
|
|
Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n, m), 0, op.Size);
|
|
|
|
context.Copy(GetVec(op.Rd), d);
|
|
}
|
|
|
|
public static void EmitScalarUnaryOpZx(ArmEmitterContext context, Func1I emit)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
|
|
|
|
Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n), 0, op.Size);
|
|
|
|
context.Copy(GetVec(op.Rd), d);
|
|
}
|
|
|
|
public static void EmitScalarBinaryOpZx(ArmEmitterContext context, Func2I emit)
|
|
{
|
|
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
|
|
|
Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
|
|
Operand m = EmitVectorExtractZx(context, op.Rm, 0, op.Size);
|
|
|
|
Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n, m), 0, op.Size);
|
|
|
|
context.Copy(GetVec(op.Rd), d);
|
|
}
|
|
|
|
public static void EmitScalarTernaryOpZx(ArmEmitterContext context, Func3I emit)
|
|
{
|
|
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
|
|
|
Operand d = EmitVectorExtractZx(context, op.Rd, 0, op.Size);
|
|
Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
|
|
Operand m = EmitVectorExtractZx(context, op.Rm, 0, op.Size);
|
|
|
|
d = EmitVectorInsert(context, context.VectorZero(), emit(d, n, m), 0, op.Size);
|
|
|
|
context.Copy(GetVec(op.Rd), d);
|
|
}
|
|
|
|
public static void EmitScalarUnaryOpF(ArmEmitterContext context, Func1I emit)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
|
|
|
|
Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n), 0));
|
|
}
|
|
|
|
public static void EmitScalarBinaryOpF(ArmEmitterContext context, Func2I emit)
|
|
{
|
|
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
|
|
|
OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
|
|
|
|
Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
|
|
Operand m = context.VectorExtract(type, GetVec(op.Rm), 0);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n, m), 0));
|
|
}
|
|
|
|
public static void EmitScalarTernaryRaOpF(ArmEmitterContext context, Func3I emit)
|
|
{
|
|
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
|
|
|
OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
|
|
|
|
Operand a = context.VectorExtract(type, GetVec(op.Ra), 0);
|
|
Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
|
|
Operand m = context.VectorExtract(type, GetVec(op.Rm), 0);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(a, n, m), 0));
|
|
}
|
|
|
|
public static void EmitVectorUnaryOpF(ArmEmitterContext context, Func1I emit)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
int sizeF = op.Size & 1;
|
|
|
|
OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
|
|
|
|
int elems = op.GetBytesCount() >> sizeF + 2;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
|
|
|
|
res = context.VectorInsert(res, emit(ne), index);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitVectorBinaryOpF(ArmEmitterContext context, Func2I emit)
|
|
{
|
|
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
int sizeF = op.Size & 1;
|
|
|
|
OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
|
|
|
|
int elems = op.GetBytesCount() >> sizeF + 2;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
|
|
Operand me = context.VectorExtract(type, GetVec(op.Rm), index);
|
|
|
|
res = context.VectorInsert(res, emit(ne, me), index);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitVectorTernaryOpF(ArmEmitterContext context, Func3I emit)
|
|
{
|
|
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
int sizeF = op.Size & 1;
|
|
|
|
OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
|
|
|
|
int elems = op.GetBytesCount() >> sizeF + 2;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand de = context.VectorExtract(type, GetVec(op.Rd), index);
|
|
Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
|
|
Operand me = context.VectorExtract(type, GetVec(op.Rm), index);
|
|
|
|
res = context.VectorInsert(res, emit(de, ne, me), index);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitVectorBinaryOpByElemF(ArmEmitterContext context, Func2I emit)
|
|
{
|
|
OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
int sizeF = op.Size & 1;
|
|
|
|
OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
|
|
|
|
int elems = op.GetBytesCount() >> sizeF + 2;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
|
|
Operand me = context.VectorExtract(type, GetVec(op.Rm), op.Index);
|
|
|
|
res = context.VectorInsert(res, emit(ne, me), index);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitVectorTernaryOpByElemF(ArmEmitterContext context, Func3I emit)
|
|
{
|
|
OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
int sizeF = op.Size & 1;
|
|
|
|
OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
|
|
|
|
int elems = op.GetBytesCount() >> sizeF + 2;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand de = context.VectorExtract(type, GetVec(op.Rd), index);
|
|
Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
|
|
Operand me = context.VectorExtract(type, GetVec(op.Rm), op.Index);
|
|
|
|
res = context.VectorInsert(res, emit(de, ne, me), index);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitVectorUnaryOpSx(ArmEmitterContext context, Func1I emit)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
int elems = op.GetBytesCount() >> op.Size;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
|
|
|
|
res = EmitVectorInsert(context, res, emit(ne), index, op.Size);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitVectorBinaryOpSx(ArmEmitterContext context, Func2I emit)
|
|
{
|
|
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
int elems = op.GetBytesCount() >> op.Size;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
|
|
Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);
|
|
|
|
res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitVectorTernaryOpSx(ArmEmitterContext context, Func3I emit)
|
|
{
|
|
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
int elems = op.GetBytesCount() >> op.Size;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand de = EmitVectorExtractSx(context, op.Rd, index, op.Size);
|
|
Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
|
|
Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);
|
|
|
|
res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitVectorUnaryOpZx(ArmEmitterContext context, Func1I emit)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
int elems = op.GetBytesCount() >> op.Size;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
|
|
|
|
res = EmitVectorInsert(context, res, emit(ne), index, op.Size);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitVectorBinaryOpZx(ArmEmitterContext context, Func2I emit)
|
|
{
|
|
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
int elems = op.GetBytesCount() >> op.Size;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
|
|
Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
|
|
|
|
res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitVectorTernaryOpZx(ArmEmitterContext context, Func3I emit)
|
|
{
|
|
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
int elems = op.GetBytesCount() >> op.Size;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
|
|
Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
|
|
Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
|
|
|
|
res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitVectorBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
|
|
{
|
|
OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
Operand me = EmitVectorExtractSx(context, op.Rm, op.Index, op.Size);
|
|
|
|
int elems = op.GetBytesCount() >> op.Size;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
|
|
|
|
res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitVectorBinaryOpByElemZx(ArmEmitterContext context, Func2I emit)
|
|
{
|
|
OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
Operand me = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size);
|
|
|
|
int elems = op.GetBytesCount() >> op.Size;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
|
|
|
|
res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitVectorTernaryOpByElemZx(ArmEmitterContext context, Func3I emit)
|
|
{
|
|
OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
Operand me = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size);
|
|
|
|
int elems = op.GetBytesCount() >> op.Size;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
|
|
Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
|
|
|
|
res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitVectorImmUnaryOp(ArmEmitterContext context, Func1I emit)
|
|
{
|
|
OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
|
|
|
|
Operand imm = Const(op.Immediate);
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
int elems = op.GetBytesCount() >> op.Size;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
res = EmitVectorInsert(context, res, emit(imm), index, op.Size);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitVectorImmBinaryOp(ArmEmitterContext context, Func2I emit)
|
|
{
|
|
OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
|
|
|
|
Operand imm = Const(op.Immediate);
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
int elems = op.GetBytesCount() >> op.Size;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
|
|
|
|
res = EmitVectorInsert(context, res, emit(de, imm), index, op.Size);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitVectorWidenRmBinaryOpSx(ArmEmitterContext context, Func2I emit)
|
|
{
|
|
EmitVectorWidenRmBinaryOp(context, emit, signed: true);
|
|
}
|
|
|
|
public static void EmitVectorWidenRmBinaryOpZx(ArmEmitterContext context, Func2I emit)
|
|
{
|
|
EmitVectorWidenRmBinaryOp(context, emit, signed: false);
|
|
}
|
|
|
|
private static void EmitVectorWidenRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed)
|
|
{
|
|
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
int elems = 8 >> op.Size;
|
|
|
|
int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signed);
|
|
Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed);
|
|
|
|
res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitVectorWidenRnRmBinaryOpSx(ArmEmitterContext context, Func2I emit)
|
|
{
|
|
EmitVectorWidenRnRmBinaryOp(context, emit, signed: true);
|
|
}
|
|
|
|
public static void EmitVectorWidenRnRmBinaryOpZx(ArmEmitterContext context, Func2I emit)
|
|
{
|
|
EmitVectorWidenRnRmBinaryOp(context, emit, signed: false);
|
|
}
|
|
|
|
private static void EmitVectorWidenRnRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed)
|
|
{
|
|
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
int elems = 8 >> op.Size;
|
|
|
|
int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
|
|
Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed);
|
|
|
|
res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitVectorWidenRnRmTernaryOpSx(ArmEmitterContext context, Func3I emit)
|
|
{
|
|
EmitVectorWidenRnRmTernaryOp(context, emit, signed: true);
|
|
}
|
|
|
|
public static void EmitVectorWidenRnRmTernaryOpZx(ArmEmitterContext context, Func3I emit)
|
|
{
|
|
EmitVectorWidenRnRmTernaryOp(context, emit, signed: false);
|
|
}
|
|
|
|
private static void EmitVectorWidenRnRmTernaryOp(ArmEmitterContext context, Func3I emit, bool signed)
|
|
{
|
|
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
int elems = 8 >> op.Size;
|
|
|
|
int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed);
|
|
Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
|
|
Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed);
|
|
|
|
res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitVectorWidenBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
|
|
{
|
|
EmitVectorWidenBinaryOpByElem(context, emit, signed: true);
|
|
}
|
|
|
|
public static void EmitVectorWidenBinaryOpByElemZx(ArmEmitterContext context, Func2I emit)
|
|
{
|
|
EmitVectorWidenBinaryOpByElem(context, emit, signed: false);
|
|
}
|
|
|
|
private static void EmitVectorWidenBinaryOpByElem(ArmEmitterContext context, Func2I emit, bool signed)
|
|
{
|
|
OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed);
|
|
|
|
int elems = 8 >> op.Size;
|
|
|
|
int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
|
|
|
|
res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitVectorWidenTernaryOpByElemSx(ArmEmitterContext context, Func3I emit)
|
|
{
|
|
EmitVectorWidenTernaryOpByElem(context, emit, signed: true);
|
|
}
|
|
|
|
public static void EmitVectorWidenTernaryOpByElemZx(ArmEmitterContext context, Func3I emit)
|
|
{
|
|
EmitVectorWidenTernaryOpByElem(context, emit, signed: false);
|
|
}
|
|
|
|
private static void EmitVectorWidenTernaryOpByElem(ArmEmitterContext context, Func3I emit, bool signed)
|
|
{
|
|
OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed);
|
|
|
|
int elems = 8 >> op.Size;
|
|
|
|
int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed);
|
|
Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
|
|
|
|
res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitVectorPairwiseOpSx(ArmEmitterContext context, Func2I emit)
|
|
{
|
|
EmitVectorPairwiseOp(context, emit, signed: true);
|
|
}
|
|
|
|
public static void EmitVectorPairwiseOpZx(ArmEmitterContext context, Func2I emit)
|
|
{
|
|
EmitVectorPairwiseOp(context, emit, signed: false);
|
|
}
|
|
|
|
private static void EmitVectorPairwiseOp(ArmEmitterContext context, Func2I emit, bool signed)
|
|
{
|
|
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
int pairs = op.GetPairsCount() >> op.Size;
|
|
|
|
for (int index = 0; index < pairs; index++)
|
|
{
|
|
int pairIndex = index << 1;
|
|
|
|
Operand n0 = EmitVectorExtract(context, op.Rn, pairIndex, op.Size, signed);
|
|
Operand n1 = EmitVectorExtract(context, op.Rn, pairIndex + 1, op.Size, signed);
|
|
|
|
Operand m0 = EmitVectorExtract(context, op.Rm, pairIndex, op.Size, signed);
|
|
Operand m1 = EmitVectorExtract(context, op.Rm, pairIndex + 1, op.Size, signed);
|
|
|
|
res = EmitVectorInsert(context, res, emit(n0, n1), index, op.Size);
|
|
res = EmitVectorInsert(context, res, emit(m0, m1), pairs + index, op.Size);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitSsse3VectorPairwiseOp(ArmEmitterContext context, Intrinsic[] inst)
|
|
{
|
|
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
Operand m = GetVec(op.Rm);
|
|
|
|
if (op.RegisterSize == RegisterSize.Simd64)
|
|
{
|
|
Operand zeroEvenMask = X86GetElements(context, ZeroMask, EvenMasks[op.Size]);
|
|
Operand zeroOddMask = X86GetElements(context, ZeroMask, OddMasks [op.Size]);
|
|
|
|
Operand mN = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m); // m:n
|
|
|
|
Operand left = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroEvenMask); // 0:even from m:n
|
|
Operand right = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroOddMask); // 0:odd from m:n
|
|
|
|
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[op.Size], left, right));
|
|
}
|
|
else if (op.Size < 3)
|
|
{
|
|
Operand oddEvenMask = X86GetElements(context, OddMasks[op.Size], EvenMasks[op.Size]);
|
|
|
|
Operand oddEvenN = context.AddIntrinsic(Intrinsic.X86Pshufb, n, oddEvenMask); // odd:even from n
|
|
Operand oddEvenM = context.AddIntrinsic(Intrinsic.X86Pshufb, m, oddEvenMask); // odd:even from m
|
|
|
|
Operand left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, oddEvenN, oddEvenM);
|
|
Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, oddEvenN, oddEvenM);
|
|
|
|
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[op.Size], left, right));
|
|
}
|
|
else
|
|
{
|
|
Operand left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m);
|
|
Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, n, m);
|
|
|
|
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[3], left, right));
|
|
}
|
|
}
|
|
|
|
public static void EmitVectorAcrossVectorOpSx(ArmEmitterContext context, Func2I emit)
|
|
{
|
|
EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: false);
|
|
}
|
|
|
|
public static void EmitVectorAcrossVectorOpZx(ArmEmitterContext context, Func2I emit)
|
|
{
|
|
EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: false);
|
|
}
|
|
|
|
public static void EmitVectorLongAcrossVectorOpSx(ArmEmitterContext context, Func2I emit)
|
|
{
|
|
EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: true);
|
|
}
|
|
|
|
public static void EmitVectorLongAcrossVectorOpZx(ArmEmitterContext context, Func2I emit)
|
|
{
|
|
EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: true);
|
|
}
|
|
|
|
private static void EmitVectorAcrossVectorOp(
|
|
ArmEmitterContext context,
|
|
Func2I emit,
|
|
bool signed,
|
|
bool isLong)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
int elems = op.GetBytesCount() >> op.Size;
|
|
|
|
Operand res = EmitVectorExtract(context, op.Rn, 0, op.Size, signed);
|
|
|
|
for (int index = 1; index < elems; index++)
|
|
{
|
|
Operand n = EmitVectorExtract(context, op.Rn, index, op.Size, signed);
|
|
|
|
res = emit(res, n);
|
|
}
|
|
|
|
int size = isLong ? op.Size + 1 : op.Size;
|
|
|
|
Operand d = EmitVectorInsert(context, context.VectorZero(), res, 0, size);
|
|
|
|
context.Copy(GetVec(op.Rd), d);
|
|
}
|
|
|
|
public static void EmitVectorAcrossVectorOpF(ArmEmitterContext context, Func2I emit)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Debug.Assert((op.Size & 1) == 0 && op.RegisterSize == RegisterSize.Simd128);
|
|
|
|
Operand res = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);
|
|
|
|
for (int index = 1; index < 4; index++)
|
|
{
|
|
Operand n = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), index);
|
|
|
|
res = emit(res, n);
|
|
}
|
|
|
|
Operand d = context.VectorInsert(context.VectorZero(), res, 0);
|
|
|
|
context.Copy(GetVec(op.Rd), d);
|
|
}
|
|
|
|
public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Func2I emit)
|
|
{
|
|
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
int sizeF = op.Size & 1;
|
|
|
|
OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
|
|
|
|
int pairs = op.GetPairsCount() >> sizeF + 2;
|
|
|
|
for (int index = 0; index < pairs; index++)
|
|
{
|
|
int pairIndex = index << 1;
|
|
|
|
Operand n0 = context.VectorExtract(type, GetVec(op.Rn), pairIndex);
|
|
Operand n1 = context.VectorExtract(type, GetVec(op.Rn), pairIndex + 1);
|
|
|
|
Operand m0 = context.VectorExtract(type, GetVec(op.Rm), pairIndex);
|
|
Operand m1 = context.VectorExtract(type, GetVec(op.Rm), pairIndex + 1);
|
|
|
|
res = context.VectorInsert(res, emit(n0, n1), index);
|
|
res = context.VectorInsert(res, emit(m0, m1), pairs + index);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitSse2VectorPairwiseOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
|
|
{
|
|
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
Operand m = GetVec(op.Rm);
|
|
|
|
int sizeF = op.Size & 1;
|
|
|
|
if (sizeF == 0)
|
|
{
|
|
if (op.RegisterSize == RegisterSize.Simd64)
|
|
{
|
|
Operand unpck = context.AddIntrinsic(Intrinsic.X86Unpcklps, n, m);
|
|
|
|
Operand zero = context.VectorZero();
|
|
|
|
Operand part0 = context.AddIntrinsic(Intrinsic.X86Movlhps, unpck, zero);
|
|
Operand part1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, unpck);
|
|
|
|
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst32, part0, part1));
|
|
}
|
|
else /* if (op.RegisterSize == RegisterSize.Simd128) */
|
|
{
|
|
const int sm0 = 2 << 6 | 0 << 4 | 2 << 2 | 0 << 0;
|
|
const int sm1 = 3 << 6 | 1 << 4 | 3 << 2 | 1 << 0;
|
|
|
|
Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, n, m, Const(sm0));
|
|
Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, n, m, Const(sm1));
|
|
|
|
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst32, part0, part1));
|
|
}
|
|
}
|
|
else /* if (sizeF == 1) */
|
|
{
|
|
Operand part0 = context.AddIntrinsic(Intrinsic.X86Unpcklpd, n, m);
|
|
Operand part1 = context.AddIntrinsic(Intrinsic.X86Unpckhpd, n, m);
|
|
|
|
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst64, part0, part1));
|
|
}
|
|
}
|
|
|
|
public enum CmpCondition
|
|
{
|
|
// Legacy Sse.
|
|
Equal = 0, // Ordered, non-signaling.
|
|
LessThan = 1, // Ordered, signaling.
|
|
LessThanOrEqual = 2, // Ordered, signaling.
|
|
UnorderedQ = 3, // Non-signaling.
|
|
NotLessThan = 5, // Unordered, signaling.
|
|
NotLessThanOrEqual = 6, // Unordered, signaling.
|
|
OrderedQ = 7, // Non-signaling.
|
|
|
|
// Vex.
|
|
GreaterThanOrEqual = 13, // Ordered, signaling.
|
|
GreaterThan = 14, // Ordered, signaling.
|
|
OrderedS = 23 // Signaling.
|
|
}
|
|
|
|
[Flags]
|
|
public enum SaturatingFlags
|
|
{
|
|
Scalar = 1 << 0,
|
|
Signed = 1 << 1,
|
|
|
|
Add = 1 << 2,
|
|
Sub = 1 << 3,
|
|
|
|
Accumulate = 1 << 4,
|
|
|
|
ScalarSx = Scalar | Signed,
|
|
ScalarZx = Scalar,
|
|
|
|
VectorSx = Signed,
|
|
VectorZx = 0
|
|
}
|
|
|
|
public static void EmitScalarSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit)
|
|
{
|
|
EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.ScalarSx);
|
|
}
|
|
|
|
public static void EmitVectorSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit)
|
|
{
|
|
EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.VectorSx);
|
|
}
|
|
|
|
private static void EmitSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit, SaturatingFlags flags)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
bool scalar = (flags & SaturatingFlags.Scalar) != 0;
|
|
|
|
int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
|
|
Operand de;
|
|
|
|
if (op.Size <= 2)
|
|
{
|
|
de = EmitSatQ(context, emit(ne), op.Size, signedSrc: true, signedDst: true);
|
|
}
|
|
else /* if (op.Size == 3) */
|
|
{
|
|
de = EmitUnarySignedSatQAbsOrNeg(context, emit(ne));
|
|
}
|
|
|
|
res = EmitVectorInsert(context, res, de, index, op.Size);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
public static void EmitScalarSaturatingBinaryOpSx(ArmEmitterContext context, SaturatingFlags flags)
|
|
{
|
|
EmitSaturatingBinaryOp(context, null, SaturatingFlags.ScalarSx | flags);
|
|
}
|
|
|
|
public static void EmitScalarSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags)
|
|
{
|
|
EmitSaturatingBinaryOp(context, null, SaturatingFlags.ScalarZx | flags);
|
|
}
|
|
|
|
public static void EmitVectorSaturatingBinaryOpSx(ArmEmitterContext context, SaturatingFlags flags)
|
|
{
|
|
EmitSaturatingBinaryOp(context, null, SaturatingFlags.VectorSx | flags);
|
|
}
|
|
|
|
public static void EmitVectorSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags)
|
|
{
|
|
EmitSaturatingBinaryOp(context, null, SaturatingFlags.VectorZx | flags);
|
|
}
|
|
|
|
public static void EmitSaturatingBinaryOp(ArmEmitterContext context, Func2I emit, SaturatingFlags flags)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
bool scalar = (flags & SaturatingFlags.Scalar) != 0;
|
|
bool signed = (flags & SaturatingFlags.Signed) != 0;
|
|
|
|
bool add = (flags & SaturatingFlags.Add) != 0;
|
|
bool sub = (flags & SaturatingFlags.Sub) != 0;
|
|
|
|
bool accumulate = (flags & SaturatingFlags.Accumulate) != 0;
|
|
|
|
int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
|
|
|
|
if (add || sub)
|
|
{
|
|
OpCodeSimdReg opReg = (OpCodeSimdReg)op;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand de;
|
|
Operand ne = EmitVectorExtract(context, opReg.Rn, index, op.Size, signed);
|
|
Operand me = EmitVectorExtract(context, opReg.Rm, index, op.Size, signed);
|
|
|
|
if (op.Size <= 2)
|
|
{
|
|
Operand temp = add ? context.Add(ne, me) : context.Subtract(ne, me);
|
|
|
|
de = EmitSatQ(context, temp, op.Size, signedSrc: true, signedDst: signed);
|
|
}
|
|
else if (add) /* if (op.Size == 3) */
|
|
{
|
|
de = EmitBinarySatQAdd(context, ne, me, signed);
|
|
}
|
|
else /* if (sub) */
|
|
{
|
|
de = EmitBinarySatQSub(context, ne, me, signed);
|
|
}
|
|
|
|
res = EmitVectorInsert(context, res, de, index, op.Size);
|
|
}
|
|
}
|
|
else if (accumulate)
|
|
{
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand de;
|
|
Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, !signed);
|
|
Operand me = EmitVectorExtract(context, op.Rd, index, op.Size, signed);
|
|
|
|
if (op.Size <= 2)
|
|
{
|
|
Operand temp = context.Add(ne, me);
|
|
|
|
de = EmitSatQ(context, temp, op.Size, signedSrc: true, signedDst: signed);
|
|
}
|
|
else /* if (op.Size == 3) */
|
|
{
|
|
de = EmitBinarySatQAccumulate(context, ne, me, signed);
|
|
}
|
|
|
|
res = EmitVectorInsert(context, res, de, index, op.Size);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
OpCodeSimdReg opReg = (OpCodeSimdReg)op;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = EmitVectorExtract(context, opReg.Rn, index, op.Size, signed);
|
|
Operand me = EmitVectorExtract(context, opReg.Rm, index, op.Size, signed);
|
|
|
|
Operand de = EmitSatQ(context, emit(ne, me), op.Size, true, signed);
|
|
|
|
res = EmitVectorInsert(context, res, de, index, op.Size);
|
|
}
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
[Flags]
|
|
public enum SaturatingNarrowFlags
|
|
{
|
|
Scalar = 1 << 0,
|
|
SignedSrc = 1 << 1,
|
|
SignedDst = 1 << 2,
|
|
|
|
ScalarSxSx = Scalar | SignedSrc | SignedDst,
|
|
ScalarSxZx = Scalar | SignedSrc,
|
|
ScalarZxZx = Scalar,
|
|
|
|
VectorSxSx = SignedSrc | SignedDst,
|
|
VectorSxZx = SignedSrc,
|
|
VectorZxZx = 0
|
|
}
|
|
|
|
public static void EmitSaturatingNarrowOp(ArmEmitterContext context, SaturatingNarrowFlags flags)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
bool scalar = (flags & SaturatingNarrowFlags.Scalar) != 0;
|
|
bool signedSrc = (flags & SaturatingNarrowFlags.SignedSrc) != 0;
|
|
bool signedDst = (flags & SaturatingNarrowFlags.SignedDst) != 0;
|
|
|
|
int elems = !scalar ? 8 >> op.Size : 1;
|
|
|
|
int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0;
|
|
|
|
Operand d = GetVec(op.Rd);
|
|
|
|
Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc);
|
|
|
|
Operand temp = EmitSatQ(context, ne, op.Size, signedSrc, signedDst);
|
|
|
|
res = EmitVectorInsert(context, res, temp, part + index, op.Size);
|
|
}
|
|
|
|
context.Copy(d, res);
|
|
}
|
|
|
|
// TSrc (16bit, 32bit, 64bit; signed, unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned).
|
|
public static Operand EmitSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedSrc, bool signedDst)
|
|
{
|
|
if ((uint)sizeDst > 2u)
|
|
{
|
|
throw new ArgumentOutOfRangeException(nameof(sizeDst));
|
|
}
|
|
|
|
MethodInfo info;
|
|
|
|
if (signedSrc)
|
|
{
|
|
info = signedDst
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SignedSrcSignedDstSatQ))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SignedSrcUnsignedDstSatQ));
|
|
}
|
|
else
|
|
{
|
|
info = signedDst
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.UnsignedSrcSignedDstSatQ))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.UnsignedSrcUnsignedDstSatQ));
|
|
}
|
|
|
|
return context.Call(info, op, Const(sizeDst));
|
|
}
|
|
|
|
// TSrc (64bit) == TDst (64bit); signed.
|
|
public static Operand EmitUnarySignedSatQAbsOrNeg(ArmEmitterContext context, Operand op)
|
|
{
|
|
Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size.");
|
|
|
|
return context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.UnarySignedSatQAbsOrNeg)), op);
|
|
}
|
|
|
|
// TSrcs (64bit) == TDst (64bit); signed, unsigned.
|
|
public static Operand EmitBinarySatQAdd(ArmEmitterContext context, Operand op1, Operand op2, bool signed)
|
|
{
|
|
Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size.");
|
|
|
|
MethodInfo info = signed
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.BinarySignedSatQAdd))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.BinaryUnsignedSatQAdd));
|
|
|
|
return context.Call(info, op1, op2);
|
|
}
|
|
|
|
// TSrcs (64bit) == TDst (64bit); signed, unsigned.
|
|
public static Operand EmitBinarySatQSub(ArmEmitterContext context, Operand op1, Operand op2, bool signed)
|
|
{
|
|
Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size.");
|
|
|
|
MethodInfo info = signed
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.BinarySignedSatQSub))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.BinaryUnsignedSatQSub));
|
|
|
|
return context.Call(info, op1, op2);
|
|
}
|
|
|
|
// TSrcs (64bit) == TDst (64bit); signed, unsigned.
|
|
public static Operand EmitBinarySatQAccumulate(ArmEmitterContext context, Operand op1, Operand op2, bool signed)
|
|
{
|
|
Debug.Assert(((OpCodeSimd)context.CurrOp).Size == 3, "Invalid element size.");
|
|
|
|
MethodInfo info = signed
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.BinarySignedSatQAcc))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.BinaryUnsignedSatQAcc));
|
|
|
|
return context.Call(info, op1, op2);
|
|
}
|
|
|
|
public static Operand EmitFloatAbs(ArmEmitterContext context, Operand value, bool single, bool vector)
|
|
{
|
|
Operand mask;
|
|
if (single)
|
|
{
|
|
mask = vector ? X86GetAllElements(context, -0f) : X86GetScalar(context, -0f);
|
|
}
|
|
else
|
|
{
|
|
mask = vector ? X86GetAllElements(context, -0d) : X86GetScalar(context, -0d);
|
|
}
|
|
|
|
return context.AddIntrinsic(single ? Intrinsic.X86Andnps : Intrinsic.X86Andnpd, mask, value);
|
|
}
|
|
|
|
public static Operand EmitVectorExtractSx(ArmEmitterContext context, int reg, int index, int size)
|
|
{
|
|
return EmitVectorExtract(context, reg, index, size, true);
|
|
}
|
|
|
|
public static Operand EmitVectorExtractZx(ArmEmitterContext context, int reg, int index, int size)
|
|
{
|
|
return EmitVectorExtract(context, reg, index, size, false);
|
|
}
|
|
|
|
public static Operand EmitVectorExtract(ArmEmitterContext context, int reg, int index, int size, bool signed)
|
|
{
|
|
ThrowIfInvalid(index, size);
|
|
|
|
Operand res = null;
|
|
|
|
switch (size)
|
|
{
|
|
case 0:
|
|
res = context.VectorExtract8(GetVec(reg), index);
|
|
break;
|
|
|
|
case 1:
|
|
res = context.VectorExtract16(GetVec(reg), index);
|
|
break;
|
|
|
|
case 2:
|
|
res = context.VectorExtract(OperandType.I32, GetVec(reg), index);
|
|
break;
|
|
|
|
case 3:
|
|
res = context.VectorExtract(OperandType.I64, GetVec(reg), index);
|
|
break;
|
|
}
|
|
|
|
if (signed)
|
|
{
|
|
switch (size)
|
|
{
|
|
case 0: res = context.SignExtend8 (OperandType.I64, res); break;
|
|
case 1: res = context.SignExtend16(OperandType.I64, res); break;
|
|
case 2: res = context.SignExtend32(OperandType.I64, res); break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
switch (size)
|
|
{
|
|
case 0: res = context.ZeroExtend8 (OperandType.I64, res); break;
|
|
case 1: res = context.ZeroExtend16(OperandType.I64, res); break;
|
|
case 2: res = context.ZeroExtend32(OperandType.I64, res); break;
|
|
}
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
public static Operand EmitVectorInsert(ArmEmitterContext context, Operand vector, Operand value, int index, int size)
|
|
{
|
|
ThrowIfInvalid(index, size);
|
|
|
|
if (size < 3 && value.Type == OperandType.I64)
|
|
{
|
|
value = context.ConvertI64ToI32(value);
|
|
}
|
|
|
|
switch (size)
|
|
{
|
|
case 0: vector = context.VectorInsert8 (vector, value, index); break;
|
|
case 1: vector = context.VectorInsert16(vector, value, index); break;
|
|
case 2: vector = context.VectorInsert (vector, value, index); break;
|
|
case 3: vector = context.VectorInsert (vector, value, index); break;
|
|
}
|
|
|
|
return vector;
|
|
}
|
|
|
|
public static void ThrowIfInvalid(int index, int size)
|
|
{
|
|
if ((uint)size > 3u)
|
|
{
|
|
throw new ArgumentOutOfRangeException(nameof(size));
|
|
}
|
|
|
|
if ((uint)index >= 16u >> size)
|
|
{
|
|
throw new ArgumentOutOfRangeException(nameof(index));
|
|
}
|
|
}
|
|
}
|
|
}
|