diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt.cs b/ARMeilleure/Instructions/InstEmitSimdCvt.cs index 84d81fac..6994643e 100644 --- a/ARMeilleure/Instructions/InstEmitSimdCvt.cs +++ b/ARMeilleure/Instructions/InstEmitSimdCvt.cs @@ -105,11 +105,48 @@ namespace ARMeilleure.Instructions } else if (op.Size == 1 && op.Opc == 3) // Double -> Half. { - throw new NotImplementedException("Double-precision to half-precision."); + if (Optimizations.UseF16c) + { + Debug.Assert(!Optimizations.ForceLegacySse); + + Operand n = GetVec(op.Rn); + + Operand res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), n); + res = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, res, Const(X86GetRoundControl(FPRoundingMode.ToNearest))); + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand ne = context.VectorExtract(OperandType.FP64, GetVec(op.Rn), 0); + + Operand res = context.Call(typeof(SoftFloat64_16).GetMethod(nameof(SoftFloat64_16.FPConvert)), ne); + + res = context.ZeroExtend16(OperandType.I64, res); + + context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, 1)); + } } - else if (op.Size == 3 && op.Opc == 1) // Double -> Half. + else if (op.Size == 3 && op.Opc == 1) // Half -> Double. { - throw new NotImplementedException("Half-precision to double-precision."); + if (Optimizations.UseF16c) + { + Operand n = GetVec(op.Rn); + + Operand res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, GetVec(op.Rn)); + res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), res); + res = context.VectorZeroUpper64(res); + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand ne = EmitVectorExtractZx(context, op.Rn, 0, 1); + + Operand res = context.Call(typeof(SoftFloat16_64).GetMethod(nameof(SoftFloat16_64.FPConvert)), ne); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } } else // Invalid encoding. { diff --git a/ARMeilleure/Instructions/SoftFloat.cs b/ARMeilleure/Instructions/SoftFloat.cs index ec66bb86..a138f5df 100644 --- a/ARMeilleure/Instructions/SoftFloat.cs +++ b/ARMeilleure/Instructions/SoftFloat.cs @@ -75,71 +75,50 @@ namespace ARMeilleure.Instructions return tbl; } - } - static class SoftFloat16_32 - { - public static float FPConvert(ushort valueBits) + public static void FPProcessException(FPException exc, ExecutionContext context) { - ExecutionContext context = NativeInterface.GetContext(); + FPProcessException(exc, context, context.Fpcr); + } - double real = valueBits.FPUnpackCv(out FPType type, out bool sign, context); + public static void FPProcessException(FPException exc, ExecutionContext context, FPCR fpcr) + { + int enable = (int)exc + 8; - float result; - - if (type == FPType.SNaN || type == FPType.QNaN) + if ((fpcr & (FPCR)(1 << enable)) != 0) { - if ((context.Fpcr & FPCR.Dn) != 0) - { - result = FPDefaultNaN(); - } - else - { - result = FPConvertNaN(valueBits); - } - - if (type == FPType.SNaN) - { - FPProcessException(FPException.InvalidOp, context); - } - } - else if (type == FPType.Infinity) - { - result = FPInfinity(sign); - } - else if (type == FPType.Zero) - { - result = FPZero(sign); + throw new NotImplementedException("Floating-point trap handling."); } else { - result = FPRoundCv(real, context); + context.Fpsr |= (FPSR)(1 << (int)exc); } - - return result; } + } - private static float FPDefaultNaN() + static class SoftFloat16 + { + public static ushort FPDefaultNaN() { - return BitConverter.Int32BitsToSingle(0x7fc00000); + return (ushort)0x7E00u; } - private static float FPInfinity(bool sign) + public static ushort FPInfinity(bool sign) { - return sign ? float.NegativeInfinity : float.PositiveInfinity; + return sign ? (ushort)0xFC00u : (ushort)0x7C00u; } - private static float FPZero(bool sign) + public static ushort FPZero(bool sign) { - return sign ? -0f : +0f; + return sign ? (ushort)0x8000u : (ushort)0x0000u; } - private static float FPMaxNormal(bool sign) + public static ushort FPMaxNormal(bool sign) { - return sign ? float.MinValue : float.MaxValue; + return sign ? (ushort)0xFBFFu : (ushort)0x7BFFu; } - private static double FPUnpackCv( + public static double FPUnpackCv( this ushort valueBits, out FPType type, out bool sign, @@ -187,285 +166,7 @@ namespace ARMeilleure.Instructions return sign ? -real : real; } - private static float FPRoundCv(double real, ExecutionContext context) - { - const int minimumExp = -126; - - const int e = 8; - const int f = 23; - - bool sign; - double mantissa; - - if (real < 0d) - { - sign = true; - mantissa = -real; - } - else - { - sign = false; - mantissa = real; - } - - int exponent = 0; - - while (mantissa < 1d) - { - mantissa *= 2d; - exponent--; - } - - while (mantissa >= 2d) - { - mantissa /= 2d; - exponent++; - } - - if ((context.Fpcr & FPCR.Fz) != 0 && exponent < minimumExp) - { - context.Fpsr |= FPSR.Ufc; - - return FPZero(sign); - } - - uint biasedExp = (uint)Math.Max(exponent - minimumExp + 1, 0); - - if (biasedExp == 0u) - { - mantissa /= Math.Pow(2d, minimumExp - exponent); - } - - uint intMant = (uint)Math.Floor(mantissa * Math.Pow(2d, f)); - double error = mantissa * Math.Pow(2d, f) - (double)intMant; - - if (biasedExp == 0u && (error != 0d || (context.Fpcr & FPCR.Ufe) != 0)) - { - FPProcessException(FPException.Underflow, context); - } - - bool overflowToInf; - bool roundUp; - - switch (context.Fpcr.GetRoundingMode()) - { - default: - case FPRoundingMode.ToNearest: - roundUp = (error > 0.5d || (error == 0.5d && (intMant & 1u) == 1u)); - overflowToInf = true; - break; - - case FPRoundingMode.TowardsPlusInfinity: - roundUp = (error != 0d && !sign); - overflowToInf = !sign; - break; - - case FPRoundingMode.TowardsMinusInfinity: - roundUp = (error != 0d && sign); - overflowToInf = sign; - break; - - case FPRoundingMode.TowardsZero: - roundUp = false; - overflowToInf = false; - break; - } - - if (roundUp) - { - intMant++; - - if (intMant == 1u << f) - { - biasedExp = 1u; - } - - if (intMant == 1u << (f + 1)) - { - biasedExp++; - intMant >>= 1; - } - } - - float result; - - if (biasedExp >= (1u << e) - 1u) - { - result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); - - FPProcessException(FPException.Overflow, context); - - error = 1d; - } - else - { - result = BitConverter.Int32BitsToSingle( - (int)((sign ? 1u : 0u) << 31 | (biasedExp & 0xFFu) << 23 | (intMant & 0x007FFFFFu))); - } - - if (error != 0d) - { - FPProcessException(FPException.Inexact, context); - } - - return result; - } - - private static float FPConvertNaN(ushort valueBits) - { - return BitConverter.Int32BitsToSingle( - (int)(((uint)valueBits & 0x8000u) << 16 | 0x7FC00000u | ((uint)valueBits & 0x01FFu) << 13)); - } - - private static void FPProcessException(FPException exc, ExecutionContext context) - { - int enable = (int)exc + 8; - - if ((context.Fpcr & (FPCR)(1 << enable)) != 0) - { - throw new NotImplementedException("Floating-point trap handling."); - } - else - { - context.Fpsr |= (FPSR)(1 << (int)exc); - } - } - } - - static class SoftFloat32_16 - { - public static ushort FPConvert(float value) - { - ExecutionContext context = NativeInterface.GetContext(); - - double real = value.FPUnpackCv(out FPType type, out bool sign, out uint valueBits, context); - - bool altHp = (context.Fpcr & FPCR.Ahp) != 0; - - ushort resultBits; - - if (type == FPType.SNaN || type == FPType.QNaN) - { - if (altHp) - { - resultBits = FPZero(sign); - } - else if ((context.Fpcr & FPCR.Dn) != 0) - { - resultBits = FPDefaultNaN(); - } - else - { - resultBits = FPConvertNaN(valueBits); - } - - if (type == FPType.SNaN || altHp) - { - FPProcessException(FPException.InvalidOp, context); - } - } - else if (type == FPType.Infinity) - { - if (altHp) - { - resultBits = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu); - - FPProcessException(FPException.InvalidOp, context); - } - else - { - resultBits = FPInfinity(sign); - } - } - else if (type == FPType.Zero) - { - resultBits = FPZero(sign); - } - else - { - resultBits = FPRoundCv(real, context); - } - - return resultBits; - } - - private static ushort FPDefaultNaN() - { - return (ushort)0x7E00u; - } - - private static ushort FPInfinity(bool sign) - { - return sign ? (ushort)0xFC00u : (ushort)0x7C00u; - } - - private static ushort FPZero(bool sign) - { - return sign ? (ushort)0x8000u : (ushort)0x0000u; - } - - private static ushort FPMaxNormal(bool sign) - { - return sign ? (ushort)0xFBFFu : (ushort)0x7BFFu; - } - - private static double FPUnpackCv( - this float value, - out FPType type, - out bool sign, - out uint valueBits, - ExecutionContext context) - { - valueBits = (uint)BitConverter.SingleToInt32Bits(value); - - sign = (~valueBits & 0x80000000u) == 0u; - - uint exp32 = (valueBits & 0x7F800000u) >> 23; - uint frac32 = valueBits & 0x007FFFFFu; - - double real; - - if (exp32 == 0u) - { - if (frac32 == 0u || (context.Fpcr & FPCR.Fz) != 0) - { - type = FPType.Zero; - real = 0d; - - if (frac32 != 0u) - { - FPProcessException(FPException.InputDenorm, context); - } - } - else - { - type = FPType.Nonzero; // Subnormal. - real = Math.Pow(2d, -126) * ((double)frac32 * Math.Pow(2d, -23)); - } - } - else if (exp32 == 0xFFu) - { - if (frac32 == 0u) - { - type = FPType.Infinity; - real = Math.Pow(2d, 1000); - } - else - { - type = (~frac32 & 0x00400000u) == 0u ? FPType.QNaN : FPType.SNaN; - real = 0d; - } - } - else - { - type = FPType.Nonzero; // Normal. - real = Math.Pow(2d, (int)exp32 - 127) * (1d + (double)frac32 * Math.Pow(2d, -23)); - } - - return sign ? -real : real; - } - - private static ushort FPRoundCv(double real, ExecutionContext context) + public static ushort FPRoundCv(double real, ExecutionContext context) { const int minimumExp = -14; @@ -512,7 +213,7 @@ namespace ARMeilleure.Instructions if (biasedExp == 0u && (error != 0d || (context.Fpcr & FPCR.Ufe) != 0)) { - FPProcessException(FPException.Underflow, context); + SoftFloat.FPProcessException(FPException.Underflow, context); } bool overflowToInf; @@ -566,7 +267,7 @@ namespace ARMeilleure.Instructions { resultBits = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); - FPProcessException(FPException.Overflow, context); + SoftFloat.FPProcessException(FPException.Overflow, context); error = 1d; } @@ -581,7 +282,7 @@ namespace ARMeilleure.Instructions { resultBits = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu); - FPProcessException(FPException.InvalidOp, context); + SoftFloat.FPProcessException(FPException.InvalidOp, context); error = 0d; } @@ -593,29 +294,475 @@ namespace ARMeilleure.Instructions if (error != 0d) { - FPProcessException(FPException.Inexact, context); + SoftFloat.FPProcessException(FPException.Inexact, context); + } + + return resultBits; + } + } + + static class SoftFloat16_32 + { + public static float FPConvert(ushort valueBits) + { + ExecutionContext context = NativeInterface.GetContext(); + + double real = valueBits.FPUnpackCv(out FPType type, out bool sign, context); + + float result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + if ((context.Fpcr & FPCR.Dn) != 0) + { + result = SoftFloat32.FPDefaultNaN(); + } + else + { + result = FPConvertNaN(valueBits); + } + + if (type == FPType.SNaN) + { + SoftFloat.FPProcessException(FPException.InvalidOp, context); + } + } + else if (type == FPType.Infinity) + { + result = SoftFloat32.FPInfinity(sign); + } + else if (type == FPType.Zero) + { + result = SoftFloat32.FPZero(sign); + } + else + { + result = FPRoundCv(real, context); + } + + return result; + } + + private static float FPRoundCv(double real, ExecutionContext context) + { + const int minimumExp = -126; + + const int e = 8; + const int f = 23; + + bool sign; + double mantissa; + + if (real < 0d) + { + sign = true; + mantissa = -real; + } + else + { + sign = false; + mantissa = real; + } + + int exponent = 0; + + while (mantissa < 1d) + { + mantissa *= 2d; + exponent--; + } + + while (mantissa >= 2d) + { + mantissa /= 2d; + exponent++; + } + + if ((context.Fpcr & FPCR.Fz) != 0 && exponent < minimumExp) + { + context.Fpsr |= FPSR.Ufc; + + return SoftFloat32.FPZero(sign); + } + + uint biasedExp = (uint)Math.Max(exponent - minimumExp + 1, 0); + + if (biasedExp == 0u) + { + mantissa /= Math.Pow(2d, minimumExp - exponent); + } + + uint intMant = (uint)Math.Floor(mantissa * Math.Pow(2d, f)); + double error = mantissa * Math.Pow(2d, f) - (double)intMant; + + if (biasedExp == 0u && (error != 0d || (context.Fpcr & FPCR.Ufe) != 0)) + { + SoftFloat.FPProcessException(FPException.Underflow, context); + } + + bool overflowToInf; + bool roundUp; + + switch (context.Fpcr.GetRoundingMode()) + { + default: + case FPRoundingMode.ToNearest: + roundUp = (error > 0.5d || (error == 0.5d && (intMant & 1u) == 1u)); + overflowToInf = true; + break; + + case FPRoundingMode.TowardsPlusInfinity: + roundUp = (error != 0d && !sign); + overflowToInf = !sign; + break; + + case FPRoundingMode.TowardsMinusInfinity: + roundUp = (error != 0d && sign); + overflowToInf = sign; + break; + + case FPRoundingMode.TowardsZero: + roundUp = false; + overflowToInf = false; + break; + } + + if (roundUp) + { + intMant++; + + if (intMant == 1u << f) + { + biasedExp = 1u; + } + + if (intMant == 1u << (f + 1)) + { + biasedExp++; + intMant >>= 1; + } + } + + float result; + + if (biasedExp >= (1u << e) - 1u) + { + result = overflowToInf ? SoftFloat32.FPInfinity(sign) : SoftFloat32.FPMaxNormal(sign); + + SoftFloat.FPProcessException(FPException.Overflow, context); + + error = 1d; + } + else + { + result = BitConverter.Int32BitsToSingle( + (int)((sign ? 1u : 0u) << 31 | (biasedExp & 0xFFu) << 23 | (intMant & 0x007FFFFFu))); + } + + if (error != 0d) + { + SoftFloat.FPProcessException(FPException.Inexact, context); + } + + return result; + } + + private static float FPConvertNaN(ushort valueBits) + { + return BitConverter.Int32BitsToSingle( + (int)(((uint)valueBits & 0x8000u) << 16 | 0x7FC00000u | ((uint)valueBits & 0x01FFu) << 13)); + } + } + + static class SoftFloat16_64 + { + public static double FPConvert(ushort valueBits) + { + ExecutionContext context = NativeInterface.GetContext(); + + double real = valueBits.FPUnpackCv(out FPType type, out bool sign, context); + + double result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + if ((context.Fpcr & FPCR.Dn) != 0) + { + result = SoftFloat64.FPDefaultNaN(); + } + else + { + result = FPConvertNaN(valueBits); + } + + if (type == FPType.SNaN) + { + SoftFloat.FPProcessException(FPException.InvalidOp, context); + } + } + else if (type == FPType.Infinity) + { + result = SoftFloat64.FPInfinity(sign); + } + else if (type == FPType.Zero) + { + result = SoftFloat64.FPZero(sign); + } + else + { + result = FPRoundCv(real, context); + } + + return result; + } + + private static double FPRoundCv(double real, ExecutionContext context) + { + const int minimumExp = -1022; + + const int e = 11; + const int f = 52; + + bool sign; + double mantissa; + + if (real < 0d) + { + sign = true; + mantissa = -real; + } + else + { + sign = false; + mantissa = real; + } + + int exponent = 0; + + while (mantissa < 1d) + { + mantissa *= 2d; + exponent--; + } + + while (mantissa >= 2d) + { + mantissa /= 2d; + exponent++; + } + + if ((context.Fpcr & FPCR.Fz) != 0 && exponent < minimumExp) + { + context.Fpsr |= FPSR.Ufc; + + return SoftFloat64.FPZero(sign); + } + + uint biasedExp = (uint)Math.Max(exponent - minimumExp + 1, 0); + + if (biasedExp == 0u) + { + mantissa /= Math.Pow(2d, minimumExp - exponent); + } + + ulong intMant = (ulong)Math.Floor(mantissa * Math.Pow(2d, f)); + double error = mantissa * Math.Pow(2d, f) - (double)intMant; + + if (biasedExp == 0u && (error != 0d || (context.Fpcr & FPCR.Ufe) != 0)) + { + SoftFloat.FPProcessException(FPException.Underflow, context); + } + + bool overflowToInf; + bool roundUp; + + switch (context.Fpcr.GetRoundingMode()) + { + default: + case FPRoundingMode.ToNearest: + roundUp = (error > 0.5d || (error == 0.5d && (intMant & 1u) == 1u)); + overflowToInf = true; + break; + + case FPRoundingMode.TowardsPlusInfinity: + roundUp = (error != 0d && !sign); + overflowToInf = !sign; + break; + + case FPRoundingMode.TowardsMinusInfinity: + roundUp = (error != 0d && sign); + overflowToInf = sign; + break; + + case FPRoundingMode.TowardsZero: + roundUp = false; + overflowToInf = false; + break; + } + + if (roundUp) + { + intMant++; + + if (intMant == 1ul << f) + { + biasedExp = 1u; + } + + if (intMant == 1ul << (f + 1)) + { + biasedExp++; + intMant >>= 1; + } + } + + double result; + + if (biasedExp >= (1u << e) - 1u) + { + result = overflowToInf ? SoftFloat64.FPInfinity(sign) : SoftFloat64.FPMaxNormal(sign); + + SoftFloat.FPProcessException(FPException.Overflow, context); + + error = 1d; + } + else + { + result = BitConverter.Int64BitsToDouble( + (long)((sign ? 1ul : 0ul) << 63 | (biasedExp & 0x7FFul) << 52 | (intMant & 0x000FFFFFFFFFFFFFul))); + } + + if (error != 0d) + { + SoftFloat.FPProcessException(FPException.Inexact, context); + } + + return result; + } + + private static double FPConvertNaN(ushort valueBits) + { + return BitConverter.Int64BitsToDouble( + (long)(((ulong)valueBits & 0x8000ul) << 48 | 0x7FF8000000000000ul | ((ulong)valueBits & 0x01FFul) << 42)); + } + } + + static class SoftFloat32_16 + { + public static ushort FPConvert(float value) + { + ExecutionContext context = NativeInterface.GetContext(); + + double real = value.FPUnpackCv(out FPType type, out bool sign, out uint valueBits, context); + + bool altHp = (context.Fpcr & FPCR.Ahp) != 0; + + ushort resultBits; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + if (altHp) + { + resultBits = SoftFloat16.FPZero(sign); + } + else if ((context.Fpcr & FPCR.Dn) != 0) + { + resultBits = SoftFloat16.FPDefaultNaN(); + } + else + { + resultBits = FPConvertNaN(valueBits); + } + + if (type == FPType.SNaN || altHp) + { + SoftFloat.FPProcessException(FPException.InvalidOp, context); + } + } + else if (type == FPType.Infinity) + { + if (altHp) + { + resultBits = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu); + + SoftFloat.FPProcessException(FPException.InvalidOp, context); + } + else + { + resultBits = SoftFloat16.FPInfinity(sign); + } + } + else if (type == FPType.Zero) + { + resultBits = SoftFloat16.FPZero(sign); + } + else + { + resultBits = SoftFloat16.FPRoundCv(real, context); } return resultBits; } - private static ushort FPConvertNaN(uint valueBits) + private static double FPUnpackCv( + this float value, + out FPType type, + out bool sign, + out uint valueBits, + ExecutionContext context) { - return (ushort)((valueBits & 0x80000000u) >> 16 | 0x7E00u | (valueBits & 0x003FE000u) >> 13); - } + valueBits = (uint)BitConverter.SingleToInt32Bits(value); - private static void FPProcessException(FPException exc, ExecutionContext context) - { - int enable = (int)exc + 8; + sign = (~valueBits & 0x80000000u) == 0u; - if ((context.Fpcr & (FPCR)(1 << enable)) != 0) + uint exp32 = (valueBits & 0x7F800000u) >> 23; + uint frac32 = valueBits & 0x007FFFFFu; + + double real; + + if (exp32 == 0u) { - throw new NotImplementedException("Floating-point trap handling."); + if (frac32 == 0u || (context.Fpcr & FPCR.Fz) != 0) + { + type = FPType.Zero; + real = 0d; + + if (frac32 != 0u) + { + SoftFloat.FPProcessException(FPException.InputDenorm, context); + } + } + else + { + type = FPType.Nonzero; // Subnormal. + real = Math.Pow(2d, -126) * ((double)frac32 * Math.Pow(2d, -23)); + } + } + else if (exp32 == 0xFFu) + { + if (frac32 == 0u) + { + type = FPType.Infinity; + real = Math.Pow(2d, 1000); + } + else + { + type = (~frac32 & 0x00400000u) == 0u ? FPType.QNaN : FPType.SNaN; + real = 0d; + } } else { - context.Fpsr |= (FPSR)(1 << (int)exc); + type = FPType.Nonzero; // Normal. + real = Math.Pow(2d, (int)exp32 - 127) * (1d + (double)frac32 * Math.Pow(2d, -23)); } + + return sign ? -real : real; + } + + private static ushort FPConvertNaN(uint valueBits) + { + return (ushort)((valueBits & 0x80000000u) >> 16 | 0x7E00u | (valueBits & 0x003FE000u) >> 13); } } @@ -645,7 +792,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } else if ((inf1 && !sign1) || (inf2 && !sign2)) { @@ -691,7 +838,7 @@ namespace ARMeilleure.Instructions if (type1 == FPType.SNaN || type2 == FPType.SNaN || signalNaNs) { - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } } else @@ -734,7 +881,7 @@ namespace ARMeilleure.Instructions if (type1 == FPType.SNaN || type2 == FPType.SNaN) { - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } } else @@ -764,7 +911,7 @@ namespace ARMeilleure.Instructions { result = ZerosOrOnes(false); - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } else { @@ -793,7 +940,7 @@ namespace ARMeilleure.Instructions { result = ZerosOrOnes(false); - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } else { @@ -842,7 +989,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } else if (inf1 || zero2) { @@ -850,7 +997,7 @@ namespace ARMeilleure.Instructions if (!inf1) { - FPProcessException(FPException.DivideByZero, context, fpcr); + SoftFloat.FPProcessException(FPException.DivideByZero, context, fpcr); } } else if (zero1 || inf2) @@ -1079,7 +1226,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } else if (inf1 || inf2) { @@ -1128,7 +1275,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } if (!done) @@ -1143,7 +1290,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } else if ((infA && !signA) || (infP && !signP)) { @@ -1271,7 +1418,7 @@ namespace ARMeilleure.Instructions { result = FPInfinity(sign); - FPProcessException(FPException.DivideByZero, context, fpcr); + SoftFloat.FPProcessException(FPException.DivideByZero, context, fpcr); } else if (MathF.Abs(value) < MathF.Pow(2f, -128)) { @@ -1288,8 +1435,8 @@ namespace ARMeilleure.Instructions result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); - FPProcessException(FPException.Overflow, context, fpcr); - FPProcessException(FPException.Inexact, context, fpcr); + SoftFloat.FPProcessException(FPException.Overflow, context, fpcr); + SoftFloat.FPProcessException(FPException.Inexact, context, fpcr); } else if ((fpcr & FPCR.Fz) != 0 && (MathF.Abs(value) >= MathF.Pow(2f, 126))) { @@ -1460,13 +1607,13 @@ namespace ARMeilleure.Instructions { result = FPInfinity(sign); - FPProcessException(FPException.DivideByZero, context, fpcr); + SoftFloat.FPProcessException(FPException.DivideByZero, context, fpcr); } else if (sign) { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } else if (type == FPType.Infinity) { @@ -1525,7 +1672,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } else if ((inf1 && !sign1) || (inf2 && sign2)) { @@ -1580,7 +1727,7 @@ namespace ARMeilleure.Instructions { product = FPMulFpscr(value1, value2, true); } - + result = FPHalvedSub(FPThree(false), product, context, fpcr); } @@ -1653,7 +1800,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } else { @@ -1694,7 +1841,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } else if ((inf1 && !sign1) || (inf2 && sign2)) { @@ -1724,22 +1871,22 @@ namespace ARMeilleure.Instructions return result; } - private static float FPDefaultNaN() + public static float FPDefaultNaN() { return BitConverter.Int32BitsToSingle(0x7fc00000); } - private static float FPInfinity(bool sign) + public static float FPInfinity(bool sign) { return sign ? float.NegativeInfinity : float.PositiveInfinity; } - private static float FPZero(bool sign) + public static float FPZero(bool sign) { return sign ? -0f : +0f; } - private static float FPMaxNormal(bool sign) + public static float FPMaxNormal(bool sign) { return sign ? float.MinValue : float.MaxValue; } @@ -1790,7 +1937,7 @@ namespace ARMeilleure.Instructions if ((valueBits & 0x007FFFFFu) != 0u) { - FPProcessException(FPException.InputDenorm, context, fpcr); + SoftFloat.FPProcessException(FPException.InputDenorm, context, fpcr); } } else @@ -1900,7 +2047,7 @@ namespace ARMeilleure.Instructions { op |= 1u << 22; - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } if ((fpcr & FPCR.Dn) != 0) @@ -1910,19 +2057,124 @@ namespace ARMeilleure.Instructions return BitConverter.Int32BitsToSingle((int)op); } + } - private static void FPProcessException(FPException exc, ExecutionContext context, FPCR fpcr) + static class SoftFloat64_16 + { + public static ushort FPConvert(double value) { - int enable = (int)exc + 8; + ExecutionContext context = NativeInterface.GetContext(); - if ((fpcr & (FPCR)(1 << enable)) != 0) + double real = value.FPUnpackCv(out FPType type, out bool sign, out ulong valueBits, context); + + bool altHp = (context.Fpcr & FPCR.Ahp) != 0; + + ushort resultBits; + + if (type == FPType.SNaN || type == FPType.QNaN) { - throw new NotImplementedException("Floating-point trap handling."); + if (altHp) + { + resultBits = SoftFloat16.FPZero(sign); + } + else if ((context.Fpcr & FPCR.Dn) != 0) + { + resultBits = SoftFloat16.FPDefaultNaN(); + } + else + { + resultBits = FPConvertNaN(valueBits); + } + + if (type == FPType.SNaN || altHp) + { + SoftFloat.FPProcessException(FPException.InvalidOp, context); + } + } + else if (type == FPType.Infinity) + { + if (altHp) + { + resultBits = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu); + + SoftFloat.FPProcessException(FPException.InvalidOp, context); + } + else + { + resultBits = SoftFloat16.FPInfinity(sign); + } + } + else if (type == FPType.Zero) + { + resultBits = SoftFloat16.FPZero(sign); } else { - context.Fpsr |= (FPSR)(1 << (int)exc); + resultBits = SoftFloat16.FPRoundCv(real, context); } + + return resultBits; + } + + private static double FPUnpackCv( + this double value, + out FPType type, + out bool sign, + out ulong valueBits, + ExecutionContext context) + { + valueBits = (ulong)BitConverter.DoubleToInt64Bits(value); + + sign = (~valueBits & 0x8000000000000000ul) == 0u; + + ulong exp64 = (valueBits & 0x7FF0000000000000ul) >> 52; + ulong frac64 = valueBits & 0x000FFFFFFFFFFFFFul; + + double real; + + if (exp64 == 0u) + { + if (frac64 == 0u || (context.Fpcr & FPCR.Fz) != 0) + { + type = FPType.Zero; + real = 0d; + + if (frac64 != 0u) + { + SoftFloat.FPProcessException(FPException.InputDenorm, context); + } + } + else + { + type = FPType.Nonzero; // Subnormal. + real = Math.Pow(2d, -1022) * ((double)frac64 * Math.Pow(2d, -52)); + } + } + else if (exp64 == 0x7FFul) + { + if (frac64 == 0u) + { + type = FPType.Infinity; + real = Math.Pow(2d, 1000000); + } + else + { + type = (~frac64 & 0x0008000000000000ul) == 0u ? FPType.QNaN : FPType.SNaN; + real = 0d; + } + } + else + { + type = FPType.Nonzero; // Normal. + real = Math.Pow(2d, (int)exp64 - 1023) * (1d + (double)frac64 * Math.Pow(2d, -52)); + } + + return sign ? -real : real; + } + + private static ushort FPConvertNaN(ulong valueBits) + { + return (ushort)((valueBits & 0x8000000000000000ul) >> 48 | 0x7E00u | (valueBits & 0x0007FC0000000000ul) >> 42); } } @@ -1952,7 +2204,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } else if ((inf1 && !sign1) || (inf2 && !sign2)) { @@ -1998,7 +2250,7 @@ namespace ARMeilleure.Instructions if (type1 == FPType.SNaN || type2 == FPType.SNaN || signalNaNs) { - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } } else @@ -2041,7 +2293,7 @@ namespace ARMeilleure.Instructions if (type1 == FPType.SNaN || type2 == FPType.SNaN) { - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } } else @@ -2071,7 +2323,7 @@ namespace ARMeilleure.Instructions { result = ZerosOrOnes(false); - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } else { @@ -2100,7 +2352,7 @@ namespace ARMeilleure.Instructions { result = ZerosOrOnes(false); - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } else { @@ -2149,7 +2401,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } else if (inf1 || zero2) { @@ -2157,7 +2409,7 @@ namespace ARMeilleure.Instructions if (!inf1) { - FPProcessException(FPException.DivideByZero, context, fpcr); + SoftFloat.FPProcessException(FPException.DivideByZero, context, fpcr); } } else if (zero1 || inf2) @@ -2386,7 +2638,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } else if (inf1 || inf2) { @@ -2435,7 +2687,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } if (!done) @@ -2450,7 +2702,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } else if ((infA && !signA) || (infP && !signP)) { @@ -2578,7 +2830,7 @@ namespace ARMeilleure.Instructions { result = FPInfinity(sign); - FPProcessException(FPException.DivideByZero, context, fpcr); + SoftFloat.FPProcessException(FPException.DivideByZero, context, fpcr); } else if (Math.Abs(value) < Math.Pow(2d, -1024)) { @@ -2595,8 +2847,8 @@ namespace ARMeilleure.Instructions result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); - FPProcessException(FPException.Overflow, context, fpcr); - FPProcessException(FPException.Inexact, context, fpcr); + SoftFloat.FPProcessException(FPException.Overflow, context, fpcr); + SoftFloat.FPProcessException(FPException.Inexact, context, fpcr); } else if ((fpcr & FPCR.Fz) != 0 && (Math.Abs(value) >= Math.Pow(2d, 1022))) { @@ -2767,13 +3019,13 @@ namespace ARMeilleure.Instructions { result = FPInfinity(sign); - FPProcessException(FPException.DivideByZero, context, fpcr); + SoftFloat.FPProcessException(FPException.DivideByZero, context, fpcr); } else if (sign) { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } else if (type == FPType.Infinity) { @@ -2832,7 +3084,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } else if ((inf1 && !sign1) || (inf2 && sign2)) { @@ -2960,7 +3212,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } else { @@ -3001,7 +3253,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } else if ((inf1 && !sign1) || (inf2 && sign2)) { @@ -3031,22 +3283,22 @@ namespace ARMeilleure.Instructions return result; } - private static double FPDefaultNaN() + public static double FPDefaultNaN() { return BitConverter.Int64BitsToDouble(0x7ff8000000000000); } - private static double FPInfinity(bool sign) + public static double FPInfinity(bool sign) { return sign ? double.NegativeInfinity : double.PositiveInfinity; } - private static double FPZero(bool sign) + public static double FPZero(bool sign) { return sign ? -0d : +0d; } - private static double FPMaxNormal(bool sign) + public static double FPMaxNormal(bool sign) { return sign ? double.MinValue : double.MaxValue; } @@ -3097,7 +3349,7 @@ namespace ARMeilleure.Instructions if ((valueBits & 0x000FFFFFFFFFFFFFul) != 0ul) { - FPProcessException(FPException.InputDenorm, context, fpcr); + SoftFloat.FPProcessException(FPException.InputDenorm, context, fpcr); } } else @@ -3207,7 +3459,7 @@ namespace ARMeilleure.Instructions { op |= 1ul << 51; - FPProcessException(FPException.InvalidOp, context, fpcr); + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); } if ((fpcr & FPCR.Dn) != 0) @@ -3217,19 +3469,5 @@ namespace ARMeilleure.Instructions return BitConverter.Int64BitsToDouble((long)op); } - - private static void FPProcessException(FPException exc, ExecutionContext context, FPCR fpcr) - { - int enable = (int)exc + 8; - - if ((fpcr & (FPCR)(1 << enable)) != 0) - { - throw new NotImplementedException("Floating-point trap handling."); - } - else - { - context.Fpsr |= (FPSR)(1 << (int)exc); - } - } } } diff --git a/ARMeilleure/Translation/Delegates.cs b/ARMeilleure/Translation/Delegates.cs index 6d40dc96..57685a83 100644 --- a/ARMeilleure/Translation/Delegates.cs +++ b/ARMeilleure/Translation/Delegates.cs @@ -206,6 +206,7 @@ namespace ARMeilleure.Translation SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.UnsignedSrcUnsignedDstSatQ))); SetDelegateInfo(typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert))); + SetDelegateInfo(typeof(SoftFloat16_64).GetMethod(nameof(SoftFloat16_64.FPConvert))); SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPAdd))); SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPAddFpscr))); // A32 only. @@ -294,6 +295,8 @@ namespace ARMeilleure.Translation SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPRSqrtStepFused))); SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPSqrt))); SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPSub))); + + SetDelegateInfo(typeof(SoftFloat64_16).GetMethod(nameof(SoftFloat64_16.FPConvert))); } } } diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs index 9671a871..db8c74b3 100644 --- a/ARMeilleure/Translation/PTC/Ptc.cs +++ b/ARMeilleure/Translation/PTC/Ptc.cs @@ -27,7 +27,7 @@ namespace ARMeilleure.Translation.PTC private const string OuterHeaderMagicString = "PTCohd\0\0"; private const string InnerHeaderMagicString = "PTCihd\0\0"; - private const uint InternalVersion = 3362; //! To be incremented manually for each change to the ARMeilleure project. + private const uint InternalVersion = 3439; //! To be incremented manually for each change to the ARMeilleure project. private const string ActualDir = "0"; private const string BackupDir = "1"; diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs index c20e11ee..be3db0e7 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs @@ -825,6 +825,14 @@ namespace Ryujinx.Tests.Cpu }; } + private static uint[] _F_Cvt_S_DH_() + { + return new uint[] + { + 0x1E63C020u // FCVT H0, D1 + }; + } + private static uint[] _F_Cvt_S_HS_() { return new uint[] @@ -833,6 +841,14 @@ namespace Ryujinx.Tests.Cpu }; } + private static uint[] _F_Cvt_S_HD_() + { + return new uint[] + { + 0x1EE2C020u // FCVT D0, H1 + }; + } + private static uint[] _F_Cvt_ANZ_SU_S_S_() { return new uint[] @@ -1998,6 +2014,22 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + [Test, Pairwise] [Explicit] + public void F_Cvt_S_DH([ValueSource("_F_Cvt_S_DH_")] uint opcodes, + [ValueSource("_1D_F_")] ulong a, + [Values(RMode.Rn)] RMode rMode) + { + ulong z = TestContext.CurrentContext.Random.NextULong(); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + + int fpcr = (int)rMode << (int)Fpcr.RMode; + + SingleOpcode(opcodes, v0: v0, v1: v1, fpcr: fpcr); + + CompareAgainstUnicorn(); + } + [Test, Pairwise] [Explicit] public void F_Cvt_S_HS([ValueSource("_F_Cvt_S_HS_")] uint opcodes, [ValueSource("_1H_F_")] ulong a) @@ -2011,6 +2043,19 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + [Test, Pairwise] [Explicit] + public void F_Cvt_S_HD([ValueSource("_F_Cvt_S_HD_")] uint opcodes, + [ValueSource("_1H_F_")] ulong a) + { + ulong z = TestContext.CurrentContext.Random.NextULong(); + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0(a); + + SingleOpcode(opcodes, v0: v0, v1: v1); + + CompareAgainstUnicorn(); + } + [Test, Pairwise] [Explicit] public void F_Cvt_ANZ_SU_S_S([ValueSource("_F_Cvt_ANZ_SU_S_S_")] uint opcodes, [ValueSource("_1S_F_W_")] ulong a)