Ryujinx/ChocolArm64/Instructions/VectorHelper.cs
LDj3SNuD 16de171c44 Sse optimized the Scalar & Vector fp-to-fp conversion instructions (MNPZ & IX); added the related Tests (AMNPZ & IX). Small refactoring of existing instructions. (#676)
* Nit.

* Update InstEmitSimdCvt.cs

* Update VectorHelper.cs

* Update InstEmitSimdArithmetic.cs

* Update CpuTestSimd.cs

* Superseded.
2019-04-26 08:58:29 +10:00

646 lines
22 KiB
C#

using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace ChocolArm64.Instructions
{
static class VectorHelper
{
/// <summary>
/// Emits a call to a <see cref="VectorHelper"/> method chosen by the register size of the
/// current op: <paramref name="name64"/> when it is <c>RegisterSize.Simd64</c>,
/// <paramref name="name128"/> otherwise.
/// </summary>
/// <param name="context">Emitter context that supplies the current op and receives the call.</param>
/// <param name="name64">Method name used for the Simd64 register size.</param>
/// <param name="name128">Method name used for every other register size.</param>
public static void EmitCall(ILEmitterCtx context, string name64, string name128)
{
    string mthdName = name128;

    if (context.CurrOp.RegisterSize == RegisterSize.Simd64)
    {
        mthdName = name64;
    }

    context.EmitCall(typeof(VectorHelper), mthdName);
}
/// <summary>
/// Emits a call to the <see cref="VectorHelper"/> method named <paramref name="mthdName"/>.
/// </summary>
/// <param name="context">Emitter context that receives the call.</param>
/// <param name="mthdName">Name of the helper method to call.</param>
public static void EmitCall(ILEmitterCtx context, string mthdName)
    => context.EmitCall(typeof(VectorHelper), mthdName);
/// <summary>
/// Converts a single-precision value to a signed 32-bit integer with saturation.
/// </summary>
/// <param name="value">Value to convert.</param>
/// <returns>
/// 0 for NaN, <see cref="int.MaxValue"/> or <see cref="int.MinValue"/> when the value reaches
/// the corresponding bound, otherwise the value cast to <c>int</c>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int SatF32ToS32(float value)
{
    if (float.IsNaN(value))
    {
        return 0;
    }

    if (value >= int.MaxValue)
    {
        return int.MaxValue;
    }

    if (value <= int.MinValue)
    {
        return int.MinValue;
    }

    return (int)value;
}
/// <summary>
/// Converts a single-precision value to a signed 64-bit integer with saturation.
/// </summary>
/// <param name="value">Value to convert.</param>
/// <returns>
/// 0 for NaN, <see cref="long.MaxValue"/> or <see cref="long.MinValue"/> when the value reaches
/// the corresponding bound, otherwise the value cast to <c>long</c>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long SatF32ToS64(float value)
{
    if (float.IsNaN(value))
    {
        return 0;
    }

    if (value >= long.MaxValue)
    {
        return long.MaxValue;
    }

    if (value <= long.MinValue)
    {
        return long.MinValue;
    }

    return (long)value;
}
/// <summary>
/// Converts a single-precision value to an unsigned 32-bit integer with saturation.
/// </summary>
/// <param name="value">Value to convert.</param>
/// <returns>
/// 0 for NaN and for values at or below zero, <see cref="uint.MaxValue"/> when the value
/// reaches the upper bound, otherwise the value cast to <c>uint</c>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint SatF32ToU32(float value)
{
    if (float.IsNaN(value))
    {
        return 0;
    }

    if (value >= uint.MaxValue)
    {
        return uint.MaxValue;
    }

    if (value <= uint.MinValue)
    {
        return uint.MinValue;
    }

    return (uint)value;
}
/// <summary>
/// Converts a single-precision value to an unsigned 64-bit integer with saturation.
/// </summary>
/// <param name="value">Value to convert.</param>
/// <returns>
/// 0 for NaN and for values at or below zero, <see cref="ulong.MaxValue"/> when the value
/// reaches the upper bound, otherwise the value cast to <c>ulong</c>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong SatF32ToU64(float value)
{
    if (float.IsNaN(value))
    {
        return 0;
    }

    if (value >= ulong.MaxValue)
    {
        return ulong.MaxValue;
    }

    if (value <= ulong.MinValue)
    {
        return ulong.MinValue;
    }

    return (ulong)value;
}
/// <summary>
/// Converts a double-precision value to a signed 32-bit integer with saturation.
/// </summary>
/// <param name="value">Value to convert.</param>
/// <returns>
/// 0 for NaN, <see cref="int.MaxValue"/> or <see cref="int.MinValue"/> when the value reaches
/// the corresponding bound, otherwise the value cast to <c>int</c>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int SatF64ToS32(double value)
{
    if (double.IsNaN(value))
    {
        return 0;
    }

    if (value >= int.MaxValue)
    {
        return int.MaxValue;
    }

    if (value <= int.MinValue)
    {
        return int.MinValue;
    }

    return (int)value;
}
/// <summary>
/// Converts a double-precision value to a signed 64-bit integer with saturation.
/// </summary>
/// <param name="value">Value to convert.</param>
/// <returns>
/// 0 for NaN, <see cref="long.MaxValue"/> or <see cref="long.MinValue"/> when the value reaches
/// the corresponding bound, otherwise the value cast to <c>long</c>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long SatF64ToS64(double value)
{
    if (double.IsNaN(value))
    {
        return 0;
    }

    if (value >= long.MaxValue)
    {
        return long.MaxValue;
    }

    if (value <= long.MinValue)
    {
        return long.MinValue;
    }

    return (long)value;
}
/// <summary>
/// Converts a double-precision value to an unsigned 32-bit integer with saturation.
/// </summary>
/// <param name="value">Value to convert.</param>
/// <returns>
/// 0 for NaN and for values at or below zero, <see cref="uint.MaxValue"/> when the value
/// reaches the upper bound, otherwise the value cast to <c>uint</c>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint SatF64ToU32(double value)
{
    if (double.IsNaN(value))
    {
        return 0;
    }

    if (value >= uint.MaxValue)
    {
        return uint.MaxValue;
    }

    if (value <= uint.MinValue)
    {
        return uint.MinValue;
    }

    return (uint)value;
}
/// <summary>
/// Converts a double-precision value to an unsigned 64-bit integer with saturation.
/// </summary>
/// <param name="value">Value to convert.</param>
/// <returns>
/// 0 for NaN and for values at or below zero, <see cref="ulong.MaxValue"/> when the value
/// reaches the upper bound, otherwise the value cast to <c>ulong</c>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong SatF64ToU64(double value)
{
    if (double.IsNaN(value))
    {
        return 0;
    }

    if (value >= ulong.MaxValue)
    {
        return ulong.MaxValue;
    }

    if (value <= ulong.MinValue)
    {
        return ulong.MinValue;
    }

    return (ulong)value;
}
/// <summary>
/// Rounds a double to an integral value using the rounding mode reported by
/// <c>state.FPRoundingMode()</c>.
/// </summary>
/// <param name="value">Value to round.</param>
/// <param name="state">Thread state the rounding mode is read from.</param>
/// <returns>The rounded value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double Round(double value, CpuThreadState state)
{
    RoundMode mode = state.FPRoundingMode();

    if (mode == RoundMode.ToNearest)
    {
        // Math.Round with no mode argument resolves ties to even.
        return Math.Round(value);
    }

    if (mode == RoundMode.TowardsPlusInfinity)
    {
        return Math.Ceiling(value);
    }

    if (mode == RoundMode.TowardsMinusInfinity)
    {
        return Math.Floor(value);
    }

    // Any remaining mode is handled as RoundMode.TowardsZero.
    return Math.Truncate(value);
}
/// <summary>
/// Rounds a float to an integral value using the rounding mode reported by
/// <c>state.FPRoundingMode()</c>.
/// </summary>
/// <param name="value">Value to round.</param>
/// <param name="state">Thread state the rounding mode is read from.</param>
/// <returns>The rounded value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float RoundF(float value, CpuThreadState state)
{
    RoundMode mode = state.FPRoundingMode();

    if (mode == RoundMode.ToNearest)
    {
        // MathF.Round with no mode argument resolves ties to even.
        return MathF.Round(value);
    }

    if (mode == RoundMode.TowardsPlusInfinity)
    {
        return MathF.Ceiling(value);
    }

    if (mode == RoundMode.TowardsMinusInfinity)
    {
        return MathF.Floor(value);
    }

    // Any remaining mode is handled as RoundMode.TowardsZero.
    return MathF.Truncate(value);
}
/// <summary>
/// Scalar double rounding through the SSE4.1 <c>Round*Scalar</c> intrinsics, with the variant
/// selected by <c>state.FPRoundingMode()</c>.
/// </summary>
/// <param name="upper">Passed as the first operand of the scalar intrinsic.</param>
/// <param name="value">Passed as the second operand of the scalar intrinsic.</param>
/// <param name="state">Thread state the rounding mode is read from.</param>
/// <returns>The vector returned by the selected intrinsic.</returns>
/// <exception cref="PlatformNotSupportedException">SSE4.1 is not available.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<double> Sse41ScalarRound(Vector128<double> upper, Vector128<double> value, CpuThreadState state)
{
    if (!Sse41.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    RoundMode mode = state.FPRoundingMode();

    if (mode == RoundMode.ToNearest)
    {
        return Sse41.RoundToNearestIntegerScalar(upper, value); // ties to even
    }

    if (mode == RoundMode.TowardsPlusInfinity)
    {
        return Sse41.RoundToPositiveInfinityScalar(upper, value);
    }

    if (mode == RoundMode.TowardsMinusInfinity)
    {
        return Sse41.RoundToNegativeInfinityScalar(upper, value);
    }

    // Any remaining mode is handled as RoundMode.TowardsZero.
    return Sse41.RoundToZeroScalar(upper, value);
}
/// <summary>
/// Scalar float rounding through the SSE4.1 <c>Round*Scalar</c> intrinsics, with the variant
/// selected by <c>state.FPRoundingMode()</c>.
/// </summary>
/// <param name="upper">Passed as the first operand of the scalar intrinsic.</param>
/// <param name="value">Passed as the second operand of the scalar intrinsic.</param>
/// <param name="state">Thread state the rounding mode is read from.</param>
/// <returns>The vector returned by the selected intrinsic.</returns>
/// <exception cref="PlatformNotSupportedException">SSE4.1 is not available.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> Sse41ScalarRoundF(Vector128<float> upper, Vector128<float> value, CpuThreadState state)
{
    if (!Sse41.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    RoundMode mode = state.FPRoundingMode();

    if (mode == RoundMode.ToNearest)
    {
        return Sse41.RoundToNearestIntegerScalar(upper, value); // ties to even
    }

    if (mode == RoundMode.TowardsPlusInfinity)
    {
        return Sse41.RoundToPositiveInfinityScalar(upper, value);
    }

    if (mode == RoundMode.TowardsMinusInfinity)
    {
        return Sse41.RoundToNegativeInfinityScalar(upper, value);
    }

    // Any remaining mode is handled as RoundMode.TowardsZero.
    return Sse41.RoundToZeroScalar(upper, value);
}
/// <summary>
/// Rounds every double element of <paramref name="value"/> through the SSE4.1 rounding
/// intrinsics, with the variant selected by <c>state.FPRoundingMode()</c>.
/// </summary>
/// <param name="value">Vector to round.</param>
/// <param name="state">Thread state the rounding mode is read from.</param>
/// <returns>The vector returned by the selected intrinsic.</returns>
/// <exception cref="PlatformNotSupportedException">SSE4.1 is not available.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<double> Sse41VectorRound(Vector128<double> value, CpuThreadState state)
{
    if (!Sse41.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    RoundMode mode = state.FPRoundingMode();

    if (mode == RoundMode.ToNearest)
    {
        return Sse41.RoundToNearestInteger(value); // ties to even
    }

    if (mode == RoundMode.TowardsPlusInfinity)
    {
        return Sse41.RoundToPositiveInfinity(value);
    }

    if (mode == RoundMode.TowardsMinusInfinity)
    {
        return Sse41.RoundToNegativeInfinity(value);
    }

    // Any remaining mode is handled as RoundMode.TowardsZero.
    return Sse41.RoundToZero(value);
}
/// <summary>
/// Rounds every float element of <paramref name="value"/> through the SSE4.1 rounding
/// intrinsics, with the variant selected by <c>state.FPRoundingMode()</c>.
/// </summary>
/// <param name="value">Vector to round.</param>
/// <param name="state">Thread state the rounding mode is read from.</param>
/// <returns>The vector returned by the selected intrinsic.</returns>
/// <exception cref="PlatformNotSupportedException">SSE4.1 is not available.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> Sse41VectorRoundF(Vector128<float> value, CpuThreadState state)
{
    if (!Sse41.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    RoundMode mode = state.FPRoundingMode();

    if (mode == RoundMode.ToNearest)
    {
        return Sse41.RoundToNearestInteger(value); // ties to even
    }

    if (mode == RoundMode.TowardsPlusInfinity)
    {
        return Sse41.RoundToPositiveInfinity(value);
    }

    if (mode == RoundMode.TowardsMinusInfinity)
    {
        return Sse41.RoundToNegativeInfinity(value);
    }

    // Any remaining mode is handled as RoundMode.TowardsZero.
    return Sse41.RoundToZero(value);
}
/// <summary>
/// Byte table lookup with one table vector; forwards to <c>Tbl</c> with a byte count of 8.
/// </summary>
/// <param name="vector">Vector whose bytes are used as table indices.</param>
/// <param name="tb0">The table vector.</param>
public static Vector128<float> Tbl1_V64(Vector128<float> vector, Vector128<float> tb0)
    => Tbl(vector, 8, tb0);
/// <summary>
/// Byte table lookup with one table vector; forwards to <c>Tbl</c> with a byte count of 16.
/// </summary>
/// <param name="vector">Vector whose bytes are used as table indices.</param>
/// <param name="tb0">The table vector.</param>
public static Vector128<float> Tbl1_V128(Vector128<float> vector, Vector128<float> tb0)
    => Tbl(vector, 16, tb0);
/// <summary>
/// Byte table lookup with two table vectors; forwards to <c>Tbl</c> with a byte count of 8.
/// </summary>
/// <param name="vector">Vector whose bytes are used as table indices.</param>
/// <param name="tb0">First table vector.</param>
/// <param name="tb1">Second table vector.</param>
public static Vector128<float> Tbl2_V64(Vector128<float> vector, Vector128<float> tb0, Vector128<float> tb1)
    => Tbl(vector, 8, tb0, tb1);
/// <summary>
/// Byte table lookup with two table vectors; forwards to <c>Tbl</c> with a byte count of 16.
/// </summary>
/// <param name="vector">Vector whose bytes are used as table indices.</param>
/// <param name="tb0">First table vector.</param>
/// <param name="tb1">Second table vector.</param>
public static Vector128<float> Tbl2_V128(Vector128<float> vector, Vector128<float> tb0, Vector128<float> tb1)
    => Tbl(vector, 16, tb0, tb1);
/// <summary>
/// Byte table lookup with three table vectors; forwards to <c>Tbl</c> with a byte count of 8.
/// </summary>
/// <param name="vector">Vector whose bytes are used as table indices.</param>
/// <param name="tb0">First table vector.</param>
/// <param name="tb1">Second table vector.</param>
/// <param name="tb2">Third table vector.</param>
public static Vector128<float> Tbl3_V64(
    Vector128<float> vector,
    Vector128<float> tb0,
    Vector128<float> tb1,
    Vector128<float> tb2)
    => Tbl(vector, 8, tb0, tb1, tb2);
/// <summary>
/// Byte table lookup with three table vectors; forwards to <c>Tbl</c> with a byte count of 16.
/// </summary>
/// <param name="vector">Vector whose bytes are used as table indices.</param>
/// <param name="tb0">First table vector.</param>
/// <param name="tb1">Second table vector.</param>
/// <param name="tb2">Third table vector.</param>
public static Vector128<float> Tbl3_V128(
    Vector128<float> vector,
    Vector128<float> tb0,
    Vector128<float> tb1,
    Vector128<float> tb2)
    => Tbl(vector, 16, tb0, tb1, tb2);
/// <summary>
/// Byte table lookup with four table vectors; forwards to <c>Tbl</c> with a byte count of 8.
/// </summary>
/// <param name="vector">Vector whose bytes are used as table indices.</param>
/// <param name="tb0">First table vector.</param>
/// <param name="tb1">Second table vector.</param>
/// <param name="tb2">Third table vector.</param>
/// <param name="tb3">Fourth table vector.</param>
public static Vector128<float> Tbl4_V64(
    Vector128<float> vector,
    Vector128<float> tb0,
    Vector128<float> tb1,
    Vector128<float> tb2,
    Vector128<float> tb3)
    => Tbl(vector, 8, tb0, tb1, tb2, tb3);
/// <summary>
/// Byte table lookup with four table vectors; forwards to <c>Tbl</c> with a byte count of 16.
/// </summary>
/// <param name="vector">Vector whose bytes are used as table indices.</param>
/// <param name="tb0">First table vector.</param>
/// <param name="tb1">Second table vector.</param>
/// <param name="tb2">Third table vector.</param>
/// <param name="tb3">Fourth table vector.</param>
public static Vector128<float> Tbl4_V128(
    Vector128<float> vector,
    Vector128<float> tb0,
    Vector128<float> tb1,
    Vector128<float> tb2,
    Vector128<float> tb3)
    => Tbl(vector, 16, tb0, tb1, tb2, tb3);
/// <summary>
/// Shared byte table lookup. The table vectors are flattened into one byte array (16 bytes per
/// vector, in argument order). Each of the first <paramref name="bytes"/> bytes of
/// <paramref name="vector"/> is then used as an index into that array.
/// </summary>
/// <param name="vector">Vector whose bytes are the table indices.</param>
/// <param name="bytes">Number of index bytes to process, starting at byte 0.</param>
/// <param name="tb">Table vectors.</param>
/// <returns>
/// A vector that starts out zeroed and receives the looked-up byte at every processed position
/// whose index falls inside the table; all other bytes stay zero.
/// </returns>
private static Vector128<float> Tbl(Vector128<float> vector, int bytes, params Vector128<float>[] tb)
{
    // Flatten the table vectors into a single contiguous byte array.
    byte[] lookup = new byte[tb.Length * 16];

    for (byte reg = 0; reg < tb.Length; reg++)
    {
        Vector128<float> source = tb[reg];

        for (byte lane = 0; lane < 16; lane++)
        {
            lookup[reg * 16 + lane] = (byte)VectorExtractIntZx(source, lane, 0);
        }
    }

    Vector128<float> result = default(Vector128<float>);

    for (byte pos = 0; pos < bytes; pos++)
    {
        byte selector = (byte)VectorExtractIntZx(vector, pos, 0);

        // Selectors past the end of the table leave the zero-initialised byte untouched.
        if (selector >= lookup.Length)
        {
            continue;
        }

        result = VectorInsertInt(lookup[selector], result, pos, 0);
    }

    return result;
}
/// <summary>
/// Reads the 64-bit element at <paramref name="index"/> of <paramref name="vector"/> and
/// reinterprets its bits as a double.
/// </summary>
/// <param name="vector">Source vector.</param>
/// <param name="index">Index of the 64-bit element.</param>
/// <returns>The element's bits reinterpreted as a double.</returns>
/// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is available.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double VectorExtractDouble(Vector128<float> vector, byte index)
{
    if (Sse41.IsSupported)
    {
        long bits = Sse41.Extract(Sse.StaticCast<float, long>(vector), index);

        return BitConverter.Int64BitsToDouble(bits);
    }

    if (Sse2.IsSupported)
    {
        // Fallback: read the 64-bit element with the integer helper (size 3).
        ulong bits = VectorExtractIntZx(vector, index, 3);

        return BitConverter.Int64BitsToDouble((long)bits);
    }

    throw new PlatformNotSupportedException();
}
/// <summary>
/// Reads an integer element from <paramref name="vector"/> and sign-extends it to 64 bits.
/// </summary>
/// <param name="vector">Source vector.</param>
/// <param name="index">Element index, counted in elements of the selected width.</param>
/// <param name="size">Element width selector: 0 = 8-bit, 1 = 16-bit, 2 = 32-bit, 3 = 64-bit.</param>
/// <returns>The sign-extended element.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is not 0..3.</exception>
/// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is available.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long VectorExtractIntSx(Vector128<float> vector, byte index, int size)
{
    if (Sse41.IsSupported)
    {
        if (size == 0)
        {
            return (sbyte)Sse41.Extract(Sse.StaticCast<float, byte>(vector), index);
        }

        if (size == 1)
        {
            return (short)Sse2.Extract(Sse.StaticCast<float, ushort>(vector), index);
        }

        if (size == 2)
        {
            return Sse41.Extract(Sse.StaticCast<float, int>(vector), index);
        }

        if (size == 3)
        {
            return Sse41.Extract(Sse.StaticCast<float, long>(vector), index);
        }

        throw new ArgumentOutOfRangeException(nameof(size));
    }

    if (!Sse2.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    // SSE2 fallback: validate first, then narrow the zero-extended value to the
    // signed type of the requested width so the conversion to long sign-extends.
    if (size < 0 || size > 3)
    {
        throw new ArgumentOutOfRangeException(nameof(size));
    }

    ulong raw = VectorExtractIntZx(vector, index, size);

    if (size == 0)
    {
        return (sbyte)raw;
    }

    if (size == 1)
    {
        return (short)raw;
    }

    if (size == 2)
    {
        return (int)raw;
    }

    return (long)raw;
}
/// <summary>
/// Reads an integer element from <paramref name="vector"/> and zero-extends it to 64 bits.
/// </summary>
/// <param name="vector">Source vector.</param>
/// <param name="index">Element index, counted in elements of the selected width.</param>
/// <param name="size">Element width selector: 0 = 8-bit, 1 = 16-bit, 2 = 32-bit, 3 = 64-bit.</param>
/// <returns>The zero-extended element.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is not 0..3.</exception>
/// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is available.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong VectorExtractIntZx(Vector128<float> vector, byte index, int size)
{
    if (Sse41.IsSupported)
    {
        if (size == 0)
        {
            return Sse41.Extract(Sse.StaticCast<float, byte>(vector), index);
        }

        if (size == 1)
        {
            return Sse2.Extract(Sse.StaticCast<float, ushort>(vector), index);
        }

        if (size == 2)
        {
            return Sse41.Extract(Sse.StaticCast<float, uint>(vector), index);
        }

        if (size == 3)
        {
            return Sse41.Extract(Sse.StaticCast<float, ulong>(vector), index);
        }

        throw new ArgumentOutOfRangeException(nameof(size));
    }

    if (Sse2.IsSupported)
    {
        // SSE2 fallback: everything is assembled from 16-bit extracts.
        // wordIdx is the index of the first 16-bit word that belongs to the element.
        int wordIdx = size == 0
            ? index >> 1
            : index << (size - 1);

        ushort word0 = Sse2.Extract(Sse.StaticCast<float, ushort>(vector), (byte)wordIdx);

        if (size == 0)
        {
            // Odd byte indices live in the upper half of the word.
            int shift = (index & 1) * 8;

            return (byte)(word0 >> shift);
        }

        if (size == 1)
        {
            return word0;
        }

        if (size == 2 || size == 3)
        {
            ushort word1 = Sse2.Extract(Sse.StaticCast<float, ushort>(vector), (byte)(wordIdx + 1));

            if (size == 2)
            {
                return (uint)(word0 | (word1 << 16));
            }

            ushort word2 = Sse2.Extract(Sse.StaticCast<float, ushort>(vector), (byte)(wordIdx + 2));
            ushort word3 = Sse2.Extract(Sse.StaticCast<float, ushort>(vector), (byte)(wordIdx + 3));

            return ((ulong)word0 << 0) |
                   ((ulong)word1 << 16) |
                   ((ulong)word2 << 32) |
                   ((ulong)word3 << 48);
        }

        throw new ArgumentOutOfRangeException(nameof(size));
    }

    throw new PlatformNotSupportedException();
}
/// <summary>
/// Reads the float element at <paramref name="index"/> of <paramref name="vector"/>.
/// </summary>
/// <param name="vector">Source vector.</param>
/// <param name="index">Index of the 32-bit element.</param>
/// <returns>The selected element.</returns>
/// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is available.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float VectorExtractSingle(Vector128<float> vector, byte index)
{
    if (Sse41.IsSupported)
    {
        return Sse41.Extract(vector, index);
    }

    if (!Sse2.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    // SSE2 fallback: rebuild the 32-bit pattern from its two 16-bit halves.
    Vector128<ushort> words = Sse.StaticCast<float, ushort>(vector);

    int lowHalf = Sse2.Extract(words, (byte)(index * 2 + 0));
    int highHalf = Sse2.Extract(words, (byte)(index * 2 + 1));

    return BitConverter.Int32BitsToSingle(lowHalf | (highHalf << 16));
}
/// <summary>
/// Returns a copy of <paramref name="vector"/> in which the 64-bit element at
/// <paramref name="index"/> holds the bit pattern of <paramref name="value"/>.
/// </summary>
/// <param name="value">Double whose bits are inserted.</param>
/// <param name="vector">Vector to insert into.</param>
/// <param name="index">Index of the 64-bit element.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInsertDouble(double value, Vector128<float> vector, byte index)
{
    ulong bits = (ulong)BitConverter.DoubleToInt64Bits(value);

    // Size 3 selects the 64-bit element width of the integer insert helper.
    return VectorInsertInt(bits, vector, index, 3);
}
/// <summary>
/// Returns a copy of <paramref name="vector"/> with one integer element replaced by the low
/// bits of <paramref name="value"/>.
/// </summary>
/// <param name="value">Value to insert; truncated to the selected element width.</param>
/// <param name="vector">Vector to insert into.</param>
/// <param name="index">Element index, counted in elements of the selected width.</param>
/// <param name="size">Element width selector: 0 = 8-bit, 1 = 16-bit, 2 = 32-bit, 3 = 64-bit.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is not 0..3.</exception>
/// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is available.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInsertInt(ulong value, Vector128<float> vector, byte index, int size)
{
    if (Sse41.IsSupported)
    {
        if (size == 0)
        {
            return Sse.StaticCast<byte, float>(Sse41.Insert(Sse.StaticCast<float, byte>(vector), (byte)value, index));
        }

        if (size == 1)
        {
            return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(vector), (ushort)value, index));
        }

        if (size == 2)
        {
            return Sse.StaticCast<uint, float>(Sse41.Insert(Sse.StaticCast<float, uint>(vector), (uint)value, index));
        }

        if (size == 3)
        {
            return Sse.StaticCast<ulong, float>(Sse41.Insert(Sse.StaticCast<float, ulong>(vector), value, index));
        }

        throw new ArgumentOutOfRangeException(nameof(size));
    }

    if (Sse2.IsSupported)
    {
        // SSE2 fallback: everything is written through 16-bit inserts.
        Vector128<ushort> words = Sse.StaticCast<float, ushort>(vector);

        // Index of the first 16-bit word that belongs to the element.
        int wordIdx = size == 0
            ? index >> 1
            : index << (size - 1);

        if (size == 0)
        {
            // Read-modify-write of the containing word: keep the other byte, replace this one.
            ushort current = Sse2.Extract(Sse.StaticCast<float, ushort>(vector), (byte)wordIdx);
            int shift = (index & 1) * 8;

            ushort merged = (ushort)((current & (0xff00 >> shift)) | ((byte)value << shift));

            return Sse.StaticCast<ushort, float>(Sse2.Insert(words, merged, (byte)wordIdx));
        }

        if (size == 1)
        {
            return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(vector), (ushort)value, index));
        }

        if (size == 2 || size == 3)
        {
            words = Sse2.Insert(words, (ushort)(value >> 0), (byte)(wordIdx + 0));
            words = Sse2.Insert(words, (ushort)(value >> 16), (byte)(wordIdx + 1));

            if (size == 3)
            {
                words = Sse2.Insert(words, (ushort)(value >> 32), (byte)(wordIdx + 2));
                words = Sse2.Insert(words, (ushort)(value >> 48), (byte)(wordIdx + 3));
            }

            return Sse.StaticCast<ushort, float>(words);
        }

        throw new ArgumentOutOfRangeException(nameof(size));
    }

    throw new PlatformNotSupportedException();
}
/// <summary>
/// Returns a copy of <paramref name="vector"/> with the float element at
/// <paramref name="index"/> replaced by <paramref name="value"/>.
/// </summary>
/// <param name="value">Value to insert.</param>
/// <param name="vector">Vector to insert into.</param>
/// <param name="index">Index of the 32-bit element, 0..3.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> is greater than 3.</exception>
/// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is available.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInsertSingle(float value, Vector128<float> vector, byte index)
{
    if (Sse41.IsSupported)
    {
        //Note: The if/else if is necessary to enable the JIT to
        //produce a single INSERTPS instruction instead of the
        //jump table fallback.
        if (index == 0)
        {
            return Sse41.Insert(vector, value, 0x00);
        }
        else if (index == 1)
        {
            return Sse41.Insert(vector, value, 0x10);
        }
        else if (index == 2)
        {
            return Sse41.Insert(vector, value, 0x20);
        }
        else if (index == 3)
        {
            return Sse41.Insert(vector, value, 0x30);
        }
        else
        {
            throw new ArgumentOutOfRangeException(nameof(index));
        }
    }
    else if (Sse2.IsSupported)
    {
        // Reject bad indices exactly like the SSE4.1 path does. Without this check an index
        // above 3 would yield 16-bit lane numbers above 7 below, and the insert would
        // presumably land in some other lane instead of failing.
        if (index > 3)
        {
            throw new ArgumentOutOfRangeException(nameof(index));
        }

        // Write the 32-bit pattern as two consecutive 16-bit halves.
        int intValue = BitConverter.SingleToInt32Bits(value);

        ushort low = (ushort)(intValue >> 0);
        ushort high = (ushort)(intValue >> 16);

        Vector128<ushort> shortVector = Sse.StaticCast<float, ushort>(vector);

        shortVector = Sse2.Insert(shortVector, low, (byte)(index * 2 + 0));
        shortVector = Sse2.Insert(shortVector, high, (byte)(index * 2 + 1));

        return Sse.StaticCast<ushort, float>(shortVector);
    }

    throw new PlatformNotSupportedException();
}
/// <summary>
/// Inserts <paramref name="value"/> into element 0 of <paramref name="vector"/> with the
/// SSE4.1 insert intrinsic, using a mask that zeroes the upper elements.
/// </summary>
/// <param name="value">Value placed in element 0.</param>
/// <param name="vector">Vector passed as the insert destination.</param>
/// <exception cref="PlatformNotSupportedException">SSE4.1 is not available.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> Sse41VectorInsertScalarSingle(float value, Vector128<float> vector)
{
    // Same explicit capability guard as the other Sse41* helpers in this class.
    if (!Sse41.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    //Note: 0b1110 is the mask to zero the upper bits.
    return Sse41.Insert(vector, value, 0b1110);
}
/// <summary>
/// Returns the vector produced by <c>Sse.SetZeroVector128()</c>.
/// </summary>
/// <exception cref="PlatformNotSupportedException">SSE is not available.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorSingleZero()
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.SetZeroVector128();
}
}
}