Ryujinx/ARMeilleure/Instructions/InstEmitSimdHelper.cs
riperiperi 9db73f74cf
ARMeilleure: Respect FZ/RM flags for all floating point operations (#4618)
* ARMeilleure: Respect Fz flag for all floating point operations.

This is a change in strategy for emulating the Fz FPCR flag. Before, it was set before instructions that "needed it" and reset after. However, this missed a few hot instructions like the multiplication instruction, and the entirety of A32.

The new strategy is to set the Fz flag only in the following circumstances:

- Set to match FPCR before translated functions/loops are executed.
- Reset when calling SoftFloat methods, set when returning.
- Reset when exiting execution.

This allows us to remove the code around the existing Fz aware instructions, and get the accuracy benefits on all floating point instructions executed while in translated code.

Single step executions now need to be called with a context wrapper - right now it just contains the Fz flag initialization, and won't actually do anything on ARM.

This fixes a bug in Breath of the Wild where some physics interactions could randomly crash the game due to subnormal values not flushing to zero.

This is a draft right now because I need to answer the following questions:
- Does dotnet avoid changing the value of Mxcsr?
- Is it a good idea to assume that? Or should the flag set/restore be done on every managed method call, not just softfloat?
- If we assume that, do we want a unit test to verify the behaviour?

I recommend testing a bunch of games, especially games affected when this was originally added, such as #1611.

* Remove unused method

* Use FMA for Fmadd, Fmsub, Fnmadd, Fnmsub, Fmla, Fmls

...when available.

Similar implementation to A32

* Use FMA for Frecps, Frsqrts

* Don't set DAZ.

* Add round mode to ARM FP mode

* Fix mistakes

* Add test for FP state when calling managed methods

* Add explanatory comment to test.

* Cleanup

* Add A64 FPCR flags

* Vrintx_S A32 fast path on A64 backend

* Address feedback 1, re-enable DAZ

* Fix FMA instructions By Elem

* Address feedback
2023-04-10 12:22:58 +02:00

2089 lines
76 KiB
C#

using ARMeilleure.CodeGen.X86;
using ARMeilleure.Decoders;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.State;
using ARMeilleure.Translation;
using System;
using System.Diagnostics;
using System.Reflection;
using static ARMeilleure.Instructions.InstEmitHelper;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
namespace ARMeilleure.Instructions
{
using Func1I = Func<Operand, Operand>;
using Func2I = Func<Operand, Operand, Operand>;
using Func3I = Func<Operand, Operand, Operand, Operand>;
static class InstEmitSimdHelper
{
#region "Masks"
/// <summary>
/// Byte-index tables with one 64-bit entry per element size (B, H, S). Every byte of an entry
/// is a source byte index; together they name the bytes of the even-numbered elements of that size.
/// </summary>
/// <remarks>
/// Presumably used as byte-shuffle control masks; the consumers are not part of this section.
/// </remarks>
public static readonly long[] EvenMasks =
{
    0x0E0C0A0806040200L, // B: bytes 14, 12, 10, 8, 6, 4, 2, 0
    0x0D0C090805040100L, // H: bytes 13-12, 9-8, 5-4, 1-0
    0x0B0A090803020100L, // S: bytes 11-8, 3-0
};
/// <summary>
/// Byte-index tables with one 64-bit entry per element size (B, H, S). Every byte of an entry
/// is a source byte index; together they name the bytes of the odd-numbered elements of that size.
/// </summary>
/// <remarks>
/// Presumably used as byte-shuffle control masks; the consumers are not part of this section.
/// </remarks>
public static readonly long[] OddMasks =
{
    0x0F0D0B0907050301L, // B: bytes 15, 13, 11, 9, 7, 5, 3, 1
    0x0F0E0B0A07060302L, // H: bytes 15-14, 11-10, 7-6, 3-2
    0x0F0E0D0C07060504L, // S: bytes 15-12, 7-4
};
/// <summary>
/// 64-bit pattern in which every byte equals 0x80 (128), i.e. only the top bit of each byte is set.
/// </summary>
/// <remarks>
/// Presumably a shuffle-control value that zeroes every destination byte; confirm against the callers.
/// </remarks>
public static readonly long ZeroMask = unchecked((long)0x8080808080808080UL);
/// <summary>
/// Builds a 64-bit value made of eight one-byte rows, obtained by moving the rows of an
/// identity pattern by <paramref name="shift"/> whole bytes.
/// </summary>
/// <param name="shift">
/// Number of bit positions to shift by. Non-negative values move the rows towards the low end
/// of the 64-bit value, negative values towards the high end (presumably the opposite shift
/// direction for the consumer of the matrix).
/// </param>
/// <returns>
/// The shifted pattern, or 0 when the magnitude of <paramref name="shift"/> is 8 or more,
/// because every row has then been shifted out.
/// </returns>
public static ulong X86GetGf2p8LogicalShiftLeft(int shift)
{
    // C# only uses the low 6 bits of a 64-bit shift count, so a count of 64 (shift == +/-8)
    // would wrap around to 0 and hand back the unshifted identity. Negating int.MinValue
    // would overflow as well. All of these cases have no rows left, so answer them directly.
    if (shift <= -8 || shift >= 8)
    {
        return 0UL;
    }

    // Row k (counted from the most significant byte) has only bit k set.
    const ulong Identity = 0x0102040810204080UL;

    return shift >= 0
        ? Identity >> (shift * 8)
        : Identity << (-shift * 8);
}
#endregion
#region "X86 SSE Intrinsics"
// Lookup tables of X86 intrinsics. Within each table the entries are ordered by the width
// suffix of the instruction name (b, w, d, q). They are presumably indexed by the element
// size of the current opcode; the users are outside this section.

/// <summary>Packed integer add: b, w, d, q.</summary>
public static readonly Intrinsic[] X86PaddInstruction =
{
    Intrinsic.X86Paddb,
    Intrinsic.X86Paddw,
    Intrinsic.X86Paddd,
    Intrinsic.X86Paddq,
};

/// <summary>Packed compare for equality: b, w, d, q.</summary>
public static readonly Intrinsic[] X86PcmpeqInstruction =
{
    Intrinsic.X86Pcmpeqb,
    Intrinsic.X86Pcmpeqw,
    Intrinsic.X86Pcmpeqd,
    Intrinsic.X86Pcmpeqq,
};

/// <summary>Packed compare for greater-than: b, w, d, q.</summary>
public static readonly Intrinsic[] X86PcmpgtInstruction =
{
    Intrinsic.X86Pcmpgtb,
    Intrinsic.X86Pcmpgtw,
    Intrinsic.X86Pcmpgtd,
    Intrinsic.X86Pcmpgtq,
};

/// <summary>Packed signed maximum: b, w, d (no q entry).</summary>
public static readonly Intrinsic[] X86PmaxsInstruction =
{
    Intrinsic.X86Pmaxsb,
    Intrinsic.X86Pmaxsw,
    Intrinsic.X86Pmaxsd,
};

/// <summary>Packed unsigned maximum: b, w, d (no q entry).</summary>
public static readonly Intrinsic[] X86PmaxuInstruction =
{
    Intrinsic.X86Pmaxub,
    Intrinsic.X86Pmaxuw,
    Intrinsic.X86Pmaxud,
};

/// <summary>Packed signed minimum: b, w, d (no q entry).</summary>
public static readonly Intrinsic[] X86PminsInstruction =
{
    Intrinsic.X86Pminsb,
    Intrinsic.X86Pminsw,
    Intrinsic.X86Pminsd,
};

/// <summary>Packed unsigned minimum: b, w, d (no q entry).</summary>
public static readonly Intrinsic[] X86PminuInstruction =
{
    Intrinsic.X86Pminub,
    Intrinsic.X86Pminuw,
    Intrinsic.X86Pminud,
};

/// <summary>Packed move with sign extension to the next wider size: bw, wd, dq.</summary>
public static readonly Intrinsic[] X86PmovsxInstruction =
{
    Intrinsic.X86Pmovsxbw,
    Intrinsic.X86Pmovsxwd,
    Intrinsic.X86Pmovsxdq,
};

/// <summary>Packed move with zero extension to the next wider size: bw, wd, dq.</summary>
public static readonly Intrinsic[] X86PmovzxInstruction =
{
    Intrinsic.X86Pmovzxbw,
    Intrinsic.X86Pmovzxwd,
    Intrinsic.X86Pmovzxdq,
};

/// <summary>Packed shift left logical: w, d, q. Slot 0 holds the zero-valued placeholder because no byte variant is listed.</summary>
public static readonly Intrinsic[] X86PsllInstruction =
{
    0,
    Intrinsic.X86Psllw,
    Intrinsic.X86Pslld,
    Intrinsic.X86Psllq,
};

/// <summary>Packed shift right arithmetic: w, d. Slot 0 holds the zero-valued placeholder because no byte variant is listed.</summary>
public static readonly Intrinsic[] X86PsraInstruction =
{
    0,
    Intrinsic.X86Psraw,
    Intrinsic.X86Psrad,
};

/// <summary>Packed shift right logical: w, d, q. Slot 0 holds the zero-valued placeholder because no byte variant is listed.</summary>
public static readonly Intrinsic[] X86PsrlInstruction =
{
    0,
    Intrinsic.X86Psrlw,
    Intrinsic.X86Psrld,
    Intrinsic.X86Psrlq,
};

/// <summary>Packed integer subtract: b, w, d, q.</summary>
public static readonly Intrinsic[] X86PsubInstruction =
{
    Intrinsic.X86Psubb,
    Intrinsic.X86Psubw,
    Intrinsic.X86Psubd,
    Intrinsic.X86Psubq,
};

/// <summary>Unpack and interleave high-order elements: bw, wd, dq, qdq.</summary>
public static readonly Intrinsic[] X86PunpckhInstruction =
{
    Intrinsic.X86Punpckhbw,
    Intrinsic.X86Punpckhwd,
    Intrinsic.X86Punpckhdq,
    Intrinsic.X86Punpckhqdq,
};

/// <summary>Unpack and interleave low-order elements: bw, wd, dq, qdq.</summary>
public static readonly Intrinsic[] X86PunpcklInstruction =
{
    Intrinsic.X86Punpcklbw,
    Intrinsic.X86Punpcklwd,
    Intrinsic.X86Punpckldq,
    Intrinsic.X86Punpcklqdq,
};
#endregion
/// <summary>
/// Emits code that programs the host floating-point control register from the guest FP flags.
/// </summary>
/// <param name="context">Emitter that receives the generated operations.</param>
/// <param name="getFpFlag">Callback that returns an operand for the requested guest <see cref="FPState"/> flag.</param>
/// <remarks>
/// With SSE2 enabled the MXCSR register is rewritten; otherwise, with AdvSimd enabled, the host FPCR
/// is rewritten. When neither optimization is enabled nothing is emitted.
/// </remarks>
public static void EnterArmFpMode(EmitterContext context, Func<FPState, Operand> getFpFlag)
{
if (Optimizations.UseSse2)
{
// Start from the current MXCSR so that bits not touched below are carried over unchanged.
Operand mxcsr = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr);
Operand fzTrue = getFpFlag(FPState.FzFlag);
Operand r0True = getFpFlag(FPState.RMode0Flag);
Operand r1True = getFpFlag(FPState.RMode1Flag);
// Clear the flush/denormal and rounding-control bits before applying the guest configuration.
mxcsr = context.BitwiseAnd(mxcsr, Const(~(int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Rhi | Mxcsr.Rlo)));
// When the guest FZ flag is set, turn on Ftz and Daz and also set the Um and Dm bits
// (presumably the underflow/denormal exception masks - TODO confirm against the Mxcsr enum).
mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(fzTrue, Const((int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Um | Mxcsr.Dm)), Const(0)));
// X86 round modes in order: nearest, negative, positive, zero
// ARM round modes in order: nearest, positive, negative, zero
// Read the bits backwards to correct this: RMode0 selects Rhi and RMode1 selects Rlo.
mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(r0True, Const((int)Mxcsr.Rhi), Const(0)));
mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(r1True, Const((int)Mxcsr.Rlo), Const(0)));
context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, mxcsr);
}
else if (Optimizations.UseAdvSimd)
{
// Same read-modify-write sequence on the host FPCR; here the rounding bits map one to one.
Operand fpcr = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr);
Operand fzTrue = getFpFlag(FPState.FzFlag);
Operand r0True = getFpFlag(FPState.RMode0Flag);
Operand r1True = getFpFlag(FPState.RMode1Flag);
fpcr = context.BitwiseAnd(fpcr, Const(~(int)(FPCR.Fz | FPCR.RMode0 | FPCR.RMode1)));
fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(fzTrue, Const((int)FPCR.Fz), Const(0)));
fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(r0True, Const((int)FPCR.RMode0), Const(0)));
fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(r1True, Const((int)FPCR.RMode1), Const(0)));
context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, fpcr);
// TODO: Restore FPSR
}
}
/// <summary>
/// Emits code that removes the guest flush-to-zero and rounding configuration from the host
/// floating-point control register.
/// </summary>
/// <param name="context">Emitter that receives the generated operations.</param>
/// <param name="setFpFlag">Callback for writing guest FP flags; not used by the code below.</param>
/// <remarks>
/// With SSE2 enabled the MXCSR register is rewritten; otherwise, with AdvSimd enabled, the host FPCR
/// is rewritten. When neither optimization is enabled nothing is emitted.
/// </remarks>
public static void ExitArmFpMode(EmitterContext context, Action<FPState, Operand> setFpFlag)
{
if (Optimizations.UseSse2)
{
Operand mxcsr = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr);
// Unset round mode (to nearest) and ftz.
// Only Ftz, Daz, Rhi and Rlo are cleared; every other MXCSR bit is written back as read.
mxcsr = context.BitwiseAnd(mxcsr, Const(~(int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Rhi | Mxcsr.Rlo)));
context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, mxcsr);
// Status flags would be stored here if they were used.
}
else if (Optimizations.UseAdvSimd)
{
Operand fpcr = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr);
// Unset round mode (to nearest) and fz.
fpcr = context.BitwiseAnd(fpcr, Const(~(int)(FPCR.Fz | FPCR.RMode0 | FPCR.RMode1)));
context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, fpcr);
// TODO: Store FPSR
}
}
/// <summary>
/// Computes the left-shift amount of a shift-by-immediate opcode as the raw immediate minus
/// <c>8 &lt;&lt; op.Size</c>.
/// </summary>
/// <param name="op">Decoded shift-by-immediate opcode.</param>
/// <returns>The shift amount derived from <c>op.Imm</c> and <c>op.Size</c>.</returns>
public static int GetImmShl(OpCodeSimdShImm op)
{
    int elementBits = 8 << op.Size;

    return op.Imm - elementBits;
}
/// <summary>
/// Computes the right-shift amount of a shift-by-immediate opcode as
/// <c>8 &lt;&lt; (op.Size + 1)</c> minus the raw immediate.
/// </summary>
/// <param name="op">Decoded shift-by-immediate opcode.</param>
/// <returns>The shift amount derived from <c>op.Imm</c> and <c>op.Size</c>.</returns>
public static int GetImmShr(OpCodeSimdShImm op)
{
    int twiceElementBits = 8 << (op.Size + 1);

    return twiceElementBits - op.Imm;
}
/// <summary>Creates a scalar vector holding the raw 32-bit pattern of <paramref name="value"/>.</summary>
public static Operand X86GetScalar(ArmEmitterContext context, float value)
    => X86GetScalar(context, BitConverter.SingleToInt32Bits(value));

/// <summary>Creates a scalar vector holding the raw 64-bit pattern of <paramref name="value"/>.</summary>
public static Operand X86GetScalar(ArmEmitterContext context, double value)
    => X86GetScalar(context, BitConverter.DoubleToInt64Bits(value));

/// <summary>Creates a scalar vector from a 32-bit integer constant.</summary>
public static Operand X86GetScalar(ArmEmitterContext context, int value)
    => context.VectorCreateScalar(Const(value));

/// <summary>Creates a scalar vector from a 64-bit integer constant.</summary>
public static Operand X86GetScalar(ArmEmitterContext context, long value)
    => context.VectorCreateScalar(Const(value));
/// <summary>Broadcasts the raw 32-bit pattern of <paramref name="value"/> (see the <c>int</c> overload).</summary>
public static Operand X86GetAllElements(ArmEmitterContext context, float value)
    => X86GetAllElements(context, BitConverter.SingleToInt32Bits(value));

/// <summary>Broadcasts the raw 64-bit pattern of <paramref name="value"/> (see the <c>long</c> overload).</summary>
public static Operand X86GetAllElements(ArmEmitterContext context, double value)
    => X86GetAllElements(context, BitConverter.DoubleToInt64Bits(value));

/// <summary>
/// Replicates a 16-bit value four times into a 64-bit pattern and forwards it to the <c>long</c> overload.
/// </summary>
public static Operand X86GetAllElements(ArmEmitterContext context, short value)
{
    // Go through ushort so a negative value does not sign-extend into the upper copies.
    ulong pattern = (ushort)value;

    pattern = pattern << 16 | pattern;
    pattern = pattern << 32 | pattern;

    return X86GetAllElements(context, (long)pattern);
}

/// <summary>
/// Creates a scalar vector from a 32-bit constant and shuffles it with itself using Shufps
/// and a selector of 0.
/// </summary>
public static Operand X86GetAllElements(ArmEmitterContext context, int value)
{
    Operand scalar = context.VectorCreateScalar(Const(value));

    return context.AddIntrinsic(Intrinsic.X86Shufps, scalar, scalar, Const(0));
}

/// <summary>
/// Creates a scalar vector from a 64-bit constant and combines it with itself using Movlhps.
/// </summary>
public static Operand X86GetAllElements(ArmEmitterContext context, long value)
{
    Operand scalar = context.VectorCreateScalar(Const(value));

    return context.AddIntrinsic(Intrinsic.X86Movlhps, scalar, scalar);
}
/// <summary>Signed convenience overload; reinterprets both values as unsigned and forwards them.</summary>
public static Operand X86GetElements(ArmEmitterContext context, long e1, long e0)
    => X86GetElements(context, (ulong)e1, (ulong)e0);

/// <summary>
/// Builds a vector from two 64-bit constants by creating one scalar vector per value and
/// joining them with Punpcklqdq (<paramref name="e0"/> first, <paramref name="e1"/> second).
/// </summary>
public static Operand X86GetElements(ArmEmitterContext context, ulong e1, ulong e0)
{
    Operand first = context.VectorCreateScalar(Const(e0));
    Operand second = context.VectorCreateScalar(Const(e1));

    return context.AddIntrinsic(Intrinsic.X86Punpcklqdq, first, second);
}
/// <summary>
/// Translates a guest rounding mode into the immediate used by the X86 round instructions.
/// </summary>
/// <param name="roundMode">Guest rounding mode.</param>
/// <returns>Bit 3 (value 8) combined with a two-bit rounding selector.</returns>
/// <exception cref="ArgumentException"><paramref name="roundMode"/> is not one of the four handled modes.</exception>
public static int X86GetRoundControl(FPRoundingMode roundMode)
{
    // Always-set bit 3; per the Intel ROUNDPS/ROUNDSS immediate layout this suppresses the precision exception.
    const int BaseBits = 8;

    int selector;

    switch (roundMode)
    {
        case FPRoundingMode.ToNearest:
            selector = 0; // even
            break;
        case FPRoundingMode.TowardsPlusInfinity:
            selector = 2;
            break;
        case FPRoundingMode.TowardsMinusInfinity:
            selector = 1;
            break;
        case FPRoundingMode.TowardsZero:
            selector = 3;
            break;
        default:
            throw new ArgumentException($"Invalid rounding mode \"{roundMode}\".");
    }

    return BaseBits | selector;
}
/// <summary>
/// Emits a round-to-nearest with ties away from zero. A constant just below 0.5 that carries the
/// sign of each input is added, and the sum is then rounded towards zero.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="n">V128 operand holding the value(s) to round.</param>
/// <param name="scalar">True to use the scalar intrinsics and scalar constants, false for the packed ones.</param>
/// <returns>A new operand holding the rounded result; <paramref name="n"/> itself is only read.</returns>
public static Operand EmitSse41RoundToNearestWithTiesToAwayOpF(ArmEmitterContext context, Operand n, bool scalar)
{
Debug.Assert(n.Type == OperandType.V128);
// Work on a copy so the operand passed in is never reassigned.
Operand nCopy = context.Copy(n);
Operand rC = Const(X86GetRoundControl(FPRoundingMode.TowardsZero));
IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
// Bit 0 of the size selects single (clear) or double (set) precision.
if ((op.Size & 1) == 0)
{
// Isolate the sign bit of the input.
Operand signMask = scalar ? X86GetScalar(context, int.MinValue) : X86GetAllElements(context, int.MinValue);
signMask = context.AddIntrinsic(Intrinsic.X86Pand, signMask, nCopy);
// 0x3EFFFFFF == BitConverter.SingleToInt32Bits(0.5f) - 1
Operand valueMask = scalar ? X86GetScalar(context, 0x3EFFFFFF) : X86GetAllElements(context, 0x3EFFFFFF);
// Give the "just below 0.5" constant the same sign as the input.
valueMask = context.AddIntrinsic(Intrinsic.X86Por, valueMask, signMask);
nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Addss : Intrinsic.X86Addps, nCopy, valueMask);
nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Roundss : Intrinsic.X86Roundps, nCopy, rC);
}
else
{
// Same sequence with the 64-bit sign mask, constant and intrinsics.
Operand signMask = scalar ? X86GetScalar(context, long.MinValue) : X86GetAllElements(context, long.MinValue);
signMask = context.AddIntrinsic(Intrinsic.X86Pand, signMask, nCopy);
// 0x3FDFFFFFFFFFFFFFL == BitConverter.DoubleToInt64Bits(0.5d) - 1L
Operand valueMask = scalar ? X86GetScalar(context, 0x3FDFFFFFFFFFFFFFL) : X86GetAllElements(context, 0x3FDFFFFFFFFFFFFFL);
valueMask = context.AddIntrinsic(Intrinsic.X86Por, valueMask, signMask);
nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Addsd : Intrinsic.X86Addpd, nCopy, valueMask);
nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Roundsd : Intrinsic.X86Roundpd, nCopy, rC);
}
return nCopy;
}
/// <summary>
/// Emits a branch-free population count for an 8-bit value held in an I32 or I64 operand.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="op">Integer operand to count; the masks used only cover the low 8 bits.</param>
/// <returns>An operand of the same type holding the result masked to its low 4 bits.</returns>
public static Operand EmitCountSetBits8(ArmEmitterContext context, Operand op) // "size" is 8 (SIMD&FP Inst.).
{
Debug.Assert(op.Type == OperandType.I32 || op.Type == OperandType.I64);
// Step 1: each 2-bit field becomes the number of set bits in that field.
Operand op0 = context.Subtract(op, context.BitwiseAnd(context.ShiftRightUI(op, Const(1)), Const(op.Type, 0x55L)));
Operand c1 = Const(op.Type, 0x33L);
// Step 2: add neighbouring 2-bit counts so each nibble holds its own bit count.
Operand op1 = context.Add(context.BitwiseAnd(context.ShiftRightUI(op0, Const(2)), c1), context.BitwiseAnd(op0, c1));
// Step 3: add the two nibble counts together and keep the low nibble of the sum.
return context.BitwiseAnd(context.Add(op1, context.ShiftRightUI(op1, Const(4))), Const(op.Type, 0x0fL));
}
/// <summary>
/// Emits a scalar floating-point unary operation as a single intrinsic applied to Rn, then
/// clears everything above the scalar lane and writes the result to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="inst32">Intrinsic used when bit 0 of the opcode size is clear.</param>
/// <param name="inst64">Intrinsic used when bit 0 of the opcode size is set.</param>
public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;
    bool isDouble = (op.Size & 1) != 0;

    Operand source = GetVec(op.Rn);
    Operand result = context.AddIntrinsic(isDouble ? inst64 : inst32, source);

    // Keep only the scalar lane: upper 64 bits cleared for doubles, upper 96 bits for singles.
    result = isDouble
        ? context.VectorZeroUpper64(result)
        : context.VectorZeroUpper96(result);

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits a scalar floating-point binary operation as a single intrinsic applied to Rn and Rm,
/// then clears everything above the scalar lane and writes the result to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="inst32">Intrinsic used when bit 0 of the opcode size is clear.</param>
/// <param name="inst64">Intrinsic used when bit 0 of the opcode size is set.</param>
public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
    bool isDouble = (op.Size & 1) != 0;

    Operand left = GetVec(op.Rn);
    Operand right = GetVec(op.Rm);
    Operand result = context.AddIntrinsic(isDouble ? inst64 : inst32, left, right);

    // Keep only the scalar lane: upper 64 bits cleared for doubles, upper 96 bits for singles.
    result = isDouble
        ? context.VectorZeroUpper64(result)
        : context.VectorZeroUpper96(result);

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits a vector floating-point unary operation as a single intrinsic applied to Rn and
/// writes the result to Rd, clearing the upper 64 bits for 64-bit register operations.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="inst32">Intrinsic used when bit 0 of the opcode size is clear.</param>
/// <param name="inst64">Intrinsic used when bit 0 of the opcode size is set.</param>
public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;
    bool isDouble = (op.Size & 1) != 0;

    Operand source = GetVec(op.Rn);
    Operand result = context.AddIntrinsic(isDouble ? inst64 : inst32, source);

    bool halfWidth = op.RegisterSize == RegisterSize.Simd64;

    if (halfWidth)
    {
        result = context.VectorZeroUpper64(result);
    }

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits a vector floating-point binary operation as a single intrinsic applied to Rn and Rm
/// and writes the result to Rd, clearing the upper 64 bits for 64-bit register operations.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="inst32">Intrinsic used when bit 0 of the opcode size is clear.</param>
/// <param name="inst64">Intrinsic used when bit 0 of the opcode size is set.</param>
public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
    bool isDouble = (op.Size & 1) != 0;

    Operand left = GetVec(op.Rn);
    Operand right = GetVec(op.Rm);
    Operand result = context.AddIntrinsic(isDouble ? inst64 : inst32, left, right);

    bool halfWidth = op.RegisterSize == RegisterSize.Simd64;

    if (halfWidth)
    {
        result = context.VectorZeroUpper64(result);
    }

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits a call to a one-argument method of <see cref="MathF"/> (single precision) or
/// <see cref="Math"/> (double precision), chosen by bit 0 of the current opcode size.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="name">Name of the method to look up by reflection.</param>
/// <param name="n">Argument operand.</param>
/// <returns>The operand holding the call result.</returns>
public static Operand EmitUnaryMathCall(ArmEmitterContext context, string name, Operand n)
{
    IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
    bool isSingle = (op.Size & 1) == 0;

    Type mathType = isSingle ? typeof(MathF) : typeof(Math);
    Type argumentType = isSingle ? typeof(float) : typeof(double);

    MethodInfo method = mathType.GetMethod(name, new Type[] { argumentType });

    return context.Call(method, n);
}
/// <summary>
/// Emits a call to the <c>Round(value, MidpointRounding)</c> overload of <see cref="MathF"/>
/// (single precision) or <see cref="Math"/> (double precision), chosen by bit 0 of the
/// current opcode size.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="roundMode">Midpoint rounding mode, passed to the callee as an integer constant.</param>
/// <param name="n">Value operand.</param>
/// <returns>The operand holding the call result.</returns>
public static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n)
{
    IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
    bool isSingle = (op.Size & 1) == 0;

    Type mathType = isSingle ? typeof(MathF) : typeof(Math);
    Type valueType = isSingle ? typeof(float) : typeof(double);

    MethodInfo method = mathType.GetMethod(nameof(Math.Round), new Type[] { valueType, typeof(MidpointRounding) });

    return context.Call(method, n, Const((int)roundMode));
}
/// <summary>
/// Emits code that combines the two guest rounding-mode flags into one value:
/// RMode1 is shifted into bit 1 and RMode0 is OR-ed into the result.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <returns>The operand holding the combined rounding mode.</returns>
public static Operand EmitGetRoundingMode(ArmEmitterContext context)
{
    Operand highPart = context.ShiftLeft(GetFpFlag(FPState.RMode1Flag), Const(1));
    Operand lowPart = GetFpFlag(FPState.RMode0Flag);

    return context.BitwiseOr(highPart, lowPart);
}
/// <summary>
/// Emits code that rounds a scalar floating-point operand according to the guest rounding mode,
/// which is read at run time, by dispatching to the matching managed Math/MathF method.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="op">FP32 or FP64 operand to round.</param>
/// <returns>A local of the same type as <paramref name="op"/> that receives the rounded value.</returns>
public static Operand EmitRoundByRMode(ArmEmitterContext context, Operand op)
{
Debug.Assert(op.Type == OperandType.FP32 || op.Type == OperandType.FP64);
// One label per "mode did not match, try the next one" step, plus the common exit.
Operand lbl1 = Label();
Operand lbl2 = Label();
Operand lbl3 = Label();
Operand lblEnd = Label();
Operand rN = Const((int)FPRoundingMode.ToNearest);
Operand rP = Const((int)FPRoundingMode.TowardsPlusInfinity);
Operand rM = Const((int)FPRoundingMode.TowardsMinusInfinity);
// Every branch below writes its result into this shared local.
Operand res = context.AllocateLocal(op.Type);
Operand rMode = EmitGetRoundingMode(context);
// ToNearest: Round with ties to even.
context.BranchIf(lbl1, rMode, rN, Comparison.NotEqual);
context.Copy(res, EmitRoundMathCall(context, MidpointRounding.ToEven, op));
context.Branch(lblEnd);
context.MarkLabel(lbl1);
// TowardsPlusInfinity: Ceiling.
context.BranchIf(lbl2, rMode, rP, Comparison.NotEqual);
context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Ceiling), op));
context.Branch(lblEnd);
context.MarkLabel(lbl2);
// TowardsMinusInfinity: Floor.
context.BranchIf(lbl3, rMode, rM, Comparison.NotEqual);
context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Floor), op));
context.Branch(lblEnd);
context.MarkLabel(lbl3);
// Any remaining mode value (presumably TowardsZero) falls through to Truncate.
context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Truncate), op));
context.Branch(lblEnd);
context.MarkLabel(lblEnd);
return res;
}
/// <summary>
/// Emits a call to a method of <c>SoftFloat32</c> or <c>SoftFloat64</c>, chosen by bit 0 of the
/// current opcode size, wrapped in the steps needed to leave and re-enter the ARM FP mode.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="name">Name of the soft-float method; it is looked up by name only.</param>
/// <param name="callArgs">Operands passed to the callee.</param>
/// <returns>The operand holding the call result.</returns>
public static Operand EmitSoftFloatCall(ArmEmitterContext context, string name, params Operand[] callArgs)
{
IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
MethodInfo info = (op.Size & 1) == 0
? typeof(SoftFloat32).GetMethod(name)
: typeof(SoftFloat64).GetMethod(name);
// Leave the ARM FP mode before running managed code, and restore it right after the call.
context.ExitArmFpMode();
// Store state to the context before the call and reload it afterwards
// (presumably so the callee can observe and update guest state - confirm against the callee).
context.StoreToContext();
Operand res = context.Call(info, callArgs);
context.LoadFromContext();
context.EnterArmFpMode();
return res;
}
/// <summary>
/// Emits a scalar floating-point binary operation between element 0 of Rn and the indexed
/// element of Rm. The result goes into element 0 of a zeroed vector, which is written to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the result operand from the two extracted values.</param>
public static void EmitScalarBinaryOpByElemF(ArmEmitterContext context, Func2I emit)
{
    OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
    OperandType elemType = (op.Size & 1) == 0 ? OperandType.FP32 : OperandType.FP64;

    Operand lhs = context.VectorExtract(elemType, GetVec(op.Rn), 0);
    Operand rhs = context.VectorExtract(elemType, GetVec(op.Rm), op.Index);

    Operand dest = GetVec(op.Rd);
    Operand zero = context.VectorZero();
    Operand value = emit(lhs, rhs);

    context.Copy(dest, context.VectorInsert(zero, value, 0));
}
/// <summary>
/// Emits a scalar floating-point ternary operation on element 0 of Rd, element 0 of Rn and the
/// indexed element of Rm. The result goes into element 0 of a zeroed vector, which is written to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the result operand from the (Rd, Rn, Rm) values.</param>
public static void EmitScalarTernaryOpByElemF(ArmEmitterContext context, Func3I emit)
{
    OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
    OperandType elemType = (op.Size & 1) == 0 ? OperandType.FP32 : OperandType.FP64;

    Operand acc = context.VectorExtract(elemType, GetVec(op.Rd), 0);
    Operand lhs = context.VectorExtract(elemType, GetVec(op.Rn), 0);
    Operand rhs = context.VectorExtract(elemType, GetVec(op.Rm), op.Index);

    Operand dest = GetVec(op.Rd);
    Operand zero = context.VectorZero();
    Operand value = emit(acc, lhs, rhs);

    context.Copy(dest, context.VectorInsert(zero, value, 0));
}
/// <summary>
/// Emits a scalar integer unary operation on element 0 of Rn, read with
/// <c>EmitVectorExtractSx</c>. The result is inserted into a zeroed vector and written to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the result operand from the extracted value.</param>
public static void EmitScalarUnaryOpSx(ArmEmitterContext context, Func1I emit)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    Operand source = EmitVectorExtractSx(context, op.Rn, 0, op.Size);

    Operand zero = context.VectorZero();
    Operand value = emit(source);
    Operand result = EmitVectorInsert(context, zero, value, 0, op.Size);

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits a scalar integer binary operation on element 0 of Rn and Rm, both read with
/// <c>EmitVectorExtractSx</c>. The result is inserted into a zeroed vector and written to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the result operand from the two extracted values.</param>
public static void EmitScalarBinaryOpSx(ArmEmitterContext context, Func2I emit)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    Operand lhs = EmitVectorExtractSx(context, op.Rn, 0, op.Size);
    Operand rhs = EmitVectorExtractSx(context, op.Rm, 0, op.Size);

    Operand zero = context.VectorZero();
    Operand value = emit(lhs, rhs);
    Operand result = EmitVectorInsert(context, zero, value, 0, op.Size);

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits a scalar integer unary operation on element 0 of Rn, read with
/// <c>EmitVectorExtractZx</c>. The result is inserted into a zeroed vector and written to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the result operand from the extracted value.</param>
public static void EmitScalarUnaryOpZx(ArmEmitterContext context, Func1I emit)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    Operand source = EmitVectorExtractZx(context, op.Rn, 0, op.Size);

    Operand zero = context.VectorZero();
    Operand value = emit(source);
    Operand result = EmitVectorInsert(context, zero, value, 0, op.Size);

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits a scalar integer binary operation on element 0 of Rn and Rm, both read with
/// <c>EmitVectorExtractZx</c>. The result is inserted into a zeroed vector and written to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the result operand from the two extracted values.</param>
public static void EmitScalarBinaryOpZx(ArmEmitterContext context, Func2I emit)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    Operand lhs = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
    Operand rhs = EmitVectorExtractZx(context, op.Rm, 0, op.Size);

    Operand zero = context.VectorZero();
    Operand value = emit(lhs, rhs);
    Operand result = EmitVectorInsert(context, zero, value, 0, op.Size);

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits a scalar integer ternary operation on element 0 of Rd, Rn and Rm, all read with
/// <c>EmitVectorExtractZx</c>. The result is inserted into a zeroed vector and written to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the result operand from the (Rd, Rn, Rm) values.</param>
public static void EmitScalarTernaryOpZx(ArmEmitterContext context, Func3I emit)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    Operand acc = EmitVectorExtractZx(context, op.Rd, 0, op.Size);
    Operand lhs = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
    Operand rhs = EmitVectorExtractZx(context, op.Rm, 0, op.Size);

    Operand zero = context.VectorZero();
    Operand value = emit(acc, lhs, rhs);
    Operand result = EmitVectorInsert(context, zero, value, 0, op.Size);

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits a scalar floating-point unary operation through a delegate: element 0 of Rn is
/// extracted, transformed, inserted into element 0 of a zeroed vector and written to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the result operand from the extracted value.</param>
public static void EmitScalarUnaryOpF(ArmEmitterContext context, Func1I emit)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;
    OperandType elemType = (op.Size & 1) == 0 ? OperandType.FP32 : OperandType.FP64;

    Operand source = context.VectorExtract(elemType, GetVec(op.Rn), 0);

    Operand dest = GetVec(op.Rd);
    Operand zero = context.VectorZero();
    Operand value = emit(source);

    context.Copy(dest, context.VectorInsert(zero, value, 0));
}
/// <summary>
/// Emits a scalar floating-point binary operation through a delegate: element 0 of Rn and Rm
/// are extracted, combined, inserted into element 0 of a zeroed vector and written to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the result operand from the two extracted values.</param>
public static void EmitScalarBinaryOpF(ArmEmitterContext context, Func2I emit)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
    OperandType elemType = (op.Size & 1) == 0 ? OperandType.FP32 : OperandType.FP64;

    Operand lhs = context.VectorExtract(elemType, GetVec(op.Rn), 0);
    Operand rhs = context.VectorExtract(elemType, GetVec(op.Rm), 0);

    Operand dest = GetVec(op.Rd);
    Operand zero = context.VectorZero();
    Operand value = emit(lhs, rhs);

    context.Copy(dest, context.VectorInsert(zero, value, 0));
}
/// <summary>
/// Emits a scalar floating-point ternary operation through a delegate: element 0 of Ra, Rn and
/// Rm are extracted, combined, inserted into element 0 of a zeroed vector and written to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the result operand from the (Ra, Rn, Rm) values.</param>
public static void EmitScalarTernaryRaOpF(ArmEmitterContext context, Func3I emit)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
    OperandType elemType = (op.Size & 1) == 0 ? OperandType.FP32 : OperandType.FP64;

    Operand addend = context.VectorExtract(elemType, GetVec(op.Ra), 0);
    Operand lhs = context.VectorExtract(elemType, GetVec(op.Rn), 0);
    Operand rhs = context.VectorExtract(elemType, GetVec(op.Rm), 0);

    Operand dest = GetVec(op.Rd);
    Operand zero = context.VectorZero();
    Operand value = emit(addend, lhs, rhs);

    context.Copy(dest, context.VectorInsert(zero, value, 0));
}
/// <summary>
/// Emits a vector floating-point unary operation element by element: each element of Rn is
/// extracted, transformed and inserted into a vector that starts out zeroed, then written to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the result operand for one element.</param>
public static void EmitVectorUnaryOpF(ArmEmitterContext context, Func1I emit)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;
    Operand result = context.VectorZero();

    int doubleBit = op.Size & 1;
    OperandType elemType = doubleBit == 0 ? OperandType.FP32 : OperandType.FP64;

    // Elements are 4 bytes (single) or 8 bytes (double) wide.
    int count = op.GetBytesCount() >> (doubleBit + 2);

    for (int lane = 0; lane < count; lane++)
    {
        Operand source = context.VectorExtract(elemType, GetVec(op.Rn), lane);
        Operand value = emit(source);

        result = context.VectorInsert(result, value, lane);
    }

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits a vector floating-point binary operation element by element: matching elements of Rn
/// and Rm are combined and inserted into a vector that starts out zeroed, then written to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the result operand for one pair of elements.</param>
public static void EmitVectorBinaryOpF(ArmEmitterContext context, Func2I emit)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
    Operand result = context.VectorZero();

    int doubleBit = op.Size & 1;
    OperandType elemType = doubleBit == 0 ? OperandType.FP32 : OperandType.FP64;

    // Elements are 4 bytes (single) or 8 bytes (double) wide.
    int count = op.GetBytesCount() >> (doubleBit + 2);

    for (int lane = 0; lane < count; lane++)
    {
        Operand lhs = context.VectorExtract(elemType, GetVec(op.Rn), lane);
        Operand rhs = context.VectorExtract(elemType, GetVec(op.Rm), lane);
        Operand value = emit(lhs, rhs);

        result = context.VectorInsert(result, value, lane);
    }

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits a vector floating-point ternary operation element by element: matching elements of
/// Rd, Rn and Rm are combined and inserted into a vector that starts out zeroed, then written to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the result operand from the (Rd, Rn, Rm) elements.</param>
public static void EmitVectorTernaryOpF(ArmEmitterContext context, Func3I emit)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
    Operand result = context.VectorZero();

    int doubleBit = op.Size & 1;
    OperandType elemType = doubleBit == 0 ? OperandType.FP32 : OperandType.FP64;

    // Elements are 4 bytes (single) or 8 bytes (double) wide.
    int count = op.GetBytesCount() >> (doubleBit + 2);

    for (int lane = 0; lane < count; lane++)
    {
        Operand acc = context.VectorExtract(elemType, GetVec(op.Rd), lane);
        Operand lhs = context.VectorExtract(elemType, GetVec(op.Rn), lane);
        Operand rhs = context.VectorExtract(elemType, GetVec(op.Rm), lane);
        Operand value = emit(acc, lhs, rhs);

        result = context.VectorInsert(result, value, lane);
    }

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits a vector floating-point binary operation between every element of Rn and the single
/// indexed element of Rm. Results are inserted into a vector that starts out zeroed, then written to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the result operand from an Rn element and the selected Rm element.</param>
public static void EmitVectorBinaryOpByElemF(ArmEmitterContext context, Func2I emit)
{
    OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
    Operand result = context.VectorZero();

    int doubleBit = op.Size & 1;
    OperandType elemType = doubleBit == 0 ? OperandType.FP32 : OperandType.FP64;

    // Elements are 4 bytes (single) or 8 bytes (double) wide.
    int count = op.GetBytesCount() >> (doubleBit + 2);

    for (int lane = 0; lane < count; lane++)
    {
        Operand lhs = context.VectorExtract(elemType, GetVec(op.Rn), lane);

        // The Rm element is re-extracted on every iteration, always from op.Index.
        Operand rhs = context.VectorExtract(elemType, GetVec(op.Rm), op.Index);
        Operand value = emit(lhs, rhs);

        result = context.VectorInsert(result, value, lane);
    }

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits a vector floating-point ternary operation on every element of Rd and Rn together with
/// the single indexed element of Rm. Results are inserted into a vector that starts out zeroed,
/// then written to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the result operand from the (Rd, Rn, selected Rm) elements.</param>
public static void EmitVectorTernaryOpByElemF(ArmEmitterContext context, Func3I emit)
{
    OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
    Operand result = context.VectorZero();

    int doubleBit = op.Size & 1;
    OperandType elemType = doubleBit == 0 ? OperandType.FP32 : OperandType.FP64;

    // Elements are 4 bytes (single) or 8 bytes (double) wide.
    int count = op.GetBytesCount() >> (doubleBit + 2);

    for (int lane = 0; lane < count; lane++)
    {
        Operand acc = context.VectorExtract(elemType, GetVec(op.Rd), lane);
        Operand lhs = context.VectorExtract(elemType, GetVec(op.Rn), lane);

        // The Rm element is re-extracted on every iteration, always from op.Index.
        Operand rhs = context.VectorExtract(elemType, GetVec(op.Rm), op.Index);
        Operand value = emit(acc, lhs, rhs);

        result = context.VectorInsert(result, value, lane);
    }

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits a vector integer unary operation element by element, reading Rn with
/// <c>EmitVectorExtractSx</c>. Results are inserted into a vector that starts out zeroed, then written to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the result operand for one element.</param>
public static void EmitVectorUnaryOpSx(ArmEmitterContext context, Func1I emit)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;
    Operand result = context.VectorZero();

    // Element width in bytes is 1 << op.Size.
    int count = op.GetBytesCount() >> op.Size;

    for (int lane = 0; lane < count; lane++)
    {
        Operand source = EmitVectorExtractSx(context, op.Rn, lane, op.Size);
        Operand value = emit(source);

        result = EmitVectorInsert(context, result, value, lane, op.Size);
    }

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits a vector integer binary operation element by element, reading Rn and Rm with
/// <c>EmitVectorExtractSx</c>. Results are inserted into a vector that starts out zeroed, then written to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the result operand for one pair of elements.</param>
public static void EmitVectorBinaryOpSx(ArmEmitterContext context, Func2I emit)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
    Operand result = context.VectorZero();

    // Element width in bytes is 1 << op.Size.
    int count = op.GetBytesCount() >> op.Size;

    for (int lane = 0; lane < count; lane++)
    {
        Operand lhs = EmitVectorExtractSx(context, op.Rn, lane, op.Size);
        Operand rhs = EmitVectorExtractSx(context, op.Rm, lane, op.Size);
        Operand value = emit(lhs, rhs);

        result = EmitVectorInsert(context, result, value, lane, op.Size);
    }

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits a vector integer ternary operation element by element, reading Rd, Rn and Rm with
/// <c>EmitVectorExtractSx</c>. Results are inserted into a vector that starts out zeroed, then written to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the result operand from the (Rd, Rn, Rm) elements.</param>
public static void EmitVectorTernaryOpSx(ArmEmitterContext context, Func3I emit)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
    Operand result = context.VectorZero();

    // Element width in bytes is 1 << op.Size.
    int count = op.GetBytesCount() >> op.Size;

    for (int lane = 0; lane < count; lane++)
    {
        Operand acc = EmitVectorExtractSx(context, op.Rd, lane, op.Size);
        Operand lhs = EmitVectorExtractSx(context, op.Rn, lane, op.Size);
        Operand rhs = EmitVectorExtractSx(context, op.Rm, lane, op.Size);
        Operand value = emit(acc, lhs, rhs);

        result = EmitVectorInsert(context, result, value, lane, op.Size);
    }

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits a vector integer unary operation element by element, reading Rn with
/// <c>EmitVectorExtractZx</c>. Results are inserted into a vector that starts out zeroed, then written to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the result operand for one element.</param>
public static void EmitVectorUnaryOpZx(ArmEmitterContext context, Func1I emit)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;
    Operand result = context.VectorZero();

    // Element width in bytes is 1 << op.Size.
    int count = op.GetBytesCount() >> op.Size;

    for (int lane = 0; lane < count; lane++)
    {
        Operand source = EmitVectorExtractZx(context, op.Rn, lane, op.Size);
        Operand value = emit(source);

        result = EmitVectorInsert(context, result, value, lane, op.Size);
    }

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits a vector integer binary operation element by element, reading Rn and Rm with
/// <c>EmitVectorExtractZx</c>. Results are inserted into a vector that starts out zeroed, then written to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the result operand for one pair of elements.</param>
public static void EmitVectorBinaryOpZx(ArmEmitterContext context, Func2I emit)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
    Operand result = context.VectorZero();

    // Element width in bytes is 1 << op.Size.
    int count = op.GetBytesCount() >> op.Size;

    for (int lane = 0; lane < count; lane++)
    {
        Operand lhs = EmitVectorExtractZx(context, op.Rn, lane, op.Size);
        Operand rhs = EmitVectorExtractZx(context, op.Rm, lane, op.Size);
        Operand value = emit(lhs, rhs);

        result = EmitVectorInsert(context, result, value, lane, op.Size);
    }

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits a vector integer ternary operation element by element, reading Rd, Rn and Rm with
/// <c>EmitVectorExtractZx</c>. Results are inserted into a vector that starts out zeroed, then written to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the result operand from the (Rd, Rn, Rm) elements.</param>
public static void EmitVectorTernaryOpZx(ArmEmitterContext context, Func3I emit)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
    Operand result = context.VectorZero();

    // Element width in bytes is 1 << op.Size.
    int count = op.GetBytesCount() >> op.Size;

    for (int lane = 0; lane < count; lane++)
    {
        Operand acc = EmitVectorExtractZx(context, op.Rd, lane, op.Size);
        Operand lhs = EmitVectorExtractZx(context, op.Rn, lane, op.Size);
        Operand rhs = EmitVectorExtractZx(context, op.Rm, lane, op.Size);
        Operand value = emit(acc, lhs, rhs);

        result = EmitVectorInsert(context, result, value, lane, op.Size);
    }

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits a vector integer binary operation between every element of Rn and the single indexed
/// element of Rm, both read with <c>EmitVectorExtractSx</c>. Results are inserted into a vector
/// that starts out zeroed, then written to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the result operand from an Rn element and the selected Rm element.</param>
public static void EmitVectorBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
{
    OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
    Operand result = context.VectorZero();

    // The selected Rm element is extracted once and reused for every lane.
    Operand selected = EmitVectorExtractSx(context, op.Rm, op.Index, op.Size);

    int count = op.GetBytesCount() >> op.Size;

    for (int lane = 0; lane < count; lane++)
    {
        Operand lhs = EmitVectorExtractSx(context, op.Rn, lane, op.Size);
        Operand value = emit(lhs, selected);

        result = EmitVectorInsert(context, result, value, lane, op.Size);
    }

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits a vector integer binary operation between every element of Rn and the single indexed
/// element of Rm, both read with <c>EmitVectorExtractZx</c>. Results are inserted into a vector
/// that starts out zeroed, then written to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the result operand from an Rn element and the selected Rm element.</param>
public static void EmitVectorBinaryOpByElemZx(ArmEmitterContext context, Func2I emit)
{
    OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
    Operand result = context.VectorZero();

    // The selected Rm element is extracted once and reused for every lane.
    Operand selected = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size);

    int count = op.GetBytesCount() >> op.Size;

    for (int lane = 0; lane < count; lane++)
    {
        Operand lhs = EmitVectorExtractZx(context, op.Rn, lane, op.Size);
        Operand value = emit(lhs, selected);

        result = EmitVectorInsert(context, result, value, lane, op.Size);
    }

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits a vector integer ternary operation on every element of Rd and Rn together with the
/// single indexed element of Rm, all read with <c>EmitVectorExtractZx</c>. Results are inserted
/// into a vector that starts out zeroed, then written to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the result operand from the (Rd, Rn, selected Rm) elements.</param>
public static void EmitVectorTernaryOpByElemZx(ArmEmitterContext context, Func3I emit)
{
    OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
    Operand result = context.VectorZero();

    // The selected Rm element is extracted once and reused for every lane.
    Operand selected = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size);

    int count = op.GetBytesCount() >> op.Size;

    for (int lane = 0; lane < count; lane++)
    {
        Operand acc = EmitVectorExtractZx(context, op.Rd, lane, op.Size);
        Operand lhs = EmitVectorExtractZx(context, op.Rn, lane, op.Size);
        Operand value = emit(acc, lhs, selected);

        result = EmitVectorInsert(context, result, value, lane, op.Size);
    }

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits a vector operation whose only input is the opcode immediate: the delegate is invoked
/// once per element with the immediate, and each result is inserted into a vector that starts
/// out zeroed, then written to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the element value from the immediate operand.</param>
public static void EmitVectorImmUnaryOp(ArmEmitterContext context, Func1I emit)
{
    OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;

    Operand immediate = Const(op.Immediate);
    Operand result = context.VectorZero();

    int count = op.GetBytesCount() >> op.Size;

    for (int lane = 0; lane < count; lane++)
    {
        Operand value = emit(immediate);

        result = EmitVectorInsert(context, result, value, lane, op.Size);
    }

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits a vector operation combining every element of Rd (read with <c>EmitVectorExtractZx</c>)
/// with the opcode immediate. Results are inserted into a vector that starts out zeroed, then
/// written to Rd.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the element value from the Rd element and the immediate operand.</param>
public static void EmitVectorImmBinaryOp(ArmEmitterContext context, Func2I emit)
{
    OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;

    Operand immediate = Const(op.Immediate);
    Operand result = context.VectorZero();

    int count = op.GetBytesCount() >> op.Size;

    for (int lane = 0; lane < count; lane++)
    {
        Operand current = EmitVectorExtractZx(context, op.Rd, lane, op.Size);
        Operand value = emit(current, immediate);

        result = EmitVectorInsert(context, result, value, lane, op.Size);
    }

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>Signed variant of <c>EmitVectorWidenRmBinaryOp</c>.</summary>
public static void EmitVectorWidenRmBinaryOpSx(ArmEmitterContext context, Func2I emit)
    => EmitVectorWidenRmBinaryOp(context, emit, signed: true);

/// <summary>Unsigned variant of <c>EmitVectorWidenRmBinaryOp</c>.</summary>
public static void EmitVectorWidenRmBinaryOpZx(ArmEmitterContext context, Func2I emit)
    => EmitVectorWidenRmBinaryOp(context, emit, signed: false);

/// <summary>
/// Emits a binary operation whose Rn elements and results use the next wider element size
/// (<c>op.Size + 1</c>) while the Rm elements use <c>op.Size</c>.
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the result operand for one pair of elements.</param>
/// <param name="signed">Forwarded to <c>EmitVectorExtract</c> for both operands.</param>
private static void EmitVectorWidenRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
    Operand result = context.VectorZero();

    // Number of op.Size elements that fit in 8 bytes.
    int count = 8 >> op.Size;

    // 128-bit operations read the narrow operand starting at element "count" (byte offset 8).
    int narrowBase = op.RegisterSize == RegisterSize.Simd128 ? count : 0;

    for (int lane = 0; lane < count; lane++)
    {
        Operand wide = EmitVectorExtract(context, op.Rn, lane, op.Size + 1, signed);
        Operand narrow = EmitVectorExtract(context, op.Rm, narrowBase + lane, op.Size, signed);
        Operand value = emit(wide, narrow);

        result = EmitVectorInsert(context, result, value, lane, op.Size + 1);
    }

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>Signed variant of <c>EmitVectorWidenRnRmBinaryOp</c>.</summary>
public static void EmitVectorWidenRnRmBinaryOpSx(ArmEmitterContext context, Func2I emit)
    => EmitVectorWidenRnRmBinaryOp(context, emit, signed: true);

/// <summary>Unsigned variant of <c>EmitVectorWidenRnRmBinaryOp</c>.</summary>
public static void EmitVectorWidenRnRmBinaryOpZx(ArmEmitterContext context, Func2I emit)
    => EmitVectorWidenRnRmBinaryOp(context, emit, signed: false);

/// <summary>
/// Emits a binary operation that reads <c>op.Size</c> elements from Rn and Rm and writes
/// results at the next wider element size (<c>op.Size + 1</c>).
/// </summary>
/// <param name="context">Emitter for the current instruction.</param>
/// <param name="emit">Produces the result operand for one pair of elements.</param>
/// <param name="signed">Forwarded to <c>EmitVectorExtract</c> for both operands.</param>
private static void EmitVectorWidenRnRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
    Operand result = context.VectorZero();

    // Number of op.Size elements that fit in 8 bytes.
    int count = 8 >> op.Size;

    // 128-bit operations read both sources starting at element "count" (byte offset 8).
    int sourceBase = op.RegisterSize == RegisterSize.Simd128 ? count : 0;

    for (int lane = 0; lane < count; lane++)
    {
        Operand lhs = EmitVectorExtract(context, op.Rn, sourceBase + lane, op.Size, signed);
        Operand rhs = EmitVectorExtract(context, op.Rm, sourceBase + lane, op.Size, signed);
        Operand value = emit(lhs, rhs);

        result = EmitVectorInsert(context, result, value, lane, op.Size + 1);
    }

    context.Copy(GetVec(op.Rd), result);
}
public static void EmitVectorWidenRnRmTernaryOpSx(ArmEmitterContext context, Func3I emit)
{
EmitVectorWidenRnRmTernaryOp(context, emit, signed: true);
}
// Widening ternary operation (wide Rd accumulator, narrow Rn and Rm) with elements read as unsigned values.
public static void EmitVectorWidenRnRmTernaryOpZx(ArmEmitterContext context, Func3I emit)
    => EmitVectorWidenRnRmTernaryOp(context, emit, false);
// Emits a per-element ternary operation: the current Rd element (wide size, op.Size + 1)
// is combined with narrow (op.Size) elements of Rn and Rm. Each result is inserted at the
// wide size into a zeroed vector that is finally copied to Rd.
private static void EmitVectorWidenRnRmTernaryOp(ArmEmitterContext context, Func3I emit, bool signed)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    Operand result = context.VectorZero();

    int narrowSize = op.Size;
    int wideSize = narrowSize + 1;
    int count = 8 >> narrowSize;

    // A 128-bit operation reads the narrow source elements from the upper half of Rn and Rm.
    int srcBase = op.RegisterSize == RegisterSize.Simd128 ? count : 0;

    for (int i = 0; i < count; i++)
    {
        Operand d = EmitVectorExtract(context, op.Rd, i, wideSize, signed);
        Operand n = EmitVectorExtract(context, op.Rn, srcBase + i, narrowSize, signed);
        Operand m = EmitVectorExtract(context, op.Rm, srcBase + i, narrowSize, signed);
        Operand value = emit(d, n, m);

        result = EmitVectorInsert(context, result, value, i, wideSize);
    }

    context.Copy(GetVec(op.Rd), result);
}
// Widening binary operation against a single indexed Rm element, with elements read as signed values.
public static void EmitVectorWidenBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
    => EmitVectorWidenBinaryOpByElem(context, emit, true);
// Widening binary operation against a single indexed Rm element, with elements read as unsigned values.
public static void EmitVectorWidenBinaryOpByElemZx(ArmEmitterContext context, Func2I emit)
    => EmitVectorWidenBinaryOpByElem(context, emit, false);
// Emits a per-element binary operation between narrow (op.Size) elements of Rn and the
// single Rm element selected by op.Index. Each result is inserted at the wide size
// (op.Size + 1) into a zeroed vector that is finally copied to Rd.
private static void EmitVectorWidenBinaryOpByElem(ArmEmitterContext context, Func2I emit, bool signed)
{
    OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;

    Operand result = context.VectorZero();

    int narrowSize = op.Size;
    int wideSize = narrowSize + 1;

    // The Rm element is extracted once and reused for every Rn element.
    Operand selected = EmitVectorExtract(context, op.Rm, op.Index, narrowSize, signed);

    int count = 8 >> narrowSize;

    // A 128-bit operation reads the Rn elements from the upper half of the register.
    int rnBase = op.RegisterSize == RegisterSize.Simd128 ? count : 0;

    for (int i = 0; i < count; i++)
    {
        Operand n = EmitVectorExtract(context, op.Rn, rnBase + i, narrowSize, signed);
        Operand value = emit(n, selected);

        result = EmitVectorInsert(context, result, value, i, wideSize);
    }

    context.Copy(GetVec(op.Rd), result);
}
// Widening ternary operation against a single indexed Rm element, with elements read as signed values.
public static void EmitVectorWidenTernaryOpByElemSx(ArmEmitterContext context, Func3I emit)
    => EmitVectorWidenTernaryOpByElem(context, emit, true);
// Widening ternary operation against a single indexed Rm element, with elements read as unsigned values.
public static void EmitVectorWidenTernaryOpByElemZx(ArmEmitterContext context, Func3I emit)
    => EmitVectorWidenTernaryOpByElem(context, emit, false);
// Emits a per-element ternary operation combining the current Rd element (wide size,
// op.Size + 1), a narrow (op.Size) Rn element and the single Rm element selected by
// op.Index. Each result is inserted at the wide size into a zeroed vector that is
// finally copied to Rd.
private static void EmitVectorWidenTernaryOpByElem(ArmEmitterContext context, Func3I emit, bool signed)
{
    OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;

    Operand result = context.VectorZero();

    int narrowSize = op.Size;
    int wideSize = narrowSize + 1;

    // The Rm element is extracted once and reused for every iteration.
    Operand selected = EmitVectorExtract(context, op.Rm, op.Index, narrowSize, signed);

    int count = 8 >> narrowSize;

    // A 128-bit operation reads the Rn elements from the upper half of the register.
    int rnBase = op.RegisterSize == RegisterSize.Simd128 ? count : 0;

    for (int i = 0; i < count; i++)
    {
        Operand d = EmitVectorExtract(context, op.Rd, i, wideSize, signed);
        Operand n = EmitVectorExtract(context, op.Rn, rnBase + i, narrowSize, signed);
        Operand value = emit(d, n, selected);

        result = EmitVectorInsert(context, result, value, i, wideSize);
    }

    context.Copy(GetVec(op.Rd), result);
}
// Integer pairwise operation with elements read as signed values.
public static void EmitVectorPairwiseOpSx(ArmEmitterContext context, Func2I emit)
    => EmitVectorPairwiseOp(context, emit, true);
// Integer pairwise operation with elements read as unsigned values.
public static void EmitVectorPairwiseOpZx(ArmEmitterContext context, Func2I emit)
    => EmitVectorPairwiseOp(context, emit, false);
// Emits an integer pairwise operation: adjacent element pairs of Rn produce the first
// `pairCount` result elements and adjacent pairs of Rm produce the following `pairCount`
// elements. The result is built in a zeroed vector and copied to Rd.
private static void EmitVectorPairwiseOp(ArmEmitterContext context, Func2I emit, bool signed)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    Operand result = context.VectorZero();

    int size = op.Size;
    int pairCount = op.GetPairsCount() >> size;

    for (int p = 0; p < pairCount; p++)
    {
        int even = p * 2;
        int odd = even + 1;

        Operand nEven = EmitVectorExtract(context, op.Rn, even, size, signed);
        Operand nOdd = EmitVectorExtract(context, op.Rn, odd, size, signed);
        Operand mEven = EmitVectorExtract(context, op.Rm, even, size, signed);
        Operand mOdd = EmitVectorExtract(context, op.Rm, odd, size, signed);

        Operand fromN = emit(nEven, nOdd);
        result = EmitVectorInsert(context, result, fromN, p, size);

        Operand fromM = emit(mEven, mOdd);
        result = EmitVectorInsert(context, result, fromM, pairCount + p, size);
    }

    context.Copy(GetVec(op.Rd), result);
}
// Emits a pairwise operation using x86 shuffles: the even-indexed and odd-indexed
// elements of the m:n concatenation are gathered into two vectors, which are then
// combined with the intrinsic selected from `inst` by element size.
public static void EmitSsse3VectorPairwiseOp(ArmEmitterContext context, Intrinsic[] inst)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    Operand n = GetVec(op.Rn);
    Operand m = GetVec(op.Rm);

    Operand lhs;
    Operand rhs;
    int instIndex = op.Size;

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        Operand evenWithZero = X86GetElements(context, ZeroMask, EvenMasks[op.Size]);
        Operand oddWithZero = X86GetElements(context, ZeroMask, OddMasks[op.Size]);

        // Low halves of both sources packed as m:n.
        Operand packed = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m);

        lhs = context.AddIntrinsic(Intrinsic.X86Pshufb, packed, evenWithZero); // 0:even from m:n
        rhs = context.AddIntrinsic(Intrinsic.X86Pshufb, packed, oddWithZero);  // 0:odd from m:n
    }
    else if (op.Size < 3)
    {
        Operand oddEven = X86GetElements(context, OddMasks[op.Size], EvenMasks[op.Size]);

        Operand sortedN = context.AddIntrinsic(Intrinsic.X86Pshufb, n, oddEven); // odd:even from n
        Operand sortedM = context.AddIntrinsic(Intrinsic.X86Pshufb, m, oddEven); // odd:even from m

        lhs = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, sortedN, sortedM);
        rhs = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, sortedN, sortedM);
    }
    else
    {
        // No byte shuffle is used here: the low and high quadwords are unpacked directly.
        lhs = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m);
        rhs = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, n, m);

        instIndex = 3;
    }

    context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[instIndex], lhs, rhs));
}
// Across-vector reduction over signed elements; the result keeps the source element size.
public static void EmitVectorAcrossVectorOpSx(ArmEmitterContext context, Func2I emit)
    => EmitVectorAcrossVectorOp(context, emit, true, false);
// Across-vector reduction over unsigned elements; the result keeps the source element size.
public static void EmitVectorAcrossVectorOpZx(ArmEmitterContext context, Func2I emit)
    => EmitVectorAcrossVectorOp(context, emit, false, false);
// Across-vector reduction over signed elements; the result is stored at the next larger element size.
public static void EmitVectorLongAcrossVectorOpSx(ArmEmitterContext context, Func2I emit)
    => EmitVectorAcrossVectorOp(context, emit, true, true);
// Across-vector reduction over unsigned elements; the result is stored at the next larger element size.
public static void EmitVectorLongAcrossVectorOpZx(ArmEmitterContext context, Func2I emit)
    => EmitVectorAcrossVectorOp(context, emit, false, true);
// Folds every element of Rn into one value by repeatedly applying `emit` (left to right,
// starting from element 0), then writes that value into element 0 of an otherwise zeroed
// vector copied to Rd. With `isLong` the destination element is one size step larger.
private static void EmitVectorAcrossVectorOp(
ArmEmitterContext context,
Func2I emit,
bool signed,
bool isLong)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    int srcSize = op.Size;
    int count = op.GetBytesCount() >> srcSize;

    Operand acc = EmitVectorExtract(context, op.Rn, 0, srcSize, signed);

    for (int i = 1; i < count; i++)
    {
        Operand next = EmitVectorExtract(context, op.Rn, i, srcSize, signed);

        acc = emit(acc, next);
    }

    int dstSize = srcSize;

    if (isLong)
    {
        dstSize = srcSize + 1;
    }

    Operand zero = context.VectorZero();
    Operand packed = EmitVectorInsert(context, zero, acc, 0, dstSize);

    context.Copy(GetVec(op.Rd), packed);
}
// Folds the four FP32 elements of Rn into one value by applying `emit` left to right,
// then writes it into element 0 of an otherwise zeroed vector copied to Rd.
// Only the single-precision, 128-bit form is expected (checked by the assertion).
public static void EmitVectorAcrossVectorOpF(ArmEmitterContext context, Func2I emit)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    Debug.Assert((op.Size & 1) == 0 && op.RegisterSize == RegisterSize.Simd128);

    const int elementCount = 4;

    Operand acc = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);

    for (int i = 1; i < elementCount; i++)
    {
        Operand next = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), i);

        acc = emit(acc, next);
    }

    Operand zero = context.VectorZero();
    Operand packed = context.VectorInsert(zero, acc, 0);

    context.Copy(GetVec(op.Rd), packed);
}
// SSE2 across-vector reduction for four FP32 elements: each element of Rn is broadcast
// to all lanes with SHUFPS, the broadcasts are combined pairwise as
// emit(emit(e0, e1), emit(e2, e3)), and the upper 96 bits of the result are zeroed
// before it is copied to Rd.
public static void EmitSse2VectorAcrossVectorOpF(ArmEmitterContext context, Func2I emit)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    Debug.Assert((op.Size & 1) == 0 && op.RegisterSize == RegisterSize.Simd128);

    Operand source = context.Copy(GetVec(op.Rn));

    Operand[] broadcast = new Operand[4];

    for (int lane = 0; lane < broadcast.Length; lane++)
    {
        // Shuffle selector that picks element `lane` for all four destination lanes.
        int selector = lane << 6 | lane << 4 | lane << 2 | lane << 0;

        broadcast[lane] = context.AddIntrinsic(Intrinsic.X86Shufps, source, source, Const(selector));
    }

    Operand low = emit(broadcast[0], broadcast[1]);
    Operand high = emit(broadcast[2], broadcast[3]);
    Operand reduced = emit(low, high);

    context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(reduced));
}
// Applies `emit` to elements 0 and 1 of Rn (FP64 when bit 0 of op.Size is set, FP32
// otherwise) and stores the result in element 0 of an otherwise zeroed vector copied to Rd.
public static void EmitScalarPairwiseOpF(ArmEmitterContext context, Func2I emit)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    bool isDouble = (op.Size & 1) != 0;
    OperandType elementType = isDouble ? OperandType.FP64 : OperandType.FP32;

    Operand first = context.VectorExtract(elementType, GetVec(op.Rn), 0);
    Operand second = context.VectorExtract(elementType, GetVec(op.Rn), 1);

    // The zero vector is created before the operation itself is emitted.
    Operand zero = context.VectorZero();
    Operand combined = emit(first, second);

    Operand result = context.VectorInsert(zero, combined, 0);

    context.Copy(GetVec(op.Rd), result);
}
// SSE2 scalar pairwise operation: builds two vectors holding element 0 and element 1 of
// Rn in their lowest lane (remaining lanes zero), then copies emit(lhs, rhs) to Rd.
public static void EmitSse2ScalarPairwiseOpF(ArmEmitterContext context, Func2I emit)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    Operand n = GetVec(op.Rn);

    Operand lhs;
    Operand rhs;

    bool isDouble = (op.Size & 1) != 0;

    if (isDouble)
    {
        Operand zero = context.VectorZero();

        // lhs = 0:n[0], rhs = 0:n[1].
        lhs = context.AddIntrinsic(Intrinsic.X86Movlhps, n, zero);
        rhs = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, n);
    }
    else
    {
        // Lanes 1..3 select element 2, which is zero after the upper 64 bits are cleared;
        // lane 0 selects element 0 (first selector) or element 1 (second selector).
        const int selectFirst = 2 << 6 | 2 << 4 | 2 << 2 | 0 << 0;
        const int selectSecond = 2 << 6 | 2 << 4 | 2 << 2 | 1 << 0;

        Operand lowOnly = context.VectorZeroUpper64(n);

        lhs = context.AddIntrinsic(Intrinsic.X86Pshufd, lowOnly, Const(selectFirst));
        rhs = context.AddIntrinsic(Intrinsic.X86Pshufd, lowOnly, Const(selectSecond));
    }

    context.Copy(GetVec(op.Rd), emit(lhs, rhs));
}
// Emits a floating-point pairwise operation: adjacent element pairs of Rn produce the
// first `pairCount` result elements and adjacent pairs of Rm the following `pairCount`.
// Elements are FP64 when bit 0 of op.Size is set and FP32 otherwise.
public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Func2I emit)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    Operand result = context.VectorZero();

    int sizeF = op.Size & 1;
    OperandType elementType = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;

    int pairCount = op.GetPairsCount() >> (sizeF + 2);

    for (int p = 0; p < pairCount; p++)
    {
        int even = p * 2;
        int odd = even + 1;

        Operand nEven = context.VectorExtract(elementType, GetVec(op.Rn), even);
        Operand nOdd = context.VectorExtract(elementType, GetVec(op.Rn), odd);
        Operand mEven = context.VectorExtract(elementType, GetVec(op.Rm), even);
        Operand mOdd = context.VectorExtract(elementType, GetVec(op.Rm), odd);

        Operand fromN = emit(nEven, nOdd);
        result = context.VectorInsert(result, fromN, p);

        Operand fromM = emit(mEven, mOdd);
        result = context.VectorInsert(result, fromM, pairCount + p);
    }

    context.Copy(GetVec(op.Rd), result);
}
// SSE2 floating-point pairwise operation: the sources are rearranged into one vector of
// even-indexed elements and one of odd-indexed elements, and emit(lhs, rhs) is copied to Rd.
public static void EmitSse2VectorPairwiseOpF(ArmEmitterContext context, Func2I emit)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    Operand n = context.Copy(GetVec(op.Rn));
    Operand m = context.Copy(GetVec(op.Rm));

    Operand lhs;
    Operand rhs;

    bool isDouble = (op.Size & 1) != 0;

    if (isDouble)
    {
        lhs = context.AddIntrinsic(Intrinsic.X86Unpcklpd, n, m);
        rhs = context.AddIntrinsic(Intrinsic.X86Unpckhpd, n, m);
    }
    else if (op.RegisterSize == RegisterSize.Simd64)
    {
        // Interleave the low halves, then split the interleaved vector into its low and
        // high 64 bits, each padded with zeros.
        Operand interleaved = context.AddIntrinsic(Intrinsic.X86Unpcklps, n, m);
        Operand zero = context.VectorZero();

        lhs = context.AddIntrinsic(Intrinsic.X86Movlhps, interleaved, zero);
        rhs = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, interleaved);
    }
    else
    {
        // Selectors pick elements {0, 2} (even) and {1, 3} (odd) from each source.
        const int evenSelector = 2 << 6 | 0 << 4 | 2 << 2 | 0 << 0;
        const int oddSelector = 3 << 6 | 1 << 4 | 3 << 2 | 1 << 0;

        lhs = context.AddIntrinsic(Intrinsic.X86Shufps, n, m, Const(evenSelector));
        rhs = context.AddIntrinsic(Intrinsic.X86Shufps, n, m, Const(oddSelector));
    }

    context.Copy(GetVec(op.Rd), emit(lhs, rhs));
}
// Comparison predicate codes; the trailing notes record the ordered/unordered and
// signaling behaviour of each predicate.
public enum CmpCondition
{
    // Legacy Sse.
    Equal              = 0x00, // Ordered, non-signaling.
    LessThan           = 0x01, // Ordered, signaling.
    LessThanOrEqual    = 0x02, // Ordered, signaling.
    UnorderedQ         = 0x03, // Non-signaling.
    NotLessThan        = 0x05, // Unordered, signaling.
    NotLessThanOrEqual = 0x06, // Unordered, signaling.
    OrderedQ           = 0x07, // Non-signaling.

    // Vex.
    GreaterThanOrEqual = 0x0D, // Ordered, signaling.
    GreaterThan        = 0x0E, // Ordered, signaling.
    OrderedS           = 0x17  // Signaling.
}
// Option bits that select the behaviour of the saturating unary/binary emitters.
[Flags]
public enum SaturatingFlags
{
    None       = 0x00,
    ByElem     = 0x01, // Second operand is a single indexed element.
    Scalar     = 0x02, // Only element 0 is processed.
    Signed     = 0x04, // Elements are treated as signed.
    Add        = 0x08, // Saturating addition.
    Sub        = 0x10, // Saturating subtraction.
    Accumulate = 0x20  // Saturating accumulation into the destination.
}
// Scalar form of the signed saturating unary operation.
public static void EmitScalarSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit)
    => EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.Signed | SaturatingFlags.Scalar);
// Vector form of the signed saturating unary operation.
public static void EmitVectorSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit)
    => EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.Signed);
// Applies `emit` to each sign-extended element of Rn (only element 0 when the Scalar
// flag is set), saturates the result to the signed range of the element size and
// writes it into a zeroed vector that is finally copied to Rd.
public static void EmitSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit, SaturatingFlags flags)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    Operand result = context.VectorZero();

    bool isScalar = flags.HasFlag(SaturatingFlags.Scalar);

    int size = op.Size;
    int count = isScalar ? 1 : op.GetBytesCount() >> size;

    for (int i = 0; i < count; i++)
    {
        Operand source = EmitVectorExtractSx(context, op.Rn, i, size);
        Operand raw = emit(source);

        // 8/16/32-bit results are clamped from the 64-bit intermediate; 64-bit elements
        // use the dedicated abs/neg saturation helper instead.
        Operand saturated = size > 2
            ? EmitUnarySignedSatQAbsOrNeg(context, raw)
            : EmitSignedSrcSatQ(context, raw, size, signedDst: true);

        result = EmitVectorInsert(context, result, saturated, i, size);
    }

    context.Copy(GetVec(op.Rd), result);
}
// Scalar, signed saturating binary operation; extra behaviour bits may be supplied in `flags`.
public static void EmitScalarSaturatingBinaryOpSx(ArmEmitterContext context, Func2I emit = null, SaturatingFlags flags = SaturatingFlags.None)
    => EmitSaturatingBinaryOp(context, emit, flags | SaturatingFlags.Scalar | SaturatingFlags.Signed);
// Scalar, unsigned saturating binary operation; no custom operation delegate is passed.
public static void EmitScalarSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags)
    => EmitSaturatingBinaryOp(context, null, flags | SaturatingFlags.Scalar);
// Vector, signed saturating binary operation; extra behaviour bits may be supplied in `flags`.
public static void EmitVectorSaturatingBinaryOpSx(ArmEmitterContext context, Func2I emit = null, SaturatingFlags flags = SaturatingFlags.None)
    => EmitSaturatingBinaryOp(context, emit, flags | SaturatingFlags.Signed);
// Vector, unsigned saturating binary operation; no custom operation delegate is passed.
public static void EmitVectorSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags)
    => EmitSaturatingBinaryOp(context, null, flags);
// Vector, signed saturating binary operation whose second operand is a single indexed element.
public static void EmitVectorSaturatingBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
    => EmitSaturatingBinaryOp(context, emit, SaturatingFlags.Signed | SaturatingFlags.ByElem);
// Emits a saturating binary operation selected by `flags`:
//  - Add / Sub: saturating addition or subtraction of Rn and Rm (Add wins if both are set);
//  - Accumulate: saturating addition of Rn (read with the opposite signedness) into Rd;
//  - otherwise: the caller-supplied `emit` applied to Rn and Rm (or to the single indexed
//    Rm element when ByElem is set), followed by saturation.
// Results are written into a zeroed vector that is finally copied to Rd. With the Scalar
// flag only element 0 is processed.
public static void EmitSaturatingBinaryOp(ArmEmitterContext context, Func2I emit, SaturatingFlags flags)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    Operand result = context.VectorZero();

    bool isByElem = flags.HasFlag(SaturatingFlags.ByElem);
    bool isScalar = flags.HasFlag(SaturatingFlags.Scalar);
    bool isSigned = flags.HasFlag(SaturatingFlags.Signed);
    bool isAdd = flags.HasFlag(SaturatingFlags.Add);
    bool isSub = flags.HasFlag(SaturatingFlags.Sub);
    bool isAccumulate = flags.HasFlag(SaturatingFlags.Accumulate);

    int size = op.Size;
    int count = isScalar ? 1 : op.GetBytesCount() >> size;

    if (isAdd || isSub)
    {
        for (int i = 0; i < count; i++)
        {
            Operand n = EmitVectorExtract(context, op.Rn, i, size, isSigned);
            Operand m = EmitVectorExtract(context, ((OpCodeSimdReg)op).Rm, i, size, isSigned);

            Operand saturated;

            if (size > 2)
            {
                // 64-bit elements cannot be widened, so dedicated overflow checks are used.
                if (isSigned)
                {
                    saturated = isAdd
                        ? EmitBinarySignedSatQAdd(context, n, m)
                        : EmitBinarySignedSatQSub(context, n, m);
                }
                else
                {
                    saturated = isAdd
                        ? EmitBinaryUnsignedSatQAdd(context, n, m)
                        : EmitBinaryUnsignedSatQSub(context, n, m);
                }
            }
            else
            {
                // Narrower elements are computed in 64 bits and then clamped.
                Operand wide = isAdd ? context.Add(n, m) : context.Subtract(n, m);

                saturated = EmitSignedSrcSatQ(context, wide, size, signedDst: isSigned);
            }

            result = EmitVectorInsert(context, result, saturated, i, size);
        }
    }
    else if (isAccumulate)
    {
        for (int i = 0; i < count; i++)
        {
            // The Rn operand is read with the opposite signedness of the destination.
            Operand n = EmitVectorExtract(context, op.Rn, i, size, !isSigned);
            Operand d = EmitVectorExtract(context, op.Rd, i, size, isSigned);

            Operand saturated;

            if (size > 2)
            {
                saturated = isSigned
                    ? EmitBinarySignedSatQAcc(context, n, d)
                    : EmitBinaryUnsignedSatQAcc(context, n, d);
            }
            else
            {
                Operand wide = context.Add(n, d);

                saturated = EmitSignedSrcSatQ(context, wide, size, signedDst: isSigned);
            }

            result = EmitVectorInsert(context, result, saturated, i, size);
        }
    }
    else
    {
        Operand indexedM = default;

        if (isByElem)
        {
            // The indexed Rm element is extracted once, ahead of the loop.
            OpCodeSimdRegElem elemOp = (OpCodeSimdRegElem)op;

            indexedM = EmitVectorExtract(context, elemOp.Rm, elemOp.Index, size, isSigned);
        }

        for (int i = 0; i < count; i++)
        {
            Operand n = EmitVectorExtract(context, op.Rn, i, size, isSigned);

            Operand m = isByElem
                ? indexedM
                : EmitVectorExtract(context, ((OpCodeSimdReg)op).Rm, i, size, isSigned);

            Operand raw = emit(n, m);
            Operand saturated = EmitSignedSrcSatQ(context, raw, size, signedDst: isSigned);

            result = EmitVectorInsert(context, result, saturated, i, size);
        }
    }

    context.Copy(GetVec(op.Rd), result);
}
// Option bits for the saturating narrow emitter, plus named combinations for each
// scalar/vector and source/destination signedness variant.
[Flags]
public enum SaturatingNarrowFlags
{
    Scalar    = 0x1, // Only element 0 is processed.
    SignedSrc = 0x2, // Source elements are signed.
    SignedDst = 0x4, // Destination elements are signed.

    ScalarSxSx = Scalar | SignedSrc | SignedDst,
    ScalarSxZx = Scalar | SignedSrc,
    ScalarZxZx = Scalar,

    VectorSxSx = SignedSrc | SignedDst,
    VectorSxZx = SignedSrc,
    VectorZxZx = 0x0
}
// Narrows each wide (op.Size + 1) element of Rn to op.Size with saturation and writes
// it to Rd. For the 128-bit vector form the results go to the upper half of Rd and the
// existing contents of Rd are kept; otherwise the result starts from a zeroed vector.
public static void EmitSaturatingNarrowOp(ArmEmitterContext context, SaturatingNarrowFlags flags)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    bool isScalar = flags.HasFlag(SaturatingNarrowFlags.Scalar);
    bool isSignedSrc = flags.HasFlag(SaturatingNarrowFlags.SignedSrc);
    bool isSignedDst = flags.HasFlag(SaturatingNarrowFlags.SignedDst);

    int narrowSize = op.Size;
    int count = isScalar ? 1 : 8 >> narrowSize;

    int dstBase = 0;

    if (!isScalar && op.RegisterSize == RegisterSize.Simd128)
    {
        dstBase = count;
    }

    Operand d = GetVec(op.Rd);

    Operand result;

    if (dstBase == 0)
    {
        result = context.VectorZero();
    }
    else
    {
        result = context.Copy(d);
    }

    for (int i = 0; i < count; i++)
    {
        Operand wide = EmitVectorExtract(context, op.Rn, i, narrowSize + 1, isSignedSrc);

        Operand narrowed;

        if (isSignedSrc)
        {
            narrowed = EmitSignedSrcSatQ(context, wide, narrowSize, isSignedDst);
        }
        else
        {
            narrowed = EmitUnsignedSrcSatQ(context, wide, narrowSize, isSignedDst);
        }

        result = EmitVectorInsert(context, result, narrowed, dstBase + i, narrowSize);
    }

    context.Copy(d, result);
}
// long SignedSignSatQ(long op, int size);
// Emits IR that maps a signed 64-bit value to a saturation limit chosen by its sign:
// the result is 0 when op == 0, the largest signed eSize-bit value when op > 0 and the
// smallest signed eSize-bit value when op < 0. The QC flag is set whenever op is non-zero.
// Returns the I64 local holding the result.
public static Operand EmitSignedSignSatQ(ArmEmitterContext context, Operand op, int size)
{
// Element width in bits: 8, 16, 32 or 64 for size 0..3.
int eSize = 8 << size;
Debug.Assert(op.Type == OperandType.I64);
Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64);
Operand lbl1 = Label();
Operand lblEnd = Label();
Operand zeroL = Const(0L);
Operand maxT = Const((1L << (eSize - 1)) - 1L);
Operand minT = Const(-(1L << (eSize - 1)));
// The result starts at zero, which is what is returned when op == 0.
Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroL);
// op > 0 falls through: saturate to the positive limit.
context.BranchIf(lbl1, op, zeroL, Comparison.LessOrEqual);
context.Copy(res, maxT);
SetFpFlag(context, FPState.QcFlag, Const(1));
context.Branch(lblEnd);
context.MarkLabel(lbl1);
// op == 0 skips to the end; op < 0 falls through: saturate to the negative limit.
context.BranchIf(lblEnd, op, zeroL, Comparison.GreaterOrEqual);
context.Copy(res, minT);
SetFpFlag(context, FPState.QcFlag, Const(1));
context.Branch(lblEnd);
context.MarkLabel(lblEnd);
return res;
}
// private static ulong UnsignedSignSatQ(ulong op, int size);
// Emits IR that returns 0 when op == 0 and otherwise the largest unsigned eSize-bit
// value, setting the QC flag in the latter case. Returns the I64 local holding the result.
public static Operand EmitUnsignedSignSatQ(ArmEmitterContext context, Operand op, int size)
{
// Element width in bits: 8, 16, 32 or 64 for size 0..3.
int eSize = 8 << size;
Debug.Assert(op.Type == OperandType.I64);
Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64);
Operand lblEnd = Label();
Operand zeroUL = Const(0UL);
// All-ones value of eSize bits.
Operand maxT = Const(ulong.MaxValue >> (64 - eSize));
Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroUL);
// NOTE(review): LessOrEqualUI is presumably the unsigned comparison, so this branch is
// taken only when op == 0.
context.BranchIf(lblEnd, op, zeroUL, Comparison.LessOrEqualUI);
context.Copy(res, maxT);
SetFpFlag(context, FPState.QcFlag, Const(1));
context.Branch(lblEnd);
context.MarkLabel(lblEnd);
return res;
}
// TSrc (16bit, 32bit, 64bit; signed) > TDst (8bit, 16bit, 32bit; signed, unsigned).
// long SignedSrcSignedDstSatQ(long op, int size); ulong SignedSrcUnsignedDstSatQ(long op, int size);
// Emits IR that clamps the signed 64-bit value `op` to the range of the destination
// element type (signed or unsigned, 8 << sizeDst bits wide). The QC flag is set whenever
// the value had to be clamped. Returns the I64 local holding the result.
public static Operand EmitSignedSrcSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedDst)
{
int eSizeDst = 8 << sizeDst;
Debug.Assert(op.Type == OperandType.I64);
Debug.Assert(eSizeDst == 8 || eSizeDst == 16 || eSizeDst == 32);
Operand lbl1 = Label();
Operand lblEnd = Label();
// Upper and lower bounds of the destination type (the unsigned lower bound is 0).
Operand maxT = signedDst ? Const((1L << (eSizeDst - 1)) - 1L) : Const((1UL << eSizeDst) - 1UL);
Operand minT = signedDst ? Const(-(1L << (eSizeDst - 1))) : Const(0UL);
// The result starts as the unmodified input.
Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);
// op > maxT falls through: clamp to the upper bound.
context.BranchIf(lbl1, op, maxT, Comparison.LessOrEqual);
context.Copy(res, maxT);
SetFpFlag(context, FPState.QcFlag, Const(1));
context.Branch(lblEnd);
context.MarkLabel(lbl1);
// op < minT falls through: clamp to the lower bound.
context.BranchIf(lblEnd, op, minT, Comparison.GreaterOrEqual);
context.Copy(res, minT);
SetFpFlag(context, FPState.QcFlag, Const(1));
context.Branch(lblEnd);
context.MarkLabel(lblEnd);
return res;
}
// TSrc (16bit, 32bit, 64bit; unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned).
// long UnsignedSrcSignedDstSatQ(ulong op, int size); ulong UnsignedSrcUnsignedDstSatQ(ulong op, int size);
// Emits IR that clamps the 64-bit value `op` to the upper bound of the destination
// element type (signed or unsigned, 8 << sizeDst bits wide). No lower bound is checked.
// The QC flag is set whenever the value had to be clamped. Returns the I64 local holding
// the result.
public static Operand EmitUnsignedSrcSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedDst)
{
int eSizeDst = 8 << sizeDst;
Debug.Assert(op.Type == OperandType.I64);
Debug.Assert(eSizeDst == 8 || eSizeDst == 16 || eSizeDst == 32);
Operand lblEnd = Label();
Operand maxT = signedDst ? Const((1L << (eSizeDst - 1)) - 1L) : Const((1UL << eSizeDst) - 1UL);
// The result starts as the unmodified input.
Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);
// NOTE(review): LessOrEqualUI is presumably the unsigned comparison; values within the
// bound skip the clamp.
context.BranchIf(lblEnd, op, maxT, Comparison.LessOrEqualUI);
context.Copy(res, maxT);
SetFpFlag(context, FPState.QcFlag, Const(1));
context.Branch(lblEnd);
context.MarkLabel(lblEnd);
return res;
}
// long UnarySignedSatQAbsOrNeg(long op);
// Emits IR that returns `op` unchanged unless it equals long.MinValue, in which case
// long.MaxValue is returned and the QC flag is set. Returns the I64 local holding the result.
// NOTE(review): presumably `op` is the already-computed 64-bit abs/neg result, for which
// long.MinValue is the only overflowing value - confirm against the callers.
private static Operand EmitUnarySignedSatQAbsOrNeg(ArmEmitterContext context, Operand op)
{
Debug.Assert(op.Type == OperandType.I64);
Operand lblEnd = Label();
Operand minL = Const(long.MinValue);
Operand maxL = Const(long.MaxValue);
Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);
// Any value other than long.MinValue is passed through untouched.
context.BranchIf(lblEnd, op, minL, Comparison.NotEqual);
context.Copy(res, maxL);
SetFpFlag(context, FPState.QcFlag, Const(1));
context.Branch(lblEnd);
context.MarkLabel(lblEnd);
return res;
}
// long BinarySignedSatQAdd(long op1, long op2);
// Emits IR for a 64-bit signed saturating addition. On overflow the result is
// long.MaxValue when op1 is non-negative and long.MinValue otherwise, and the QC flag
// is set. Returns the I64 local holding the result.
public static Operand EmitBinarySignedSatQAdd(ArmEmitterContext context, Operand op1, Operand op2)
{
Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
Operand lblEnd = Label();
Operand minL = Const(long.MinValue);
Operand maxL = Const(long.MaxValue);
Operand zeroL = Const(0L);
Operand add = context.Add(op1, op2);
Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add);
// Overflow test: the sign bit of (~(op1 ^ op2) & (op1 ^ add)) is set only when both
// operands have the same sign and the sum's sign differs from it.
Operand left = context.BitwiseNot(context.BitwiseExclusiveOr(op1, op2));
Operand right = context.BitwiseExclusiveOr(op1, add);
context.BranchIf(lblEnd, context.BitwiseAnd(left, right), zeroL, Comparison.GreaterOrEqual);
// Overflowed: pick the limit matching the sign of op1.
Operand isPositive = context.ICompareGreaterOrEqual(op1, zeroL);
context.Copy(res, context.ConditionalSelect(isPositive, maxL, minL));
SetFpFlag(context, FPState.QcFlag, Const(1));
context.Branch(lblEnd);
context.MarkLabel(lblEnd);
return res;
}
// ulong BinaryUnsignedSatQAdd(ulong op1, ulong op2);
// Emits IR for a 64-bit unsigned saturating addition. When the sum wraps around, the
// result is ulong.MaxValue and the QC flag is set. Returns the I64 local holding the result.
public static Operand EmitBinaryUnsignedSatQAdd(ArmEmitterContext context, Operand op1, Operand op2)
{
Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
Operand lblEnd = Label();
Operand maxUL = Const(ulong.MaxValue);
Operand add = context.Add(op1, op2);
Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add);
// No wrap-around happened if the sum is not below op1.
// NOTE(review): GreaterOrEqualUI is presumably the unsigned comparison.
context.BranchIf(lblEnd, add, op1, Comparison.GreaterOrEqualUI);
context.Copy(res, maxUL);
SetFpFlag(context, FPState.QcFlag, Const(1));
context.Branch(lblEnd);
context.MarkLabel(lblEnd);
return res;
}
// long BinarySignedSatQSub(long op1, long op2);
// Emits IR for a 64-bit signed saturating subtraction (op1 - op2). On overflow the
// result is long.MaxValue when op1 is non-negative and long.MinValue otherwise, and the
// QC flag is set. Returns the I64 local holding the result.
public static Operand EmitBinarySignedSatQSub(ArmEmitterContext context, Operand op1, Operand op2)
{
Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
Operand lblEnd = Label();
Operand minL = Const(long.MinValue);
Operand maxL = Const(long.MaxValue);
Operand zeroL = Const(0L);
Operand sub = context.Subtract(op1, op2);
Operand res = context.Copy(context.AllocateLocal(OperandType.I64), sub);
// Overflow test: the sign bit of ((op1 ^ op2) & (op1 ^ sub)) is set only when the
// operands have different signs and the difference's sign differs from op1's.
Operand left = context.BitwiseExclusiveOr(op1, op2);
Operand right = context.BitwiseExclusiveOr(op1, sub);
context.BranchIf(lblEnd, context.BitwiseAnd(left, right), zeroL, Comparison.GreaterOrEqual);
// Overflowed: pick the limit matching the sign of op1.
Operand isPositive = context.ICompareGreaterOrEqual(op1, zeroL);
context.Copy(res, context.ConditionalSelect(isPositive, maxL, minL));
SetFpFlag(context, FPState.QcFlag, Const(1));
context.Branch(lblEnd);
context.MarkLabel(lblEnd);
return res;
}
// ulong BinaryUnsignedSatQSub(ulong op1, ulong op2);
// Emits IR for a 64-bit unsigned saturating subtraction (op1 - op2). When op1 is below
// op2 the result is 0 and the QC flag is set. Returns the I64 local holding the result.
public static Operand EmitBinaryUnsignedSatQSub(ArmEmitterContext context, Operand op1, Operand op2)
{
Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
Operand lblEnd = Label();
Operand zeroL = Const(0L);
Operand sub = context.Subtract(op1, op2);
Operand res = context.Copy(context.AllocateLocal(OperandType.I64), sub);
// No borrow happened if op1 is not below op2.
// NOTE(review): GreaterOrEqualUI is presumably the unsigned comparison.
context.BranchIf(lblEnd, op1, op2, Comparison.GreaterOrEqualUI);
context.Copy(res, zeroL);
SetFpFlag(context, FPState.QcFlag, Const(1));
context.Branch(lblEnd);
context.MarkLabel(lblEnd);
return res;
}
// long BinarySignedSatQAcc(ulong op1, long op2);
// Emits IR that adds op1 (treated as unsigned, per the reference signature above) to
// op2 (treated as signed) and saturates the sum to long.MaxValue. Every saturating path
// stores long.MaxValue and sets the QC flag; there is no lower-bound clamp.
// Returns the I64 local holding the result.
private static Operand EmitBinarySignedSatQAcc(ArmEmitterContext context, Operand op1, Operand op2)
{
Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
Operand lbl1 = Label();
Operand lbl2 = Label();
Operand lblEnd = Label();
Operand maxL = Const(long.MaxValue);
Operand zeroL = Const(0L);
Operand add = context.Add(op1, op2);
Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add);
// Case split on whether op1 exceeds long.MaxValue.
// NOTE(review): GreaterUI / LessOrEqualUI are presumably the unsigned comparisons.
context.BranchIf(lbl1, op1, maxL, Comparison.GreaterUI);
// op1 <= long.MaxValue: saturate only when op2 is non-negative and the sum is negative,
// i.e. when the sign bit of (~op2 & add) is set.
Operand notOp2AndRes = context.BitwiseAnd(context.BitwiseNot(op2), add);
context.BranchIf(lblEnd, notOp2AndRes, zeroL, Comparison.GreaterOrEqual);
context.Copy(res, maxL);
SetFpFlag(context, FPState.QcFlag, Const(1));
context.Branch(lblEnd);
context.MarkLabel(lbl1);
// op1 > long.MaxValue and op2 >= 0: always saturates.
context.BranchIf(lbl2, op2, zeroL, Comparison.Less);
context.Copy(res, maxL);
SetFpFlag(context, FPState.QcFlag, Const(1));
context.Branch(lblEnd);
context.MarkLabel(lbl2);
// op1 > long.MaxValue and op2 < 0: saturate unless the sum fits in long.MaxValue.
context.BranchIf(lblEnd, add, maxL, Comparison.LessOrEqualUI);
context.Copy(res, maxL);
SetFpFlag(context, FPState.QcFlag, Const(1));
context.Branch(lblEnd);
context.MarkLabel(lblEnd);
return res;
}
// ulong BinaryUnsignedSatQAcc(long op1, ulong op2);
// Emits IR that adds op1 (treated as signed, per the reference signature above) to
// op2 (treated as unsigned) and saturates the sum to the unsigned 64-bit range:
// ulong.MaxValue on overflow, 0 on underflow. The QC flag is set whenever saturation
// occurs. Returns the I64 local holding the result.
private static Operand EmitBinaryUnsignedSatQAcc(ArmEmitterContext context, Operand op1, Operand op2)
{
Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);
Operand lbl1 = Label();
Operand lblEnd = Label();
Operand maxUL = Const(ulong.MaxValue);
Operand maxL = Const(long.MaxValue);
Operand zeroL = Const(0L);
Operand add = context.Add(op1, op2);
Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add);
// Case split on the sign of op1.
context.BranchIf(lbl1, op1, zeroL, Comparison.Less);
// op1 >= 0: saturate to ulong.MaxValue when the sum wrapped around (sum below op1).
// NOTE(review): GreaterOrEqualUI / GreaterUI are presumably the unsigned comparisons.
context.BranchIf(lblEnd, add, op1, Comparison.GreaterOrEqualUI);
context.Copy(res, maxUL);
SetFpFlag(context, FPState.QcFlag, Const(1));
context.Branch(lblEnd);
context.MarkLabel(lbl1);
// op1 < 0: no saturation when op2 exceeds long.MaxValue or when the sum is
// non-negative as a signed value; otherwise the result underflowed and becomes 0.
context.BranchIf(lblEnd, op2, maxL, Comparison.GreaterUI);
context.BranchIf(lblEnd, add, zeroL, Comparison.GreaterOrEqual);
context.Copy(res, zeroL);
SetFpFlag(context, FPState.QcFlag, Const(1));
context.Branch(lblEnd);
context.MarkLabel(lblEnd);
return res;
}
// Emits an absolute-value operation by clearing the sign bit: `value` is AND-NOT-ed
// with a -0.0 mask (single or double precision, scalar or broadcast to all elements).
public static Operand EmitFloatAbs(ArmEmitterContext context, Operand value, bool single, bool vector)
{
    if (single)
    {
        Operand signMask = vector
            ? X86GetAllElements(context, -0f)
            : X86GetScalar(context, -0f);

        return context.AddIntrinsic(Intrinsic.X86Andnps, signMask, value);
    }
    else
    {
        Operand signMask = vector
            ? X86GetAllElements(context, -0d)
            : X86GetScalar(context, -0d);

        return context.AddIntrinsic(Intrinsic.X86Andnpd, signMask, value);
    }
}
// Extracts one element of a vector register, sign-extended to 64 bits.
public static Operand EmitVectorExtractSx(ArmEmitterContext context, int reg, int index, int size)
    => EmitVectorExtract(context, reg, index, size, signed: true);
// Extracts one element of a vector register, zero-extended to 64 bits.
public static Operand EmitVectorExtractZx(ArmEmitterContext context, int reg, int index, int size)
    => EmitVectorExtract(context, reg, index, size, signed: false);
// Extracts element `index` of vector register `reg`, where `size` is the log2 of the
// element width in bytes (0..3). 8/16/32-bit elements are sign- or zero-extended to I64
// according to `signed`; 64-bit elements are returned as extracted.
// Throws ArgumentOutOfRangeException (via ThrowIfInvalid) for an invalid index or size.
public static Operand EmitVectorExtract(ArmEmitterContext context, int reg, int index, int size, bool signed)
{
    ThrowIfInvalid(index, size);

    Operand vector = GetVec(reg);

    switch (size)
    {
        case 0:
        {
            Operand element = context.VectorExtract8(vector, index);

            return signed
                ? context.SignExtend8(OperandType.I64, element)
                : context.ZeroExtend8(OperandType.I64, element);
        }

        case 1:
        {
            Operand element = context.VectorExtract16(vector, index);

            return signed
                ? context.SignExtend16(OperandType.I64, element)
                : context.ZeroExtend16(OperandType.I64, element);
        }

        case 2:
        {
            Operand element = context.VectorExtract(OperandType.I32, vector, index);

            return signed
                ? context.SignExtend32(OperandType.I64, element)
                : context.ZeroExtend32(OperandType.I64, element);
        }

        case 3:
            // Already 64 bits wide: no extension required.
            return context.VectorExtract(OperandType.I64, vector, index);

        default:
            // Not reachable: ThrowIfInvalid rejects every other size.
            return default;
    }
}
// Inserts `value` as element `index` of `vector`, where `size` is the log2 of the element
// width in bytes (0..3), and returns the updated vector operand. For element sizes
// below 64 bits an I64 value is first converted to I32.
// Throws ArgumentOutOfRangeException (via ThrowIfInvalid) for an invalid index or size.
public static Operand EmitVectorInsert(ArmEmitterContext context, Operand vector, Operand value, int index, int size)
{
    ThrowIfInvalid(index, size);

    Operand element = value;

    bool needsNarrowing = size < 3 && element.Type == OperandType.I64;

    if (needsNarrowing)
    {
        element = context.ConvertI64ToI32(element);
    }

    switch (size)
    {
        case 0:
            return context.VectorInsert8(vector, element, index);

        case 1:
            return context.VectorInsert16(vector, element, index);

        case 2:
        case 3:
            return context.VectorInsert(vector, element, index);

        default:
            // Not reachable: ThrowIfInvalid rejects every other size.
            return vector;
    }
}
// Validates an element index / element size pair for a 16-byte vector.
// `size` must be in 0..3 and `index` must be in 0..(16 >> size) - 1; otherwise an
// ArgumentOutOfRangeException naming the offending parameter is thrown. `size` is
// checked first.
public static void ThrowIfInvalid(int index, int size)
{
    bool sizeIsValid = size >= 0 && size <= 3;

    if (!sizeIsValid)
    {
        throw new ArgumentOutOfRangeException(nameof(size));
    }

    // Number of elements of this size that fit in 16 bytes.
    int elementCount = 16 >> size;

    if (index < 0 || index >= elementCount)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }
}
}
}