Fix Frecpe_S/V and Frsqrte_S/V (full FP emulation). Add SSE optimizations and SoftFloat implementations for the Fcmeq/ge/gt/le/lt_S/V (register and zero), Faddp_S/V, Fmaxp_V and Fminp_V instructions; add SSE optimizations for the Shll_V and S/Ushll_V instructions; improve the SSE optimization for the Xtn_V instruction. Add tests. (#543)

* Update Optimizations.cs

* Update InstEmitSimdShift.cs

* Update InstEmitSimdHelper.cs

* Update InstEmitSimdArithmetic.cs

* Update InstEmitSimdMove.cs

* Update SoftFloat.cs

* Update InstEmitSimdCmp.cs

* Update CpuTestSimdShImm.cs

* Update CpuTestSimd.cs

* Update CpuTestSimdReg.cs

* Nit.

* Update SoftFloat.cs

* Update InstEmitSimdArithmetic.cs

* Update InstEmitSimdHelper.cs

* Update CpuTestSimd.cs

* Explicit some implicit casts.

* Simplify some powers; nits.

* Update OpCodeTable.cs

* Update InstEmitSimdArithmetic.cs

* Update CpuTestSimdReg.cs

* Update InstEmitSimdArithmetic.cs
This commit is contained in:
LDj3SNuD 2018-12-26 18:11:36 +01:00 committed by gdkchan
parent d8f2497f15
commit 0f5b6dfbe8
11 changed files with 1808 additions and 441 deletions

View file

@ -176,12 +176,119 @@ namespace ChocolArm64.Instructions
public static void Fabd_S(ILEmitterCtx context) public static void Fabd_S(ILEmitterCtx context)
{ {
EmitScalarBinaryOpF(context, () => if (Optimizations.FastFP && Optimizations.UseSse2)
{ {
context.Emit(OpCodes.Sub); OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
EmitUnaryMathCall(context, nameof(Math.Abs)); int sizeF = op.Size & 1;
});
if (sizeF == 0)
{
Type[] typesSsv = new Type[] { typeof(float) };
Type[] typesSubAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdc_R4(-0f);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv));
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), typesSubAndNot));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot));
context.EmitStvec(op.Rd);
EmitVectorZero32_128(context, op.Rd);
}
else /* if (sizeF == 1) */
{
Type[] typesSsv = new Type[] { typeof(double) };
Type[] typesSubAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
context.EmitLdc_R8(-0d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));
EmitLdvecWithCastToDouble(context, op.Rn);
EmitLdvecWithCastToDouble(context, op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesSubAndNot));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot));
EmitStvecWithCastFromDouble(context, op.Rd);
EmitVectorZeroUpper(context, op.Rd);
}
}
else
{
EmitScalarBinaryOpF(context, () =>
{
EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub));
EmitUnaryMathCall(context, nameof(Math.Abs));
});
}
}
public static void Fabd_V(ILEmitterCtx context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
int sizeF = op.Size & 1;
if (sizeF == 0)
{
Type[] typesSav = new Type[] { typeof(float) };
Type[] typesSubAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdc_R4(-0f);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav));
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesSubAndNot));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot));
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
}
else /* if (sizeF == 1) */
{
Type[] typesSav = new Type[] { typeof(double) };
Type[] typesSubAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
context.EmitLdc_R8(-0d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
EmitLdvecWithCastToDouble(context, op.Rn);
EmitLdvecWithCastToDouble(context, op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAndNot));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot));
EmitStvecWithCastFromDouble(context, op.Rd);
}
}
else
{
EmitVectorBinaryOpF(context, () =>
{
EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub));
EmitUnaryMathCall(context, nameof(Math.Abs));
});
}
} }
public static void Fabs_S(ILEmitterCtx context) public static void Fabs_S(ILEmitterCtx context)
@ -321,17 +428,60 @@ namespace ChocolArm64.Instructions
int sizeF = op.Size & 1; int sizeF = op.Size & 1;
EmitVectorExtractF(context, op.Rn, 0, sizeF); if (Optimizations.FastFP && Optimizations.UseSse3)
EmitVectorExtractF(context, op.Rn, 1, sizeF); {
if (sizeF == 0)
{
Type[] typesAddH = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.Emit(OpCodes.Add); context.EmitLdvec(op.Rn);
context.Emit(OpCodes.Dup);
EmitScalarSetF(context, op.Rd, sizeF); context.EmitCall(typeof(Sse3).GetMethod(nameof(Sse3.HorizontalAdd), typesAddH));
context.EmitStvec(op.Rd);
EmitVectorZero32_128(context, op.Rd);
}
else /* if (sizeF == 1) */
{
Type[] typesAddH = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
EmitLdvecWithCastToDouble(context, op.Rn);
context.Emit(OpCodes.Dup);
context.EmitCall(typeof(Sse3).GetMethod(nameof(Sse3.HorizontalAdd), typesAddH));
EmitStvecWithCastFromDouble(context, op.Rd);
EmitVectorZeroUpper(context, op.Rd);
}
}
else
{
EmitVectorExtractF(context, op.Rn, 0, sizeF);
EmitVectorExtractF(context, op.Rn, 1, sizeF);
EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd));
EmitScalarSetF(context, op.Rd, sizeF);
}
} }
public static void Faddp_V(ILEmitterCtx context) public static void Faddp_V(ILEmitterCtx context)
{ {
EmitVectorPairwiseOpF(context, () => context.Emit(OpCodes.Add)); if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
{
EmitVectorPairwiseSseOrSse2OpF(context, nameof(Sse.Add));
}
else
{
EmitVectorPairwiseOpF(context, () =>
{
EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd));
});
}
} }
public static void Fdiv_S(ILEmitterCtx context) public static void Fdiv_S(ILEmitterCtx context)
@ -462,10 +612,18 @@ namespace ChocolArm64.Instructions
public static void Fmaxp_V(ILEmitterCtx context) public static void Fmaxp_V(ILEmitterCtx context)
{ {
EmitVectorPairwiseOpF(context, () => if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
{ {
EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax)); EmitVectorPairwiseSseOrSse2OpF(context, nameof(Sse.Max));
}); }
else
{
EmitVectorPairwiseOpF(context, () =>
{
EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax));
});
}
} }
public static void Fmin_S(ILEmitterCtx context) public static void Fmin_S(ILEmitterCtx context)
@ -518,10 +676,18 @@ namespace ChocolArm64.Instructions
public static void Fminp_V(ILEmitterCtx context) public static void Fminp_V(ILEmitterCtx context)
{ {
EmitVectorPairwiseOpF(context, () => if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
{ {
EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin)); EmitVectorPairwiseSseOrSse2OpF(context, nameof(Sse.Min));
}); }
else
{
EmitVectorPairwiseOpF(context, () =>
{
EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin));
});
}
} }
public static void Fmla_Se(ILEmitterCtx context) public static void Fmla_Se(ILEmitterCtx context)
@ -1085,18 +1251,42 @@ namespace ChocolArm64.Instructions
public static void Frecpe_S(ILEmitterCtx context) public static void Frecpe_S(ILEmitterCtx context)
{ {
EmitScalarUnaryOpF(context, () => OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
int sizeF = op.Size & 1;
if (Optimizations.FastFP && Optimizations.UseSse
&& sizeF == 0)
{ {
EmitUnarySoftFloatCall(context, nameof(SoftFloat.RecipEstimate)); EmitScalarSseOrSse2OpF(context, nameof(Sse.ReciprocalScalar));
}); }
else
{
EmitScalarUnaryOpF(context, () =>
{
EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipEstimate));
});
}
} }
public static void Frecpe_V(ILEmitterCtx context) public static void Frecpe_V(ILEmitterCtx context)
{ {
EmitVectorUnaryOpF(context, () => OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
int sizeF = op.Size & 1;
if (Optimizations.FastFP && Optimizations.UseSse
&& sizeF == 0)
{ {
EmitUnarySoftFloatCall(context, nameof(SoftFloat.RecipEstimate)); EmitVectorSseOrSse2OpF(context, nameof(Sse.Reciprocal));
}); }
else
{
EmitVectorUnaryOpF(context, () =>
{
EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipEstimate));
});
}
} }
public static void Frecps_S(ILEmitterCtx context) // Fused. public static void Frecps_S(ILEmitterCtx context) // Fused.
@ -1398,18 +1588,42 @@ namespace ChocolArm64.Instructions
public static void Frsqrte_S(ILEmitterCtx context) public static void Frsqrte_S(ILEmitterCtx context)
{ {
EmitScalarUnaryOpF(context, () => OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
int sizeF = op.Size & 1;
if (Optimizations.FastFP && Optimizations.UseSse
&& sizeF == 0)
{ {
EmitUnarySoftFloatCall(context, nameof(SoftFloat.InvSqrtEstimate)); EmitScalarSseOrSse2OpF(context, nameof(Sse.ReciprocalSqrtScalar));
}); }
else
{
EmitScalarUnaryOpF(context, () =>
{
EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtEstimate));
});
}
} }
public static void Frsqrte_V(ILEmitterCtx context) public static void Frsqrte_V(ILEmitterCtx context)
{ {
EmitVectorUnaryOpF(context, () => OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
int sizeF = op.Size & 1;
if (Optimizations.FastFP && Optimizations.UseSse
&& sizeF == 0)
{ {
EmitUnarySoftFloatCall(context, nameof(SoftFloat.InvSqrtEstimate)); EmitVectorSseOrSse2OpF(context, nameof(Sse.ReciprocalSqrt));
}); }
else
{
EmitVectorUnaryOpF(context, () =>
{
EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtEstimate));
});
}
} }
public static void Frsqrts_S(ILEmitterCtx context) // Fused. public static void Frsqrts_S(ILEmitterCtx context) // Fused.

View file

@ -15,7 +15,7 @@ namespace ChocolArm64.Instructions
{ {
public static void Cmeq_S(ILEmitterCtx context) public static void Cmeq_S(ILEmitterCtx context)
{ {
EmitCmp(context, OpCodes.Beq_S, scalar: true); EmitCmpOp(context, OpCodes.Beq_S, scalar: true);
} }
public static void Cmeq_V(ILEmitterCtx context) public static void Cmeq_V(ILEmitterCtx context)
@ -32,28 +32,28 @@ namespace ChocolArm64.Instructions
} }
else else
{ {
EmitCmp(context, OpCodes.Beq_S, scalar: false); EmitCmpOp(context, OpCodes.Beq_S, scalar: false);
} }
} }
else else
{ {
EmitCmp(context, OpCodes.Beq_S, scalar: false); EmitCmpOp(context, OpCodes.Beq_S, scalar: false);
} }
} }
public static void Cmge_S(ILEmitterCtx context) public static void Cmge_S(ILEmitterCtx context)
{ {
EmitCmp(context, OpCodes.Bge_S, scalar: true); EmitCmpOp(context, OpCodes.Bge_S, scalar: true);
} }
public static void Cmge_V(ILEmitterCtx context) public static void Cmge_V(ILEmitterCtx context)
{ {
EmitCmp(context, OpCodes.Bge_S, scalar: false); EmitCmpOp(context, OpCodes.Bge_S, scalar: false);
} }
public static void Cmgt_S(ILEmitterCtx context) public static void Cmgt_S(ILEmitterCtx context)
{ {
EmitCmp(context, OpCodes.Bgt_S, scalar: true); EmitCmpOp(context, OpCodes.Bgt_S, scalar: true);
} }
public static void Cmgt_V(ILEmitterCtx context) public static void Cmgt_V(ILEmitterCtx context)
@ -70,63 +70,63 @@ namespace ChocolArm64.Instructions
} }
else else
{ {
EmitCmp(context, OpCodes.Bgt_S, scalar: false); EmitCmpOp(context, OpCodes.Bgt_S, scalar: false);
} }
} }
else else
{ {
EmitCmp(context, OpCodes.Bgt_S, scalar: false); EmitCmpOp(context, OpCodes.Bgt_S, scalar: false);
} }
} }
public static void Cmhi_S(ILEmitterCtx context) public static void Cmhi_S(ILEmitterCtx context)
{ {
EmitCmp(context, OpCodes.Bgt_Un_S, scalar: true); EmitCmpOp(context, OpCodes.Bgt_Un_S, scalar: true);
} }
public static void Cmhi_V(ILEmitterCtx context) public static void Cmhi_V(ILEmitterCtx context)
{ {
EmitCmp(context, OpCodes.Bgt_Un_S, scalar: false); EmitCmpOp(context, OpCodes.Bgt_Un_S, scalar: false);
} }
public static void Cmhs_S(ILEmitterCtx context) public static void Cmhs_S(ILEmitterCtx context)
{ {
EmitCmp(context, OpCodes.Bge_Un_S, scalar: true); EmitCmpOp(context, OpCodes.Bge_Un_S, scalar: true);
} }
public static void Cmhs_V(ILEmitterCtx context) public static void Cmhs_V(ILEmitterCtx context)
{ {
EmitCmp(context, OpCodes.Bge_Un_S, scalar: false); EmitCmpOp(context, OpCodes.Bge_Un_S, scalar: false);
} }
public static void Cmle_S(ILEmitterCtx context) public static void Cmle_S(ILEmitterCtx context)
{ {
EmitCmp(context, OpCodes.Ble_S, scalar: true); EmitCmpOp(context, OpCodes.Ble_S, scalar: true);
} }
public static void Cmle_V(ILEmitterCtx context) public static void Cmle_V(ILEmitterCtx context)
{ {
EmitCmp(context, OpCodes.Ble_S, scalar: false); EmitCmpOp(context, OpCodes.Ble_S, scalar: false);
} }
public static void Cmlt_S(ILEmitterCtx context) public static void Cmlt_S(ILEmitterCtx context)
{ {
EmitCmp(context, OpCodes.Blt_S, scalar: true); EmitCmpOp(context, OpCodes.Blt_S, scalar: true);
} }
public static void Cmlt_V(ILEmitterCtx context) public static void Cmlt_V(ILEmitterCtx context)
{ {
EmitCmp(context, OpCodes.Blt_S, scalar: false); EmitCmpOp(context, OpCodes.Blt_S, scalar: false);
} }
public static void Cmtst_S(ILEmitterCtx context) public static void Cmtst_S(ILEmitterCtx context)
{ {
EmitCmtst(context, scalar: true); EmitCmtstOp(context, scalar: true);
} }
public static void Cmtst_V(ILEmitterCtx context) public static void Cmtst_V(ILEmitterCtx context)
{ {
EmitCmtst(context, scalar: false); EmitCmtstOp(context, scalar: false);
} }
public static void Fccmp_S(ILEmitterCtx context) public static void Fccmp_S(ILEmitterCtx context)
@ -145,7 +145,7 @@ namespace ChocolArm64.Instructions
context.MarkLabel(lblTrue); context.MarkLabel(lblTrue);
EmitFcmpE(context, signalNaNs: false); EmitFcmpOrFcmpe(context, signalNaNs: false);
context.MarkLabel(lblEnd); context.MarkLabel(lblEnd);
} }
@ -166,120 +166,152 @@ namespace ChocolArm64.Instructions
context.MarkLabel(lblTrue); context.MarkLabel(lblTrue);
EmitFcmpE(context, signalNaNs: true); EmitFcmpOrFcmpe(context, signalNaNs: true);
context.MarkLabel(lblEnd); context.MarkLabel(lblEnd);
} }
public static void Fcmeq_S(ILEmitterCtx context) public static void Fcmeq_S(ILEmitterCtx context)
{ {
if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2) && Optimizations.UseSse2)
{ {
EmitScalarSseOrSse2OpF(context, nameof(Sse.CompareEqualScalar)); EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareEqualScalar), scalar: true);
} }
else else
{ {
EmitScalarFcmp(context, OpCodes.Beq_S); EmitCmpOpF(context, nameof(SoftFloat32.FPCompareEQ), scalar: true);
} }
} }
public static void Fcmeq_V(ILEmitterCtx context) public static void Fcmeq_V(ILEmitterCtx context)
{ {
if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2) && Optimizations.UseSse2)
{ {
EmitVectorSseOrSse2OpF(context, nameof(Sse.CompareEqual)); EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareEqual), scalar: false);
} }
else else
{ {
EmitVectorFcmp(context, OpCodes.Beq_S); EmitCmpOpF(context, nameof(SoftFloat32.FPCompareEQ), scalar: false);
} }
} }
public static void Fcmge_S(ILEmitterCtx context) public static void Fcmge_S(ILEmitterCtx context)
{ {
if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2) && Optimizations.UseSse2)
{ {
EmitScalarSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqualScalar)); EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqualScalar), scalar: true);
} }
else else
{ {
EmitScalarFcmp(context, OpCodes.Bge_S); EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGE), scalar: true);
} }
} }
public static void Fcmge_V(ILEmitterCtx context) public static void Fcmge_V(ILEmitterCtx context)
{ {
if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2) && Optimizations.UseSse2)
{ {
EmitVectorSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqual)); EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqual), scalar: false);
} }
else else
{ {
EmitVectorFcmp(context, OpCodes.Bge_S); EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGE), scalar: false);
} }
} }
public static void Fcmgt_S(ILEmitterCtx context) public static void Fcmgt_S(ILEmitterCtx context)
{ {
if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2) && Optimizations.UseSse2)
{ {
EmitScalarSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanScalar)); EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanScalar), scalar: true);
} }
else else
{ {
EmitScalarFcmp(context, OpCodes.Bgt_S); EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGT), scalar: true);
} }
} }
public static void Fcmgt_V(ILEmitterCtx context) public static void Fcmgt_V(ILEmitterCtx context)
{ {
if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2) && Optimizations.UseSse2)
{ {
EmitVectorSseOrSse2OpF(context, nameof(Sse.CompareGreaterThan)); EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareGreaterThan), scalar: false);
} }
else else
{ {
EmitVectorFcmp(context, OpCodes.Bgt_S); EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGT), scalar: false);
} }
} }
public static void Fcmle_S(ILEmitterCtx context) public static void Fcmle_S(ILEmitterCtx context)
{ {
EmitScalarFcmp(context, OpCodes.Ble_S); if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
{
EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqualScalar), scalar: true, isLeOrLt: true);
}
else
{
EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLE), scalar: true);
}
} }
public static void Fcmle_V(ILEmitterCtx context) public static void Fcmle_V(ILEmitterCtx context)
{ {
EmitVectorFcmp(context, OpCodes.Ble_S); if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
{
EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqual), scalar: false, isLeOrLt: true);
}
else
{
EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLE), scalar: false);
}
} }
public static void Fcmlt_S(ILEmitterCtx context) public static void Fcmlt_S(ILEmitterCtx context)
{ {
EmitScalarFcmp(context, OpCodes.Blt_S); if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
{
EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanScalar), scalar: true, isLeOrLt: true);
}
else
{
EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLT), scalar: true);
}
} }
public static void Fcmlt_V(ILEmitterCtx context) public static void Fcmlt_V(ILEmitterCtx context)
{ {
EmitVectorFcmp(context, OpCodes.Blt_S); if (Optimizations.FastFP && Optimizations.UseSse
&& Optimizations.UseSse2)
{
EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareGreaterThan), scalar: false, isLeOrLt: true);
}
else
{
EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLT), scalar: false);
}
} }
public static void Fcmp_S(ILEmitterCtx context) public static void Fcmp_S(ILEmitterCtx context)
{ {
EmitFcmpE(context, signalNaNs: false); EmitFcmpOrFcmpe(context, signalNaNs: false);
} }
public static void Fcmpe_S(ILEmitterCtx context) public static void Fcmpe_S(ILEmitterCtx context)
{ {
EmitFcmpE(context, signalNaNs: true); EmitFcmpOrFcmpe(context, signalNaNs: true);
} }
private static void EmitFcmpE(ILEmitterCtx context, bool signalNaNs) private static void EmitFcmpOrFcmpe(ILEmitterCtx context, bool signalNaNs)
{ {
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
@ -430,7 +462,7 @@ namespace ChocolArm64.Instructions
{ {
context.EmitLdc_R4(0f); context.EmitLdc_R4(0f);
} }
else // if (op.Size == 1) else /* if (op.Size == 1) */
{ {
context.EmitLdc_R8(0d); context.EmitLdc_R8(0d);
} }
@ -448,7 +480,7 @@ namespace ChocolArm64.Instructions
} }
} }
private static void EmitCmp(ILEmitterCtx context, OpCode ilOp, bool scalar) private static void EmitCmpOp(ILEmitterCtx context, OpCode ilOp, bool scalar)
{ {
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
@ -492,7 +524,7 @@ namespace ChocolArm64.Instructions
} }
} }
private static void EmitCmtst(ILEmitterCtx context, bool scalar) private static void EmitCmtstOp(ILEmitterCtx context, bool scalar)
{ {
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
@ -532,84 +564,134 @@ namespace ChocolArm64.Instructions
} }
} }
private static void EmitScalarFcmp(ILEmitterCtx context, OpCode ilOp) private static void EmitCmpOpF(ILEmitterCtx context, string name, bool scalar)
{
EmitFcmp(context, ilOp, 0, scalar: true);
}
private static void EmitVectorFcmp(ILEmitterCtx context, OpCode ilOp)
{ {
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
int sizeF = op.Size & 1; int sizeF = op.Size & 1;
int bytes = op.GetBitsCount() >> 3; int bytes = op.GetBitsCount() >> 3;
int elems = bytes >> sizeF + 2; int elems = !scalar ? bytes >> sizeF + 2 : 1;
for (int index = 0; index < elems; index++) for (int index = 0; index < elems; index++)
{ {
EmitFcmp(context, ilOp, index, scalar: false); EmitVectorExtractF(context, op.Rn, index, sizeF);
if (op is OpCodeSimdReg64 binOp)
{
EmitVectorExtractF(context, binOp.Rm, index, sizeF);
}
else
{
if (sizeF == 0)
{
context.EmitLdc_R4(0f);
}
else /* if (sizeF == 1) */
{
context.EmitLdc_R8(0d);
}
}
EmitSoftFloatCall(context, name);
EmitVectorInsertF(context, op.Rd, index, sizeF);
} }
if (op.RegisterSize == RegisterSize.Simd64) if (!scalar)
{ {
EmitVectorZeroUpper(context, op.Rd); if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
}
else
{
if (sizeF == 0)
{
EmitVectorZero32_128(context, op.Rd);
}
else /* if (sizeF == 1) */
{
EmitVectorZeroUpper(context, op.Rd);
}
} }
} }
private static void EmitFcmp(ILEmitterCtx context, OpCode ilOp, int index, bool scalar) private static void EmitCmpSseOrSse2OpF(ILEmitterCtx context, string name, bool scalar, bool isLeOrLt = false)
{ {
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
int sizeF = op.Size & 1; int sizeF = op.Size & 1;
ulong szMask = ulong.MaxValue >> (64 - (32 << sizeF)); if (sizeF == 0)
EmitVectorExtractF(context, op.Rn, index, sizeF);
if (op is OpCodeSimdReg64 binOp)
{ {
EmitVectorExtractF(context, binOp.Rm, index, sizeF); Type[] types = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
}
else if (sizeF == 0) if (!isLeOrLt)
{ {
context.EmitLdc_R4(0f); context.EmitLdvec(op.Rn);
}
if (op is OpCodeSimdReg64 binOp)
{
context.EmitLdvec(binOp.Rm);
}
else
{
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
}
if (isLeOrLt)
{
context.EmitLdvec(op.Rn);
}
context.EmitCall(typeof(Sse).GetMethod(name, types));
context.EmitStvec(op.Rd);
if (scalar)
{
EmitVectorZero32_128(context, op.Rd);
}
else if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
} }
else /* if (sizeF == 1) */ else /* if (sizeF == 1) */
{ {
context.EmitLdc_R8(0d); Type[] types = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
if (!isLeOrLt)
{
EmitLdvecWithCastToDouble(context, op.Rn);
}
if (op is OpCodeSimdReg64 binOp)
{
EmitLdvecWithCastToDouble(context, binOp.Rm);
}
else
{
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero));
}
if (isLeOrLt)
{
EmitLdvecWithCastToDouble(context, op.Rn);
}
context.EmitCall(typeof(Sse2).GetMethod(name, types));
EmitStvecWithCastFromDouble(context, op.Rd);
if (scalar)
{
EmitVectorZeroUpper(context, op.Rd);
}
} }
ILLabel lblTrue = new ILLabel();
ILLabel lblEnd = new ILLabel();
context.Emit(ilOp, lblTrue);
if (scalar)
{
EmitVectorZeroAll(context, op.Rd);
}
else
{
EmitVectorInsert(context, op.Rd, index, sizeF + 2, 0);
}
context.Emit(OpCodes.Br_S, lblEnd);
context.MarkLabel(lblTrue);
if (scalar)
{
EmitVectorInsert(context, op.Rd, index, 3, (long)szMask);
EmitVectorZeroUpper(context, op.Rd);
}
else
{
EmitVectorInsert(context, op.Rd, index, sizeF + 2, (long)szMask);
}
context.MarkLabel(lblEnd);
} }
} }
} }

View file

@ -322,26 +322,6 @@ namespace ChocolArm64.Instructions
context.EmitCall(mthdInfo); context.EmitCall(mthdInfo);
} }
public static void EmitUnarySoftFloatCall(ILEmitterCtx context, string name)
{
IOpCodeSimd64 op = (IOpCodeSimd64)context.CurrOp;
int sizeF = op.Size & 1;
MethodInfo mthdInfo;
if (sizeF == 0)
{
mthdInfo = typeof(SoftFloat).GetMethod(name, new Type[] { typeof(float) });
}
else /* if (sizeF == 1) */
{
mthdInfo = typeof(SoftFloat).GetMethod(name, new Type[] { typeof(double) });
}
context.EmitCall(mthdInfo);
}
public static void EmitSoftFloatCall(ILEmitterCtx context, string name) public static void EmitSoftFloatCall(ILEmitterCtx context, string name)
{ {
IOpCodeSimd64 op = (IOpCodeSimd64)context.CurrOp; IOpCodeSimd64 op = (IOpCodeSimd64)context.CurrOp;
@ -909,6 +889,96 @@ namespace ChocolArm64.Instructions
} }
} }
public static void EmitVectorPairwiseSseOrSse2OpF(ILEmitterCtx context, string name)
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
int sizeF = op.Size & 1;
if (sizeF == 0)
{
if (op.RegisterSize == RegisterSize.Simd64)
{
Type[] types = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.UnpackLow), types));
context.Emit(OpCodes.Dup);
context.EmitStvectmp();
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh), types));
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
context.EmitLdvectmp();
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveHighToLow), types));
context.EmitCall(typeof(Sse).GetMethod(name, types));
context.EmitStvec(op.Rd);
}
else /* if (op.RegisterSize == RegisterSize.Simd128) */
{
Type[] typesSfl = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>), typeof(byte) };
Type[] types = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdvec(op.Rn);
context.Emit(OpCodes.Dup);
context.EmitStvectmp();
context.EmitLdvec(op.Rm);
context.Emit(OpCodes.Dup);
context.EmitStvectmp2();
context.EmitLdc_I4(2 << 6 | 0 << 4 | 2 << 2 | 0 << 0);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
context.EmitLdvectmp();
context.EmitLdvectmp2();
context.EmitLdc_I4(3 << 6 | 1 << 4 | 3 << 2 | 1 << 0);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
context.EmitCall(typeof(Sse).GetMethod(name, types));
context.EmitStvec(op.Rd);
}
}
else /* if (sizeF == 1) */
{
Type[] types = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
EmitLdvecWithCastToDouble(context, op.Rn);
context.Emit(OpCodes.Dup);
context.EmitStvectmp();
EmitLdvecWithCastToDouble(context, op.Rm);
context.Emit(OpCodes.Dup);
context.EmitStvectmp2();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackLow), types));
context.EmitLdvectmp();
context.EmitLdvectmp2();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackHigh), types));
context.EmitCall(typeof(Sse2).GetMethod(name, types));
EmitStvecWithCastFromDouble(context, op.Rd);
}
}
[Flags] [Flags]
public enum SaturatingFlags public enum SaturatingFlags
{ {

View file

@ -377,75 +377,47 @@ namespace ChocolArm64.Instructions
{ {
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
int elems = 8 >> op.Size; if (Optimizations.UseSsse3)
int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
if (Optimizations.UseSse41 && op.Size < 2)
{ {
void EmitZeroVector() long[] masks = new long[]
{ {
switch (op.Size) 14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0,
{ 13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0,
case 0: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt16Zero)); break; 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0
case 1: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt32Zero)); break; };
}
}
//For XTN, first operand is source, second operand is 0. Type[] typesMov = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
//For XTN2, first operand is 0, second operand is source. Type[] typesSfl = new Type[] { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) };
if (part != 0) Type[] typesSve = new Type[] { typeof(long), typeof(long) };
{
EmitZeroVector();
}
EmitLdvecWithSignedCast(context, op.Rn, op.Size + 1); string nameMov = op.RegisterSize == RegisterSize.Simd128
? nameof(Sse.MoveLowToHigh)
: nameof(Sse.MoveHighToLow);
//Set mask to discard the upper half of the wide elements. context.EmitLdvec(op.Rd);
switch (op.Size) VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
{
case 0: context.EmitLdc_I4(0x00ff); break;
case 1: context.EmitLdc_I4(0x0000ffff); break;
}
Type wideType = IntTypesPerSizeLog2[op.Size + 1]; context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh), typesMov));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), new Type[] { wideType })); EmitLdvecWithSignedCast(context, op.Rn, 0);
wideType = VectorIntTypesPerSizeLog2[op.Size + 1]; context.EmitLdc_I8(masks[op.Size]);
context.Emit(OpCodes.Dup);
Type[] wideTypes = new Type[] { wideType, wideType }; context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), wideTypes)); context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl));
if (part == 0) context.EmitCall(typeof(Sse).GetMethod(nameMov, typesMov));
{
EmitZeroVector();
}
//Pack values with signed saturation, the signed saturation shouldn't context.EmitStvec(op.Rd);
//saturate anything since the upper bits were masked off.
Type sseType = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
context.EmitCall(sseType.GetMethod(nameof(Sse2.PackUnsignedSaturate), wideTypes));
if (part != 0)
{
//For XTN2, we additionally need to discard the upper bits
//of the target register and OR the result with it.
EmitVectorZeroUpper(context, op.Rd);
EmitLdvecWithUnsignedCast(context, op.Rd, op.Size);
Type narrowType = VectorUIntTypesPerSizeLog2[op.Size];
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), new Type[] { narrowType, narrowType }));
}
EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
} }
else else
{ {
int elems = 8 >> op.Size;
int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
if (part != 0) if (part != 0)
{ {
context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rd);

View file

@ -22,9 +22,11 @@ namespace ChocolArm64.Instructions
{ {
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
int shift = GetImmShl(op);
EmitScalarUnaryOpZx(context, () => EmitScalarUnaryOpZx(context, () =>
{ {
context.EmitLdc_I4(GetImmShl(op)); context.EmitLdc_I4(shift);
context.Emit(OpCodes.Shl); context.Emit(OpCodes.Shl);
}); });
@ -34,13 +36,15 @@ namespace ChocolArm64.Instructions
{ {
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
int shift = GetImmShl(op);
if (Optimizations.UseSse2 && op.Size > 0) if (Optimizations.UseSse2 && op.Size > 0)
{ {
Type[] typesSll = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; Type[] typesSll = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
context.EmitLdc_I4(GetImmShl(op)); context.EmitLdc_I4(shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size); EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
@ -54,7 +58,7 @@ namespace ChocolArm64.Instructions
{ {
EmitVectorUnaryOpZx(context, () => EmitVectorUnaryOpZx(context, () =>
{ {
context.EmitLdc_I4(GetImmShl(op)); context.EmitLdc_I4(shift);
context.Emit(OpCodes.Shl); context.Emit(OpCodes.Shl);
}); });
@ -67,7 +71,33 @@ namespace ChocolArm64.Instructions
int shift = 8 << op.Size; int shift = 8 << op.Size;
EmitVectorShImmWidenBinaryZx(context, () => context.Emit(OpCodes.Shl), shift); if (Optimizations.UseSse41)
{
Type[] typesSll = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], typeof(byte) };
Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] };
string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16),
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitLdc_I4(shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1);
}
else
{
EmitVectorShImmWidenBinaryZx(context, () => context.Emit(OpCodes.Shl), shift);
}
} }
public static void Shrn_V(ILEmitterCtx context) public static void Shrn_V(ILEmitterCtx context)
@ -362,7 +392,35 @@ namespace ChocolArm64.Instructions
{ {
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
EmitVectorShImmWidenBinarySx(context, () => context.Emit(OpCodes.Shl), GetImmShl(op)); int shift = GetImmShl(op);
if (Optimizations.UseSse41)
{
Type[] typesSll = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1], typeof(byte) };
Type[] typesCvt = new Type[] { VectorIntTypesPerSizeLog2[op.Size] };
string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16),
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
EmitLdvecWithSignedCast(context, op.Rn, op.Size);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitLdc_I4(shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
EmitStvecWithSignedCast(context, op.Rd, op.Size + 1);
}
else
{
EmitVectorShImmWidenBinarySx(context, () => context.Emit(OpCodes.Shl), shift);
}
} }
public static void Sshr_S(ILEmitterCtx context) public static void Sshr_S(ILEmitterCtx context)
@ -663,7 +721,35 @@ namespace ChocolArm64.Instructions
{ {
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
EmitVectorShImmWidenBinaryZx(context, () => context.Emit(OpCodes.Shl), GetImmShl(op)); int shift = GetImmShl(op);
if (Optimizations.UseSse41)
{
Type[] typesSll = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], typeof(byte) };
Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] };
string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16),
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
context.EmitLdc_I4(numBytes);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitLdc_I4(shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1);
}
else
{
EmitVectorShImmWidenBinaryZx(context, () => context.Emit(OpCodes.Shl), shift);
}
} }
public static void Ushr_S(ILEmitterCtx context) public static void Ushr_S(ILEmitterCtx context)

View file

@ -9,191 +9,72 @@ namespace ChocolArm64.Instructions
{ {
static SoftFloat() static SoftFloat()
{ {
RecipEstimateTable = BuildRecipEstimateTable(); RecipEstimateTable = BuildRecipEstimateTable();
InvSqrtEstimateTable = BuildInvSqrtEstimateTable(); RecipSqrtEstimateTable = BuildRecipSqrtEstimateTable();
} }
private static readonly byte[] RecipEstimateTable; internal static readonly byte[] RecipEstimateTable;
private static readonly byte[] InvSqrtEstimateTable; internal static readonly byte[] RecipSqrtEstimateTable;
private static byte[] BuildRecipEstimateTable() private static byte[] BuildRecipEstimateTable()
{ {
byte[] table = new byte[256]; byte[] tbl = new byte[256];
for (ulong index = 0; index < 256; index++)
for (int idx = 0; idx < 256; idx++)
{ {
ulong a = index | 0x100; uint src = (uint)idx + 256u;
a = (a << 1) + 1; Debug.Assert(256u <= src && src < 512u);
ulong b = 0x80000 / a;
b = (b + 1) >> 1;
table[index] = (byte)(b & 0xFF); src = (src << 1) + 1u;
uint aux = (1u << 19) / src;
uint dst = (aux + 1u) >> 1;
Debug.Assert(256u <= dst && dst < 512u);
tbl[idx] = (byte)(dst - 256u);
} }
return table;
return tbl;
} }
private static byte[] BuildInvSqrtEstimateTable() private static byte[] BuildRecipSqrtEstimateTable()
{ {
byte[] table = new byte[512]; byte[] tbl = new byte[384];
for (ulong index = 128; index < 512; index++)
for (int idx = 0; idx < 384; idx++)
{ {
ulong a = index; uint src = (uint)idx + 128u;
if (a < 256)
Debug.Assert(128u <= src && src < 512u);
if (src < 256u)
{ {
a = (a << 1) + 1; src = (src << 1) + 1u;
} }
else else
{ {
a = (a | 1) << 1; src = (src >> 1) << 1;
src = (src + 1u) << 1;
} }
ulong b = 256; uint aux = 512u;
while (a * (b + 1) * (b + 1) < (1ul << 28))
while (src * (aux + 1u) * (aux + 1u) < (1u << 28))
{ {
b++; aux = aux + 1u;
}
b = (b + 1) >> 1;
table[index] = (byte)(b & 0xFF);
}
return table;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float RecipEstimate(float x)
{
return (float)RecipEstimate((double)x);
}
public static double RecipEstimate(double x)
{
ulong xBits = (ulong)BitConverter.DoubleToInt64Bits(x);
ulong xSign = xBits & 0x8000000000000000;
ulong xExp = (xBits >> 52) & 0x7FF;
ulong scaled = xBits & ((1ul << 52) - 1);
if (xExp >= 2045)
{
if (xExp == 0x7ff && scaled != 0)
{
// NaN
return BitConverter.Int64BitsToDouble((long)(xBits | 0x0008000000000000));
} }
// Infinity, or Out of range -> Zero uint dst = (aux + 1u) >> 1;
return BitConverter.Int64BitsToDouble((long)xSign);
Debug.Assert(256u <= dst && dst < 512u);
tbl[idx] = (byte)(dst - 256u);
} }
if (xExp == 0) return tbl;
{
if (scaled == 0)
{
// Zero -> Infinity
return BitConverter.Int64BitsToDouble((long)(xSign | 0x7FF0000000000000));
}
// Denormal
if ((scaled & (1ul << 51)) == 0)
{
xExp = ~0ul;
scaled <<= 2;
}
else
{
scaled <<= 1;
}
}
scaled >>= 44;
scaled &= 0xFF;
ulong resultExp = (2045 - xExp) & 0x7FF;
ulong estimate = (ulong)RecipEstimateTable[scaled];
ulong fraction = estimate << 44;
if (resultExp == 0)
{
fraction >>= 1;
fraction |= 1ul << 51;
}
else if (resultExp == 0x7FF)
{
resultExp = 0;
fraction >>= 2;
fraction |= 1ul << 50;
}
ulong result = xSign | (resultExp << 52) | fraction;
return BitConverter.Int64BitsToDouble((long)result);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float InvSqrtEstimate(float x)
{
return (float)InvSqrtEstimate((double)x);
}
public static double InvSqrtEstimate(double x)
{
ulong xBits = (ulong)BitConverter.DoubleToInt64Bits(x);
ulong xSign = xBits & 0x8000000000000000;
long xExp = (long)((xBits >> 52) & 0x7FF);
ulong scaled = xBits & ((1ul << 52) - 1);
if (xExp == 0x7FF && scaled != 0)
{
// NaN
return BitConverter.Int64BitsToDouble((long)(xBits | 0x0008000000000000));
}
if (xExp == 0)
{
if (scaled == 0)
{
// Zero -> Infinity
return BitConverter.Int64BitsToDouble((long)(xSign | 0x7FF0000000000000));
}
// Denormal
while ((scaled & (1 << 51)) == 0)
{
scaled <<= 1;
xExp--;
}
scaled <<= 1;
}
if (xSign != 0)
{
// Negative -> NaN
return BitConverter.Int64BitsToDouble((long)0x7FF8000000000000);
}
if (xExp == 0x7ff && scaled == 0)
{
// Infinity -> Zero
return BitConverter.Int64BitsToDouble((long)xSign);
}
if (((ulong)xExp & 1) == 1)
{
scaled >>= 45;
scaled &= 0xFF;
scaled |= 0x80;
}
else
{
scaled >>= 44;
scaled &= 0xFF;
scaled |= 0x100;
}
ulong resultExp = ((ulong)(3068 - xExp) / 2) & 0x7FF;
ulong estimate = (ulong)InvSqrtEstimateTable[scaled];
ulong fraction = estimate << 44;
ulong result = xSign | (resultExp << 52) | fraction;
return BitConverter.Int64BitsToDouble((long)result);
} }
} }
@ -395,12 +276,12 @@ namespace ChocolArm64.Instructions
{ {
intMant++; intMant++;
if (intMant == (uint)Math.Pow(2d, f)) if (intMant == 1u << f)
{ {
biasedExp = 1u; biasedExp = 1u;
} }
if (intMant == (uint)Math.Pow(2d, f + 1)) if (intMant == 1u << (f + 1))
{ {
biasedExp++; biasedExp++;
intMant >>= 1; intMant >>= 1;
@ -409,7 +290,7 @@ namespace ChocolArm64.Instructions
float result; float result;
if (biasedExp >= (uint)Math.Pow(2d, e) - 1u) if (biasedExp >= (1u << e) - 1u)
{ {
result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign);
@ -666,12 +547,12 @@ namespace ChocolArm64.Instructions
{ {
intMant++; intMant++;
if (intMant == (uint)Math.Pow(2d, f)) if (intMant == 1u << f)
{ {
biasedExp = 1u; biasedExp = 1u;
} }
if (intMant == (uint)Math.Pow(2d, f + 1)) if (intMant == 1u << (f + 1))
{ {
biasedExp++; biasedExp++;
intMant >>= 1; intMant >>= 1;
@ -682,7 +563,7 @@ namespace ChocolArm64.Instructions
if (!state.GetFpcrFlag(Fpcr.Ahp)) if (!state.GetFpcrFlag(Fpcr.Ahp))
{ {
if (biasedExp >= (uint)Math.Pow(2d, e) - 1u) if (biasedExp >= (1u << e) - 1u)
{ {
resultBits = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); resultBits = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign);
@ -697,7 +578,7 @@ namespace ChocolArm64.Instructions
} }
else else
{ {
if (biasedExp >= (uint)Math.Pow(2d, e)) if (biasedExp >= 1u << e)
{ {
resultBits = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu); resultBits = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu);
@ -826,6 +707,94 @@ namespace ChocolArm64.Instructions
return result; return result;
} }
// Single-precision equality compare producing a lane mask.
// Returns the all-ones bit pattern when both operands are non-NaN and compare equal,
// otherwise the all-zeros pattern. InvalidOp is raised only when at least one operand
// is a signaling NaN; quiet NaNs just yield the all-zeros result.
public static float FPCompareEQ(float value1, float value2, CpuThreadState state)
{
    Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompareEQ: state.Fpcr = 0x{state.Fpcr:X8}");

    value1 = value1.FPUnpack(out FpType lhsType, out _, out _, state);
    value2 = value2.FPUnpack(out FpType rhsType, out _, out _, state);

    bool anySignaling = lhsType == FpType.SNaN || rhsType == FpType.SNaN;
    bool anyQuiet     = lhsType == FpType.QNaN || rhsType == FpType.QNaN;

    if (!(anySignaling || anyQuiet))
    {
        // Ordered operands: plain equality on the unpacked values.
        return ZerosOrOnes(value1 == value2);
    }

    float unordered = ZerosOrOnes(false);

    if (anySignaling)
    {
        FPProcessException(FpExc.InvalidOp, state);
    }

    return unordered;
}
// Single-precision "greater than or equal" compare producing a lane mask.
// Any NaN operand (quiet or signaling) gives the all-zeros pattern and raises InvalidOp;
// otherwise the result is all-ones when value1 >= value2 and all-zeros when not.
public static float FPCompareGE(float value1, float value2, CpuThreadState state)
{
    Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompareGE: state.Fpcr = 0x{state.Fpcr:X8}");

    value1 = value1.FPUnpack(out FpType lhsType, out _, out _, state);
    value2 = value2.FPUnpack(out FpType rhsType, out _, out _, state);

    bool lhsIsNaN = lhsType == FpType.SNaN || lhsType == FpType.QNaN;
    bool rhsIsNaN = rhsType == FpType.SNaN || rhsType == FpType.QNaN;

    if (!lhsIsNaN && !rhsIsNaN)
    {
        return ZerosOrOnes(value1 >= value2);
    }

    float unordered = ZerosOrOnes(false);

    FPProcessException(FpExc.InvalidOp, state);

    return unordered;
}
// Single-precision "greater than" compare producing a lane mask.
// Any NaN operand (quiet or signaling) gives the all-zeros pattern and raises InvalidOp;
// otherwise the result is all-ones when value1 > value2 and all-zeros when not.
public static float FPCompareGT(float value1, float value2, CpuThreadState state)
{
    Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompareGT: state.Fpcr = 0x{state.Fpcr:X8}");

    value1 = value1.FPUnpack(out FpType lhsType, out _, out _, state);
    value2 = value2.FPUnpack(out FpType rhsType, out _, out _, state);

    bool lhsIsNaN = lhsType == FpType.SNaN || lhsType == FpType.QNaN;
    bool rhsIsNaN = rhsType == FpType.SNaN || rhsType == FpType.QNaN;

    if (!lhsIsNaN && !rhsIsNaN)
    {
        return ZerosOrOnes(value1 > value2);
    }

    float unordered = ZerosOrOnes(false);

    FPProcessException(FpExc.InvalidOp, state);

    return unordered;
}
// Single-precision "less than or equal" compare.
// Implemented by swapping the operands and delegating to FPCompareGE, so the returned
// mask and any exception raised are exactly those of FPCompareGE(value2, value1).
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float FPCompareLE(float value1, float value2, CpuThreadState state)
{
Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompareLE: state.Fpcr = 0x{state.Fpcr:X8}");
// value1 <= value2  <=>  value2 >= value1
return FPCompareGE(value2, value1, state);
}
// Single-precision "less than" compare.
// Implemented by swapping the operands and delegating to FPCompareGT, so the returned
// mask and any exception raised are exactly those of FPCompareGT(value2, value1).
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float FPCompareLT(float value1, float value2, CpuThreadState state)
{
Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompareLT: state.Fpcr = 0x{state.Fpcr:X8}");
// value1 < value2  <=>  value2 > value1
return FPCompareGT(value2, value1, state);
}
public static float FPDiv(float value1, float value2, CpuThreadState state) public static float FPDiv(float value1, float value2, CpuThreadState state)
{ {
Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPDiv: state.Fpcr = 0x{state.Fpcr:X8}"); Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPDiv: state.Fpcr = 0x{state.Fpcr:X8}");
@ -1188,6 +1157,95 @@ namespace ChocolArm64.Instructions
return result; return result;
} }
// Single-precision reciprocal estimate (table driven, 8 fraction bits).
// Special cases, in order: NaN -> FPProcessNaN; infinity -> signed zero; zero -> signed
// infinity + DivideByZero; |value| < 2^-128 -> overflow (infinity or max normal depending
// on the rounding mode) + Overflow and Inexact; FZ set and |value| >= 2^126 -> signed zero
// with the Ufc flag set. Everything else is estimated through SoftFloat.RecipEstimateTable.
public static float FPRecipEstimate(float value, CpuThreadState state)
{
Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRecipEstimate: state.Fpcr = 0x{state.Fpcr:X8}");
// Only the out values are used; the float returned by FPUnpack is discarded here.
value.FPUnpack(out FpType type, out bool sign, out uint op, state);
float result;
if (type == FpType.SNaN || type == FpType.QNaN)
{
result = FPProcessNaN(type, op, state);
}
else if (type == FpType.Infinity)
{
result = FPZero(sign);
}
else if (type == FpType.Zero)
{
result = FPInfinity(sign);
FPProcessException(FpExc.DivideByZero, state);
}
else if (MathF.Abs(value) < MathF.Pow(2f, -128))
{
// The reciprocal would not fit: pick infinity or the largest normal
// according to the current rounding mode and the sign of the operand.
bool overflowToInf;
switch (state.FPRoundingMode())
{
default:
case RoundMode.ToNearest: overflowToInf = true; break;
case RoundMode.TowardsPlusInfinity: overflowToInf = !sign; break;
case RoundMode.TowardsMinusInfinity: overflowToInf = sign; break;
case RoundMode.TowardsZero: overflowToInf = false; break;
}
result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign);
FPProcessException(FpExc.Overflow, state);
FPProcessException(FpExc.Inexact, state);
}
else if (state.GetFpcrFlag(Fpcr.Fz) && (MathF.Abs(value) >= MathF.Pow(2f, 126)))
{
// Flush-to-zero enabled and the operand is at least 2^126: return a signed zero
// and set the Ufc flag directly.
result = FPZero(sign);
state.SetFpsrFlag(Fpsr.Ufc);
}
else
{
// The 23 fraction bits are moved to the top of a 52-bit field (23 + 29 = 52) so the
// masks below can address them as bits 51..0.
ulong fraction = (ulong)(op & 0x007FFFFFu) << 29;
uint exp = (op & 0x7F800000u) >> 23;
if (exp == 0u)
{
// Denormal input: normalise by one or two bit positions.
if ((fraction & 0x0008000000000000ul) == 0ul)
{
fraction = (fraction & 0x0003FFFFFFFFFFFFul) << 2;
// Unsigned wrap-around: exp becomes 0xFFFFFFFF, standing in for -1, so that
// 253u - exp below evaluates to 254 (relies on unchecked arithmetic).
exp -= 1u;
}
else
{
fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
}
}
// Top 8 fraction bits with bit 52 set as a leading 1, giving a value in [256, 512).
uint scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44);
uint resultExp = 253u - exp;
// The table stores (estimate - 256) per entry and is indexed by (scaled - 256).
uint estimate = (uint)SoftFloat.RecipEstimateTable[scaled - 256u] + 256u;
fraction = (ulong)(estimate & 0xFFu) << 44;
if (resultExp == 0u)
{
// Result is denormal: set bit 52, then shift the fraction right by one.
fraction = ((fraction & 0x000FFFFFFFFFFFFEul) | 0x0010000000000000ul) >> 1;
}
else if (resultExp + 1u == 0u)
{
// resultExp is 0xFFFFFFFF (i.e. -1): set bit 52, then shift right by two, exponent 0.
fraction = ((fraction & 0x000FFFFFFFFFFFFCul) | 0x0010000000000000ul) >> 2;
resultExp = 0u;
}
// Reassemble sign | 8-bit exponent | top 23 bits of the 52-bit fraction.
result = BitConverter.Int32BitsToSingle(
(int)((sign ? 1u : 0u) << 31 | (resultExp & 0xFFu) << 23 | (uint)(fraction >> 29) & 0x007FFFFFu));
}
return result;
}
public static float FPRecipStepFused(float value1, float value2, CpuThreadState state) public static float FPRecipStepFused(float value1, float value2, CpuThreadState state)
{ {
Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRecipStepFused: state.Fpcr = 0x{state.Fpcr:X8}"); Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRecipStepFused: state.Fpcr = 0x{state.Fpcr:X8}");
@ -1255,6 +1313,71 @@ namespace ChocolArm64.Instructions
return result; return result;
} }
// Single-precision reciprocal square-root estimate (table driven, 8 fraction bits).
// Special cases, in order: NaN -> FPProcessNaN; zero -> signed infinity + DivideByZero;
// any other negative value -> default NaN + InvalidOp; +infinity -> +0.
// Everything else is estimated through SoftFloat.RecipSqrtEstimateTable.
public static float FPRSqrtEstimate(float value, CpuThreadState state)
{
Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRSqrtEstimate: state.Fpcr = 0x{state.Fpcr:X8}");
// Only the out values are used; the float returned by FPUnpack is discarded here.
value.FPUnpack(out FpType type, out bool sign, out uint op, state);
float result;
if (type == FpType.SNaN || type == FpType.QNaN)
{
result = FPProcessNaN(type, op, state);
}
else if (type == FpType.Zero)
{
result = FPInfinity(sign);
FPProcessException(FpExc.DivideByZero, state);
}
else if (sign)
{
result = FPDefaultNaN();
FPProcessException(FpExc.InvalidOp, state);
}
else if (type == FpType.Infinity)
{
result = FPZero(false);
}
else
{
// The 23 fraction bits are moved to the top of a 52-bit field (23 + 29 = 52).
ulong fraction = (ulong)(op & 0x007FFFFFu) << 29;
uint exp = (op & 0x7F800000u) >> 23;
if (exp == 0u)
{
// Denormal input: shift left until bit 51 is set, decrementing exp each step.
// exp is unsigned, so it wraps below zero; the later 380u - exp and the parity
// test on exp still give the intended values (relies on unchecked arithmetic).
while ((fraction & 0x0008000000000000ul) == 0ul)
{
fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
exp -= 1u;
}
fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
}
uint scaled;
if ((exp & 1u) == 0u)
{
// Even exponent: leading 1 plus top 8 fraction bits -> value in [256, 512).
scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44);
}
else
{
// Odd exponent: leading 1 plus top 7 fraction bits -> value in [128, 256).
scaled = (uint)(((fraction & 0x000FE00000000000ul) | 0x0010000000000000ul) >> 45);
}
uint resultExp = (380u - exp) >> 1;
// The table stores (estimate - 256) per entry and is indexed by (scaled - 128).
uint estimate = (uint)SoftFloat.RecipSqrtEstimateTable[scaled - 128u] + 256u;
// Sign bit is always clear; the low 8 estimate bits become the top fraction bits.
result = BitConverter.Int32BitsToSingle((int)((resultExp & 0xFFu) << 23 | (estimate & 0xFFu) << 15));
}
return result;
}
public static float FPRSqrtStepFused(float value1, float value2, CpuThreadState state) public static float FPRSqrtStepFused(float value1, float value2, CpuThreadState state)
{ {
Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRSqrtStepFused: state.Fpcr = 0x{state.Fpcr:X8}"); Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRSqrtStepFused: state.Fpcr = 0x{state.Fpcr:X8}");
@ -1402,6 +1525,11 @@ namespace ChocolArm64.Instructions
return sign ? -0f : +0f; return sign ? -0f : +0f;
} }
// Largest-magnitude finite single-precision value carrying the requested sign:
// float.MinValue when sign is true (negative), float.MaxValue otherwise.
private static float FPMaxNormal(bool sign)
{
    if (sign)
    {
        return float.MinValue;
    }

    return float.MaxValue;
}
private static float FPTwo(bool sign) private static float FPTwo(bool sign)
{ {
return sign ? -2f : +2f; return sign ? -2f : +2f;
@ -1417,6 +1545,11 @@ namespace ChocolArm64.Instructions
return -value; return -value;
} }
// Builds a 32-bit comparison mask reinterpreted as a float.
// NOTE: despite its name, passing true for `zeros` selects the ALL-ONES pattern
// (0xFFFFFFFF) and false selects all-zeros; callers pass the comparison outcome directly.
private static float ZerosOrOnes(bool zeros)
{
    int maskBits = zeros ? -1 : 0;

    return BitConverter.Int32BitsToSingle(maskBits);
}
private static float FPUnpack( private static float FPUnpack(
this float value, this float value,
out FpType type, out FpType type,
@ -1658,6 +1791,94 @@ namespace ChocolArm64.Instructions
return result; return result;
} }
// Double-precision equality compare producing a lane mask.
// Returns the all-ones bit pattern when both operands are non-NaN and compare equal,
// otherwise the all-zeros pattern. InvalidOp is raised only when at least one operand
// is a signaling NaN; quiet NaNs just yield the all-zeros result.
public static double FPCompareEQ(double value1, double value2, CpuThreadState state)
{
    Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompareEQ: state.Fpcr = 0x{state.Fpcr:X8}");

    value1 = value1.FPUnpack(out FpType lhsType, out _, out _, state);
    value2 = value2.FPUnpack(out FpType rhsType, out _, out _, state);

    bool anySignaling = lhsType == FpType.SNaN || rhsType == FpType.SNaN;
    bool anyQuiet     = lhsType == FpType.QNaN || rhsType == FpType.QNaN;

    if (!(anySignaling || anyQuiet))
    {
        // Ordered operands: plain equality on the unpacked values.
        return ZerosOrOnes(value1 == value2);
    }

    double unordered = ZerosOrOnes(false);

    if (anySignaling)
    {
        FPProcessException(FpExc.InvalidOp, state);
    }

    return unordered;
}
// Double-precision "greater than or equal" compare producing a lane mask.
// Any NaN operand (quiet or signaling) gives the all-zeros pattern and raises InvalidOp;
// otherwise the result is all-ones when value1 >= value2 and all-zeros when not.
public static double FPCompareGE(double value1, double value2, CpuThreadState state)
{
    Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompareGE: state.Fpcr = 0x{state.Fpcr:X8}");

    value1 = value1.FPUnpack(out FpType lhsType, out _, out _, state);
    value2 = value2.FPUnpack(out FpType rhsType, out _, out _, state);

    bool lhsIsNaN = lhsType == FpType.SNaN || lhsType == FpType.QNaN;
    bool rhsIsNaN = rhsType == FpType.SNaN || rhsType == FpType.QNaN;

    if (!lhsIsNaN && !rhsIsNaN)
    {
        return ZerosOrOnes(value1 >= value2);
    }

    double unordered = ZerosOrOnes(false);

    FPProcessException(FpExc.InvalidOp, state);

    return unordered;
}
// Double-precision "greater than" compare producing a lane mask.
// Any NaN operand (quiet or signaling) gives the all-zeros pattern and raises InvalidOp;
// otherwise the result is all-ones when value1 > value2 and all-zeros when not.
public static double FPCompareGT(double value1, double value2, CpuThreadState state)
{
    Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompareGT: state.Fpcr = 0x{state.Fpcr:X8}");

    value1 = value1.FPUnpack(out FpType lhsType, out _, out _, state);
    value2 = value2.FPUnpack(out FpType rhsType, out _, out _, state);

    bool lhsIsNaN = lhsType == FpType.SNaN || lhsType == FpType.QNaN;
    bool rhsIsNaN = rhsType == FpType.SNaN || rhsType == FpType.QNaN;

    if (!lhsIsNaN && !rhsIsNaN)
    {
        return ZerosOrOnes(value1 > value2);
    }

    double unordered = ZerosOrOnes(false);

    FPProcessException(FpExc.InvalidOp, state);

    return unordered;
}
// Double-precision "less than or equal" compare.
// Implemented by swapping the operands and delegating to FPCompareGE, so the returned
// mask and any exception raised are exactly those of FPCompareGE(value2, value1).
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double FPCompareLE(double value1, double value2, CpuThreadState state)
{
Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompareLE: state.Fpcr = 0x{state.Fpcr:X8}");
// value1 <= value2  <=>  value2 >= value1
return FPCompareGE(value2, value1, state);
}
// Double-precision "less than" compare.
// Implemented by swapping the operands and delegating to FPCompareGT, so the returned
// mask and any exception raised are exactly those of FPCompareGT(value2, value1).
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double FPCompareLT(double value1, double value2, CpuThreadState state)
{
Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompareLT: state.Fpcr = 0x{state.Fpcr:X8}");
// value1 < value2  <=>  value2 > value1
return FPCompareGT(value2, value1, state);
}
public static double FPDiv(double value1, double value2, CpuThreadState state) public static double FPDiv(double value1, double value2, CpuThreadState state)
{ {
Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPDiv: state.Fpcr = 0x{state.Fpcr:X8}"); Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPDiv: state.Fpcr = 0x{state.Fpcr:X8}");
@ -2020,6 +2241,95 @@ namespace ChocolArm64.Instructions
return result; return result;
} }
// Double-precision reciprocal estimate (table driven, 8 fraction bits).
// Special cases, in order: NaN -> FPProcessNaN; infinity -> signed zero; zero -> signed
// infinity + DivideByZero; |value| < 2^-1024 -> overflow (infinity or max normal depending
// on the rounding mode) + Overflow and Inexact; FZ set and |value| >= 2^1022 -> signed zero
// with the Ufc flag set. Everything else is estimated through SoftFloat.RecipEstimateTable.
public static double FPRecipEstimate(double value, CpuThreadState state)
{
Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRecipEstimate: state.Fpcr = 0x{state.Fpcr:X8}");
// Only the out values are used; the double returned by FPUnpack is discarded here.
value.FPUnpack(out FpType type, out bool sign, out ulong op, state);
double result;
if (type == FpType.SNaN || type == FpType.QNaN)
{
result = FPProcessNaN(type, op, state);
}
else if (type == FpType.Infinity)
{
result = FPZero(sign);
}
else if (type == FpType.Zero)
{
result = FPInfinity(sign);
FPProcessException(FpExc.DivideByZero, state);
}
else if (Math.Abs(value) < Math.Pow(2d, -1024))
{
// The reciprocal would not fit: pick infinity or the largest normal
// according to the current rounding mode and the sign of the operand.
bool overflowToInf;
switch (state.FPRoundingMode())
{
default:
case RoundMode.ToNearest: overflowToInf = true; break;
case RoundMode.TowardsPlusInfinity: overflowToInf = !sign; break;
case RoundMode.TowardsMinusInfinity: overflowToInf = sign; break;
case RoundMode.TowardsZero: overflowToInf = false; break;
}
result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign);
FPProcessException(FpExc.Overflow, state);
FPProcessException(FpExc.Inexact, state);
}
else if (state.GetFpcrFlag(Fpcr.Fz) && (Math.Abs(value) >= Math.Pow(2d, 1022)))
{
// Flush-to-zero enabled and the operand is at least 2^1022: return a signed zero
// and set the Ufc flag directly.
result = FPZero(sign);
state.SetFpsrFlag(Fpsr.Ufc);
}
else
{
// 52-bit fraction and 11-bit biased exponent taken straight from the raw bits.
ulong fraction = op & 0x000FFFFFFFFFFFFFul;
uint exp = (uint)((op & 0x7FF0000000000000ul) >> 52);
if (exp == 0u)
{
// Denormal input: normalise by one or two bit positions.
if ((fraction & 0x0008000000000000ul) == 0ul)
{
fraction = (fraction & 0x0003FFFFFFFFFFFFul) << 2;
// Unsigned wrap-around: exp becomes 0xFFFFFFFF, standing in for -1, so that
// 2045u - exp below evaluates to 2046 (relies on unchecked arithmetic).
exp -= 1u;
}
else
{
fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
}
}
// Top 8 fraction bits with bit 52 set as a leading 1, giving a value in [256, 512).
uint scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44);
uint resultExp = 2045u - exp;
// The table stores (estimate - 256) per entry and is indexed by (scaled - 256).
uint estimate = (uint)SoftFloat.RecipEstimateTable[scaled - 256u] + 256u;
fraction = (ulong)(estimate & 0xFFu) << 44;
if (resultExp == 0u)
{
// Result is denormal: set bit 52, then shift the fraction right by one.
fraction = ((fraction & 0x000FFFFFFFFFFFFEul) | 0x0010000000000000ul) >> 1;
}
else if (resultExp + 1u == 0u)
{
// resultExp is 0xFFFFFFFF (i.e. -1): set bit 52, then shift right by two, exponent 0.
fraction = ((fraction & 0x000FFFFFFFFFFFFCul) | 0x0010000000000000ul) >> 2;
resultExp = 0u;
}
// Reassemble sign | 11-bit exponent | 52-bit fraction.
result = BitConverter.Int64BitsToDouble(
(long)((sign ? 1ul : 0ul) << 63 | (resultExp & 0x7FFul) << 52 | (fraction & 0x000FFFFFFFFFFFFFul)));
}
return result;
}
public static double FPRecipStepFused(double value1, double value2, CpuThreadState state) public static double FPRecipStepFused(double value1, double value2, CpuThreadState state)
{ {
Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRecipStepFused: state.Fpcr = 0x{state.Fpcr:X8}"); Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRecipStepFused: state.Fpcr = 0x{state.Fpcr:X8}");
@ -2087,6 +2397,71 @@ namespace ChocolArm64.Instructions
return result; return result;
} }
// Double-precision reciprocal square-root estimate (table driven, 8 fraction bits).
// Special cases, in order: NaN -> FPProcessNaN; zero -> signed infinity + DivideByZero;
// any other negative value -> default NaN + InvalidOp; +infinity -> +0.
// Everything else is estimated through SoftFloat.RecipSqrtEstimateTable.
public static double FPRSqrtEstimate(double value, CpuThreadState state)
{
Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRSqrtEstimate: state.Fpcr = 0x{state.Fpcr:X8}");
// Only the out values are used; the double returned by FPUnpack is discarded here.
value.FPUnpack(out FpType type, out bool sign, out ulong op, state);
double result;
if (type == FpType.SNaN || type == FpType.QNaN)
{
result = FPProcessNaN(type, op, state);
}
else if (type == FpType.Zero)
{
result = FPInfinity(sign);
FPProcessException(FpExc.DivideByZero, state);
}
else if (sign)
{
result = FPDefaultNaN();
FPProcessException(FpExc.InvalidOp, state);
}
else if (type == FpType.Infinity)
{
result = FPZero(false);
}
else
{
// 52-bit fraction and 11-bit biased exponent taken straight from the raw bits.
ulong fraction = op & 0x000FFFFFFFFFFFFFul;
uint exp = (uint)((op & 0x7FF0000000000000ul) >> 52);
if (exp == 0u)
{
// Denormal input: shift left until bit 51 is set, decrementing exp each step.
// exp is unsigned, so it wraps below zero; the later 3068u - exp and the parity
// test on exp still give the intended values (relies on unchecked arithmetic).
while ((fraction & 0x0008000000000000ul) == 0ul)
{
fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
exp -= 1u;
}
fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
}
uint scaled;
if ((exp & 1u) == 0u)
{
// Even exponent: leading 1 plus top 8 fraction bits -> value in [256, 512).
scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44);
}
else
{
// Odd exponent: leading 1 plus top 7 fraction bits -> value in [128, 256).
scaled = (uint)(((fraction & 0x000FE00000000000ul) | 0x0010000000000000ul) >> 45);
}
uint resultExp = (3068u - exp) >> 1;
// The table stores (estimate - 256) per entry and is indexed by (scaled - 128).
uint estimate = (uint)SoftFloat.RecipSqrtEstimateTable[scaled - 128u] + 256u;
// Sign bit is always clear; the low 8 estimate bits become the top fraction bits.
result = BitConverter.Int64BitsToDouble((long)((resultExp & 0x7FFul) << 52 | (estimate & 0xFFul) << 44));
}
return result;
}
public static double FPRSqrtStepFused(double value1, double value2, CpuThreadState state) public static double FPRSqrtStepFused(double value1, double value2, CpuThreadState state)
{ {
Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRSqrtStepFused: state.Fpcr = 0x{state.Fpcr:X8}"); Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRSqrtStepFused: state.Fpcr = 0x{state.Fpcr:X8}");
@ -2234,6 +2609,11 @@ namespace ChocolArm64.Instructions
return sign ? -0d : +0d; return sign ? -0d : +0d;
} }
// Largest-magnitude finite double-precision value carrying the requested sign:
// double.MinValue when sign is true (negative), double.MaxValue otherwise.
private static double FPMaxNormal(bool sign)
{
    if (sign)
    {
        return double.MinValue;
    }

    return double.MaxValue;
}
private static double FPTwo(bool sign) private static double FPTwo(bool sign)
{ {
return sign ? -2d : +2d; return sign ? -2d : +2d;
@ -2249,6 +2629,11 @@ namespace ChocolArm64.Instructions
return -value; return -value;
} }
// Builds a 64-bit comparison mask reinterpreted as a double.
// NOTE: despite its name, passing true for `zeros` selects the ALL-ONES pattern
// (0xFFFFFFFFFFFFFFFF) and false selects all-zeros; callers pass the comparison outcome directly.
private static double ZerosOrOnes(bool zeros)
{
    long maskBits = zeros ? -1L : 0L;

    return BitConverter.Int64BitsToDouble(maskBits);
}
private static double FPUnpack( private static double FPUnpack(
this double value, this double value,
out FpType type, out FpType type,

View file

@ -222,6 +222,7 @@ namespace ChocolArm64
SetA64("0x101110001xxxxx000111xxxxxxxxxx", InstEmit.Eor_V, typeof(OpCodeSimdReg64)); SetA64("0x101110001xxxxx000111xxxxxxxxxx", InstEmit.Eor_V, typeof(OpCodeSimdReg64));
SetA64("0>101110000xxxxx0<xxx0xxxxxxxxxx", InstEmit.Ext_V, typeof(OpCodeSimdExt64)); SetA64("0>101110000xxxxx0<xxx0xxxxxxxxxx", InstEmit.Ext_V, typeof(OpCodeSimdExt64));
SetA64("011111101x1xxxxx110101xxxxxxxxxx", InstEmit.Fabd_S, typeof(OpCodeSimdReg64)); SetA64("011111101x1xxxxx110101xxxxxxxxxx", InstEmit.Fabd_S, typeof(OpCodeSimdReg64));
SetA64("0>1011101<1xxxxx110101xxxxxxxxxx", InstEmit.Fabd_V, typeof(OpCodeSimdReg64));
SetA64("000111100x100000110000xxxxxxxxxx", InstEmit.Fabs_S, typeof(OpCodeSimd64)); SetA64("000111100x100000110000xxxxxxxxxx", InstEmit.Fabs_S, typeof(OpCodeSimd64));
SetA64("0>0011101<100000111110xxxxxxxxxx", InstEmit.Fabs_V, typeof(OpCodeSimd64)); SetA64("0>0011101<100000111110xxxxxxxxxx", InstEmit.Fabs_V, typeof(OpCodeSimd64));
SetA64("000111100x1xxxxx001010xxxxxxxxxx", InstEmit.Fadd_S, typeof(OpCodeSimdReg64)); SetA64("000111100x1xxxxx001010xxxxxxxxxx", InstEmit.Fadd_S, typeof(OpCodeSimdReg64));

View file

@ -8,12 +8,14 @@ public static class Optimizations
private static bool _useSseIfAvailable = true; private static bool _useSseIfAvailable = true;
private static bool _useSse2IfAvailable = true; private static bool _useSse2IfAvailable = true;
private static bool _useSse3IfAvailable = true;
private static bool _useSsse3IfAvailable = true; private static bool _useSsse3IfAvailable = true;
private static bool _useSse41IfAvailable = true; private static bool _useSse41IfAvailable = true;
private static bool _useSse42IfAvailable = true; private static bool _useSse42IfAvailable = true;
internal static bool UseSse = (_useAllSseIfAvailable && _useSseIfAvailable) && Sse.IsSupported; internal static bool UseSse = (_useAllSseIfAvailable && _useSseIfAvailable) && Sse.IsSupported;
internal static bool UseSse2 = (_useAllSseIfAvailable && _useSse2IfAvailable) && Sse2.IsSupported; internal static bool UseSse2 = (_useAllSseIfAvailable && _useSse2IfAvailable) && Sse2.IsSupported;
internal static bool UseSse3 = (_useAllSseIfAvailable && _useSse3IfAvailable) && Sse3.IsSupported;
internal static bool UseSsse3 = (_useAllSseIfAvailable && _useSsse3IfAvailable) && Ssse3.IsSupported; internal static bool UseSsse3 = (_useAllSseIfAvailable && _useSsse3IfAvailable) && Ssse3.IsSupported;
internal static bool UseSse41 = (_useAllSseIfAvailable && _useSse41IfAvailable) && Sse41.IsSupported; internal static bool UseSse41 = (_useAllSseIfAvailable && _useSse41IfAvailable) && Sse41.IsSupported;
internal static bool UseSse42 = (_useAllSseIfAvailable && _useSse42IfAvailable) && Sse42.IsSupported; internal static bool UseSse42 = (_useAllSseIfAvailable && _useSse42IfAvailable) && Sse42.IsSupported;

View file

@ -256,6 +256,112 @@ namespace Ryujinx.Tests.Cpu
#endregion #endregion
#region "ValueSource (Opcodes)" #region "ValueSource (Opcodes)"
// Opcode list for the scalar single-precision unary tests (destination S0, source S1).
private static uint[] _F_Abs_Neg_Recpx_Sqrt_S_S_()
{
    uint[] opcodes =
    {
        0x1E20C020u, // FABS   S0, S1
        0x1E214020u, // FNEG   S0, S1
        0x5EA1F820u, // FRECPX S0, S1
        0x1E21C020u  // FSQRT  S0, S1
    };

    return opcodes;
}
// Opcode list for the scalar double-precision unary tests (destination D0, source D1).
private static uint[] _F_Abs_Neg_Recpx_Sqrt_S_D_()
{
    uint[] opcodes =
    {
        0x1E60C020u, // FABS   D0, D1
        0x1E614020u, // FNEG   D0, D1
        0x5EE1F820u, // FRECPX D0, D1
        0x1E61C020u  // FSQRT  D0, D1
    };

    return opcodes;
}
// Base opcodes for the vector single-precision unary tests; the values below encode
// register 0 for both operands in the 2S arrangement.
private static uint[] _F_Abs_Neg_Sqrt_V_2S_4S_()
{
    uint[] opcodes =
    {
        0x0EA0F800u, // FABS  V0.2S, V0.2S
        0x2EA0F800u, // FNEG  V0.2S, V0.2S
        0x2EA1F800u  // FSQRT V0.2S, V0.2S
    };

    return opcodes;
}
// Base opcodes for the vector double-precision unary tests; the values below encode
// register 0 for both operands in the 2D arrangement.
private static uint[] _F_Abs_Neg_Sqrt_V_2D_()
{
    uint[] opcodes =
    {
        0x4EE0F800u, // FABS  V0.2D, V0.2D
        0x6EE0F800u, // FNEG  V0.2D, V0.2D
        0x6EE1F800u  // FSQRT V0.2D, V0.2D
    };

    return opcodes;
}
// Single-entry opcode list for the scalar FADDP test on a 2S source.
private static uint[] _F_Add_P_S_2SS_()
{
    uint[] opcodes = { 0x7E30D820u }; // FADDP S0, V1.2S

    return opcodes;
}
// Single-entry opcode list for the scalar FADDP test on a 2D source.
private static uint[] _F_Add_P_S_2DD_()
{
    uint[] opcodes = { 0x7E70D820u }; // FADDP D0, V1.2D

    return opcodes;
}
// Opcode list for the scalar single-precision compare-against-zero tests
// (destination S0, source S1).
private static uint[] _F_Cm_EqGeGtLeLt_S_S_()
{
    uint[] opcodes =
    {
        0x5EA0D820u, // FCMEQ S0, S1, #0.0
        0x7EA0C820u, // FCMGE S0, S1, #0.0
        0x5EA0C820u, // FCMGT S0, S1, #0.0
        0x7EA0D820u, // FCMLE S0, S1, #0.0
        0x5EA0E820u  // FCMLT S0, S1, #0.0
    };

    return opcodes;
}
// Opcode list for the scalar double-precision compare-against-zero tests
// (destination D0, source D1).
private static uint[] _F_Cm_EqGeGtLeLt_S_D_()
{
    uint[] opcodes =
    {
        0x5EE0D820u, // FCMEQ D0, D1, #0.0
        0x7EE0C820u, // FCMGE D0, D1, #0.0
        0x5EE0C820u, // FCMGT D0, D1, #0.0
        0x7EE0D820u, // FCMLE D0, D1, #0.0
        0x5EE0E820u  // FCMLT D0, D1, #0.0
    };

    return opcodes;
}
// Base opcodes for the vector single-precision compare-against-zero tests; the values
// below encode register 0 for both operands in the 2S arrangement.
private static uint[] _F_Cm_EqGeGtLeLt_V_2S_4S_()
{
    uint[] opcodes =
    {
        0x0EA0D800u, // FCMEQ V0.2S, V0.2S, #0.0
        0x2EA0C800u, // FCMGE V0.2S, V0.2S, #0.0
        0x0EA0C800u, // FCMGT V0.2S, V0.2S, #0.0
        0x2EA0D800u, // FCMLE V0.2S, V0.2S, #0.0
        0x0EA0E800u  // FCMLT V0.2S, V0.2S, #0.0
    };

    return opcodes;
}
// Base opcodes for the vector double-precision compare-against-zero tests; the values
// below encode register 0 for both operands in the 2D arrangement.
private static uint[] _F_Cm_EqGeGtLeLt_V_2D_()
{
    uint[] opcodes =
    {
        0x4EE0D800u, // FCMEQ V0.2D, V0.2D, #0.0
        0x6EE0C800u, // FCMGE V0.2D, V0.2D, #0.0
        0x4EE0C800u, // FCMGT V0.2D, V0.2D, #0.0
        0x6EE0D800u, // FCMLE V0.2D, V0.2D, #0.0
        0x4EE0E800u  // FCMLT V0.2D, V0.2D, #0.0
    };

    return opcodes;
}
private static uint[] _F_Cmp_Cmpe_S_S_() private static uint[] _F_Cmp_Cmpe_S_S_()
{ {
return new uint[] return new uint[]
@ -366,45 +472,39 @@ namespace Ryujinx.Tests.Cpu
}; };
} }
private static uint[] _F_Abs_Neg_Recpx_Sqrt_S_S_() private static uint[] _F_Recpe_Rsqrte_S_S_()
{ {
return new uint[] return new uint[]
{ {
0x1E20C020u, // FABS S0, S1 0x5EA1D820u, // FRECPE S0, S1
0x1E214020u, // FNEG S0, S1 0x7EA1D820u // FRSQRTE S0, S1
0x5EA1F820u, // FRECPX S0, S1
0x1E21C020u // FSQRT S0, S1
}; };
} }
private static uint[] _F_Abs_Neg_Recpx_Sqrt_S_D_() private static uint[] _F_Recpe_Rsqrte_S_D_()
{ {
return new uint[] return new uint[]
{ {
0x1E60C020u, // FABS D0, D1 0x5EE1D820u, // FRECPE D0, D1
0x1E614020u, // FNEG D0, D1 0x7EE1D820u // FRSQRTE D0, D1
0x5EE1F820u, // FRECPX D0, D1
0x1E61C020u // FSQRT D0, D1
}; };
} }
private static uint[] _F_Abs_Neg_Sqrt_V_2S_4S_() private static uint[] _F_Recpe_Rsqrte_V_2S_4S_()
{ {
return new uint[] return new uint[]
{ {
0x0EA0F800u, // FABS V0.2S, V0.2S 0x0EA1D800u, // FRECPE V0.2S, V0.2S
0x2EA0F800u, // FNEG V0.2S, V0.2S 0x2EA1D800u // FRSQRTE V0.2S, V0.2S
0x2EA1F800u // FSQRT V0.2S, V0.2S
}; };
} }
private static uint[] _F_Abs_Neg_Sqrt_V_2D_() private static uint[] _F_Recpe_Rsqrte_V_2D_()
{ {
return new uint[] return new uint[]
{ {
0x4EE0F800u, // FABS V0.2D, V0.2D 0x4EE1D800u, // FRECPE V0.2D, V0.2D
0x6EE0F800u, // FNEG V0.2D, V0.2D 0x6EE1D800u // FRSQRTE V0.2D, V0.2D
0x6EE1F800u // FSQRT V0.2D, V0.2D
}; };
} }
@ -963,6 +1063,202 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(); CompareAgainstUnicorn();
} }
// Runs each opcode from _F_Abs_Neg_Recpx_Sqrt_S_S_ on a single-precision scalar input and
// compares the outcome against Unicorn, passing only Ioc and Idc as the FPSR mask.
[Test, Pairwise] [Explicit]
public void F_Abs_Neg_Recpx_Sqrt_S_S([ValueSource("_F_Abs_Neg_Recpx_Sqrt_S_S_")] uint opcodes,
                                     [ValueSource("_1S_F_")] ulong a)
{
    // Random 64-bit pattern used to pre-fill both arguments of the destination vector.
    ulong filler = TestContext.CurrentContext.Random.NextULong();

    Vector128<float> dest = MakeVectorE0E1(filler, filler);
    Vector128<float> src  = MakeVectorE0(a);

    // Take only the Fz and Dn bits from a random word; every other FPCR bit stays clear.
    int randomBits = (int)TestContext.CurrentContext.Random.NextUInt();
    int fzDnMask   = (1 << (int)Fpcr.Fz) | (1 << (int)Fpcr.Dn);
    int fpcr       = randomBits & fzDnMask;

    SingleOpcode(opcodes, v0: dest, v1: src, fpcr: fpcr);

    CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc | Fpsr.Idc);
}
// Runs each opcode from _F_Abs_Neg_Recpx_Sqrt_S_D_ on a double-precision scalar input and
// compares the outcome against Unicorn, passing only Ioc and Idc as the FPSR mask.
[Test, Pairwise] [Explicit]
public void F_Abs_Neg_Recpx_Sqrt_S_D([ValueSource("_F_Abs_Neg_Recpx_Sqrt_S_D_")] uint opcodes,
                                     [ValueSource("_1D_F_")] ulong a)
{
    // Random 64-bit pattern handed to MakeVectorE1 for the destination vector.
    ulong filler = TestContext.CurrentContext.Random.NextULong();

    Vector128<float> dest = MakeVectorE1(filler);
    Vector128<float> src  = MakeVectorE0(a);

    // Take only the Fz and Dn bits from a random word; every other FPCR bit stays clear.
    int randomBits = (int)TestContext.CurrentContext.Random.NextUInt();
    int fzDnMask   = (1 << (int)Fpcr.Fz) | (1 << (int)Fpcr.Dn);
    int fpcr       = randomBits & fzDnMask;

    SingleOpcode(opcodes, v0: dest, v1: src, fpcr: fpcr);

    CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc | Fpsr.Idc);
}
// Runs each opcode from _F_Abs_Neg_Sqrt_V_2S_4S_ in its 2S (q = 0) and 4S (q = 1) forms and
// compares the outcome against Unicorn, passing only Ioc and Idc as the FPSR mask.
[Test, Pairwise] [Explicit]
public void F_Abs_Neg_Sqrt_V_2S_4S([ValueSource("_F_Abs_Neg_Sqrt_V_2S_4S_")] uint opcodes,
                                   [Values(0u)]     uint rd,
                                   [Values(1u, 0u)] uint rn,
                                   [ValueSource("_2S_F_")] ulong z,
                                   [ValueSource("_2S_F_")] ulong a,
                                   [Values(0b0u, 0b1u)] uint q) // <2S, 4S>
{
    // Patch Q (bit 30), Rn (bits 9:5) and Rd (bits 4:0) into the base encoding.
    opcodes |= ((q & 1) << 30) | ((rn & 31) << 5) | (rd & 31);

    Vector128<float> dest = MakeVectorE0E1(z, z);
    // Second argument is `a` for q = 1 and zero for q = 0.
    Vector128<float> src  = MakeVectorE0E1(a, a * q);

    // Take only the Fz and Dn bits from a random word; every other FPCR bit stays clear.
    int randomBits = (int)TestContext.CurrentContext.Random.NextUInt();
    int fzDnMask   = (1 << (int)Fpcr.Fz) | (1 << (int)Fpcr.Dn);
    int fpcr       = randomBits & fzDnMask;

    SingleOpcode(opcodes, v0: dest, v1: src, fpcr: fpcr);

    CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc | Fpsr.Idc);
}
// Runs each opcode from _F_Abs_Neg_Sqrt_V_2D_ and compares the outcome against Unicorn,
// passing only Ioc and Idc as the FPSR mask.
[Test, Pairwise] [Explicit]
public void F_Abs_Neg_Sqrt_V_2D([ValueSource("_F_Abs_Neg_Sqrt_V_2D_")] uint opcodes,
                                [Values(0u)]     uint rd,
                                [Values(1u, 0u)] uint rn,
                                [ValueSource("_1D_F_")] ulong z,
                                [ValueSource("_1D_F_")] ulong a)
{
    // Patch Rn (bits 9:5) and Rd (bits 4:0) into the base encoding.
    opcodes |= ((rn & 31) << 5) | (rd & 31);

    Vector128<float> dest = MakeVectorE0E1(z, z);
    Vector128<float> src  = MakeVectorE0E1(a, a);

    // Take only the Fz and Dn bits from a random word; every other FPCR bit stays clear.
    int randomBits = (int)TestContext.CurrentContext.Random.NextUInt();
    int fzDnMask   = (1 << (int)Fpcr.Fz) | (1 << (int)Fpcr.Dn);
    int fpcr       = randomBits & fzDnMask;

    SingleOpcode(opcodes, v0: dest, v1: src, fpcr: fpcr);

    CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc | Fpsr.Idc);
}
// Runs each opcode from _F_Add_P_S_2SS_ on a 64-bit source taken from _2S_F_ and compares
// the outcome against Unicorn, passing only Ioc and Idc as the FPSR mask.
[Test, Pairwise] [Explicit]
public void F_Add_P_S_2SS([ValueSource("_F_Add_P_S_2SS_")] uint opcodes,
                          [ValueSource("_2S_F_")] ulong a)
{
    // Random 64-bit pattern used to pre-fill both arguments of the destination vector.
    ulong filler = TestContext.CurrentContext.Random.NextULong();

    Vector128<float> dest = MakeVectorE0E1(filler, filler);
    Vector128<float> src  = MakeVectorE0(a);

    // Take only the Fz and Dn bits from a random word; every other FPCR bit stays clear.
    int randomBits = (int)TestContext.CurrentContext.Random.NextUInt();
    int fzDnMask   = (1 << (int)Fpcr.Fz) | (1 << (int)Fpcr.Dn);
    int fpcr       = randomBits & fzDnMask;

    SingleOpcode(opcodes, v0: dest, v1: src, fpcr: fpcr);

    CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc | Fpsr.Idc);
}
// Runs each opcode from _F_Add_P_S_2DD_ with the same _1D_F_ value in both source arguments
// and compares the outcome against Unicorn, passing only Ioc and Idc as the FPSR mask.
[Test, Pairwise] [Explicit]
public void F_Add_P_S_2DD([ValueSource("_F_Add_P_S_2DD_")] uint opcodes,
                          [ValueSource("_1D_F_")] ulong a)
{
    // Random 64-bit pattern handed to MakeVectorE1 for the destination vector.
    ulong filler = TestContext.CurrentContext.Random.NextULong();

    Vector128<float> dest = MakeVectorE1(filler);
    Vector128<float> src  = MakeVectorE0E1(a, a);

    // Take only the Fz and Dn bits from a random word; every other FPCR bit stays clear.
    int randomBits = (int)TestContext.CurrentContext.Random.NextUInt();
    int fzDnMask   = (1 << (int)Fpcr.Fz) | (1 << (int)Fpcr.Dn);
    int fpcr       = randomBits & fzDnMask;

    SingleOpcode(opcodes, v0: dest, v1: src, fpcr: fpcr);

    CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc | Fpsr.Idc);
}
// Runs each opcode from _F_Cm_EqGeGtLeLt_S_S_ on a single-precision scalar input and
// compares the outcome against Unicorn, passing only Ioc and Idc as the FPSR mask.
[Test, Pairwise] [Explicit]
public void F_Cm_EqGeGtLeLt_S_S([ValueSource("_F_Cm_EqGeGtLeLt_S_S_")] uint opcodes,
                                [ValueSource("_1S_F_")] ulong a)
{
    // Random 64-bit pattern used to pre-fill both arguments of the destination vector.
    ulong filler = TestContext.CurrentContext.Random.NextULong();

    Vector128<float> dest = MakeVectorE0E1(filler, filler);
    Vector128<float> src  = MakeVectorE0(a);

    // Only the Fz bit is randomised here; every other FPCR bit stays clear.
    int randomBits = (int)TestContext.CurrentContext.Random.NextUInt();
    int fzBit      = 1 << (int)Fpcr.Fz;
    int fpcr       = randomBits & fzBit;

    SingleOpcode(opcodes, v0: dest, v1: src, fpcr: fpcr);

    CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc | Fpsr.Idc);
}
// Runs each opcode from _F_Cm_EqGeGtLeLt_S_D_ on a double-precision scalar input and
// compares the outcome against Unicorn, passing only Ioc and Idc as the FPSR mask.
[Test, Pairwise] [Explicit]
public void F_Cm_EqGeGtLeLt_S_D([ValueSource("_F_Cm_EqGeGtLeLt_S_D_")] uint opcodes,
                                [ValueSource("_1D_F_")] ulong a)
{
    // Random 64-bit pattern handed to MakeVectorE1 for the destination vector.
    ulong filler = TestContext.CurrentContext.Random.NextULong();

    Vector128<float> dest = MakeVectorE1(filler);
    Vector128<float> src  = MakeVectorE0(a);

    // Only the Fz bit is randomised here; every other FPCR bit stays clear.
    int randomBits = (int)TestContext.CurrentContext.Random.NextUInt();
    int fzBit      = 1 << (int)Fpcr.Fz;
    int fpcr       = randomBits & fzBit;

    SingleOpcode(opcodes, v0: dest, v1: src, fpcr: fpcr);

    CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc | Fpsr.Idc);
}
// Runs each opcode from _F_Cm_EqGeGtLeLt_V_2S_4S_ in its 2S (q = 0) and 4S (q = 1) forms and
// compares the outcome against Unicorn, passing only Ioc and Idc as the FPSR mask.
[Test, Pairwise] [Explicit]
public void F_Cm_EqGeGtLeLt_V_2S_4S([ValueSource("_F_Cm_EqGeGtLeLt_V_2S_4S_")] uint opcodes,
                                    [Values(0u)]     uint rd,
                                    [Values(1u, 0u)] uint rn,
                                    [ValueSource("_2S_F_")] ulong z,
                                    [ValueSource("_2S_F_")] ulong a,
                                    [Values(0b0u, 0b1u)] uint q) // <2S, 4S>
{
    // Patch Q (bit 30), Rn (bits 9:5) and Rd (bits 4:0) into the base encoding.
    opcodes |= ((q & 1) << 30) | ((rn & 31) << 5) | (rd & 31);

    Vector128<float> dest = MakeVectorE0E1(z, z);
    // Second argument is `a` for q = 1 and zero for q = 0.
    Vector128<float> src  = MakeVectorE0E1(a, a * q);

    // Only the Fz bit is randomised here; every other FPCR bit stays clear.
    int randomBits = (int)TestContext.CurrentContext.Random.NextUInt();
    int fzBit      = 1 << (int)Fpcr.Fz;
    int fpcr       = randomBits & fzBit;

    SingleOpcode(opcodes, v0: dest, v1: src, fpcr: fpcr);

    CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc | Fpsr.Idc);
}
// Runs each opcode from _F_Cm_EqGeGtLeLt_V_2D_ and compares the outcome against Unicorn,
// passing only Ioc and Idc as the FPSR mask.
[Test, Pairwise] [Explicit]
public void F_Cm_EqGeGtLeLt_V_2D([ValueSource("_F_Cm_EqGeGtLeLt_V_2D_")] uint opcodes,
                                 [Values(0u)]     uint rd,
                                 [Values(1u, 0u)] uint rn,
                                 [ValueSource("_1D_F_")] ulong z,
                                 [ValueSource("_1D_F_")] ulong a)
{
    // Patch Rn (bits 9:5) and Rd (bits 4:0) into the base encoding.
    opcodes |= ((rn & 31) << 5) | (rd & 31);

    Vector128<float> dest = MakeVectorE0E1(z, z);
    Vector128<float> src  = MakeVectorE0E1(a, a);

    // Only the Fz bit is randomised here; every other FPCR bit stays clear.
    int randomBits = (int)TestContext.CurrentContext.Random.NextUInt();
    int fzBit      = 1 << (int)Fpcr.Fz;
    int fpcr       = randomBits & fzBit;

    SingleOpcode(opcodes, v0: dest, v1: src, fpcr: fpcr);

    CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc | Fpsr.Idc);
}
[Test, Pairwise] [Explicit] [Test, Pairwise] [Explicit]
public void F_Cmp_Cmpe_S_S([ValueSource("_F_Cmp_Cmpe_S_S_")] uint opcodes, public void F_Cmp_Cmpe_S_S([ValueSource("_F_Cmp_Cmpe_S_S_")] uint opcodes,
[ValueSource("_1S_F_")] ulong a) [ValueSource("_1S_F_")] ulong a)
@ -1089,13 +1385,13 @@ namespace Ryujinx.Tests.Cpu
[Values(1u, 0u)] uint rn, [Values(1u, 0u)] uint rn,
[ValueSource("_4H_F_")] ulong z, [ValueSource("_4H_F_")] ulong z,
[ValueSource("_4H_F_")] ulong a, [ValueSource("_4H_F_")] ulong a,
[Values(0b0u, 0b1u)] uint q, // <4H, 8H> [Values(0b0u, 0b1u)] uint q, // <4H4S, 8H4S>
[Values(RMode.Rn)] RMode rMode) [Values(RMode.Rn)] RMode rMode)
{ {
opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
opcodes |= ((q & 1) << 30); opcodes |= ((q & 1) << 30);
Vector128<float> v0 = MakeVectorE0E1(q == 0u ? z : 0ul, q == 1u ? z : 0ul); Vector128<float> v0 = MakeVectorE0E1(z, z);
Vector128<float> v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul); Vector128<float> v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul);
int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
@ -1116,12 +1412,12 @@ namespace Ryujinx.Tests.Cpu
[Values(1u, 0u)] uint rn, [Values(1u, 0u)] uint rn,
[ValueSource("_2S_F_")] ulong z, [ValueSource("_2S_F_")] ulong z,
[ValueSource("_2S_F_")] ulong a, [ValueSource("_2S_F_")] ulong a,
[Values(0b0u, 0b1u)] uint q) // <2S, 4S> [Values(0b0u, 0b1u)] uint q) // <2S2D, 4S2D>
{ {
opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
opcodes |= ((q & 1) << 30); opcodes |= ((q & 1) << 30);
Vector128<float> v0 = MakeVectorE0E1(q == 0u ? z : 0ul, q == 1u ? z : 0ul); Vector128<float> v0 = MakeVectorE0E1(z, z);
Vector128<float> v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul); Vector128<float> v1 = MakeVectorE0E1(q == 0u ? a : 0ul, q == 1u ? a : 0ul);
SingleOpcode(opcodes, v0: v0, v1: v1); SingleOpcode(opcodes, v0: v0, v1: v1);
@ -1135,7 +1431,7 @@ namespace Ryujinx.Tests.Cpu
[Values(1u, 0u)] uint rn, [Values(1u, 0u)] uint rn,
[ValueSource("_2S_F_")] ulong z, [ValueSource("_2S_F_")] ulong z,
[ValueSource("_2S_F_")] ulong a, [ValueSource("_2S_F_")] ulong a,
[Values(0b0u, 0b1u)] uint q, // <4H, 8H> [Values(0b0u, 0b1u)] uint q, // <4S4H, 4S8H>
[Values(RMode.Rn)] RMode rMode) [Values(RMode.Rn)] RMode rMode)
{ {
opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
@ -1162,7 +1458,7 @@ namespace Ryujinx.Tests.Cpu
[Values(1u, 0u)] uint rn, [Values(1u, 0u)] uint rn,
[ValueSource("_1D_F_")] ulong z, [ValueSource("_1D_F_")] ulong z,
[ValueSource("_1D_F_")] ulong a, [ValueSource("_1D_F_")] ulong a,
[Values(0b0u, 0b1u)] uint q) // <2S, 4S> [Values(0b0u, 0b1u)] uint q) // <2D2S, 2D4S>
{ {
opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
opcodes |= ((q & 1) << 30); opcodes |= ((q & 1) << 30);
@ -1176,48 +1472,53 @@ namespace Ryujinx.Tests.Cpu
} }
[Test, Pairwise] [Explicit] [Test, Pairwise] [Explicit]
public void F_Abs_Neg_Recpx_Sqrt_S_S([ValueSource("_F_Abs_Neg_Recpx_Sqrt_S_S_")] uint opcodes, public void F_Recpe_Rsqrte_S_S([ValueSource("_F_Recpe_Rsqrte_S_S_")] uint opcodes,
[ValueSource("_1S_F_")] ulong a) [ValueSource("_1S_F_")] ulong a,
[Values(RMode.Rn)] RMode rMode)
{ {
ulong z = TestContext.CurrentContext.Random.NextULong(); ulong z = TestContext.CurrentContext.Random.NextULong();
Vector128<float> v0 = MakeVectorE0E1(z, z); Vector128<float> v0 = MakeVectorE0E1(z, z);
Vector128<float> v1 = MakeVectorE0E1(a, z); Vector128<float> v1 = MakeVectorE0(a);
int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
int fpcr = rnd & (1 << (int)Fpcr.Fz); int fpcr = (int)rMode << (int)Fpcr.RMode;
fpcr |= rnd & (1 << (int)Fpcr.Fz);
fpcr |= rnd & (1 << (int)Fpcr.Dn); fpcr |= rnd & (1 << (int)Fpcr.Dn);
SingleOpcode(opcodes, v0: v0, v1: v1, fpcr: fpcr); SingleOpcode(opcodes, v0: v0, v1: v1, fpcr: fpcr);
CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc | Fpsr.Idc); CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc | Fpsr.Dzc | Fpsr.Ofc | Fpsr.Ufc | Fpsr.Ixc | Fpsr.Idc);
} }
[Test, Pairwise] [Explicit] [Test, Pairwise] [Explicit]
public void F_Abs_Neg_Recpx_Sqrt_S_D([ValueSource("_F_Abs_Neg_Recpx_Sqrt_S_D_")] uint opcodes, public void F_Recpe_Rsqrte_S_D([ValueSource("_F_Recpe_Rsqrte_S_D_")] uint opcodes,
[ValueSource("_1D_F_")] ulong a) [ValueSource("_1D_F_")] ulong a,
[Values(RMode.Rn)] RMode rMode)
{ {
ulong z = TestContext.CurrentContext.Random.NextULong(); ulong z = TestContext.CurrentContext.Random.NextULong();
Vector128<float> v0 = MakeVectorE1(z); Vector128<float> v0 = MakeVectorE1(z);
Vector128<float> v1 = MakeVectorE0E1(a, z); Vector128<float> v1 = MakeVectorE0(a);
int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
int fpcr = rnd & (1 << (int)Fpcr.Fz); int fpcr = (int)rMode << (int)Fpcr.RMode;
fpcr |= rnd & (1 << (int)Fpcr.Fz);
fpcr |= rnd & (1 << (int)Fpcr.Dn); fpcr |= rnd & (1 << (int)Fpcr.Dn);
SingleOpcode(opcodes, v0: v0, v1: v1, fpcr: fpcr); SingleOpcode(opcodes, v0: v0, v1: v1, fpcr: fpcr);
CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc | Fpsr.Idc); CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc | Fpsr.Dzc | Fpsr.Ofc | Fpsr.Ufc | Fpsr.Ixc | Fpsr.Idc);
} }
[Test, Pairwise] [Explicit] [Test, Pairwise] [Explicit]
public void F_Abs_Neg_Sqrt_V_2S_4S([ValueSource("_F_Abs_Neg_Sqrt_V_2S_4S_")] uint opcodes, public void F_Recpe_Rsqrte_V_2S_4S([ValueSource("_F_Recpe_Rsqrte_V_2S_4S_")] uint opcodes,
[Values(0u)] uint rd, [Values(0u)] uint rd,
[Values(1u, 0u)] uint rn, [Values(1u, 0u)] uint rn,
[ValueSource("_2S_F_")] ulong z, [ValueSource("_2S_F_")] ulong z,
[ValueSource("_2S_F_")] ulong a, [ValueSource("_2S_F_")] ulong a,
[Values(0b0u, 0b1u)] uint q) // <2S, 4S> [Values(0b0u, 0b1u)] uint q, // <2S, 4S>
[Values(RMode.Rn)] RMode rMode)
{ {
opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
opcodes |= ((q & 1) << 30); opcodes |= ((q & 1) << 30);
@ -1227,20 +1528,22 @@ namespace Ryujinx.Tests.Cpu
int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
int fpcr = rnd & (1 << (int)Fpcr.Fz); int fpcr = (int)rMode << (int)Fpcr.RMode;
fpcr |= rnd & (1 << (int)Fpcr.Fz);
fpcr |= rnd & (1 << (int)Fpcr.Dn); fpcr |= rnd & (1 << (int)Fpcr.Dn);
SingleOpcode(opcodes, v0: v0, v1: v1, fpcr: fpcr); SingleOpcode(opcodes, v0: v0, v1: v1, fpcr: fpcr);
CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc | Fpsr.Idc); CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc | Fpsr.Dzc | Fpsr.Ofc | Fpsr.Ufc | Fpsr.Ixc | Fpsr.Idc);
} }
[Test, Pairwise] [Explicit] [Test, Pairwise] [Explicit]
public void F_Abs_Neg_Sqrt_V_2D([ValueSource("_F_Abs_Neg_Sqrt_V_2D_")] uint opcodes, public void F_Recpe_Rsqrte_V_2D([ValueSource("_F_Recpe_Rsqrte_V_2D_")] uint opcodes,
[Values(0u)] uint rd, [Values(0u)] uint rd,
[Values(1u, 0u)] uint rn, [Values(1u, 0u)] uint rn,
[ValueSource("_1D_F_")] ulong z, [ValueSource("_1D_F_")] ulong z,
[ValueSource("_1D_F_")] ulong a) [ValueSource("_1D_F_")] ulong a,
[Values(RMode.Rn)] RMode rMode)
{ {
opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((rn & 31) << 5) | ((rd & 31) << 0);
@ -1249,12 +1552,13 @@ namespace Ryujinx.Tests.Cpu
int rnd = (int)TestContext.CurrentContext.Random.NextUInt(); int rnd = (int)TestContext.CurrentContext.Random.NextUInt();
int fpcr = rnd & (1 << (int)Fpcr.Fz); int fpcr = (int)rMode << (int)Fpcr.RMode;
fpcr |= rnd & (1 << (int)Fpcr.Fz);
fpcr |= rnd & (1 << (int)Fpcr.Dn); fpcr |= rnd & (1 << (int)Fpcr.Dn);
SingleOpcode(opcodes, v0: v0, v1: v1, fpcr: fpcr); SingleOpcode(opcodes, v0: v0, v1: v1, fpcr: fpcr);
CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc | Fpsr.Idc); CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc | Fpsr.Dzc | Fpsr.Ofc | Fpsr.Ufc | Fpsr.Ixc | Fpsr.Idc);
} }
[Test, Pairwise, Description("NEG <V><d>, <V><n>")] [Test, Pairwise, Description("NEG <V><d>, <V><n>")]
@ -1662,6 +1966,27 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(); CompareAgainstUnicorn();
} }
// Builds a SHLL/SHLL2 encoding from the register numbers, element size and Q bit, runs it,
// and compares the outcome against Unicorn.
[Test, Pairwise, Description("SHLL{2} <Vd>.<Ta>, <Vn>.<Tb>, #<shift>")]
public void Shll_V([Values(0u)]     uint rd,
                   [Values(1u, 0u)] uint rn,
                   [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong z,
                   [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong a,
                   [Values(0b00u, 0b01u, 0b10u)] uint size, // <shift: 8, 16, 32>
                   [Values(0b0u, 0b1u)] uint q)
{
    const uint BaseEncoding = 0x2E213800; // SHLL V0.8H, V0.8B, #8

    // Patch Q (bit 30), size (bits 23:22), Rn (bits 9:5) and Rd (bits 4:0) into the base.
    uint opcode = BaseEncoding
                | ((q    & 1)  << 30)
                | ((size & 3)  << 22)
                | ((rn   & 31) << 5)
                | (rd & 31);

    // `a` goes in the first argument for q = 0 and in the second for q = 1; the other is zero.
    ulong srcFirst  = q == 0u ? a : 0ul;
    ulong srcSecond = q == 1u ? a : 0ul;

    Vector128<float> dest = MakeVectorE0E1(z, z);
    Vector128<float> src  = MakeVectorE0E1(srcFirst, srcSecond);

    SingleOpcode(opcode, v0: dest, v1: src);

    CompareAgainstUnicorn();
}
[Test, Pairwise, Description("SQABS <V><d>, <V><n>")] [Test, Pairwise, Description("SQABS <V><d>, <V><n>")]
public void Sqabs_S_B_H_S_D([Values(0u)] uint rd, public void Sqabs_S_B_H_S_D([Values(0u)] uint rd,
[Values(1u, 0u)] uint rn, [Values(1u, 0u)] uint rn,

View file

@ -206,6 +206,7 @@ namespace Ryujinx.Tests.Cpu
{ {
return new uint[] return new uint[]
{ {
0x7EA2D420u, // FABD S0, S1, S2
0x1E222820u, // FADD S0, S1, S2 0x1E222820u, // FADD S0, S1, S2
0x1E221820u, // FDIV S0, S1, S2 0x1E221820u, // FDIV S0, S1, S2
0x1E220820u, // FMUL S0, S1, S2 0x1E220820u, // FMUL S0, S1, S2
@ -218,6 +219,7 @@ namespace Ryujinx.Tests.Cpu
{ {
return new uint[] return new uint[]
{ {
0x7EE2D420u, // FABD D0, D1, D2
0x1E622820u, // FADD D0, D1, D2 0x1E622820u, // FADD D0, D1, D2
0x1E621820u, // FDIV D0, D1, D2 0x1E621820u, // FDIV D0, D1, D2
0x1E620820u, // FMUL D0, D1, D2 0x1E620820u, // FMUL D0, D1, D2
@ -226,11 +228,13 @@ namespace Ryujinx.Tests.Cpu
}; };
} }
private static uint[] _F_Add_Div_Mul_Mulx_Sub_V_2S_4S_() private static uint[] _F_Add_Div_Mul_Mulx_Sub_P_V_2S_4S_()
{ {
return new uint[] return new uint[]
{ {
0x2EA0D400u, // FABD V0.2S, V0.2S, V0.2S
0x0E20D400u, // FADD V0.2S, V0.2S, V0.2S 0x0E20D400u, // FADD V0.2S, V0.2S, V0.2S
0x2E20D400u, // FADDP V0.2S, V0.2S, V0.2S
0x2E20FC00u, // FDIV V0.2S, V0.2S, V0.2S 0x2E20FC00u, // FDIV V0.2S, V0.2S, V0.2S
0x2E20DC00u, // FMUL V0.2S, V0.2S, V0.2S 0x2E20DC00u, // FMUL V0.2S, V0.2S, V0.2S
0x0E20DC00u, // FMULX V0.2S, V0.2S, V0.2S 0x0E20DC00u, // FMULX V0.2S, V0.2S, V0.2S
@ -238,11 +242,13 @@ namespace Ryujinx.Tests.Cpu
}; };
} }
private static uint[] _F_Add_Div_Mul_Mulx_Sub_V_2D_() private static uint[] _F_Add_Div_Mul_Mulx_Sub_P_V_2D_()
{ {
return new uint[] return new uint[]
{ {
0x6EE0D400u, // FABD V0.2D, V0.2D, V0.2D
0x4E60D400u, // FADD V0.2D, V0.2D, V0.2D 0x4E60D400u, // FADD V0.2D, V0.2D, V0.2D
0x6E60D400u, // FADDP V0.2D, V0.2D, V0.2D
0x6E60FC00u, // FDIV V0.2D, V0.2D, V0.2D 0x6E60FC00u, // FDIV V0.2D, V0.2D, V0.2D
0x6E60DC00u, // FMUL V0.2D, V0.2D, V0.2D 0x6E60DC00u, // FMUL V0.2D, V0.2D, V0.2D
0x4E60DC00u, // FMULX V0.2D, V0.2D, V0.2D 0x4E60DC00u, // FMULX V0.2D, V0.2D, V0.2D
@ -250,6 +256,46 @@ namespace Ryujinx.Tests.Cpu
}; };
} }
// Value source for F_Cm_EqGeGt_S_S: encodings of the scalar single-precision
// register-register floating-point compares.
private static uint[] _F_Cm_EqGeGt_S_S_() => new uint[]
{
    0x5E22E420u, // FCMEQ S0, S1, S2
    0x7E22E420u, // FCMGE S0, S1, S2
    0x7EA2E420u  // FCMGT S0, S1, S2
};
// Value source for F_Cm_EqGeGt_S_D: encodings of the scalar double-precision
// register-register floating-point compares.
private static uint[] _F_Cm_EqGeGt_S_D_() => new uint[]
{
    0x5E62E420u, // FCMEQ D0, D1, D2
    0x7E62E420u, // FCMGE D0, D1, D2
    0x7EE2E420u  // FCMGT D0, D1, D2
};
// Value source for F_Cm_EqGeGt_V_2S_4S: base encodings of the vector (2S) register-register
// floating-point compares. The consuming test ORs the Rm/Rn/Rd and Q fields in.
private static uint[] _F_Cm_EqGeGt_V_2S_4S_() => new uint[]
{
    0x0E20E400u, // FCMEQ V0.2S, V0.2S, V0.2S
    0x2E20E400u, // FCMGE V0.2S, V0.2S, V0.2S
    0x2EA0E400u  // FCMGT V0.2S, V0.2S, V0.2S
};
// Value source for F_Cm_EqGeGt_V_2D: base encodings of the vector (2D) register-register
// floating-point compares. The consuming test ORs the Rm/Rn/Rd fields in.
private static uint[] _F_Cm_EqGeGt_V_2D_() => new uint[]
{
    0x4E60E400u, // FCMEQ V0.2D, V0.2D, V0.2D
    0x6E60E400u, // FCMGE V0.2D, V0.2D, V0.2D
    0x6EE0E400u  // FCMGT V0.2D, V0.2D, V0.2D
};
private static uint[] _F_Cmp_Cmpe_S_S_() private static uint[] _F_Cmp_Cmpe_S_S_()
{ {
return new uint[] return new uint[]
@ -1285,14 +1331,14 @@ namespace Ryujinx.Tests.Cpu
} }
[Test, Pairwise] [Explicit] [Test, Pairwise] [Explicit]
public void F_Add_Div_Mul_Mulx_Sub_V_2S_4S([ValueSource("_F_Add_Div_Mul_Mulx_Sub_V_2S_4S_")] uint opcodes, public void F_Add_Div_Mul_Mulx_Sub_P_V_2S_4S([ValueSource("_F_Add_Div_Mul_Mulx_Sub_P_V_2S_4S_")] uint opcodes,
[Values(0u)] uint rd, [Values(0u)] uint rd,
[Values(1u, 0u)] uint rn, [Values(1u, 0u)] uint rn,
[Values(2u, 0u)] uint rm, [Values(2u, 0u)] uint rm,
[ValueSource("_2S_F_")] ulong z, [ValueSource("_2S_F_")] ulong z,
[ValueSource("_2S_F_")] ulong a, [ValueSource("_2S_F_")] ulong a,
[ValueSource("_2S_F_")] ulong b, [ValueSource("_2S_F_")] ulong b,
[Values(0b0u, 0b1u)] uint q) // <2S, 4S> [Values(0b0u, 0b1u)] uint q) // <2S, 4S>
{ {
opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
opcodes |= ((q & 1) << 30); opcodes |= ((q & 1) << 30);
@ -1312,13 +1358,13 @@ namespace Ryujinx.Tests.Cpu
} }
[Test, Pairwise] [Explicit] [Test, Pairwise] [Explicit]
public void F_Add_Div_Mul_Mulx_Sub_V_2D([ValueSource("_F_Add_Div_Mul_Mulx_Sub_V_2D_")] uint opcodes, public void F_Add_Div_Mul_Mulx_Sub_P_V_2D([ValueSource("_F_Add_Div_Mul_Mulx_Sub_P_V_2D_")] uint opcodes,
[Values(0u)] uint rd, [Values(0u)] uint rd,
[Values(1u, 0u)] uint rn, [Values(1u, 0u)] uint rn,
[Values(2u, 0u)] uint rm, [Values(2u, 0u)] uint rm,
[ValueSource("_1D_F_")] ulong z, [ValueSource("_1D_F_")] ulong z,
[ValueSource("_1D_F_")] ulong a, [ValueSource("_1D_F_")] ulong a,
[ValueSource("_1D_F_")] ulong b) [ValueSource("_1D_F_")] ulong b)
{ {
opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0); opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
@ -1336,6 +1382,94 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc | Fpsr.Dzc | Fpsr.Idc); CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc | Fpsr.Dzc | Fpsr.Idc);
} }
// Runs each opcode from _F_Cm_EqGeGt_S_S_ on two single-precision scalar inputs and
// compares the outcome against Unicorn, passing only Ioc and Idc as the FPSR mask.
[Test, Pairwise] [Explicit]
public void F_Cm_EqGeGt_S_S([ValueSource("_F_Cm_EqGeGt_S_S_")] uint opcodes,
                            [ValueSource("_1S_F_")] ulong a,
                            [ValueSource("_1S_F_")] ulong b)
{
    // Random 64-bit pattern used to pre-fill both arguments of the destination vector.
    ulong filler = TestContext.CurrentContext.Random.NextULong();

    Vector128<float> dest = MakeVectorE0E1(filler, filler);
    Vector128<float> lhs  = MakeVectorE0(a);
    Vector128<float> rhs  = MakeVectorE0(b);

    // Only the Fz bit is randomised here; every other FPCR bit stays clear.
    int randomBits = (int)TestContext.CurrentContext.Random.NextUInt();
    int fzBit      = 1 << (int)Fpcr.Fz;
    int fpcr       = randomBits & fzBit;

    SingleOpcode(opcodes, v0: dest, v1: lhs, v2: rhs, fpcr: fpcr);

    CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc | Fpsr.Idc);
}
// Runs each opcode from _F_Cm_EqGeGt_S_D_ on two double-precision scalar inputs and
// compares the outcome against Unicorn, passing only Ioc and Idc as the FPSR mask.
[Test, Pairwise] [Explicit]
public void F_Cm_EqGeGt_S_D([ValueSource("_F_Cm_EqGeGt_S_D_")] uint opcodes,
                            [ValueSource("_1D_F_")] ulong a,
                            [ValueSource("_1D_F_")] ulong b)
{
    // Random 64-bit pattern handed to MakeVectorE1 for the destination vector.
    ulong filler = TestContext.CurrentContext.Random.NextULong();

    Vector128<float> dest = MakeVectorE1(filler);
    Vector128<float> lhs  = MakeVectorE0(a);
    Vector128<float> rhs  = MakeVectorE0(b);

    // Only the Fz bit is randomised here; every other FPCR bit stays clear.
    int randomBits = (int)TestContext.CurrentContext.Random.NextUInt();
    int fzBit      = 1 << (int)Fpcr.Fz;
    int fpcr       = randomBits & fzBit;

    SingleOpcode(opcodes, v0: dest, v1: lhs, v2: rhs, fpcr: fpcr);

    CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc | Fpsr.Idc);
}
// Runs each opcode from _F_Cm_EqGeGt_V_2S_4S_ in its 2S (q = 0) and 4S (q = 1) forms and
// compares the outcome against Unicorn, passing only Ioc and Idc as the FPSR mask.
[Test, Pairwise] [Explicit]
public void F_Cm_EqGeGt_V_2S_4S([ValueSource("_F_Cm_EqGeGt_V_2S_4S_")] uint opcodes,
                                [Values(0u)]     uint rd,
                                [Values(1u, 0u)] uint rn,
                                [Values(2u, 0u)] uint rm,
                                [ValueSource("_2S_F_")] ulong z,
                                [ValueSource("_2S_F_")] ulong a,
                                [ValueSource("_2S_F_")] ulong b,
                                [Values(0b0u, 0b1u)] uint q) // <2S, 4S>
{
    // Patch Q (bit 30), Rm (bits 20:16), Rn (bits 9:5) and Rd (bits 4:0) into the base encoding.
    opcodes |= ((q & 1) << 30) | ((rm & 31) << 16) | ((rn & 31) << 5) | (rd & 31);

    Vector128<float> dest = MakeVectorE0E1(z, z);
    // Second argument of each source is the input for q = 1 and zero for q = 0.
    Vector128<float> lhs  = MakeVectorE0E1(a, a * q);
    Vector128<float> rhs  = MakeVectorE0E1(b, b * q);

    // Only the Fz bit is randomised here; every other FPCR bit stays clear.
    int randomBits = (int)TestContext.CurrentContext.Random.NextUInt();
    int fzBit      = 1 << (int)Fpcr.Fz;
    int fpcr       = randomBits & fzBit;

    SingleOpcode(opcodes, v0: dest, v1: lhs, v2: rhs, fpcr: fpcr);

    CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc | Fpsr.Idc);
}
// Runs each opcode from _F_Cm_EqGeGt_V_2D_ and compares the outcome against Unicorn,
// passing only Ioc and Idc as the FPSR mask.
[Test, Pairwise] [Explicit]
public void F_Cm_EqGeGt_V_2D([ValueSource("_F_Cm_EqGeGt_V_2D_")] uint opcodes,
                             [Values(0u)]     uint rd,
                             [Values(1u, 0u)] uint rn,
                             [Values(2u, 0u)] uint rm,
                             [ValueSource("_1D_F_")] ulong z,
                             [ValueSource("_1D_F_")] ulong a,
                             [ValueSource("_1D_F_")] ulong b)
{
    // Patch Rm (bits 20:16), Rn (bits 9:5) and Rd (bits 4:0) into the base encoding.
    opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | (rd & 31);

    Vector128<float> dest = MakeVectorE0E1(z, z);
    Vector128<float> lhs  = MakeVectorE0E1(a, a);
    Vector128<float> rhs  = MakeVectorE0E1(b, b);

    // Only the Fz bit is randomised here; every other FPCR bit stays clear.
    int randomBits = (int)TestContext.CurrentContext.Random.NextUInt();
    int fzBit      = 1 << (int)Fpcr.Fz;
    int fpcr       = randomBits & fzBit;

    SingleOpcode(opcodes, v0: dest, v1: lhs, v2: rhs, fpcr: fpcr);

    CompareAgainstUnicorn(fpsrMask: Fpsr.Ioc | Fpsr.Idc);
}
[Test, Pairwise] [Explicit] [Test, Pairwise] [Explicit]
public void F_Cmp_Cmpe_S_S([ValueSource("_F_Cmp_Cmpe_S_S_")] uint opcodes, public void F_Cmp_Cmpe_S_S([ValueSource("_F_Cmp_Cmpe_S_S_")] uint opcodes,
[ValueSource("_1S_F_")] ulong a, [ValueSource("_1S_F_")] ulong a,

View file

@ -50,6 +50,33 @@ namespace Ryujinx.Tests.Cpu
#endregion #endregion
#region "ValueSource (Opcodes)" #region "ValueSource (Opcodes)"
// Value source for SU_Shll_V_8B8H_16B8H: base encodings with a zero shift. The consuming
// test ORs the shift field, Rn/Rd and Q in.
private static uint[] _SU_Shll_V_8B8H_16B8H_() => new uint[]
{
    0x0F08A400u, // SSHLL V0.8H, V0.8B, #0
    0x2F08A400u  // USHLL V0.8H, V0.8B, #0
};
// Value source for SU_Shll_V_4H4S_8H4S: base encodings with a zero shift. The consuming
// test ORs the shift field, Rn/Rd and Q in.
private static uint[] _SU_Shll_V_4H4S_8H4S_() => new uint[]
{
    0x0F10A400u, // SSHLL V0.4S, V0.4H, #0
    0x2F10A400u  // USHLL V0.4S, V0.4H, #0
};
// Value source for SU_Shll_V_2S2D_4S2D: base encodings with a zero shift. The consuming
// test ORs the shift field, Rn/Rd and Q in.
private static uint[] _SU_Shll_V_2S2D_4S2D_() => new uint[]
{
    0x0F20A400u, // SSHLL V0.2D, V0.2S, #0
    0x2F20A400u  // USHLL V0.2D, V0.2S, #0
};
private static uint[] _ShrImm_S_D_() private static uint[] _ShrImm_S_D_()
{ {
return new uint[] return new uint[]
@ -344,6 +371,75 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(); CompareAgainstUnicorn();
} }
// Runs SSHLL/USHLL (byte source elements) for every shift in 0..7, in both Q forms, and
// compares the outcome against Unicorn.
[Test, Pairwise]
public void SU_Shll_V_8B8H_16B8H([ValueSource("_SU_Shll_V_8B8H_16B8H_")] uint opcodes,
                                 [Values(0u)]     uint rd,
                                 [Values(1u, 0u)] uint rn,
                                 [ValueSource("_8B_")] [Random(RndCnt)] ulong z,
                                 [ValueSource("_8B_")] [Random(RndCnt)] ulong a,
                                 [Range(0u, 7u)] uint shift,
                                 [Values(0b0u, 0b1u)] uint q) // <8B8H, 16B8H>
{
    // The 7-bit field placed at bit 16 carries the element size (8) plus the shift amount.
    uint immHb = (shift + 8u) & 0x7Fu;

    // Patch Q (bit 30), the shift field, Rn (bits 9:5) and Rd (bits 4:0) into the base encoding.
    opcodes |= ((q & 1) << 30) | (immHb << 16) | ((rn & 31) << 5) | (rd & 31);

    // `a` goes in the first argument for q = 0 and in the second for q = 1; the other is zero.
    ulong srcFirst  = q == 0u ? a : 0ul;
    ulong srcSecond = q == 1u ? a : 0ul;

    Vector128<float> dest = MakeVectorE0E1(z, z);
    Vector128<float> src  = MakeVectorE0E1(srcFirst, srcSecond);

    SingleOpcode(opcodes, v0: dest, v1: src);

    CompareAgainstUnicorn();
}
// Runs SSHLL/USHLL (halfword source elements) for every shift in 0..15, in both Q forms,
// and compares the outcome against Unicorn.
[Test, Pairwise]
public void SU_Shll_V_4H4S_8H4S([ValueSource("_SU_Shll_V_4H4S_8H4S_")] uint opcodes,
                                [Values(0u)]     uint rd,
                                [Values(1u, 0u)] uint rn,
                                [ValueSource("_4H_")] [Random(RndCnt)] ulong z,
                                [ValueSource("_4H_")] [Random(RndCnt)] ulong a,
                                [Range(0u, 15u)] uint shift,
                                [Values(0b0u, 0b1u)] uint q) // <4H4S, 8H4S>
{
    // The 7-bit field placed at bit 16 carries the element size (16) plus the shift amount.
    uint immHb = (shift + 16u) & 0x7Fu;

    // Patch Q (bit 30), the shift field, Rn (bits 9:5) and Rd (bits 4:0) into the base encoding.
    opcodes |= ((q & 1) << 30) | (immHb << 16) | ((rn & 31) << 5) | (rd & 31);

    // `a` goes in the first argument for q = 0 and in the second for q = 1; the other is zero.
    ulong srcFirst  = q == 0u ? a : 0ul;
    ulong srcSecond = q == 1u ? a : 0ul;

    Vector128<float> dest = MakeVectorE0E1(z, z);
    Vector128<float> src  = MakeVectorE0E1(srcFirst, srcSecond);

    SingleOpcode(opcodes, v0: dest, v1: src);

    CompareAgainstUnicorn();
}
// Runs SSHLL/USHLL (word source elements) for every shift in 0..31, in both Q forms, and
// compares the outcome against Unicorn.
[Test, Pairwise]
public void SU_Shll_V_2S2D_4S2D([ValueSource("_SU_Shll_V_2S2D_4S2D_")] uint opcodes,
                                [Values(0u)]     uint rd,
                                [Values(1u, 0u)] uint rn,
                                [ValueSource("_2S_")] [Random(RndCnt)] ulong z,
                                [ValueSource("_2S_")] [Random(RndCnt)] ulong a,
                                [Range(0u, 31u)] uint shift,
                                [Values(0b0u, 0b1u)] uint q) // <2S2D, 4S2D>
{
    // The 7-bit field placed at bit 16 carries the element size (32) plus the shift amount.
    uint immHb = (shift + 32u) & 0x7Fu;

    // Patch Q (bit 30), the shift field, Rn (bits 9:5) and Rd (bits 4:0) into the base encoding.
    opcodes |= ((q & 1) << 30) | (immHb << 16) | ((rn & 31) << 5) | (rd & 31);

    // `a` goes in the first argument for q = 0 and in the second for q = 1; the other is zero.
    ulong srcFirst  = q == 0u ? a : 0ul;
    ulong srcSecond = q == 1u ? a : 0ul;

    Vector128<float> dest = MakeVectorE0E1(z, z);
    Vector128<float> src  = MakeVectorE0E1(srcFirst, srcSecond);

    SingleOpcode(opcodes, v0: dest, v1: src);

    CompareAgainstUnicorn();
}
[Test, Pairwise] [Test, Pairwise]
public void ShrImm_S_D([ValueSource("_ShrImm_S_D_")] uint opcodes, public void ShrImm_S_D([ValueSource("_ShrImm_S_D_")] uint opcodes,
[Values(0u)] uint rd, [Values(0u)] uint rd,