From a0c78f792012cdea060444d7cb6a36dbabb04d52 Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Sat, 8 Sep 2018 19:24:29 +0200 Subject: [PATCH] Fix/Add 10 Shift Right and Mls_Ve Instructions; add 14 Tests. (#407) * Update AOpCodeTable.cs * Update AInstEmitSimdShift.cs * Update ASoftFallback.cs * Update AOpCodeSimdShImm.cs * Update ABitUtils.cs * Update AInstEmitSimdArithmetic.cs * Update AInstEmitSimdHelper.cs * Create CpuTestSimdShImm.cs * Create CpuTestSimdRegElem.cs * Address PR feedback. * Nit. * Nit. --- ChocolArm64/ABitUtils.cs | 6 +- ChocolArm64/AOpCodeTable.cs | 18 +- ChocolArm64/Decoder/AOpCodeSimdShImm.cs | 4 +- .../Instruction/AInstEmitSimdArithmetic.cs | 9 + .../Instruction/AInstEmitSimdHelper.cs | 5 +- ChocolArm64/Instruction/AInstEmitSimdShift.cs | 225 +++++++----- ChocolArm64/Instruction/ASoftFallback.cs | 86 +++++ Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs | 143 ++++++++ Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs | 344 ++++++++++++++++++ 9 files changed, 742 insertions(+), 98 deletions(-) create mode 100644 Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs create mode 100644 Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs diff --git a/ChocolArm64/ABitUtils.cs b/ChocolArm64/ABitUtils.cs index 357dd45d1..dd4162356 100644 --- a/ChocolArm64/ABitUtils.cs +++ b/ChocolArm64/ABitUtils.cs @@ -27,6 +27,10 @@ namespace ChocolArm64 return -1; } + private static readonly sbyte[] HbsNibbleTbl = { -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 }; + + public static int HighestBitSetNibble(int Value) => HbsNibbleTbl[Value & 0b1111]; + public static long Replicate(long Bits, int Size) { long Output = 0; @@ -54,4 +58,4 @@ namespace ChocolArm64 return Value != 0 && (Value & (Value - 1)) == 0; } } -} \ No newline at end of file +} diff --git a/ChocolArm64/AOpCodeTable.cs b/ChocolArm64/AOpCodeTable.cs index a73466ae1..b053334f3 100644 --- a/ChocolArm64/AOpCodeTable.cs +++ b/ChocolArm64/AOpCodeTable.cs @@ -343,6 +343,7 @@ namespace ChocolArm64 SetA64("0x001110<<1xxxxx100101xxxxxxxxxx", AInstEmit.Mla_V, typeof(AOpCodeSimdReg)); SetA64("0x101111xxxxxxxx0000x0xxxxxxxxxx", AInstEmit.Mla_Ve, typeof(AOpCodeSimdRegElem)); SetA64("0x101110<<1xxxxx100101xxxxxxxxxx", AInstEmit.Mls_V, typeof(AOpCodeSimdReg)); + SetA64("0x101111xxxxxxxx0100x0xxxxxxxxxx", AInstEmit.Mls_Ve, typeof(AOpCodeSimdRegElem)); SetA64("0x00111100000xxx0xx001xxxxxxxxxx", AInstEmit.Movi_V, typeof(AOpCodeSimdImm)); SetA64("0x00111100000xxx10x001xxxxxxxxxx", AInstEmit.Movi_V, typeof(AOpCodeSimdImm)); SetA64("0x00111100000xxx110x01xxxxxxxxxx", AInstEmit.Movi_V, typeof(AOpCodeSimdImm)); @@ -380,8 +381,9 @@ namespace ChocolArm64 SetA64("0101111000101000001010xxxxxxxxxx", AInstEmit.Sha256su0_V, typeof(AOpCodeSimd)); SetA64("01011110000xxxxx011000xxxxxxxxxx", AInstEmit.Sha256su1_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<1xxxxx000001xxxxxxxxxx", AInstEmit.Shadd_V, typeof(AOpCodeSimdReg)); - SetA64("010111110>>>>xxx010101xxxxxxxxxx", AInstEmit.Shl_S, typeof(AOpCodeSimdShImm)); - SetA64("0x0011110>>>>xxx010101xxxxxxxxxx", AInstEmit.Shl_V, typeof(AOpCodeSimdShImm)); + SetA64("0101111101xxxxxx010101xxxxxxxxxx", AInstEmit.Shl_S, typeof(AOpCodeSimdShImm)); + SetA64("0x00111100>>>xxx010101xxxxxxxxxx", AInstEmit.Shl_V, typeof(AOpCodeSimdShImm)); + SetA64("0100111101xxxxxx010101xxxxxxxxxx", AInstEmit.Shl_V, typeof(AOpCodeSimdShImm)); SetA64("0x101110<<100001001110xxxxxxxxxx", AInstEmit.Shll_V, typeof(AOpCodeSimd)); SetA64("0x00111100>>>xxx100001xxxxxxxxxx", AInstEmit.Shrn_V, typeof(AOpCodeSimdShImm)); SetA64("0x001110<<1xxxxx001001xxxxxxxxxx", AInstEmit.Shsub_V, typeof(AOpCodeSimdReg)); @@ -415,13 +417,18 @@ namespace ChocolArm64 SetA64("01111110<<100001001010xxxxxxxxxx", AInstEmit.Sqxtun_S, typeof(AOpCodeSimd)); SetA64("0x101110<<100001001010xxxxxxxxxx", AInstEmit.Sqxtun_V, typeof(AOpCodeSimd)); SetA64("0x001110<<1xxxxx000101xxxxxxxxxx", AInstEmit.Srhadd_V, typeof(AOpCodeSimdReg)); + SetA64("0101111101xxxxxx001001xxxxxxxxxx", AInstEmit.Srshr_S, typeof(AOpCodeSimdShImm)); SetA64("0x00111100>>>xxx001001xxxxxxxxxx", AInstEmit.Srshr_V, typeof(AOpCodeSimdShImm)); SetA64("0100111101xxxxxx001001xxxxxxxxxx", AInstEmit.Srshr_V, typeof(AOpCodeSimdShImm)); + SetA64("0101111101xxxxxx001101xxxxxxxxxx", AInstEmit.Srsra_S, typeof(AOpCodeSimdShImm)); + SetA64("0x00111100>>>xxx001101xxxxxxxxxx", AInstEmit.Srsra_V, typeof(AOpCodeSimdShImm)); + SetA64("0100111101xxxxxx001101xxxxxxxxxx", AInstEmit.Srsra_V, typeof(AOpCodeSimdShImm)); SetA64("0>001110<<1xxxxx010001xxxxxxxxxx", AInstEmit.Sshl_V, typeof(AOpCodeSimdReg)); SetA64("0x00111100>>>xxx101001xxxxxxxxxx", AInstEmit.Sshll_V, typeof(AOpCodeSimdShImm)); SetA64("0101111101xxxxxx000001xxxxxxxxxx", AInstEmit.Sshr_S, typeof(AOpCodeSimdShImm)); SetA64("0x00111100>>>xxx000001xxxxxxxxxx", AInstEmit.Sshr_V, typeof(AOpCodeSimdShImm)); SetA64("0100111101xxxxxx000001xxxxxxxxxx", AInstEmit.Sshr_V, typeof(AOpCodeSimdShImm)); + SetA64("0101111101xxxxxx000101xxxxxxxxxx", AInstEmit.Ssra_S, typeof(AOpCodeSimdShImm)); SetA64("0x00111100>>>xxx000101xxxxxxxxxx", AInstEmit.Ssra_V, typeof(AOpCodeSimdShImm)); SetA64("0100111101xxxxxx000101xxxxxxxxxx", AInstEmit.Ssra_V, typeof(AOpCodeSimdShImm)); SetA64("0x001110<<1xxxxx001000xxxxxxxxxx", AInstEmit.Ssubl_V, typeof(AOpCodeSimdReg)); @@ -474,6 +481,12 @@ namespace ChocolArm64 SetA64("01111110<<100001010010xxxxxxxxxx", AInstEmit.Uqxtn_S, typeof(AOpCodeSimd)); SetA64("0x101110<<100001010010xxxxxxxxxx", AInstEmit.Uqxtn_V, typeof(AOpCodeSimd)); SetA64("0x101110<<1xxxxx000101xxxxxxxxxx", AInstEmit.Urhadd_V, typeof(AOpCodeSimdReg)); + SetA64("0111111101xxxxxx001001xxxxxxxxxx", AInstEmit.Urshr_S, typeof(AOpCodeSimdShImm)); + SetA64("0x10111100>>>xxx001001xxxxxxxxxx", AInstEmit.Urshr_V, typeof(AOpCodeSimdShImm)); + SetA64("0110111101xxxxxx001001xxxxxxxxxx", AInstEmit.Urshr_V, typeof(AOpCodeSimdShImm)); + SetA64("0111111101xxxxxx001101xxxxxxxxxx", AInstEmit.Ursra_S, typeof(AOpCodeSimdShImm)); + SetA64("0x10111100>>>xxx001101xxxxxxxxxx", AInstEmit.Ursra_V, typeof(AOpCodeSimdShImm)); + SetA64("0110111101xxxxxx001101xxxxxxxxxx", AInstEmit.Ursra_V, typeof(AOpCodeSimdShImm)); SetA64("0>101110<<1xxxxx010001xxxxxxxxxx", AInstEmit.Ushl_V, typeof(AOpCodeSimdReg)); SetA64("0x10111100>>>xxx101001xxxxxxxxxx", AInstEmit.Ushll_V, typeof(AOpCodeSimdShImm)); SetA64("0111111101xxxxxx000001xxxxxxxxxx", AInstEmit.Ushr_S, typeof(AOpCodeSimdShImm)); @@ -481,6 +494,7 @@ namespace ChocolArm64 SetA64("0110111101xxxxxx000001xxxxxxxxxx", AInstEmit.Ushr_V, typeof(AOpCodeSimdShImm)); SetA64("01111110xx100000001110xxxxxxxxxx", AInstEmit.Usqadd_S, typeof(AOpCodeSimd)); SetA64("0>101110<<100000001110xxxxxxxxxx", AInstEmit.Usqadd_V, typeof(AOpCodeSimd)); + SetA64("0111111101xxxxxx000101xxxxxxxxxx", AInstEmit.Usra_S, typeof(AOpCodeSimdShImm)); SetA64("0x10111100>>>xxx000101xxxxxxxxxx", AInstEmit.Usra_V, typeof(AOpCodeSimdShImm)); SetA64("0110111101xxxxxx000101xxxxxxxxxx", AInstEmit.Usra_V, typeof(AOpCodeSimdShImm)); SetA64("0x101110<<1xxxxx001000xxxxxxxxxx", AInstEmit.Usubl_V, typeof(AOpCodeSimdReg)); diff --git a/ChocolArm64/Decoder/AOpCodeSimdShImm.cs b/ChocolArm64/Decoder/AOpCodeSimdShImm.cs index 6c8398817..e6d5210f2 100644 --- a/ChocolArm64/Decoder/AOpCodeSimdShImm.cs +++ b/ChocolArm64/Decoder/AOpCodeSimdShImm.cs @@ -10,7 +10,7 @@ namespace ChocolArm64.Decoder { Imm = (OpCode >> 16) & 0x7f; - Size = ABitUtils.HighestBitSet32(Imm >> 3); + Size = ABitUtils.HighestBitSetNibble(Imm >> 3); } } -} \ No newline at end of file +} diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs index a291a7e51..b9aedd07b 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs @@ -957,6 +957,15 @@ namespace ChocolArm64.Instruction }); } + public static void Mls_Ve(AILEmitterCtx Context) + { + EmitVectorTernaryOpByElemZx(Context, () => + { + Context.Emit(OpCodes.Mul); + Context.Emit(OpCodes.Sub); + }); + } + public static void Mul_V(AILEmitterCtx Context) { EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Mul)); diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs index 4ecfdae30..cb884c1ac 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs @@ -626,6 +626,9 @@ namespace ChocolArm64.Instruction int Bytes = Op.GetBitsCount() >> 3; int Elems = Bytes >> Op.Size; + EmitVectorExtract(Context, Op.Rm, Elem, Op.Size, Signed); + Context.EmitSttmp(); + for (int Index = 0; Index < Elems; Index++) { if (Ternary) @@ -634,7 +637,7 @@ namespace ChocolArm64.Instruction } EmitVectorExtract(Context, Op.Rn, Index, Op.Size, Signed); - EmitVectorExtract(Context, Op.Rm, Elem, Op.Size, Signed); + Context.EmitLdtmp(); Emit(); diff --git a/ChocolArm64/Instruction/AInstEmitSimdShift.cs b/ChocolArm64/Instruction/AInstEmitSimdShift.cs index 6f6b56068..4dee53b9b 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdShift.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdShift.cs @@ -14,20 +14,24 @@ namespace ChocolArm64.Instruction { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - EmitVectorExtractZx(Context, Op.Rn, 0, Op.Size); + EmitScalarUnaryOpZx(Context, () => + { + Context.EmitLdc_I4(GetImmShl(Op)); - Context.EmitLdc_I4(GetImmShl(Op)); - - Context.Emit(OpCodes.Shl); - - EmitScalarSet(Context, Op.Rd, Op.Size); + Context.Emit(OpCodes.Shl); + }); } public static void Shl_V(AILEmitterCtx Context) { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - EmitVectorShImmBinaryZx(Context, () => Context.Emit(OpCodes.Shl), GetImmShl(Op)); + EmitVectorUnaryOpZx(Context, () => + { + Context.EmitLdc_I4(GetImmShl(Op)); + + Context.Emit(OpCodes.Shl); + }); } public static void Shll_V(AILEmitterCtx Context) @@ -103,15 +107,24 @@ namespace ChocolArm64.Instruction EmitVectorSaturatingNarrowOpSxSx(Context, Emit); } + public static void Srshr_S(AILEmitterCtx Context) + { + EmitScalarShrImmOpSx(Context, ShrImmFlags.Round); + } + public static void Srshr_V(AILEmitterCtx Context) { - AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; + EmitVectorShrImmOpSx(Context, ShrImmFlags.Round); + } - int Shift = GetImmShr(Op); + public static void Srsra_S(AILEmitterCtx Context) + { + EmitScalarShrImmOpSx(Context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + } - long RoundConst = 1L << (Shift - 1); - - EmitVectorRoundShImmBinarySx(Context, () => Context.Emit(OpCodes.Shr), Shift, RoundConst); + public static void Srsra_V(AILEmitterCtx Context) + { + EmitVectorShrImmOpSx(Context, ShrImmFlags.Round | ShrImmFlags.Accumulate); } public static void Sshl_V(AILEmitterCtx Context) @@ -128,35 +141,42 @@ namespace ChocolArm64.Instruction public static void Sshr_S(AILEmitterCtx Context) { - AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - - EmitVectorExtractSx(Context, Op.Rn, 0, Op.Size); - - Context.EmitLdc_I4(GetImmShr(Op)); - - Context.Emit(OpCodes.Shr); - - EmitScalarSet(Context, Op.Rd, Op.Size); + EmitShrImmOp(Context, ShrImmFlags.ScalarSx); } public static void Sshr_V(AILEmitterCtx Context) { - AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; + EmitShrImmOp(Context, ShrImmFlags.VectorSx); + } - EmitVectorShImmBinarySx(Context, () => Context.Emit(OpCodes.Shr), GetImmShr(Op)); + public static void Ssra_S(AILEmitterCtx Context) + { + EmitScalarShrImmOpSx(Context, ShrImmFlags.Accumulate); } public static void Ssra_V(AILEmitterCtx Context) { - AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; + EmitVectorShrImmOpSx(Context, ShrImmFlags.Accumulate); + } - Action Emit = () => - { - Context.Emit(OpCodes.Shr); - Context.Emit(OpCodes.Add); - }; + public static void Urshr_S(AILEmitterCtx Context) + { + EmitScalarShrImmOpZx(Context, ShrImmFlags.Round); + } - EmitVectorShImmTernarySx(Context, Emit, GetImmShr(Op)); + public static void Urshr_V(AILEmitterCtx Context) + { + EmitVectorShrImmOpZx(Context, ShrImmFlags.Round); + } + + public static void Ursra_S(AILEmitterCtx Context) + { + EmitScalarShrImmOpZx(Context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + } + + public static void Ursra_V(AILEmitterCtx Context) + { + EmitVectorShrImmOpZx(Context, ShrImmFlags.Round | ShrImmFlags.Accumulate); } public static void Ushl_V(AILEmitterCtx Context) @@ -173,41 +193,22 @@ namespace ChocolArm64.Instruction public static void Ushr_S(AILEmitterCtx Context) { - AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - - EmitScalarUnaryOpZx(Context, () => - { - Context.EmitLdc_I4(GetImmShr(Op)); - - Context.Emit(OpCodes.Shr_Un); - }); + EmitShrImmOp(Context, ShrImmFlags.ScalarZx); } public static void Ushr_V(AILEmitterCtx Context) { - AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; + EmitShrImmOp(Context, ShrImmFlags.VectorZx); + } - EmitVectorUnaryOpZx(Context, () => - { - Context.EmitLdc_I4(GetImmShr(Op)); - - Context.Emit(OpCodes.Shr_Un); - }); + public static void Usra_S(AILEmitterCtx Context) + { + EmitScalarShrImmOpZx(Context, ShrImmFlags.Accumulate); } public static void Usra_V(AILEmitterCtx Context) { - AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - - Action Emit = () => - { - Context.EmitLdc_I4(GetImmShr(Op)); - - Context.Emit(OpCodes.Shr_Un); - Context.Emit(OpCodes.Add); - }; - - EmitVectorOp(Context, Emit, OperFlags.RdRn, Signed: false); + EmitVectorShrImmOpZx(Context, ShrImmFlags.Accumulate); } private static void EmitVectorShl(AILEmitterCtx Context, bool Signed) @@ -274,78 +275,118 @@ namespace ChocolArm64.Instruction } [Flags] - private enum ShImmFlags + private enum ShrImmFlags { - None = 0, + Scalar = 1 << 0, + Signed = 1 << 1, - Signed = 1 << 0, - Ternary = 1 << 1, - Rounded = 1 << 2, + Round = 1 << 2, + Accumulate = 1 << 3, - SignedTernary = Signed | Ternary, - SignedRounded = Signed | Rounded + ScalarSx = Scalar | Signed, + ScalarZx = Scalar, + + VectorSx = Signed, + VectorZx = 0 } - private static void EmitVectorShImmBinarySx(AILEmitterCtx Context, Action Emit, int Imm) + private static void EmitScalarShrImmOpSx(AILEmitterCtx Context, ShrImmFlags Flags) { - EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.Signed); + EmitShrImmOp(Context, ShrImmFlags.ScalarSx | Flags); } - private static void EmitVectorShImmTernarySx(AILEmitterCtx Context, Action Emit, int Imm) + private static void EmitScalarShrImmOpZx(AILEmitterCtx Context, ShrImmFlags Flags) { - EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.SignedTernary); + EmitShrImmOp(Context, ShrImmFlags.ScalarZx | Flags); } - private static void EmitVectorShImmBinaryZx(AILEmitterCtx Context, Action Emit, int Imm) + private static void EmitVectorShrImmOpSx(AILEmitterCtx Context, ShrImmFlags Flags) { - EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.None); + EmitShrImmOp(Context, ShrImmFlags.VectorSx | Flags); } - private static void EmitVectorRoundShImmBinarySx(AILEmitterCtx Context, Action Emit, int Imm, long Rc) + private static void EmitVectorShrImmOpZx(AILEmitterCtx Context, ShrImmFlags Flags) { - EmitVectorShImmOp(Context, Emit, Imm, ShImmFlags.SignedRounded, Rc); + EmitShrImmOp(Context, ShrImmFlags.VectorZx | Flags); } - private static void EmitVectorShImmOp(AILEmitterCtx Context, Action Emit, int Imm, ShImmFlags Flags, long Rc = 0) + private static void EmitShrImmOp(AILEmitterCtx Context, ShrImmFlags Flags) { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; + AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; + + bool Scalar = (Flags & ShrImmFlags.Scalar) != 0; + bool Signed = (Flags & ShrImmFlags.Signed) != 0; + bool Round = (Flags & ShrImmFlags.Round) != 0; + bool Accumulate = (Flags & ShrImmFlags.Accumulate) != 0; + + int Shift = GetImmShr(Op); + + long RoundConst = 1L << (Shift - 1); int Bytes = Op.GetBitsCount() >> 3; - int Elems = Bytes >> Op.Size; - - bool Signed = (Flags & ShImmFlags.Signed) != 0; - bool Ternary = (Flags & ShImmFlags.Ternary) != 0; - bool Rounded = (Flags & ShImmFlags.Rounded) != 0; + int Elems = !Scalar ? Bytes >> Op.Size : 1; for (int Index = 0; Index < Elems; Index++) { - if (Ternary) - { - EmitVectorExtract(Context, Op.Rd, Index, Op.Size, Signed); - } - EmitVectorExtract(Context, Op.Rn, Index, Op.Size, Signed); - if (Rounded) + if (Op.Size <= 2) { - Context.EmitLdc_I8(Rc); + if (Round) + { + Context.EmitLdc_I8(RoundConst); + + Context.Emit(OpCodes.Add); + } + + Context.EmitLdc_I4(Shift); + + Context.Emit(Signed ? OpCodes.Shr : OpCodes.Shr_Un); + } + else /* if (Op.Size == 3) */ + { + EmitShrImm_64(Context, Signed, Round ? RoundConst : 0L, Shift); + } + + if (Accumulate) + { + EmitVectorExtract(Context, Op.Rd, Index, Op.Size, Signed); Context.Emit(OpCodes.Add); } - Context.EmitLdc_I4(Imm); - - Emit(); - - EmitVectorInsert(Context, Op.Rd, Index, Op.Size); + EmitVectorInsertTmp(Context, Index, Op.Size); } - if (Op.RegisterSize == ARegisterSize.SIMD64) + Context.EmitLdvectmp(); + Context.EmitStvec(Op.Rd); + + if ((Op.RegisterSize == ARegisterSize.SIMD64) || Scalar) { EmitVectorZeroUpper(Context, Op.Rd); } } + // Dst_64 = (Int(Src_64, Signed) + RoundConst) >> Shift; + private static void EmitShrImm_64( + AILEmitterCtx Context, + bool Signed, + long RoundConst, + int Shift) + { + if (((AOpCodeSimd)Context.CurrOp).Size < 3) + { + throw new InvalidOperationException(); + } + + Context.EmitLdc_I8(RoundConst); + Context.EmitLdc_I4(Shift); + + ASoftFallback.EmitCall(Context, Signed + ? nameof(ASoftFallback.SignedShrImm_64) + : nameof(ASoftFallback.UnsignedShrImm_64)); + } + private static void EmitVectorShImmNarrowBinarySx(AILEmitterCtx Context, Action Emit, int Imm) { EmitVectorShImmNarrowBinaryOp(Context, Emit, Imm, true); @@ -414,4 +455,4 @@ namespace ChocolArm64.Instruction Context.EmitStvec(Op.Rd); } } -} \ No newline at end of file +} diff --git a/ChocolArm64/Instruction/ASoftFallback.cs b/ChocolArm64/Instruction/ASoftFallback.cs index 0ae84ab2d..a7bc10859 100644 --- a/ChocolArm64/Instruction/ASoftFallback.cs +++ b/ChocolArm64/Instruction/ASoftFallback.cs @@ -16,6 +16,92 @@ namespace ChocolArm64.Instruction Context.EmitCall(typeof(ASoftFallback), MthdName); } +#region "ShrImm_64" + public static long SignedShrImm_64(long Value, long RoundConst, int Shift) + { + if (RoundConst == 0L) + { + if (Shift <= 63) + { + return Value >> Shift; + } + else /* if (Shift == 64) */ + { + if (Value < 0L) + { + return -1L; + } + else + { + return 0L; + } + } + } + else /* if (RoundConst == 1L << (Shift - 1)) */ + { + if (Shift <= 63) + { + long Add = Value + RoundConst; + + if ((~Value & (Value ^ Add)) < 0L) + { + return (long)((ulong)Add >> Shift); + } + else + { + return Add >> Shift; + } + } + else /* if (Shift == 64) */ + { + return 0L; + } + } + } + + public static ulong UnsignedShrImm_64(ulong Value, long RoundConst, int Shift) + { + if (RoundConst == 0L) + { + if (Shift <= 63) + { + return Value >> Shift; + } + else /* if (Shift == 64) */ + { + return 0UL; + } + } + else /* if (RoundConst == 1L << (Shift - 1)) */ + { + ulong Add = Value + (ulong)RoundConst; + + if ((Add < Value) && (Add < (ulong)RoundConst)) + { + if (Shift <= 63) + { + return (Add >> Shift) | (0x8000000000000000UL >> (Shift - 1)); + } + else /* if (Shift == 64) */ + { + return 1UL; + } + } + else + { + if (Shift <= 63) + { + return Add >> Shift; + } + else /* if (Shift == 64) */ + { + return 0UL; + } + } + } + } +#endregion + #region "Saturating" public static long SignedSrcSignedDstSatQ(long op, int Size, AThreadState State) { diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs b/Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs new file mode 100644 index 000000000..4d14ab485 --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs @@ -0,0 +1,143 @@ +#define SimdRegElem + +using ChocolArm64.State; + +using NUnit.Framework; + +using System.Runtime.Intrinsics; + +namespace Ryujinx.Tests.Cpu +{ + [Category("SimdRegElem")] // Tested: second half of 2018. + public sealed class CpuTestSimdRegElem : CpuTest + { +#if SimdRegElem + +#region "ValueSource" + private static ulong[] _2S_() + { + return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFF7FFFFFFFul, + 0x8000000080000000ul, 0xFFFFFFFFFFFFFFFFul }; + } + + private static ulong[] _4H_() + { + return new ulong[] { 0x0000000000000000ul, 0x7FFF7FFF7FFF7FFFul, + 0x8000800080008000ul, 0xFFFFFFFFFFFFFFFFul }; + } +#endregion + + private const int RndCnt = 2; + + [Test, Pairwise, Description("MLA ., ., .[]")] + public void Mla_Ve_4H_8H([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_4H_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H_")] [Random(RndCnt)] ulong A, + [ValueSource("_4H_")] [Random(RndCnt)] ulong B, + [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint Index, + [Values(0b0u, 0b1u)] uint Q) // <4H, 8H> + { + uint H = (Index & 4) >> 2; + uint L = (Index & 2) >> 1; + uint M = (Index & 1) >> 0; + + uint Opcode = 0x2F400000; // MLA V0.4H, V0.4H, V0.H[0] + Opcode |= ((Rm & 15) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= (L << 21) | (M << 20) | (H << 11); + Opcode |= ((Q & 1) << 30); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A * Q); + Vector128 V2 = MakeVectorE0E1(B, B * H); + + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("MLA ., ., .[]")] + public void Mla_Ve_2S_4S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_2S_")] [Random(RndCnt)] ulong B, + [Values(0u, 1u, 2u, 3u)] uint Index, + [Values(0b0u, 0b1u)] uint Q) // <2S, 4S> + { + uint H = (Index & 2) >> 1; + uint L = (Index & 1) >> 0; + + uint Opcode = 0x2F800000; // MLA V0.2S, V0.2S, V0.S[0] + Opcode |= ((Rm & 15) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= (L << 21) | (H << 11); + Opcode |= ((Q & 1) << 30); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A * Q); + Vector128 V2 = MakeVectorE0E1(B, B * H); + + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("MLS ., ., .[]")] + public void Mls_Ve_4H_8H([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_4H_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H_")] [Random(RndCnt)] ulong A, + [ValueSource("_4H_")] [Random(RndCnt)] ulong B, + [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint Index, + [Values(0b0u, 0b1u)] uint Q) // <4H, 8H> + { + uint H = (Index & 4) >> 2; + uint L = (Index & 2) >> 1; + uint M = (Index & 1) >> 0; + + uint Opcode = 0x2F404000; // MLS V0.4H, V0.4H, V0.H[0] + Opcode |= ((Rm & 15) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= (L << 21) | (M << 20) | (H << 11); + Opcode |= ((Q & 1) << 30); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A * Q); + Vector128 V2 = MakeVectorE0E1(B, B * H); + + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("MLS ., ., .[]")] + public void Mls_Ve_2S_4S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_2S_")] [Random(RndCnt)] ulong B, + [Values(0u, 1u, 2u, 3u)] uint Index, + [Values(0b0u, 0b1u)] uint Q) // <2S, 4S> + { + uint H = (Index & 2) >> 1; + uint L = (Index & 1) >> 0; + + uint Opcode = 0x2F804000; // MLS V0.2S, V0.2S, V0.S[0] + Opcode |= ((Rm & 15) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= (L << 21) | (H << 11); + Opcode |= ((Q & 1) << 30); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A * Q); + Vector128 V2 = MakeVectorE0E1(B, B * H); + + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + CompareAgainstUnicorn(); + } +#endif + } +} diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs b/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs new file mode 100644 index 000000000..772852226 --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs @@ -0,0 +1,344 @@ +#define SimdShImm + +using ChocolArm64.State; + +using NUnit.Framework; + +using System.Runtime.Intrinsics; + +namespace Ryujinx.Tests.Cpu +{ + [Category("SimdShImm")] // Tested: second half of 2018. + public sealed class CpuTestSimdShImm : CpuTest + { +#if SimdShImm + +#region "ValueSource (Types)" + private static ulong[] _1D_() + { + return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul, + 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul }; + } + + private static ulong[] _2S_() + { + return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFF7FFFFFFFul, + 0x8000000080000000ul, 0xFFFFFFFFFFFFFFFFul }; + } + + private static ulong[] _4H_() + { + return new ulong[] { 0x0000000000000000ul, 0x7FFF7FFF7FFF7FFFul, + 0x8000800080008000ul, 0xFFFFFFFFFFFFFFFFul }; + } + + private static ulong[] _8B_() + { + return new ulong[] { 0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful, + 0x8080808080808080ul, 0xFFFFFFFFFFFFFFFFul }; + } +#endregion + +#region "ValueSource (Opcodes)" + private static uint[] _ShrImm_S_D_() + { + return new uint[] + { + 0x5F402400u, // SRSHR D0, D0, #64 + 0x5F403400u, // SRSRA D0, D0, #64 + 0x5F400400u, // SSHR D0, D0, #64 + 0x5F401400u, // SSRA D0, D0, #64 + 0x7F402400u, // URSHR D0, D0, #64 + 0x7F403400u, // URSRA D0, D0, #64 + 0x7F400400u, // USHR D0, D0, #64 + 0x7F401400u // USRA D0, D0, #64 + }; + } + + private static uint[] _ShrImm_V_8B_16B_() + { + return new uint[] + { + 0x0F082400u, // SRSHR V0.8B, V0.8B, #8 + 0x0F083400u, // SRSRA V0.8B, V0.8B, #8 + 0x0F080400u, // SSHR V0.8B, V0.8B, #8 + 0x0F081400u, // SSRA V0.8B, V0.8B, #8 + 0x2F082400u, // URSHR V0.8B, V0.8B, #8 + 0x2F083400u, // URSRA V0.8B, V0.8B, #8 + 0x2F080400u, // USHR V0.8B, V0.8B, #8 + 0x2F081400u // USRA V0.8B, V0.8B, #8 + }; + } + + private static uint[] _ShrImm_V_4H_8H_() + { + return new uint[] + { + 0x0F102400u, // SRSHR V0.4H, V0.4H, #16 + 0x0F103400u, // SRSRA V0.4H, V0.4H, #16 + 0x0F100400u, // SSHR V0.4H, V0.4H, #16 + 0x0F101400u, // SSRA V0.4H, V0.4H, #16 + 0x2F102400u, // URSHR V0.4H, V0.4H, #16 + 0x2F103400u, // URSRA V0.4H, V0.4H, #16 + 0x2F100400u, // USHR V0.4H, V0.4H, #16 + 0x2F101400u // USRA V0.4H, V0.4H, #16 + }; + } + + private static uint[] _ShrImm_V_2S_4S_() + { + return new uint[] + { + 0x0F202400u, // SRSHR V0.2S, V0.2S, #32 + 0x0F203400u, // SRSRA V0.2S, V0.2S, #32 + 0x0F200400u, // SSHR V0.2S, V0.2S, #32 + 0x0F201400u, // SSRA V0.2S, V0.2S, #32 + 0x2F202400u, // URSHR V0.2S, V0.2S, #32 + 0x2F203400u, // URSRA V0.2S, V0.2S, #32 + 0x2F200400u, // USHR V0.2S, V0.2S, #32 + 0x2F201400u // USRA V0.2S, V0.2S, #32 + }; + } + + private static uint[] _ShrImm_V_2D_() + { + return new uint[] + { + 0x4F402400u, // SRSHR V0.2D, V0.2D, #64 + 0x4F403400u, // SRSRA V0.2D, V0.2D, #64 + 0x4F400400u, // SSHR V0.2D, V0.2D, #64 + 0x4F401400u, // SSRA V0.2D, V0.2D, #64 + 0x6F402400u, // URSHR V0.2D, V0.2D, #64 + 0x6F403400u, // URSRA V0.2D, V0.2D, #64 + 0x6F400400u, // USHR V0.2D, V0.2D, #64 + 0x6F401400u // USRA V0.2D, V0.2D, #64 + }; + } +#endregion + + private const int RndCnt = 2; + + [Test, Pairwise, Description("SHL , , #")] + public void Shl_S_D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1D_")] [Random(RndCnt)] ulong A, + [Range(0u, 63u)] uint Shift) + { + uint ImmHB = (64 + Shift) & 0x7F; + + uint Opcode = 0x5F405400; // SHL D0, D0, #0 + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= (ImmHB << 16); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("SHL ., ., #")] + public void Shl_V_8B_16B([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong A, + [Range(0u, 7u)] uint Shift, + [Values(0b0u, 0b1u)] uint Q) // <8B, 16B> + { + uint ImmHB = (8 + Shift) & 0x7F; + + uint Opcode = 0x0F085400; // SHL V0.8B, V0.8B, #0 + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= (ImmHB << 16); + Opcode |= ((Q & 1) << 30); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A * Q); + + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("SHL ., ., #")] + public void Shl_V_4H_8H([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_4H_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H_")] [Random(RndCnt)] ulong A, + [Range(0u, 15u)] uint Shift, + [Values(0b0u, 0b1u)] uint Q) // <4H, 8H> + { + uint ImmHB = (16 + Shift) & 0x7F; + + uint Opcode = 0x0F105400; // SHL V0.4H, V0.4H, #0 + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= (ImmHB << 16); + Opcode |= ((Q & 1) << 30); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A * Q); + + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("SHL ., ., #")] + public void Shl_V_2S_4S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_2S_")] [Random(RndCnt)] ulong A, + [Range(0u, 31u)] uint Shift, + [Values(0b0u, 0b1u)] uint Q) // <2S, 4S> + { + uint ImmHB = (32 + Shift) & 0x7F; + + uint Opcode = 0x0F205400; // SHL V0.2S, V0.2S, #0 + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= (ImmHB << 16); + Opcode |= ((Q & 1) << 30); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A * Q); + + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("SHL ., ., #")] + public void Shl_V_2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1D_")] [Random(RndCnt)] ulong A, + [Range(0u, 63u)] uint Shift) + { + uint ImmHB = (64 + Shift) & 0x7F; + + uint Opcode = 0x4F405400; // SHL V0.2D, V0.2D, #0 + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= (ImmHB << 16); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] + public void ShrImm_S_D([ValueSource("_ShrImm_S_D_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1D_")] [Random(RndCnt)] ulong A, + [Range(1u, 64u)] uint Shift) + { + uint ImmHB = (128 - Shift) & 0x7F; + + Opcodes |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= (ImmHB << 16); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] + public void ShrImm_V_8B_16B([ValueSource("_ShrImm_V_8B_16B_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong A, + [Range(1u, 8u)] uint Shift, + [Values(0b0u, 0b1u)] uint Q) // <8B, 16B> + { + uint ImmHB = (16 - Shift) & 0x7F; + + Opcodes |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= (ImmHB << 16); + Opcodes |= ((Q & 1) << 30); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A * Q); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] + public void ShrImm_V_4H_8H([ValueSource("_ShrImm_V_4H_8H_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_4H_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H_")] [Random(RndCnt)] ulong A, + [Range(1u, 16u)] uint Shift, + [Values(0b0u, 0b1u)] uint Q) // <4H, 8H> + { + uint ImmHB = (32 - Shift) & 0x7F; + + Opcodes |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= (ImmHB << 16); + Opcodes |= ((Q & 1) << 30); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A * Q); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] + public void ShrImm_V_2S_4S([ValueSource("_ShrImm_V_2S_4S_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_2S_")] [Random(RndCnt)] ulong A, + [Range(1u, 32u)] uint Shift, + [Values(0b0u, 0b1u)] uint Q) // <2S, 4S> + { + uint ImmHB = (64 - Shift) & 0x7F; + + Opcodes |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= (ImmHB << 16); + Opcodes |= ((Q & 1) << 30); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A * Q); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] + public void ShrImm_V_2D([ValueSource("_ShrImm_V_2D_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1D_")] [Random(RndCnt)] ulong A, + [Range(1u, 64u)] uint Shift) + { + uint ImmHB = (128 - Shift) & 0x7F; + + Opcodes |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= (ImmHB << 16); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1); + + CompareAgainstUnicorn(); + } +#endif + } +}