From 4518c52c65f64a5f7be8866a299056fdf94ef860 Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Mon, 13 Aug 2018 23:10:02 +0200 Subject: [PATCH] Add Sadalp_V, Saddlp_V, Uadalp_V, Uaddlp_V instructions; add 8 Tests. (#340) * Update Instructions.cs * Update CpuTestSimd.cs * Update AOpCodeTable.cs * Update AInstEmitSimdArithmetic.cs --- ChocolArm64/AOpCodeTable.cs | 4 + .../Instruction/AInstEmitSimdArithmetic.cs | 55 +++++ Ryujinx.Tests/Cpu/CpuTestSimd.cs | 216 ++++++++++++++++++ Ryujinx.Tests/Cpu/Tester/Instructions.cs | 168 ++++++++++++++ 4 files changed, 443 insertions(+) diff --git a/ChocolArm64/AOpCodeTable.cs b/ChocolArm64/AOpCodeTable.cs index 74d4915b9..e50f3f987 100644 --- a/ChocolArm64/AOpCodeTable.cs +++ b/ChocolArm64/AOpCodeTable.cs @@ -360,6 +360,8 @@ namespace ChocolArm64 SetA64("0x001110<<1xxxxx010100xxxxxxxxxx", AInstEmit.Sabal_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<1xxxxx011101xxxxxxxxxx", AInstEmit.Sabd_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<1xxxxx011100xxxxxxxxxx", AInstEmit.Sabdl_V, typeof(AOpCodeSimdReg)); + SetA64("0x001110<<100000011010xxxxxxxxxx", AInstEmit.Sadalp_V, typeof(AOpCodeSimd)); + SetA64("0x001110<<100000001010xxxxxxxxxx", AInstEmit.Saddlp_V, typeof(AOpCodeSimd)); SetA64("0x001110<<1xxxxx000100xxxxxxxxxx", AInstEmit.Saddw_V, typeof(AOpCodeSimdReg)); SetA64("x0011110xx100010000000xxxxxxxxxx", AInstEmit.Scvtf_Gp, typeof(AOpCodeSimdCvt)); SetA64("010111100x100001110110xxxxxxxxxx", AInstEmit.Scvtf_S, typeof(AOpCodeSimd)); @@ -429,7 +431,9 @@ namespace ChocolArm64 SetA64("0x101110<<1xxxxx010100xxxxxxxxxx", AInstEmit.Uabal_V, typeof(AOpCodeSimdReg)); SetA64("0x101110<<1xxxxx011101xxxxxxxxxx", AInstEmit.Uabd_V, typeof(AOpCodeSimdReg)); SetA64("0x101110<<1xxxxx011100xxxxxxxxxx", AInstEmit.Uabdl_V, typeof(AOpCodeSimdReg)); + SetA64("0x101110<<100000011010xxxxxxxxxx", AInstEmit.Uadalp_V, typeof(AOpCodeSimd)); SetA64("0x101110<<1xxxxx000000xxxxxxxxxx", AInstEmit.Uaddl_V, typeof(AOpCodeSimdReg)); + SetA64("0x101110<<100000001010xxxxxxxxxx", AInstEmit.Uaddlp_V, typeof(AOpCodeSimd)); SetA64("001011100x110000001110xxxxxxxxxx", AInstEmit.Uaddlv_V, typeof(AOpCodeSimd)); SetA64("01101110<<110000001110xxxxxxxxxx", AInstEmit.Uaddlv_V, typeof(AOpCodeSimd)); SetA64("0x101110<<1xxxxx000100xxxxxxxxxx", AInstEmit.Uaddw_V, typeof(AOpCodeSimdReg)); diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs index 02e903f6f..1d7b16dd9 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs @@ -158,6 +158,41 @@ namespace ChocolArm64.Instruction Context.MarkLabel(LblTrue); } + private static void EmitAddLongPairwise(AILEmitterCtx Context, bool Signed, bool Accumulate) + { + AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; + + int Words = Op.GetBitsCount() >> 4; + int Pairs = Words >> Op.Size; + + for (int Index = 0; Index < Pairs; Index++) + { + int Idx = Index << 1; + + EmitVectorExtract(Context, Op.Rn, Idx, Op.Size, Signed); + EmitVectorExtract(Context, Op.Rn, Idx + 1, Op.Size, Signed); + + Context.Emit(OpCodes.Add); + + if (Accumulate) + { + EmitVectorExtract(Context, Op.Rd, Index, Op.Size + 1, Signed); + + Context.Emit(OpCodes.Add); + } + + EmitVectorInsertTmp(Context, Index, Op.Size + 1); + } + + Context.EmitLdvectmp(); + Context.EmitStvec(Op.Rd); + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + private static void EmitDoublingMultiplyHighHalf(AILEmitterCtx Context, bool Round) { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; @@ -992,6 +1027,16 @@ namespace ChocolArm64.Instruction }); } + public static void Sadalp_V(AILEmitterCtx Context) + { + EmitAddLongPairwise(Context, Signed: true, Accumulate: true); + } + + public static void Saddlp_V(AILEmitterCtx Context) + { + EmitAddLongPairwise(Context, Signed: true, Accumulate: false); + } + public static void Saddw_V(AILEmitterCtx Context) { EmitVectorWidenRmBinaryOpSx(Context, () => Context.Emit(OpCodes.Add)); @@ -1213,11 +1258,21 @@ namespace ChocolArm64.Instruction }); } + public static void Uadalp_V(AILEmitterCtx Context) + { + EmitAddLongPairwise(Context, Signed: false, Accumulate: true); + } + public static void Uaddl_V(AILEmitterCtx Context) { EmitVectorWidenRnRmBinaryOpZx(Context, () => Context.Emit(OpCodes.Add)); } + public static void Uaddlp_V(AILEmitterCtx Context) + { + EmitAddLongPairwise(Context, Signed: false, Accumulate: false); + } + public static void Uaddlv_V(AILEmitterCtx Context) { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs index bb60273a7..15162c8ed 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs @@ -1137,6 +1137,114 @@ namespace Ryujinx.Tests.Cpu }); } + [Test, Description("SADALP ., .")] + public void Sadalp_V_8B4H_4H2S_2S1D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B4H, 4H2S, 2S1D> + { + uint Opcode = 0x0E206800; // SADALP V0.4H, V0.8B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + SimdFp.Sadalp_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Description("SADALP ., .")] + public void Sadalp_V_16B8H_8H4S_4S2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H, 8H4S, 4S2D> + { + uint Opcode = 0x4E206800; // SADALP V0.8H, V0.16B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + SimdFp.Sadalp_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Description("SADDLP ., .")] + public void Saddlp_V_8B4H_4H2S_2S1D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B4H, 4H2S, 2S1D> + { + uint Opcode = 0x0E202800; // SADDLP V0.4H, V0.8B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + SimdFp.Saddlp_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Description("SADDLP ., .")] + public void Saddlp_V_16B8H_8H4S_4S2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H, 8H4S, 4S2D> + { + uint Opcode = 0x4E202800; // SADDLP V0.8H, V0.16B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + SimdFp.Saddlp_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + [Test, Description("SQABS , ")] public void Sqabs_S_B_H_S_D([Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, @@ -1602,6 +1710,114 @@ namespace Ryujinx.Tests.Cpu Assert.That(ThreadState.Fpsr, Is.EqualTo((int)Shared.FPSR.ToUInt32())); } + [Test, Description("UADALP ., .")] + public void Uadalp_V_8B4H_4H2S_2S1D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B4H, 4H2S, 2S1D> + { + uint Opcode = 0x2E206800; // UADALP V0.4H, V0.8B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + SimdFp.Uadalp_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Description("UADALP ., .")] + public void Uadalp_V_16B8H_8H4S_4S2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H, 8H4S, 4S2D> + { + uint Opcode = 0x6E206800; // UADALP V0.8H, V0.16B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + SimdFp.Uadalp_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Description("UADDLP ., .")] + public void Uaddlp_V_8B4H_4H2S_2S1D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B4H, 4H2S, 2S1D> + { + uint Opcode = 0x2E202800; // UADDLP V0.4H, V0.8B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.V(1, new Bits(A)); + SimdFp.Uaddlp_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Description("UADDLP ., .")] + public void Uaddlp_V_16B8H_8H4S_4S2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H, 8H4S, 4S2D> + { + uint Opcode = 0x6E202800; // UADDLP V0.8H, V0.16B + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + SimdFp.Uaddlp_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + [Test, Description("UQXTN , ")] public void Uqxtn_S_HB_SH_DS([Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, diff --git a/Ryujinx.Tests/Cpu/Tester/Instructions.cs b/Ryujinx.Tests/Cpu/Tester/Instructions.cs index 258737185..8e171474e 100644 --- a/Ryujinx.Tests/Cpu/Tester/Instructions.cs +++ b/Ryujinx.Tests/Cpu/Tester/Instructions.cs @@ -3060,6 +3060,90 @@ namespace Ryujinx.Tests.Cpu.Tester V(d, result); } + // sadalp_advsimd.html + public static void Sadalp_V(bool Q, Bits size, Bits Rn, Bits Rd) + { + const bool U = false; + const bool op = true; + + /* Decode Vector */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + + /* if size == '11' then ReservedValue(); */ + + int esize = 8 << (int)UInt(size); + int datasize = (Q ? 128 : 64); + int elements = datasize / (2 * esize); + + bool acc = (op == true); + bool unsigned = (U == true); + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits operand = V(datasize, n); + Bits sum; + BigInteger op1; + BigInteger op2; + + Bits result = (acc ? V(datasize, d) : Zeros(datasize)); + + for (int e = 0; e <= elements - 1; e++) + { + op1 = Int(Elem(operand, 2 * e + 0, esize), unsigned); + op2 = Int(Elem(operand, 2 * e + 1, esize), unsigned); + + sum = (op1 + op2).SubBigInteger(2 * esize - 1, 0); + + Elem(result, e, 2 * esize, Elem(result, e, 2 * esize) + sum); + } + + V(d, result); + } + + // saddlp_advsimd.html + public static void Saddlp_V(bool Q, Bits size, Bits Rn, Bits Rd) + { + const bool U = false; + const bool op = false; + + /* Decode Vector */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + + /* if size == '11' then ReservedValue(); */ + + int esize = 8 << (int)UInt(size); + int datasize = (Q ? 128 : 64); + int elements = datasize / (2 * esize); + + bool acc = (op == true); + bool unsigned = (U == true); + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits operand = V(datasize, n); + Bits sum; + BigInteger op1; + BigInteger op2; + + Bits result = (acc ? V(datasize, d) : Zeros(datasize)); + + for (int e = 0; e <= elements - 1; e++) + { + op1 = Int(Elem(operand, 2 * e + 0, esize), unsigned); + op2 = Int(Elem(operand, 2 * e + 1, esize), unsigned); + + sum = (op1 + op2).SubBigInteger(2 * esize - 1, 0); + + Elem(result, e, 2 * esize, Elem(result, e, 2 * esize) + sum); + } + + V(d, result); + } + // sqabs_advsimd.html#SQABS_asisdmisc_R public static void Sqabs_S(Bits size, Bits Rn, Bits Rd) { @@ -3522,6 +3606,90 @@ namespace Ryujinx.Tests.Cpu.Tester V(d, result); } + // uadalp_advsimd.html + public static void Uadalp_V(bool Q, Bits size, Bits Rn, Bits Rd) + { + const bool U = true; + const bool op = true; + + /* Decode Vector */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + + /* if size == '11' then ReservedValue(); */ + + int esize = 8 << (int)UInt(size); + int datasize = (Q ? 128 : 64); + int elements = datasize / (2 * esize); + + bool acc = (op == true); + bool unsigned = (U == true); + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits operand = V(datasize, n); + Bits sum; + BigInteger op1; + BigInteger op2; + + Bits result = (acc ? V(datasize, d) : Zeros(datasize)); + + for (int e = 0; e <= elements - 1; e++) + { + op1 = Int(Elem(operand, 2 * e + 0, esize), unsigned); + op2 = Int(Elem(operand, 2 * e + 1, esize), unsigned); + + sum = (op1 + op2).SubBigInteger(2 * esize - 1, 0); + + Elem(result, e, 2 * esize, Elem(result, e, 2 * esize) + sum); + } + + V(d, result); + } + + // uaddlp_advsimd.html + public static void Uaddlp_V(bool Q, Bits size, Bits Rn, Bits Rd) + { + const bool U = true; + const bool op = false; + + /* Decode Vector */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + + /* if size == '11' then ReservedValue(); */ + + int esize = 8 << (int)UInt(size); + int datasize = (Q ? 128 : 64); + int elements = datasize / (2 * esize); + + bool acc = (op == true); + bool unsigned = (U == true); + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits operand = V(datasize, n); + Bits sum; + BigInteger op1; + BigInteger op2; + + Bits result = (acc ? V(datasize, d) : Zeros(datasize)); + + for (int e = 0; e <= elements - 1; e++) + { + op1 = Int(Elem(operand, 2 * e + 0, esize), unsigned); + op2 = Int(Elem(operand, 2 * e + 1, esize), unsigned); + + sum = (op1 + op2).SubBigInteger(2 * esize - 1, 0); + + Elem(result, e, 2 * esize, Elem(result, e, 2 * esize) + sum); + } + + V(d, result); + } + // uqxtn_advsimd.html#UQXTN_asisdmisc_N public static void Uqxtn_S(Bits size, Bits Rn, Bits Rd) {