From 8bf102d2cd744f56e2a4839fa0391acda3e201b8 Mon Sep 17 00:00:00 2001 From: Ac_K Date: Tue, 30 Jan 2024 00:51:05 +0100 Subject: [PATCH] Cpu: Implement Vpadal and Vrintr instructions (#6185) * Cpu: Implement Vpadal and Vrintr instructions This PR superseed last instructions left in #2242. Since I'm not a CPU guy I've just ported the code and nothing more. Please be precise during review if there are some changes to be done. It should fixes #1781 Co-Authored-By: Piyachet Kanda * Addresses gdkchan's feedback * Addresses gdkchan's feedback 2 * Apply suggestions from code review Co-authored-by: gdkchan * another fix * Update InstEmitSimdHelper32.cs * Correct fix * Addresses gdkchan's feedback * Update CpuTestSimdCvt32.cs --------- Co-authored-by: Piyachet Kanda Co-authored-by: gdkchan --- src/ARMeilleure/Decoders/OpCodeTable.cs | 2 + .../Instructions/InstEmitSimdArithmetic32.cs | 7 ++++ .../Instructions/InstEmitSimdCvt32.cs | 16 ++++++++ .../Instructions/InstEmitSimdHelper32.cs | 29 ++++++++++++++ src/ARMeilleure/Instructions/InstName.cs | 2 + src/Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs | 39 +++++++++++++++++++ src/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs | 38 ++++++++++++++++++ 7 files changed, 133 insertions(+) diff --git a/src/ARMeilleure/Decoders/OpCodeTable.cs b/src/ARMeilleure/Decoders/OpCodeTable.cs index 528cef1b61..edc004125b 100644 --- a/src/ARMeilleure/Decoders/OpCodeTable.cs +++ b/src/ARMeilleure/Decoders/OpCodeTable.cs @@ -875,6 +875,7 @@ namespace ARMeilleure.Decoders SetVfp("<<<<11100x10xxxxxxxx101xx1x0xxxx", InstName.Vnmul, InstEmit32.Vnmul_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32); SetVfp("111111101x1110xxxxxx101x01x0xxxx", InstName.Vrint, InstEmit32.Vrint_RM, OpCode32SimdS.Create, OpCode32SimdS.CreateT32); SetVfp("<<<<11101x110110xxxx101x11x0xxxx", InstName.Vrint, InstEmit32.Vrint_Z, OpCode32SimdS.Create, OpCode32SimdS.CreateT32); + SetVfp("<<<<11101x110110xxxx101x01x0xxxx", InstName.Vrintr, InstEmit32.Vrintr_S, OpCode32SimdS.Create, OpCode32SimdS.CreateT32); SetVfp("<<<<11101x110111xxxx101x01x0xxxx", InstName.Vrintx, InstEmit32.Vrintx_S, OpCode32SimdS.Create, OpCode32SimdS.CreateT32); SetVfp("<<<<11101x110001xxxx101x11x0xxxx", InstName.Vsqrt, InstEmit32.Vsqrt_S, OpCode32SimdS.Create, OpCode32SimdS.CreateT32); SetVfp("111111100xxxxxxxxxxx101xx0x0xxxx", InstName.Vsel, InstEmit32.Vsel, OpCode32SimdSel.Create, OpCode32SimdSel.CreateT32); @@ -995,6 +996,7 @@ namespace ARMeilleure.Decoders SetAsimd("1111001x1x000xxxxxxx< context.Add(context.Add(op1, op2), op3), op.Opc != 1); + } + public static void Vpaddl(ArmEmitterContext context) { OpCode32Simd op = (OpCode32Simd)context.CurrOp; diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCvt32.cs b/src/ARMeilleure/Instructions/InstEmitSimdCvt32.cs index 630e114c41..8eef6b14dc 100644 --- a/src/ARMeilleure/Instructions/InstEmitSimdCvt32.cs +++ b/src/ARMeilleure/Instructions/InstEmitSimdCvt32.cs @@ -578,6 +578,22 @@ namespace ARMeilleure.Instructions } } + // VRINTR (floating-point). + public static void Vrintr_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FrintiS); + } + else + { + EmitScalarUnaryOpF32(context, (op1) => + { + return EmitRoundByRMode(context, op1); + }); + } + } + // VRINTZ (floating-point). public static void Vrint_Z(ArmEmitterContext context) { diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/src/ARMeilleure/Instructions/InstEmitSimdHelper32.cs index c1c59b87bd..2f021a1a1b 100644 --- a/src/ARMeilleure/Instructions/InstEmitSimdHelper32.cs +++ b/src/ARMeilleure/Instructions/InstEmitSimdHelper32.cs @@ -673,6 +673,35 @@ namespace ARMeilleure.Instructions context.Copy(GetVecA32(op.Qd), res); } + public static void EmitVectorPairwiseTernaryLongOpI32(ArmEmitterContext context, Func3I emit, bool signed) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + int elems = op.GetBytesCount() >> op.Size; + int pairs = elems >> 1; + + Operand res = GetVecA32(op.Qd); + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index * 2; + Operand m1 = EmitVectorExtract32(context, op.Qm, op.Im + pairIndex, op.Size, signed); + Operand m2 = EmitVectorExtract32(context, op.Qm, op.Im + pairIndex + 1, op.Size, signed); + + if (op.Size == 2) + { + m1 = signed ? context.SignExtend32(OperandType.I64, m1) : context.ZeroExtend32(OperandType.I64, m1); + m2 = signed ? context.SignExtend32(OperandType.I64, m2) : context.ZeroExtend32(OperandType.I64, m2); + } + + Operand d1 = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size + 1, signed); + + res = EmitVectorInsert(context, res, emit(m1, m2, d1), op.Id + index, op.Size + 1); + } + + context.Copy(GetVecA32(op.Qd), res); + } + // Narrow public static void EmitVectorUnaryNarrowOp32(ArmEmitterContext context, Func1I emit, bool signed = false) diff --git a/src/ARMeilleure/Instructions/InstName.cs b/src/ARMeilleure/Instructions/InstName.cs index 6723a42e4c..457abbf495 100644 --- a/src/ARMeilleure/Instructions/InstName.cs +++ b/src/ARMeilleure/Instructions/InstName.cs @@ -637,6 +637,7 @@ namespace ARMeilleure.Instructions Vorn, Vorr, Vpadd, + Vpadal, Vpaddl, Vpmax, Vpmin, @@ -656,6 +657,7 @@ namespace ARMeilleure.Instructions Vrintm, Vrintn, Vrintp, + Vrintr, Vrintx, Vrshr, Vrshrn, diff --git a/src/Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs b/src/Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs index 5b24432bbc..ba201a4808 100644 --- a/src/Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs +++ b/src/Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs @@ -511,6 +511,45 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + + [Test, Pairwise, Description("VRINTR.F , ")] + [Platform(Exclude = "Linux,MacOsX")] // Instruction isn't testable due to Unicorn. + public void Vrintr([Values(0u, 1u)] uint rd, + [Values(0u, 1u)] uint rm, + [Values(2u, 3u)] uint size, + [ValueSource(nameof(_1D_F_))] ulong s0, + [ValueSource(nameof(_1D_F_))] ulong s1, + [ValueSource(nameof(_1D_F_))] ulong s2, + [Values(RMode.Rn, RMode.Rm, RMode.Rp)] RMode rMode) + { + uint opcode = 0xEEB60A40; + + V128 v0, v1, v2; + + if (size == 2) + { + opcode |= ((rm & 0x1e) >> 1) | ((rm & 0x1) << 5); + opcode |= ((rd & 0x1e) << 11) | ((rd & 0x1) << 22); + v0 = MakeVectorE0E1((uint)BitConverter.SingleToInt32Bits(s0), (uint)BitConverter.SingleToInt32Bits(s0)); + v1 = MakeVectorE0E1((uint)BitConverter.SingleToInt32Bits(s1), (uint)BitConverter.SingleToInt32Bits(s0)); + v2 = MakeVectorE0E1((uint)BitConverter.SingleToInt32Bits(s2), (uint)BitConverter.SingleToInt32Bits(s1)); + } + else + { + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + v0 = MakeVectorE0E1((uint)BitConverter.DoubleToInt64Bits(s0), (uint)BitConverter.DoubleToInt64Bits(s0)); + v1 = MakeVectorE0E1((uint)BitConverter.DoubleToInt64Bits(s1), (uint)BitConverter.DoubleToInt64Bits(s0)); + v2 = MakeVectorE0E1((uint)BitConverter.DoubleToInt64Bits(s2), (uint)BitConverter.DoubleToInt64Bits(s1)); + } + + opcode |= ((size & 3) << 8); + + int fpscr = (int)rMode << (int)Fpcr.RMode; + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2, fpscr: fpscr); + + CompareAgainstUnicorn(); + } #endif } } diff --git a/src/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs b/src/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs index 9d9606bbab..38e08bf895 100644 --- a/src/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs +++ b/src/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs @@ -908,6 +908,44 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + + [Test, Pairwise] + public void Vp_Add_Long_Accumulate([Values(0u, 2u, 4u, 8u)] uint rd, + [Values(0u, 2u, 4u, 8u)] uint rm, + [Values(0u, 1u, 2u)] uint size, + [Random(RndCnt)] ulong z, + [Random(RndCnt)] ulong a, + [Random(RndCnt)] ulong b, + [Values] bool q, + [Values] bool unsigned) + { + uint opcode = 0xF3B00600; // VPADAL.S8 D0, Q0 + + if (q) + { + opcode |= 1 << 6; + rm <<= 1; + rd <<= 1; + } + + if (unsigned) + { + opcode |= 1 << 7; + } + + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + + opcode |= size << 18; + + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, z); + V128 v2 = MakeVectorE0E1(b, z); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); + + CompareAgainstUnicorn(); + } #endif } }