From 8d41402fa603a2f00ccd08239d3b938fd60715a3 Mon Sep 17 00:00:00 2001 From: merry Date: Wed, 19 Oct 2022 01:36:04 +0100 Subject: [PATCH] A32: Implement VCVTT, VCVTB (#3710) * A32: Implement VCVTT, VCVTB * A32: F16C implementation of VCVTT/VCVTB --- ARMeilleure/Decoders/OpCode32SimdCvtTB.cs | 44 ++++++++++ ARMeilleure/Decoders/OpCodeTable.cs | 1 + ARMeilleure/Instructions/InstEmitSimdCvt32.cs | 62 +++++++++++++ .../Instructions/InstEmitSimdHelper32.cs | 16 ++++ Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs | 87 +++++++++++++++++++ 5 files changed, 210 insertions(+) create mode 100644 ARMeilleure/Decoders/OpCode32SimdCvtTB.cs diff --git a/ARMeilleure/Decoders/OpCode32SimdCvtTB.cs b/ARMeilleure/Decoders/OpCode32SimdCvtTB.cs new file mode 100644 index 0000000000..a95b32ab07 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdCvtTB.cs @@ -0,0 +1,44 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdCvtTB : OpCode32, IOpCode32Simd + { + public int Vd { get; } + public int Vm { get; } + public bool Op { get; } // Convert to Half / Convert from Half + public bool T { get; } // Top / Bottom + public int Size { get; } // Double / Single + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdCvtTB(inst, address, opCode, false); + public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdCvtTB(inst, address, opCode, true); + + public OpCode32SimdCvtTB(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode) + { + IsThumb = isThumb; + + Op = ((opCode >> 16) & 0x1) != 0; + T = ((opCode >> 7) & 0x1) != 0; + Size = ((opCode >> 8) & 0x1); + + RegisterSize = Size == 1 ? RegisterSize.Int64 : RegisterSize.Int32; + + if (Size == 1) + { + if (Op) + { + Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf); + Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e); + } + else + { + Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e); + Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf); + } + } + else + { + Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e); + Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e); + } + } + } +} \ No newline at end of file diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index b4f4b17966..f44c154006 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -828,6 +828,7 @@ namespace ARMeilleure.Decoders SetVfp("<<<<11101x11110xxxxx101x11x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FI, OpCode32SimdCvtFI.Create, OpCode32SimdCvtFI.CreateT32); // FP32 to int. SetVfp("<<<<11101x111000xxxx101xx1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FI, OpCode32SimdCvtFI.Create, OpCode32SimdCvtFI.CreateT32); // Int to FP32. SetVfp("111111101x1111xxxxxx101xx1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_RM, OpCode32SimdCvtFI.Create, OpCode32SimdCvtFI.CreateT32); // The many FP32 to int encodings (fp). + SetVfp("<<<<11101x11001xxxxx101xx1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_TB, OpCode32SimdCvtTB.Create, OpCode32SimdCvtTB.CreateT32); SetVfp("<<<<11101x00xxxxxxxx101xx0x0xxxx", InstName.Vdiv, InstEmit32.Vdiv_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32); SetVfp("<<<<11101xx0xxxxxxxx1011x0x10000", InstName.Vdup, InstEmit32.Vdup, OpCode32SimdDupGP.Create, OpCode32SimdDupGP.CreateT32); SetVfp("<<<<11101x10xxxxxxxx101xx0x0xxxx", InstName.Vfma, InstEmit32.Vfma_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32); diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs index c76634ebf9..ba713feb7a 100644 --- a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs @@ -261,6 +261,68 @@ namespace ARMeilleure.Instructions } } + public static void Vcvt_TB(ArmEmitterContext context) + { + OpCode32SimdCvtTB op = (OpCode32SimdCvtTB)context.CurrOp; + + if (Optimizations.UseF16c) + { + Debug.Assert(!Optimizations.ForceLegacySse); + + if (op.Op) + { + Operand res = ExtractScalar(context, op.Size == 1 ? OperandType.FP64 : OperandType.FP32, op.Vm); + if (op.Size == 1) + { + res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), res); + } + res = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, res, Const(X86GetRoundControl(FPRoundingMode.ToNearest))); + res = context.VectorExtract16(res, 0); + InsertScalar16(context, op.Vd, op.T, res); + } + else + { + Operand res = context.VectorCreateScalar(ExtractScalar16(context, op.Vm, op.T)); + res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, res); + if (op.Size == 1) + { + res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), res); + } + res = context.VectorExtract(op.Size == 1 ? OperandType.I64 : OperandType.I32, res, 0); + InsertScalar(context, op.Vd, res); + } + } + else + { + if (op.Op) + { + // Convert to half + + Operand src = ExtractScalar(context, op.Size == 1 ? OperandType.FP64 : OperandType.FP32, op.Vm); + + MethodInfo method = op.Size == 1 + ? typeof(SoftFloat64_16).GetMethod(nameof(SoftFloat64_16.FPConvert)) + : typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert)); + Operand res = context.Call(method, src); + + InsertScalar16(context, op.Vd, op.T, res); + } + else + { + // Convert from half + + Operand src = ExtractScalar16(context, op.Vm, op.T); + + MethodInfo method = op.Size == 1 + ? typeof(SoftFloat16_64).GetMethod(nameof(SoftFloat16_64.FPConvert)) + : typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert)); + Operand res = context.Call(method, src); + + InsertScalar(context, op.Vd, res); + } + } + } + // VRINTA/M/N/P (floating-point). public static void Vrint_RM(ArmEmitterContext context) { diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs index 0620ea3320..84b01d05ca 100644 --- a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs @@ -70,6 +70,22 @@ namespace ARMeilleure.Instructions context.Copy(vec, insert); } + public static Operand ExtractScalar16(ArmEmitterContext context, int reg, bool top) + { + return context.VectorExtract16(GetVecA32(reg >> 2), ((reg & 3) << 1) | (top ? 1 : 0)); + } + + public static void InsertScalar16(ArmEmitterContext context, int reg, bool top, Operand value) + { + Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.I32); + + Operand vec, insert; + vec = GetVecA32(reg >> 2); + insert = context.VectorInsert16(vec, value, ((reg & 3) << 1) | (top ? 1 : 0)); + + context.Copy(vec, insert); + } + public static Operand ExtractElement(ArmEmitterContext context, int reg, int size, bool signed) { return EmitVectorExtract32(context, reg >> (4 - size), reg & ((16 >> size) - 1), size, signed); diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs index 78d5c3cc2a..0c90d0bad5 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs @@ -339,6 +339,93 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + + [Explicit] + [Test, Pairwise, Description("VCVT.F16.F32 , ")] + public void Vcvt_F32_F16([Values(0u, 1u, 2u, 3u)] uint rd, + [Values(0u, 1u, 2u, 3u)] uint rm, + [ValueSource(nameof(_1S_))] [Random(RndCnt)] uint s0, + [ValueSource(nameof(_1S_))] [Random(RndCnt)] uint s1, + [ValueSource(nameof(_1S_))] [Random(RndCnt)] uint s2, + [ValueSource(nameof(_1S_))] [Random(RndCnt)] uint s3, + [Values] bool top) + { + uint opcode = 0xeeb30a40; // VCVTB.F16.F32 S0, D0 + + if (top) + { + opcode |= 1 << 7; + } + + opcode |= ((rd & 0x1e) << 11) | ((rd & 0x1) << 22); + opcode |= ((rm & 0x1e) >> 1) | ((rm & 0x1) << 5); + + V128 v0 = MakeVectorE0E1E2E3(s0, s1, s2, s3); + + SingleOpcode(opcode, v0: v0); + + CompareAgainstUnicorn(); + } + + [Explicit] + [Test, Pairwise, Description("VCVT.F16.F64 , ")] + public void Vcvt_F64_F16([Values(0u, 1u, 2u, 3u)] uint rd, + [Values(0u, 1u)] uint rm, + [ValueSource(nameof(_1D_F_))] ulong d0, + [ValueSource(nameof(_1D_F_))] ulong d1, + [Values] bool top) + { + uint opcode = 0xeeb30b40; // VCVTB.F16.F64 S0, D0 + + if (top) + { + opcode |= 1 << 7; + } + + opcode |= ((rd & 0x1e) << 11) | ((rd & 0x1) << 22); + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + + V128 v0 = MakeVectorE0E1(d0, d1); + + SingleOpcode(opcode, v0: v0); + + CompareAgainstUnicorn(); + } + + [Explicit] + [Test, Pairwise, Description("VCVT.F.F16 , ")] + public void Vcvt_F16_Fx([Values(0u, 1u, 2u, 3u)] uint rd, + [Values(0u, 1u, 2u, 3u)] uint rm, + [ValueSource(nameof(_1D_F_))] ulong d0, + [ValueSource(nameof(_1D_F_))] ulong d1, + [Values] bool top, + [Values] bool sz) + { + uint opcode = 0xeeb20a40; // VCVTB.F32.F16 S0, S0 + + if (top) + { + opcode |= 1 << 7; + } + + if (sz) + { + opcode |= 1 << 8; + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + } + else + { + opcode |= ((rd & 0x1e) << 11) | ((rd & 0x1) << 22); + } + + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + + V128 v0 = MakeVectorE0E1(d0, d1); + + SingleOpcode(opcode, v0: v0); + + CompareAgainstUnicorn(); + } #endif } }