From e5f7ff1eee81ebc852b8bc703d5f4847eb430560 Mon Sep 17 00:00:00 2001 From: sharmander Date: Tue, 4 Jan 2022 14:45:28 -0500 Subject: [PATCH] CPU - Implement FCVTMS (Vector) (#2937) * Add FCVTMS_V Implementation to Armeilleure * Fix opcode designation * Add tests * Amend Ptc version * Fix OpCode / Tests * Create Math.Floor helper method + Update implementation * Address gdk comments * Re-address gdk comments * Update ARMeilleure/Decoders/OpCodeTable.cs Co-authored-by: gdkchan * Update Tests to use 2S (4S) and 2D Co-authored-by: gdkchan --- ARMeilleure/Decoders/OpCodeTable.cs | 1 + ARMeilleure/Instructions/InstEmitSimdCvt.cs | 12 ++++++++++++ ARMeilleure/Instructions/InstName.cs | 1 + ARMeilleure/Translation/PTC/Ptc.cs | 2 +- Ryujinx.Tests/Cpu/CpuTestSimd.cs | 2 ++ 5 files changed, 17 insertions(+), 1 deletion(-) diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index 17c83e030..3b3174bb4 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -285,6 +285,7 @@ namespace ARMeilleure.Decoders SetA64("0>1011100<100001110010xxxxxxxxxx", InstName.Fcvtau_V, InstEmit.Fcvtau_V, OpCodeSimd.Create); SetA64("0x0011100x100001011110xxxxxxxxxx", InstName.Fcvtl_V, InstEmit.Fcvtl_V, OpCodeSimd.Create); SetA64("x00111100x110000000000xxxxxxxxxx", InstName.Fcvtms_Gp, InstEmit.Fcvtms_Gp, OpCodeSimdCvt.Create); + SetA64("0>0011100<100001101110xxxxxxxxxx", InstName.Fcvtms_V, InstEmit.Fcvtms_V, OpCodeSimd.Create); SetA64("x00111100x110001000000xxxxxxxxxx", InstName.Fcvtmu_Gp, InstEmit.Fcvtmu_Gp, OpCodeSimdCvt.Create); SetA64("0x0011100x100001011010xxxxxxxxxx", InstName.Fcvtn_V, InstEmit.Fcvtn_V, OpCodeSimd.Create); SetA64("010111100x100001101010xxxxxxxxxx", InstName.Fcvtns_S, InstEmit.Fcvtns_S, OpCodeSimd.Create); diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt.cs b/ARMeilleure/Instructions/InstEmitSimdCvt.cs index a5b472ec5..e6400e068 100644 --- a/ARMeilleure/Instructions/InstEmitSimdCvt.cs +++ b/ARMeilleure/Instructions/InstEmitSimdCvt.cs @@ -217,6 +217,18 @@ namespace ARMeilleure.Instructions } } + public static void Fcvtms_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsMinusInfinity, scalar: false); + } + else + { + EmitFcvt(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Floor), op1), signed: true, scalar: false); + } + } + public static void Fcvtmu_Gp(ArmEmitterContext context) { if (Optimizations.UseSse41) diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs index a9c443f13..081a1ef5f 100644 --- a/ARMeilleure/Instructions/InstName.cs +++ b/ARMeilleure/Instructions/InstName.cs @@ -188,6 +188,7 @@ namespace ARMeilleure.Instructions Fcvtau_V, Fcvtl_V, Fcvtms_Gp, + Fcvtms_V, Fcvtmu_Gp, Fcvtn_V, Fcvtns_S, diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs index 4375832bc..258ea9233 100644 --- a/ARMeilleure/Translation/PTC/Ptc.cs +++ b/ARMeilleure/Translation/PTC/Ptc.cs @@ -27,7 +27,7 @@ namespace ARMeilleure.Translation.PTC private const string OuterHeaderMagicString = "PTCohd\0\0"; private const string InnerHeaderMagicString = "PTCihd\0\0"; - private const uint InternalVersion = 2908; //! To be incremented manually for each change to the ARMeilleure project. + private const uint InternalVersion = 2937; //! To be incremented manually for each change to the ARMeilleure project. private const string ActualDir = "0"; private const string BackupDir = "1"; diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs index 89c285708..c20e11ee3 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs @@ -865,6 +865,7 @@ namespace Ryujinx.Tests.Cpu { 0x0E21C800u, // FCVTAS V0.2S, V0.2S 0x2E21C800u, // FCVTAU V0.2S, V0.2S + 0x0E21B800u, // FCVTMS V0.2S, V0.2S 0x0E21A800u, // FCVTNS V0.2S, V0.2S 0x2E21A800u, // FCVTNU V0.2S, V0.2S 0x0EA1B800u, // FCVTZS V0.2S, V0.2S @@ -878,6 +879,7 @@ namespace Ryujinx.Tests.Cpu { 0x4E61C800u, // FCVTAS V0.2D, V0.2D 0x6E61C800u, // FCVTAU V0.2D, V0.2D + 0x4E61B800u, // FCVTMS V0.2D, V0.2D 0x4E61A800u, // FCVTNS V0.2D, V0.2D 0x6E61A800u, // FCVTNU V0.2D, V0.2D 0x4EE1B800u, // FCVTZS V0.2D, V0.2D