From 7acd0e01226d64d05b2675f6ae07507039a31835 Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Sun, 8 Apr 2018 21:08:57 +0200 Subject: [PATCH] Add FMUL (scalar, by element) instruction; add FRECPE, FRECPS (scalar & vector) instructions. Add 5 simple tests. (#74) * Update AOpCodeTable.cs * Update AInstEmitSimdArithmetic.cs * Update AInstEmitSimdHelper.cs * Update CpuTestSimdArithmetic.cs * Update AOpCodeTable.cs * Update AInstEmitSimdArithmetic.cs --- ChocolArm64/AOpCodeTable.cs | 5 + .../Instruction/AInstEmitSimdArithmetic.cs | 111 +++++++++++++++++- .../Instruction/AInstEmitSimdHelper.cs | 28 ++++- Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs | 47 ++++++++ 4 files changed, 189 insertions(+), 2 deletions(-) diff --git a/ChocolArm64/AOpCodeTable.cs b/ChocolArm64/AOpCodeTable.cs index b323a112b..3c1ec4bb3 100644 --- a/ChocolArm64/AOpCodeTable.cs +++ b/ChocolArm64/AOpCodeTable.cs @@ -224,6 +224,7 @@ namespace ChocolArm64 Set("1001111010101111000000xxxxxxxxxx", AInstEmit.Fmov_Itof1, typeof(AOpCodeSimdCvt)); Set("000111110x0xxxxx1xxxxxxxxxxxxxxx", AInstEmit.Fmsub_S, typeof(AOpCodeSimdReg)); Set("000111100x1xxxxx000010xxxxxxxxxx", AInstEmit.Fmul_S, typeof(AOpCodeSimdReg)); + Set("010111111<1011100<1xxxxx110111xxxxxxxxxx", AInstEmit.Fmul_V, typeof(AOpCodeSimdReg)); Set("0x0011111<0011101<100001110110xxxxxxxxxx", AInstEmit.Frecpe_V, typeof(AOpCodeSimd)); + Set("010111100x1xxxxx111111xxxxxxxxxx", AInstEmit.Frecps_S, typeof(AOpCodeSimdReg)); + Set("0>0011100<1xxxxx111111xxxxxxxxxx", AInstEmit.Frecps_V, typeof(AOpCodeSimdReg)); Set("000111100x100110010000xxxxxxxxxx", AInstEmit.Frinta_S, typeof(AOpCodeSimd)); Set("0>1011100<100001100010xxxxxxxxxx", AInstEmit.Frinta_V, typeof(AOpCodeSimd)); Set("000111100x100111110000xxxxxxxxxx", AInstEmit.Frinti_S, typeof(AOpCodeSimd)); diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs index bf119a18a..721fd7eb9 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs @@ -256,6 +256,11 @@ namespace ChocolArm64.Instruction EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Mul)); } + public static void Fmul_Se(AILEmitterCtx Context) + { + EmitScalarBinaryOpByElemF(Context, () => Context.Emit(OpCodes.Mul)); + } + public static void Fmul_V(AILEmitterCtx Context) { EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Mul)); @@ -324,6 +329,110 @@ namespace ChocolArm64.Instruction }); } + public static void Frecpe_S(AILEmitterCtx Context) + { + EmitFrecpe(Context, 0, Scalar: true); + } + + public static void Frecpe_V(AILEmitterCtx Context) + { + AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; + + int SizeF = Op.Size & 1; + + int Bytes = Context.CurrOp.GetBitsCount() >> 3; + + for (int Index = 0; Index < Bytes >> SizeF + 2; Index++) + { + EmitFrecpe(Context, Index, Scalar: false); + } + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + + private static void EmitFrecpe(AILEmitterCtx Context, int Index, bool Scalar) + { + AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; + + int SizeF = Op.Size & 1; + + if (SizeF == 0) + { + Context.EmitLdc_R4(1); + } + else /* if (SizeF == 1) */ + { + Context.EmitLdc_R8(1); + } + + EmitVectorExtractF(Context, Op.Rn, Index, SizeF); + + Context.Emit(OpCodes.Div); + + if (Scalar) + { + EmitVectorZeroAll(Context, Op.Rd); + } + + EmitVectorInsertF(Context, Op.Rd, Index, SizeF); + } + + public static void Frecps_S(AILEmitterCtx Context) + { + EmitFrecps(Context, 0, Scalar: true); + } + + public static void Frecps_V(AILEmitterCtx Context) + { + AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; + + int SizeF = Op.Size & 1; + + int Bytes = Context.CurrOp.GetBitsCount() >> 3; + + for (int Index = 0; Index < Bytes >> SizeF + 2; Index++) + { + EmitFrecps(Context, Index, Scalar: false); + } + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + + private static void EmitFrecps(AILEmitterCtx Context, int Index, bool Scalar) + { + AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; + + int SizeF = Op.Size & 1; + + if (SizeF == 0) + { + Context.EmitLdc_R4(2); + } + else /* if (SizeF == 1) */ + { + Context.EmitLdc_R8(2); + } + + EmitVectorExtractF(Context, Op.Rn, Index, SizeF); + EmitVectorExtractF(Context, Op.Rm, Index, SizeF); + + Context.Emit(OpCodes.Mul); + Context.Emit(OpCodes.Sub); + + if (Scalar) + { + EmitVectorZeroAll(Context, Op.Rd); + } + + EmitVectorInsertF(Context, Op.Rd, Index, SizeF); + } + public static void Frinta_S(AILEmitterCtx Context) { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; @@ -745,4 +854,4 @@ namespace ChocolArm64.Instruction EmitVectorWidenRnRmBinaryOpZx(Context, () => Context.Emit(OpCodes.Mul)); } } -} \ No newline at end of file +} diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs index 264919ab9..9ef9d02f7 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs @@ -120,6 +120,32 @@ namespace ChocolArm64.Instruction Context.EmitCall(MthdInfo); } + public static void EmitScalarBinaryOpByElemF(AILEmitterCtx Context, Action Emit) + { + AOpCodeSimdRegElemF Op = (AOpCodeSimdRegElemF)Context.CurrOp; + + EmitScalarOpByElemF(Context, Emit, Op.Index, Ternary: false); + } + + public static void EmitScalarOpByElemF(AILEmitterCtx Context, Action Emit, int Elem, bool Ternary) + { + AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; + + int SizeF = Op.Size & 1; + + if (Ternary) + { + EmitVectorExtractF(Context, Op.Rd, 0, SizeF); + } + + EmitVectorExtractF(Context, Op.Rn, 0, SizeF); + EmitVectorExtractF(Context, Op.Rm, Elem, SizeF); + + Emit(); + + EmitScalarSetF(Context, Op.Rd, SizeF); + } + public static void EmitScalarUnaryOpSx(AILEmitterCtx Context, Action Emit) { EmitScalarOp(Context, Emit, OperFlags.Rn, true); @@ -724,4 +750,4 @@ namespace ChocolArm64.Instruction } } } -} \ No newline at end of file +} diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs index 7765253ba..ba82be31b 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs @@ -44,6 +44,53 @@ namespace Ryujinx.Tests.Cpu Assert.AreEqual(Result1, ThreadState.V0.X1); }); } + + [Test, Description("fmul s6, s1, v0.s[2]")] + public void Fmul_Se([Random(10)] float A, [Random(10)] float B) + { + AThreadState ThreadState = SingleOpcode(0x5F809826, V1: new AVec { S0 = A }, V0: new AVec { S2 = B }); + + Assert.That(ThreadState.V6.S0, Is.EqualTo(A * B)); + } + + [Test, Description("frecpe v2.4s, v0.4s")] + public void Frecpe_V([Random(100)] float A) + { + AThreadState ThreadState = SingleOpcode(0x4EA1D802, V0: new AVec { S0 = A, S1 = A, S2 = A, S3 = A }); + + Assert.That(ThreadState.V2.S0, Is.EqualTo(1 / A)); + Assert.That(ThreadState.V2.S1, Is.EqualTo(1 / A)); + Assert.That(ThreadState.V2.S2, Is.EqualTo(1 / A)); + Assert.That(ThreadState.V2.S3, Is.EqualTo(1 / A)); + } + + [Test, Description("frecpe d0, d1")] + public void Frecpe_S([Random(100)] double A) + { + AThreadState ThreadState = SingleOpcode(0x5EE1D820, V1: new AVec { D0 = A }); + + Assert.That(ThreadState.V0.D0, Is.EqualTo(1 / A)); + } + + [Test, Description("frecps v4.4s, v2.4s, v0.4s")] + public void Frecps_V([Random(10)] float A, [Random(10)] float B) + { + AThreadState ThreadState = SingleOpcode(0x4E20FC44, V2: new AVec { S0 = A, S1 = A, S2 = A, S3 = A }, + V0: new AVec { S0 = B, S1 = B, S2 = B, S3 = B }); + + Assert.That(ThreadState.V4.S0, Is.EqualTo(2 - (A * B))); + Assert.That(ThreadState.V4.S1, Is.EqualTo(2 - (A * B))); + Assert.That(ThreadState.V4.S2, Is.EqualTo(2 - (A * B))); + Assert.That(ThreadState.V4.S3, Is.EqualTo(2 - (A * B))); + } + + [Test, Description("frecps d0, d1, d2")] + public void Frecps_S([Random(10)] double A, [Random(10)] double B) + { + AThreadState ThreadState = SingleOpcode(0x5E62FC20, V1: new AVec { D0 = A }, V2: new AVec { D0 = B }); + + Assert.That(ThreadState.V0.D0, Is.EqualTo(2 - (A * B))); + } [TestCase(0x3FE66666u, false, 0x40000000u)] [TestCase(0x3F99999Au, false, 0x3F800000u)]