Add Tbl_V Sse opt. with Tests. (#651)

* Add v4, v5, v30, v31 required for Tbl_V Tests.

* Add Tests for Tbl_V.

* Add Tbl_V Sse opt.

* Nit.

* Small opt. on comparison constant vector.

* Nit.

* Add EmitLd/Stvectmp2/3.

* Nit.
This commit is contained in:
LDj3SNuD 2019-03-23 19:50:19 +01:00 committed by gdkchan
parent 1b2e430e88
commit c106ae9944
4 changed files with 437 additions and 39 deletions

View file

@ -355,11 +355,69 @@ namespace ChocolArm64.Instructions
{ {
OpCodeSimdTbl64 op = (OpCodeSimdTbl64)context.CurrOp; OpCodeSimdTbl64 op = (OpCodeSimdTbl64)context.CurrOp;
if (Optimizations.UseSsse3)
{
Type[] typesCmpSflSub = new Type[] { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) };
Type[] typesOr = new Type[] { typeof(Vector128<long> ), typeof(Vector128<long> ) };
Type[] typesSav = new Type[] { typeof(long) };
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitLdc_I8(0x0F0F0F0F0F0F0F0FL);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
context.EmitStvectmp2();
context.EmitLdvectmp2();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThan), typesCmpSflSub));
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesOr));
context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesCmpSflSub));
for (int index = 1; index < op.Size; index++)
{
context.EmitLdvec((op.Rn + index) & 0x1F);
context.EmitLdvec(op.Rm);
context.EmitLdc_I8(0x1010101010101010L * index);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSflSub));
context.EmitStvectmp();
context.EmitLdvectmp();
context.EmitLdvectmp2();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThan), typesCmpSflSub));
context.EmitLdvectmp();
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesOr));
context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesCmpSflSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesOr));
}
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
}
else
{
context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm);
for (int index = 0; index < op.Size; index++) for (int index = 0; index < op.Size; index++)
{ {
context.EmitLdvec((op.Rn + index) & 0x1f); context.EmitLdvec((op.Rn + index) & 0x1F);
} }
switch (op.Size) switch (op.Size)
@ -385,6 +443,7 @@ namespace ChocolArm64.Instructions
context.EmitStvec(op.Rd); context.EmitStvec(op.Rd);
} }
}
public static void Trn1_V(ILEmitterCtx context) public static void Trn1_V(ILEmitterCtx context)
{ {

View file

@ -61,7 +61,9 @@ namespace ChocolArm64.Translation
//Vectors are part of another "set" of locals. //Vectors are part of another "set" of locals.
private const int VecGpTmp1Index = ReservedLocalsCount + 0; private const int VecGpTmp1Index = ReservedLocalsCount + 0;
private const int UserVecTempStart = ReservedLocalsCount + 1; private const int VecGpTmp2Index = ReservedLocalsCount + 1;
private const int VecGpTmp3Index = ReservedLocalsCount + 2;
private const int UserVecTempStart = ReservedLocalsCount + 3;
private static int _userIntTempCount; private static int _userIntTempCount;
private static int _userVecTempCount; private static int _userVecTempCount;
@ -629,6 +631,12 @@ namespace ChocolArm64.Translation
public void EmitLdvectmp() => EmitLdvec(VecGpTmp1Index); public void EmitLdvectmp() => EmitLdvec(VecGpTmp1Index);
public void EmitStvectmp() => EmitStvec(VecGpTmp1Index); public void EmitStvectmp() => EmitStvec(VecGpTmp1Index);
public void EmitLdvectmp2() => EmitLdvec(VecGpTmp2Index);
public void EmitStvectmp2() => EmitStvec(VecGpTmp2Index);
public void EmitLdvectmp3() => EmitLdvec(VecGpTmp3Index);
public void EmitStvectmp3() => EmitStvec(VecGpTmp3Index);
public void EmitLdint(int index) => Ldloc(index, VarType.Int); public void EmitLdint(int index) => Ldloc(index, VarType.Int);
public void EmitStint(int index) => Stloc(index, VarType.Int); public void EmitStint(int index) => Stloc(index, VarType.Int);

View file

@ -97,6 +97,10 @@ namespace Ryujinx.Tests.Cpu
Vector128<float> v1 = default(Vector128<float>), Vector128<float> v1 = default(Vector128<float>),
Vector128<float> v2 = default(Vector128<float>), Vector128<float> v2 = default(Vector128<float>),
Vector128<float> v3 = default(Vector128<float>), Vector128<float> v3 = default(Vector128<float>),
Vector128<float> v4 = default(Vector128<float>),
Vector128<float> v5 = default(Vector128<float>),
Vector128<float> v30 = default(Vector128<float>),
Vector128<float> v31 = default(Vector128<float>),
bool overflow = false, bool carry = false, bool zero = false, bool negative = false, bool overflow = false, bool carry = false, bool zero = false, bool negative = false,
int fpcr = 0x0, int fpsr = 0x0) int fpcr = 0x0, int fpsr = 0x0)
{ {
@ -111,6 +115,10 @@ namespace Ryujinx.Tests.Cpu
_thread.ThreadState.V1 = v1; _thread.ThreadState.V1 = v1;
_thread.ThreadState.V2 = v2; _thread.ThreadState.V2 = v2;
_thread.ThreadState.V3 = v3; _thread.ThreadState.V3 = v3;
_thread.ThreadState.V4 = v4;
_thread.ThreadState.V5 = v5;
_thread.ThreadState.V30 = v30;
_thread.ThreadState.V31 = v31;
_thread.ThreadState.Overflow = overflow; _thread.ThreadState.Overflow = overflow;
_thread.ThreadState.Carry = carry; _thread.ThreadState.Carry = carry;
@ -133,6 +141,10 @@ namespace Ryujinx.Tests.Cpu
_unicornEmu.Q[1] = v1; _unicornEmu.Q[1] = v1;
_unicornEmu.Q[2] = v2; _unicornEmu.Q[2] = v2;
_unicornEmu.Q[3] = v3; _unicornEmu.Q[3] = v3;
_unicornEmu.Q[4] = v4;
_unicornEmu.Q[5] = v5;
_unicornEmu.Q[30] = v30;
_unicornEmu.Q[31] = v31;
_unicornEmu.OverflowFlag = overflow; _unicornEmu.OverflowFlag = overflow;
_unicornEmu.CarryFlag = carry; _unicornEmu.CarryFlag = carry;
@ -169,13 +181,17 @@ namespace Ryujinx.Tests.Cpu
Vector128<float> v1 = default(Vector128<float>), Vector128<float> v1 = default(Vector128<float>),
Vector128<float> v2 = default(Vector128<float>), Vector128<float> v2 = default(Vector128<float>),
Vector128<float> v3 = default(Vector128<float>), Vector128<float> v3 = default(Vector128<float>),
Vector128<float> v4 = default(Vector128<float>),
Vector128<float> v5 = default(Vector128<float>),
Vector128<float> v30 = default(Vector128<float>),
Vector128<float> v31 = default(Vector128<float>),
bool overflow = false, bool carry = false, bool zero = false, bool negative = false, bool overflow = false, bool carry = false, bool zero = false, bool negative = false,
int fpcr = 0x0, int fpsr = 0x0) int fpcr = 0x0, int fpsr = 0x0)
{ {
Opcode(opcode); Opcode(opcode);
Opcode(0xD4200000); // BRK #0 Opcode(0xD4200000); // BRK #0
Opcode(0xD65F03C0); // RET Opcode(0xD65F03C0); // RET
SetThreadState(x0, x1, x2, x3, x31, v0, v1, v2, v3, overflow, carry, zero, negative, fpcr, fpsr); SetThreadState(x0, x1, x2, x3, x31, v0, v1, v2, v3, v4, v5, v30, v31, overflow, carry, zero, negative, fpcr, fpsr);
ExecuteOpcodes(); ExecuteOpcodes();
return GetThreadState(); return GetThreadState();

View file

@ -0,0 +1,315 @@
#define SimdTbl
using NUnit.Framework;
using System.Collections.Generic;
using System.Runtime.Intrinsics;
namespace Ryujinx.Tests.Cpu
{
[Category("SimdTbl")]
public sealed class CpuTestSimdTbl : CpuTest
{
#if SimdTbl
#region "Helper methods"
// Packs eight random TBL index bytes into one 64-bit value (the first byte drawn ends up
// in the most significant position).
//
// regs: number of 16-byte table registers. Indexes 0 .. (16 * regs) - 1 are "in range",
//       indexes (16 * regs) .. 255 are "out of range". Callers in this file pass 1 to 4.
//
// Each byte is taken from the in-range or the out-of-range draw according to a random boolean.
private static ulong GenIdxsForTbls(int regs)
{
    // Inclusive bounds of the two index classes.
    const int idxInRngMin = 0;
    int idxInRngMax = (16 * regs) - 1;
    int idxOutRngMin = 16 * regs;
    const int idxOutRngMax = 255;

    ulong idxs = 0ul;

    for (int cnt = 1; cnt <= 8; cnt++)
    {
        // The Randomizer range overloads exclude the upper bound, so "max + 1" is passed to make
        // the last in-range index and the index 255 reachable. The int overload is used because
        // 255 + 1 does not fit in a byte.
        ulong idxInRng = (ulong)TestContext.CurrentContext.Random.Next(idxInRngMin, idxInRngMax + 1);
        ulong idxOutRng = (ulong)TestContext.CurrentContext.Random.Next(idxOutRngMin, idxOutRngMax + 1);

        ulong idx = TestContext.CurrentContext.Random.NextBool() ? idxInRng : idxOutRng;

        idxs = (idxs << 8) | idx;
    }

    return idxs;
}
#endregion
#region "ValueSource (Types)"
// Fixed 64-bit patterns whose bytes are all 0x00, 0x7F, 0x80 or 0xFF.
private static ulong[] _8B_()
{
    ulong[] patterns =
    {
        0x0000000000000000ul,
        0x7F7F7F7F7F7F7F7Ful,
        0x8080808080808080ul,
        0xFFFFFFFFFFFFFFFFul
    };

    return patterns;
}
// Index source for the one-register TBL test: four fixed patterns first,
// then RndCntIdxs random index sets from GenIdxsForTbls(regs: 1).
private static IEnumerable<ulong> _GenIdxsForTbl1_()
{
    ulong[] fixedIdxs =
    {
        0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful,
        0x8080808080808080ul, 0xFFFFFFFFFFFFFFFFul
    };

    foreach (ulong idxs in fixedIdxs)
    {
        yield return idxs;
    }

    for (int remaining = RndCntIdxs; remaining > 0; remaining--)
    {
        yield return GenIdxsForTbls(regs: 1);
    }
}
// Index source for the two-register TBL tests: four fixed patterns first,
// then RndCntIdxs random index sets from GenIdxsForTbls(regs: 2).
private static IEnumerable<ulong> _GenIdxsForTbl2_()
{
    ulong[] fixedIdxs =
    {
        0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful,
        0x8080808080808080ul, 0xFFFFFFFFFFFFFFFFul
    };

    foreach (ulong idxs in fixedIdxs)
    {
        yield return idxs;
    }

    for (int remaining = RndCntIdxs; remaining > 0; remaining--)
    {
        yield return GenIdxsForTbls(regs: 2);
    }
}
// Index source for the three-register TBL tests: four fixed patterns first,
// then RndCntIdxs random index sets from GenIdxsForTbls(regs: 3).
private static IEnumerable<ulong> _GenIdxsForTbl3_()
{
    ulong[] fixedIdxs =
    {
        0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful,
        0x8080808080808080ul, 0xFFFFFFFFFFFFFFFFul
    };

    foreach (ulong idxs in fixedIdxs)
    {
        yield return idxs;
    }

    for (int remaining = RndCntIdxs; remaining > 0; remaining--)
    {
        yield return GenIdxsForTbls(regs: 3);
    }
}
// Index source for the four-register TBL tests: four fixed patterns first,
// then RndCntIdxs random index sets from GenIdxsForTbls(regs: 4).
private static IEnumerable<ulong> _GenIdxsForTbl4_()
{
    ulong[] fixedIdxs =
    {
        0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful,
        0x8080808080808080ul, 0xFFFFFFFFFFFFFFFFul
    };

    foreach (ulong idxs in fixedIdxs)
    {
        yield return idxs;
    }

    for (int remaining = RndCntIdxs; remaining > 0; remaining--)
    {
        yield return GenIdxsForTbls(regs: 4);
    }
}
#endregion
#region "ValueSource (Opcodes)"
// Base opcode for the one-register TBL form; the tests OR the register and Q fields into it.
private static uint[] _SingleRegTbl_V_8B_16B_()
{
    const uint tblOneReg = 0x0E000000u; // TBL V0.8B, { V0.16B }, V0.8B

    return new[] { tblOneReg };
}
// Base opcode for the two-register TBL form; the tests OR the register and Q fields into it.
private static uint[] _TwoRegTbl_V_8B_16B_()
{
    const uint tblTwoRegs = 0x0E002000u; // TBL V0.8B, { V0.16B, V1.16B }, V0.8B

    return new[] { tblTwoRegs };
}
// Base opcode for the three-register TBL form; the tests OR the register and Q fields into it.
private static uint[] _ThreeRegTbl_V_8B_16B_()
{
    const uint tblThreeRegs = 0x0E004000u; // TBL V0.8B, { V0.16B, V1.16B, V2.16B }, V0.8B

    return new[] { tblThreeRegs };
}
// Base opcode for the four-register TBL form; the tests OR the register and Q fields into it.
private static uint[] _FourRegTbl_V_8B_16B_()
{
    const uint tblFourRegs = 0x0E006000u; // TBL V0.8B, { V0.16B, V1.16B, V2.16B, V3.16B }, V0.8B

    return new[] { tblFourRegs };
}
#endregion
// Count passed to [Random(...)] on every table-data parameter of the tests below.
private const int RndCntTbls = 2;
// Number of random index sets each _GenIdxsForTblN_ source yields after its fixed patterns.
private const int RndCntIdxs = 2;
// TBL with a one-register table: V1 holds the table, V2 the indexes, V0 is the destination.
[Test, Pairwise, Description("TBL <Vd>.<Ta>, { <Vn>.16B }, <Vm>.<Ta>")]
public void SingleRegTbl_V_8B_16B([ValueSource("_SingleRegTbl_V_8B_16B_")] uint opcodes,
                                  [Values(0u)] uint rd,
                                  [Values(1u)] uint rn,
                                  [Values(2u)] uint rm,
                                  [ValueSource("_8B_")] [Random(RndCntTbls)] ulong table0,
                                  [ValueSource("_GenIdxsForTbl1_")] ulong indexes,
                                  [Values(0b0u, 0b1u)] uint q) // <8B, 16B>
{
    // Merge the Rm, Rn, Rd fields and the Q bit into the base encoding.
    uint regFields = ((rm & 31) << 16) | ((rn & 31) << 5) | (rd & 31);
    uint qField = (q & 1) << 30;
    opcodes |= regFields | qField;

    // The destination starts with random contents.
    ulong filler = TestContext.CurrentContext.Random.NextULong();

    // The upper index half is only populated for the 16B arrangement.
    ulong upperIdxs = q == 1u ? indexes : 0ul;

    Vector128<float> dst = MakeVectorE0E1(filler, filler);
    Vector128<float> tbl0 = MakeVectorE0E1(table0, table0);
    Vector128<float> idxVec = MakeVectorE0E1(indexes, upperIdxs);

    SingleOpcode(opcodes, v0: dst, v1: tbl0, v2: idxVec);

    CompareAgainstUnicorn();
}
// TBL with a two-register table: V1-V2 hold the table, V3 the indexes, V0 is the destination.
[Test, Pairwise, Description("TBL <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B }, <Vm>.<Ta>")]
public void TwoRegTbl_V_8B_16B([ValueSource("_TwoRegTbl_V_8B_16B_")] uint opcodes,
                               [Values(0u)] uint rd,
                               [Values(1u)] uint rn,
                               [Values(3u)] uint rm,
                               [ValueSource("_8B_")] [Random(RndCntTbls)] ulong table0,
                               [ValueSource("_8B_")] [Random(RndCntTbls)] ulong table1,
                               [ValueSource("_GenIdxsForTbl2_")] ulong indexes,
                               [Values(0b0u, 0b1u)] uint q) // <8B, 16B>
{
    // Merge the Rm, Rn, Rd fields and the Q bit into the base encoding.
    uint regFields = ((rm & 31) << 16) | ((rn & 31) << 5) | (rd & 31);
    uint qField = (q & 1) << 30;
    opcodes |= regFields | qField;

    // The destination starts with random contents.
    ulong filler = TestContext.CurrentContext.Random.NextULong();

    // The upper index half is only populated for the 16B arrangement.
    ulong upperIdxs = q == 1u ? indexes : 0ul;

    Vector128<float> dst = MakeVectorE0E1(filler, filler);
    Vector128<float> tbl0 = MakeVectorE0E1(table0, table0);
    Vector128<float> tbl1 = MakeVectorE0E1(table1, table1);
    Vector128<float> idxVec = MakeVectorE0E1(indexes, upperIdxs);

    SingleOpcode(opcodes, v0: dst, v1: tbl0, v2: tbl1, v3: idxVec);

    CompareAgainstUnicorn();
}
// Two-register TBL with Rn = 31: table data is placed in V31 and V0.
// V30 receives random data and V1 the index pattern; rd and rm each pick one of those two.
[Test, Pairwise, Description("TBL <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B }, <Vm>.<Ta>")]
public void Mod_TwoRegTbl_V_8B_16B([ValueSource("_TwoRegTbl_V_8B_16B_")] uint opcodes,
                                   [Values(30u, 1u)] uint rd,
                                   [Values(31u)] uint rn,
                                   [Values(1u, 30u)] uint rm,
                                   [ValueSource("_8B_")] [Random(RndCntTbls)] ulong table0,
                                   [ValueSource("_8B_")] [Random(RndCntTbls)] ulong table1,
                                   [ValueSource("_GenIdxsForTbl2_")] ulong indexes,
                                   [Values(0b0u, 0b1u)] uint q) // <8B, 16B>
{
    // Merge the Rm, Rn, Rd fields and the Q bit into the base encoding.
    uint regFields = ((rm & 31) << 16) | ((rn & 31) << 5) | (rd & 31);
    uint qField = (q & 1) << 30;
    opcodes |= regFields | qField;

    ulong filler = TestContext.CurrentContext.Random.NextULong();

    Vector128<float> rndVec = MakeVectorE0E1(filler, filler);
    Vector128<float> tbl0 = MakeVectorE0E1(table0, table0);
    Vector128<float> tbl1 = MakeVectorE0E1(table1, table1);
    Vector128<float> idxVec = MakeVectorE0E1(indexes, indexes);

    SingleOpcode(opcodes, v0: tbl1, v1: idxVec, v30: rndVec, v31: tbl0);

    CompareAgainstUnicorn();
}
// TBL with a three-register table: V1-V3 hold the table, V4 the indexes, V0 is the destination.
[Test, Pairwise, Description("TBL <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B }, <Vm>.<Ta>")]
public void ThreeRegTbl_V_8B_16B([ValueSource("_ThreeRegTbl_V_8B_16B_")] uint opcodes,
                                 [Values(0u)] uint rd,
                                 [Values(1u)] uint rn,
                                 [Values(4u)] uint rm,
                                 [ValueSource("_8B_")] [Random(RndCntTbls)] ulong table0,
                                 [ValueSource("_8B_")] [Random(RndCntTbls)] ulong table1,
                                 [ValueSource("_8B_")] [Random(RndCntTbls)] ulong table2,
                                 [ValueSource("_GenIdxsForTbl3_")] ulong indexes,
                                 [Values(0b0u, 0b1u)] uint q) // <8B, 16B>
{
    // Merge the Rm, Rn, Rd fields and the Q bit into the base encoding.
    uint regFields = ((rm & 31) << 16) | ((rn & 31) << 5) | (rd & 31);
    uint qField = (q & 1) << 30;
    opcodes |= regFields | qField;

    // The destination starts with random contents.
    ulong filler = TestContext.CurrentContext.Random.NextULong();

    // The upper index half is only populated for the 16B arrangement.
    ulong upperIdxs = q == 1u ? indexes : 0ul;

    Vector128<float> dst = MakeVectorE0E1(filler, filler);
    Vector128<float> tbl0 = MakeVectorE0E1(table0, table0);
    Vector128<float> tbl1 = MakeVectorE0E1(table1, table1);
    Vector128<float> tbl2 = MakeVectorE0E1(table2, table2);
    Vector128<float> idxVec = MakeVectorE0E1(indexes, upperIdxs);

    SingleOpcode(opcodes, v0: dst, v1: tbl0, v2: tbl1, v3: tbl2, v4: idxVec);

    CompareAgainstUnicorn();
}
// Three-register TBL with Rn = 31: table data is placed in V31, V0 and V1.
// V30 receives random data and V2 the index pattern; rd and rm each pick one of those two.
[Test, Pairwise, Description("TBL <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B }, <Vm>.<Ta>")]
public void Mod_ThreeRegTbl_V_8B_16B([ValueSource("_ThreeRegTbl_V_8B_16B_")] uint opcodes,
                                     [Values(30u, 2u)] uint rd,
                                     [Values(31u)] uint rn,
                                     [Values(2u, 30u)] uint rm,
                                     [ValueSource("_8B_")] [Random(RndCntTbls)] ulong table0,
                                     [ValueSource("_8B_")] [Random(RndCntTbls)] ulong table1,
                                     [ValueSource("_8B_")] [Random(RndCntTbls)] ulong table2,
                                     [ValueSource("_GenIdxsForTbl3_")] ulong indexes,
                                     [Values(0b0u, 0b1u)] uint q) // <8B, 16B>
{
    // Merge the Rm, Rn, Rd fields and the Q bit into the base encoding.
    uint regFields = ((rm & 31) << 16) | ((rn & 31) << 5) | (rd & 31);
    uint qField = (q & 1) << 30;
    opcodes |= regFields | qField;

    ulong filler = TestContext.CurrentContext.Random.NextULong();

    Vector128<float> rndVec = MakeVectorE0E1(filler, filler);
    Vector128<float> tbl0 = MakeVectorE0E1(table0, table0);
    Vector128<float> tbl1 = MakeVectorE0E1(table1, table1);
    Vector128<float> tbl2 = MakeVectorE0E1(table2, table2);
    Vector128<float> idxVec = MakeVectorE0E1(indexes, indexes);

    SingleOpcode(opcodes, v0: tbl1, v1: tbl2, v2: idxVec, v30: rndVec, v31: tbl0);

    CompareAgainstUnicorn();
}
// TBL with a four-register table: V1-V4 hold the table, V5 the indexes, V0 is the destination.
[Test, Pairwise, Description("TBL <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B, <Vn+3>.16B }, <Vm>.<Ta>")]
public void FourRegTbl_V_8B_16B([ValueSource("_FourRegTbl_V_8B_16B_")] uint opcodes,
                                [Values(0u)] uint rd,
                                [Values(1u)] uint rn,
                                [Values(5u)] uint rm,
                                [ValueSource("_8B_")] [Random(RndCntTbls)] ulong table0,
                                [ValueSource("_8B_")] [Random(RndCntTbls)] ulong table1,
                                [ValueSource("_8B_")] [Random(RndCntTbls)] ulong table2,
                                [ValueSource("_8B_")] [Random(RndCntTbls)] ulong table3,
                                [ValueSource("_GenIdxsForTbl4_")] ulong indexes,
                                [Values(0b0u, 0b1u)] uint q) // <8B, 16B>
{
    // Merge the Rm, Rn, Rd fields and the Q bit into the base encoding.
    uint regFields = ((rm & 31) << 16) | ((rn & 31) << 5) | (rd & 31);
    uint qField = (q & 1) << 30;
    opcodes |= regFields | qField;

    // The destination starts with random contents.
    ulong filler = TestContext.CurrentContext.Random.NextULong();

    // The upper index half is only populated for the 16B arrangement.
    ulong upperIdxs = q == 1u ? indexes : 0ul;

    Vector128<float> dst = MakeVectorE0E1(filler, filler);
    Vector128<float> tbl0 = MakeVectorE0E1(table0, table0);
    Vector128<float> tbl1 = MakeVectorE0E1(table1, table1);
    Vector128<float> tbl2 = MakeVectorE0E1(table2, table2);
    Vector128<float> tbl3 = MakeVectorE0E1(table3, table3);
    Vector128<float> idxVec = MakeVectorE0E1(indexes, upperIdxs);

    SingleOpcode(opcodes, v0: dst, v1: tbl0, v2: tbl1, v3: tbl2, v4: tbl3, v5: idxVec);

    CompareAgainstUnicorn();
}
// Four-register TBL with Rn = 31: table data is placed in V31, V0, V1 and V2.
// V30 receives random data and V3 the index pattern; rd and rm each pick one of those two.
[Test, Pairwise, Description("TBL <Vd>.<Ta>, { <Vn>.16B, <Vn+1>.16B, <Vn+2>.16B, <Vn+3>.16B }, <Vm>.<Ta>")]
public void Mod_FourRegTbl_V_8B_16B([ValueSource("_FourRegTbl_V_8B_16B_")] uint opcodes,
                                    [Values(30u, 3u)] uint rd,
                                    [Values(31u)] uint rn,
                                    [Values(3u, 30u)] uint rm,
                                    [ValueSource("_8B_")] [Random(RndCntTbls)] ulong table0,
                                    [ValueSource("_8B_")] [Random(RndCntTbls)] ulong table1,
                                    [ValueSource("_8B_")] [Random(RndCntTbls)] ulong table2,
                                    [ValueSource("_8B_")] [Random(RndCntTbls)] ulong table3,
                                    [ValueSource("_GenIdxsForTbl4_")] ulong indexes,
                                    [Values(0b0u, 0b1u)] uint q) // <8B, 16B>
{
    // Merge the Rm, Rn, Rd fields and the Q bit into the base encoding.
    uint regFields = ((rm & 31) << 16) | ((rn & 31) << 5) | (rd & 31);
    uint qField = (q & 1) << 30;
    opcodes |= regFields | qField;

    ulong filler = TestContext.CurrentContext.Random.NextULong();

    Vector128<float> rndVec = MakeVectorE0E1(filler, filler);
    Vector128<float> tbl0 = MakeVectorE0E1(table0, table0);
    Vector128<float> tbl1 = MakeVectorE0E1(table1, table1);
    Vector128<float> tbl2 = MakeVectorE0E1(table2, table2);
    Vector128<float> tbl3 = MakeVectorE0E1(table3, table3);
    Vector128<float> idxVec = MakeVectorE0E1(indexes, indexes);

    SingleOpcode(opcodes, v0: tbl1, v1: tbl2, v2: tbl3, v3: idxVec, v30: rndVec, v31: tbl0);

    CompareAgainstUnicorn();
}
#endif
}
}