729ff5337c
* Fix increment on Arm32 NEON VLDn/VSTn instructions with regs > 1 * PPTC version bump * PR feedback
332 lines
11 KiB
C#
332 lines
11 KiB
C#
#define SimdMemory32
|
|
|
|
using ARMeilleure.State;
|
|
using NUnit.Framework;
|
|
using System;
|
|
|
|
namespace Ryujinx.Tests.Cpu
|
|
{
|
|
[Category("SimdMemory32")]
|
|
public sealed class CpuTestSimdMemory32 : CpuTest32
|
|
{
|
|
#if SimdMemory32
|
|
// Count argument passed to every [Random(min, max, count)] parameter attribute in this fixture.
private const int RndCntImm = 2;
|
|
|
|
// Table of 4-bit values that the *_Pair tests place at opcode bits 11:8
// (see "_ldStModes[mode] << 8"), indexed by the "mode" test parameter.
// The first four entries are the single-element (LD1/ST1) forms; the *_Pair
// tests rely on that when they restrict size 3 to "mode <= 3".
// The table is never reassigned, so it is declared readonly.
private readonly uint[] _ldStModes =
{
    // LD1
    0b0111,
    0b1010,
    0b0110,
    0b0010,

    // LD2
    0b1000,
    0b1001,
    0b0011,

    // LD3
    0b0100,
    0b0101,

    // LD4
    0b0000,
    0b0001
};
|
|
|
|
[Test, Pairwise, Description("VLDn.<size> <list>, [<Rn> {:<align>}]{ /!/, <Rm>} (single n element structure)")]
public void Vldn_Single([Values(0u, 1u, 2u)] uint size,
                        [Values(0u, 13u)] uint rn,
                        [Values(1u, 13u, 15u)] uint rm,
                        [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd,
                        [Range(0u, 7u)] uint index,
                        [Range(0u, 3u)] uint n,
                        [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint offset)
{
    // Seed the working memory with the generated float pattern before executing the load.
    SetWorkingMemory(0, GenerateVectorSequence(0x1000));

    // Each opcode field is computed on its own, then merged into the base encoding.
    uint sizeField = (size & 3) << 10;
    uint baseRegField = (rn & 15) << 16;
    uint offsetRegField = rm & 15;

    // The lane index sits above the alignment bits, whose width grows with the element size;
    // the result is truncated to the 4-bit index_align field.
    int laneShift = (int)(1 + size);
    uint indexAlignField = ((index << laneShift) & 15) << 4;

    uint destHighField = (vd & 0x10) << 18;
    uint destLowField = (vd & 0xf) << 12;

    // LD1 is 0, LD2 is 1 etc.
    uint structField = (n & 3) << 8;

    uint opcode = 0xf4a00000u // VLD1.8 {D0[0]}, [R0], R0
                  | sizeField
                  | baseRegField
                  | offsetRegField
                  | indexAlignField
                  | destHighField
                  | destLowField
                  | structField;

    SingleOpcode(opcode, r0: 0x2500, r1: offset, sp: 0x2500);

    CompareAgainstUnicorn();
}
|
|
|
|
[Test, Pairwise, Description("VLDn.<size> <list>, [<Rn> {:<align>}]{ /!/, <Rm>} (all lanes)")]
public void Vldn_All([Values(0u, 13u)] uint rn,
                     [Values(1u, 13u, 15u)] uint rm,
                     [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd,
                     [Range(0u, 3u)] uint n,
                     [Range(0u, 2u)] uint size,
                     [Values] bool t,
                     [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint offset)
{
    // Seed the working memory with the generated float pattern before executing the load.
    SetWorkingMemory(0, GenerateVectorSequence(0x1000));

    // Each opcode field is computed on its own, then merged into the base encoding.
    uint sizeField = (size & 3) << 6;
    uint baseRegField = (rn & 15) << 16;
    uint offsetRegField = rm & 15;

    uint destHighField = (vd & 0x10) << 18;
    uint destLowField = (vd & 0xf) << 12;

    // LD1 is 0, LD2 is 1 etc.
    uint structField = (n & 3) << 8;

    // The T flag occupies bit 5 and is only set when requested.
    uint tField = t ? 1u << 5 : 0u;

    uint opcode = 0xf4a00c00u // VLD1.8 {D0[]}, [R0], R0
                  | sizeField
                  | baseRegField
                  | offsetRegField
                  | destHighField
                  | destLowField
                  | structField
                  | tField;

    SingleOpcode(opcode, r0: 0x2500, r1: offset, sp: 0x2500);

    CompareAgainstUnicorn();
}
|
|
|
|
[Test, Pairwise, Description("VLDn.<size> <list>, [<Rn> {:<align>}]{ /!/, <Rm>} (multiple n element structures)")]
public void Vldn_Pair([Values(0u, 1u, 2u, 3u)] uint size,
                      [Values(0u, 13u)] uint rn,
                      [Values(1u, 13u, 15u)] uint rm,
                      [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd,
                      [Range(0u, 10u)] uint mode,
                      [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint offset)
{
    // Seed the working memory with the generated float pattern before executing the load.
    SetWorkingMemory(0, GenerateVectorSequence(0x1000));

    // A size of 3 is only valid for VLD1 (table entries 0..3); other entries fall back to size 2.
    uint elementSize = (size == 3 && mode > 3) ? 2u : size;

    uint sizeField = (elementSize & 3) << 6;
    uint baseRegField = (rn & 15) << 16;
    uint offsetRegField = rm & 15;
    uint typeField = _ldStModes[mode] << 8;

    uint destHighField = (vd & 0x10) << 18;
    uint destLowField = (vd & 0xf) << 12;

    uint opcode = 0xf4200000u // VLD4.8 {D0, D1, D2, D3}, [R0], R0
                  | sizeField
                  | baseRegField
                  | offsetRegField
                  | typeField
                  | destHighField
                  | destLowField;

    SingleOpcode(opcode, r0: 0x2500, r1: offset, sp: 0x2500);

    CompareAgainstUnicorn();
}
|
|
|
|
[Test, Pairwise, Description("VSTn.<size> <list>, [<Rn> {:<align>}]{ /!/, <Rm>} (single n element structure)")]
public void Vstn_Single([Values(0u, 1u, 2u)] uint size,
                        [Values(0u, 13u)] uint rn,
                        [Values(1u, 13u, 15u)] uint rm,
                        [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd,
                        [Range(0u, 7u)] uint index,
                        [Range(0u, 3u)] uint n,
                        [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint offset)
{
    // Seed the working memory with the generated float pattern so stores overwrite known data.
    SetWorkingMemory(0, GenerateVectorSequence(0x1000));

    // Source register contents handed to the CPU through v1..v4.
    var vectors = GenerateTestVectors();

    // Each opcode field is computed on its own, then merged into the base encoding.
    uint sizeField = (size & 3) << 10;
    uint baseRegField = (rn & 15) << 16;
    uint offsetRegField = rm & 15;

    // The lane index sits above the alignment bits, whose width grows with the element size;
    // the result is truncated to the 4-bit index_align field.
    int laneShift = (int)(1 + size);
    uint indexAlignField = ((index << laneShift) & 15) << 4;

    uint srcHighField = (vd & 0x10) << 18;
    uint srcLowField = (vd & 0xf) << 12;

    // ST1 is 0, ST2 is 1 etc.
    uint structField = (n & 3) << 8;

    uint opcode = 0xf4800000u // VST1.8 {D0[0]}, [R0], R0
                  | sizeField
                  | baseRegField
                  | offsetRegField
                  | indexAlignField
                  | srcHighField
                  | srcLowField
                  | structField;

    SingleOpcode(opcode, r0: 0x2500, r1: offset, v1: vectors.Item1, v2: vectors.Item2, v3: vectors.Item3, v4: vectors.Item4, sp: 0x2500);

    CompareAgainstUnicorn();
}
|
|
|
|
[Test, Pairwise, Description("VSTn.<size> <list>, [<Rn> {:<align>}]{ /!/, <Rm>} (multiple n element structures)")]
public void Vstn_Pair([Values(0u, 1u, 2u, 3u)] uint size,
                      [Values(0u, 13u)] uint rn,
                      [Values(1u, 13u, 15u)] uint rm,
                      [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd,
                      [Range(0u, 10u)] uint mode,
                      [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint offset)
{
    // Seed the working memory with the generated float pattern so stores overwrite known data.
    SetWorkingMemory(0, GenerateVectorSequence(0x1000));

    // Source register contents handed to the CPU through v1..v4.
    var vectors = GenerateTestVectors();

    // A size of 3 is only valid for VST1 (table entries 0..3); other entries fall back to size 2.
    uint elementSize = (size == 3 && mode > 3) ? 2u : size;

    uint sizeField = (elementSize & 3) << 6;
    uint baseRegField = (rn & 15) << 16;
    uint offsetRegField = rm & 15;
    uint typeField = _ldStModes[mode] << 8;

    uint srcHighField = (vd & 0x10) << 18;
    uint srcLowField = (vd & 0xf) << 12;

    uint opcode = 0xf4000000u // VST4.8 {D0, D1, D2, D3}, [R0], R0
                  | sizeField
                  | baseRegField
                  | offsetRegField
                  | typeField
                  | srcHighField
                  | srcLowField;

    SingleOpcode(opcode, r0: 0x2500, r1: offset, v1: vectors.Item1, v2: vectors.Item2, v3: vectors.Item3, v4: vectors.Item4, sp: 0x2500);

    CompareAgainstUnicorn();
}
|
|
|
|
[Test, Pairwise, Description("VLDM.<size> <Rn>{!}, <d/sreglist>")]
public void Vldm([Values(0u, 13u)] uint rn,
                 [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd,
                 [Range(0u, 2u)] uint mode,
                 [Values(0x1u, 0x32u)] [Random(2u, 31u, RndCntImm)] uint regs,
                 [Values] bool single)
{
    // Seed the working memory with the generated float pattern before executing the load.
    var data = GenerateVectorSequence(0x1000);
    SetWorkingMemory(0, data);

    // VLDM base encoding; addressing mode, Rn, Vd, precision and the register list length are ORed in below.
    uint opcode = 0xec100a00u;

    uint[] vldmModes =
    {
        // Note: 3rd 0 leaves a space for "D".
        0b0100, // Increment after.
        0b0101, // Increment after. (!)
        0b1001  // Decrement before. (!)
    };

    opcode |= ((vldmModes[mode] & 15) << 21);
    opcode |= ((rn & 15) << 16);

    opcode |= ((vd & 0x10) << 18);
    opcode |= ((vd & 0xf) << 12);

    // Bit 8 selects double precision.
    opcode |= ((uint)(single ? 0 : 1) << 8);

    // From here on "regs" holds the value written to the low 8 opcode bits. For doubles that value
    // is twice the register count, so its low bit must be 0.
    if (!single)
    {
        regs <<= 1;
    }

    // Number of encoded units per register: 1 for singles, 2 for doubles.
    uint regSize = single ? 1u : 2u;

    uint endReg = vd + (regs / regSize);

    if (endReg > 32) // Can't address further than S31 or D31.
    {
        // The excess is a register count; scale it back to encoded units before subtracting.
        // (Previously the unscaled count was subtracted, which left doubles over the limit or odd.)
        regs -= (endReg - 32) * regSize;
    }

    if (regs / regSize > 16) // Can't do more than 16 registers at a time.
    {
        regs = 16 * regSize;
    }

    opcode |= regs & 0xff;

    SingleOpcode(opcode, r0: 0x2500, sp: 0x2500);

    CompareAgainstUnicorn();
}
|
|
|
|
[Test, Pairwise, Description("VLDR.<size> <Sd>, [<Rn> {, #{+/-}<imm>}]")]
public void Vldr([Values(2u, 3u)] uint size, // FP16 is not supported for now
                 [Values(0u)] uint rn,
                 [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint sd,
                 [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint imm,
                 [Values] bool sub)
{
    // Bit 23 is set in the base opcode; clearing it selects the subtracting form.
    const uint AddBit = 1u << 23;

    // Seed the working memory with the generated float pattern before executing the load.
    SetWorkingMemory(0, GenerateVectorSequence(0x1000));

    uint sizeField = (size & 3) << 8;
    uint baseRegField = (rn & 15) << 16;

    // Size 2 splits the register number as "low bit -> bit 22, remaining bits -> bits 12+";
    // any other size uses "bit 4 -> bit 22, low four bits -> bits 12+".
    uint destField = size == 2
        ? ((sd & 0x1) << 22) | ((sd & 0x1e) << 11)
        : ((sd & 0x10) << 18) | ((sd & 0xf) << 12);

    uint opcode = 0xed900a00u | sizeField | baseRegField; // VLDR.32 S0, [R0, #0]

    if (sub)
    {
        opcode &= ~AddBit;
    }

    opcode |= destField | (imm & 0xff);

    SingleOpcode(opcode, r0: 0x2500);

    CompareAgainstUnicorn();
}
|
|
|
|
[Test, Pairwise, Description("VSTR.<size> <Sd>, [<Rn> {, #{+/-}<imm>}]")]
public void Vstr([Values(2u, 3u)] uint size, // FP16 is not supported for now
                 [Values(0u)] uint rn,
                 [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint sd,
                 [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint imm,
                 [Values] bool sub)
{
    // Bit 23 is set in the base opcode; clearing it selects the subtracting form.
    const uint AddBit = 1u << 23;

    // Seed the working memory with the generated float pattern so the store overwrites known data.
    SetWorkingMemory(0, GenerateVectorSequence(0x1000));

    uint sizeField = (size & 3) << 8;
    uint baseRegField = (rn & 15) << 16;

    // Size 2 splits the register number as "low bit -> bit 22, remaining bits -> bits 12+";
    // any other size uses "bit 4 -> bit 22, low four bits -> bits 12+".
    uint srcField = size == 2
        ? ((sd & 0x1) << 22) | ((sd & 0x1e) << 11)
        : ((sd & 0x10) << 18) | ((sd & 0xf) << 12);

    uint opcode = 0xed800a00u | sizeField | baseRegField; // VSTR.32 S0, [R0, #0]

    if (sub)
    {
        opcode &= ~AddBit;
    }

    opcode |= srcField | (imm & 0xff);

    // Only the first two generated vectors are used; they are passed as v0 and v1.
    var vectors = GenerateTestVectors();

    SingleOpcode(opcode, r0: 0x2500, v0: vectors.Item1, v1: vectors.Item2);

    CompareAgainstUnicorn();
}
|
|
|
|
// Builds the four fixed vectors the store tests pass to SingleOpcode as register contents.
// Each vector is built from four float components.
private (V128, V128, V128, V128) GenerateTestVectors()
{
    // Finite values of mixed sign and magnitude.
    V128 mixed = new V128(-12.43f, 1872.23f, 4456.23f, -5622.2f);

    // Zero, NaN and both infinities.
    V128 special = new V128(0.0f, float.NaN, float.PositiveInfinity, float.NegativeInfinity);

    // A large value, negative zero and a small +/- pair.
    V128 signed = new V128(1.23e10f, -0.0f, -0.123f, 0.123f);

    // float.Epsilon alongside more finite values.
    V128 tiny = new V128(float.Epsilon, 3.5f, 925.23f, -104.9f);

    return (mixed, special, signed, tiny);
}
|
|
|
|
// Builds a byte buffer of the requested length filled with the raw bytes of the float sequence
// f(i) = i + i / 9, for i = 0 .. (length / 4) - 1.
//
// length: size of the returned buffer in bytes.
// Returns: a new array of exactly "length" bytes. When length is not a multiple of 4, the trailing
// 1-3 bytes that do not form a whole float are left as zero.
private byte[] GenerateVectorSequence(int length)
{
    // Number of whole floats that fit in the buffer.
    int floatLength = length >> 2;
    float[] data = new float[floatLength];

    for (int i = 0; i < floatLength; i++)
    {
        data[i] = i + (i / 9f);
    }

    var result = new byte[length];

    // Copy only the bytes the float array actually holds. Asking for result.Length bytes made
    // Buffer.BlockCopy throw whenever length was not a multiple of 4.
    Buffer.BlockCopy(data, 0, result, 0, floatLength * sizeof(float));

    return result;
}
|
|
#endif
|
|
}
|
|
}
|