diff --git a/ARMeilleure/Decoders/Decoder.cs b/ARMeilleure/Decoders/Decoder.cs index 8eb2a99d6d..913d5082b6 100644 --- a/ARMeilleure/Decoders/Decoder.cs +++ b/ARMeilleure/Decoders/Decoder.cs @@ -292,15 +292,16 @@ namespace ARMeilleure.Decoders private static bool IsCall(OpCode opCode) { - // TODO (CQ): ARM32 support. return opCode.Instruction.Name == InstName.Bl || - opCode.Instruction.Name == InstName.Blr; + opCode.Instruction.Name == InstName.Blr || + opCode.Instruction.Name == InstName.Blx; } private static bool IsException(OpCode opCode) { return opCode.Instruction.Name == InstName.Brk || opCode.Instruction.Name == InstName.Svc || + opCode.Instruction.Name == InstName.Trap || opCode.Instruction.Name == InstName.Und; } diff --git a/ARMeilleure/Decoders/DecoderHelper.cs b/ARMeilleure/Decoders/DecoderHelper.cs index bc41c61c6a..6fe4678f96 100644 --- a/ARMeilleure/Decoders/DecoderHelper.cs +++ b/ARMeilleure/Decoders/DecoderHelper.cs @@ -148,5 +148,20 @@ namespace ARMeilleure.Decoders { return (((long)opCode << 45) >> 48) & ~3; } + + /* Returns true only when q is set and at least one value in args is odd (lowest bit set); returns false whenever q is false or args is empty. */ + public static bool VectorArgumentsInvalid(bool q, params int[] args) + { + if (q) + { + for (int i = 0; i < args.Length; i++) + { + if ((args[i] & 1) == 1) + { + return true; + } + } + } + return false; + } } } \ No newline at end of file diff --git a/ARMeilleure/Decoders/IOpCode32AluBf.cs b/ARMeilleure/Decoders/IOpCode32AluBf.cs new file mode 100644 index 0000000000..18de3eb652 --- /dev/null +++ b/ARMeilleure/Decoders/IOpCode32AluBf.cs @@ -0,0 +1,11 @@ +namespace ARMeilleure.Decoders +{ + /* Read-only view of an opcode exposing two register indices (Rd, Rn) and the Msb/Lsb bit positions. */ + interface IOpCode32AluBf + { + int Rd { get; } + int Rn { get; } + + int Msb { get; } + int Lsb { get; } + } +} diff --git a/ARMeilleure/Decoders/IOpCode32AluReg.cs b/ARMeilleure/Decoders/IOpCode32AluReg.cs new file mode 100644 index 0000000000..1612cc5c9d --- /dev/null +++ b/ARMeilleure/Decoders/IOpCode32AluReg.cs @@ -0,0 +1,7 @@ +namespace ARMeilleure.Decoders +{ + /* Extends IOpCode32Alu with an Rm register index. */ + interface IOpCode32AluReg : IOpCode32Alu + { + int Rm { get; } + } +} diff --git 
a/ARMeilleure/Decoders/IOpCode32AluUx.cs b/ARMeilleure/Decoders/IOpCode32AluUx.cs new file mode 100644 index 0000000000..d03c7e219c --- /dev/null +++ b/ARMeilleure/Decoders/IOpCode32AluUx.cs @@ -0,0 +1,8 @@ +namespace ARMeilleure.Decoders +{ + /* Extends IOpCode32AluReg with a rotation amount expressed in bits (RotateBits) and an Add flag. */ + interface IOpCode32AluUx : IOpCode32AluReg + { + int RotateBits { get; } + bool Add { get; } + } +} diff --git a/ARMeilleure/Decoders/IOpCode32Mem.cs b/ARMeilleure/Decoders/IOpCode32Mem.cs index 0585ab53ac..8fdc9dadfb 100644 --- a/ARMeilleure/Decoders/IOpCode32Mem.cs +++ b/ARMeilleure/Decoders/IOpCode32Mem.cs @@ -6,7 +6,6 @@ namespace ARMeilleure.Decoders int Rn { get; } bool WBack { get; } - bool IsLoad { get; } } } \ No newline at end of file diff --git a/ARMeilleure/Decoders/IOpCode32MemEx.cs b/ARMeilleure/Decoders/IOpCode32MemEx.cs new file mode 100644 index 0000000000..aca7200a5c --- /dev/null +++ b/ARMeilleure/Decoders/IOpCode32MemEx.cs @@ -0,0 +1,7 @@ +namespace ARMeilleure.Decoders +{ + /* Extends IOpCode32Mem with an additional Rd register index. */ + interface IOpCode32MemEx : IOpCode32Mem + { + int Rd { get; } + } +} diff --git a/ARMeilleure/Decoders/IOpCode32Simd.cs b/ARMeilleure/Decoders/IOpCode32Simd.cs new file mode 100644 index 0000000000..687254d92f --- /dev/null +++ b/ARMeilleure/Decoders/IOpCode32Simd.cs @@ -0,0 +1,4 @@ +namespace ARMeilleure.Decoders +{ + /* Marker interface combining IOpCode32 and IOpCodeSimd; it declares no members of its own. */ + interface IOpCode32Simd : IOpCode32, IOpCodeSimd { } +} diff --git a/ARMeilleure/Decoders/IOpCode32SimdImm.cs b/ARMeilleure/Decoders/IOpCode32SimdImm.cs new file mode 100644 index 0000000000..a0cb669c79 --- /dev/null +++ b/ARMeilleure/Decoders/IOpCode32SimdImm.cs @@ -0,0 +1,9 @@ +namespace ARMeilleure.Decoders +{ + /* SIMD opcode view exposing a Vd register index, a 64-bit Immediate and an element count (Elems). */ + interface IOpCode32SimdImm : IOpCode32Simd + { + int Vd { get; } + long Immediate { get; } + int Elems { get; } + } +} diff --git a/ARMeilleure/Decoders/OpCode32AluBf.cs b/ARMeilleure/Decoders/OpCode32AluBf.cs new file mode 100644 index 0000000000..7ee0ee34eb --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32AluBf.cs @@ -0,0 +1,24 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32AluBf : OpCode32, 
IOpCode32AluBf + { + public int Rd { get; private set; } + public int Rn { get; private set; } + + public int Msb { get; private set; } + + public int Lsb { get; private set; } + + /* SourceMask has the low (Msb + 1) bits set, i.e. bits [Msb:0]. DestMask additionally clears every bit below Lsb, leaving bits [Msb:Lsb] when Msb >= Lsb. */ + public int SourceMask => (int)(0xFFFFFFFF >> (31 - Msb)); + public int DestMask => SourceMask & (int)(0xFFFFFFFF << Lsb); + + /* Decodes Rd from opcode bits [15:12], Rn from [3:0], Msb from [20:16] and Lsb from [11:7]. */ + public OpCode32AluBf(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = (opCode >> 12) & 0xf; + Rn = (opCode >> 0) & 0xf; + + Msb = (opCode >> 16) & 0x1f; + Lsb = (opCode >> 7) & 0x1f; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32AluImm16.cs b/ARMeilleure/Decoders/OpCode32AluImm16.cs new file mode 100644 index 0000000000..dbc02932a8 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32AluImm16.cs @@ -0,0 +1,15 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32AluImm16 : OpCode32Alu + { + public int Immediate { get; private set; } + + /* Builds a 16-bit immediate: opcode bits [19:16] become the top nibble and bits [11:0] the low 12 bits. */ + public OpCode32AluImm16(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int imm12 = opCode & 0xfff; + int imm4 = (opCode >> 16) & 0xf; + + Immediate = (imm4 << 12) | imm12; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32AluMla.cs b/ARMeilleure/Decoders/OpCode32AluMla.cs new file mode 100644 index 0000000000..4570aa4e2a --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32AluMla.cs @@ -0,0 +1,28 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32AluMla : OpCode32, IOpCode32AluReg + { + public int Rn { get; private set; } + public int Rm { get; private set; } + public int Ra { get; private set; } + public int Rd { get; private set; } + + public bool NHigh { get; private set; } + public bool MHigh { get; private set; } + public bool R { get; private set; } + public bool SetFlags { get; private set; } + + /* Register fields: Rn = bits [3:0], Rm = [11:8], Ra = [15:12], Rd = [19:16]; R is opcode bit 5. */ + public OpCode32AluMla(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rn = (opCode >> 0) & 0xf; + Rm = (opCode >> 8) & 0xf; + Ra = (opCode >> 12) & 0xf; + Rd = (opCode >> 16) & 0xf; + R = (opCode & (1 << 5)) != 0; 
+ /* NOTE(review): NHigh reads opcode bit 5, the same bit as R; which meaning applies presumably depends on the instruction - confirm against the emitters. MHigh is bit 6 and SetFlags bit 20. */ + NHigh = ((opCode >> 5) & 0x1) == 1; + MHigh = ((opCode >> 6) & 0x1) == 1; + SetFlags = ((opCode >> 20) & 1) != 0; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32AluReg.cs b/ARMeilleure/Decoders/OpCode32AluReg.cs new file mode 100644 index 0000000000..e378dd05d7 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32AluReg.cs @@ -0,0 +1,12 @@ +namespace ARMeilleure.Decoders +{ + /* OpCode32Alu variant that also decodes Rm from opcode bits [3:0]. */ + class OpCode32AluReg : OpCode32Alu, IOpCode32AluReg + { + public int Rm { get; private set; } + + public OpCode32AluReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rm = (opCode >> 0) & 0xf; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32AluRsImm.cs b/ARMeilleure/Decoders/OpCode32AluRsImm.cs index 779d6cecf8..68ca0d0cbe 100644 --- a/ARMeilleure/Decoders/OpCode32AluRsImm.cs +++ b/ARMeilleure/Decoders/OpCode32AluRsImm.cs @@ -2,15 +2,15 @@ namespace ARMeilleure.Decoders { class OpCode32AluRsImm : OpCode32Alu { - public int Rm { get; private set; } - public int Imm { get; private set; } + public int Rm { get; private set; } + public int Immediate { get; private set; } public ShiftType ShiftType { get; private set; } public OpCode32AluRsImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { - Rm = (opCode >> 0) & 0xf; - Imm = (opCode >> 7) & 0x1f; + Rm = (opCode >> 0) & 0xf; + Immediate = (opCode >> 7) & 0x1f; ShiftType = (ShiftType)((opCode >> 5) & 3); } diff --git a/ARMeilleure/Decoders/OpCode32AluRsReg.cs b/ARMeilleure/Decoders/OpCode32AluRsReg.cs new file mode 100644 index 0000000000..d195987bf8 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32AluRsReg.cs @@ -0,0 +1,18 @@ +namespace ARMeilleure.Decoders +{ + /* OpCode32Alu variant decoding Rm (bits [3:0]), Rs (bits [11:8]) and a 2-bit ShiftType (bits [6:5]). */ + class OpCode32AluRsReg : OpCode32Alu + { + public int Rm { get; private set; } + public int Rs { get; private set; } + + public ShiftType ShiftType { get; private set; } + + public OpCode32AluRsReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rm = (opCode >> 0) & 0xf; + Rs = 
(opCode >> 8) & 0xf; + + ShiftType = (ShiftType)((opCode >> 5) & 3); + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32AluUmull.cs b/ARMeilleure/Decoders/OpCode32AluUmull.cs new file mode 100644 index 0000000000..c98d9305b4 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32AluUmull.cs @@ -0,0 +1,30 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32AluUmull : OpCode32 + { + public int RdLo { get; private set; } + public int RdHi { get; private set; } + public int Rn { get; private set; } + public int Rm { get; private set; } + + public bool NHigh { get; private set; } + public bool MHigh { get; private set; } + + public bool SetFlags { get; private set; } + public DataOp DataOp { get; private set; } + + /* Field layout: RdLo = bits [15:12], RdHi = [19:16], Rm = [11:8], Rn = [3:0], NHigh = bit 5, MHigh = bit 6, SetFlags = bit 20. DataOp is always DataOp.Arithmetic. */ + public OpCode32AluUmull(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + RdLo = (opCode >> 12) & 0xf; + RdHi = (opCode >> 16) & 0xf; + Rm = (opCode >> 8) & 0xf; + Rn = (opCode >> 0) & 0xf; + + NHigh = ((opCode >> 5) & 0x1) == 1; + MHigh = ((opCode >> 6) & 0x1) == 1; + + SetFlags = ((opCode >> 20) & 0x1) != 0; + DataOp = DataOp.Arithmetic; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32AluUx.cs b/ARMeilleure/Decoders/OpCode32AluUx.cs new file mode 100644 index 0000000000..55c10209de --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32AluUx.cs @@ -0,0 +1,16 @@ +using ARMeilleure.State; + +namespace ARMeilleure.Decoders +{ + class OpCode32AluUx : OpCode32AluReg, IOpCode32AluUx + { + /* Rotate is the 2-bit field at opcode bits [11:10]; RotateBits scales it to a bit count (0, 8, 16 or 24). Add is true unless Rn equals the PC register alias. */ + public int Rotate { get; private set; } + public int RotateBits => Rotate * 8; + public bool Add => Rn != RegisterAlias.Aarch32Pc; + + public OpCode32AluUx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rotate = (opCode >> 10) & 0x3; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32Exception.cs b/ARMeilleure/Decoders/OpCode32Exception.cs new file mode 100644 index 0000000000..1acdf5b122 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32Exception.cs @@ -0,0 +1,12 @@ +namespace ARMeilleure.Decoders +{ + class 
OpCode32Exception : OpCode32 + { + public int Id { get; private set; } + + /* Id is the low 24 bits of the opcode. */ + public OpCode32Exception(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Id = opCode & 0xFFFFFF; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32Mem.cs b/ARMeilleure/Decoders/OpCode32Mem.cs index f4e88d5924..b749f3dbe5 100644 --- a/ARMeilleure/Decoders/OpCode32Mem.cs +++ b/ARMeilleure/Decoders/OpCode32Mem.cs @@ -4,7 +4,7 @@ namespace ARMeilleure.Decoders { class OpCode32Mem : OpCode32, IOpCode32Mem { - public int Rt { get; private set; } + public int Rt { get; protected set; } public int Rn { get; private set; } public int Immediate { get; protected set; } diff --git a/ARMeilleure/Decoders/OpCode32MemLdEx.cs b/ARMeilleure/Decoders/OpCode32MemLdEx.cs new file mode 100644 index 0000000000..42d1a33a02 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32MemLdEx.cs @@ -0,0 +1,12 @@ +namespace ARMeilleure.Decoders +{ + /* OpCode32Mem variant that additionally decodes Rd from opcode bits [3:0]. */ + class OpCode32MemLdEx : OpCode32Mem, IOpCode32MemEx + { + public int Rd { get; private set; } + + public OpCode32MemLdEx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = opCode & 0xf; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32MemReg.cs b/ARMeilleure/Decoders/OpCode32MemReg.cs new file mode 100644 index 0000000000..ccc05a875e --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32MemReg.cs @@ -0,0 +1,12 @@ +namespace ARMeilleure.Decoders +{ + /* OpCode32Mem variant that additionally decodes Rm from opcode bits [3:0]. */ + class OpCode32MemReg : OpCode32Mem + { + public int Rm { get; private set; } + + public OpCode32MemReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rm = (opCode >> 0) & 0xf; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32MemRsImm.cs b/ARMeilleure/Decoders/OpCode32MemRsImm.cs new file mode 100644 index 0000000000..299e83e2c5 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32MemRsImm.cs @@ -0,0 +1,16 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32MemRsImm : OpCode32Mem + { + public int Rm { get; private set; } + 
public ShiftType ShiftType { get; private set; } + + /* Decodes Rm from bits [3:0], a 5-bit Immediate from bits [11:7] and ShiftType from bits [6:5]. */ + public OpCode32MemRsImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rm = (opCode >> 0) & 0xf; + Immediate = (opCode >> 7) & 0x1f; + + ShiftType = (ShiftType)((opCode >> 5) & 3); + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32MemStEx.cs b/ARMeilleure/Decoders/OpCode32MemStEx.cs new file mode 100644 index 0000000000..b9c6d4f4ff --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32MemStEx.cs @@ -0,0 +1,13 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32MemStEx : OpCode32Mem, IOpCode32MemEx + { + public int Rd { get; private set; } + + /* Rd comes from bits [15:12]. Rt is assigned here from bits [3:0], presumably replacing the value set by the OpCode32Mem constructor (not visible in this patch) - TODO confirm. */ + public OpCode32MemStEx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = (opCode >> 12) & 0xf; + Rt = (opCode >> 0) & 0xf; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32Simd.cs b/ARMeilleure/Decoders/OpCode32Simd.cs new file mode 100644 index 0000000000..cda10c3c8f --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32Simd.cs @@ -0,0 +1,30 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32Simd : OpCode32SimdBase + { + public int Opc { get; protected set; } + public bool Q { get; protected set; } + public bool F { get; protected set; } + public bool U { get; private set; } + + /* Field layout: Size = bits [21:20], Q = bit 6, F = bit 10, U = bit 24, Opc = bits [8:7]. Q selects Simd128 over Simd64. Vd is bit 22 (as bit 4) joined with bits [15:12]; Vm is bit 5 (as bit 4) joined with bits [3:0]. */ + public OpCode32Simd(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Size = (opCode >> 20) & 0x3; + Q = ((opCode >> 6) & 0x1) != 0; + F = ((opCode >> 10) & 0x1) != 0; + U = ((opCode >> 24) & 0x1) != 0; + Opc = (opCode >> 7) & 0x3; + + RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64; + + Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf); + Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf); + + // Subclasses have their own handling of Vx to account for before checking. 
+ if (GetType() == typeof(OpCode32Simd) && DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdBase.cs b/ARMeilleure/Decoders/OpCode32SimdBase.cs new file mode 100644 index 0000000000..10b5465978 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdBase.cs @@ -0,0 +1,52 @@ +using System; + +namespace ARMeilleure.Decoders +{ + /* Common base for the AArch32 SIMD opcode classes: holds Vd, Vm and Size, and derives the Qx / Ix / Fx index properties from them through GetQuadwordIndex and GetQuadwordSubindex. */ + abstract class OpCode32SimdBase : OpCode32, IOpCode32Simd + { + public int Vd { get; protected set; } + public int Vm { get; protected set; } + public int Size { get; protected set; } + + // Helpers to index doublewords within quad words. Essentially, looping over the vector starts at quadword Q and index Fx or Ix within it, + // depending on instruction type. + // + // Qx: The quadword register that the target vector is contained in. + // Ix: The starting index of the target vector within the quadword, with size treated as integer. + // Fx: The starting index of the target vector within the quadword, with size treated as floating point. (16 or 32) + public int Qd => GetQuadwordIndex(Vd); + public int Id => GetQuadwordSubindex(Vd) << (3 - Size); + public int Fd => GetQuadwordSubindex(Vd) << (1 - (Size & 1)); // When the top bit is truncated, 1 is fp16 which is an optional extension in ARMv8.2. We always assume 64. 
+ public int Qm => GetQuadwordIndex(Vm); + public int Im => GetQuadwordSubindex(Vm) << (3 - Size); + public int Fm => GetQuadwordSubindex(Vm) << (1 - (Size & 1)); + + /* Returns index >> 1 for both Simd128 and Simd64; any other RegisterSize throws InvalidOperationException. */ + protected int GetQuadwordIndex(int index) + { + switch (RegisterSize) + { + case RegisterSize.Simd128: + case RegisterSize.Simd64: + return index >> 1; + } + + throw new InvalidOperationException(); + } + + /* Returns 0 for Simd128 and the low bit of index for Simd64; any other RegisterSize throws InvalidOperationException. */ + protected int GetQuadwordSubindex(int index) + { + switch (RegisterSize) + { + case RegisterSize.Simd128: + return 0; + case RegisterSize.Simd64: + return index & 1; + } + + throw new InvalidOperationException(); + } + + public OpCode32SimdBase(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdBinary.cs b/ARMeilleure/Decoders/OpCode32SimdBinary.cs new file mode 100644 index 0000000000..66f63dc5a4 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdBinary.cs @@ -0,0 +1,18 @@ +namespace ARMeilleure.Decoders +{ + /// + /// A special alias that always runs in 64 bit int, to speed up binary ops a little. 
+ /// + class OpCode32SimdBinary : OpCode32SimdReg + { + /* Forces Size to 3, then marks the instruction undefined when Q is set and any of Vd, Vm or Vn is odd. */ + public OpCode32SimdBinary(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Size = 3; + + if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm, Vn)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdCmpZ.cs b/ARMeilleure/Decoders/OpCode32SimdCmpZ.cs new file mode 100644 index 0000000000..567147fbe5 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdCmpZ.cs @@ -0,0 +1,15 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdCmpZ : OpCode32Simd + { + /* Re-reads Size from opcode bits [19:18], replacing the value OpCode32Simd took from bits [21:20]. */ + public OpCode32SimdCmpZ(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Size = (opCode >> 18) & 0x3; + + if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdCvtFI.cs b/ARMeilleure/Decoders/OpCode32SimdCvtFI.cs new file mode 100644 index 0000000000..aaedcb3cfc --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdCvtFI.cs @@ -0,0 +1,13 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdCvtFI : OpCode32SimdS + { + public int Opc2 { get; private set; } + + /* Opc2 is opcode bits [18:16]; Opc is narrowed here to the single bit 7. */ + public OpCode32SimdCvtFI(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Opc2 = (opCode >> 16) & 0x7; + Opc = (opCode >> 7) & 0x1; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdDupElem.cs b/ARMeilleure/Decoders/OpCode32SimdDupElem.cs new file mode 100644 index 0000000000..fd83aee5f8 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdDupElem.cs @@ -0,0 +1,40 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdDupElem : OpCode32Simd + { + public int Index { get; private set; } + + public OpCode32SimdDupElem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + /* opc (bits [19:16]) encodes element size and index together: xxx1 gives Size 0, xx10 gives Size 1, x100 gives Size 2; the remaining bits above the marker form Index. Any other pattern is undefined. */ + var opc = (opCode >> 16) & 0xf; + + if ((opc & 0b1) == 1) + { + Size = 0; + Index = (opc >> 1) & 0x7; + } + else if ((opc & 0b11) == 
0b10) + { + Size = 1; + Index = (opc >> 2) & 0x3; + } + else if ((opc & 0b111) == 0b100) + { + Size = 2; + Index = (opc >> 3) & 0x1; + } + else + { + Instruction = InstDescriptor.Undefined; + } + + Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf); + Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf); + + /* Only Vd is checked against Q here; Vm is not passed to VectorArgumentsInvalid. */ + if (DecoderHelper.VectorArgumentsInvalid(Q, Vd)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdDupGP.cs b/ARMeilleure/Decoders/OpCode32SimdDupGP.cs new file mode 100644 index 0000000000..58a8a7fe32 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdDupGP.cs @@ -0,0 +1,31 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdDupGP : OpCode32, IOpCode32Simd + { + public int Size { get; private set; } + public int Vd { get; private set; } + public int Rt { get; private set; } + public bool Q { get; private set; } + + /* Size is 2 minus the two-bit value formed from opcode bit 22 (high) and bit 5 (low); a result of -1 marks the instruction undefined and stops decoding before Q, Vd and Rt are read. */ + public OpCode32SimdDupGP(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Size = 2 - (((opCode >> 21) & 0x2) | ((opCode >> 5) & 0x1)); // B:E - 0 for 32, 16 then 8. + if (Size == -1) + { + Instruction = InstDescriptor.Undefined; + return; + } + Q = ((opCode >> 21) & 0x1) != 0; + + RegisterSize = Q ? 
RegisterSize.Simd128 : RegisterSize.Simd64; + + /* Vd is bit 7 (as bit 4) joined with bits [19:16]; Rt is bits [15:12]. */ + Vd = ((opCode >> 3) & 0x10) | ((opCode >> 16) & 0xf); + Rt = ((opCode >> 12) & 0xf); + + if (DecoderHelper.VectorArgumentsInvalid(Q, Vd)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdExt.cs b/ARMeilleure/Decoders/OpCode32SimdExt.cs new file mode 100644 index 0000000000..1ee0485ee3 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdExt.cs @@ -0,0 +1,17 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdExt : OpCode32SimdReg + { + public int Immediate { get; private set; } + + /* Immediate is opcode bits [11:8] and Size is forced to 0. The instruction is undefined when Q is set and any of Vd, Vm, Vn is odd, or when Q is clear and Immediate exceeds 7. */ + public OpCode32SimdExt(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Immediate = (opCode >> 8) & 0xf; + Size = 0; + if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm, Vn) || (!Q && Immediate > 7)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdImm.cs b/ARMeilleure/Decoders/OpCode32SimdImm.cs new file mode 100644 index 0000000000..72fca59ca6 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdImm.cs @@ -0,0 +1,37 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdImm : OpCode32, IOpCode32SimdImm + { + public int Vd { get; private set; } + public bool Q { get; private set; } + public long Immediate { get; private set; } + public int Size { get; private set; } + public int Elems => GetBytesCount() >> Size; + + /* Vd is bit 22 (as bit 4) joined with bits [15:12]; Q is bit 6; cMode is bits [11:8]; op is bit 5. The raw 8-bit immediate is assembled from bits [3:0], bits [18:16] and bit 24 before being expanded by OpCodeSimdHelper.GetSimdImmediateAndSize. */ + public OpCode32SimdImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Vd = (opCode >> 12) & 0xf; + Vd |= (opCode >> 18) & 0x10; + + Q = ((opCode >> 6) & 0x1) > 0; + + int cMode = (opCode >> 8) & 0xf; + int op = (opCode >> 5) & 0x1; + + long imm; + + imm = ((uint)opCode >> 0) & 0xf; + imm |= ((uint)opCode >> 12) & 0x70; + imm |= ((uint)opCode >> 17) & 0x80; + + (Immediate, Size) = OpCodeSimdHelper.GetSimdImmediateAndSize(cMode, op, imm, fpBaseSize: 2); + + RegisterSize = Q ? 
RegisterSize.Simd128 : RegisterSize.Simd64; + + if (DecoderHelper.VectorArgumentsInvalid(Q, Vd)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdImm44.cs b/ARMeilleure/Decoders/OpCode32SimdImm44.cs new file mode 100644 index 0000000000..f8f73bbba1 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdImm44.cs @@ -0,0 +1,36 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdImm44 : OpCode32, IOpCode32SimdImm + { + public int Vd { get; private set; } + public long Immediate { get; private set; } + public int Size { get; private set; } + public int Elems { get; private set; } + + /* Size is opcode bits [9:8]. Any Size other than 3 uses the "single" register encoding (Vd = bits [15:12] followed by bit 22 as the low bit); Size 3 uses bit 22 as the high bit above bits [15:12]. The 8-bit immediate is bits [19:16] over bits [3:0] and is used as an index into Imm8ToFP64Table or Imm8ToFP32Table. */ + public OpCode32SimdImm44(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Size = (opCode >> 8) & 0x3; + + bool single = Size != 3; + + if (single) + { + Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e); + } + else + { + Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf); + } + + long imm; + + imm = ((uint)opCode >> 0) & 0xf; + imm |= ((uint)opCode >> 12) & 0xf0; + + Immediate = (Size == 3) ? (long)DecoderHelper.Imm8ToFP64Table[(int)imm] : DecoderHelper.Imm8ToFP32Table[(int)imm]; + + RegisterSize = (!single) ? 
RegisterSize.Int64 : RegisterSize.Int32; + Elems = 1; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdMemImm.cs b/ARMeilleure/Decoders/OpCode32SimdMemImm.cs new file mode 100644 index 0000000000..630566cc3f --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdMemImm.cs @@ -0,0 +1,35 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdMemImm : OpCode32, IOpCode32Simd + { + public int Vd { get; private set; } + public int Rn { get; private set; } + public int Size { get; private set; } + public bool Add { get; private set; } + public int Immediate { get; private set; } + + /* Immediate is opcode bits [7:0], scaled by 2 when Size == 1 and by 4 otherwise. Rn is bits [19:16], Size is bits [9:8] and Add mirrors bit 23. Vd uses the "single" encoding (bits [15:12] followed by bit 22) unless Size == 3, where bit 22 is the high bit. */ + public OpCode32SimdMemImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Immediate = opCode & 0xff; + + Rn = (opCode >> 16) & 0xf; + Size = (opCode >> 8) & 0x3; + + Immediate <<= (Size == 1) ? 1 : 2; + + bool u = (opCode & (1 << 23)) != 0; + Add = u; + + bool single = Size != 3; + + if (single) + { + Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e); + } + else + { + Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf); + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdMemMult.cs b/ARMeilleure/Decoders/OpCode32SimdMemMult.cs new file mode 100644 index 0000000000..9d43a71eb2 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdMemMult.cs @@ -0,0 +1,71 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdMemMult : OpCode32 + { + public int Rn { get; private set; } + public int Vd { get; private set; } + + public int RegisterRange { get; private set; } + public int Offset { get; private set; } + public int PostOffset { get; private set; } + public bool IsLoad { get; private set; } + public bool DoubleWidth { get; private set; } + public bool Add { get; private set; } + + /* Rn is opcode bits [19:16]; the load flag is bit 20, w is bit 21 and u is bit 23. */ + public OpCode32SimdMemMult(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rn = (opCode >> 16) & 0xf; + + bool isLoad = (opCode & (1 << 20)) != 0; + bool w = (opCode & (1 << 21)) != 0; + bool u = (opCode & (1 << 23)) != 0; + 
bool p = (opCode & (1 << 24)) != 0; + + /* Undefined when P equals U while W is set; decoding stops here in that case. */ + if (p == u && w) + { + Instruction = InstDescriptor.Undefined; + return; + } + + DoubleWidth = (opCode & (1 << 8)) != 0; + + if (!DoubleWidth) + { + Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e); + } + else + { + Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf); + } + + Add = u; + + RegisterRange = opCode & 0xff; + + int regsSize = RegisterRange * 4; // Double mode is still measured in single register size. + + /* Offset starts at 0 and becomes -regsSize when U is clear; PostOffset is +/-regsSize only when W is set. */ + if (!u) + { + Offset -= regsSize; + } + + if (w) + { + PostOffset = u ? regsSize : -regsSize; + } + else + { + PostOffset = 0; + } + + IsLoad = isLoad; + + int regs = DoubleWidth ? RegisterRange / 2 : RegisterRange; + + /* Undefined for an empty range, a range above 32, or a range that would run past register index 32. */ + if (RegisterRange == 0 || RegisterRange > 32 || Vd + regs > 32) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdMemPair.cs b/ARMeilleure/Decoders/OpCode32SimdMemPair.cs new file mode 100644 index 0000000000..93320e7f95 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdMemPair.cs @@ -0,0 +1,46 @@ +using ARMeilleure.State; +using System; + +namespace ARMeilleure.Decoders +{ + class OpCode32SimdMemPair : OpCode32, IOpCode32Simd + { + /* Lookup table indexed by opcode bits [11:8] that yields Regs. It is never reassigned, so it is declared readonly. */ + private static readonly int[] RegsMap = + { + 1, 1, 4, 2, + 1, 1, 3, 1, + 1, 1, 2, 1, + 1, 1, 1, 1 + }; + + public int Vd { get; private set; } + public int Rn { get; private set; } + public int Rm { get; private set; } + public int Align { get; private set; } + public bool WBack { get; private set; } + public bool RegisterIndex { get; private set; } + public int Size { get; private set; } + public int Elems => 8 >> Size; + public int Regs { get; private set; } + public int Increment { get; private set; } + + /* Vd is bit 22 (as bit 4) joined with bits [15:12]; Size is bits [7:6], Align bits [5:4], Rm bits [3:0] and Rn bits [19:16]. */ + public OpCode32SimdMemPair(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Vd = (opCode >> 12) & 0xf; + Vd |= (opCode >> 18) & 0x10; + + Size = (opCode >> 6) & 0x3; + + Align = (opCode >> 4) & 0x3; + Rm = (opCode >> 0) & 0xf; + Rn = (opCode >> 16) & 0xf; + + WBack = Rm != 
RegisterAlias.Aarch32Pc; + /* WBack is set whenever Rm is not the PC alias; RegisterIndex additionally requires Rm not to be the SP alias. */ + RegisterIndex = Rm != RegisterAlias.Aarch32Pc && Rm != RegisterAlias.Aarch32Sp; + + Regs = RegsMap[(opCode >> 8) & 0xf]; + + /* Increment is (bit 8) + 1, capped at Regs. */ + Increment = Math.Min(Regs, ((opCode >> 8) & 0x1) + 1); + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdMemSingle.cs b/ARMeilleure/Decoders/OpCode32SimdMemSingle.cs new file mode 100644 index 0000000000..8cdd374366 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdMemSingle.cs @@ -0,0 +1,46 @@ +using ARMeilleure.State; + +namespace ARMeilleure.Decoders +{ + class OpCode32SimdMemSingle : OpCode32, IOpCode32Simd + { + public int Vd { get; private set; } + public int Rn { get; private set; } + public int Rm { get; private set; } + public int IndexAlign { get; private set; } + public int Index { get; private set; } + public bool WBack { get; private set; } + public bool RegisterIndex { get; private set; } + public int Size { get; private set; } + public bool Replicate { get; private set; } + public int Increment { get; private set; } + + /* Vd is bit 22 (as bit 4) joined with bits [15:12]; IndexAlign is bits [7:4]. A Size field (bits [11:10]) of 3 selects the Replicate form, where Size is re-read from bits [7:6], Increment is (bit 5) + 1 and Index is 0. Otherwise Increment and Index are derived from IndexAlign. */ + public OpCode32SimdMemSingle(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Vd = (opCode >> 12) & 0xf; + Vd |= (opCode >> 18) & 0x10; + + IndexAlign = (opCode >> 4) & 0xf; + + Size = (opCode >> 10) & 0x3; + Replicate = Size == 3; + if (Replicate) + { + Size = (opCode >> 6) & 0x3; + Increment = ((opCode >> 5) & 1) + 1; + Index = 0; + } + else + { + Increment = (((IndexAlign >> Size) & 1) == 0) ? 
1 : 2; + Index = IndexAlign >> (1 + Size); + } + + Rm = (opCode >> 0) & 0xf; + Rn = (opCode >> 16) & 0xf; + + /* WBack is set whenever Rm is not the PC alias; RegisterIndex additionally requires Rm not to be the SP alias. */ + WBack = Rm != RegisterAlias.Aarch32Pc; + RegisterIndex = Rm != RegisterAlias.Aarch32Pc && Rm != RegisterAlias.Aarch32Sp; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdMovGp.cs b/ARMeilleure/Decoders/OpCode32SimdMovGp.cs new file mode 100644 index 0000000000..918291a174 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdMovGp.cs @@ -0,0 +1,26 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdMovGp : OpCode32, IOpCode32Simd + { + public int Size => 2; + + public int Vn { get; private set; } + public int Rt { get; private set; } + public int Op { get; private set; } + + public int Opc1 { get; private set; } + public int Opc2 { get; private set; } + + /* Op is opcode bit 20, Opc1 bits [22:21], Opc2 bits [6:5], Rt bits [15:12]; Vn is bits [19:16] followed by bit 7 as the low bit. */ + public OpCode32SimdMovGp(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + // Which one is used is instruction dependent. + Op = (opCode >> 20) & 0x1; + + Opc1 = (opCode >> 21) & 0x3; + Opc2 = (opCode >> 5) & 0x3; + + Vn = ((opCode >> 7) & 0x1) | ((opCode >> 15) & 0x1e); + Rt = (opCode >> 12) & 0xf; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdMovGpDouble.cs b/ARMeilleure/Decoders/OpCode32SimdMovGpDouble.cs new file mode 100644 index 0000000000..5f2725e15e --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdMovGpDouble.cs @@ -0,0 +1,31 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdMovGpDouble : OpCode32, IOpCode32Simd + { + public int Size => 3; + + public int Vm { get; private set; } + public int Rt { get; private set; } + public int Rt2 { get; private set; } + public int Op { get; private set; } + + public OpCode32SimdMovGpDouble(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + // Which one is used is instruction dependent. 
+ Op = (opCode >> 20) & 0x1; + + Rt = (opCode >> 12) & 0xf; + Rt2 = (opCode >> 16) & 0xf; + + /* Bit 8 clear selects the "single" encoding, where Vm is bits [3:0] followed by bit 5 as the low bit; otherwise bit 5 is the high bit above bits [3:0]. */ + bool single = (opCode & (1 << 8)) == 0; + if (single) + { + Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e); + } + else + { + Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf); + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdMovGpElem.cs b/ARMeilleure/Decoders/OpCode32SimdMovGpElem.cs new file mode 100644 index 0000000000..350d5c8467 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdMovGpElem.cs @@ -0,0 +1,46 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdMovGpElem : OpCode32, IOpCode32Simd + { + public int Size { get; private set; } + + public int Vd { get; private set; } + public int Rt { get; private set; } + public int Op { get; private set; } + public bool U { get; private set; } + + public int Index { get; private set; } + + public OpCode32SimdMovGpElem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Op = (opCode >> 20) & 0x1; + U = ((opCode >> 23) & 1) != 0; + + /* opc is a 5-bit value built from bit 23 (high), bits [22:21] and bits [6:5] (low). Pattern x1xxx gives Size 0, x0xx1 gives Size 1, 00x00 gives Size 2; anything else marks the instruction undefined and returns before Vd and Rt are decoded. */ + var opc = (((opCode >> 23) & 1) << 4) | (((opCode >> 21) & 0x3) << 2) | ((opCode >> 5) & 0x3); + + if ((opc & 0b01000) == 0b01000) + { + Size = 0; + Index = opc & 0x7; + } + else if ((opc & 0b01001) == 0b00001) + { + Size = 1; + Index = (opc >> 1) & 0x3; + } + else if ((opc & 0b11011) == 0) + { + Size = 2; + Index = (opc >> 2) & 0x1; + } + else + { + Instruction = InstDescriptor.Undefined; + return; + } + + Vd = ((opCode >> 3) & 0x10) | ((opCode >> 16) & 0xf); + Rt = (opCode >> 12) & 0xf; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdReg.cs b/ARMeilleure/Decoders/OpCode32SimdReg.cs new file mode 100644 index 0000000000..da1d2e363f --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdReg.cs @@ -0,0 +1,22 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdReg : OpCode32Simd + { + public int Vn { get; private set; } + + public int Qn => GetQuadwordIndex(Vn); + public int In => GetQuadwordSubindex(Vn) << (3 - Size); + public 
int Fn => GetQuadwordSubindex(Vn) << (1 - (Size & 1)); + + public OpCode32SimdReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Vn = ((opCode >> 3) & 0x10) | ((opCode >> 16) & 0xf); + + // Subclasses have their own handling of Vx to account for before checking. + if (GetType() == typeof(OpCode32SimdReg) && DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm, Vn)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdRegElem.cs b/ARMeilleure/Decoders/OpCode32SimdRegElem.cs new file mode 100644 index 0000000000..4bf15cca55 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdRegElem.cs @@ -0,0 +1,21 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdRegElem : OpCode32SimdReg + { + public OpCode32SimdRegElem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Q = ((opCode >> 24) & 0x1) != 0; + F = ((opCode >> 8) & 0x1) != 0; + Size = ((opCode >> 20) & 0x3); + + RegisterSize = Q ? 
RegisterSize.Simd128 : RegisterSize.Simd64; + + Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e); + + if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vn) || Size == 0 || (Size == 1 && F)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdRegS.cs b/ARMeilleure/Decoders/OpCode32SimdRegS.cs new file mode 100644 index 0000000000..b4ffad80a2 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdRegS.cs @@ -0,0 +1,20 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdRegS : OpCode32SimdS + { + public int Vn { get; private set; } + + public OpCode32SimdRegS(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + bool single = Size != 3; + if (single) + { + Vn = ((opCode >> 7) & 0x1) | ((opCode >> 15) & 0x1e); + } + else + { + Vn = ((opCode >> 3) & 0x10) | ((opCode >> 16) & 0xf); + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdRev.cs b/ARMeilleure/Decoders/OpCode32SimdRev.cs new file mode 100644 index 0000000000..6cdf9f5773 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdRev.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdRev : OpCode32SimdCmpZ + { + public OpCode32SimdRev(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + // Currently, this instruction is treated as though it's OPCODE is the true size, + // which lets us deal with reversing vectors on a single element basis (eg. math magic an I64 rather than insert lots of I8s). + int tempSize = Size; + Size = 3 - Opc; // Op 0 is 64 bit, 1 is 32 and so on. 
+ Opc = tempSize; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdS.cs b/ARMeilleure/Decoders/OpCode32SimdS.cs new file mode 100644 index 0000000000..2e860d9c87 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdS.cs @@ -0,0 +1,31 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdS : OpCode32, IOpCode32Simd + { + public int Vd { get; private set; } + public int Vm { get; private set; } + public int Opc { get; protected set; } + public int Size { get; protected set; } + + public OpCode32SimdS(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Opc = (opCode >> 15) & 0x3; + Size = (opCode >> 8) & 0x3; + + bool single = Size != 3; + + RegisterSize = single ? RegisterSize.Int32 : RegisterSize.Int64; + + if (single) + { + Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e); + Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e); + } + else + { + Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf); + Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf); + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdSel.cs b/ARMeilleure/Decoders/OpCode32SimdSel.cs new file mode 100644 index 0000000000..aefe138fa7 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdSel.cs @@ -0,0 +1,20 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdSel : OpCode32SimdRegS + { + public OpCode32SimdSelMode Cc { get; private set; } + + public OpCode32SimdSel(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Cc = (OpCode32SimdSelMode)((opCode >> 20) & 3); + } + } + + enum OpCode32SimdSelMode : int + { + Eq = 0, + Vs, + Ge, + Gt + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdShImm.cs b/ARMeilleure/Decoders/OpCode32SimdShImm.cs new file mode 100644 index 0000000000..b19a601fb6 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdShImm.cs @@ -0,0 +1,44 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdShImm : OpCode32Simd + { + public int Immediate { get; private set; } + public int Shift 
{ get; private set; } + + public OpCode32SimdShImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Immediate = (opCode >> 16) & 0x3f; + var limm = ((opCode >> 1) & 0x40) | Immediate; + + if ((limm & 0x40) == 0b1000000) + { + Size = 3; + Shift = Immediate; + } + else if ((limm & 0x60) == 0b0100000) + { + Size = 2; + Shift = Immediate - 32; + } + else if ((limm & 0x70) == 0b0010000) + { + Size = 1; + Shift = Immediate - 16; + } + else if ((limm & 0x78) == 0b0001000) + { + Size = 0; + Shift = Immediate - 8; + } + else + { + Instruction = InstDescriptor.Undefined; + } + + if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdSpecial.cs b/ARMeilleure/Decoders/OpCode32SimdSpecial.cs new file mode 100644 index 0000000000..986afcf94e --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdSpecial.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdSpecial : OpCode32 + { + public int Rt { get; private set; } + public int Sreg { get; private set; } + + public OpCode32SimdSpecial(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rt = (opCode >> 12) & 0xf; + Sreg = (opCode >> 16) & 0xf; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdSqrte.cs b/ARMeilleure/Decoders/OpCode32SimdSqrte.cs new file mode 100644 index 0000000000..9eb7f775d9 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdSqrte.cs @@ -0,0 +1,16 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdSqrte : OpCode32Simd + { + public OpCode32SimdSqrte(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Size = (opCode >> 18) & 0x1; + F = ((opCode >> 8) & 0x1) != 0; + + if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdTbl.cs b/ARMeilleure/Decoders/OpCode32SimdTbl.cs new 
file mode 100644
index 0000000000..e59627c300
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCode32SimdTbl.cs
@@ -0,0 +1,21 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCode32SimdTbl : OpCode32SimdReg // Adds a 2-bit Length field and repurposes the decoded Q bit as Opc.
+    {
+        public int Length { get; private set; }
+
+        public OpCode32SimdTbl(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Length = (opCode >> 8) & 3; // Bits 9:8.
+            Size = 0;
+            Opc = Q ? 1 : 0; // Preserve the decoded Q value in Opc before Q is cleared.
+            Q = false;
+            RegisterSize = RegisterSize.Simd64;
+
+            if (Vn + Length + 1 > 32) // Rejects encodings where Vn + Length + 1 exceeds 32.
+            {
+                Instruction = InstDescriptor.Undefined;
+            }
+        }
+    }
+}
diff --git a/ARMeilleure/Decoders/OpCode32System.cs b/ARMeilleure/Decoders/OpCode32System.cs
new file mode 100644
index 0000000000..bf4383017a
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCode32System.cs
@@ -0,0 +1,26 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCode32System : OpCode32 // Extracts every field unconditionally; MrrcOp overlaps Opc2 (bits 7:4 versus bits 7:5).
+    {
+        public int Opc1 { get; private set; }
+        public int CRn { get; private set; }
+        public int Rt { get; private set; }
+        public int Opc2 { get; private set; }
+        public int CRm { get; private set; }
+        public int MrrcOp { get; private set; }
+
+        public int Coproc { get; private set; }
+
+        public OpCode32System(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            Opc1 = (opCode >> 21) & 0x7; // Bits 23:21.
+            CRn = (opCode >> 16) & 0xf; // Bits 19:16.
+            Rt = (opCode >> 12) & 0xf; // Bits 15:12.
+            Opc2 = (opCode >> 5) & 0x7; // Bits 7:5.
+            CRm = (opCode >> 0) & 0xf; // Bits 3:0.
+            MrrcOp = (opCode >> 4) & 0xf; // Bits 7:4.
+
+            Coproc = (opCode >> 8) & 0xf; // Bits 11:8.
+        }
+    }
+}
diff --git a/ARMeilleure/Decoders/OpCodeSimdHelper.cs b/ARMeilleure/Decoders/OpCodeSimdHelper.cs
new file mode 100644
index 0000000000..3e5a7f65b9
--- /dev/null
+++ b/ARMeilleure/Decoders/OpCodeSimdHelper.cs
@@ -0,0 +1,88 @@
+namespace ARMeilleure.Decoders
+{
+    public static class OpCodeSimdHelper // Stateless helper for expanding SIMD immediates.
+    {
+        public static (long Immediate, int Size) GetSimdImmediateAndSize(int cMode, int op, long imm, int fpBaseSize = 0) // Expands imm according to cMode/op and returns it with a size code; fpBaseSize is not read in this method.
+        {
+            int modeLow = cMode & 1; // Lowest bit of cMode.
+            int modeHigh = cMode >> 1; // Remaining upper bits of cMode.
+            int size = 0;
+
+            if (modeHigh == 0b111)
+            {
+                switch (op | (modeLow << 1)) // 2-bit selector: modeLow on top of op (op presumably 0 or 1 - TODO confirm against callers).
+                {
+                    case 0:
+                        // 64-bits Immediate.
+                        // Transform abcd efgh into abcd efgh abcd efgh ...
+                        size = 3;
+                        imm = (long)((ulong)imm * 0x0101010101010101); // Multiplying by 0x01 in every byte copies the low byte into all eight bytes.
+                        break;
+
+                    case 1:
+                        // 64-bits Immediate.
+                        // Transform abcd efgh into aaaa aaaa bbbb bbbb ...
+                        size = 3;
+                        imm = (imm & 0xf0) >> 4 | (imm & 0x0f) << 4; // Reverse the bit order of the low byte: swap nibbles,
+                        imm = (imm & 0xcc) >> 2 | (imm & 0x33) << 2; // then swap bit pairs,
+                        imm = (imm & 0xaa) >> 1 | (imm & 0x55) << 1; // then swap adjacent bits.
+
+                        imm = (long)((ulong)imm * 0x8040201008040201); // Spread copies of the byte so that the mask below keeps one source bit per byte.
+                        imm = (long)((ulong)imm & 0x8080808080808080); // Keep only the top bit of each byte.
+
+                        imm |= imm >> 4; // These three steps smear each byte's top bit
+                        imm |= imm >> 2; // down through the rest of that byte,
+                        imm |= imm >> 1; // leaving every byte either 0x00 or 0xff.
+                        break;
+
+                    case 2:
+                        // 2 x 32-bits floating point Immediate.
+                        size = 3;
+                        imm = (long)DecoderHelper.Imm8ToFP32Table[(int)imm]; // Table lookup indexed by the raw immediate.
+                        imm |= imm << 32; // Duplicate the looked-up value into the upper 32 bits.
+                        break;
+
+                    case 3:
+                        // 64-bits floating point Immediate.
+                        size = 3;
+                        imm = (long)DecoderHelper.Imm8ToFP64Table[(int)imm]; // Table lookup indexed by the raw immediate.
+                        break;
+                }
+            }
+            else if ((modeHigh & 0b110) == 0b100)
+            {
+                // 16-bits shifted Immediate.
+                size = 1; imm <<= (modeHigh & 1) << 3; // Shift left by 0 or 8 bits.
+            }
+            else if ((modeHigh & 0b100) == 0b000)
+            {
+                // 32-bits shifted Immediate.
+                size = 2; imm <<= modeHigh << 3; // Shift left by 0, 8, 16 or 24 bits.
+            }
+            else if ((modeHigh & 0b111) == 0b110)
+            {
+                // 32-bits shifted Immediate (fill with ones).
+                size = 2; imm = ShlOnes(imm, 8 << modeLow); // Shift left by 8 or 16 bits, filling the vacated low bits with ones.
+            }
+            else
+            {
+                // 8-bits without shift.
+                size = 0;
+            }
+
+            return (imm, size);
+        }
+
+        private static long ShlOnes(long value, int shift) // Shifts value left by shift bits and sets the vacated low bits to one.
+        {
+            if (shift != 0)
+            {
+                return value << shift | (long)(ulong.MaxValue >> (64 - shift)); // The right-hand operand is a mask of `shift` low one bits.
+            }
+            else
+            {
+                return value; // Handled separately: C# masks 64-bit shift counts to six bits, so ">> 64" would not yield an empty mask.
+            }
+        }
+    }
+}
diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index 2fa7702d90..3915ac87bb 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -599,32 +599,283 @@ namespace ARMeilleure.Decoders #region "OpCode Table (AArch32)" // Base - SetA32("<<<<0010100xxxxxxxxxxxxxxxxxxxxx", InstName.Add, InstEmit32.Add, typeof(OpCode32AluImm)); - SetA32("<<<<0000100xxxxxxxxxxxxxxxx0xxxx", InstName.Add, InstEmit32.Add, typeof(OpCode32AluRsImm)); - SetA32("<<<<1010xxxxxxxxxxxxxxxxxxxxxxxx", InstName.B, InstEmit32.B, typeof(OpCode32BImm)); - SetA32("<<<<1011xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bl, InstEmit32.Bl, typeof(OpCode32BImm)); - SetA32("1111101xxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Blx, InstEmit32.Blx, typeof(OpCode32BImm)); - SetA32("<<<<000100101111111111110001xxxx", InstName.Bx, InstEmit32.Bx, typeof(OpCode32BReg)); - SetT32("xxxxxxxxxxxxxxxx010001110xxxx000", InstName.Bx, InstEmit32.Bx, typeof(OpCodeT16BReg)); - SetA32("<<<<00110101xxxx0000xxxxxxxxxxxx", InstName.Cmp, InstEmit32.Cmp, typeof(OpCode32AluImm)); - SetA32("<<<<00010101xxxx0000xxxxxxx0xxxx", InstName.Cmp, InstEmit32.Cmp, typeof(OpCode32AluRsImm)); - SetA32("<<<<100xx0x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldm, InstEmit32.Ldm, typeof(OpCode32MemMult)); - SetA32("<<<<010xx0x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldr, InstEmit32.Ldr, typeof(OpCode32MemImm)); - SetA32("<<<<010xx1x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldrb, InstEmit32.Ldrb, typeof(OpCode32MemImm)); - SetA32("<<<<000xx1x0xxxxxxxxxxxx1101xxxx", InstName.Ldrd, InstEmit32.Ldrd, typeof(OpCode32MemImm8)); - SetA32("<<<<000xx1x1xxxxxxxxxxxx1011xxxx", InstName.Ldrh, InstEmit32.Ldrh, typeof(OpCode32MemImm8)); - SetA32("<<<<000xx1x1xxxxxxxxxxxx1101xxxx", InstName.Ldrsb, 
InstEmit32.Ldrsb, typeof(OpCode32MemImm8)); - SetA32("<<<<000xx1x1xxxxxxxxxxxx1111xxxx", InstName.Ldrsh, InstEmit32.Ldrsh, typeof(OpCode32MemImm8)); - SetA32("<<<<0011101x0000xxxxxxxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, typeof(OpCode32AluImm)); - SetA32("<<<<0001101x0000xxxxxxxxxxx0xxxx", InstName.Mov, InstEmit32.Mov, typeof(OpCode32AluRsImm)); - SetT32("xxxxxxxxxxxxxxxx00100xxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, typeof(OpCodeT16AluImm8)); - SetA32("<<<<100xx0x0xxxxxxxxxxxxxxxxxxxx", InstName.Stm, InstEmit32.Stm, typeof(OpCode32MemMult)); - SetA32("<<<<010xx0x0xxxxxxxxxxxxxxxxxxxx", InstName.Str, InstEmit32.Str, typeof(OpCode32MemImm)); - SetA32("<<<<010xx1x0xxxxxxxxxxxxxxxxxxxx", InstName.Strb, InstEmit32.Strb, typeof(OpCode32MemImm)); - SetA32("<<<<000xx1x0xxxxxxxxxxxx1111xxxx", InstName.Strd, InstEmit32.Strd, typeof(OpCode32MemImm8)); - SetA32("<<<<000xx1x0xxxxxxxxxxxx1011xxxx", InstName.Strh, InstEmit32.Strh, typeof(OpCode32MemImm8)); - SetA32("<<<<0010010xxxxxxxxxxxxxxxxxxxxx", InstName.Sub, InstEmit32.Sub, typeof(OpCode32AluImm)); - SetA32("<<<<0000010xxxxxxxxxxxxxxxx0xxxx", InstName.Sub, InstEmit32.Sub, typeof(OpCode32AluRsImm)); + SetA32("<<<<0010101xxxxxxxxxxxxxxxxxxxxx", InstName.Adc, InstEmit32.Adc, typeof(OpCode32AluImm)); + SetA32("<<<<0000101xxxxxxxxxxxxxxxx0xxxx", InstName.Adc, InstEmit32.Adc, typeof(OpCode32AluRsImm)); + SetA32("<<<<0000101xxxxxxxxxxxxx0xx1xxxx", InstName.Adc, InstEmit32.Adc, typeof(OpCode32AluRsReg)); + SetA32("<<<<0010100xxxxxxxxxxxxxxxxxxxxx", InstName.Add, InstEmit32.Add, typeof(OpCode32AluImm)); + SetA32("<<<<0000100xxxxxxxxxxxxxxxx0xxxx", InstName.Add, InstEmit32.Add, typeof(OpCode32AluRsImm)); + SetA32("<<<<0000100xxxxxxxxxxxxx0xx1xxxx", InstName.Add, InstEmit32.Add, typeof(OpCode32AluRsReg)); + SetA32("<<<<0010000xxxxxxxxxxxxxxxxxxxxx", InstName.And, InstEmit32.And, typeof(OpCode32AluImm)); + SetA32("<<<<0000000xxxxxxxxxxxxxxxx0xxxx", InstName.And, InstEmit32.And, typeof(OpCode32AluRsImm)); + 
SetA32("<<<<0000000xxxxxxxxxxxxx0xx1xxxx", InstName.And, InstEmit32.And, typeof(OpCode32AluRsReg)); + SetA32("<<<<1010xxxxxxxxxxxxxxxxxxxxxxxx", InstName.B, InstEmit32.B, typeof(OpCode32BImm)); + SetA32("<<<<0111110xxxxxxxxxxxxxx0011111", InstName.Bfc, InstEmit32.Bfc, typeof(OpCode32AluBf)); + SetA32("<<<<0111110xxxxxxxxxxxxxx001xxxx", InstName.Bfi, InstEmit32.Bfi, typeof(OpCode32AluBf)); + SetA32("<<<<0011110xxxxxxxxxxxxxxxxxxxxx", InstName.Bic, InstEmit32.Bic, typeof(OpCode32AluImm)); + SetA32("<<<<0001110xxxxxxxxxxxxxxxx0xxxx", InstName.Bic, InstEmit32.Bic, typeof(OpCode32AluRsImm)); + SetA32("<<<<0001110xxxxxxxxxxxxx0xx1xxxx", InstName.Bic, InstEmit32.Bic, typeof(OpCode32AluRsReg)); + SetA32("<<<<1011xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bl, InstEmit32.Bl, typeof(OpCode32BImm)); + SetA32("1111101xxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Blx, InstEmit32.Blx, typeof(OpCode32BImm)); + SetA32("<<<<000100101111111111110011xxxx", InstName.Blx, InstEmit32.Blxr, typeof(OpCode32BReg)); + SetA32("<<<<000100101111111111110001xxxx", InstName.Bx, InstEmit32.Bx, typeof(OpCode32BReg)); + SetT32("xxxxxxxxxxxxxxxx010001110xxxx000", InstName.Bx, InstEmit32.Bx, typeof(OpCodeT16BReg)); + SetA32("11110101011111111111000000011111", InstName.Clrex, InstEmit32.Clrex, typeof(OpCode32)); + SetA32("<<<<000101101111xxxx11110001xxxx", InstName.Clz, InstEmit32.Clz, typeof(OpCode32AluReg)); + SetA32("<<<<00110111xxxx0000xxxxxxxxxxxx", InstName.Cmn, InstEmit32.Cmn, typeof(OpCode32AluImm)); + SetA32("<<<<00010111xxxx0000xxxxxxx0xxxx", InstName.Cmn, InstEmit32.Cmn, typeof(OpCode32AluRsImm)); + SetA32("<<<<00110101xxxx0000xxxxxxxxxxxx", InstName.Cmp, InstEmit32.Cmp, typeof(OpCode32AluImm)); + SetA32("<<<<00010101xxxx0000xxxxxxx0xxxx", InstName.Cmp, InstEmit32.Cmp, typeof(OpCode32AluRsImm)); + SetA32("<<<<00010101xxxx0000xxxx0xx1xxxx", InstName.Cmp, InstEmit32.Cmp, typeof(OpCode32AluRsReg)); + SetA32("1111010101111111111100000101xxxx", InstName.Dmb, InstEmit32.Dmb, typeof(OpCode32)); + 
SetA32("1111010101111111111100000100xxxx", InstName.Dsb, InstEmit32.Dsb, typeof(OpCode32)); + SetA32("<<<<0010001xxxxxxxxxxxxxxxxxxxxx", InstName.Eor, InstEmit32.Eor, typeof(OpCode32AluImm)); + SetA32("<<<<0000001xxxxxxxxxxxxxxxx0xxxx", InstName.Eor, InstEmit32.Eor, typeof(OpCode32AluRsImm)); + SetA32("<<<<0000001xxxxxxxxxxxxx0xx1xxxx", InstName.Eor, InstEmit32.Eor, typeof(OpCode32AluRsReg)); + SetA32("1111010101111111111100000110xxxx", InstName.Isb, InstEmit32.Nop, typeof(OpCode32)); + SetA32("<<<<00011001xxxxxxxx110010011111", InstName.Lda, InstEmit32.Lda, typeof(OpCode32MemLdEx)); + SetA32("<<<<00011101xxxxxxxx110010011111", InstName.Ldab, InstEmit32.Ldab, typeof(OpCode32MemLdEx)); + SetA32("<<<<00011001xxxxxxxx111010011111", InstName.Ldaex, InstEmit32.Ldaex, typeof(OpCode32MemLdEx)); + SetA32("<<<<00011101xxxxxxxx111010011111", InstName.Ldaexb, InstEmit32.Ldaexb, typeof(OpCode32MemLdEx)); + SetA32("<<<<00011011xxxxxxxx111010011111", InstName.Ldaexd, InstEmit32.Ldaexd, typeof(OpCode32MemLdEx)); + SetA32("<<<<00011111xxxxxxxx111010011111", InstName.Ldaexh, InstEmit32.Ldaexh, typeof(OpCode32MemLdEx)); + SetA32("<<<<00011111xxxxxxxx110010011111", InstName.Ldah, InstEmit32.Ldah, typeof(OpCode32MemLdEx)); + SetA32("<<<<100xx0x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldm, InstEmit32.Ldm, typeof(OpCode32MemMult)); + SetA32("<<<<010xx0x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldr, InstEmit32.Ldr, typeof(OpCode32MemImm)); + SetA32("<<<<011xx0x1xxxxxxxxxxxxxxx0xxxx", InstName.Ldr, InstEmit32.Ldr, typeof(OpCode32MemRsImm)); + SetA32("<<<<010xx1x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldrb, InstEmit32.Ldrb, typeof(OpCode32MemImm)); + SetA32("<<<<011xx1x1xxxxxxxxxxxxxxx0xxxx", InstName.Ldrb, InstEmit32.Ldrb, typeof(OpCode32MemRsImm)); + SetA32("<<<<000xx1x0xxxxxxxxxxxx1101xxxx", InstName.Ldrd, InstEmit32.Ldrd, typeof(OpCode32MemImm8)); + SetA32("<<<<000xx0x0xxxxxxxx00001101xxxx", InstName.Ldrd, InstEmit32.Ldrd, typeof(OpCode32MemReg)); + SetA32("<<<<00011001xxxxxxxx111110011111", InstName.Ldrex, 
InstEmit32.Ldrex, typeof(OpCode32MemLdEx)); + SetA32("<<<<00011101xxxxxxxx111110011111", InstName.Ldrexb, InstEmit32.Ldrexb, typeof(OpCode32MemLdEx)); + SetA32("<<<<00011011xxxxxxxx111110011111", InstName.Ldrexd, InstEmit32.Ldrexd, typeof(OpCode32MemLdEx)); + SetA32("<<<<00011111xxxxxxxx111110011111", InstName.Ldrexh, InstEmit32.Ldrexh, typeof(OpCode32MemLdEx)); + SetA32("<<<<000xx1x1xxxxxxxxxxxx1011xxxx", InstName.Ldrh, InstEmit32.Ldrh, typeof(OpCode32MemImm8)); + SetA32("<<<<000xx0x1xxxxxxxx00001011xxxx", InstName.Ldrh, InstEmit32.Ldrh, typeof(OpCode32MemReg)); + SetA32("<<<<000xx1x1xxxxxxxxxxxx1101xxxx", InstName.Ldrsb, InstEmit32.Ldrsb, typeof(OpCode32MemImm8)); + SetA32("<<<<000xx0x1xxxxxxxx00001101xxxx", InstName.Ldrsb, InstEmit32.Ldrsb, typeof(OpCode32MemReg)); + SetA32("<<<<000xx1x1xxxxxxxxxxxx1111xxxx", InstName.Ldrsh, InstEmit32.Ldrsh, typeof(OpCode32MemImm8)); + SetA32("<<<<000xx0x1xxxxxxxx00001111xxxx", InstName.Ldrsh, InstEmit32.Ldrsh, typeof(OpCode32MemReg)); + SetA32("<<<<1110xxx0xxxxxxxx111xxxx1xxxx", InstName.Mcr, InstEmit32.Mcr, typeof(OpCode32System)); + SetA32("<<<<0000001xxxxxxxxxxxxx1001xxxx", InstName.Mla, InstEmit32.Mla, typeof(OpCode32AluMla)); + SetA32("<<<<00000110xxxxxxxxxxxx1001xxxx", InstName.Mls, InstEmit32.Mls, typeof(OpCode32AluMla)); + SetA32("<<<<0011101x0000xxxxxxxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, typeof(OpCode32AluImm)); + SetA32("<<<<0001101x0000xxxxxxxxxxx0xxxx", InstName.Mov, InstEmit32.Mov, typeof(OpCode32AluRsImm)); + SetA32("<<<<0001101x0000xxxxxxxx0xx1xxxx", InstName.Mov, InstEmit32.Mov, typeof(OpCode32AluRsReg)); + SetA32("<<<<00110000xxxxxxxxxxxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, typeof(OpCode32AluImm16)); + SetT32("xxxxxxxxxxxxxxxx00100xxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, typeof(OpCodeT16AluImm8)); + SetA32("<<<<00110100xxxxxxxxxxxxxxxxxxxx", InstName.Movt, InstEmit32.Movt, typeof(OpCode32AluImm16)); + SetA32("<<<<1110xxx1xxxxxxxx111xxxx1xxxx", InstName.Mrc, InstEmit32.Mrc, typeof(OpCode32System)); 
+ SetA32("<<<<11000101xxxxxxxx111xxxxxxxxx", InstName.Mrrc, InstEmit32.Mrrc, typeof(OpCode32System)); + SetA32("<<<<0000000xxxxx0000xxxx1001xxxx", InstName.Mul, InstEmit32.Mul, typeof(OpCode32AluMla)); + SetA32("<<<<0011111x0000xxxxxxxxxxxxxxxx", InstName.Mvn, InstEmit32.Mvn, typeof(OpCode32AluImm)); + SetA32("<<<<0001111x0000xxxxxxxxxxx0xxxx", InstName.Mvn, InstEmit32.Mvn, typeof(OpCode32AluRsImm)); + SetA32("<<<<0001111x0000xxxxxxxx0xx1xxxx", InstName.Mvn, InstEmit32.Mvn, typeof(OpCode32AluRsReg)); + SetA32("<<<<0011100xxxxxxxxxxxxxxxxxxxxx", InstName.Orr, InstEmit32.Orr, typeof(OpCode32AluImm)); + SetA32("<<<<0001100xxxxxxxxxxxxxxxx0xxxx", InstName.Orr, InstEmit32.Orr, typeof(OpCode32AluRsImm)); + SetA32("<<<<0001100xxxxxxxxxxxxx0xx1xxxx", InstName.Orr, InstEmit32.Orr, typeof(OpCode32AluRsReg)); + SetA32("<<<<01101000xxxxxxxxxxxxxx01xxxx", InstName.Pkh, InstEmit32.Pkh, typeof(OpCode32AluRsImm)); + SetA32("11110101xx01xxxx1111xxxxxxxxxxxx", InstName.Pld, InstEmit32.Nop, typeof(OpCode32)); + SetA32("11110111xx01xxxx1111xxxxxxx0xxxx", InstName.Pld, InstEmit32.Nop, typeof(OpCode32)); + SetA32("<<<<011011111111xxxx11110011xxxx", InstName.Rbit, InstEmit32.Rbit, typeof(OpCode32AluReg)); + SetA32("<<<<011010111111xxxx11110011xxxx", InstName.Rev, InstEmit32.Rev, typeof(OpCode32AluReg)); + SetA32("<<<<011010111111xxxx11111011xxxx", InstName.Rev16, InstEmit32.Rev16, typeof(OpCode32AluReg)); + SetA32("<<<<011011111111xxxx11111011xxxx", InstName.Revsh, InstEmit32.Revsh, typeof(OpCode32AluReg)); + SetA32("<<<<0010011xxxxxxxxxxxxxxxxxxxxx", InstName.Rsb, InstEmit32.Rsb, typeof(OpCode32AluImm)); + SetA32("<<<<0000011xxxxxxxxxxxxxxxx0xxxx", InstName.Rsb, InstEmit32.Rsb, typeof(OpCode32AluRsImm)); + SetA32("<<<<0000011xxxxxxxxxxxxx0xx1xxxx", InstName.Rsb, InstEmit32.Rsb, typeof(OpCode32AluRsReg)); + SetA32("<<<<0010111xxxxxxxxxxxxxxxxxxxxx", InstName.Rsc, InstEmit32.Rsc, typeof(OpCode32AluImm)); + SetA32("<<<<0000111xxxxxxxxxxxxxxxx0xxxx", InstName.Rsc, InstEmit32.Rsc, 
typeof(OpCode32AluRsImm)); + SetA32("<<<<0000111xxxxxxxxxxxxx0xx1xxxx", InstName.Rsc, InstEmit32.Rsc, typeof(OpCode32AluRsReg)); + SetA32("<<<<0010110xxxxxxxxxxxxxxxxxxxxx", InstName.Sbc, InstEmit32.Sbc, typeof(OpCode32AluImm)); + SetA32("<<<<0000110xxxxxxxxxxxxxxxx0xxxx", InstName.Sbc, InstEmit32.Sbc, typeof(OpCode32AluRsImm)); + SetA32("<<<<0000110xxxxxxxxxxxxx0xx1xxxx", InstName.Sbc, InstEmit32.Sbc, typeof(OpCode32AluRsReg)); + SetA32("<<<<0111101xxxxxxxxxxxxxx101xxxx", InstName.Sbfx, InstEmit32.Sbfx, typeof(OpCode32AluBf)); + SetA32("<<<<01110001xxxx1111xxxx0001xxxx", InstName.Sdiv, InstEmit32.Sdiv, typeof(OpCode32AluMla)); + SetA32("<<<<00010000xxxxxxxxxxxx1xx0xxxx", InstName.Smlab, InstEmit32.Smlab, typeof(OpCode32AluMla)); + SetA32("<<<<0000111xxxxxxxxxxxxx1001xxxx", InstName.Smlal, InstEmit32.Smlal, typeof(OpCode32AluUmull)); + SetA32("<<<<00010100xxxxxxxxxxxx1xx0xxxx", InstName.Smlalh, InstEmit32.Smlalh, typeof(OpCode32AluUmull)); + SetA32("<<<<01110101xxxxxxxxxxxx00x1xxxx", InstName.Smmla, InstEmit32.Smmla, typeof(OpCode32AluMla)); + SetA32("<<<<01110101xxxxxxxxxxxx11x1xxxx", InstName.Smmls, InstEmit32.Smmls, typeof(OpCode32AluMla)); + SetA32("<<<<00010110xxxxxxxxxxxx1xx0xxxx", InstName.Smulh, InstEmit32.Smulh, typeof(OpCode32AluMla)); + SetA32("<<<<0000110xxxxxxxxxxxxx1001xxxx", InstName.Smull, InstEmit32.Smull, typeof(OpCode32AluUmull)); + SetA32("<<<<00011000xxxx111111001001xxxx", InstName.Stl, InstEmit32.Stl, typeof(OpCode32MemStEx)); + SetA32("<<<<00011100xxxx111111001001xxxx", InstName.Stlb, InstEmit32.Stlb, typeof(OpCode32MemStEx)); + SetA32("<<<<00011000xxxxxxxx11101001xxxx", InstName.Stlex, InstEmit32.Stlex, typeof(OpCode32MemStEx)); + SetA32("<<<<00011100xxxxxxxx11101001xxxx", InstName.Stlexb, InstEmit32.Stlexb, typeof(OpCode32MemStEx)); + SetA32("<<<<00011010xxxxxxxx11101001xxxx", InstName.Stlexd, InstEmit32.Stlexd, typeof(OpCode32MemStEx)); + SetA32("<<<<00011110xxxxxxxx11101001xxxx", InstName.Stlexh, InstEmit32.Stlexh, 
typeof(OpCode32MemStEx)); + SetA32("<<<<00011110xxxx111111001001xxxx", InstName.Stlh, InstEmit32.Stlh, typeof(OpCode32MemStEx)); + SetA32("<<<<100xx0x0xxxxxxxxxxxxxxxxxxxx", InstName.Stm, InstEmit32.Stm, typeof(OpCode32MemMult)); + SetA32("<<<<010xx0x0xxxxxxxxxxxxxxxxxxxx", InstName.Str, InstEmit32.Str, typeof(OpCode32MemImm)); + SetA32("<<<<011xx0x0xxxxxxxxxxxxxxx0xxxx", InstName.Str, InstEmit32.Str, typeof(OpCode32MemRsImm)); + SetA32("<<<<010xx1x0xxxxxxxxxxxxxxxxxxxx", InstName.Strb, InstEmit32.Strb, typeof(OpCode32MemImm)); + SetA32("<<<<011xx1x0xxxxxxxxxxxxxxx0xxxx", InstName.Strb, InstEmit32.Strb, typeof(OpCode32MemRsImm)); + SetA32("<<<<000xx1x0xxxxxxxxxxxx1111xxxx", InstName.Strd, InstEmit32.Strd, typeof(OpCode32MemImm8)); + SetA32("<<<<000xx0x0xxxxxxxx00001111xxxx", InstName.Strd, InstEmit32.Strd, typeof(OpCode32MemReg)); + SetA32("<<<<00011000xxxxxxxx11111001xxxx", InstName.Strex, InstEmit32.Strex, typeof(OpCode32MemStEx)); + SetA32("<<<<00011100xxxxxxxx11111001xxxx", InstName.Strexb, InstEmit32.Strexb, typeof(OpCode32MemStEx)); + SetA32("<<<<00011010xxxxxxxx11111001xxxx", InstName.Strexd, InstEmit32.Strexd, typeof(OpCode32MemStEx)); + SetA32("<<<<00011110xxxxxxxx11111001xxxx", InstName.Strexh, InstEmit32.Strexh, typeof(OpCode32MemStEx)); + SetA32("<<<<000xx1x0xxxxxxxxxxxx1011xxxx", InstName.Strh, InstEmit32.Strh, typeof(OpCode32MemImm8)); + SetA32("<<<<000xx0x0xxxxxxxx00001011xxxx", InstName.Strh, InstEmit32.Strh, typeof(OpCode32MemReg)); + SetA32("<<<<0010010xxxxxxxxxxxxxxxxxxxxx", InstName.Sub, InstEmit32.Sub, typeof(OpCode32AluImm)); + SetA32("<<<<0000010xxxxxxxxxxxxxxxx0xxxx", InstName.Sub, InstEmit32.Sub, typeof(OpCode32AluRsImm)); + SetA32("<<<<0000010xxxxxxxxxxxxx0xx1xxxx", InstName.Sub, InstEmit32.Sub, typeof(OpCode32AluRsReg)); + SetA32("<<<<1111xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Svc, InstEmit32.Svc, typeof(OpCode32Exception)); + SetA32("<<<<01101010xxxxxxxxxx000111xxxx", InstName.Sxtb, InstEmit32.Sxtb, typeof(OpCode32AluUx)); + 
SetA32("<<<<01101000xxxxxxxxxx000111xxxx", InstName.Sxtb16, InstEmit32.Sxtb16, typeof(OpCode32AluUx)); + SetA32("<<<<01101011xxxxxxxxxx000111xxxx", InstName.Sxth, InstEmit32.Sxth, typeof(OpCode32AluUx)); + SetA32("<<<<00110011xxxx0000xxxxxxxxxxxx", InstName.Teq, InstEmit32.Teq, typeof(OpCode32AluImm)); + SetA32("<<<<00010011xxxx0000xxxxxxx0xxxx", InstName.Teq, InstEmit32.Teq, typeof(OpCode32AluRsImm)); + SetA32("<<<<00010011xxxx0000xxxx0xx1xxxx", InstName.Teq, InstEmit32.Teq, typeof(OpCode32AluRsReg)); + SetA32("<<<<0111111111111101111011111110", InstName.Trap, InstEmit32.Trap, typeof(OpCode32Exception)); + SetA32("<<<<00110001xxxx0000xxxxxxxxxxxx", InstName.Tst, InstEmit32.Tst, typeof(OpCode32AluImm)); + SetA32("<<<<00010001xxxx0000xxxxxxx0xxxx", InstName.Tst, InstEmit32.Tst, typeof(OpCode32AluRsImm)); + SetA32("<<<<00010001xxxx0000xxxx0xx1xxxx", InstName.Tst, InstEmit32.Tst, typeof(OpCode32AluRsReg)); + SetA32("<<<<0111111xxxxxxxxxxxxxx101xxxx", InstName.Ubfx, InstEmit32.Ubfx, typeof(OpCode32AluBf)); + SetA32("<<<<01110011xxxx1111xxxx0001xxxx", InstName.Udiv, InstEmit32.Udiv, typeof(OpCode32AluMla)); + SetA32("<<<<0000101xxxxxxxxxxxxx1001xxxx", InstName.Umlal, InstEmit32.Umlal, typeof(OpCode32AluUmull)); + SetA32("<<<<0000100xxxxxxxxxxxxx1001xxxx", InstName.Umull, InstEmit32.Umull, typeof(OpCode32AluUmull)); + SetA32("<<<<01101110xxxxxxxxxx000111xxxx", InstName.Uxtb, InstEmit32.Uxtb, typeof(OpCode32AluUx)); + SetA32("<<<<01101100xxxxxxxxxx000111xxxx", InstName.Uxtb16, InstEmit32.Uxtb16, typeof(OpCode32AluUx)); + SetA32("<<<<01101111xxxxxxxxxx000111xxxx", InstName.Uxth, InstEmit32.Uxth, typeof(OpCode32AluUx)); + + // FP & SIMD + SetA32("<<<<11101x110000xxxx10xx11x0xxxx", InstName.Vabs, InstEmit32.Vabs_S, typeof(OpCode32SimdRegS)); + SetA32("111100111x11xx01xxxx0x110xx0xxxx", InstName.Vabs, InstEmit32.Vabs_V, typeof(OpCode32SimdReg)); + SetA32("111100100xxxxxxxxxxx1000xxx0xxxx", InstName.Vadd, InstEmit32.Vadd_I, typeof(OpCode32SimdReg)); + 
SetA32("<<<<11100x11xxxxxxxx101xx0x0xxxx", InstName.Vadd, InstEmit32.Vadd_S, typeof(OpCode32SimdRegS)); + SetA32("111100100x00xxxxxxxx1101xxx0xxxx", InstName.Vadd, InstEmit32.Vadd_V, typeof(OpCode32SimdReg)); + SetA32("111100100x00xxxxxxxx0001xxx1xxxx", InstName.Vand, InstEmit32.Vand_I, typeof(OpCode32SimdBinary)); + SetA32("111100110x11xxxxxxxx0001xxx1xxxx", InstName.Vbif, InstEmit32.Vbif, typeof(OpCode32SimdBinary)); + SetA32("111100110x10xxxxxxxx0001xxx1xxxx", InstName.Vbit, InstEmit32.Vbit, typeof(OpCode32SimdBinary)); + SetA32("111100110x01xxxxxxxx0001xxx1xxxx", InstName.Vbsl, InstEmit32.Vbsl, typeof(OpCode32SimdBinary)); + SetA32("111100110x<>x1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_R, typeof(OpCode32SimdCvtFI)); // The many FP32 to int encodings (fp). + SetA32("111100111x111011xxxx011xxxx0xxxx", InstName.Vcvt, InstEmit32.Vcvt_V, typeof(OpCode32SimdCmpZ)); // FP and integer, vector. + SetA32("<<<<11101x00xxxxxxxx101xx0x0xxxx", InstName.Vdiv, InstEmit32.Vdiv_S, typeof(OpCode32SimdRegS)); + SetA32("<<<<11101xx0xxxxxxxx1011x0x10000", InstName.Vdup, InstEmit32.Vdup, typeof(OpCode32SimdDupGP)); + SetA32("111100111x11xxxxxxxx11000xx0xxxx", InstName.Vdup, InstEmit32.Vdup_1, typeof(OpCode32SimdDupElem)); + SetA32("111100101x11xxxxxxxxxxxxxxx0xxxx", InstName.Vext, InstEmit32.Vext, typeof(OpCode32SimdExt)); + SetA32("111101001x10xxxxxxxxxx00xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemSingle)); + SetA32("111101000x10xxxxxxxx0111xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemPair)); // Regs = 1. + SetA32("111101000x10xxxxxxxx1010xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemPair)); // Regs = 2. + SetA32("111101000x10xxxxxxxx0110xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemPair)); // Regs = 3. + SetA32("111101000x10xxxxxxxx0010xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemPair)); // Regs = 4. 
+ SetA32("111101001x10xxxxxxxxxx01xxxxxxxx", InstName.Vld2, InstEmit32.Vld2, typeof(OpCode32SimdMemSingle)); + SetA32("111101000x10xxxxxxxx100xxxxxxxxx", InstName.Vld2, InstEmit32.Vld2, typeof(OpCode32SimdMemPair)); // Regs = 1, inc = 1/2 (itype). + SetA32("111101000x10xxxxxxxx0011xxxxxxxx", InstName.Vld2, InstEmit32.Vld2, typeof(OpCode32SimdMemPair)); // Regs = 2, inc = 2. + SetA32("111101001x10xxxxxxxxxx10xxxxxxxx", InstName.Vld3, InstEmit32.Vld3, typeof(OpCode32SimdMemSingle)); + SetA32("111101000x10xxxxxxxx010xxxxxxxxx", InstName.Vld3, InstEmit32.Vld3, typeof(OpCode32SimdMemPair)); // Inc = 1/2 (itype). + SetA32("111101001x10xxxxxxxxxx11xxxxxxxx", InstName.Vld4, InstEmit32.Vld4, typeof(OpCode32SimdMemSingle)); + SetA32("111101000x10xxxxxxxx000xxxxxxxxx", InstName.Vld4, InstEmit32.Vld4, typeof(OpCode32SimdMemPair)); // Inc = 1/2 (itype). + SetA32("<<<<11001x01xxxxxxxx1011xxxxxxx0", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11001x11xxxxxxxx1011xxxxxxx0", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11010x11xxxxxxxx1011xxxxxxx0", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11001x01xxxxxxxx1010xxxxxxxx", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11001x11xxxxxxxx1010xxxxxxxx", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11010x11xxxxxxxx1010xxxxxxxx", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<1101xx01xxxxxxxx101xxxxxxxxx", InstName.Vldr, InstEmit32.Vldr, typeof(OpCode32SimdMemImm)); + SetA32("1111001x0x<>x0x0xxxx", InstName.Vmaxnm, InstEmit32.Vmaxnm_S, typeof(OpCode32SimdRegS)); + SetA32("111100110x0xxxxxxxxx1111xxx1xxxx", InstName.Vmaxnm, InstEmit32.Vmaxnm_V, typeof(OpCode32SimdReg)); + SetA32("111111101x00xxxxxxxx10>>x1x0xxxx", InstName.Vminnm, InstEmit32.Vminnm_S, typeof(OpCode32SimdRegS)); + SetA32("111100110x1xxxxxxxxx1111xxx1xxxx", InstName.Vminnm, 
InstEmit32.Vminnm_V, typeof(OpCode32SimdReg)); + SetA32("1111001x1x<>>xxxxxxx0101>xx1xxxx", InstName.Vshl, InstEmit32.Vshl, typeof(OpCode32SimdShImm)); + SetA32("1111001x0xxxxxxxxxxx0100xxx0xxxx", InstName.Vshl, InstEmit32.Vshl_I, typeof(OpCode32SimdReg)); + SetA32("1111001x1x>>>xxxxxxx0000>xx1xxxx", InstName.Vshr, InstEmit32.Vshr, typeof(OpCode32SimdShImm)); + SetA32("111100101x>>>xxxxxxx100000x1xxx0", InstName.Vshrn, InstEmit32.Vshrn, typeof(OpCode32SimdShImm)); + SetA32("<<<<11101x110001xxxx101x11x0xxxx", InstName.Vsqrt, InstEmit32.Vsqrt_S, typeof(OpCode32SimdS)); + SetA32("111101001x00xxxxxxxx<<00xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemSingle)); + SetA32("111101000x00xxxxxxxx0111xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemPair)); // Regs = 1. + SetA32("111101000x00xxxxxxxx1010xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemPair)); // Regs = 2. + SetA32("111101000x00xxxxxxxx0110xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemPair)); // Regs = 3. + SetA32("111101000x00xxxxxxxx0010xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemPair)); // Regs = 4. + SetA32("111101001x00xxxxxxxx<<01xxxxxxxx", InstName.Vst2, InstEmit32.Vst2, typeof(OpCode32SimdMemSingle)); + SetA32("111101000x00xxxxxxxx100xxxxxxxxx", InstName.Vst2, InstEmit32.Vst2, typeof(OpCode32SimdMemPair)); // Regs = 1, inc = 1/2 (itype). + SetA32("111101000x00xxxxxxxx0011xxxxxxxx", InstName.Vst2, InstEmit32.Vst2, typeof(OpCode32SimdMemPair)); // Regs = 2, inc = 2. + SetA32("111101001x00xxxxxxxx<<10xxxxxxxx", InstName.Vst3, InstEmit32.Vst3, typeof(OpCode32SimdMemSingle)); + SetA32("111101000x00xxxxxxxx010xxxxxxxxx", InstName.Vst3, InstEmit32.Vst3, typeof(OpCode32SimdMemPair)); // Inc = 1/2 (itype). 
+ SetA32("111101001x00xxxxxxxx<<11xxxxxxxx", InstName.Vst4, InstEmit32.Vst4, typeof(OpCode32SimdMemSingle)); + SetA32("111101000x00xxxxxxxx000xxxxxxxxx", InstName.Vst4, InstEmit32.Vst4, typeof(OpCode32SimdMemPair)); // Inc = 1/2 (itype). + SetA32("<<<<11001x00xxxxxxxx1011xxxxxxx0", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11001x10xxxxxxxx1011xxxxxxx0", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11010x10xxxxxxxx1011xxxxxxx0", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11001x00xxxxxxxx1010xxxxxxxx", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11001x10xxxxxxxx1010xxxxxxxx", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11010x10xxxxxxxx1010xxxxxxxx", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<1101xx00xxxxxxxx101xxxxxxxxx", InstName.Vstr, InstEmit32.Vstr, typeof(OpCode32SimdMemImm)); + SetA32("111100110xxxxxxxxxxx1000xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_I, typeof(OpCode32SimdReg)); + SetA32("<<<<11100x11xxxxxxxx101xx1x0xxxx", InstName.Vsub, InstEmit32.Vsub_S, typeof(OpCode32SimdRegS)); + SetA32("111100100x10xxxxxxxx1101xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_V, typeof(OpCode32SimdReg)); + SetA32("111100111x11xxxxxxxx10xxxxx0xxxx", InstName.Vtbl, InstEmit32.Vtbl, typeof(OpCode32SimdTbl)); + SetA32("111100111x11<<10xxxx00001xx0xxxx", InstName.Vtrn, InstEmit32.Vtrn, typeof(OpCode32SimdCmpZ)); + SetA32("111100111x11<<10xxxx00010xx0xxxx", InstName.Vuzp, InstEmit32.Vuzp, typeof(OpCode32SimdCmpZ)); + SetA32("111100111x11<<10xxxx00011xx0xxxx", InstName.Vzip, InstEmit32.Vzip, typeof(OpCode32SimdCmpZ)); #endregion FillFastLookupTable(_instA32FastLookup, _allInstA32); diff --git a/ARMeilleure/Diagnostics/IRDumper.cs b/ARMeilleure/Diagnostics/IRDumper.cs index c3e99dfab8..100a9b1130 100644 --- a/ARMeilleure/Diagnostics/IRDumper.cs +++ b/ARMeilleure/Diagnostics/IRDumper.cs @@ -134,6 
+134,7 @@ namespace ARMeilleure.Diagnostics switch (reg.Type) { case RegisterType.Flag: name = "b" + reg.Index; break; + case RegisterType.FpFlag: name = "f" + reg.Index; break; case RegisterType.Integer: name = "r" + reg.Index; break; case RegisterType.Vector: name = "v" + reg.Index; break; } diff --git a/ARMeilleure/Instructions/DelegateTypes.cs b/ARMeilleure/Instructions/DelegateTypes.cs index 424203ffae..b65149cb81 100644 --- a/ARMeilleure/Instructions/DelegateTypes.cs +++ b/ARMeilleure/Instructions/DelegateTypes.cs @@ -4,13 +4,19 @@ using System; namespace ARMeilleure.Instructions { delegate double _F64_F64(double a1); + delegate double _F64_F64_Bool(double a1, bool a2); delegate double _F64_F64_F64(double a1, double a2); + delegate double _F64_F64_F64_Bool(double a1, double a2, bool a3); delegate double _F64_F64_F64_F64(double a1, double a2, double a3); + delegate double _F64_F64_F64_F64_Bool(double a1, double a2, double a3, bool a4); delegate double _F64_F64_MidpointRounding(double a1, MidpointRounding a2); delegate float _F32_F32(float a1); + delegate float _F32_F32_Bool(float a1, bool a2); delegate float _F32_F32_F32(float a1, float a2); + delegate float _F32_F32_F32_Bool(float a1, float a2, bool a3); delegate float _F32_F32_F32_F32(float a1, float a2, float a3); + delegate float _F32_F32_F32_F32_Bool(float a1, float a2, float a3, bool a4); delegate float _F32_F32_MidpointRounding(float a1, MidpointRounding a2); delegate float _F32_U16(ushort a1); @@ -37,6 +43,7 @@ namespace ARMeilleure.Instructions delegate ushort _U16_F32(float a1); delegate ushort _U16_U64(ulong a1); + delegate uint _U32(); delegate uint _U32_F32(float a1); delegate uint _U32_F64(double a1); delegate uint _U32_U32(uint a1); @@ -74,6 +81,7 @@ namespace ARMeilleure.Instructions delegate V128 _V128_V128_V128_V128(V128 a1, V128 a2, V128 a3); delegate void _Void(); + delegate void _Void_U32(uint a1); delegate void _Void_U64(ulong a1); delegate void _Void_U64_S32(ulong a1, int a2); delegate 
void _Void_U64_U16(ulong a1, ushort a2); diff --git a/ARMeilleure/Instructions/InstEmitAlu.cs b/ARMeilleure/Instructions/InstEmitAlu.cs index ed1faae417..6e2875e649 100644 --- a/ARMeilleure/Instructions/InstEmitAlu.cs +++ b/ARMeilleure/Instructions/InstEmitAlu.cs @@ -276,23 +276,6 @@ namespace ARMeilleure.Instructions SetAluDOrZR(context, d); } - private static Operand EmitReverseBits32Op(ArmEmitterContext context, Operand op) - { - Debug.Assert(op.Type == OperandType.I32); - - Operand val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xaaaaaaaau)), Const(1)), - context.ShiftLeft (context.BitwiseAnd(op, Const(0x55555555u)), Const(1))); - - val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xccccccccu)), Const(2)), - context.ShiftLeft (context.BitwiseAnd(val, Const(0x33333333u)), Const(2))); - val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xf0f0f0f0u)), Const(4)), - context.ShiftLeft (context.BitwiseAnd(val, Const(0x0f0f0f0fu)), Const(4))); - val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xff00ff00u)), Const(8)), - context.ShiftLeft (context.BitwiseAnd(val, Const(0x00ff00ffu)), Const(8))); - - return context.BitwiseOr(context.ShiftRightUI(val, Const(16)), context.ShiftLeft(val, Const(16))); - } - private static Operand EmitReverseBits64Op(ArmEmitterContext context, Operand op) { Debug.Assert(op.Type == OperandType.I64); @@ -331,23 +314,6 @@ namespace ARMeilleure.Instructions SetAluDOrZR(context, d); } - private static Operand EmitReverseBytes16_32Op(ArmEmitterContext context, Operand op) - { - Debug.Assert(op.Type == OperandType.I32); - - Operand val = EmitReverseBytes16_64Op(context, context.ZeroExtend32(OperandType.I64, op)); - - return context.ConvertI64ToI32(val); - } - - private static Operand EmitReverseBytes16_64Op(ArmEmitterContext context, Operand op) - { - Debug.Assert(op.Type == OperandType.I64); - - return 
context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xff00ff00ff00ff00ul)), Const(8)), - context.ShiftLeft (context.BitwiseAnd(op, Const(0x00ff00ff00ff00fful)), Const(8))); - } - public static void Rev32(ArmEmitterContext context) { OpCodeAlu op = (OpCodeAlu)context.CurrOp; diff --git a/ARMeilleure/Instructions/InstEmitAlu32.cs b/ARMeilleure/Instructions/InstEmitAlu32.cs index 79b0abbc32..4d03f5c24a 100644 --- a/ARMeilleure/Instructions/InstEmitAlu32.cs +++ b/ARMeilleure/Instructions/InstEmitAlu32.cs @@ -3,8 +3,8 @@ using ARMeilleure.IntermediateRepresentation; using ARMeilleure.State; using ARMeilleure.Translation; -using static ARMeilleure.Instructions.InstEmitHelper; using static ARMeilleure.Instructions.InstEmitAluHelper; +using static ARMeilleure.Instructions.InstEmitHelper; using static ARMeilleure.IntermediateRepresentation.OperandHelper; namespace ARMeilleure.Instructions @@ -31,6 +31,101 @@ namespace ARMeilleure.Instructions EmitAluStore(context, res); } + public static void Adc(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Add(n, m); + + Operand carry = GetFlag(PState.CFlag); + + res = context.Add(res, carry); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + + EmitAdcsCCheck(context, n, res); + EmitAddsVCheck(context, n, m, res); + } + + EmitAluStore(context, res); + } + + public static void And(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res = context.BitwiseAnd(n, m); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + } + + EmitAluStore(context, res); + } + + public static void Bfc(ArmEmitterContext context) + { + OpCode32AluBf op = (OpCode32AluBf)context.CurrOp; + + Operand d = GetIntA32(context, op.Rd); + Operand res = context.BitwiseAnd(d, 
Const(~op.DestMask)); + + SetIntA32(context, op.Rd, res); + } + + public static void Bfi(ArmEmitterContext context) + { + OpCode32AluBf op = (OpCode32AluBf)context.CurrOp; + + Operand n = GetIntA32(context, op.Rn); + Operand d = GetIntA32(context, op.Rd); + Operand part = context.BitwiseAnd(n, Const(op.SourceMask)); + + if (op.Lsb != 0) + { + part = context.ShiftLeft(part, Const(op.Lsb)); + } + + Operand res = context.BitwiseAnd(d, Const(~op.DestMask)); + res = context.BitwiseOr(res, context.BitwiseAnd(part, Const(op.DestMask))); + + SetIntA32(context, op.Rd, res); + } + + public static void Bic(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res = context.BitwiseAnd(n, context.BitwiseNot(m)); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + } + + EmitAluStore(context, res); + } + + public static void Clz(ArmEmitterContext context) + { + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.CountLeadingZeros(m); + EmitAluStore(context, res); + } + public static void Cmp(ArmEmitterContext context) { IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; @@ -46,6 +141,36 @@ namespace ARMeilleure.Instructions EmitSubsVCheck(context, n, m, res); } + public static void Cmn(ArmEmitterContext context) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Add(n, m); + + EmitNZFlagsCheck(context, res); + + EmitAddsCCheck(context, n, res); + EmitAddsVCheck(context, n, m, res); + } + + public static void Eor(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res = context.BitwiseExclusiveOr(n, m); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + } + + EmitAluStore(context, res); + } + public static void Mov(ArmEmitterContext context) { IOpCode32Alu op = 
(IOpCode32Alu)context.CurrOp; @@ -60,6 +185,210 @@ namespace ARMeilleure.Instructions EmitAluStore(context, m); } + public static void Movt(ArmEmitterContext context) + { + OpCode32AluImm16 op = (OpCode32AluImm16)context.CurrOp; + + Operand d = GetIntA32(context, op.Rd); + Operand imm = Const(op.Immediate << 16); // Immeditate value as top halfword. + Operand res = context.BitwiseAnd(d, Const(0x0000ffff)); + res = context.BitwiseOr(res, imm); + + EmitAluStore(context, res); + } + + public static void Mul(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res = context.Multiply(n, m); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + } + + EmitAluStore(context, res); + } + + public static void Mvn(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + Operand m = GetAluM(context); + + Operand res = context.BitwiseNot(m); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + } + + EmitAluStore(context, res); + } + + public static void Orr(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res = context.BitwiseOr(n, m); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + } + + EmitAluStore(context, res); + } + + public static void Pkh(ArmEmitterContext context) + { + OpCode32AluRsImm op = (OpCode32AluRsImm)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res; + + bool tbform = op.ShiftType == ShiftType.Asr; + if (tbform) + { + res = context.BitwiseOr(context.BitwiseAnd(n, Const(0xFFFF0000)), context.BitwiseAnd(m, Const(0xFFFF))); + } + else + { + res = context.BitwiseOr(context.BitwiseAnd(m, Const(0xFFFF0000)), context.BitwiseAnd(n, Const(0xFFFF))); + } + + EmitAluStore(context, res); + } + + public static void Rbit(ArmEmitterContext context) + { + Operand m 
= GetAluM(context); + + Operand res = EmitReverseBits32Op(context, m); + + EmitAluStore(context, res); + } + + public static void Rev(ArmEmitterContext context) + { + Operand m = GetAluM(context); + + Operand res = context.ByteSwap(m); + + EmitAluStore(context, res); + } + + public static void Rev16(ArmEmitterContext context) + { + Operand m = GetAluM(context); + + Operand res = EmitReverseBytes16_32Op(context, m); + + EmitAluStore(context, res); + } + + public static void Revsh(ArmEmitterContext context) + { + Operand m = GetAluM(context); + + Operand res = EmitReverseBytes16_32Op(context, m); + + EmitAluStore(context, context.SignExtend16(OperandType.I32, res)); + } + + public static void Rsc(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Subtract(m, n); + + Operand borrow = context.BitwiseExclusiveOr(GetFlag(PState.CFlag), Const(1)); + + res = context.Subtract(res, borrow); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + + EmitSbcsCCheck(context, m, n); + EmitSubsVCheck(context, m, n, res); + } + + EmitAluStore(context, res); + } + + public static void Rsb(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Subtract(m, n); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + + EmitSubsCCheck(context, m, res); + EmitSubsVCheck(context, m, n, res); + } + + EmitAluStore(context, res); + } + + public static void Sbc(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Subtract(n, m); + + Operand borrow = context.BitwiseExclusiveOr(GetFlag(PState.CFlag), Const(1)); + + res = context.Subtract(res, borrow); + + if (op.SetFlags) + { + 
EmitNZFlagsCheck(context, res); + + EmitSbcsCCheck(context, n, m); + EmitSubsVCheck(context, n, m, res); + } + + EmitAluStore(context, res); + } + + public static void Sbfx(ArmEmitterContext context) + { + OpCode32AluBf op = (OpCode32AluBf)context.CurrOp; + + var msb = op.Lsb + op.Msb; // For this instruction, the msb is actually a width. + + Operand n = GetIntA32(context, op.Rn); + Operand res = context.ShiftRightSI(context.ShiftLeft(n, Const(31 - msb)), Const(31 - op.Msb)); + + SetIntA32(context, op.Rd, res); + } + + public static void Sdiv(ArmEmitterContext context) + { + EmitDiv(context, false); + } + public static void Sub(ArmEmitterContext context) { IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; @@ -80,50 +409,216 @@ namespace ARMeilleure.Instructions EmitAluStore(context, res); } - private static void EmitAluStore(ArmEmitterContext context, Operand value) + public static void Sxtb(ArmEmitterContext context) { - IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + EmitSignExtend(context, true, 8); + } - if (op.Rd == RegisterAlias.Aarch32Pc) + public static void Sxtb16(ArmEmitterContext context) + { + EmitExtend16(context, true); + } + + public static void Sxth(ArmEmitterContext context) + { + EmitSignExtend(context, true, 16); + } + + public static void Teq(ArmEmitterContext context) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res = context.BitwiseExclusiveOr(n, m); + + EmitNZFlagsCheck(context, res); + } + + public static void Tst(ArmEmitterContext context) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res = context.BitwiseAnd(n, m); + EmitNZFlagsCheck(context, res); + } + + public static void Ubfx(ArmEmitterContext context) + { + OpCode32AluBf op = (OpCode32AluBf)context.CurrOp; + + var msb = op.Lsb + op.Msb; // For this instruction, the msb is actually a width. 
+ + Operand n = GetIntA32(context, op.Rn); + Operand res = context.ShiftRightUI(context.ShiftLeft(n, Const(31 - msb)), Const(31 - op.Msb)); + + SetIntA32(context, op.Rd, res); + } + + public static void Udiv(ArmEmitterContext context) + { + EmitDiv(context, true); + } + + public static void Uxtb(ArmEmitterContext context) + { + EmitSignExtend(context, false, 8); + } + + public static void Uxtb16(ArmEmitterContext context) + { + EmitExtend16(context, false); + } + + public static void Uxth(ArmEmitterContext context) + { + EmitSignExtend(context, false, 16); + } + + private static void EmitSignExtend(ArmEmitterContext context, bool signed, int bits) + { + IOpCode32AluUx op = (IOpCode32AluUx)context.CurrOp; + + Operand m = GetAluM(context); + Operand res; + + if (op.RotateBits == 0) { - if (op.SetFlags) + res = m; + } + else + { + Operand rotate = Const(op.RotateBits); + res = context.RotateRight(m, rotate); + } + + switch (bits) + { + case 8: + res = (signed) ? context.SignExtend8(OperandType.I32, res) : context.ZeroExtend8(OperandType.I32, res); + break; + case 16: + res = (signed) ? 
context.SignExtend16(OperandType.I32, res) : context.ZeroExtend16(OperandType.I32, res); + break; + } + + if (op.Add) + { + res = context.Add(res, GetAluN(context)); + } + + EmitAluStore(context, res); + } + + private static void EmitExtend16(ArmEmitterContext context, bool signed) + { + IOpCode32AluUx op = (IOpCode32AluUx)context.CurrOp; + + Operand m = GetAluM(context); + Operand res; + + if (op.RotateBits == 0) + { + res = m; + } + else + { + Operand rotate = Const(op.RotateBits); + res = context.RotateRight(m, rotate); + } + + Operand low16, high16; + if (signed) + { + low16 = context.SignExtend8(OperandType.I32, res); + high16 = context.SignExtend8(OperandType.I32, context.ShiftRightUI(res, Const(16))); + } + else + { + low16 = context.ZeroExtend8(OperandType.I32, res); + high16 = context.ZeroExtend8(OperandType.I32, context.ShiftRightUI(res, Const(16))); + } + + if (op.Add) + { + Operand n = GetAluN(context); + Operand lowAdd, highAdd; + if (signed) { - // TODO: Load SPSR etc. - Operand isThumb = GetFlag(PState.TFlag); - - Operand lblThumb = Label(); - - context.BranchIfTrue(lblThumb, isThumb); - - context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(value, Const(~3)))); - - context.MarkLabel(lblThumb); - - context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(value, Const(~1)))); + lowAdd = context.SignExtend16(OperandType.I32, n); + highAdd = context.SignExtend16(OperandType.I32, context.ShiftRightUI(n, Const(16))); } else { - EmitAluWritePc(context, value); + lowAdd = context.ZeroExtend16(OperandType.I32, n); + highAdd = context.ZeroExtend16(OperandType.I32, context.ShiftRightUI(n, Const(16))); } + + low16 = context.Add(low16, lowAdd); + high16 = context.Add(high16, highAdd); } - else - { - SetIntA32(context, op.Rd, value); - } + + res = context.BitwiseOr( + context.ZeroExtend16(OperandType.I32, low16), + context.ShiftLeft(context.ZeroExtend16(OperandType.I32, high16), Const(16))); + + EmitAluStore(context, res); } - 
private static void EmitAluWritePc(ArmEmitterContext context, Operand value) + public static void EmitDiv(ArmEmitterContext context, bool unsigned) { - context.StoreToContext(); + Operand n = GetAluN(context); + Operand m = GetAluM(context); + Operand zero = Const(m.Type, 0); - if (IsThumb(context.CurrOp)) + Operand divisorIsZero = context.ICompareEqual(m, zero); + + Operand lblBadDiv = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblBadDiv, divisorIsZero); + + if (!unsigned) { - context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(value, Const(~1)))); - } - else - { - EmitBxWritePc(context, value); + // ARM64 behaviour: If Rn == INT_MIN && Rm == -1, Rd = INT_MIN (overflow). + // TODO: tests to ensure A32 works the same + + Operand intMin = Const(int.MinValue); + Operand minus1 = Const(-1); + + Operand nIsIntMin = context.ICompareEqual(n, intMin); + Operand mIsMinus1 = context.ICompareEqual(m, minus1); + + Operand lblGoodDiv = Label(); + + context.BranchIfFalse(lblGoodDiv, context.BitwiseAnd(nIsIntMin, mIsMinus1)); + + EmitAluStore(context, intMin); + + context.Branch(lblEnd); + + context.MarkLabel(lblGoodDiv); } + + Operand res = unsigned + ? 
context.DivideUI(n, m) + : context.Divide(n, m); + + EmitAluStore(context, res); + + context.Branch(lblEnd); + + context.MarkLabel(lblBadDiv); + + EmitAluStore(context, zero); + + context.MarkLabel(lblEnd); + } + + private static void EmitAluStore(ArmEmitterContext context, Operand value) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + EmitGenericAluStoreA32(context, op.Rd, op.SetFlags, value); } } } \ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitAluHelper.cs b/ARMeilleure/Instructions/InstEmitAluHelper.cs index d032b32e87..3bb87f2738 100644 --- a/ARMeilleure/Instructions/InstEmitAluHelper.cs +++ b/ARMeilleure/Instructions/InstEmitAluHelper.cs @@ -3,6 +3,7 @@ using ARMeilleure.IntermediateRepresentation; using ARMeilleure.State; using ARMeilleure.Translation; using System; +using System.Diagnostics; using static ARMeilleure.Instructions.InstEmitHelper; using static ARMeilleure.IntermediateRepresentation.OperandHelper; @@ -77,6 +78,89 @@ namespace ARMeilleure.Instructions SetFlag(context, PState.VFlag, vOut); } + public static Operand EmitReverseBits32Op(ArmEmitterContext context, Operand op) + { + Debug.Assert(op.Type == OperandType.I32); + + Operand val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xaaaaaaaau)), Const(1)), + context.ShiftLeft(context.BitwiseAnd(op, Const(0x55555555u)), Const(1))); + + val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xccccccccu)), Const(2)), + context.ShiftLeft(context.BitwiseAnd(val, Const(0x33333333u)), Const(2))); + val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xf0f0f0f0u)), Const(4)), + context.ShiftLeft(context.BitwiseAnd(val, Const(0x0f0f0f0fu)), Const(4))); + val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xff00ff00u)), Const(8)), + context.ShiftLeft(context.BitwiseAnd(val, Const(0x00ff00ffu)), Const(8))); + + return context.BitwiseOr(context.ShiftRightUI(val, Const(16)), 
context.ShiftLeft(val, Const(16))); + } + + public static Operand EmitReverseBytes16_64Op(ArmEmitterContext context, Operand op) + { + Debug.Assert(op.Type == OperandType.I64); + + return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xff00ff00ff00ff00ul)), Const(8)), + context.ShiftLeft(context.BitwiseAnd(op, Const(0x00ff00ff00ff00fful)), Const(8))); + } + + public static Operand EmitReverseBytes16_32Op(ArmEmitterContext context, Operand op) + { + Debug.Assert(op.Type == OperandType.I32); + + Operand val = EmitReverseBytes16_64Op(context, context.ZeroExtend32(OperandType.I64, op)); + + return context.ConvertI64ToI32(val); + } + + private static void EmitAluWritePc(ArmEmitterContext context, Operand value) + { + Debug.Assert(value.Type == OperandType.I32); + + context.StoreToContext(); + + if (IsThumb(context.CurrOp)) + { + // Make this count as a call, the translator will ignore the low bit for the address. + context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseOr(value, Const(1)))); + } + else + { + EmitBxWritePc(context, value); + } + } + + public static void EmitGenericAluStoreA32(ArmEmitterContext context, int rd, bool setFlags, Operand value) + { + Debug.Assert(value.Type == OperandType.I32); + + if (rd == RegisterAlias.Aarch32Pc && setFlags) + { + if (setFlags) + { + // TODO: Load SPSR etc. + Operand isThumb = GetFlag(PState.TFlag); + + Operand lblThumb = Label(); + + context.BranchIfTrue(lblThumb, isThumb); + + // Make this count as a call, the translator will ignore the low bit for the address. 
+ context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseOr(context.BitwiseAnd(value, Const(~3)), Const(1)))); + + context.MarkLabel(lblThumb); + + context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseOr(value, Const(1)))); + } + else + { + EmitAluWritePc(context, value); + } + } + else + { + SetIntA32(context, rd, value); + } + } public static Operand GetAluN(ArmEmitterContext context) { @@ -116,10 +200,15 @@ namespace ARMeilleure.Instructions return Const(op.Immediate); } + case OpCode32AluImm16 op: return Const(op.Immediate); + case OpCode32AluRsImm op: return GetMShiftedByImmediate(context, op, setCarry); + case OpCode32AluRsReg op: return GetMShiftedByReg(context, op, setCarry); case OpCodeT16AluImm8 op: return Const(op.Immediate); + case IOpCode32AluReg op: return GetIntA32(context, op.Rm); + // ARM64. case IOpCodeAluImm op: { @@ -167,11 +256,11 @@ namespace ARMeilleure.Instructions } // ARM32 helpers. - private static Operand GetMShiftedByImmediate(ArmEmitterContext context, OpCode32AluRsImm op, bool setCarry) + public static Operand GetMShiftedByImmediate(ArmEmitterContext context, OpCode32AluRsImm op, bool setCarry) { Operand m = GetIntA32(context, op.Rm); - int shift = op.Imm; + int shift = op.Immediate; if (shift == 0) { @@ -193,7 +282,7 @@ namespace ARMeilleure.Instructions case ShiftType.Lsr: m = GetLsrC(context, m, setCarry, shift); break; case ShiftType.Asr: m = GetAsrC(context, m, setCarry, shift); break; case ShiftType.Ror: - if (op.Imm != 0) + if (op.Immediate != 0) { m = GetRorC(context, m, setCarry, shift); } @@ -208,8 +297,74 @@ namespace ARMeilleure.Instructions return m; } - private static Operand GetLslC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + public static Operand GetMShiftedByReg(ArmEmitterContext context, OpCode32AluRsReg op, bool setCarry) { + Operand m = GetIntA32(context, op.Rm); + Operand s = context.ZeroExtend8(OperandType.I32, GetIntA32(context, op.Rs)); + Operand shiftIsZero = 
context.ICompareEqual(s, Const(0)); + + Operand zeroResult = m; + Operand shiftResult = m; + + setCarry &= op.SetFlags; + + switch (op.ShiftType) + { + case ShiftType.Lsl: shiftResult = EmitLslC(context, m, setCarry, s, shiftIsZero); break; + case ShiftType.Lsr: shiftResult = EmitLsrC(context, m, setCarry, s, shiftIsZero); break; + case ShiftType.Asr: shiftResult = EmitAsrC(context, m, setCarry, s, shiftIsZero); break; + case ShiftType.Ror: shiftResult = EmitRorC(context, m, setCarry, s, shiftIsZero); break; + } + + return context.ConditionalSelect(shiftIsZero, zeroResult, shiftResult); + } + + public static void EmitIfHelper(ArmEmitterContext context, Operand boolValue, Action action, bool expected = true) + { + Debug.Assert(boolValue.Type == OperandType.I32); + + Operand endLabel = Label(); + + if (expected) + { + context.BranchIfFalse(endLabel, boolValue); + } + else + { + context.BranchIfTrue(endLabel, boolValue); + } + + action(); + + context.MarkLabel(endLabel); + } + + public static Operand EmitLslC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero) + { + Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32); + + Operand shiftLarge = context.ICompareGreaterOrEqual(shift, Const(32)); + Operand result = context.ShiftLeft(m, shift); + if (setCarry) + { + EmitIfHelper(context, shiftIsZero, () => + { + Operand cOut = context.ShiftRightUI(m, context.Subtract(Const(32), shift)); + + cOut = context.BitwiseAnd(cOut, Const(1)); + cOut = context.ConditionalSelect(context.ICompareGreater(shift, Const(32)), Const(0), cOut); + + SetFlag(context, PState.CFlag, cOut); + }, false); + } + + return context.ConditionalSelect(shiftLarge, Const(0), result); + } + + public static Operand GetLslC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + { + Debug.Assert(m.Type == OperandType.I32); + if ((uint)shift > 32) { return GetShiftByMoreThan32(context, setCarry); @@ -238,8 
+393,32 @@ namespace ARMeilleure.Instructions } } - private static Operand GetLsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + public static Operand EmitLsrC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero) { + Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32); + + Operand shiftLarge = context.ICompareGreaterOrEqual(shift, Const(32)); + Operand result = context.ShiftRightUI(m, shift); + if (setCarry) + { + EmitIfHelper(context, shiftIsZero, () => + { + Operand cOut = context.ShiftRightUI(m, context.Subtract(shift, Const(1))); + + cOut = context.BitwiseAnd(cOut, Const(1)); + cOut = context.ConditionalSelect(context.ICompareGreater(shift, Const(32)), Const(0), cOut); + + SetFlag(context, PState.CFlag, cOut); + }, false); + } + + return context.ConditionalSelect(shiftLarge, Const(0), result); + } + + public static Operand GetLsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + { + Debug.Assert(m.Type == OperandType.I32); + if ((uint)shift > 32) { return GetShiftByMoreThan32(context, setCarry); @@ -274,8 +453,45 @@ namespace ARMeilleure.Instructions return Const(0); } - private static Operand GetAsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + public static Operand EmitAsrC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero) { + Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32); + + Operand l32Result; + Operand ge32Result; + + Operand less32 = context.ICompareLess(shift, Const(32)); + + ge32Result = context.ShiftRightSI(m, Const(31)); + + if (setCarry) + { + EmitIfHelper(context, context.BitwiseOr(less32, shiftIsZero), () => + { + SetCarryMLsb(context, ge32Result); + }, false); + } + + l32Result = context.ShiftRightSI(m, shift); + if (setCarry) + { + EmitIfHelper(context, context.BitwiseAnd(less32, 
context.BitwiseNot(shiftIsZero)), () => + { + Operand cOut = context.ShiftRightUI(m, context.Subtract(shift, Const(1))); + + cOut = context.BitwiseAnd(cOut, Const(1)); + + SetFlag(context, PState.CFlag, cOut); + }); + } + + return context.ConditionalSelect(less32, l32Result, ge32Result); + } + + public static Operand GetAsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + { + Debug.Assert(m.Type == OperandType.I32); + if ((uint)shift >= 32) { m = context.ShiftRightSI(m, Const(31)); @@ -298,8 +514,28 @@ namespace ARMeilleure.Instructions } } - private static Operand GetRorC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + public static Operand EmitRorC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero) { + Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32); + + shift = context.BitwiseAnd(shift, Const(0x1f)); + m = context.RotateRight(m, shift); + + if (setCarry) + { + EmitIfHelper(context, shiftIsZero, () => + { + SetCarryMMsb(context, m); + }, false); + } + + return m; + } + + public static Operand GetRorC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + { + Debug.Assert(m.Type == OperandType.I32); + shift &= 0x1f; m = context.RotateRight(m, Const(shift)); @@ -312,8 +548,10 @@ namespace ARMeilleure.Instructions return m; } - private static Operand GetRrxC(ArmEmitterContext context, Operand m, bool setCarry) + public static Operand GetRrxC(ArmEmitterContext context, Operand m, bool setCarry) { + Debug.Assert(m.Type == OperandType.I32); + // Rotate right by 1 with carry. 
Operand cIn = context.Copy(GetFlag(PState.CFlag)); @@ -331,16 +569,22 @@ namespace ARMeilleure.Instructions private static void SetCarryMLsb(ArmEmitterContext context, Operand m) { + Debug.Assert(m.Type == OperandType.I32); + SetFlag(context, PState.CFlag, context.BitwiseAnd(m, Const(1))); } private static void SetCarryMMsb(ArmEmitterContext context, Operand m) { + Debug.Assert(m.Type == OperandType.I32); + SetFlag(context, PState.CFlag, context.ShiftRightUI(m, Const(31))); } private static void SetCarryMShrOut(ArmEmitterContext context, Operand m, int shift) { + Debug.Assert(m.Type == OperandType.I32); + Operand cOut = context.ShiftRightUI(m, Const(shift - 1)); cOut = context.BitwiseAnd(cOut, Const(1)); diff --git a/ARMeilleure/Instructions/InstEmitException32.cs b/ARMeilleure/Instructions/InstEmitException32.cs new file mode 100644 index 0000000000..a73f0dec77 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitException32.cs @@ -0,0 +1,36 @@ +using ARMeilleure.Decoders; +using ARMeilleure.Translation; + +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Svc(ArmEmitterContext context) + { + EmitExceptionCall(context, NativeInterface.SupervisorCall); + } + + public static void Trap(ArmEmitterContext context) + { + EmitExceptionCall(context, NativeInterface.Break); + } + + private static void EmitExceptionCall(ArmEmitterContext context, _Void_U64_S32 func) + { + OpCode32Exception op = (OpCode32Exception)context.CurrOp; + + context.StoreToContext(); + + context.Call(func, Const(op.Address), Const(op.Id)); + + context.LoadFromContext(); + + if (context.CurrBlock.Next == null) + { + context.Return(Const(op.Address + 4)); + } + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitFlow32.cs b/ARMeilleure/Instructions/InstEmitFlow32.cs index 27addc78e3..cbb9ad5b26 100644 --- a/ARMeilleure/Instructions/InstEmitFlow32.cs +++ 
/// <summary>
/// Emits a branch-with-link-and-exchange to the address held in register Rm.
/// The link register receives a value derived from the current PC, the T flag is taken
/// from bit 0 of the target, and the target is returned with the call flag set.
/// </summary>
/// <param name="context">Emitter context; its current opcode must implement <c>IOpCode32BReg</c>.</param>
public static void Blxr(ArmEmitterContext context)
{
    IOpCode32BReg branchOp = (IOpCode32BReg)context.CurrOp;

    uint pc = branchOp.GetPc();

    // Both bitwise operations on the target are emitted before the link register is written below.
    Operand target = GetIntA32(context, branchOp.Rm);
    Operand thumbBit = context.BitwiseAnd(target, Const(1));
    Operand flaggedTarget = context.BitwiseOr(target, Const((int)CallFlag)); // Set call flag.

    uint linkValue;

    if (IsThumb(context.CurrOp))
    {
        linkValue = pc | 1;
    }
    else
    {
        linkValue = pc - 4;
    }

    int linkRegister = GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr);

    SetIntA32(context, linkRegister, Const(linkValue));

    SetFlag(context, PState.TFlag, thumbBit);

    context.Return(flaggedTarget); // Call.
}
/// <summary>
/// Emits a copy of <paramref name="value"/> into the register operand that backs the given FP state flag.
/// </summary>
/// <param name="context">Emitter context that receives the copy.</param>
/// <param name="stateFlag">Flag whose backing register is written.</param>
/// <param name="value">Value to store in the flag register.</param>
public static void SetFpFlag(ArmEmitterContext context, FPState stateFlag, Operand value)
{
    Operand flagRegister = GetFpFlag(stateFlag);

    context.Copy(flagRegister, value);
}
temp = null; if (op.Index || op.WBack) { temp = op.Add - ? context.Add (n, Const(op.Immediate)) - : context.Subtract(n, Const(op.Immediate)); + ? context.Add (n, m) + : context.Subtract(n, m); } if (op.WBack) diff --git a/ARMeilleure/Instructions/InstEmitMemoryEx.cs b/ARMeilleure/Instructions/InstEmitMemoryEx.cs index bcca7619d2..93c20cb588 100644 --- a/ARMeilleure/Instructions/InstEmitMemoryEx.cs +++ b/ARMeilleure/Instructions/InstEmitMemoryEx.cs @@ -5,6 +5,7 @@ using System; using System.Diagnostics; using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitMemoryExHelper; using static ARMeilleure.IntermediateRepresentation.OperandHelper; namespace ARMeilleure.Instructions @@ -66,7 +67,7 @@ namespace ARMeilleure.Instructions // method to read 128-bits atomically. if (op.Size == 2) { - Operand value = EmitLoad(context, address, exclusive, 3); + Operand value = EmitLoadExclusive(context, address, exclusive, 3); Operand valueLow = context.ConvertI64ToI32(value); @@ -79,7 +80,7 @@ namespace ARMeilleure.Instructions } else if (op.Size == 3) { - Operand value = EmitLoad(context, address, exclusive, 4); + Operand value = EmitLoadExclusive(context, address, exclusive, 4); Operand valueLow = context.VectorExtract(OperandType.I64, value, 0); Operand valueHigh = context.VectorExtract(OperandType.I64, value, 1); @@ -95,46 +96,11 @@ namespace ARMeilleure.Instructions else { // 8, 16, 32 or 64-bits (non-pairwise) load. 
- Operand value = EmitLoad(context, address, exclusive, op.Size); + Operand value = EmitLoadExclusive(context, address, exclusive, op.Size); SetIntOrZR(context, op.Rt, value); } } - - private static Operand EmitLoad( - ArmEmitterContext context, - Operand address, - bool exclusive, - int size) - { - Delegate fallbackMethodDlg = null; - - if (exclusive) - { - switch (size) - { - case 0: fallbackMethodDlg = new _U8_U64 (NativeInterface.ReadByteExclusive); break; - case 1: fallbackMethodDlg = new _U16_U64 (NativeInterface.ReadUInt16Exclusive); break; - case 2: fallbackMethodDlg = new _U32_U64 (NativeInterface.ReadUInt32Exclusive); break; - case 3: fallbackMethodDlg = new _U64_U64 (NativeInterface.ReadUInt64Exclusive); break; - case 4: fallbackMethodDlg = new _V128_U64(NativeInterface.ReadVector128Exclusive); break; - } - } - else - { - switch (size) - { - case 0: fallbackMethodDlg = new _U8_U64 (NativeInterface.ReadByte); break; - case 1: fallbackMethodDlg = new _U16_U64 (NativeInterface.ReadUInt16); break; - case 2: fallbackMethodDlg = new _U32_U64 (NativeInterface.ReadUInt32); break; - case 3: fallbackMethodDlg = new _U64_U64 (NativeInterface.ReadUInt64); break; - case 4: fallbackMethodDlg = new _V128_U64(NativeInterface.ReadVector128); break; - } - } - - return context.Call(fallbackMethodDlg, address); - } - public static void Pfrm(ArmEmitterContext context) { // Memory Prefetch, execute as no-op. 
@@ -192,11 +158,11 @@ namespace ARMeilleure.Instructions value = context.VectorInsert(value, t2, 1); } - s = EmitStore(context, address, value, exclusive, op.Size + 1); + s = EmitStoreExclusive(context, address, value, exclusive, op.Size + 1); } else { - s = EmitStore(context, address, t, exclusive, op.Size); + s = EmitStoreExclusive(context, address, t, exclusive, op.Size); } if (s != null) @@ -207,50 +173,6 @@ namespace ARMeilleure.Instructions } } - private static Operand EmitStore( - ArmEmitterContext context, - Operand address, - Operand value, - bool exclusive, - int size) - { - if (size < 3) - { - value = context.ConvertI64ToI32(value); - } - - Delegate fallbackMethodDlg = null; - - if (exclusive) - { - switch (size) - { - case 0: fallbackMethodDlg = new _S32_U64_U8 (NativeInterface.WriteByteExclusive); break; - case 1: fallbackMethodDlg = new _S32_U64_U16 (NativeInterface.WriteUInt16Exclusive); break; - case 2: fallbackMethodDlg = new _S32_U64_U32 (NativeInterface.WriteUInt32Exclusive); break; - case 3: fallbackMethodDlg = new _S32_U64_U64 (NativeInterface.WriteUInt64Exclusive); break; - case 4: fallbackMethodDlg = new _S32_U64_V128(NativeInterface.WriteVector128Exclusive); break; - } - - return context.Call(fallbackMethodDlg, address, value); - } - else - { - switch (size) - { - case 0: fallbackMethodDlg = new _Void_U64_U8 (NativeInterface.WriteByte); break; - case 1: fallbackMethodDlg = new _Void_U64_U16 (NativeInterface.WriteUInt16); break; - case 2: fallbackMethodDlg = new _Void_U64_U32 (NativeInterface.WriteUInt32); break; - case 3: fallbackMethodDlg = new _Void_U64_U64 (NativeInterface.WriteUInt64); break; - case 4: fallbackMethodDlg = new _Void_U64_V128(NativeInterface.WriteVector128); break; - } - - context.Call(fallbackMethodDlg, address, value); - - return null; - } - } - private static void EmitBarrier(ArmEmitterContext context) { // Note: This barrier is most likely not necessary, and probably diff --git 
a/ARMeilleure/Instructions/InstEmitMemoryEx32.cs b/ARMeilleure/Instructions/InstEmitMemoryEx32.cs new file mode 100644 index 0000000000..0ab990f872 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitMemoryEx32.cs @@ -0,0 +1,240 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitMemoryExHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Clrex(ArmEmitterContext context) + { + context.Call(new _Void(NativeInterface.ClearExclusive)); + } + + public static void Dmb(ArmEmitterContext context) => EmitBarrier(context); + + public static void Dsb(ArmEmitterContext context) => EmitBarrier(context); + + public static void Ldrex(ArmEmitterContext context) + { + EmitExLoadOrStore(context, WordSizeLog2, AccessType.LoadZx | AccessType.Exclusive); + } + + public static void Ldrexb(ArmEmitterContext context) + { + EmitExLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx | AccessType.Exclusive); + } + + public static void Ldrexd(ArmEmitterContext context) + { + EmitExLoadOrStore(context, DWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive); + } + + public static void Ldrexh(ArmEmitterContext context) + { + EmitExLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive); + } + + public static void Lda(ArmEmitterContext context) + { + EmitExLoadOrStore(context, WordSizeLog2, AccessType.LoadZx | AccessType.Ordered); + } + + public static void Ldab(ArmEmitterContext context) + { + EmitExLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx | AccessType.Ordered); + } + + public static void Ldaex(ArmEmitterContext context) + { + EmitExLoadOrStore(context, WordSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered); + } + + public static 
void Ldaexb(ArmEmitterContext context) + { + EmitExLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Ldaexd(ArmEmitterContext context) + { + EmitExLoadOrStore(context, DWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Ldaexh(ArmEmitterContext context) + { + EmitExLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Ldah(ArmEmitterContext context) + { + EmitExLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx | AccessType.Ordered); + } + + // Stores. + + public static void Strex(ArmEmitterContext context) + { + EmitExLoadOrStore(context, WordSizeLog2, AccessType.Store | AccessType.Exclusive); + } + + public static void Strexb(ArmEmitterContext context) + { + EmitExLoadOrStore(context, ByteSizeLog2, AccessType.Store | AccessType.Exclusive); + } + + public static void Strexd(ArmEmitterContext context) + { + EmitExLoadOrStore(context, DWordSizeLog2, AccessType.Store | AccessType.Exclusive); + } + + public static void Strexh(ArmEmitterContext context) + { + EmitExLoadOrStore(context, HWordSizeLog2, AccessType.Store | AccessType.Exclusive); + } + + public static void Stl(ArmEmitterContext context) + { + EmitExLoadOrStore(context, WordSizeLog2, AccessType.Store | AccessType.Ordered); + } + + public static void Stlb(ArmEmitterContext context) + { + EmitExLoadOrStore(context, ByteSizeLog2, AccessType.Store | AccessType.Ordered); + } + + public static void Stlex(ArmEmitterContext context) + { + EmitExLoadOrStore(context, WordSizeLog2, AccessType.Store | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Stlexb(ArmEmitterContext context) + { + EmitExLoadOrStore(context, ByteSizeLog2, AccessType.Store | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Stlexd(ArmEmitterContext context) + { + EmitExLoadOrStore(context, 
/// <summary>
/// Shared emitter for the load/store instructions of this file that take the
/// <c>Exclusive</c> and/or <c>Ordered</c> access flags.
/// </summary>
/// <param name="context">Emitter context; its current opcode must implement <c>IOpCode32MemEx</c>.</param>
/// <param name="size">Access size selector, forwarded to <c>EmitLoadExclusive</c>/<c>EmitStoreExclusive</c>.</param>
/// <param name="accType">
/// Access flags: <c>Load</c> selects the load path (otherwise a store is emitted),
/// <c>Exclusive</c> selects the exclusive accessors, <c>Ordered</c> requests a barrier.
/// </param>
private static void EmitExLoadOrStore(ArmEmitterContext context, int size, AccessType accType)
{
    IOpCode32MemEx op = (IOpCode32MemEx)context.CurrOp;

    // The base register is copied once and the copy is used as the address everywhere below.
    Operand address = context.Copy(GetIntA32(context, op.Rn));

    var exclusive = (accType & AccessType.Exclusive) != 0;
    var ordered = (accType & AccessType.Ordered) != 0;

    if (ordered)
    {
        // NOTE(review): the barrier is requested before the access for loads and stores alike;
        // this has no effect while EmitBarrier in this class emits nothing.
        EmitBarrier(context);
    }

    if ((accType & AccessType.Load) != 0)
    {
        if (size == DWordSizeLog2)
        {
            // Keep loads atomic - make the call to get the whole region and then decompose it into parts
            // for the registers.

            Operand value = EmitLoadExclusive(context, address, exclusive, size);

            Operand valueLow = context.ConvertI64ToI32(value);

            valueLow = context.ZeroExtend32(OperandType.I64, valueLow);

            Operand valueHigh = context.ShiftRightUI(value, Const(32));

            Operand lblBigEndian = Label();
            Operand lblEnd = Label();

            // The E flag selects which register of the pair receives which half.
            context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));

            // E flag clear: low half goes to Rt, high half to (Rt | 1).
            SetIntA32(context, op.Rt, valueLow);
            SetIntA32(context, op.Rt | 1, valueHigh);

            context.Branch(lblEnd);

            context.MarkLabel(lblBigEndian);

            // E flag set: the halves are swapped between the two registers.
            SetIntA32(context, op.Rt | 1, valueLow);
            SetIntA32(context, op.Rt, valueHigh);

            context.MarkLabel(lblEnd);
        }
        else
        {
            // Any other size: a single load straight into Rt.
            SetIntA32(context, op.Rt, EmitLoadExclusive(context, address, exclusive, size));
        }
    }
    else
    {
        if (size == DWordSizeLog2)
        {
            // Combine the register pair Rt, (Rt | 1) into one 64-bit value (order based on endianness).

            Operand lo = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rt));
            Operand hi = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rt | 1));

            Operand lblBigEndian = Label();
            Operand lblEnd = Label();

            context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag));

            // E flag clear: Rt supplies the low 32 bits, (Rt | 1) the high 32 bits.
            Operand leResult = context.BitwiseOr(lo, context.ShiftLeft(hi, Const(32)));
            Operand leS = EmitStoreExclusive(context, address, leResult, exclusive, size);
            if (exclusive)
            {
                // Only exclusive stores produce a status value; it is written to Rd.
                SetIntA32(context, op.Rd, leS);
            }

            context.Branch(lblEnd);

            context.MarkLabel(lblBigEndian);

            // E flag set: the two registers swap halves.
            Operand beResult = context.BitwiseOr(hi, context.ShiftLeft(lo, Const(32)));
            Operand beS = EmitStoreExclusive(context, address, beResult, exclusive, size);
            if (exclusive)
            {
                SetIntA32(context, op.Rd, beS);
            }

            context.MarkLabel(lblEnd);
        }
        else
        {
            Operand s = EmitStoreExclusive(context, address, context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rt)), exclusive, size);
            // This is only needed for exclusive stores. The function returns 0
            // when the store is successful, and 1 otherwise.
            if (exclusive)
            {
                SetIntA32(context, op.Rd, s);
            }
        }
    }
}
/// <summary>
/// Helpers that emit calls to the <c>NativeInterface</c> memory accessors, in either their
/// plain or their exclusive variants.
/// </summary>
static class InstEmitMemoryExHelper
{
    /// <summary>
    /// Emits a call to the native read routine matching <paramref name="size"/>.
    /// </summary>
    /// <param name="context">Emitter context that receives the call.</param>
    /// <param name="address">Address operand passed to the native routine.</param>
    /// <param name="exclusive">When true the <c>*Exclusive</c> read routines are used.</param>
    /// <param name="size">0 = byte, 1 = UInt16, 2 = UInt32, 3 = UInt64, 4 = Vector128.</param>
    /// <returns>The operand holding the value returned by the native call.</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is not in the range 0..4.</exception>
    public static Operand EmitLoadExclusive(
        ArmEmitterContext context,
        Operand address,
        bool exclusive,
        int size)
    {
        Delegate fallbackMethodDlg;

        if (exclusive)
        {
            switch (size)
            {
                case 0: fallbackMethodDlg = new _U8_U64(NativeInterface.ReadByteExclusive); break;
                case 1: fallbackMethodDlg = new _U16_U64(NativeInterface.ReadUInt16Exclusive); break;
                case 2: fallbackMethodDlg = new _U32_U64(NativeInterface.ReadUInt32Exclusive); break;
                case 3: fallbackMethodDlg = new _U64_U64(NativeInterface.ReadUInt64Exclusive); break;
                case 4: fallbackMethodDlg = new _V128_U64(NativeInterface.ReadVector128Exclusive); break;

                default: throw UnsupportedSize(size);
            }
        }
        else
        {
            switch (size)
            {
                case 0: fallbackMethodDlg = new _U8_U64(NativeInterface.ReadByte); break;
                case 1: fallbackMethodDlg = new _U16_U64(NativeInterface.ReadUInt16); break;
                case 2: fallbackMethodDlg = new _U32_U64(NativeInterface.ReadUInt32); break;
                case 3: fallbackMethodDlg = new _U64_U64(NativeInterface.ReadUInt64); break;
                case 4: fallbackMethodDlg = new _V128_U64(NativeInterface.ReadVector128); break;

                default: throw UnsupportedSize(size);
            }
        }

        return context.Call(fallbackMethodDlg, address);
    }

    /// <summary>
    /// Emits a call to the native write routine matching <paramref name="size"/>.
    /// </summary>
    /// <param name="context">Emitter context that receives the call.</param>
    /// <param name="address">Address operand passed to the native routine.</param>
    /// <param name="value">Value to store; converted from I64 to I32 when <paramref name="size"/> is below 3.</param>
    /// <param name="exclusive">When true the <c>*Exclusive</c> write routines are used.</param>
    /// <param name="size">0 = byte, 1 = UInt16, 2 = UInt32, 3 = UInt64, 4 = Vector128.</param>
    /// <returns>
    /// For exclusive stores, the operand holding the native routine's result;
    /// <c>null</c> for non-exclusive stores.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is not in the range 0..4.</exception>
    public static Operand EmitStoreExclusive(
        ArmEmitterContext context,
        Operand address,
        Operand value,
        bool exclusive,
        int size)
    {
        // Reject a bad size before any operation is emitted for it.
        if (size < 0 || size > 4)
        {
            throw UnsupportedSize(size);
        }

        if (size < 3)
        {
            value = context.ConvertI64ToI32(value);
        }

        Delegate fallbackMethodDlg;

        if (exclusive)
        {
            switch (size)
            {
                case 0: fallbackMethodDlg = new _S32_U64_U8(NativeInterface.WriteByteExclusive); break;
                case 1: fallbackMethodDlg = new _S32_U64_U16(NativeInterface.WriteUInt16Exclusive); break;
                case 2: fallbackMethodDlg = new _S32_U64_U32(NativeInterface.WriteUInt32Exclusive); break;
                case 3: fallbackMethodDlg = new _S32_U64_U64(NativeInterface.WriteUInt64Exclusive); break;
                case 4: fallbackMethodDlg = new _S32_U64_V128(NativeInterface.WriteVector128Exclusive); break;

                default: throw UnsupportedSize(size);
            }

            return context.Call(fallbackMethodDlg, address, value);
        }
        else
        {
            switch (size)
            {
                case 0: fallbackMethodDlg = new _Void_U64_U8(NativeInterface.WriteByte); break;
                case 1: fallbackMethodDlg = new _Void_U64_U16(NativeInterface.WriteUInt16); break;
                case 2: fallbackMethodDlg = new _Void_U64_U32(NativeInterface.WriteUInt32); break;
                case 3: fallbackMethodDlg = new _Void_U64_U64(NativeInterface.WriteUInt64); break;
                case 4: fallbackMethodDlg = new _Void_U64_V128(NativeInterface.WriteVector128); break;

                default: throw UnsupportedSize(size);
            }

            context.Call(fallbackMethodDlg, address, value);

            return null;
        }
    }

    // Builds the exception thrown when no native accessor exists for the requested size.
    private static ArgumentOutOfRangeException UnsupportedSize(int size)
    {
        return new ArgumentOutOfRangeException(nameof(size), size, "Unsupported memory access size.");
    }
}
/// <summary>
/// Returns register Rm shifted by the immediate amount encoded in a register-shifted-immediate
/// memory opcode.
/// </summary>
/// <param name="context">Emitter context that receives the shift operations.</param>
/// <param name="op">Opcode supplying Rm, the shift type and the immediate amount.</param>
/// <param name="setCarry">
/// Accepted for signature compatibility only: every shift emitted here is requested
/// without a carry-flag update, whatever value is passed.
/// </param>
/// <returns>The shifted value, or Rm unchanged when the effective shift amount is zero.</returns>
public static Operand GetMShiftedByImmediate(ArmEmitterContext context, OpCode32MemRsImm op, bool setCarry)
{
    Operand m = GetIntA32(context, op.Rm);

    int shift = op.Immediate;

    if (shift == 0)
    {
        // An immediate of zero is remapped for LSR/ASR (to 32) and ROR (to 1; emitted as RRX below).
        switch (op.ShiftType)
        {
            case ShiftType.Lsr: shift = 32; break;
            case ShiftType.Asr: shift = 32; break;
            case ShiftType.Ror: shift = 1; break;
        }
    }

    if (shift == 0)
    {
        // Nothing to shift: the register value is used as is.
        return m;
    }

    // The carry update is always suppressed on this path.
    // NOTE(review): presumably because the result is only used as a memory offset and
    // must not alter flags - confirm against the architecture manual.
    setCarry = false;

    switch (op.ShiftType)
    {
        case ShiftType.Lsl: m = InstEmitAluHelper.GetLslC(context, m, setCarry, shift); break;
        case ShiftType.Lsr: m = InstEmitAluHelper.GetLsrC(context, m, setCarry, shift); break;
        case ShiftType.Asr: m = InstEmitAluHelper.GetAsrC(context, m, setCarry, shift); break;
        case ShiftType.Ror:
            if (op.Immediate != 0)
            {
                m = InstEmitAluHelper.GetRorC(context, m, setCarry, shift);
            }
            else
            {
                // ROR with a zero immediate is emitted as rotate-right-with-extend.
                m = InstEmitAluHelper.GetRrxC(context, m, setCarry);
            }
            break;
    }

    return m;
}
static partial class InstEmit32
{
    /// <summary>Variant selector for <see cref="EmitSmmul"/>.</summary>
    [Flags]
    private enum MullFlags
    {
        Subtract = 1 << 0,
        Add = 1 << 1,
        Signed = 1 << 2,

        SignedAdd = Signed | Add,
        SignedSubtract = Signed | Subtract
    }

    /// <summary>
    /// Emits Ra + (N * M), where N and M are the ALU operands of the current opcode,
    /// updating N/Z when the opcode sets flags, and stores the result through <c>EmitAluStore</c>.
    /// </summary>
    public static void Mla(ArmEmitterContext context)
    {
        OpCode32AluMla mlaOp = (OpCode32AluMla)context.CurrOp;

        Operand n = GetAluN(context);
        Operand m = GetAluM(context);
        Operand accumulator = GetIntA32(context, mlaOp.Ra);

        Operand product = context.Multiply(n, m);
        Operand sum = context.Add(accumulator, product);

        if (mlaOp.SetFlags)
        {
            EmitNZFlagsCheck(context, sum);
        }

        EmitAluStore(context, sum);
    }

    /// <summary>
    /// Emits Ra - (N * M), where N and M are the ALU operands of the current opcode,
    /// and stores the result through <c>EmitAluStore</c>. Flags are not touched here.
    /// </summary>
    public static void Mls(ArmEmitterContext context)
    {
        OpCode32AluMla mlsOp = (OpCode32AluMla)context.CurrOp;

        Operand n = GetAluN(context);
        Operand m = GetAluM(context);
        Operand minuend = GetIntA32(context, mlsOp.Ra);

        Operand product = context.Multiply(n, m);
        Operand difference = context.Subtract(minuend, product);

        EmitAluStore(context, difference);
    }

    /// <summary>
    /// Emits a signed 32x32 -> 64-bit multiply of Rn and Rm; the upper word goes to RdHi and
    /// the lower word to RdLo. N/Z are derived from the 64-bit product when the opcode sets flags.
    /// </summary>
    public static void Smull(ArmEmitterContext context)
    {
        OpCode32AluUmull mullOp = (OpCode32AluUmull)context.CurrOp;

        Operand n = context.SignExtend32(OperandType.I64, GetIntA32(context, mullOp.Rn));
        Operand m = context.SignExtend32(OperandType.I64, GetIntA32(context, mullOp.Rm));

        Operand product = context.Multiply(n, m);

        Operand upperBits = context.ShiftRightUI(product, Const(32));
        Operand hi = context.ConvertI64ToI32(upperBits);
        Operand lo = context.ConvertI64ToI32(product);

        if (mullOp.SetFlags)
        {
            EmitNZFlagsCheck(context, product);
        }

        EmitGenericAluStoreA32(context, mullOp.RdHi, mullOp.SetFlags, hi);
        EmitGenericAluStoreA32(context, mullOp.RdLo, mullOp.SetFlags, lo);
    }

    /// <summary>Signed most-significant-word multiply with accumulate; see <see cref="EmitSmmul"/>.</summary>
    public static void Smmla(ArmEmitterContext context) => EmitSmmul(context, MullFlags.SignedAdd);

    /// <summary>Signed most-significant-word multiply with subtract; see <see cref="EmitSmmul"/>.</summary>
    public static void Smmls(ArmEmitterContext context) => EmitSmmul(context, MullFlags.SignedSubtract);

    /// <summary>Signed most-significant-word multiply; see <see cref="EmitSmmul"/>.</summary>
    public static void Smmul(ArmEmitterContext context) => EmitSmmul(context, MullFlags.Signed);

    /// <summary>
    /// Emits a signed 64-bit product of Rn and Rm, optionally combined with Ra placed in the
    /// upper 32 bits, optionally rounded, and stores the upper 32 bits of the result in Rd.
    /// </summary>
    /// <param name="context">Emitter context; its current opcode must be an <c>OpCode32AluMla</c>.</param>
    /// <param name="flags">Selects accumulate (<c>Add</c>) or subtract-from-accumulator (<c>Subtract</c>).</param>
    private static void EmitSmmul(ArmEmitterContext context, MullFlags flags)
    {
        OpCode32AluMla mulOp = (OpCode32AluMla)context.CurrOp;

        Operand n = context.SignExtend32(OperandType.I64, GetIntA32(context, mulOp.Rn));
        Operand m = context.SignExtend32(OperandType.I64, GetIntA32(context, mulOp.Rm));

        Operand res = context.Multiply(n, m);

        // Accumulation is skipped when Ra is register 15; subtraction is only considered
        // when accumulation was not performed.
        bool accumulate = flags.HasFlag(MullFlags.Add) && mulOp.Ra != 0xf;
        bool subtract = !accumulate && flags.HasFlag(MullFlags.Subtract);

        if (accumulate || subtract)
        {
            // Ra is moved into the upper word of a 64-bit value before being combined with the product.
            Operand raWide = context.ZeroExtend32(OperandType.I64, GetIntA32(context, mulOp.Ra));
            Operand raUpper = context.ShiftLeft(raWide, Const(32));

            res = accumulate
                ? context.Add(raUpper, res)
                : context.Subtract(raUpper, res);
        }

        if (mulOp.R)
        {
            // Rounding variant: add 0x80000000 before the upper word is taken.
            res = context.Add(res, Const(0x80000000L));
        }

        Operand upperBits = context.ShiftRightSI(res, Const(32));
        Operand hi = context.ConvertI64ToI32(upperBits);

        EmitGenericAluStoreA32(context, mulOp.Rd, false, hi);
    }

    /// <summary>
    /// Emits a signed 16x16 multiply of the selected halfwords of Rn and Rm
    /// (upper halfword when <c>NHigh</c>/<c>MHigh</c> is set), adds Ra and stores the sum in Rd.
    /// </summary>
    public static void Smlab(ArmEmitterContext context)
    {
        OpCode32AluMla mlaOp = (OpCode32AluMla)context.CurrOp;

        Operand n = GetIntA32(context, mlaOp.Rn);
        Operand m = GetIntA32(context, mlaOp.Rm);

        n = context.SignExtend16(OperandType.I32, mlaOp.NHigh ? context.ShiftRightUI(n, Const(16)) : n);
        m = context.SignExtend16(OperandType.I32, mlaOp.MHigh ? context.ShiftRightUI(m, Const(16)) : m);

        Operand product = context.Multiply(n, m);

        Operand accumulator = GetIntA32(context, mlaOp.Ra);
        Operand sum = context.Add(product, accumulator);

        // TODO: set Q flag when last addition overflows (saturation)?

        EmitGenericAluStoreA32(context, mlaOp.Rd, false, sum);
    }

    /// <summary>Signed 32x32 multiply accumulated into the 64-bit value RdHi:RdLo.</summary>
    public static void Smlal(ArmEmitterContext context) => EmitMlal(context, true);

    /// <summary>
    /// Emits a signed 16x16 multiply of the selected halfwords of Rn and Rm, adds the product to
    /// the 64-bit value RdHi:RdLo and writes the sum back to RdHi and RdLo.
    /// </summary>
    public static void Smlalh(ArmEmitterContext context)
    {
        OpCode32AluUmull mullOp = (OpCode32AluUmull)context.CurrOp;

        Operand n = GetIntA32(context, mullOp.Rn);
        Operand m = GetIntA32(context, mullOp.Rm);

        n = context.SignExtend16(OperandType.I64, mullOp.NHigh ? context.ShiftRightUI(n, Const(16)) : n);
        m = context.SignExtend16(OperandType.I64, mullOp.MHigh ? context.ShiftRightUI(m, Const(16)) : m);

        Operand product = context.Multiply(n, m);

        // Rebuild the 64-bit accumulator from its two 32-bit halves.
        Operand accUpperWide = context.ZeroExtend32(OperandType.I64, GetIntA32(context, mullOp.RdHi));
        Operand accUpper = context.ShiftLeft(accUpperWide, Const(32));
        Operand accLower = context.ZeroExtend32(OperandType.I64, GetIntA32(context, mullOp.RdLo));
        Operand accumulator = context.BitwiseOr(accUpper, accLower);

        Operand sum = context.Add(product, accumulator);

        Operand upperBits = context.ShiftRightUI(sum, Const(32));
        Operand hi = context.ConvertI64ToI32(upperBits);
        Operand lo = context.ConvertI64ToI32(sum);

        EmitGenericAluStoreA32(context, mullOp.RdHi, false, hi);
        EmitGenericAluStoreA32(context, mullOp.RdLo, false, lo);
    }

    /// <summary>
    /// Emits a signed 16x16 multiply of the selected halfwords of Rn and Rm and stores the
    /// 32-bit product in Rd.
    /// </summary>
    public static void Smulh(ArmEmitterContext context)
    {
        OpCode32AluMla mulOp = (OpCode32AluMla)context.CurrOp;

        Operand n = GetIntA32(context, mulOp.Rn);
        Operand m = GetIntA32(context, mulOp.Rm);

        // Upper halfword: arithmetic shift right by 16. Lower halfword: sign-extend the low 16 bits.
        n = mulOp.NHigh
            ? context.ShiftRightSI(n, Const(16))
            : context.SignExtend16(OperandType.I32, n);

        m = mulOp.MHigh
            ? context.ShiftRightSI(m, Const(16))
            : context.SignExtend16(OperandType.I32, m);

        Operand product = context.Multiply(n, m);

        EmitGenericAluStoreA32(context, mulOp.Rd, false, product);
    }

    /// <summary>Unsigned 32x32 multiply accumulated into the 64-bit value RdHi:RdLo.</summary>
    public static void Umlal(ArmEmitterContext context) => EmitMlal(context, false);

    /// <summary>
    /// Emits an unsigned 32x32 -> 64-bit multiply of Rn and Rm; the upper word goes to RdHi and
    /// the lower word to RdLo. N/Z are derived from the 64-bit product when the opcode sets flags.
    /// </summary>
    public static void Umull(ArmEmitterContext context)
    {
        OpCode32AluUmull mullOp = (OpCode32AluUmull)context.CurrOp;

        Operand n = context.ZeroExtend32(OperandType.I64, GetIntA32(context, mullOp.Rn));
        Operand m = context.ZeroExtend32(OperandType.I64, GetIntA32(context, mullOp.Rm));

        Operand product = context.Multiply(n, m);

        Operand upperBits = context.ShiftRightUI(product, Const(32));
        Operand hi = context.ConvertI64ToI32(upperBits);
        Operand lo = context.ConvertI64ToI32(product);

        if (mullOp.SetFlags)
        {
            EmitNZFlagsCheck(context, product);
        }

        EmitGenericAluStoreA32(context, mullOp.RdHi, mullOp.SetFlags, hi);
        EmitGenericAluStoreA32(context, mullOp.RdLo, mullOp.SetFlags, lo);
    }

    /// <summary>
    /// Emits a 32x32 -> 64-bit multiply of Rn and Rm, adds the 64-bit value RdHi:RdLo and writes
    /// the sum back to RdHi and RdLo, updating N/Z from the 64-bit sum when the opcode sets flags.
    /// </summary>
    /// <param name="context">Emitter context; its current opcode must be an <c>OpCode32AluUmull</c>.</param>
    /// <param name="signed">True to sign-extend the factors to 64 bits, false to zero-extend them.</param>
    public static void EmitMlal(ArmEmitterContext context, bool signed)
    {
        OpCode32AluUmull mullOp = (OpCode32AluUmull)context.CurrOp;

        Operand n = GetIntA32(context, mullOp.Rn);
        Operand m = GetIntA32(context, mullOp.Rm);

        if (!signed)
        {
            n = context.ZeroExtend32(OperandType.I64, n);
            m = context.ZeroExtend32(OperandType.I64, m);
        }
        else
        {
            n = context.SignExtend32(OperandType.I64, n);
            m = context.SignExtend32(OperandType.I64, m);
        }

        Operand product = context.Multiply(n, m);

        // Rebuild the 64-bit accumulator from its two 32-bit halves.
        Operand accUpperWide = context.ZeroExtend32(OperandType.I64, GetIntA32(context, mullOp.RdHi));
        Operand accUpper = context.ShiftLeft(accUpperWide, Const(32));
        Operand accLower = context.ZeroExtend32(OperandType.I64, GetIntA32(context, mullOp.RdLo));
        Operand accumulator = context.BitwiseOr(accUpper, accLower);

        Operand sum = context.Add(product, accumulator);

        Operand upperBits = context.ShiftRightUI(sum, Const(32));
        Operand hi = context.ConvertI64ToI32(upperBits);
        Operand lo = context.ConvertI64ToI32(sum);

        if (mullOp.SetFlags)
        {
            EmitNZFlagsCheck(context, sum);
        }

        EmitGenericAluStoreA32(context, mullOp.RdHi, mullOp.SetFlags, hi);
        EmitGenericAluStoreA32(context, mullOp.RdLo, mullOp.SetFlags, lo);
    }
}
/// <summary>
/// Emits a duplicate of general-purpose register Rt into every element of the destination:
/// the value is replicated across a 64-bit pattern and inserted at Vd, and also at Vd + 1
/// when the opcode's Q bit is set.
/// </summary>
/// <param name="context">Emitter context; its current opcode must be an <c>OpCode32SimdDupGP</c>.</param>
/// <exception cref="InvalidOperationException">The opcode's element size is not 0, 1 or 2.</exception>
public static void Vdup(ArmEmitterContext context)
{
    OpCode32SimdDupGP dupOp = (OpCode32SimdDupGP)context.CurrOp;

    Operand source = GetIntA32(context, dupOp.Rt);

    // Zero extend into an I64, then replicate with a single multiply.
    // Saves the most time over elementwise inserts.
    Operand widened;
    ulong replicationPattern;

    switch (dupOp.Size)
    {
        case 0:
            widened = context.ZeroExtend8(OperandType.I64, source);
            replicationPattern = 0x0101010101010101u;
            break;
        case 1:
            widened = context.ZeroExtend16(OperandType.I64, source);
            replicationPattern = 0x0001000100010001u;
            break;
        case 2:
            widened = context.ZeroExtend32(OperandType.I64, source);
            replicationPattern = 0x0000000100000001u;
            break;
        default:
            throw new InvalidOperationException("Unknown Vdup Size.");
    }

    Operand replicated = context.Multiply(widened, Const(replicationPattern));

    InsertScalar(context, dupOp.Vd, replicated);

    if (dupOp.Q)
    {
        InsertScalar(context, dupOp.Vd + 1, replicated);
    }
}
// VEXT: builds the destination element by element. Starting at element op.Immediate of the
// Qn source, elements are copied in order; once the running source index reaches the element
// count, the remaining elements are taken from the start of the Qm source.
public static void Vext(ArmEmitterContext context)
{
    OpCode32SimdExt op = (OpCode32SimdExt)context.CurrOp;

    int count = op.GetBytesCount();
    int firstSource = op.Immediate;

    Operand result = GetVecA32(op.Qd);

    for (int dst = 0; dst < count; dst++)
    {
        int src = firstSource + dst;

        // Indices below `count` read from Qn, the rest wrap around into Qm.
        Operand element = src < count
            ? EmitVectorExtractZx32(context, op.Qn, op.In + src, op.Size)
            : EmitVectorExtractZx32(context, op.Qm, op.Im + (src - count), op.Size);

        result = EmitVectorInsert(context, result, element, op.Id + dst, op.Size);
    }

    context.Copy(GetVecA32(op.Qd), result);
}
// Vector VMAXNM. The callback invokes the floating-point soft-float routine, so the lanes
// are extracted with the floating-point vector helper (the same one Vmax_V uses) rather
// than the sign-extending integer helper, which would hand integer operands to a call
// that expects floating-point ones.
public static void Vmaxnm_V(ArmEmitterContext context)
{
    EmitVectorBinaryOpF32(context, (op1, op2) =>
    {
        return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMaxNumFpscr, SoftFloat64.FPMaxNumFpscr, op1, op2);
    });
}

// Scalar VMINNM, implemented through the soft-float FPMinNum routines.
public static void Vminnm_S(ArmEmitterContext context)
{
    EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, SoftFloat32.FPMinNum, SoftFloat64.FPMinNum, op1, op2));
}

// Vector VMINNM. Uses the floating-point vector helper for the same reason as Vmaxnm_V.
public static void Vminnm_V(ArmEmitterContext context)
{
    EmitVectorBinaryOpF32(context, (op1, op2) =>
    {
        return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMinNumFpscr, SoftFloat64.FPMinNumFpscr, op1, op2);
    });
}
// VMUL (by scalar): multiplies every vector element by a single scalar element.
// Integer forms use a plain multiply; floating-point forms use either the native multiply
// (FastFP) or the soft-float FPMulFpscr routines.
public static void Vmul_1(ArmEmitterContext context)
{
    OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;

    if (!op.F)
    {
        EmitVectorByScalarOpI32(context, (element, scalar) => context.Multiply(element, scalar), false);

        return;
    }

    if (Optimizations.FastFP)
    {
        EmitVectorByScalarOpF32(context, (element, scalar) => context.Multiply(element, scalar));

        return;
    }

    EmitVectorByScalarOpF32(context, (element, scalar) =>
    {
        return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulFpscr, SoftFloat64.FPMulFpscr, element, scalar);
    });
}
// Scalar VMLS: computes d - (n * m). With FastFP the multiply and subtract are emitted
// directly; otherwise the soft-float FPMulSub routines are called.
public static void Vmls_S(ArmEmitterContext context)
{
    if (!Optimizations.FastFP)
    {
        EmitScalarTernaryOpF32(context, (acc, n, m) =>
            EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, acc, n, m));

        return;
    }

    EmitScalarTernaryOpF32(context, (acc, n, m) =>
    {
        Operand product = context.Multiply(n, m);

        return context.Subtract(acc, product);
    });
}
// Integer VPADD: pairwise addition of elements. Elements are treated as signed
// unless the U bit of the opcode is set.
public static void Vpadd_I(ArmEmitterContext context)
{
    bool signed = !((OpCode32SimdReg)context.CurrOp).U;

    EmitVectorPairwiseOpI32(context, (left, right) => context.Add(left, right), signed);
}
// VSEL: selects between the two scalar source operands depending on whether the
// condition encoded in op.Cc (Eq, Ge, Gt or Vs) currently holds.
public static void Vsel(ArmEmitterContext context)
{
    OpCode32SimdSel op = (OpCode32SimdSel)context.CurrOp;

    Operand condition;

    switch (op.Cc)
    {
        case OpCode32SimdSelMode.Eq:
            condition = GetCondTrue(context, Condition.Eq);
            break;
        case OpCode32SimdSelMode.Ge:
            condition = GetCondTrue(context, Condition.Ge);
            break;
        case OpCode32SimdSelMode.Gt:
            condition = GetCondTrue(context, Condition.Gt);
            break;
        case OpCode32SimdSelMode.Vs:
            condition = GetCondTrue(context, Condition.Vs);
            break;
        default:
            // Fail with a clear message instead of handing a null operand to ConditionalSelect.
            throw new InvalidOperationException("Unknown Vsel condition.");
    }

    EmitScalarBinaryOpI32(context, (op1, op2) =>
    {
        return context.ConditionalSelect(condition, op1, op2);
    });
}
// VCLE (compare with zero): floating-point forms go through the soft-float LE comparison,
// integer forms through the signed less-or-equal comparison.
public static void Vcle_Z(ArmEmitterContext context)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    if (!op.F)
    {
        EmitCmpOpI32(context, context.ICompareLessOrEqual, context.ICompareLessOrEqualUI, zero: true, signed: true);

        return;
    }

    EmitCmpOpF32(context, SoftFloat32.FPCompareLEFpscr, SoftFloat64.FPCompareLEFpscr, zero: true);
}
// Emits a per-element floating-point comparison by calling f32 or f64 (chosen from the
// element's operand type). When `zero` is set the single source is compared against a
// 0.0 constant, otherwise the two sources are compared with each other. A constant 1 is
// always passed as the final call argument.
private static void EmitCmpOpF32(
    ArmEmitterContext context,
    _F32_F32_F32_Bool f32,
    _F64_F64_F64_Bool f64,
    bool zero)
{
    Operand lastArg = Const(1);

    // Dispatches to the delegate matching the width of the left-hand operand.
    Operand Compare(Operand lhs, Operand rhs)
    {
        return lhs.Type == OperandType.FP64
            ? context.Call(f64, lhs, rhs, lastArg)
            : context.Call(f32, lhs, rhs, lastArg);
    }

    if (!zero)
    {
        EmitVectorBinaryOpF32(context, (n, m) => Compare(n, m));

        return;
    }

    EmitVectorUnaryOpF32(context, (m) =>
    {
        Operand zeroConst = m.Type == OperandType.FP64 ? ConstF(0.0) : ConstF(0.0f);

        return Compare(m, zeroConst);
    });
}
// VCMP: shares its implementation with VCMPE, with signalNaNs disabled.
public static void Vcmp(ArmEmitterContext context) => EmitVcmpOrVcmpe(context, signalNaNs: false);
// Rotates a 5-bit register index by one position.
// lowBit == true:  bit 0 moves to bit 4 and the remaining bits shift down by one.
// lowBit == false: bit 4 moves to bit 0 and the low four bits shift up by one.
private static int FlipVdBits(int vd, bool lowBit)
{
    if (!lowBit)
    {
        // Move the high bit to the bottom.
        int lowNibble = vd & 0xf;
        int upper = vd >> 4;

        return (lowNibble << 1) | upper;
    }

    // Move the low bit to the top.
    int bit0 = vd & 0x1;
    int rest = vd >> 1;

    return (bit0 << 4) | rest;
}
+ Operand fp = ExtractScalar(context, OperandType.FP32, vm); + + Operand res = context.ConvertToFP(OperandType.FP64, fp); + + InsertScalar(context, vd, res); + } + } + + public static void Vcvt_FI(ArmEmitterContext context) + { + OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; + + bool toInteger = (op.Opc2 & 0b100) != 0; + + OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32; + + if (toInteger) + { + bool unsigned = (op.Opc2 & 1) == 0; + bool roundWithFpscr = op.Opc != 1; + + Operand toConvert = ExtractScalar(context, floatSize, op.Vm); + + Operand asInteger; + + // TODO: Fast Path. + if (roundWithFpscr) + { + // These need to get the FPSCR value, so it's worth noting we'd need to do a c# call at some point. + if (floatSize == OperandType.FP64) + { + if (unsigned) + { + asInteger = context.Call(new _U32_F64(SoftFallback.DoubleToUInt32), toConvert); + } + else + { + asInteger = context.Call(new _S32_F64(SoftFallback.DoubleToInt32), toConvert); + } + } + else + { + if (unsigned) + { + asInteger = context.Call(new _U32_F32(SoftFallback.FloatToUInt32), toConvert); + } + else + { + asInteger = context.Call(new _S32_F32(SoftFallback.FloatToInt32), toConvert); + } + } + } + else + { + // Round towards zero. 
// Emits a call to MathF.Round / Math.Round with the given midpoint rounding mode.
// The single-precision overload is used when bit 0 of the current opcode's Size is clear,
// the double-precision overload otherwise.
public static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n)
{
    bool isDouble = (((IOpCode32Simd)context.CurrOp).Size & 1) != 0;

    Delegate round = isDouble
        ? (Delegate)new _F64_F64_MidpointRounding(Math.Round)
        : (Delegate)new _F32_F32_MidpointRounding(MathF.Round);

    // The rounding mode is passed to the call as its integer enum value.
    Operand mode = Const((int)roundMode);

    return context.Call(round, n, mode);
}
// Converts an I32/I64 integer operand to the floating-point type `type`, treating the
// source as signed or unsigned according to `signed`.
private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, OperandType type, bool signed)
{
    Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64);

    return signed
        ? context.ConvertToFP(type, value)
        : context.ConvertToFPUI(type, value);
}
// Splits a register index into a (vector register, sub-index) pair for the given size:
//   Int32           -> (index >> 2, index & 3)
//   Simd64 / Int64  -> (index >> 1, index & 1)
//   Simd128         -> (index >> 1, 0)
// Any other size throws ArgumentException.
public static (int, int) GetQuadwordAndSubindex(int index, RegisterSize size)
{
    if (size == RegisterSize.Int32)
    {
        return (index >> 2, index & 3);
    }

    if (size == RegisterSize.Simd64 || size == RegisterSize.Int64)
    {
        return (index >> 1, index & 1);
    }

    if (size == RegisterSize.Simd128)
    {
        return (index >> 1, 0);
    }

    throw new ArgumentException("Unrecognized Vector Register Size.");
}
// Applies `emit` to the scalar in Vm and stores the result in Vd. The scalar is read as
// FP64 when bit 0 of op.Size is set and as FP32 otherwise.
public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Func1I emit)
{
    OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

    bool isDouble = (op.Size & 1) != 0;

    Operand source = ExtractScalar(context, isDouble ? OperandType.FP64 : OperandType.FP32, op.Vm);
    Operand result = emit(source);

    InsertScalar(context, op.Vd, result);
}
// Applies `emit` to every floating point element of the source vector (Qm, starting at
// element op.Fm) and writes the results into the destination vector (Qd, starting at
// element op.Fd). Bit 0 of op.Size selects FP64 (set) or FP32 (clear) elements.
public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Func1I emit)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    int sizeF = op.Size & 1;

    OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

    // Element count = byte count / element bytes, i.e. a shift by (sizeF + 2).
    // Parenthesized explicitly, as the binary variant does, so the reader does not
    // have to recall that `+` binds tighter than `>>`.
    int elems = op.GetBytesCount() >> (sizeF + 2);

    Operand res = GetVecA32(op.Qd);

    for (int index = 0; index < elems; index++)
    {
        Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index);

        res = context.VectorInsert(res, emit(me), op.Fd + index);
    }

    context.Copy(GetVecA32(op.Qd), res);
}
// Three-operand floating point vector op. For every element, `emit` receives the current
// destination element (Qd), the first source element (Qn) and the second source element
// (Qm), and its result replaces the destination element. Bit 0 of op.Size selects FP64
// (set) or FP32 (clear) elements.
public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Func3I emit)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    int sizeF = op.Size & 1;

    OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

    // Element count = byte count / element bytes, i.e. a shift by (sizeF + 2).
    // Parenthesized explicitly, as the binary variant does, so the reader does not
    // have to recall that `+` binds tighter than `>>`.
    int elems = op.GetBytesCount() >> (sizeF + 2);

    Operand res = GetVecA32(op.Qd);

    for (int index = 0; index < elems; index++)
    {
        Operand de = context.VectorExtract(type, GetVecA32(op.Qd), op.Fd + index);
        Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index);
        Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index);

        res = context.VectorInsert(res, emit(de, ne, me), op.Fd + index);
    }

    context.Copy(GetVecA32(op.Qd), res);
}
// Three-operand integer vector op. For each lane, the destination (Qd), first source (Qn)
// and second source (Qm) elements are extracted (sign- or zero-extended according to
// `signed`), combined by `emit`, and the result is written back to the destination lane.
public static void EmitVectorTernaryOpI32(ArmEmitterContext context, Func3I emit, bool signed)
{
    OpCode32SimdReg opCode = (OpCode32SimdReg)context.CurrOp;

    Operand result = GetVecA32(opCode.Qd);

    int lanes = opCode.GetBytesCount() >> opCode.Size;

    int lane = 0;

    while (lane < lanes)
    {
        Operand destElem = EmitVectorExtract32(context, opCode.Qd, opCode.Id + lane, opCode.Size, signed);
        Operand firstElem = EmitVectorExtract32(context, opCode.Qn, opCode.In + lane, opCode.Size, signed);
        Operand secondElem = EmitVectorExtract32(context, opCode.Qm, opCode.Im + lane, opCode.Size, signed);

        Operand combined = emit(destElem, firstElem, secondElem);

        result = EmitVectorInsert(context, result, combined, opCode.Id + lane, opCode.Size);

        lane++;
    }

    context.Copy(GetVecA32(opCode.Qd), result);
}
// Integer "vector by scalar" op: every element of the first source vector (Qn) is
// combined with one scalar element through `emit`, and the result is written to the
// matching element of the destination vector (Qd).
//
// The scalar's register and lane are both packed into op.Vm: the low (4 - Size) bits
// are the lane index and the remaining high bits are the register index.
public static void EmitVectorByScalarOpI32(ArmEmitterContext context, Func2I emit, bool signed)
{
    OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;

    Operand m = EmitVectorExtract32(context, op.Vm >> (4 - op.Size), op.Vm & ((1 << (4 - op.Size)) - 1), op.Size, signed);

    Operand res = GetVecA32(op.Qd);

    int elems = op.GetBytesCount() >> op.Size;

    for (int index = 0; index < elems; index++)
    {
        Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);

        // Write at the destination lane base (Id). Using the source lane base (In) here
        // puts the result in the wrong half of Qd whenever the destination and source
        // registers occupy different halves of their quadwords.
        res = EmitVectorInsert(context, res, emit(ne, m), op.Id + index, op.Size);
    }

    context.Copy(GetVecA32(op.Qd), res);
}
// Integer accumulating "vector by scalar" op. For each lane, `emit` receives the current
// destination element (Qd), the source element (Qn) and a single scalar taken from op.Vm;
// its result replaces the destination element.
//
// op.Vm packs the scalar's location: the low (4 - Size) bits are the lane index and the
// remaining high bits are the register index.
public static void EmitVectorsByScalarOpI32(ArmEmitterContext context, Func3I emit, bool signed)
{
    OpCode32SimdRegElem opCode = (OpCode32SimdRegElem)context.CurrOp;

    int laneBits = 4 - opCode.Size;
    int scalarReg = opCode.Vm >> laneBits;
    int scalarLane = opCode.Vm & ((1 << laneBits) - 1);

    Operand scalar = EmitVectorExtract32(context, scalarReg, scalarLane, opCode.Size, signed);

    Operand result = GetVecA32(opCode.Qd);

    int lanes = opCode.GetBytesCount() >> opCode.Size;

    for (int lane = 0; lane != lanes; ++lane)
    {
        Operand destElem = EmitVectorExtract32(context, opCode.Qd, opCode.Id + lane, opCode.Size, signed);
        Operand srcElem = EmitVectorExtract32(context, opCode.Qn, opCode.In + lane, opCode.Size, signed);

        Operand combined = emit(destElem, srcElem, scalar);

        result = EmitVectorInsert(context, result, combined, opCode.Id + lane, opCode.Size);
    }

    context.Copy(GetVecA32(opCode.Qd), result);
}
// Integer pairwise op. Adjacent element pairs of the first source (Qn) are reduced by
// `emit` into the lower half of the destination lanes, and adjacent pairs of the second
// source (Qm) into the upper half.
public static void EmitVectorPairwiseOpI32(ArmEmitterContext context, Func2I emit, bool signed)
{
    OpCode32SimdReg opCode = (OpCode32SimdReg)context.CurrOp;

    int lanes = opCode.GetBytesCount() >> opCode.Size;
    int half = lanes >> 1;

    Operand result = GetVecA32(opCode.Qd);

    for (int pair = 0; pair < half; pair++)
    {
        int even = pair * 2;
        int odd = even + 1;

        // All four inputs are read before anything is inserted.
        Operand nEven = EmitVectorExtract32(context, opCode.Qn, opCode.In + even, opCode.Size, signed);
        Operand nOdd = EmitVectorExtract32(context, opCode.Qn, opCode.In + odd, opCode.Size, signed);

        Operand mEven = EmitVectorExtract32(context, opCode.Qm, opCode.Im + even, opCode.Size, signed);
        Operand mOdd = EmitVectorExtract32(context, opCode.Qm, opCode.Im + odd, opCode.Size, signed);

        Operand fromN = emit(nEven, nOdd);
        result = EmitVectorInsert(context, result, fromN, opCode.Id + pair, opCode.Size);

        Operand fromM = emit(mEven, mOdd);
        result = EmitVectorInsert(context, result, fromM, opCode.Id + pair + half, opCode.Size);
    }

    context.Copy(GetVecA32(opCode.Qd), result);
}
(for when it becomes a doubleword) + + Operand res = GetVecA32(op.Qd); + int id = (op.Vd & 1) << (3 - op.Size); // Target doubleword base. + + for (int index = 0; index < elems; index++) + { + Operand m = EmitVectorExtract32(context, op.Qm, index, op.Size + 1, false); + + res = EmitVectorInsert(context, res, emit(m), id + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + // Generic Functions + + public static Operand EmitSoftFloatCallDefaultFpscr( + ArmEmitterContext context, + _F32_F32_Bool f32, + _F64_F64_Bool f64, + params Operand[] callArgs) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + Delegate dlg = (op.Size & 1) == 0 ? (Delegate)f32 : (Delegate)f64; + + Array.Resize(ref callArgs, callArgs.Length + 1); + callArgs[callArgs.Length - 1] = Const(1); + + return context.Call(dlg, callArgs); + } + + public static Operand EmitSoftFloatCallDefaultFpscr( + ArmEmitterContext context, + _F32_F32_F32_Bool f32, + _F64_F64_F64_Bool f64, + params Operand[] callArgs) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + Delegate dlg = (op.Size & 1) == 0 ? (Delegate)f32 : (Delegate)f64; + + Array.Resize(ref callArgs, callArgs.Length + 1); + callArgs[callArgs.Length - 1] = Const(1); + + return context.Call(dlg, callArgs); + } + + public static Operand EmitSoftFloatCallDefaultFpscr( + ArmEmitterContext context, + _F32_F32_F32_F32_Bool f32, + _F64_F64_F64_F64_Bool f64, + params Operand[] callArgs) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + Delegate dlg = (op.Size & 1) == 0 ? 
// Extracts one integer element from vector register `reg`.
// `size` is log2 of the element width in bytes (0 = byte ... 3 = doubleword).
// Byte and halfword elements are widened to I32, sign-extended when `signed` is true and
// zero-extended otherwise; word and doubleword elements are returned as extracted.
// The index/size pair is validated by ThrowIfInvalid first; any size that gets past it
// without matching a case yields null.
public static Operand EmitVectorExtract32(ArmEmitterContext context, int reg, int index, int size, bool signed)
{
    ThrowIfInvalid(index, size);

    switch (size)
    {
        case 0:
        {
            Operand raw = context.VectorExtract8(GetVec(reg), index);

            return signed
                ? context.SignExtend8(OperandType.I32, raw)
                : context.ZeroExtend8(OperandType.I32, raw);
        }

        case 1:
        {
            Operand raw = context.VectorExtract16(GetVec(reg), index);

            return signed
                ? context.SignExtend16(OperandType.I32, raw)
                : context.ZeroExtend16(OperandType.I32, raw);
        }

        case 2:
            return context.VectorExtract(OperandType.I32, GetVec(reg), index);

        case 3:
            return context.VectorExtract(OperandType.I64, GetVec(reg), index);

        default:
            return null;
    }
}
// Shared implementation of Vbif (notRm = true) and Vbit (notRm = false).
// Per element it computes d ^ (mask & (d ^ n)), where mask is m, or ~m when notRm is set:
// bits of n are inserted into d wherever the mask bit is 1, and d is kept elsewhere.
private static void EmitBifBit(ArmEmitterContext context, bool notRm)
{
    // No opcode local is needed here; the ternary helper casts and reads CurrOp itself.
    EmitVectorTernaryOpZx32(context, (d, n, m) =>
    {
        if (notRm)
        {
            m = context.BitwiseNot(m);
        }

        return context.BitwiseExclusiveOr(
            context.BitwiseAnd(m,
            context.BitwiseExclusiveOr(d, n)), d);
    });
}
4, true); + } + + public static void Vst1(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 1, false); + } + + public static void Vst2(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 2, false); + } + + public static void Vst3(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 3, false); + } + + public static void Vst4(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 4, false); + } + + public static void EmitVStoreOrLoadN(ArmEmitterContext context, int count, bool load) + { + if (context.CurrOp is OpCode32SimdMemSingle) + { + OpCode32SimdMemSingle op = (OpCode32SimdMemSingle)context.CurrOp; + + int eBytes = 1 << op.Size; + + Operand n = context.Copy(GetIntA32(context, op.Rn)); + + // TODO: Check alignment. + int offset = 0; + int d = op.Vd; + + for (int i = 0; i < count; i++) + { + // Write an element from a double simd register. + Operand address = context.Add(n, Const(offset)); + if (eBytes == 8) + { + if (load) + { + EmitDVectorLoad(context, address, d); + } + else + { + EmitDVectorStore(context, address, d); + } + } + else + { + int index = ((d & 1) << (3 - op.Size)) + op.Index; + if (load) + { + if (op.Replicate) + { + var regs = (count > 1) ? 
1 : op.Increment; + for (int reg = 0; reg < regs; reg++) + { + int dreg = reg + d; + int rIndex = ((dreg & 1) << (3 - op.Size)); + int limit = rIndex + (1 << (3 - op.Size)); + + while (rIndex < limit) + { + EmitLoadSimd(context, address, GetVecA32(dreg >> 1), dreg >> 1, rIndex++, op.Size); + } + } + } + else + { + EmitLoadSimd(context, address, GetVecA32(d >> 1), d >> 1, index, op.Size); + } + } + else + { + EmitStoreSimd(context, address, d >> 1, index, op.Size); + } + } + offset += eBytes; + d += op.Increment; + } + + if (op.WBack) + { + if (op.RegisterIndex) + { + Operand m = GetIntA32(context, op.Rm); + SetIntA32(context, op.Rn, context.Add(n, m)); + } + else + { + SetIntA32(context, op.Rn, context.Add(n, Const(count * eBytes))); + } + } + } + else + { + OpCode32SimdMemPair op = (OpCode32SimdMemPair)context.CurrOp; + + int eBytes = 1 << op.Size; + + Operand n = context.Copy(GetIntA32(context, op.Rn)); + int offset = 0; + int d = op.Vd; + + for (int reg = 0; reg < op.Regs; reg++) + { + for (int elem = 0; elem < op.Elems; elem++) + { + int elemD = d + reg; + for (int i = 0; i < count; i++) + { + // Write an element from a double simd register + // add ebytes for each element. 
// Load multiple: reads op.RegisterRange consecutive 32-bit words, starting at Rn + Offset,
// into consecutive single-word vector slots. When op.DoubleWidth is set, op.Vd is doubled
// to obtain the first word slot. Rn is updated with Rn + PostOffset when PostOffset is
// non-zero; the update is emitted before the loads and uses the originally copied Rn.
public static void Vldm(ArmEmitterContext context)
{
    const int wordBytes = 4;

    OpCode32SimdMemMult opCode = (OpCode32SimdMemMult)context.CurrOp;

    Operand baseReg = context.Copy(GetIntA32(context, opCode.Rn));

    Operand startAddress = context.Add(baseReg, Const(opCode.Offset));

    if (opCode.PostOffset != 0)
    {
        SetIntA32(context, opCode.Rn, context.Add(baseReg, Const(opCode.PostOffset)));
    }

    int words = opCode.RegisterRange;
    int firstSlot = opCode.DoubleWidth ? (opCode.Vd << 1) : opCode.Vd;

    for (int word = 0; word < words; word++)
    {
        int slot = firstSlot + word;
        int quadword = slot >> 2;

        Operand address = context.Add(startAddress, Const(word * wordBytes));
        Operand vec = GetVecA32(quadword);

        EmitLoadSimd(context, address, vec, quadword, slot & 3, WordSizeLog2);
    }
}
// Stores the 64-bit register `vecD` as two 32-bit words at `address` and `address + 4`.
// The E flag is tested at run time: when clear the low word goes first, when set the
// two words are written in swapped order.
private static void EmitDVectorStore(ArmEmitterContext context, Operand address, int vecD)
{
    int quadword = vecD >> 1;
    int lowWord = (vecD & 1) << 1;
    int highWord = lowWord | 1;

    Operand bigEndianLabel = Label();
    Operand doneLabel = Label();

    // Emits the pair of word stores in the requested order.
    void StoreWordPair(int first, int second)
    {
        EmitStoreSimd(context, address, quadword, first, WordSizeLog2);
        EmitStoreSimd(context, context.Add(address, Const(4)), quadword, second, WordSizeLog2);
    }

    context.BranchIfTrue(bigEndianLabel, GetFlag(PState.EFlag));

    StoreWordPair(lowWord, highWord);

    context.Branch(doneLabel);

    context.MarkLabel(bigEndianLabel);

    StoreWordPair(highWord, lowWord);

    context.MarkLabel(doneLabel);
}
// VMVN (immediate): writes the bitwise inverse of the immediate to every destination element.
public static void Vmvn_I(ArmEmitterContext context)
{
    // The inverse must be computed with a NOT. XOR-ing the value with itself, as was done
    // before, always yields zero regardless of the immediate.
    EmitVectorImmUnaryOp32(context, (op1) => context.BitwiseNot(op1));
}
// Moves a single vector element to or from a general purpose register.
// op.Op == 1 copies the element into Rt (sign-extended unless op.U is set);
// any other value copies Rt into the element.
public static void Vmov_G1(ArmEmitterContext context)
{
    OpCode32SimdMovGpElem opCode = (OpCode32SimdMovGpElem)context.CurrOp;

    int quadword = opCode.Vd >> 1;

    // Lane inside the quadword: odd registers start (8 >> Size) lanes in.
    int lane = opCode.Index + ((opCode.Vd & 1) << (3 - opCode.Size));

    bool toGeneralPurpose = opCode.Op == 1;

    if (toGeneralPurpose)
    {
        Operand element = EmitVectorExtract32(context, quadword, lane, opCode.Size, !opCode.U);

        SetIntA32(context, opCode.Rt, element);

        return;
    }

    // From general purpose.
    Operand target = GetVecA32(quadword);
    Operand source = GetIntA32(context, opCode.Rt);

    Operand updated = EmitVectorInsert(context, target, source, lane, opCode.Size);

    context.Copy(target, updated);
}
// Moves a 64-bit vector element to or from a pair of general purpose registers.
// op.Op == 1 splits the element into Rt (low 32 bits) and Rt2 (high 32 bits);
// any other value builds the element from Rt (low) and Rt2 (high).
public static void Vmov_GD(ArmEmitterContext context)
{
    OpCode32SimdMovGpDouble opCode = (OpCode32SimdMovGpDouble)context.CurrOp;

    Operand target = GetVecA32(opCode.Vm >> 1);
    int half = opCode.Vm & 1;

    if (opCode.Op == 1)
    {
        // To general purpose.
        Operand wide = context.VectorExtract(OperandType.I64, target, half);

        Operand low = context.ConvertI64ToI32(wide);
        SetIntA32(context, opCode.Rt, low);

        Operand high = context.ConvertI64ToI32(context.ShiftRightUI(wide, Const(32)));
        SetIntA32(context, opCode.Rt2, high);

        return;
    }

    // From general purpose.
    Operand lowWord = GetIntA32(context, opCode.Rt);
    Operand highWord = GetIntA32(context, opCode.Rt2);

    Operand lowWide = context.ZeroExtend32(OperandType.I64, lowWord);
    Operand highWide = context.ZeroExtend32(OperandType.I64, highWord);
    Operand highShifted = context.ShiftLeft(highWide, Const(32));

    Operand combined = context.BitwiseOr(lowWide, highShifted);

    context.Copy(target, context.VectorInsert(target, combined, half));
}
Const(byteLength)); + Operand elemRes = null; // Note: This is I64 for ease of calculation. + + // TODO: Branching rather than conditional select. + + // Get indexed byte. + // To simplify (ha) the il, we get bytes from every vector and use a nested conditional select to choose the right result. + // This does have to extract `length` times for every element but certainly not as bad as it could be. + + // Which vector number is the index on. + Operand vecIndex = context.ShiftRightUI(selectedIndex, Const(3)); + // What should we shift by to extract it. + Operand subVecIndexShift = context.ShiftLeft(context.BitwiseAnd(selectedIndex, Const(7)), Const(3)); + + for (int i = 0; i < length; i++) + { + (int qx, int ix) = tableTuples[i]; + // Get the whole vector, we'll get a byte out of it. + Operand lookupResult; + if (qx == op.Qd) + { + // Result contains the current state of the vector. + lookupResult = context.VectorExtract(OperandType.I64, res, ix); + } + else + { + lookupResult = EmitVectorExtract32(context, qx, ix, 3, false); // I64 + } + + lookupResult = context.ShiftRightUI(lookupResult, subVecIndexShift); // Get the relevant byte from this vector. + + if (i == 0) + { + elemRes = lookupResult; // First result is always default. + } + else + { + Operand isThisElem = context.ICompareEqual(vecIndex, Const(i)); + elemRes = context.ConditionalSelect(isThisElem, lookupResult, elemRes); + } + } + + Operand fallback = (extension) ? 
// Vector transpose: for every pair of lanes, the odd lane of the destination (Qd) is
// exchanged with the even lane of the operand (Qm). When both registers live in the same
// quadword, a single running result is threaded through both inserts so neither update
// is lost.
public static void Vtrn(ArmEmitterContext context)
{
    OpCode32SimdCmpZ opCode = (OpCode32SimdCmpZ)context.CurrOp;

    int lanes = opCode.GetBytesCount() >> opCode.Size;
    int pairCount = lanes >> 1;

    bool sameQuadword = opCode.Qm == opCode.Qd;

    Operand destResult = GetVecA32(opCode.Qd);
    Operand operandResult = GetVecA32(opCode.Qm);

    for (int pair = 0; pair < pairCount; pair++)
    {
        int destOdd = (pair << 1) + 1 + opCode.Id;
        int operandEven = (pair << 1) + opCode.Im;

        Operand oldDestOdd = EmitVectorExtract32(context, opCode.Qd, destOdd, opCode.Size, false);
        Operand oldOperandEven = EmitVectorExtract32(context, opCode.Qm, operandEven, opCode.Size, false);

        destResult = EmitVectorInsert(context, destResult, oldOperandEven, destOdd, opCode.Size);

        Operand operandBase = sameQuadword ? destResult : operandResult;

        operandResult = EmitVectorInsert(context, operandBase, oldDestOdd, operandEven, opCode.Size);

        if (sameQuadword)
        {
            destResult = operandResult;
        }
    }

    context.Copy(GetVecA32(opCode.Qd), destResult);

    if (sameQuadword)
    {
        return;
    }

    context.Copy(GetVecA32(opCode.Qm), operandResult);
}
// Vector unzip: the even-numbered elements of Qd followed by those of Qm end up in the
// destination, and the odd-numbered elements of Qd followed by those of Qm end up in the
// operand register. When both registers share a quadword, one running result is threaded
// through both inserts.
public static void Vuzp(ArmEmitterContext context)
{
    OpCode32SimdCmpZ opCode = (OpCode32SimdCmpZ)context.CurrOp;

    int lanes = opCode.GetBytesCount() >> opCode.Size;
    int half = lanes >> 1;

    bool sameQuadword = opCode.Qm == opCode.Qd;

    Operand destResult = GetVecA32(opCode.Qd);
    Operand operandResult = GetVecA32(opCode.Qm);

    for (int lane = 0; lane < lanes; lane++)
    {
        // The first half of the output lanes is fed from Qd, the second half from Qm.
        bool readOperand = lane >= half;

        int sourceReg = readOperand ? opCode.Qm : opCode.Qd;
        int sourceBase = readOperand ? opCode.Im : opCode.Id;
        int evenLane = (readOperand ? lane - half : lane) << 1;

        Operand evenElem = EmitVectorExtract32(context, sourceReg, evenLane + sourceBase, opCode.Size, false);
        Operand oddElem = EmitVectorExtract32(context, sourceReg, (evenLane | 1) + sourceBase, opCode.Size, false);

        destResult = EmitVectorInsert(context, destResult, evenElem, lane + opCode.Id, opCode.Size);

        Operand operandBase = sameQuadword ? destResult : operandResult;

        operandResult = EmitVectorInsert(context, operandBase, oddElem, lane + opCode.Im, opCode.Size);

        if (sameQuadword)
        {
            destResult = operandResult;
        }
    }

    context.Copy(GetVecA32(opCode.Qd), destResult);

    if (!sameQuadword)
    {
        context.Copy(GetVecA32(opCode.Qm), operandResult);
    }
}
ARMeilleure.Instructions.InstEmitSimdHelper32; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Vshl(ArmEmitterContext context) + { + OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; + + EmitVectorUnaryOpZx32(context, (op1) => context.ShiftLeft(op1, Const(op.Shift))); + } + + public static void Vshl_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + if (op.U) + { + EmitVectorBinaryOpZx32(context, (op1, op2) => EmitShlRegOp(context, op2, op1, op.Size, true)); + } + else + { + EmitVectorBinaryOpSx32(context, (op1, op2) => EmitShlRegOp(context, op2, op1, op.Size, false)); + } + } + + public static void Vshr(ArmEmitterContext context) + { + OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; + int shift = (8 << op.Size) - op.Shift; // Shr amount is flipped. + int maxShift = (8 << op.Size) - 1; + + if (op.U) + { + EmitVectorUnaryOpZx32(context, (op1) => (shift > maxShift) ? Const(op1.Type, 0) : context.ShiftRightUI(op1, Const(shift))); + } + else + { + EmitVectorUnaryOpSx32(context, (op1) => context.ShiftRightSI(op1, Const(Math.Min(maxShift, shift)))); + } + } + + public static void Vshrn(ArmEmitterContext context) + { + OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; + int shift = (8 << op.Size) - op.Shift; // Shr amount is flipped. 
+ + EmitVectorUnaryNarrowOp32(context, (op1) => context.ShiftRightUI(op1, Const(shift))); + } + + private static Operand EmitShlRegOp(ArmEmitterContext context, Operand op, Operand shiftLsB, int size, bool unsigned) + { + if (shiftLsB.Type == OperandType.I64) + { + shiftLsB = context.ConvertI64ToI32(shiftLsB); + } + + shiftLsB = context.SignExtend8(OperandType.I32, shiftLsB); + Debug.Assert((uint)size < 4u); + + Operand negShiftLsB = context.Negate(shiftLsB); + + Operand isPositive = context.ICompareGreaterOrEqual(shiftLsB, Const(0)); + + Operand shl = context.ShiftLeft(op, shiftLsB); + Operand shr = unsigned ? context.ShiftRightUI(op, negShiftLsB) : context.ShiftRightSI(op, negShiftLsB); + + Operand res = context.ConditionalSelect(isPositive, shl, shr); + + if (unsigned) + { + Operand isOutOfRange = context.BitwiseOr( + context.ICompareGreaterOrEqual(shiftLsB, Const(8 << size)), + context.ICompareGreaterOrEqual(negShiftLsB, Const(8 << size))); + + return context.ConditionalSelect(isOutOfRange, Const(op.Type, 0), res); + } + else + { + Operand isOutOfRange0 = context.ICompareGreaterOrEqual(shiftLsB, Const(8 << size)); + Operand isOutOfRangeN = context.ICompareGreaterOrEqual(negShiftLsB, Const(8 << size)); + + // Also zero if shift is too negative, but value was positive. + isOutOfRange0 = context.BitwiseOr(isOutOfRange0, context.BitwiseAnd(isOutOfRangeN, context.ICompareGreaterOrEqual(op, Const(op.Type, 0)))); + + Operand min = (op.Type == OperandType.I64) ? 
Const(-1L) : Const(-1); + + return context.ConditionalSelect(isOutOfRange0, Const(op.Type, 0), context.ConditionalSelect(isOutOfRangeN, min, res)); + } + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSystem32.cs b/ARMeilleure/Instructions/InstEmitSystem32.cs new file mode 100644 index 0000000000..808b4fdd7a --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSystem32.cs @@ -0,0 +1,233 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Mcr(ArmEmitterContext context) + { + OpCode32System op = (OpCode32System)context.CurrOp; + + if (op.Coproc != 15) + { + throw new NotImplementedException($"Unknown MRC Coprocessor ID 0x{op.Coproc:X16} at 0x{op.Address:X16}."); + } + + if (op.Opc1 != 0) + { + throw new NotImplementedException($"Unknown MRC Opc1 0x{op.Opc1:X16} at 0x{op.Address:X16}."); + } + + Delegate dlg; + switch (op.CRn) + { + case 13: // Process and Thread Info. + if (op.CRm != 0) + { + throw new NotImplementedException($"Unknown MRC CRm 0x{op.CRm:X16} at 0x{op.Address:X16}."); + } + switch (op.Opc2) + { + case 2: + dlg = new _Void_U32(NativeInterface.SetTpidrEl032); break; + default: + throw new NotImplementedException($"Unknown MRC Opc2 0x{op.Opc2:X16} at 0x{op.Address:X16}."); + } + break; + + case 7: + switch (op.CRm) // Cache and Memory barrier. + { + case 10: + switch (op.Opc2) + { + case 5: // Data Memory Barrier Register. + return; // No-op. 
+ default: + throw new NotImplementedException($"Unknown MRC Opc2 0x{op.Opc2:X16} at 0x{op.Address:X16}."); + } + default: + throw new NotImplementedException($"Unknown MRC CRm 0x{op.CRm:X16} at 0x{op.Address:X16}."); + } + + default: + throw new NotImplementedException($"Unknown MRC 0x{op.RawOpCode:X8} at 0x{op.Address:X16}."); + } + + context.Call(dlg, GetIntA32(context, op.Rt)); + } + + public static void Mrc(ArmEmitterContext context) + { + OpCode32System op = (OpCode32System)context.CurrOp; + + if (op.Coproc != 15) + { + throw new NotImplementedException($"Unknown MRC Coprocessor ID 0x{op.Coproc:X16} at 0x{op.Address:X16}."); + } + + if (op.Opc1 != 0) + { + throw new NotImplementedException($"Unknown MRC Opc1 0x{op.Opc1:X16} at 0x{op.Address:X16}."); + } + + Delegate dlg; + switch (op.CRn) + { + case 13: // Process and Thread Info. + if (op.CRm != 0) + { + throw new NotImplementedException($"Unknown MRC CRm 0x{op.CRm:X16} at 0x{op.Address:X16}."); + } + switch (op.Opc2) + { + case 2: + dlg = new _U32(NativeInterface.GetTpidrEl032); break; + case 3: + dlg = new _U32(NativeInterface.GetTpidr32); break; + default: + throw new NotImplementedException($"Unknown MRC Opc2 0x{op.Opc2:X16} at 0x{op.Address:X16}."); + } + break; + default: + throw new NotImplementedException($"Unknown MRC 0x{op.RawOpCode:X8} at 0x{op.Address:X16}."); + } + + if (op.Rt == RegisterAlias.Aarch32Pc) + { + // Special behavior: copy NZCV flags into APSR. + EmitSetNzcv(context, context.Call(dlg)); + + return; + } + else + { + SetIntA32(context, op.Rt, context.Call(dlg)); + } + } + + public static void Mrrc(ArmEmitterContext context) + { + OpCode32System op = (OpCode32System)context.CurrOp; + + if (op.Coproc != 15) + { + throw new NotImplementedException($"Unknown MRC Coprocessor ID 0x{op.Coproc:X16} at 0x{op.Address:X16}."); + } + + var opc = op.MrrcOp; + + Delegate dlg; + switch (op.CRm) + { + case 14: // Timer. 
+ switch (opc) + { + case 0: + dlg = new _U64(NativeInterface.GetCntpctEl0); break; + default: + throw new NotImplementedException($"Unknown MRRC Opc1 0x{opc:X16} at 0x{op.Address:X16}."); + } + break; + default: + throw new NotImplementedException($"Unknown MRRC 0x{op.RawOpCode:X8} at 0x{op.Address:X16}."); + } + + Operand result = context.Call(dlg); + + SetIntA32(context, op.Rt, context.ConvertI64ToI32(result)); + SetIntA32(context, op.CRn, context.ConvertI64ToI32(context.ShiftRightUI(result, Const(32)))); + } + + public static void Nop(ArmEmitterContext context) { } + + public static void Vmrs(ArmEmitterContext context) + { + OpCode32SimdSpecial op = (OpCode32SimdSpecial)context.CurrOp; + + if (op.Rt == RegisterAlias.Aarch32Pc && op.Sreg == 0b0001) + { + // Special behavior: copy NZCV flags into APSR. + SetFlag(context, PState.VFlag, GetFpFlag(FPState.VFlag)); + SetFlag(context, PState.CFlag, GetFpFlag(FPState.CFlag)); + SetFlag(context, PState.ZFlag, GetFpFlag(FPState.ZFlag)); + SetFlag(context, PState.NFlag, GetFpFlag(FPState.NFlag)); + return; + } + + Delegate dlg; + switch (op.Sreg) + { + case 0b0000: // FPSID + throw new NotImplementedException("Supervisor Only"); + case 0b0001: // FPSCR + dlg = new _U32(NativeInterface.GetFpscr); break; + case 0b0101: // MVFR2 + throw new NotImplementedException("MVFR2"); + case 0b0110: // MVFR1 + throw new NotImplementedException("MVFR1"); + case 0b0111: // MVFR0 + throw new NotImplementedException("MVFR0"); + case 0b1000: // FPEXC + throw new NotImplementedException("Supervisor Only"); + default: + throw new NotImplementedException($"Unknown VMRS 0x{op.RawOpCode:X8} at 0x{op.Address:X16}."); + } + + SetIntA32(context, op.Rt, context.Call(dlg)); + } + + public static void Vmsr(ArmEmitterContext context) + { + OpCode32SimdSpecial op = (OpCode32SimdSpecial)context.CurrOp; + + Delegate dlg; + switch (op.Sreg) + { + case 0b0000: // FPSID + throw new NotImplementedException("Supervisor Only"); + case 0b0001: // FPSCR + dlg = 
new _Void_U32(NativeInterface.SetFpscr); break; + case 0b0101: // MVFR2 + throw new NotImplementedException("MVFR2"); + case 0b0110: // MVFR1 + throw new NotImplementedException("MVFR1"); + case 0b0111: // MVFR0 + throw new NotImplementedException("MVFR0"); + case 0b1000: // FPEXC + throw new NotImplementedException("Supervisor Only"); + default: + throw new NotImplementedException($"Unknown VMSR 0x{op.RawOpCode:X8} at 0x{op.Address:X16}."); + } + + context.Call(dlg, GetIntA32(context, op.Rt)); + } + + private static void EmitSetNzcv(ArmEmitterContext context, Operand t) + { + Operand v = context.ShiftRightUI(t, Const((int)PState.VFlag)); + v = context.BitwiseAnd(v, Const(1)); + + Operand c = context.ShiftRightUI(t, Const((int)PState.CFlag)); + c = context.BitwiseAnd(c, Const(1)); + + Operand z = context.ShiftRightUI(t, Const((int)PState.ZFlag)); + z = context.BitwiseAnd(z, Const(1)); + + Operand n = context.ShiftRightUI(t, Const((int)PState.NFlag)); + n = context.BitwiseAnd(n, Const(1)); + + SetFlag(context, PState.VFlag, v); + SetFlag(context, PState.CFlag, c); + SetFlag(context, PState.ZFlag, z); + SetFlag(context, PState.NFlag, n); + } + } +} diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs index c81484a6f4..0c2dd18d49 100644 --- a/ARMeilleure/Instructions/InstName.cs +++ b/ARMeilleure/Instructions/InstName.cs @@ -82,6 +82,7 @@ namespace ARMeilleure.Instructions Smaddl, Smsubl, Smulh, + Smull, Stlr, Stlxp, Stlxr, @@ -92,6 +93,8 @@ namespace ARMeilleure.Instructions Sub, Subs, Svc, + Sxtb, + Sxth, Sys, Tbnz, Tbz, @@ -445,19 +448,140 @@ namespace ARMeilleure.Instructions Zip2_V, // Base (AArch32) + Bfc, + Bfi, Blx, Bx, Cmp, + Cmn, + Movt, + Mul, + Lda, + Ldab, + Ldaex, + Ldaexb, + Ldaexd, + Ldaexh, + Ldah, Ldm, Ldrb, Ldrd, + Ldrex, + Ldrexb, + Ldrexd, + Ldrexh, Ldrh, Ldrsb, Ldrsh, + Mcr, + Mla, + Mls, Mov, + Mrc, + Mrrc, + Mvn, + Pkh, + Pld, + Rev, + Revsh, + Rsb, + Rsc, + Sbfx, + Smlab, + Smlal, + Smlalh, + Smmla, + Smmls, 
+ Smmul, + Stl, + Stlb, + Stlex, + Stlexb, + Stlexd, + Stlexh, + Stlh, Stm, Strb, Strd, - Strh + Strex, + Strexb, + Strexd, + Strexh, + Strh, + Sxtb16, + Teq, + Trap, + Tst, + Ubfx, + Umlal, + Umull, + Uxtb, + Uxtb16, + Uxth, + + // FP & SIMD (AArch32) + Vabs, + Vadd, + Vand, + Vbif, + Vbit, + Vbsl, + Vceq, + Vcge, + Vcgt, + Vcle, + Vclt, + Vcmp, + Vcmpe, + Vcvt, + Vdiv, + Vdup, + Vext, + Vld1, + Vld2, + Vld3, + Vld4, + Vldm, + Vldr, + Vmax, + Vmaxnm, + Vmin, + Vminnm, + Vmla, + Vmls, + Vmov, + Vmovn, + Vmrs, + Vmsr, + Vmul, + Vmvn, + Vneg, + Vnmul, + Vnmla, + Vnmls, + Vorr, + Vpadd, + Vrev, + Vrint, + Vsel, + Vshl, + Vshr, + Vshrn, + Vst1, + Vst2, + Vst3, + Vst4, + Vstm, + Vstr, + Vsqrt, + Vrecpe, + Vrecps, + Vrsqrte, + Vrsqrts, + Vsub, + Vtbl, + Vtrn, + Vuzp, + Vzip, } } diff --git a/ARMeilleure/Instructions/NativeInterface.cs b/ARMeilleure/Instructions/NativeInterface.cs index 3a1e91c8ed..988e86bd77 100644 --- a/ARMeilleure/Instructions/NativeInterface.cs +++ b/ARMeilleure/Instructions/NativeInterface.cs @@ -87,16 +87,39 @@ namespace ARMeilleure.Instructions return (ulong)GetContext().Fpsr; } + public static uint GetFpscr() + { + ExecutionContext context = GetContext(); + uint result = (uint)(context.Fpsr & FPSR.A32Mask) | (uint)(context.Fpcr & FPCR.A32Mask); + + result |= context.GetFPstateFlag(FPState.NFlag) ? (1u << 31) : 0; + result |= context.GetFPstateFlag(FPState.ZFlag) ? (1u << 30) : 0; + result |= context.GetFPstateFlag(FPState.CFlag) ? (1u << 29) : 0; + result |= context.GetFPstateFlag(FPState.VFlag) ? 
(1u << 28) : 0; + + return result; + } + public static ulong GetTpidrEl0() { return (ulong)GetContext().TpidrEl0; } + public static uint GetTpidrEl032() + { + return (uint)GetContext().TpidrEl0; + } + public static ulong GetTpidr() { return (ulong)GetContext().Tpidr; } + public static uint GetTpidr32() + { + return (uint)GetContext().Tpidr; + } + public static ulong GetCntfrqEl0() { return GetContext().CntfrqEl0; @@ -117,13 +140,31 @@ namespace ARMeilleure.Instructions GetContext().Fpsr = (FPSR)value; } + public static void SetFpscr(uint value) + { + ExecutionContext context = GetContext(); + + context.SetFPstateFlag(FPState.NFlag, (value & (1u << 31)) != 0); + context.SetFPstateFlag(FPState.ZFlag, (value & (1u << 30)) != 0); + context.SetFPstateFlag(FPState.CFlag, (value & (1u << 29)) != 0); + context.SetFPstateFlag(FPState.VFlag, (value & (1u << 28)) != 0); + + context.Fpsr = FPSR.A32Mask & (FPSR)value; + context.Fpcr = FPCR.A32Mask & (FPCR)value; + } + public static void SetTpidrEl0(ulong value) { GetContext().TpidrEl0 = (long)value; } -#endregion -#region "Read" + public static void SetTpidrEl032(uint value) + { + GetContext().TpidrEl0 = (long)value; + } + #endregion + + #region "Read" public static byte ReadByte(ulong address) { return GetMemoryManager().ReadByte((long)address); diff --git a/ARMeilleure/Instructions/SoftFallback.cs b/ARMeilleure/Instructions/SoftFallback.cs index 10bb47df54..611e8d6aca 100644 --- a/ARMeilleure/Instructions/SoftFallback.cs +++ b/ARMeilleure/Instructions/SoftFallback.cs @@ -420,6 +420,26 @@ namespace ARMeilleure.Instructions return MathF.Truncate(value); } } + + public static int FloatToInt32(float value) + { + return SatF32ToS32(RoundF(value)); + } + + public static int DoubleToInt32(double value) + { + return SatF64ToS32(Round(value)); + } + + public static uint FloatToUInt32(float value) + { + return SatF32ToU32(RoundF(value)); + } + + public static uint DoubleToUInt32(double value) + { + return SatF64ToU32(Round(value)); + } 
#endregion #region "Saturation" diff --git a/ARMeilleure/Instructions/SoftFloat.cs b/ARMeilleure/Instructions/SoftFloat.cs index 256bc5b975..d3e15a2ced 100644 --- a/ARMeilleure/Instructions/SoftFloat.cs +++ b/ARMeilleure/Instructions/SoftFloat.cs @@ -121,7 +121,7 @@ namespace ARMeilleure.Instructions private static float FPDefaultNaN() { - return -float.NaN; + return BitConverter.Int32BitsToSingle(0x7fc00000); } private static float FPInfinity(bool sign) @@ -622,13 +622,19 @@ namespace ARMeilleure.Instructions static class SoftFloat32 { public static float FPAdd(float value1, float value2) + { + return FPAddFpscr(value1, value2, false); + } + + public static float FPAddFpscr(float value1, float value2, bool standardFpscr) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -639,7 +645,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if ((inf1 && !sign1) || (inf2 && !sign2)) { @@ -657,7 +663,7 @@ namespace ARMeilleure.Instructions { result = value1 + value2; - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -672,9 +678,10 @@ namespace ARMeilleure.Instructions public static int FPCompare(float value1, float value2, bool 
signalNaNs) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out _, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out _, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out _, context, fpcr); int result; @@ -684,7 +691,7 @@ namespace ARMeilleure.Instructions if (type1 == FPType.SNaN || type2 == FPType.SNaN || signalNaNs) { - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } } else @@ -708,10 +715,16 @@ namespace ARMeilleure.Instructions public static float FPCompareEQ(float value1, float value2) { - ExecutionContext context = NativeInterface.GetContext(); + return FPCompareEQFpscr(value1, value2, false); + } - value1 = value1.FPUnpack(out FPType type1, out _, out _, context); - value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + public static float FPCompareEQFpscr(float value1, float value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); float result; @@ -721,7 +734,7 @@ namespace ARMeilleure.Instructions if (type1 == FPType.SNaN || type2 == FPType.SNaN) { - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } } else @@ -734,10 +747,16 @@ namespace ARMeilleure.Instructions public static float FPCompareGE(float value1, float value2) { - ExecutionContext context = NativeInterface.GetContext(); + return FPCompareGEFpscr(value1, value2, false); + } - value1 = value1.FPUnpack(out FPType type1, out _, out _, context); - value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + public static float FPCompareGEFpscr(float value1, float value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); float result; @@ -745,7 +764,7 @@ namespace ARMeilleure.Instructions { result = ZerosOrOnes(false); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else { @@ -757,10 +776,16 @@ namespace ARMeilleure.Instructions public static float FPCompareGT(float value1, float value2) { - ExecutionContext context = NativeInterface.GetContext(); + return FPCompareGTFpscr(value1, value2, false); + } - value1 = value1.FPUnpack(out FPType type1, out _, out _, context); - value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + public static float FPCompareGTFpscr(float value1, float value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); float result; @@ -768,7 +793,7 @@ namespace ARMeilleure.Instructions { result = ZerosOrOnes(false); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else { @@ -788,14 +813,25 @@ namespace ARMeilleure.Instructions return FPCompareGT(value2, value1); } + public static float FPCompareLEFpscr(float value1, float value2, bool standardFpscr) + { + return FPCompareGEFpscr(value2, value1, standardFpscr); + } + + public static float FPCompareLTFpscr(float value1, float value2, bool standardFpscr) + { + return FPCompareGTFpscr(value2, value1, standardFpscr); + } + public static float FPDiv(float value1, float value2) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -806,7 +842,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if (inf1 || zero2) { @@ -814,7 +850,7 @@ namespace ARMeilleure.Instructions if (!inf1) { - FPProcessException(FPException.DivideByZero, context); + FPProcessException(FPException.DivideByZero, context, fpcr); } } else if (zero1 || inf2) @@ -825,7 +861,7 @@ namespace ARMeilleure.Instructions { result = 
value1 / value2; - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -838,13 +874,19 @@ namespace ARMeilleure.Instructions } public static float FPMax(float value1, float value2) + { + return FPMaxFpscr(value1, value2, false); + } + + public static float FPMaxFpscr(float value1, float value2, bool standardFpscr) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -877,7 +919,7 @@ namespace ARMeilleure.Instructions { result = value2; - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -892,10 +934,16 @@ namespace ARMeilleure.Instructions public static float FPMaxNum(float value1, float value2) { - ExecutionContext context = NativeInterface.GetContext(); + return FPMaxNumFpscr(value1, value2, false); + } - value1.FPUnpack(out FPType type1, out _, out _, context); - value2.FPUnpack(out FPType type2, out _, out _, context); + public static float FPMaxNumFpscr(float value1, float value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
context.StandardFpcrValue : context.Fpcr; + + value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); if (type1 == FPType.QNaN && type2 != FPType.QNaN) { @@ -906,17 +954,23 @@ namespace ARMeilleure.Instructions value2 = FPInfinity(true); } - return FPMax(value1, value2); + return FPMaxFpscr(value1, value2, standardFpscr); } public static float FPMin(float value1, float value2) + { + return FPMinFpscr(value1, value2, false); + } + + public static float FPMinFpscr(float value1, float value2, bool standardFpscr) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -949,7 +1003,7 @@ namespace ARMeilleure.Instructions { result = value2; - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -964,10 +1018,16 @@ namespace ARMeilleure.Instructions public static float FPMinNum(float value1, float value2) { - ExecutionContext context = NativeInterface.GetContext(); + return FPMinNumFpscr(value1, value2, false); + } - value1.FPUnpack(out FPType type1, out _, out _, context); - value2.FPUnpack(out FPType type2, out _, out _, context); + public static float FPMinNumFpscr(float value1, float value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr 
? context.StandardFpcrValue : context.Fpcr; + + value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); if (type1 == FPType.QNaN && type2 != FPType.QNaN) { @@ -978,17 +1038,23 @@ namespace ARMeilleure.Instructions value2 = FPInfinity(false); } - return FPMin(value1, value2); + return FPMinFpscr(value1, value2, standardFpscr); } public static float FPMul(float value1, float value2) + { + return FPMulFpscr(value1, value2, false); + } + + public static float FPMulFpscr(float value1, float value2, bool standardFpscr) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -999,7 +1065,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if (inf1 || inf2) { @@ -1013,7 +1079,7 @@ namespace ARMeilleure.Instructions { result = value1 * value2; - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1027,22 +1093,28 @@ namespace ARMeilleure.Instructions public static float FPMulAdd(float valueA, float value1, float value2) { - ExecutionContext context = NativeInterface.GetContext(); + return FPMulAddFpscr(valueA, value1, value2, false); + } - valueA = valueA.FPUnpack(out 
FPType typeA, out bool signA, out uint addend, context); - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + public static float FPMulAddFpscr(float valueA, float value1, float value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out uint addend, context, fpcr); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; - float result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context); + float result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context, fpcr); if (typeA == FPType.QNaN && ((inf1 && zero2) || (zero1 && inf2))) { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } if (!done) @@ -1057,7 +1129,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if ((infA && !signA) || (infP && !signP)) { @@ -1075,7 +1147,7 @@ namespace ARMeilleure.Instructions { result = MathF.FusedMultiplyAdd(value1, value2, valueA); - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1094,14 +1166,22 @@ namespace ARMeilleure.Instructions return FPMulAdd(valueA, value1, value2); } + public static float FPMulSubFpscr(float valueA, float value1, float value2, 
bool standardFpscr) + { + value1 = value1.FPNeg(); + + return FPMulAddFpscr(valueA, value1, value2, standardFpscr); + } + public static float FPMulX(float value1, float value2) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -1124,7 +1204,7 @@ namespace ARMeilleure.Instructions { result = value1 * value2; - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1153,15 +1233,21 @@ namespace ARMeilleure.Instructions public static float FPRecipEstimate(float value) { - ExecutionContext context = NativeInterface.GetContext(); + return FPRecipEstimateFpscr(value, false); + } - value.FPUnpack(out FPType type, out bool sign, out uint op, context); + public static float FPRecipEstimateFpscr(float value, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
context.StandardFpcrValue : context.Fpcr; + + value.FPUnpack(out FPType type, out bool sign, out uint op, context, fpcr); float result; if (type == FPType.SNaN || type == FPType.QNaN) { - result = FPProcessNaN(type, op, context); + result = FPProcessNaN(type, op, context, fpcr); } else if (type == FPType.Infinity) { @@ -1171,13 +1257,13 @@ namespace ARMeilleure.Instructions { result = FPInfinity(sign); - FPProcessException(FPException.DivideByZero, context); + FPProcessException(FPException.DivideByZero, context, fpcr); } else if (MathF.Abs(value) < MathF.Pow(2f, -128)) { bool overflowToInf; - switch (context.Fpcr.GetRoundingMode()) + switch (fpcr.GetRoundingMode()) { default: case FPRoundingMode.ToNearest: overflowToInf = true; break; @@ -1188,10 +1274,10 @@ namespace ARMeilleure.Instructions result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); - FPProcessException(FPException.Overflow, context); - FPProcessException(FPException.Inexact, context); + FPProcessException(FPException.Overflow, context, fpcr); + FPProcessException(FPException.Inexact, context, fpcr); } - else if ((context.Fpcr & FPCR.Fz) != 0 && (MathF.Abs(value) >= MathF.Pow(2f, 126))) + else if ((fpcr & FPCR.Fz) != 0 && (MathF.Abs(value) >= MathF.Pow(2f, 126))) { result = FPZero(sign); @@ -1240,16 +1326,49 @@ namespace ARMeilleure.Instructions return result; } + public static float FPRecipStep(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.StandardFpcrValue; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + float product; + + if ((inf1 && 
zero2) || (zero1 && inf2)) + { + product = FPZero(false); + } + else + { + product = FPMulFpscr(value1, value2, true); + } + + result = FPSubFpscr(FPTwo(false), product, true); + } + + return result; + } + public static float FPRecipStepFused(float value1, float value2) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; value1 = value1.FPNeg(); - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -1268,7 +1387,7 @@ namespace ARMeilleure.Instructions { result = MathF.FusedMultiplyAdd(value1, value2, 2f); - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1283,14 +1402,15 @@ namespace ARMeilleure.Instructions public static float FPRecpX(float value) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value.FPUnpack(out FPType type, out bool sign, out uint op, context); + value.FPUnpack(out FPType type, out bool sign, out uint op, context, fpcr); float result; if (type == FPType.SNaN || type == FPType.QNaN) { - result = FPProcessNaN(type, op, context); + result = FPProcessNaN(type, op, context, fpcr); } else { @@ -1306,27 +1426,33 @@ namespace ARMeilleure.Instructions public static float FPRSqrtEstimate(float value) { - ExecutionContext context = NativeInterface.GetContext(); + return FPRSqrtEstimateFpscr(value, false); + } - value.FPUnpack(out FPType type, out bool sign, out uint op, context); + public static float 
FPRSqrtEstimateFpscr(float value, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value.FPUnpack(out FPType type, out bool sign, out uint op, context, fpcr); float result; if (type == FPType.SNaN || type == FPType.QNaN) { - result = FPProcessNaN(type, op, context); + result = FPProcessNaN(type, op, context, fpcr); } else if (type == FPType.Zero) { result = FPInfinity(sign); - FPProcessException(FPException.DivideByZero, context); + FPProcessException(FPException.DivideByZero, context, fpcr); } else if (sign) { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if (type == FPType.Infinity) { @@ -1369,16 +1495,95 @@ namespace ARMeilleure.Instructions return result; } + public static float FPHalvedSub(float value1, float value2, ExecutionContext context, FPCR fpcr) + { + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if (inf1 && inf2 && sign1 == sign2) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context, fpcr); + } + else if ((inf1 && !sign1) || (inf2 && sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && !sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == !sign2) + { + result = FPZero(sign1); + } + else + { + result = (value1 - value2) / 2.0f; + + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + 
return result; + } + + public static float FPRSqrtStep(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.StandardFpcrValue; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + float product; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + product = FPZero(false); + } + else + { + product = FPMulFpscr(value1, value2, true); + } + + result = FPHalvedSub(FPThree(false), product, context, fpcr); + } + + return result; + } + public static float FPRSqrtStepFused(float value1, float value2) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; value1 = value1.FPNeg(); - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -1397,7 +1602,7 @@ namespace ARMeilleure.Instructions { result = MathF.FusedMultiplyAdd(value1, value2, 3f) / 2f; - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1412,14 +1617,15 @@ namespace ARMeilleure.Instructions public static float FPSqrt(float value) { ExecutionContext context = 
NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value = value.FPUnpack(out FPType type, out bool sign, out uint op, context); + value = value.FPUnpack(out FPType type, out bool sign, out uint op, context, fpcr); float result; if (type == FPType.SNaN || type == FPType.QNaN) { - result = FPProcessNaN(type, op, context); + result = FPProcessNaN(type, op, context, fpcr); } else if (type == FPType.Zero) { @@ -1433,13 +1639,13 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else { result = MathF.Sqrt(value); - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1451,13 +1657,19 @@ namespace ARMeilleure.Instructions } public static float FPSub(float value1, float value2) + { + return FPSubFpscr(value1, value2, false); + } + + public static float FPSubFpscr(float value1, float value2, bool standardFpscr) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -1468,7 +1680,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if ((inf1 && !sign1) || (inf2 && sign2)) { @@ -1486,7 +1698,7 @@ namespace ARMeilleure.Instructions { result = value1 - value2; - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1500,7 +1712,7 @@ namespace ARMeilleure.Instructions private static float FPDefaultNaN() { - return -float.NaN; + return BitConverter.Int32BitsToSingle(0x7fc00000); } private static float FPInfinity(bool sign) @@ -1523,6 +1735,11 @@ namespace ARMeilleure.Instructions return sign ? -2f : +2f; } + private static float FPThree(bool sign) + { + return sign ? -3f : +3f; + } + private static float FPOnePointFive(bool sign) { return sign ? 
-1.5f : +1.5f; @@ -1543,7 +1760,8 @@ namespace ARMeilleure.Instructions out FPType type, out bool sign, out uint valueBits, - ExecutionContext context) + ExecutionContext context, + FPCR fpcr) { valueBits = (uint)BitConverter.SingleToInt32Bits(value); @@ -1551,14 +1769,14 @@ namespace ARMeilleure.Instructions if ((valueBits & 0x7F800000u) == 0u) { - if ((valueBits & 0x007FFFFFu) == 0u || (context.Fpcr & FPCR.Fz) != 0) + if ((valueBits & 0x007FFFFFu) == 0u || (fpcr & FPCR.Fz) != 0) { type = FPType.Zero; value = FPZero(sign); if ((valueBits & 0x007FFFFFu) != 0u) { - FPProcessException(FPException.InputDenorm, context); + FPProcessException(FPException.InputDenorm, context, fpcr); } } else @@ -1592,25 +1810,26 @@ namespace ARMeilleure.Instructions uint op1, uint op2, out bool done, - ExecutionContext context) + ExecutionContext context, + FPCR fpcr) { done = true; if (type1 == FPType.SNaN) { - return FPProcessNaN(type1, op1, context); + return FPProcessNaN(type1, op1, context, fpcr); } else if (type2 == FPType.SNaN) { - return FPProcessNaN(type2, op2, context); + return FPProcessNaN(type2, op2, context, fpcr); } else if (type1 == FPType.QNaN) { - return FPProcessNaN(type1, op1, context); + return FPProcessNaN(type1, op1, context, fpcr); } else if (type2 == FPType.QNaN) { - return FPProcessNaN(type2, op2, context); + return FPProcessNaN(type2, op2, context, fpcr); } done = false; @@ -1626,33 +1845,34 @@ namespace ARMeilleure.Instructions uint op2, uint op3, out bool done, - ExecutionContext context) + ExecutionContext context, + FPCR fpcr) { done = true; if (type1 == FPType.SNaN) { - return FPProcessNaN(type1, op1, context); + return FPProcessNaN(type1, op1, context, fpcr); } else if (type2 == FPType.SNaN) { - return FPProcessNaN(type2, op2, context); + return FPProcessNaN(type2, op2, context, fpcr); } else if (type3 == FPType.SNaN) { - return FPProcessNaN(type3, op3, context); + return FPProcessNaN(type3, op3, context, fpcr); } else if (type1 == FPType.QNaN) { - 
return FPProcessNaN(type1, op1, context); + return FPProcessNaN(type1, op1, context, fpcr); } else if (type2 == FPType.QNaN) { - return FPProcessNaN(type2, op2, context); + return FPProcessNaN(type2, op2, context, fpcr); } else if (type3 == FPType.QNaN) { - return FPProcessNaN(type3, op3, context); + return FPProcessNaN(type3, op3, context, fpcr); } done = false; @@ -1660,16 +1880,16 @@ namespace ARMeilleure.Instructions return FPZero(false); } - private static float FPProcessNaN(FPType type, uint op, ExecutionContext context) + private static float FPProcessNaN(FPType type, uint op, ExecutionContext context, FPCR fpcr) { if (type == FPType.SNaN) { op |= 1u << 22; - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } - if ((context.Fpcr & FPCR.Dn) != 0) + if ((fpcr & FPCR.Dn) != 0) { return FPDefaultNaN(); } @@ -1677,11 +1897,11 @@ namespace ARMeilleure.Instructions return BitConverter.Int32BitsToSingle((int)op); } - private static void FPProcessException(FPException exc, ExecutionContext context) + private static void FPProcessException(FPException exc, ExecutionContext context, FPCR fpcr) { int enable = (int)exc + 8; - if ((context.Fpcr & (FPCR)(1 << enable)) != 0) + if ((fpcr & (FPCR)(1 << enable)) != 0) { throw new NotImplementedException("Floating-point trap handling."); } @@ -1695,13 +1915,19 @@ namespace ARMeilleure.Instructions static class SoftFloat64 { public static double FPAdd(double value1, double value2) + { + return FPAddFpscr(value1, value2, false); + } + + public static double FPAddFpscr(double value1, double value2, bool standardFpscr) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -1712,7 +1938,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if ((inf1 && !sign1) || (inf2 && !sign2)) { @@ -1730,7 +1956,7 @@ namespace ARMeilleure.Instructions { result = value1 + value2; - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1745,9 +1971,10 @@ namespace ARMeilleure.Instructions public static int FPCompare(double value1, double value2, bool signalNaNs) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out _, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out _, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out _, context, fpcr); int result; @@ -1757,7 +1984,7 @@ namespace ARMeilleure.Instructions if (type1 == FPType.SNaN || type2 == FPType.SNaN || signalNaNs) { - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } } else @@ -1781,10 +2008,16 @@ namespace ARMeilleure.Instructions public static double FPCompareEQ(double value1, double value2) { - 
ExecutionContext context = NativeInterface.GetContext(); + return FPCompareEQFpscr(value1, value2, false); + } - value1 = value1.FPUnpack(out FPType type1, out _, out _, context); - value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + public static double FPCompareEQFpscr(double value1, double value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); double result; @@ -1794,7 +2027,7 @@ namespace ARMeilleure.Instructions if (type1 == FPType.SNaN || type2 == FPType.SNaN) { - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } } else @@ -1807,10 +2040,16 @@ namespace ARMeilleure.Instructions public static double FPCompareGE(double value1, double value2) { - ExecutionContext context = NativeInterface.GetContext(); + return FPCompareGEFpscr(value1, value2, false); + } - value1 = value1.FPUnpack(out FPType type1, out _, out _, context); - value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + public static double FPCompareGEFpscr(double value1, double value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); double result; @@ -1818,7 +2057,7 @@ namespace ARMeilleure.Instructions { result = ZerosOrOnes(false); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else { @@ -1830,10 +2069,16 @@ namespace ARMeilleure.Instructions public static double FPCompareGT(double value1, double value2) { - ExecutionContext context = NativeInterface.GetContext(); + return FPCompareGTFpscr(value1, value2, false); + } - value1 = value1.FPUnpack(out FPType type1, out _, out _, context); - value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + public static double FPCompareGTFpscr(double value1, double value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); double result; @@ -1841,7 +2086,7 @@ namespace ARMeilleure.Instructions { result = ZerosOrOnes(false); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else { @@ -1861,14 +2106,25 @@ namespace ARMeilleure.Instructions return FPCompareGT(value2, value1); } + public static double FPCompareLEFpscr(double value1, double value2, bool standardFpscr) + { + return FPCompareGEFpscr(value2, value1, standardFpscr); + } + + public static double FPCompareLTFpscr(double value1, double value2, bool standardFpscr) + { + return FPCompareGTFpscr(value2, value1, standardFpscr); + } + public static double FPDiv(double value1, double value2) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, 
out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -1879,7 +2135,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if (inf1 || zero2) { @@ -1887,7 +2143,7 @@ namespace ARMeilleure.Instructions if (!inf1) { - FPProcessException(FPException.DivideByZero, context); + FPProcessException(FPException.DivideByZero, context, fpcr); } } else if (zero1 || inf2) @@ -1898,7 +2154,7 @@ namespace ARMeilleure.Instructions { result = value1 / value2; - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1911,13 +2167,19 @@ namespace ARMeilleure.Instructions } public static double FPMax(double value1, double value2) + { + return FPMaxFpscr(value1, value2, false); + } + + public static double FPMaxFpscr(double value1, double value2, bool standardFpscr) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -1950,7 +2212,7 @@ namespace ARMeilleure.Instructions { result = value2; - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1965,10 +2227,16 @@ namespace ARMeilleure.Instructions public static double FPMaxNum(double value1, double value2) { - ExecutionContext context = NativeInterface.GetContext(); + return FPMaxNumFpscr(value1, value2, false); + } - value1.FPUnpack(out FPType type1, out _, out _, context); - value2.FPUnpack(out FPType type2, out _, out _, context); + public static double FPMaxNumFpscr(double value1, double value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
context.StandardFpcrValue : context.Fpcr; + + value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); if (type1 == FPType.QNaN && type2 != FPType.QNaN) { @@ -1979,17 +2247,23 @@ namespace ARMeilleure.Instructions value2 = FPInfinity(true); } - return FPMax(value1, value2); + return FPMaxFpscr(value1, value2, standardFpscr); } public static double FPMin(double value1, double value2) + { + return FPMinFpscr(value1, value2, false); + } + + public static double FPMinFpscr(double value1, double value2, bool standardFpscr) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -2022,7 +2296,7 @@ namespace ARMeilleure.Instructions { result = value2; - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -2037,10 +2311,16 @@ namespace ARMeilleure.Instructions public static double FPMinNum(double value1, double value2) { - ExecutionContext context = NativeInterface.GetContext(); + return FPMinNumFpscr(value1, value2, false); + } - value1.FPUnpack(out FPType type1, out _, out _, context); - value2.FPUnpack(out FPType type2, out _, out _, context); + public static double FPMinNumFpscr(double value1, double value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + 
FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); if (type1 == FPType.QNaN && type2 != FPType.QNaN) { @@ -2051,17 +2331,23 @@ namespace ARMeilleure.Instructions value2 = FPInfinity(false); } - return FPMin(value1, value2); + return FPMinFpscr(value1, value2, standardFpscr); } public static double FPMul(double value1, double value2) + { + return FPMulFpscr(value1, value2, false); + } + + public static double FPMulFpscr(double value1, double value2, bool standardFpscr) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -2072,7 +2358,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if (inf1 || inf2) { @@ -2086,7 +2372,7 @@ namespace ARMeilleure.Instructions { result = value1 * value2; - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -2100,22 +2386,28 @@ namespace ARMeilleure.Instructions public static double FPMulAdd(double valueA, double value1, double value2) { - ExecutionContext context = NativeInterface.GetContext(); + return FPMulAddFpscr(valueA, value1, value2, 
false); + } - valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out ulong addend, context); - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + public static double FPMulAddFpscr(double valueA, double value1, double value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out ulong addend, context, fpcr); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; - double result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context); + double result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context, fpcr); if (typeA == FPType.QNaN && ((inf1 && zero2) || (zero1 && inf2))) { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } if (!done) @@ -2130,7 +2422,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if ((infA && !signA) || (infP && !signP)) { @@ -2148,7 +2440,7 @@ namespace ARMeilleure.Instructions { result = Math.FusedMultiplyAdd(value1, value2, valueA); - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -2167,14 +2459,22 @@ namespace ARMeilleure.Instructions return FPMulAdd(valueA, value1, value2); } + public static double 
FPMulSubFpscr(double valueA, double value1, double value2, bool standardFpscr) + { + value1 = value1.FPNeg(); + + return FPMulAddFpscr(valueA, value1, value2, standardFpscr); + } + public static double FPMulX(double value1, double value2) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -2197,7 +2497,7 @@ namespace ARMeilleure.Instructions { result = value1 * value2; - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -2226,15 +2526,21 @@ namespace ARMeilleure.Instructions public static double FPRecipEstimate(double value) { - ExecutionContext context = NativeInterface.GetContext(); + return FPRecipEstimateFpscr(value, false); + } - value.FPUnpack(out FPType type, out bool sign, out ulong op, context); + public static double FPRecipEstimateFpscr(double value, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
context.StandardFpcrValue : context.Fpcr; + + value.FPUnpack(out FPType type, out bool sign, out ulong op, context, fpcr); double result; if (type == FPType.SNaN || type == FPType.QNaN) { - result = FPProcessNaN(type, op, context); + result = FPProcessNaN(type, op, context, fpcr); } else if (type == FPType.Infinity) { @@ -2244,13 +2550,13 @@ namespace ARMeilleure.Instructions { result = FPInfinity(sign); - FPProcessException(FPException.DivideByZero, context); + FPProcessException(FPException.DivideByZero, context, fpcr); } else if (Math.Abs(value) < Math.Pow(2d, -1024)) { bool overflowToInf; - switch (context.Fpcr.GetRoundingMode()) + switch (fpcr.GetRoundingMode()) { default: case FPRoundingMode.ToNearest: overflowToInf = true; break; @@ -2261,10 +2567,10 @@ namespace ARMeilleure.Instructions result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); - FPProcessException(FPException.Overflow, context); - FPProcessException(FPException.Inexact, context); + FPProcessException(FPException.Overflow, context, fpcr); + FPProcessException(FPException.Inexact, context, fpcr); } - else if ((context.Fpcr & FPCR.Fz) != 0 && (Math.Abs(value) >= Math.Pow(2d, 1022))) + else if ((fpcr & FPCR.Fz) != 0 && (Math.Abs(value) >= Math.Pow(2d, 1022))) { result = FPZero(sign); @@ -2313,16 +2619,49 @@ namespace ARMeilleure.Instructions return result; } + public static double FPRecipStep(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.StandardFpcrValue; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + double product; + + if 
((inf1 && zero2) || (zero1 && inf2)) + { + product = FPZero(false); + } + else + { + product = FPMulFpscr(value1, value2, true); + } + + result = FPSubFpscr(FPTwo(false), product, true); + } + + return result; + } + public static double FPRecipStepFused(double value1, double value2) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; value1 = value1.FPNeg(); - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -2341,7 +2680,7 @@ namespace ARMeilleure.Instructions { result = Math.FusedMultiplyAdd(value1, value2, 2d); - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -2356,14 +2695,15 @@ namespace ARMeilleure.Instructions public static double FPRecpX(double value) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value.FPUnpack(out FPType type, out bool sign, out ulong op, context); + value.FPUnpack(out FPType type, out bool sign, out ulong op, context, fpcr); double result; if (type == FPType.SNaN || type == FPType.QNaN) { - result = FPProcessNaN(type, op, context); + result = FPProcessNaN(type, op, context, fpcr); } else { @@ -2379,27 +2719,33 @@ namespace ARMeilleure.Instructions public static double FPRSqrtEstimate(double value) { - ExecutionContext context = NativeInterface.GetContext(); + return FPRSqrtEstimateFpscr(value, false); + } - value.FPUnpack(out FPType type, out bool sign, out ulong op, context); 
+ public static double FPRSqrtEstimateFpscr(double value, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value.FPUnpack(out FPType type, out bool sign, out ulong op, context, fpcr); double result; if (type == FPType.SNaN || type == FPType.QNaN) { - result = FPProcessNaN(type, op, context); + result = FPProcessNaN(type, op, context, fpcr); } else if (type == FPType.Zero) { result = FPInfinity(sign); - FPProcessException(FPException.DivideByZero, context); + FPProcessException(FPException.DivideByZero, context, fpcr); } else if (sign) { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if (type == FPType.Infinity) { @@ -2442,16 +2788,95 @@ namespace ARMeilleure.Instructions return result; } + public static double FPHalvedSub(double value1, double value2, ExecutionContext context, FPCR fpcr) + { + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if (inf1 && inf2 && sign1 == sign2) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context, fpcr); + } + else if ((inf1 && !sign1) || (inf2 && sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && !sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == !sign2) + { + result = FPZero(sign1); + } + else + { + result = (value1 - value2) / 2.0; + + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = 
FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPRSqrtStep(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.StandardFpcrValue; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + double product; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + product = FPZero(false); + } + else + { + product = FPMulFpscr(value1, value2, true); + } + + result = FPHalvedSub(FPThree(false), product, context, fpcr); + } + + return result; + } + public static double FPRSqrtStepFused(double value1, double value2) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; value1 = value1.FPNeg(); - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -2470,7 +2895,7 @@ namespace ARMeilleure.Instructions { result = Math.FusedMultiplyAdd(value1, value2, 3d) / 2d; - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -2485,14 +2910,15 @@ namespace ARMeilleure.Instructions public static double 
FPSqrt(double value) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value = value.FPUnpack(out FPType type, out bool sign, out ulong op, context); + value = value.FPUnpack(out FPType type, out bool sign, out ulong op, context, fpcr); double result; if (type == FPType.SNaN || type == FPType.QNaN) { - result = FPProcessNaN(type, op, context); + result = FPProcessNaN(type, op, context, fpcr); } else if (type == FPType.Zero) { @@ -2506,13 +2932,13 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else { result = Math.Sqrt(value); - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -2524,13 +2950,19 @@ namespace ARMeilleure.Instructions } public static double FPSub(double value1, double value2) + { + return FPSubFpscr(value1, value2, false); + } + + public static double FPSubFpscr(double value1, double value2, bool standardFpscr) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -2541,7 +2973,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if ((inf1 && !sign1) || (inf2 && sign2)) { @@ -2559,7 +2991,7 @@ namespace ARMeilleure.Instructions { result = value1 - value2; - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -2573,7 +3005,7 @@ namespace ARMeilleure.Instructions private static double FPDefaultNaN() { - return -double.NaN; + return BitConverter.Int64BitsToDouble(0x7ff8000000000000); } private static double FPInfinity(bool sign) @@ -2596,6 +3028,11 @@ namespace ARMeilleure.Instructions return sign ? -2d : +2d; } + private static double FPThree(bool sign) + { + return sign ? -3d : +3d; + } + private static double FPOnePointFive(bool sign) { return sign ? 
-1.5d : +1.5d; @@ -2616,7 +3053,8 @@ namespace ARMeilleure.Instructions out FPType type, out bool sign, out ulong valueBits, - ExecutionContext context) + ExecutionContext context, + FPCR fpcr) { valueBits = (ulong)BitConverter.DoubleToInt64Bits(value); @@ -2624,14 +3062,14 @@ namespace ARMeilleure.Instructions if ((valueBits & 0x7FF0000000000000ul) == 0ul) { - if ((valueBits & 0x000FFFFFFFFFFFFFul) == 0ul || (context.Fpcr & FPCR.Fz) != 0) + if ((valueBits & 0x000FFFFFFFFFFFFFul) == 0ul || (fpcr & FPCR.Fz) != 0) { type = FPType.Zero; value = FPZero(sign); if ((valueBits & 0x000FFFFFFFFFFFFFul) != 0ul) { - FPProcessException(FPException.InputDenorm, context); + FPProcessException(FPException.InputDenorm, context, fpcr); } } else @@ -2665,25 +3103,26 @@ namespace ARMeilleure.Instructions ulong op1, ulong op2, out bool done, - ExecutionContext context) + ExecutionContext context, + FPCR fpcr) { done = true; if (type1 == FPType.SNaN) { - return FPProcessNaN(type1, op1, context); + return FPProcessNaN(type1, op1, context, fpcr); } else if (type2 == FPType.SNaN) { - return FPProcessNaN(type2, op2, context); + return FPProcessNaN(type2, op2, context, fpcr); } else if (type1 == FPType.QNaN) { - return FPProcessNaN(type1, op1, context); + return FPProcessNaN(type1, op1, context, fpcr); } else if (type2 == FPType.QNaN) { - return FPProcessNaN(type2, op2, context); + return FPProcessNaN(type2, op2, context, fpcr); } done = false; @@ -2699,33 +3138,34 @@ namespace ARMeilleure.Instructions ulong op2, ulong op3, out bool done, - ExecutionContext context) + ExecutionContext context, + FPCR fpcr) { done = true; if (type1 == FPType.SNaN) { - return FPProcessNaN(type1, op1, context); + return FPProcessNaN(type1, op1, context, fpcr); } else if (type2 == FPType.SNaN) { - return FPProcessNaN(type2, op2, context); + return FPProcessNaN(type2, op2, context, fpcr); } else if (type3 == FPType.SNaN) { - return FPProcessNaN(type3, op3, context); + return FPProcessNaN(type3, op3, context, 
fpcr); } else if (type1 == FPType.QNaN) { - return FPProcessNaN(type1, op1, context); + return FPProcessNaN(type1, op1, context, fpcr); } else if (type2 == FPType.QNaN) { - return FPProcessNaN(type2, op2, context); + return FPProcessNaN(type2, op2, context, fpcr); } else if (type3 == FPType.QNaN) { - return FPProcessNaN(type3, op3, context); + return FPProcessNaN(type3, op3, context, fpcr); } done = false; @@ -2733,16 +3173,16 @@ namespace ARMeilleure.Instructions return FPZero(false); } - private static double FPProcessNaN(FPType type, ulong op, ExecutionContext context) + private static double FPProcessNaN(FPType type, ulong op, ExecutionContext context, FPCR fpcr) { if (type == FPType.SNaN) { op |= 1ul << 51; - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } - if ((context.Fpcr & FPCR.Dn) != 0) + if ((fpcr & FPCR.Dn) != 0) { return FPDefaultNaN(); } @@ -2750,11 +3190,11 @@ namespace ARMeilleure.Instructions return BitConverter.Int64BitsToDouble((long)op); } - private static void FPProcessException(FPException exc, ExecutionContext context) + private static void FPProcessException(FPException exc, ExecutionContext context, FPCR fpcr) { int enable = (int)exc + 8; - if ((context.Fpcr & (FPCR)(1 << enable)) != 0) + if ((fpcr & (FPCR)(1 << enable)) != 0) { throw new NotImplementedException("Floating-point trap handling."); } diff --git a/ARMeilleure/IntermediateRepresentation/RegisterType.cs b/ARMeilleure/IntermediateRepresentation/RegisterType.cs index e71795cb94..88ac6c1246 100644 --- a/ARMeilleure/IntermediateRepresentation/RegisterType.cs +++ b/ARMeilleure/IntermediateRepresentation/RegisterType.cs @@ -4,6 +4,7 @@ namespace ARMeilleure.IntermediateRepresentation { Integer, Vector, - Flag + Flag, + FpFlag } } \ No newline at end of file diff --git a/ARMeilleure/State/ExecutionContext.cs b/ARMeilleure/State/ExecutionContext.cs index 406766803e..482665dbfe 100644 --- 
a/ARMeilleure/State/ExecutionContext.cs +++ b/ARMeilleure/State/ExecutionContext.cs @@ -36,6 +36,7 @@ namespace ARMeilleure.State public FPCR Fpcr { get; set; } public FPSR Fpsr { get; set; } + public FPCR StandardFpcrValue => (Fpcr & (FPCR.Ahp)) | FPCR.Dn | FPCR.Fz; public bool IsAarch32 { get; set; } @@ -90,6 +91,9 @@ namespace ARMeilleure.State public bool GetPstateFlag(PState flag) => _nativeContext.GetPstateFlag(flag); public void SetPstateFlag(PState flag, bool value) => _nativeContext.SetPstateFlag(flag, value); + public bool GetFPstateFlag(FPState flag) => _nativeContext.GetFPStateFlag(flag); + public void SetFPstateFlag(FPState flag, bool value) => _nativeContext.SetFPStateFlag(flag, value); + internal void CheckInterrupt() { if (_interrupted) diff --git a/ARMeilleure/State/FPCR.cs b/ARMeilleure/State/FPCR.cs index 511681fa94..913065ea32 100644 --- a/ARMeilleure/State/FPCR.cs +++ b/ARMeilleure/State/FPCR.cs @@ -3,12 +3,14 @@ using System; namespace ARMeilleure.State { [Flags] - public enum FPCR + public enum FPCR : uint { Ufe = 1 << 11, Fz = 1 << 24, Dn = 1 << 25, - Ahp = 1 << 26 + Ahp = 1 << 26, + + A32Mask = 0x07ffff00 } public static class FPCRExtensions diff --git a/ARMeilleure/State/FPSR.cs b/ARMeilleure/State/FPSR.cs index c20dc43930..47323b35c5 100644 --- a/ARMeilleure/State/FPSR.cs +++ b/ARMeilleure/State/FPSR.cs @@ -3,9 +3,11 @@ using System; namespace ARMeilleure.State { [Flags] - public enum FPSR + public enum FPSR : uint { Ufc = 1 << 3, - Qc = 1 << 27 + Qc = 1 << 27, + + A32Mask = 0xf800000f } } diff --git a/ARMeilleure/State/FPState.cs b/ARMeilleure/State/FPState.cs new file mode 100644 index 0000000000..2fe2a567e0 --- /dev/null +++ b/ARMeilleure/State/FPState.cs @@ -0,0 +1,15 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace ARMeilleure.State +{ + [Flags] + public enum FPState + { + VFlag = 28, + CFlag = 29, + ZFlag = 30, + NFlag = 31 + } +} diff --git a/ARMeilleure/State/NativeContext.cs 
b/ARMeilleure/State/NativeContext.cs index 4e6a5302f5..eb54505c6a 100644 --- a/ARMeilleure/State/NativeContext.cs +++ b/ARMeilleure/State/NativeContext.cs @@ -14,7 +14,8 @@ namespace ARMeilleure.State private const int TotalSize = RegisterConsts.IntRegsCount * IntSize + RegisterConsts.VecRegsCount * VecSize + - RegisterConsts.FlagsCount * FlagSize + ExtraSize; + RegisterConsts.FlagsCount * FlagSize + + RegisterConsts.FpFlagsCount * FlagSize + ExtraSize; public IntPtr BasePtr { get; } @@ -100,6 +101,38 @@ namespace ARMeilleure.State Marshal.WriteInt32(BasePtr, offset, value ? 1 : 0); } + public bool GetFPStateFlag(FPState flag) + { + if ((uint)flag >= RegisterConsts.FlagsCount) + { + throw new ArgumentException($"Invalid flag \"{flag}\" specified."); + } + + int offset = + RegisterConsts.IntRegsCount * IntSize + + RegisterConsts.VecRegsCount * VecSize + + RegisterConsts.FlagsCount * FlagSize + (int)flag * FlagSize; + + int value = Marshal.ReadInt32(BasePtr, offset); + + return value != 0; + } + + public void SetFPStateFlag(FPState flag, bool value) + { + if ((uint)flag >= RegisterConsts.FlagsCount) + { + throw new ArgumentException($"Invalid flag \"{flag}\" specified."); + } + + int offset = + RegisterConsts.IntRegsCount * IntSize + + RegisterConsts.VecRegsCount * VecSize + + RegisterConsts.FlagsCount * FlagSize + (int)flag * FlagSize; + + Marshal.WriteInt32(BasePtr, offset, value ? 
1 : 0); + } + public int GetCounter() { return Marshal.ReadInt32(BasePtr, GetCounterOffset()); @@ -144,9 +177,10 @@ namespace ARMeilleure.State public static int GetCounterOffset() { - return RegisterConsts.IntRegsCount * IntSize + - RegisterConsts.VecRegsCount * VecSize + - RegisterConsts.FlagsCount * FlagSize; + return RegisterConsts.IntRegsCount * IntSize + + RegisterConsts.VecRegsCount * VecSize + + RegisterConsts.FlagsCount * FlagSize + + RegisterConsts.FpFlagsCount * FlagSize; } public void Dispose() diff --git a/ARMeilleure/State/RegisterAlias.cs b/ARMeilleure/State/RegisterAlias.cs index ae0d456283..7ebfa27535 100644 --- a/ARMeilleure/State/RegisterAlias.cs +++ b/ARMeilleure/State/RegisterAlias.cs @@ -32,6 +32,7 @@ namespace ARMeilleure.State public const int SpFiq = 29; public const int LrFiq = 30; + public const int Aarch32Sp = 13; public const int Aarch32Lr = 14; public const int Aarch32Pc = 15; diff --git a/ARMeilleure/State/RegisterConsts.cs b/ARMeilleure/State/RegisterConsts.cs index a85117bb2b..d629408081 100644 --- a/ARMeilleure/State/RegisterConsts.cs +++ b/ARMeilleure/State/RegisterConsts.cs @@ -5,8 +5,10 @@ namespace ARMeilleure.State public const int IntRegsCount = 32; public const int VecRegsCount = 32; public const int FlagsCount = 32; + public const int FpFlagsCount = 32; public const int IntAndVecRegsCount = IntRegsCount + VecRegsCount; - public const int TotalCount = IntRegsCount + VecRegsCount + FlagsCount; + public const int FpFlagsOffset = IntRegsCount + VecRegsCount + FlagsCount; + public const int TotalCount = IntRegsCount + VecRegsCount + FlagsCount + FpFlagsCount; public const int ZeroIndex = 31; } diff --git a/ARMeilleure/Translation/RegisterUsage.cs b/ARMeilleure/Translation/RegisterUsage.cs index becaa24cd9..84dfce7b57 100644 --- a/ARMeilleure/Translation/RegisterUsage.cs +++ b/ARMeilleure/Translation/RegisterUsage.cs @@ -10,6 +10,7 @@ namespace ARMeilleure.Translation { private const long CallerSavedIntRegistersMask = 0x7fL << 9; 
private const long PStateNzcvFlagsMask = 0xfL << 60; + private const long FpStateNzcvFlagsMask = 0xfL << 60; private const long CallerSavedVecRegistersMask = 0xffffL << 16; @@ -68,7 +69,7 @@ namespace ARMeilleure.Translation } } - public static void RunPass(ControlFlowGraph cfg, bool isCompleteFunction) + public static void RunPass(ControlFlowGraph cfg, ExecutionMode mode, bool isCompleteFunction) { // Compute local register inputs and outputs used inside blocks. RegisterMask[] localInputs = new RegisterMask[cfg.Blocks.Count]; @@ -205,8 +206,8 @@ namespace ARMeilleure.Translation // It always needs a context load as it is the first block to run. if (block.Predecessors.Count == 0 || hasContextLoad) { - LoadLocals(block, globalInputs[block.Index].VecMask, RegisterType.Vector); - LoadLocals(block, globalInputs[block.Index].IntMask, RegisterType.Integer); + LoadLocals(block, globalInputs[block.Index].VecMask, RegisterType.Vector, mode); + LoadLocals(block, globalInputs[block.Index].IntMask, RegisterType.Integer, mode); } bool hasContextStore = HasContextStore(block); @@ -218,8 +219,8 @@ namespace ARMeilleure.Translation if (EndsWithReturn(block) || hasContextStore) { - StoreLocals(block, globalOutputs[block.Index].IntMask, RegisterType.Integer, isCompleteFunction); - StoreLocals(block, globalOutputs[block.Index].VecMask, RegisterType.Vector, isCompleteFunction); + StoreLocals(block, globalOutputs[block.Index].IntMask, RegisterType.Integer, mode, isCompleteFunction); + StoreLocals(block, globalOutputs[block.Index].VecMask, RegisterType.Vector, mode, isCompleteFunction); } } } @@ -263,6 +264,7 @@ namespace ARMeilleure.Translation { case RegisterType.Flag: intMask = (1L << RegsCount) << register.Index; break; case RegisterType.Integer: intMask = 1L << register.Index; break; + case RegisterType.FpFlag: vecMask = (1L << RegsCount) << register.Index; break; case RegisterType.Vector: vecMask = 1L << register.Index; break; } @@ -278,7 +280,7 @@ namespace 
ARMeilleure.Translation return oldValue != value; } - private static void LoadLocals(BasicBlock block, long inputs, RegisterType baseType) + private static void LoadLocals(BasicBlock block, long inputs, RegisterType baseType, ExecutionMode mode) { Operand arg0 = Local(OperandType.I64); @@ -291,7 +293,7 @@ namespace ARMeilleure.Translation continue; } - Operand dest = GetRegFromBit(bit, baseType); + Operand dest = GetRegFromBit(bit, baseType, mode); long offset = NativeContext.GetRegisterOffset(dest.GetRegister()); @@ -311,7 +313,7 @@ namespace ARMeilleure.Translation block.Operations.AddFirst(loadArg0); } - private static void StoreLocals(BasicBlock block, long outputs, RegisterType baseType, bool isCompleteFunction) + private static void StoreLocals(BasicBlock block, long outputs, RegisterType baseType, ExecutionMode mode, bool isCompleteFunction) { if (Optimizations.AssumeStrictAbiCompliance && isCompleteFunction) { @@ -319,7 +321,7 @@ namespace ARMeilleure.Translation { outputs = ClearCallerSavedIntRegs(outputs); } - else /* if (baseType == RegisterType.Vector) */ + else /* if (baseType == RegisterType.Vector || baseType == RegisterType.FpFlag) */ { outputs = ClearCallerSavedVecRegs(outputs); } @@ -340,7 +342,7 @@ namespace ARMeilleure.Translation continue; } - Operand source = GetRegFromBit(bit, baseType); + Operand source = GetRegFromBit(bit, baseType, mode); long offset = NativeContext.GetRegisterOffset(source.GetRegister()); @@ -356,28 +358,33 @@ namespace ARMeilleure.Translation } } - private static Operand GetRegFromBit(int bit, RegisterType baseType) + private static Operand GetRegFromBit(int bit, RegisterType baseType, ExecutionMode mode) { if (bit < RegsCount) { - return new Operand(bit, baseType, GetOperandType(baseType)); + return new Operand(bit, baseType, GetOperandType(baseType, mode)); } else if (baseType == RegisterType.Integer) { return new Operand(bit & RegsMask, RegisterType.Flag, OperandType.I32); } + else if (baseType == RegisterType.Vector) 
+ { + return new Operand(bit & RegsMask, RegisterType.FpFlag, OperandType.I32); + } else { throw new ArgumentOutOfRangeException(nameof(bit)); } } - private static OperandType GetOperandType(RegisterType type) + private static OperandType GetOperandType(RegisterType type, ExecutionMode mode) { switch (type) { case RegisterType.Flag: return OperandType.I32; - case RegisterType.Integer: return OperandType.I64; + case RegisterType.FpFlag: return OperandType.I32; + case RegisterType.Integer: return (mode == ExecutionMode.Aarch64) ? OperandType.I64 : OperandType.I32; case RegisterType.Vector: return OperandType.V128; } @@ -405,7 +412,7 @@ namespace ARMeilleure.Translation private static long ClearCallerSavedVecRegs(long mask) { // TODO: ARM32 support. - mask &= ~CallerSavedVecRegistersMask; + mask &= ~(CallerSavedVecRegistersMask | FpStateNzcvFlagsMask); return mask; } diff --git a/ARMeilleure/Translation/RejitRequest.cs b/ARMeilleure/Translation/RejitRequest.cs new file mode 100644 index 0000000000..e0b0e0b92a --- /dev/null +++ b/ARMeilleure/Translation/RejitRequest.cs @@ -0,0 +1,16 @@ +using ARMeilleure.State; + +namespace ARMeilleure.Translation +{ + struct RejitRequest + { + public ulong Address; + public ExecutionMode Mode; + + public RejitRequest(ulong address, ExecutionMode mode) + { + Address = address; + Mode = mode; + } + } +} diff --git a/ARMeilleure/Translation/SsaConstruction.cs b/ARMeilleure/Translation/SsaConstruction.cs index 292e74e36b..46435f4446 100644 --- a/ARMeilleure/Translation/SsaConstruction.cs +++ b/ARMeilleure/Translation/SsaConstruction.cs @@ -268,10 +268,14 @@ namespace ARMeilleure.Translation { return RegisterConsts.IntRegsCount + reg.Index; } - else /* if (reg.Type == RegisterType.Flag) */ + else if (reg.Type == RegisterType.Flag) { return RegisterConsts.IntAndVecRegsCount + reg.Index; } + else /* if (reg.Type == RegisterType.FpFlag) */ + { + return RegisterConsts.FpFlagsOffset + reg.Index; + } } private static Register 
GetRegisterFromId(int id) @@ -284,10 +288,14 @@ namespace ARMeilleure.Translation { return new Register(id - RegisterConsts.IntRegsCount, RegisterType.Vector); } - else /* if (id < RegisterConsts.TotalCount) */ + else if (id < RegisterConsts.FpFlagsOffset) { return new Register(id - RegisterConsts.IntAndVecRegsCount, RegisterType.Flag); } + else /* if (id < RegisterConsts.TotalCount) */ + { + return new Register(id - RegisterConsts.FpFlagsOffset, RegisterType.FpFlag); + } } } } \ No newline at end of file diff --git a/ARMeilleure/Translation/Translator.cs b/ARMeilleure/Translation/Translator.cs index 4725ca59d8..3008303e76 100644 --- a/ARMeilleure/Translation/Translator.cs +++ b/ARMeilleure/Translation/Translator.cs @@ -20,7 +20,7 @@ namespace ARMeilleure.Translation private ConcurrentDictionary _funcs; - private PriorityQueue _backgroundQueue; + private PriorityQueue _backgroundQueue; private AutoResetEvent _backgroundTranslatorEvent; @@ -32,7 +32,7 @@ namespace ARMeilleure.Translation _funcs = new ConcurrentDictionary(); - _backgroundQueue = new PriorityQueue(2); + _backgroundQueue = new PriorityQueue(2); _backgroundTranslatorEvent = new AutoResetEvent(false); } @@ -41,11 +41,11 @@ namespace ARMeilleure.Translation { while (_threadCount != 0) { - if (_backgroundQueue.TryDequeue(out ulong address)) + if (_backgroundQueue.TryDequeue(out RejitRequest request)) { - TranslatedFunction func = Translate(address, ExecutionMode.Aarch64, highCq: true); + TranslatedFunction func = Translate(request.Address, request.Mode, highCq: true); - _funcs.AddOrUpdate(address, func, (key, oldFunc) => func); + _funcs.AddOrUpdate(request.Address, func, (key, oldFunc) => func); } else { @@ -114,7 +114,7 @@ namespace ARMeilleure.Translation } else if (isCallTarget && func.ShouldRejit()) { - _backgroundQueue.Enqueue(0, address); + _backgroundQueue.Enqueue(0, new RejitRequest(address, mode)); _backgroundTranslatorEvent.Set(); } @@ -149,7 +149,7 @@ namespace ARMeilleure.Translation 
Logger.StartPass(PassName.RegisterUsage); - RegisterUsage.RunPass(cfg, isCompleteFunction: false); + RegisterUsage.RunPass(cfg, mode, isCompleteFunction: false); Logger.EndPass(PassName.RegisterUsage); diff --git a/Ryujinx.Tests.Unicorn/Native/Arm32Register.cs b/Ryujinx.Tests.Unicorn/Native/Arm32Register.cs new file mode 100644 index 0000000000..f34b42a917 --- /dev/null +++ b/Ryujinx.Tests.Unicorn/Native/Arm32Register.cs @@ -0,0 +1,139 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Ryujinx.Tests.Unicorn.Native +{ + public enum Arm32Register + { + INVALID = 0, + + APSR, + APSR_NZCV, + CPSR, + FPEXC, + FPINST, + FPSCR, + FPSCR_NZCV, + FPSID, + ITSTATE, + LR, + PC, + SP, + SPSR, + D0, + D1, + D2, + D3, + D4, + D5, + D6, + D7, + D8, + D9, + D10, + D11, + D12, + D13, + D14, + D15, + D16, + D17, + D18, + D19, + D20, + D21, + D22, + D23, + D24, + D25, + D26, + D27, + D28, + D29, + D30, + D31, + FPINST2, + MVFR0, + MVFR1, + MVFR2, + Q0, + Q1, + Q2, + Q3, + Q4, + Q5, + Q6, + Q7, + Q8, + Q9, + Q10, + Q11, + Q12, + Q13, + Q14, + Q15, + R0, + R1, + R2, + R3, + R4, + R5, + R6, + R7, + R8, + R9, + R10, + R11, + R12, + S0, + S1, + S2, + S3, + S4, + S5, + S6, + S7, + S8, + S9, + S10, + S11, + S12, + S13, + S14, + S15, + S16, + S17, + S18, + S19, + S20, + S21, + S22, + S23, + S24, + S25, + S26, + S27, + S28, + S29, + S30, + S31, + C1_C0_2, + C13_C0_2, + C13_C0_3, + IPSR, + MSP, + PSP, + CONTROL, + ENDING, + + // Alias registers. 
+ R13 = SP, + R14 = LR, + R15 = PC, + SB = R9, + SL = R10, + FP = R11, + IP = R12, + } +} diff --git a/Ryujinx.Tests.Unicorn/UnicornAArch32.cs b/Ryujinx.Tests.Unicorn/UnicornAArch32.cs new file mode 100644 index 0000000000..d7ae90d6e9 --- /dev/null +++ b/Ryujinx.Tests.Unicorn/UnicornAArch32.cs @@ -0,0 +1,280 @@ +using Ryujinx.Tests.Unicorn.Native; +using System; + +namespace Ryujinx.Tests.Unicorn +{ + public class UnicornAArch32 + { + internal readonly IntPtr uc; + + public IndexedProperty R + { + get + { + return new IndexedProperty( + (int i) => GetX(i), + (int i, uint value) => SetX(i, value)); + } + } + + public IndexedProperty Q + { + get + { + return new IndexedProperty( + (int i) => GetQ(i), + (int i, SimdValue value) => SetQ(i, value)); + } + } + + public uint LR + { + get => GetRegister(Arm32Register.LR); + set => SetRegister(Arm32Register.LR, value); + } + + public uint SP + { + get => GetRegister(Arm32Register.SP); + set => SetRegister(Arm32Register.SP, value); + } + + public uint PC + { + get => GetRegister(Arm32Register.PC); + set => SetRegister(Arm32Register.PC, value); + } + + public uint APSR + { + get => (uint)GetRegister(Arm32Register.APSR); + set => SetRegister(Arm32Register.APSR, (uint)value); + } + + public int Fpscr + { + get => (int)GetRegister(Arm32Register.FPSCR) | ((int)GetRegister(Arm32Register.FPSCR_NZCV)); + set => SetRegister(Arm32Register.FPSCR, (uint)value); + } + + public bool OverflowFlag + { + get => (APSR & 0x10000000u) != 0; + set => APSR = (APSR & ~0x10000000u) | (value ? 0x10000000u : 0u); + } + + public bool CarryFlag + { + get => (APSR & 0x20000000u) != 0; + set => APSR = (APSR & ~0x20000000u) | (value ? 0x20000000u : 0u); + } + + public bool ZeroFlag + { + get => (APSR & 0x40000000u) != 0; + set => APSR = (APSR & ~0x40000000u) | (value ? 0x40000000u : 0u); + } + + public bool NegativeFlag + { + get => (APSR & 0x80000000u) != 0; + set => APSR = (APSR & ~0x80000000u) | (value ? 
0x80000000u : 0u); + } + + public UnicornAArch32() + { + Interface.Checked(Interface.uc_open(UnicornArch.UC_ARCH_ARM, UnicornMode.UC_MODE_LITTLE_ENDIAN, out uc)); + + SetRegister(Arm32Register.C1_C0_2, GetRegister(Arm32Register.C1_C0_2) | 0xf00000); + SetRegister(Arm32Register.FPEXC, 0x40000000); + } + + ~UnicornAArch32() + { + Interface.Checked(Native.Interface.uc_close(uc)); + } + + public void RunForCount(ulong count) + { + Interface.Checked(Native.Interface.uc_emu_start(uc, this.PC, 0xFFFFFFFFFFFFFFFFu, 0, count)); + } + + public void Step() + { + RunForCount(1); + } + + private static Arm32Register[] XRegisters = new Arm32Register[16] + { + Arm32Register.R0, + Arm32Register.R1, + Arm32Register.R2, + Arm32Register.R3, + Arm32Register.R4, + Arm32Register.R5, + Arm32Register.R6, + Arm32Register.R7, + Arm32Register.R8, + Arm32Register.R9, + Arm32Register.R10, + Arm32Register.R11, + Arm32Register.R12, + Arm32Register.R13, + Arm32Register.R14, + Arm32Register.R15, + }; + + private static Arm32Register[] QRegisters = new Arm32Register[16] + { + Arm32Register.Q0, + Arm32Register.Q1, + Arm32Register.Q2, + Arm32Register.Q3, + Arm32Register.Q4, + Arm32Register.Q5, + Arm32Register.Q6, + Arm32Register.Q7, + Arm32Register.Q8, + Arm32Register.Q9, + Arm32Register.Q10, + Arm32Register.Q11, + Arm32Register.Q12, + Arm32Register.Q13, + Arm32Register.Q14, + Arm32Register.Q15 + }; + + public uint GetX(int index) + { + if ((uint)index > 15) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + return GetRegister(XRegisters[index]); + } + + public void SetX(int index, uint value) + { + if ((uint)index > 15) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + SetRegister(XRegisters[index], value); + } + + public SimdValue GetQ(int index) + { + if ((uint)index > 15) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + // Getting quadword registers from Unicorn A32 seems to be broken, so we combine its 2 doubleword registers instead. 
+ return GetVector((Arm32Register)((int)Arm32Register.D0 + index * 2)); + } + + public void SetQ(int index, SimdValue value) + { + if ((uint)index > 15) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + SetVector((Arm32Register)((int)Arm32Register.D0 + index * 2), value); + } + + public uint GetRegister(Arm32Register register) + { + byte[] data = new byte[4]; + + Interface.Checked(Native.Interface.uc_reg_read(uc, (int)register, data)); + + return (uint)BitConverter.ToInt32(data, 0); + } + + public void SetRegister(Arm32Register register, uint value) + { + byte[] data = BitConverter.GetBytes(value); + + Interface.Checked(Interface.uc_reg_write(uc, (int)register, data)); + } + + public SimdValue GetVector(Arm32Register register) + { + byte[] data = new byte[8]; + + Interface.Checked(Interface.uc_reg_read(uc, (int)register, data)); + ulong lo = BitConverter.ToUInt64(data, 0); + Interface.Checked(Interface.uc_reg_read(uc, (int)register + 1, data)); + ulong hi = BitConverter.ToUInt64(data, 0); + + return new SimdValue(lo, hi); + } + + private void SetVector(Arm32Register register, SimdValue value) + { + byte[] data = BitConverter.GetBytes(value.GetUInt64(0)); + Interface.Checked(Interface.uc_reg_write(uc, (int)register, data)); + data = BitConverter.GetBytes(value.GetUInt64(1)); + Interface.Checked(Interface.uc_reg_write(uc, (int)register + 1, data)); + } + + public byte[] MemoryRead(ulong address, ulong size) + { + byte[] value = new byte[size]; + + Interface.Checked(Interface.uc_mem_read(uc, address, value, size)); + + return value; + } + + public byte MemoryRead8(ulong address) => MemoryRead(address, 1)[0]; + public UInt16 MemoryRead16(ulong address) => (UInt16)BitConverter.ToInt16(MemoryRead(address, 2), 0); + public UInt32 MemoryRead32(ulong address) => (UInt32)BitConverter.ToInt32(MemoryRead(address, 4), 0); + public UInt64 MemoryRead64(ulong address) => (UInt64)BitConverter.ToInt64(MemoryRead(address, 8), 0); + + public void MemoryWrite(ulong 
address, byte[] value) + { + Interface.Checked(Interface.uc_mem_write(uc, address, value, (ulong)value.Length)); + } + + public void MemoryWrite8(ulong address, byte value) => MemoryWrite(address, new byte[] { value }); + public void MemoryWrite16(ulong address, Int16 value) => MemoryWrite(address, BitConverter.GetBytes(value)); + public void MemoryWrite16(ulong address, UInt16 value) => MemoryWrite(address, BitConverter.GetBytes(value)); + public void MemoryWrite32(ulong address, Int32 value) => MemoryWrite(address, BitConverter.GetBytes(value)); + public void MemoryWrite32(ulong address, UInt32 value) => MemoryWrite(address, BitConverter.GetBytes(value)); + public void MemoryWrite64(ulong address, Int64 value) => MemoryWrite(address, BitConverter.GetBytes(value)); + public void MemoryWrite64(ulong address, UInt64 value) => MemoryWrite(address, BitConverter.GetBytes(value)); + + public void MemoryMap(ulong address, ulong size, MemoryPermission permissions) + { + Interface.Checked(Interface.uc_mem_map(uc, address, size, (uint)permissions)); + } + + public void MemoryUnmap(ulong address, ulong size) + { + Interface.Checked(Interface.uc_mem_unmap(uc, address, size)); + } + + public void MemoryProtect(ulong address, ulong size, MemoryPermission permissions) + { + Interface.Checked(Interface.uc_mem_protect(uc, address, size, (uint)permissions)); + } + + public static bool IsAvailable() + { + try + { + Interface.uc_version(out _, out _); + + return true; + } + catch (DllNotFoundException) + { + return false; + } + } + } +} diff --git a/Ryujinx.Tests/Cpu/CpuTest32.cs b/Ryujinx.Tests/Cpu/CpuTest32.cs new file mode 100644 index 0000000000..ed1d23d37b --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTest32.cs @@ -0,0 +1,530 @@ +using ARMeilleure.Memory; +using ARMeilleure.State; +using ARMeilleure.Translation; +using NUnit.Framework; +using Ryujinx.Tests.Unicorn; +using System; +using System.Runtime.InteropServices; + +namespace Ryujinx.Tests.Cpu +{ + [TestFixture] + public class 
CpuTest32 + { + private uint _currAddress; + private long _size; + + private uint _entryPoint; + + private IntPtr _ramPointer; + + private MemoryManager _memory; + + private ExecutionContext _context; + + private Translator _translator; + + private static bool _unicornAvailable; + private UnicornAArch32 _unicornEmu; + + private bool usingMemory; + + static CpuTest32() + { + _unicornAvailable = UnicornAArch32.IsAvailable(); + + if (!_unicornAvailable) + { + Console.WriteLine("WARNING: Could not find Unicorn."); + } + } + + [SetUp] + public void Setup() + { + _currAddress = 0x1000; + _size = 0x1000; + + _entryPoint = _currAddress; + + _ramPointer = Marshal.AllocHGlobal(new IntPtr(_size * 2)); + _memory = new MemoryManager(_ramPointer, addressSpaceBits: 16, useFlatPageTable: true); + _memory.Map((long)_currAddress, 0, _size*2); + + _context = new ExecutionContext(); + _context.IsAarch32 = true; + + _translator = new Translator(_memory); + + if (_unicornAvailable) + { + _unicornEmu = new UnicornAArch32(); + _unicornEmu.MemoryMap(_currAddress, (ulong)_size, MemoryPermission.READ | MemoryPermission.EXEC); + _unicornEmu.MemoryMap((ulong)(_currAddress + _size), (ulong)_size, MemoryPermission.READ | MemoryPermission.WRITE); + _unicornEmu.PC = _entryPoint; + } + } + + [TearDown] + public void Teardown() + { + Marshal.FreeHGlobal(_ramPointer); + _memory = null; + _context = null; + _translator = null; + _unicornEmu = null; + } + + protected void Reset() + { + Teardown(); + Setup(); + } + + protected void Opcode(uint opcode) + { + _memory.WriteUInt32((long)_currAddress, opcode); + + if (_unicornAvailable) + { + _unicornEmu.MemoryWrite32((ulong)_currAddress, opcode); + } + + _currAddress += 4; + } + + protected ExecutionContext GetContext() => _context; + protected void SetContext(uint r0 = 0, + uint r1 = 0, + uint r2 = 0, + uint r3 = 0, + uint sp = 0, + V128 v0 = default, + V128 v1 = default, + V128 v2 = default, + V128 v3 = default, + V128 v4 = default, + V128 v5 = default, + 
V128 v14 = default, + V128 v15 = default, + bool overflow = false, + bool carry = false, + bool zero = false, + bool negative = false, + int fpscr = 0) + { + _context.SetX(0, r0); + _context.SetX(1, r1); + _context.SetX(2, r2); + _context.SetX(3, r3); + + _context.SetX(0xd, sp); + + _context.SetV(0, v0); + _context.SetV(1, v1); + _context.SetV(2, v2); + _context.SetV(3, v3); + _context.SetV(4, v4); + _context.SetV(5, v5); + _context.SetV(14, v14); + _context.SetV(15, v15); + + _context.SetPstateFlag(PState.VFlag, overflow); + _context.SetPstateFlag(PState.CFlag, carry); + _context.SetPstateFlag(PState.ZFlag, zero); + _context.SetPstateFlag(PState.NFlag, negative); + + _context.Fpsr = FPSR.A32Mask & (FPSR)fpscr; + _context.Fpcr = FPCR.A32Mask & (FPCR)fpscr; + + if (_unicornAvailable) + { + _unicornEmu.R[0] = r0; + _unicornEmu.R[1] = r1; + _unicornEmu.R[2] = r2; + _unicornEmu.R[3] = r3; + + _unicornEmu.SP = sp; + + _unicornEmu.Q[0] = V128ToSimdValue(v0); + _unicornEmu.Q[1] = V128ToSimdValue(v1); + _unicornEmu.Q[2] = V128ToSimdValue(v2); + _unicornEmu.Q[3] = V128ToSimdValue(v3); + _unicornEmu.Q[4] = V128ToSimdValue(v4); + _unicornEmu.Q[5] = V128ToSimdValue(v5); + _unicornEmu.Q[14] = V128ToSimdValue(v14); + _unicornEmu.Q[15] = V128ToSimdValue(v15); + + _unicornEmu.OverflowFlag = overflow; + _unicornEmu.CarryFlag = carry; + _unicornEmu.ZeroFlag = zero; + _unicornEmu.NegativeFlag = negative; + + _unicornEmu.Fpscr = fpscr; + } + } + + protected void ExecuteOpcodes() + { + _translator.Execute(_context, _entryPoint); + + if (_unicornAvailable) + { + _unicornEmu.RunForCount((ulong)(_currAddress - _entryPoint - 4) / 4); + } + } + + protected ExecutionContext SingleOpcode(uint opcode, + uint r0 = 0, + uint r1 = 0, + uint r2 = 0, + uint r3 = 0, + uint sp = 0, + V128 v0 = default, + V128 v1 = default, + V128 v2 = default, + V128 v3 = default, + V128 v4 = default, + V128 v5 = default, + V128 v14 = default, + V128 v15 = default, + bool overflow = false, + bool carry = false, + 
bool zero = false, + bool negative = false, + int fpscr = 0, + bool copyFpFlags = false) + { + Opcode(opcode); + if (copyFpFlags) + { + Opcode(0xeef1fa10); /* VMRS APSR_nzcv, FPSCR */ + } + Opcode(0xe12fff1e); // BX LR + SetContext(r0, r1, r2, r3, sp, v0, v1, v2, v3, v4, v5, v14, v15, overflow, carry, zero, negative, fpscr); + ExecuteOpcodes(); + + return GetContext(); + } + + protected void SetWorkingMemory(byte[] data) + { + _memory.WriteBytes(0x2000, data); + + if (_unicornAvailable) + { + _unicornEmu.MemoryWrite((ulong)(0x2000), data); + } + + usingMemory = true; // When true, CompareAgainstUnicorn checks the working memory for equality too. + } + + /// Rounding Mode control field. + public enum RMode + { + /// Round to Nearest mode. + Rn, + /// Round towards Plus Infinity mode. + Rp, + /// Round towards Minus Infinity mode. + Rm, + /// Round towards Zero mode. + Rz + }; + + /// Floating-point Control Register. Members are bit positions (shift amounts), not masks. + protected enum Fpcr + { + /// Rounding Mode control field. + RMode = 22, + /// Flush-to-zero mode control bit. + Fz = 24, + /// Default NaN mode control bit. + Dn = 25, + /// Alternative half-precision control bit. + Ahp = 26 + } + + /// Floating-point Status Register. Members are bit masks. + [Flags] + protected enum Fpsr + { + None = 0, + + /// Invalid Operation cumulative floating-point exception bit. + Ioc = 1 << 0, + /// Divide by Zero cumulative floating-point exception bit. + Dzc = 1 << 1, + /// Overflow cumulative floating-point exception bit. + Ofc = 1 << 2, + /// Underflow cumulative floating-point exception bit. + Ufc = 1 << 3, + /// Inexact cumulative floating-point exception bit. + Ixc = 1 << 4, + /// Input Denormal cumulative floating-point exception bit. + Idc = 1 << 7, + + /// Cumulative saturation bit.
+ Qc = 1 << 27, + + /// NZCV flags + Nzcv = (1 << 28) | (1 << 29) | (1 << 30) | (1 << 31) + } + + [Flags] + protected enum FpSkips + { + None = 0, + + IfNaNS = 1, + IfNaND = 2, + + IfUnderflow = 4, + IfOverflow = 8 + } + + protected enum FpTolerances + { + None, + + UpToOneUlpsS, + UpToOneUlpsD + } + + protected void CompareAgainstUnicorn( + Fpsr fpsrMask = Fpsr.None, + FpSkips fpSkips = FpSkips.None, + FpTolerances fpTolerances = FpTolerances.None) + { + if (!_unicornAvailable) + { + return; + } + + if (fpSkips != FpSkips.None) + { + ManageFpSkips(fpSkips); + } + + Assert.That(_context.GetX(0), Is.EqualTo(_unicornEmu.R[0])); + Assert.That(_context.GetX(1), Is.EqualTo(_unicornEmu.R[1])); + Assert.That(_context.GetX(2), Is.EqualTo(_unicornEmu.R[2])); + Assert.That(_context.GetX(3), Is.EqualTo(_unicornEmu.R[3])); + Assert.That(_context.GetX(4), Is.EqualTo(_unicornEmu.R[4])); + Assert.That(_context.GetX(5), Is.EqualTo(_unicornEmu.R[5])); + Assert.That(_context.GetX(6), Is.EqualTo(_unicornEmu.R[6])); + Assert.That(_context.GetX(7), Is.EqualTo(_unicornEmu.R[7])); + Assert.That(_context.GetX(8), Is.EqualTo(_unicornEmu.R[8])); + Assert.That(_context.GetX(9), Is.EqualTo(_unicornEmu.R[9])); + Assert.That(_context.GetX(10), Is.EqualTo(_unicornEmu.R[10])); + Assert.That(_context.GetX(11), Is.EqualTo(_unicornEmu.R[11])); + Assert.That(_context.GetX(12), Is.EqualTo(_unicornEmu.R[12])); + Assert.That(_context.GetX(13), Is.EqualTo(_unicornEmu.R[13])); + Assert.That(_context.GetX(14), Is.EqualTo(_unicornEmu.R[14])); + + if (fpTolerances == FpTolerances.None) + { + Assert.That(V128ToSimdValue(_context.GetV(0)), Is.EqualTo(_unicornEmu.Q[0])); + } + else + { + ManageFpTolerances(fpTolerances); + } + Assert.That(V128ToSimdValue(_context.GetV(1)), Is.EqualTo(_unicornEmu.Q[1])); + Assert.That(V128ToSimdValue(_context.GetV(2)), Is.EqualTo(_unicornEmu.Q[2])); + Assert.That(V128ToSimdValue(_context.GetV(3)), Is.EqualTo(_unicornEmu.Q[3])); + Assert.That(V128ToSimdValue(_context.GetV(4)), 
Is.EqualTo(_unicornEmu.Q[4])); + Assert.That(V128ToSimdValue(_context.GetV(5)), Is.EqualTo(_unicornEmu.Q[5])); + Assert.That(V128ToSimdValue(_context.GetV(6)), Is.EqualTo(_unicornEmu.Q[6])); + Assert.That(V128ToSimdValue(_context.GetV(7)), Is.EqualTo(_unicornEmu.Q[7])); + Assert.That(V128ToSimdValue(_context.GetV(8)), Is.EqualTo(_unicornEmu.Q[8])); + Assert.That(V128ToSimdValue(_context.GetV(9)), Is.EqualTo(_unicornEmu.Q[9])); + Assert.That(V128ToSimdValue(_context.GetV(10)), Is.EqualTo(_unicornEmu.Q[10])); + Assert.That(V128ToSimdValue(_context.GetV(11)), Is.EqualTo(_unicornEmu.Q[11])); + Assert.That(V128ToSimdValue(_context.GetV(12)), Is.EqualTo(_unicornEmu.Q[12])); + Assert.That(V128ToSimdValue(_context.GetV(13)), Is.EqualTo(_unicornEmu.Q[13])); + Assert.That(V128ToSimdValue(_context.GetV(14)), Is.EqualTo(_unicornEmu.Q[14])); + Assert.That(V128ToSimdValue(_context.GetV(15)), Is.EqualTo(_unicornEmu.Q[15])); + + Assert.That((int)_context.Fpcr | ((int)_context.Fpsr & (int)fpsrMask), Is.EqualTo(_unicornEmu.Fpscr)); + + Assert.That(_context.GetPstateFlag(PState.VFlag), Is.EqualTo(_unicornEmu.OverflowFlag)); + Assert.That(_context.GetPstateFlag(PState.CFlag), Is.EqualTo(_unicornEmu.CarryFlag)); + Assert.That(_context.GetPstateFlag(PState.ZFlag), Is.EqualTo(_unicornEmu.ZeroFlag)); + Assert.That(_context.GetPstateFlag(PState.NFlag), Is.EqualTo(_unicornEmu.NegativeFlag)); + + if (usingMemory) + { + byte[] meilleureMem = _memory.ReadBytes((long)(0x2000), _size); + byte[] unicornMem = _unicornEmu.MemoryRead((ulong)(0x2000), (ulong)_size); + + for (int i = 0; i < _size; i++) + { + Assert.AreEqual(meilleureMem[i], unicornMem[i]); + } + } + } + + private void ManageFpSkips(FpSkips fpSkips) + { + if (fpSkips.HasFlag(FpSkips.IfNaNS)) + { + if (float.IsNaN(_unicornEmu.Q[0].AsFloat())) + { + Assert.Ignore("NaN test."); + } + } + else if (fpSkips.HasFlag(FpSkips.IfNaND)) + { + if (double.IsNaN(_unicornEmu.Q[0].AsDouble())) + { + Assert.Ignore("NaN test."); + } + } + + if 
(fpSkips.HasFlag(FpSkips.IfUnderflow)) + { + if ((_unicornEmu.Fpscr & (int)Fpsr.Ufc) != 0) + { + Assert.Ignore("Underflow test."); + } + } + + if (fpSkips.HasFlag(FpSkips.IfOverflow)) + { + if ((_unicornEmu.Fpscr & (int)Fpsr.Ofc) != 0) + { + Assert.Ignore("Overflow test."); + } + } + } + + private void ManageFpTolerances(FpTolerances fpTolerances) + { + bool IsNormalOrSubnormalS(float f) => float.IsNormal(f) || float.IsSubnormal(f); + bool IsNormalOrSubnormalD(double d) => double.IsNormal(d) || double.IsSubnormal(d); + + if (!Is.EqualTo(_unicornEmu.Q[0]).ApplyTo(V128ToSimdValue(_context.GetV(0))).IsSuccess) + { + if (fpTolerances == FpTolerances.UpToOneUlpsS) + { + if (IsNormalOrSubnormalS(_unicornEmu.Q[0].AsFloat()) && + IsNormalOrSubnormalS(_context.GetV(0).AsFloat())) + { + Assert.That(_context.GetV(0).GetFloat(0), + Is.EqualTo(_unicornEmu.Q[0].GetFloat(0)).Within(1).Ulps); + Assert.That(_context.GetV(0).GetFloat(1), + Is.EqualTo(_unicornEmu.Q[0].GetFloat(1)).Within(1).Ulps); + Assert.That(_context.GetV(0).GetFloat(2), + Is.EqualTo(_unicornEmu.Q[0].GetFloat(2)).Within(1).Ulps); + Assert.That(_context.GetV(0).GetFloat(3), + Is.EqualTo(_unicornEmu.Q[0].GetFloat(3)).Within(1).Ulps); + + Console.WriteLine(fpTolerances); + } + else + { + Assert.That(V128ToSimdValue(_context.GetV(0)), Is.EqualTo(_unicornEmu.Q[0])); + } + } + + if (fpTolerances == FpTolerances.UpToOneUlpsD) + { + if (IsNormalOrSubnormalD(_unicornEmu.Q[0].AsDouble()) && + IsNormalOrSubnormalD(_context.GetV(0).AsDouble())) + { + Assert.That(_context.GetV(0).GetDouble(0), + Is.EqualTo(_unicornEmu.Q[0].GetDouble(0)).Within(1).Ulps); + Assert.That(_context.GetV(0).GetDouble(1), + Is.EqualTo(_unicornEmu.Q[0].GetDouble(1)).Within(1).Ulps); + + Console.WriteLine(fpTolerances); + } + else + { + Assert.That(V128ToSimdValue(_context.GetV(0)), Is.EqualTo(_unicornEmu.Q[0])); + } + } + } + } + + private static SimdValue V128ToSimdValue(V128 value) + { + return new SimdValue(value.GetUInt64(0), value.GetUInt64(1)); + 
} + + protected static V128 MakeVectorScalar(float value) => new V128(value); + protected static V128 MakeVectorScalar(double value) => new V128(value); + + protected static V128 MakeVectorE0(ulong e0) => new V128(e0, 0); + protected static V128 MakeVectorE1(ulong e1) => new V128(0, e1); + + protected static V128 MakeVectorE0E1(ulong e0, ulong e1) => new V128(e0, e1); + + protected static ulong GetVectorE0(V128 vector) => vector.GetUInt64(0); + protected static ulong GetVectorE1(V128 vector) => vector.GetUInt64(1); + + protected static ushort GenNormalH() + { + uint rnd; + + do rnd = TestContext.CurrentContext.Random.NextUShort(); + while ((rnd & 0x7C00u) == 0u || + (~rnd & 0x7C00u) == 0u); + + return (ushort)rnd; + } + + protected static ushort GenSubnormalH() + { + uint rnd; + + do rnd = TestContext.CurrentContext.Random.NextUShort(); + while ((rnd & 0x03FFu) == 0u); + + return (ushort)(rnd & 0x83FFu); + } + + protected static uint GenNormalS() + { + uint rnd; + + do rnd = TestContext.CurrentContext.Random.NextUInt(); + while ((rnd & 0x7F800000u) == 0u || + (~rnd & 0x7F800000u) == 0u); + + return rnd; + } + + protected static uint GenSubnormalS() + { + uint rnd; + + do rnd = TestContext.CurrentContext.Random.NextUInt(); + while ((rnd & 0x007FFFFFu) == 0u); + + return rnd & 0x807FFFFFu; + } + + protected static ulong GenNormalD() + { + ulong rnd; + + do rnd = TestContext.CurrentContext.Random.NextULong(); + while ((rnd & 0x7FF0000000000000ul) == 0ul || + (~rnd & 0x7FF0000000000000ul) == 0ul); + + return rnd; + } + + protected static ulong GenSubnormalD() + { + ulong rnd; + + do rnd = TestContext.CurrentContext.Random.NextULong(); + while ((rnd & 0x000FFFFFFFFFFFFFul) == 0ul); + + return rnd & 0x800FFFFFFFFFFFFFul; + } + } +} diff --git a/Ryujinx.Tests/Cpu/CpuTestAlu32.cs b/Ryujinx.Tests/Cpu/CpuTestAlu32.cs new file mode 100644 index 0000000000..145417ae2c --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestAlu32.cs @@ -0,0 +1,61 @@ +#define Alu32 + +using 
NUnit.Framework; +using System; + +namespace Ryujinx.Tests.Cpu +{ + [Category("Alu32")] + public sealed class CpuTestAlu32 : CpuTest32 + { +#if Alu32 + +#region "ValueSource (Opcodes)" + private static uint[] _Lsr_Lsl_Asr_Ror_() + { + return new uint[] + { + 0xe1b00030u, // LSRS R0, R0, R0 + 0xe1b00010u, // LSLS R0, R0, R0 + 0xe1b00050u, // ASRS R0, R0, R0 + 0xe1b00070u // RORS R0, R0, R0 + }; + } +#endregion + + private const int RndCnt = 2; + + [Test, Pairwise, Description("RBIT , ")] + public void Rbit_32bit([Values(0u, 0xdu)] uint rd, + [Values(1u, 0xdu)] uint rm, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn) + { + uint opcode = 0xe6ff0f30u; // RBIT R0, R0 + opcode |= ((rm & 15) << 0) | ((rd & 15) << 12); + + uint w31 = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r1: wn, sp: w31); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] + public void Lsr_Lsl_Asr_Ror([ValueSource("_Lsr_Lsl_Asr_Ror_")] uint opcode, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint shiftValue, + [Range(0, 31)] [Values(32, 256, 768, -1, -23)] int shiftAmount) + { + uint rd = 0; + uint rm = 1; + uint rs = 2; + opcode |= ((rm & 15) << 0) | ((rd & 15) << 12) | ((rs & 15) << 8); + + SingleOpcode(opcode, r1: shiftValue, r2: (uint)shiftAmount); + + CompareAgainstUnicorn(); + } +#endif + } +} diff --git a/Ryujinx.Tests/Cpu/CpuTestAluRs32.cs b/Ryujinx.Tests/Cpu/CpuTestAluRs32.cs new file mode 100644 index 0000000000..25b2c96873 --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestAluRs32.cs @@ -0,0 +1,84 @@ +#define AluRs32 + +using NUnit.Framework; + +namespace Ryujinx.Tests.Cpu +{ + [Category("AluRs32")] + public sealed class CpuTestAluRs32 : CpuTest32 + { +#if AluRs32 + +#region "ValueSource (Opcodes)" + private static uint[] _Add_Adds_Rsb_Rsbs_() + { + return new uint[] + { + 0xe0800000u, // ADD R0, R0, R0, LSL #0 + 0xe0900000u, // ADDS R0, R0, R0, LSL #0 + 0xe0600000u, // RSB R0, R0, R0, 
LSL #0 + 0xe0700000u // RSBS R0, R0, R0, LSL #0 + }; + } + + private static uint[] _Adc_Adcs_Rsc_Rscs_Sbc_Sbcs_() + { + return new uint[] + { + 0xe0a00000u, // ADC R0, R0, R0 + 0xe0b00000u, // ADCS R0, R0, R0 + 0xe0e00000u, // RSC R0, R0, R0 + 0xe0f00000u, // RSCS R0, R0, R0 + 0xe0c00000u, // SBC R0, R0, R0 + 0xe0d00000u // SBCS R0, R0, R0 + }; + } +#endregion + + private const int RndCnt = 2; + private const int RndCntAmount = 2; + + [Test, Pairwise] + public void Adc_Adcs_Rsc_Rscs_Sbc_Sbcs([ValueSource("_Adc_Adcs_Rsc_Rscs_Sbc_Sbcs_")] uint opcode, + [Values(0u, 13u)] uint rd, + [Values(1u, 13u)] uint rn, + [Values(2u, 13u)] uint rm, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wm, + [Values] bool carryIn) + { + opcode |= ((rm & 15) << 0) | ((rn & 15) << 16) | ((rd & 15) << 12); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r1: wn, r2: wm, sp: sp, carry: carryIn); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] + public void Add_Adds_Rsb_Rsbs([ValueSource("_Add_Adds_Rsb_Rsbs_")] uint opcode, + [Values(0u, 13u)] uint rd, + [Values(1u, 13u)] uint rn, + [Values(2u, 13u)] uint rm, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wm, + [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint shift, // + [Values(0u, 15u, 16u, 31u)] [Random(0u, 31u, RndCntAmount)] uint amount) + { + opcode |= ((rm & 15) << 0) | ((rn & 15) << 16) | ((rd & 15) << 12); + opcode |= ((shift & 3) << 5) | ((amount & 31) << 7); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r1: wn, r2: wm, sp: sp); + + CompareAgainstUnicorn(); + } +#endif + } +} diff --git a/Ryujinx.Tests/Cpu/CpuTestBf32.cs b/Ryujinx.Tests/Cpu/CpuTestBf32.cs new file mode 100644 index 
0000000000..66b8fc0623 --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestBf32.cs @@ -0,0 +1,108 @@ +#define Bf32 + +using NUnit.Framework; +using System; + +namespace Ryujinx.Tests.Cpu +{ + [Category("Bf32")] + public sealed class CpuTestBf32 : CpuTest32 + { +#if Bf32 + private const int RndCnt = 2; + private const int RndCntImmr = 2; + private const int RndCntImms = 2; + + [Test, Pairwise, Description("BFC , #, #")] + public void Bfc([Values(0u, 0xdu)] uint rd, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wd, + [Values(0u, 15u, 16u, 31u)] [Random(0u, 31u, RndCntImmr)] uint lsb, + [Values(0u, 15u, 16u, 31u)] [Random(0u, 31u, RndCntImms)] uint msb) + { + msb = Math.Max(lsb, msb); // Don't test unpredictable for now. + uint opcode = 0xe7c0001fu; // BFC R0, #0, #1 + opcode |= ((rd & 0xf) << 12); + opcode |= ((msb & 31) << 16) | ((lsb & 31) << 7); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r0: wd, sp: sp); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("BFI , , #, #")] + public void Bfi([Values(0u, 0xdu)] uint rd, + [Values(1u, 0xdu)] uint rn, + [Random(RndCnt)] uint wd, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values(0u, 15u, 16u, 31u)] [Random(0u, 31u, RndCntImmr)] uint lsb, + [Values(0u, 15u, 16u, 31u)] [Random(0u, 31u, RndCntImms)] uint msb) + { + msb = Math.Max(lsb, msb); // Don't test unpredictable for now. 
+ uint opcode = 0xe7c00010u; // BFI R0, R0, #0, #1 + opcode |= ((rd & 0xf) << 12); + opcode |= ((rn & 0xf) << 0); + opcode |= ((msb & 31) << 16) | ((lsb & 31) << 7); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r0: wd, r1: wn, sp: sp); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("UBFX , , #, #")] + public void Ubfx([Values(0u, 0xdu)] uint rd, + [Values(1u, 0xdu)] uint rn, + [Random(RndCnt)] uint wd, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values(0u, 15u, 16u, 31u)] [Random(0u, 31u, RndCntImmr)] uint lsb, + [Values(0u, 15u, 16u, 31u)] [Random(0u, 31u, RndCntImms)] uint widthm1) + { + if (lsb + widthm1 > 31) + { + widthm1 -= (lsb + widthm1) - 31; + } + uint opcode = 0xe7e00050u; // UBFX R0, R0, #0, #1 + opcode |= ((rd & 0xf) << 12); + opcode |= ((rn & 0xf) << 0); + opcode |= ((widthm1 & 31) << 16) | ((lsb & 31) << 7); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r0: wd, r1: wn, sp: sp); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("SBFX , , #, #")] + public void Sbfx([Values(0u, 0xdu)] uint rd, + [Values(1u, 0xdu)] uint rn, + [Random(RndCnt)] uint wd, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values(0u, 15u, 16u, 31u)] [Random(0u, 31u, RndCntImmr)] uint lsb, + [Values(0u, 15u, 16u, 31u)] [Random(0u, 31u, RndCntImms)] uint widthm1) + { + if (lsb + widthm1 > 31) + { + widthm1 -= (lsb + widthm1) - 31; + } + uint opcode = 0xe7a00050u; // SBFX R0, R0, #0, #1 + opcode |= ((rd & 0xf) << 12); + opcode |= ((rn & 0xf) << 0); + opcode |= ((widthm1 & 31) << 16) | ((lsb & 31) << 7); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r0: wd, r1: wn, sp: sp); + + CompareAgainstUnicorn(); + } +#endif + } +} diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs new file mode 100644 
index 0000000000..dfbd3b0bdd --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs @@ -0,0 +1,61 @@ +#define SimdLogical32 + +using ARMeilleure.State; +using NUnit.Framework; +using System; + +namespace Ryujinx.Tests.Cpu +{ + [Category("SimdLogical32")] + public sealed class CpuTestSimdLogical32 : CpuTest32 + { +#if SimdLogical32 + +#region "ValueSource (Opcodes)" + private static uint[] _Vbif_Vbit_Vbsl_Vand_() + { + return new uint[] + { + 0xf3300110u, // VBIF D0, D0, D0 + 0xf3200110u, // VBIT D0, D0, D0 + 0xf3100110u, // VBSL D0, D0, D0 + 0xf2000110u // VAND D0, D0, D0 + }; + } + #endregion + + private const int RndCnt = 2; + + [Test, Pairwise] + public void Vbif_Vbit_Vbsl_Vand([ValueSource("_Vbif_Vbit_Vbsl_Vand_")] uint opcode, + [Range(0u, 4u)] uint rd, + [Range(0u, 4u)] uint rn, + [Range(0u, 4u)] uint rm, + [Random(RndCnt)] ulong z, + [Random(RndCnt)] ulong a, + [Random(RndCnt)] ulong b, + [Values] bool q) + { + if (q) + { + opcode |= 1 << 6; + rm <<= 1; + rn <<= 1; + rd <<= 1; + } + + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3); + + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, z); + V128 v2 = MakeVectorE0E1(b, z); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); + + CompareAgainstUnicorn(); + } +#endif + } +} diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdMemory32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdMemory32.cs new file mode 100644 index 0000000000..eb27d95fbc --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestSimdMemory32.cs @@ -0,0 +1,319 @@ +#define SimdMemory32 + +using ARMeilleure.State; +using NUnit.Framework; +using System; + +namespace Ryujinx.Tests.Cpu +{ + [Category("SimdMemory32")] + public sealed class CpuTestSimdMemory32 : CpuTest32 + { +#if SimdMemory32 + private const int RndCntImm = 2; + + private uint[] LDSTModes = + { + // LD1 + 0b0111, + 0b1010, + 0b0110, + 0b0010, + + // LD2 + 0b1000, + 0b1001, + 0b0011, + + // LD3 + 
0b0100, + 0b0101, + + // LD4 + 0b0000, + 0b0001 + }; + + [Test, Pairwise, Description("VLDn. , [ {:}]{ /!/, } (single n element structure)")] + public void Vldn_Single([Values(0u, 1u, 2u)] uint size, + [Values(0u, 13u)] uint rn, + [Values(1u, 13u, 15u)] uint rm, + [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd, + [Range(0u, 7u)] uint index, + [Range(0u, 3u)] uint n, + [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint offset) + { + var data = GenerateVectorSequence(0x1000); + SetWorkingMemory(data); + + uint opcode = 0xf4a00000u; // VLD1.8 {D0[0]}, [R0], R0 + + opcode |= ((size & 3) << 10) | ((rn & 15) << 16) | (rm & 15); + + uint index_align = (index << (int)(1 + size)) & 15; + + opcode |= (index_align) << 4; + + opcode |= ((vd & 0x10) << 18); + opcode |= ((vd & 0xf) << 12); + + opcode |= (n & 3) << 8; // LD1 is 0, LD2 is 1 etc. + + SingleOpcode(opcode, r0: 0x2500, r1: offset, sp: 0x2500); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VLDn. , [ {:}]{ /!/, } (all lanes)")] + public void Vldn_All([Values(0u, 13u)] uint rn, + [Values(1u, 13u, 15u)] uint rm, + [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd, + [Range(0u, 3u)] uint n, + [Range(0u, 2u)] uint size, + [Values] bool t, + [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint offset) + { + var data = GenerateVectorSequence(0x1000); + SetWorkingMemory(data); + + uint opcode = 0xf4a00c00u; // VLD1.8 {D0[]}, [R0], R0 (all-lanes form) + + opcode |= ((size & 3) << 6) | ((rn & 15) << 16) | (rm & 15); + + opcode |= ((vd & 0x10) << 18); + opcode |= ((vd & 0xf) << 12); + + opcode |= (n & 3) << 8; // LD1 is 0, LD2 is 1 etc. + if (t) opcode |= 1 << 5; + + SingleOpcode(opcode, r0: 0x2500, r1: offset, sp: 0x2500); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VLDn. 
, [ {:}]{ /!/, } (multiple n element structures)")] + public void Vldn_Pair([Values(0u, 1u, 2u, 3u)] uint size, + [Values(0u, 13u)] uint rn, + [Values(1u, 13u, 15u)] uint rm, + [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd, + [Range(0u, 3u)] uint mode, + [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint offset) + { + var data = GenerateVectorSequence(0x1000); + SetWorkingMemory(data); + + uint opcode = 0xf4200000u; // VLD4.8 {D0, D1, D2, D3}, [R0], R0 (base encoding; the type bits come from LDSTModes[mode]. NOTE(review): mode only ranges over 0..3, so only the first four LDSTModes entries are exercised - confirm this is intended.) + + opcode |= ((size & 3) << 6) | ((rn & 15) << 16) | (rm & 15) | (LDSTModes[mode] << 8); + + opcode |= ((vd & 0x10) << 18); + opcode |= ((vd & 0xf) << 12); + + SingleOpcode(opcode, r0: 0x2500, r1: offset, sp: 0x2500); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VSTn. , [ {:}]{ /!/, } (single n element structure)")] + public void Vstn_Single([Values(0u, 1u, 2u)] uint size, + [Values(0u, 13u)] uint rn, + [Values(1u, 13u, 15u)] uint rm, + [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd, + [Range(0u, 7u)] uint index, + [Range(0u, 3u)] uint n, + [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint offset) + { + var data = GenerateVectorSequence(0x1000); + SetWorkingMemory(data); + + (V128 vec1, V128 vec2, V128 vec3, V128 vec4) = GenerateTestVectors(); + + uint opcode = 0xf4800000u; // VST1.8 {D0[0]}, [R0], R0 + + opcode |= ((size & 3) << 10) | ((rn & 15) << 16) | (rm & 15); + + uint index_align = (index << (int)(1 + size)) & 15; + + opcode |= (index_align) << 4; + + opcode |= ((vd & 0x10) << 18); + opcode |= ((vd & 0xf) << 12); + + opcode |= (n & 3) << 8; // ST1 is 0, ST2 is 1 etc. + + SingleOpcode(opcode, r0: 0x2500, r1: offset, v1: vec1, v2: vec2, v3: vec3, v4: vec4, sp: 0x2500); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VSTn. 
, [ {:}]{ /!/, } (multiple n element structures)")] + public void Vstn_Pair([Values(0u, 1u, 2u, 3u)] uint size, + [Values(0u, 13u)] uint rn, + [Values(1u, 13u, 15u)] uint rm, + [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd, + [Range(0u, 3u)] uint mode, + [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint offset) + { + var data = GenerateVectorSequence(0x1000); + SetWorkingMemory(data); + + (V128 vec1, V128 vec2, V128 vec3, V128 vec4) = GenerateTestVectors(); + + uint opcode = 0xf4000000u; // VST4.8 {D0, D1, D2, D3}, [R0], R0 (base encoding; the type bits come from LDSTModes[mode]. NOTE(review): mode only ranges over 0..3, so only the first four LDSTModes entries are exercised - confirm this is intended.) + + opcode |= ((size & 3) << 6) | ((rn & 15) << 16) | (rm & 15) | (LDSTModes[mode] << 8); + + opcode |= ((vd & 0x10) << 18); + opcode |= ((vd & 0xf) << 12); + + SingleOpcode(opcode, r0: 0x2500, r1: offset, v1: vec1, v2: vec2, v3: vec3, v4: vec4, sp: 0x2500); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VLDM. {!}, ")] + public void Vldm([Values(0u, 13u)] uint rn, + [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd, + [Range(0u, 2u)] uint mode, + [Values(0x1u, 0x32u)] [Random(2u, 31u, RndCntImm)] uint regs, + [Values] bool single) + { + var data = GenerateVectorSequence(0x1000); + SetWorkingMemory(data); + + uint opcode = 0xec100a00u; // VLDM base encoding; addressing mode, Rn, Vd and the register count are OR'd in below. + + uint[] vldmModes = { + // Note: 3rd 0 leaves a space for "D", which is set from bit 4 of vd below. + 0b0100, // Increment after. + 0b0101, // Increment after. (!) + 0b1001 // Decrement before. (!) + }; + + opcode |= ((vldmModes[mode] & 15) << 21); + opcode |= ((rn & 15) << 16); + + opcode |= ((vd & 0x10) << 18); + opcode |= ((vd & 0xf) << 12); + + opcode |= ((uint)(single ? 0 : 1) << 8); + + if (!single) regs = (regs << 1); // Low bit must be 0 - must be even number of registers. + uint regSize = single ? 1u : 2u; + + if (vd + (regs / regSize) > 32) // Can't address further than S31 or D31. + { + regs -= (vd + (regs / regSize)) - 32; + } + + if (regs / regSize > 16) // Can't do more than 16 registers at a time. 
+ { + regs = 16 * regSize; + } + + opcode |= regs & 0xff; + + SingleOpcode(opcode, r0: 0x2500, sp: 0x2500); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VLDR. , [ {, #{+/-}}]")] + public void Vldr([Values(2u, 3u)] uint size, // FP16 is not supported for now + [Values(0u)] uint rn, + [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint sd, + [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint imm, + [Values] bool sub) + { + var data = GenerateVectorSequence(0x1000); + SetWorkingMemory(data); + + uint opcode = 0xed900a00u; // VLDR.32 S0, [R0, #0] + opcode |= ((size & 3) << 8) | ((rn & 15) << 16); + + if (sub) + { + opcode &= ~(uint)(1 << 23); + } + + if (size == 2) + { + opcode |= ((sd & 0x1) << 22); + opcode |= ((sd & 0x1e) << 11); + } + else + { + opcode |= ((sd & 0x10) << 18); + opcode |= ((sd & 0xf) << 12); + } + opcode |= imm & 0xff; + + SingleOpcode(opcode, r0: 0x2500); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VSTR. , [ {, #{+/-}}]")] + public void Vstr([Values(2u, 3u)] uint size, // FP16 is not supported for now + [Values(0u)] uint rn, + [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint sd, + [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint imm, + [Values] bool sub) + { + var data = GenerateVectorSequence(0x1000); + SetWorkingMemory(data); + + uint opcode = 0xed800a00u; // VSTR.32 S0, [R0, #0] + opcode |= ((size & 3) << 8) | ((rn & 15) << 16); + + if (sub) + { + opcode &= ~(uint)(1 << 23); + } + + if (size == 2) + { + opcode |= ((sd & 0x1) << 22); + opcode |= ((sd & 0x1e) << 11); + } + else + { + opcode |= ((sd & 0x10) << 18); + opcode |= ((sd & 0xf) << 12); + } + opcode |= imm & 0xff; + + (V128 vec1, V128 vec2, _, _) = GenerateTestVectors(); + + SingleOpcode(opcode, r0: 0x2500, v0: vec1, v1: vec2); + + CompareAgainstUnicorn(); + } + + private (V128, V128, V128, V128) GenerateTestVectors() + { + return ( + new V128(-12.43f, 1872.23f, 4456.23f, -5622.2f), + new V128(0.0f, float.NaN, float.PositiveInfinity, 
float.NegativeInfinity), + new V128(1.23e10f, -0.0f, -0.123f, 0.123f), + new V128(float.Epsilon, 3.5f, 925.23f, -104.9f) + ); + } + + private byte[] GenerateVectorSequence(int length) + { + int floatLength = length >> 2; + float[] data = new float[floatLength]; + + for (int i = 0; i < floatLength; i++) + { + data[i] = i + (i / 9f); + } + + var result = new byte[length]; + Buffer.BlockCopy(data, 0, result, 0, result.Length); + return result; + } +#endif + } +} diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs new file mode 100644 index 0000000000..13d6107884 --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs @@ -0,0 +1,494 @@ +#define SimdMov32 + +using ARMeilleure.State; +using NUnit.Framework; +using System; + +namespace Ryujinx.Tests.Cpu +{ + [Category("SimdMov32")] + public sealed class CpuTestSimdMov32 : CpuTest32 + { +#if SimdMov32 + private const int RndCntImm = 2; + + [Test, Pairwise, Description("VMOV.I
, #")]
        // Encodes one immediate vector move per table entry and runs it through
        // SingleOpcode / CompareAgainstUnicorn.
        public void Movi_V([Range(0u, 10u)] uint variant,
                           [Values(0u, 1u, 2u, 3u)] uint vd,
                           [Values(0x0u)] [Random(1u, 0xffu, RndCntImm)] uint imm,
                           [Values] bool q)
        {
            // Five-bit entries: bit 0 is placed at opcode bit 5, bits 4:1 at opcode bits 11:8.
            uint[] cmodeOpTable =
            {
                0b0000_0, 0b0010_0, 0b0100_0, 0b0110_0, // I32
                0b1000_0, 0b1010_0,                     // I16
                0b1100_0, 0b1101_0, 0b1110_0, 0b1111_0, // DT
                0b1110_1
            };

            uint selected = cmodeOpTable[variant];

            // With q set the register index is doubled before it is encoded.
            uint dest = q ? vd << 1 : vd;

            uint lowBitField  = (selected & 1) << 5;
            uint highBitField = (selected & 0x1e) << 7;
            uint qField       = (q ? 1u : 0u) << 6;
            uint immField     = (imm & 0xf) | ((imm & 0x70) << 12) | ((imm & 0x80) << 16);
            uint destField    = ((dest & 0x10) << 18) | ((dest & 0xf) << 12);

            // Base encoding: VMOV.I32 D0, #0
            uint opcode = 0xf2800010u | lowBitField | highBitField | qField | immField | destField;

            SingleOpcode(opcode);

            CompareAgainstUnicorn();
        }

        [Test, Pairwise, Description("VMOV.F , #")]
        // Encodes an immediate move for size 2 or 3 and compares the result with Unicorn.
        public void Movi_S([Range(2u, 3u)] uint size,
                           [Values(0u, 1u, 2u, 3u)] uint vd,
                           [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint imm)
        {
            // size == 2: vd bit 0 -> opcode bit 22, vd bits 4:1 -> opcode bits 15:12.
            // otherwise: vd bit 4 -> opcode bit 22, vd bits 3:0 -> opcode bits 15:12.
            uint destField = size == 2
                ? ((vd & 0x1) << 22) | ((vd & 0x1e) << 11)
                : ((vd & 0x10) << 18) | ((vd & 0xf) << 12);

            uint immField = (imm & 0xf) | ((imm & 0xf0) << 12);

            uint opcode = 0xeeb00800u | ((size & 3) << 8) | immField | destField;

            SingleOpcode(opcode);

            CompareAgainstUnicorn();
        }

        [Test, Pairwise, Description("VMOV , ")]
        // Encodes a move between a general-purpose register and a vector register;
        // op sets opcode bit 20.
        public void Mov_GP([Values(0u, 1u, 2u, 3u)] uint vn,
                           [Values(0u, 1u, 2u, 3u)] uint rt,
                           [Random(RndCntImm)] uint valueRn,
                           [Random(RndCntImm)] ulong valueVn1,
                           [Random(RndCntImm)] ulong valueVn2,
                           [Values] bool op)
        {
            uint registerFields = ((vn & 1) << 7) | ((vn & 0x1e) << 15) | ((rt & 0xf) << 12);
            uint opField = op ? 1u << 20 : 0u;

            // Base encoding: VMOV S0, R0
            uint opcode = 0xee000a10u | registerFields | opField;

            // r0-r3 all receive the same value so any rt in range reads it.
            V128 vector0 = new V128(valueVn1, valueVn2);

            SingleOpcode(opcode, r0: valueRn, r1: valueRn, r2: valueRn, r3: valueRn, v0: vector0);

            CompareAgainstUnicorn();
        }

        [Test, Pairwise, Description("VMOV. 
, ")]
        // Encodes a move between a general-purpose register and one vector element,
        // with the element size and index folded into a five-bit selector.
        public void Mov_GP_Elem([Range(0u, 7u)] uint vn,
                                [Values(0u, 1u, 2u, 3u)] uint rt,
                                [Range(0u, 2u)] uint size,
                                [Range(0u, 7u)] uint index,
                                [Random(1)] uint valueRn,
                                [Random(1)] ulong valueVn1,
                                [Random(1)] ulong valueVn2,
                                [Values] bool op,
                                [Values] bool u)
        {
            // Selector layout depends on size; the fallback value is only kept for
            // sizes outside 0..2, which the Range attribute does not generate.
            uint selector;
            if (size == 0)
            {
                selector = 0b1000 | (index & 7);
            }
            else if (size == 1)
            {
                selector = 0b0001 | ((index & 3) << 1);
            }
            else if (size == 2)
            {
                selector = (index & 1) << 2;
            }
            else
            {
                selector = 0b01000;
            }

            // Upper selector bits land at opcode bit 21 and up, the low two at bits 6:5.
            uint selectorField = ((selector >> 2) << 21) | ((selector & 3) << 5);
            uint registerFields = ((vn & 0x10) << 3) | ((vn & 0xf) << 16) | ((rt & 0xf) << 12);

            // Base encoding: VMOV.32 D0[0], R0
            uint opcode = 0xee000b10u | selectorField | registerFields;

            if (op)
            {
                opcode |= 1u << 20;

                // Bit 23 is only set for sizes other than 2.
                bool setBit23 = u && size != 2;
                if (setBit23)
                {
                    opcode |= 1u << 23;
                }
            }

            V128 vector0 = new V128(valueVn1, valueVn2);
            V128 vector1 = new V128(valueVn2, valueVn1);

            SingleOpcode(opcode, r0: valueRn, r1: valueRn, r2: valueRn, r3: valueRn, v0: vector0, v1: vector1);

            CompareAgainstUnicorn();
        }

        [Test, Pairwise, Description("(VMOV , , ), (VMOV , , )")]
        // Encodes a move between a pair of general-purpose registers and one vector
        // register; op sets opcode bit 20.
        public void Mov_GP_D([Values(0u, 1u, 2u, 3u)] uint vm,
                             [Values(0u, 1u, 2u, 3u)] uint rt,
                             [Values(0u, 1u, 2u, 3u)] uint rt2,
                             [Random(RndCntImm)] uint valueRt1,
                             [Random(RndCntImm)] uint valueRt2,
                             [Random(RndCntImm)] ulong valueVn1,
                             [Random(RndCntImm)] ulong valueVn2,
                             [Values] bool op)
        {
            uint vectorField = ((vm & 0x10) << 1) | (vm & 0xf);
            uint gprFields = ((rt & 0xf) << 12) | ((rt2 & 0xf) << 16);
            uint opField = op ? 1u << 20 : 0u;

            // Base encoding: VMOV D0, R0, R0
            uint opcode = 0xec400b10u | vectorField | gprFields | opField;

            V128 vector0 = new V128(valueVn1, valueVn2);

            // The two input values alternate across r0-r3.
            SingleOpcode(opcode, r0: valueRt1, r1: valueRt2, r2: valueRt1, r3: valueRt2, v0: vector0);

            CompareAgainstUnicorn();
        }

        [Test, Pairwise, Description("(VMOV , , , ), (VMOV , , , )")]
        // Same shape as Mov_GP_D, but vm is split as bit 0 -> opcode bit 5 and
        // bits 4:1 -> opcode bits 3:0, and a second vector input is supplied.
        public void Mov_GP_2([Range(0u, 7u)] uint vm,
                             [Values(0u, 1u, 2u, 3u)] uint rt,
                             [Values(0u, 1u, 2u, 3u)] uint rt2,
                             [Random(RndCntImm)] uint valueRt1,
                             [Random(RndCntImm)] uint valueRt2,
                             [Random(RndCntImm)] ulong valueVn1,
                             [Random(RndCntImm)] ulong valueVn2,
                             [Values] bool op)
        {
            uint vectorField = ((vm & 1) << 5) | ((vm & 0x1e) >> 1);
            uint gprFields = ((rt & 0xf) << 12) | ((rt2 & 0xf) << 16);
            uint opField = op ? 1u << 20 : 0u;

            // Base encoding: VMOV S0, S1, R0, R0
            uint opcode = 0xec400a10u | vectorField | gprFields | opField;

            V128 vector0 = new V128(valueVn1, valueVn2);
            V128 vector1 = new V128(valueVn2, valueVn1);

            SingleOpcode(opcode, r0: valueRt1, r1: valueRt2, r2: valueRt1, r3: valueRt2, v0: vector0, v1: vector1);

            CompareAgainstUnicorn();
        }

        [Test, Pairwise, Description("VMOVN.
, ")]
        // Encodes VMOVN for each source element size and compares the result with Unicorn.
        //
        // The size parameter previously used [Range(0u, 1u, 2u)], which NUnit reads as
        // "from 0 to 1 in steps of 2" and therefore only ever produced size 0. [Values]
        // enumerates all three sizes, matching the sibling tests in this class.
        public void Movn_V([Values(0u, 1u, 2u)] uint size,
                           [Values(0u, 1u, 2u, 3u)] uint vd,
                           [Values(0u, 2u, 4u, 8u)] uint vm)
        {
            uint opcode = 0xf3b20200u; // VMOVN.I16 D0, Q0

            opcode |= (size & 0x3) << 18;

            // vm: bit 4 -> opcode bit 5, bits 3:0 -> opcode bits 3:0.
            opcode |= ((vm & 0x10) << 1);
            opcode |= ((vm & 0xf) << 0);

            // vd: bit 4 -> opcode bit 22, bits 3:0 -> opcode bits 15:12.
            opcode |= ((vd & 0x10) << 18);
            opcode |= ((vd & 0xf) << 12);

            var rnd = TestContext.CurrentContext.Random;
            V128 v0 = new V128(rnd.NextULong(), rnd.NextULong());
            V128 v1 = new V128(rnd.NextULong(), rnd.NextULong());
            V128 v2 = new V128(rnd.NextULong(), rnd.NextULong());
            V128 v3 = new V128(rnd.NextULong(), rnd.NextULong());

            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2, v3: v3);

            CompareAgainstUnicorn();
        }

        [Test, Pairwise, Description("VTRN. , ")]
        // Encodes VTRN for the given register pair and element size and compares
        // the result with Unicorn. Cases with vm == vd are skipped.
        public void Vtrn([Values(0u, 1u, 2u, 3u)] uint vm,
                         [Values(0u, 1u, 2u, 3u)] uint vd,
                         [Values(0u, 1u, 2u)] uint size,
                         [Values] bool q)
        {
            uint opcode = 0xf3b20080u; // VTRN.8 D0, D0
            if (vm == vd)
            {
                return; // Undefined.
            }

            if (q)
            {
                // Set opcode bit 6 and double both register indices.
                opcode |= 1 << 6;
                vd <<= 1; vm <<= 1;
            }
            opcode |= (vm & 0x10) << 1;
            opcode |= (vm & 0xf);
            opcode |= (vd & 0x10) << 18;
            opcode |= (vd & 0xf) << 12;
            opcode |= (size & 0x3) << 18;

            var rnd = TestContext.CurrentContext.Random;
            V128 v0 = new V128(rnd.NextULong(), rnd.NextULong());
            V128 v1 = new V128(rnd.NextULong(), rnd.NextULong());
            V128 v2 = new V128(rnd.NextULong(), rnd.NextULong());
            V128 v3 = new V128(rnd.NextULong(), rnd.NextULong());

            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2, v3: v3);

            CompareAgainstUnicorn();
        }

        [Test, Pairwise, Description("VZIP. 
, ")]
        // Encodes VZIP for the given register pair and element size and compares the
        // result with Unicorn.
        public void Vzip([Values(0u, 1u, 2u, 3u)] uint vm,
                         [Values(0u, 1u, 2u, 3u)] uint vd,
                         [Values(0u, 1u, 2u)] uint size,
                         [Values] bool q)
        {
            // Skipped combinations: identical registers, or size 2 without q.
            bool skipped = vm == vd || (size == 2 && !q);
            if (skipped)
            {
                return; // Undefined.
            }

            uint qField = 0;
            if (q)
            {
                // Register indices are doubled when q is set.
                qField = 1u << 6;
                vm <<= 1;
                vd <<= 1;
            }

            uint sourceField = ((vm & 0x10) << 1) | (vm & 0xf);
            uint destField = ((vd & 0x10) << 18) | ((vd & 0xf) << 12);
            uint sizeField = (size & 0x3) << 18;

            // Base encoding: VZIP.8 D0, D0
            uint opcode = 0xf3b20180u | qField | sourceField | destField | sizeField;

            var rnd = TestContext.CurrentContext.Random;
            V128 vec0 = new V128(rnd.NextULong(), rnd.NextULong());
            V128 vec1 = new V128(rnd.NextULong(), rnd.NextULong());
            V128 vec2 = new V128(rnd.NextULong(), rnd.NextULong());
            V128 vec3 = new V128(rnd.NextULong(), rnd.NextULong());

            SingleOpcode(opcode, v0: vec0, v1: vec1, v2: vec2, v3: vec3);

            CompareAgainstUnicorn();
        }

        [Test, Pairwise, Description("VUZP. , ")]
        // Encodes VUZP; identical to Vzip apart from the base opcode.
        public void Vuzp([Values(0u, 1u, 2u, 3u)] uint vm,
                         [Values(0u, 1u, 2u, 3u)] uint vd,
                         [Values(0u, 1u, 2u)] uint size,
                         [Values] bool q)
        {
            // Skipped combinations: identical registers, or size 2 without q.
            bool skipped = vm == vd || (size == 2 && !q);
            if (skipped)
            {
                return; // Undefined.
            }

            uint qField = 0;
            if (q)
            {
                // Register indices are doubled when q is set.
                qField = 1u << 6;
                vm <<= 1;
                vd <<= 1;
            }

            uint sourceField = ((vm & 0x10) << 1) | (vm & 0xf);
            uint destField = ((vd & 0x10) << 18) | ((vd & 0xf) << 12);
            uint sizeField = (size & 0x3) << 18;

            // Base encoding: VUZP.8 d0, d0
            uint opcode = 0xf3b20100u | qField | sourceField | destField | sizeField;

            var rnd = TestContext.CurrentContext.Random;
            V128 vec0 = new V128(rnd.NextULong(), rnd.NextULong());
            V128 vec1 = new V128(rnd.NextULong(), rnd.NextULong());
            V128 vec2 = new V128(rnd.NextULong(), rnd.NextULong());
            V128 vec3 = new V128(rnd.NextULong(), rnd.NextULong());

            SingleOpcode(opcode, v0: vec0, v1: vec1, v2: vec2, v3: vec3);

            CompareAgainstUnicorn();
        }

        [Test, Pairwise, Description("VTBL.8
, {list}, ")]
        // Encodes a table lookup (x sets opcode bit 6) over a list of length + 1
        // registers and compares the result with Unicorn.
        public void Vtbl([Range(0u, 6u)] uint vm, // Indices, include potentially invalid.
                         [Range(4u, 12u)] uint vn, // Selection.
                         [Values(0u, 1u)] uint vd, // Destinations.
                         [Range(0u, 3u)] uint length,
                         [Values] bool x)
        {
            uint opcode = 0xf3b00800u; // VTBL.8 D0, {D0}, D0
            if (vn + length > 31)
            {
                return; // Undefined.
            }

            if (x)
            {
                opcode |= 1 << 6;
            }
            opcode |= (vm & 0x10) << 1;
            opcode |= (vm & 0xf);
            opcode |= (vd & 0x10) << 18;
            opcode |= (vd & 0xf) << 12;

            opcode |= (vn & 0x10) << 3;
            opcode |= (vn & 0xf) << 16;
            opcode |= (length & 0x3) << 8;

            var rnd = TestContext.CurrentContext.Random;
            V128 v2 = new V128(rnd.NextULong(), rnd.NextULong());
            V128 v3 = new V128(rnd.NextULong(), rnd.NextULong());
            V128 v4 = new V128(rnd.NextULong(), rnd.NextULong());
            V128 v5 = new V128(rnd.NextULong(), rnd.NextULong());

            // The length field holds the register count minus one, so the table spans
            // (length + 1) * 8 bytes. The previous bound, (byte)(length * 8 - 1),
            // wrapped to 255 for length == 0 and otherwise stopped short of the table size.
            // NextByte's argument is an exclusive upper bound, so v0/v1 hold in-range
            // indices only.
            // NOTE(review): presumably higher vm values read the fully random v2/v3 and
            // so still cover out-of-range indices; confirm against SingleOpcode's
            // register mapping.
            byte tableSize = (byte)((length + 1) * 8);
            byte[] b0 = new byte[16];
            byte[] b1 = new byte[16];
            for (int i = 0; i < 16; i++)
            {
                b0[i] = rnd.NextByte(tableSize);
                b1[i] = rnd.NextByte(tableSize);
            }

            V128 v0 = new V128(b0);
            V128 v1 = new V128(b1);

            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2, v3: v3, v4: v4, v5: v5);

            CompareAgainstUnicorn();
        }

        [Test, Pairwise, Description("VEXT.8 {,} , , #")]
        // Encodes VEXT with a 4-bit immediate and compares the result with Unicorn.
        // Without q, immediates above 7 are skipped.
        public void Vext([Values(0u, 1u, 2u, 3u)] uint vm,
                         [Values(0u, 1u, 2u, 3u)] uint vn,
                         [Values(0u, 1u, 2u, 3u)] uint vd,
                         [Values(0u, 15u)] uint imm4,
                         [Values] bool q)
        {
            uint opcode = 0xf2b00000; // VEXT.32 D0, D0, D0, #0

            if (q)
            {
                // Set opcode bit 6 and double all three register indices.
                opcode |= 1 << 6;
                vd <<= 1; vm <<= 1; vn <<= 1;
            }
            else if (imm4 > 7)
            {
                return; // Undefined.
            }
            opcode |= (vm & 0x10) << 1;
            opcode |= (vm & 0xf);
            opcode |= (vd & 0x10) << 18;
            opcode |= (vd & 0xf) << 12;
            opcode |= (vn & 0x10) << 3;
            opcode |= (vn & 0xf) << 16;
            opcode |= (imm4 & 0xf) << 8;

            var rnd = TestContext.CurrentContext.Random;
            V128 v0 = new V128(rnd.NextULong(), rnd.NextULong());
            V128 v1 = new V128(rnd.NextULong(), rnd.NextULong());
            V128 v2 = new V128(rnd.NextULong(), rnd.NextULong());
            V128 v3 = new V128(rnd.NextULong(), rnd.NextULong());

            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2, v3: v3);

            CompareAgainstUnicorn();
        }

        [Test, Pairwise, Description("VDUP. , ")]
        // Encodes VDUP from a general-purpose register and compares the result with
        // Unicorn. The two size bits are split across opcode bits 5 and 22.
        public void Vdup_GP([Values(0u, 1u, 2u, 3u)] uint vd,
                            [Values(0u, 1u, 2u, 3u)] uint rt,
                            [Values(0u, 1u, 2u)] uint size,
                            [Random(RndCntImm)] uint valueRn,
                            [Random(RndCntImm)] ulong valueVn1,
                            [Random(RndCntImm)] ulong valueVn2,
                            [Values] bool q)
        {
            uint opcode = 0xee800b10; // VDUP.32 d0, r0

            if (q)
            {
                // Set opcode bit 21 and double the destination index.
                opcode |= 1 << 21;
                vd <<= 1;
            }

            opcode |= (vd & 0x10) << 3;
            opcode |= (vd & 0xf) << 16;
            opcode |= (rt & 0xf) << 12;

            opcode |= (size & 1) << 5; // E
            opcode |= (size & 2) << 21; // B

            var rnd = TestContext.CurrentContext.Random;
            V128 v1 = new V128(rnd.NextULong(), rnd.NextULong());

            // r0-r3 all receive the same value so any rt in range reads it.
            SingleOpcode(opcode, r0: valueRn, r1: valueRn, r2: valueRn, r3: valueRn, v0: new V128(valueVn1, valueVn2), v1: v1);

            CompareAgainstUnicorn();
        }

        [Test, Pairwise, Description("VDUP. 
, ")] + public void Vdup_S([Values(0u, 1u, 2u, 3u)] uint vd, + [Values(0u, 1u, 2u, 3u)] uint vm, + [Values(0u, 1u, 2u)] uint size, + [Range(0u, 7u)] uint index, + [Random(RndCntImm)] ulong valueVn1, + [Random(RndCntImm)] ulong valueVn2, + [Values] bool q) + { + uint opcode = 0xf3b00c00; + + if (q) + { + opcode |= 1 << 6; + vd <<= 1; + } + + opcode |= (vd & 0x10) << 18; + opcode |= (vd & 0xf) << 12; + opcode |= (vm & 0x10) << 1; + opcode |= (vm & 0xf); + + uint imm4 = 0; + switch (size) + { + case 0: + imm4 |= 0b0100 | ((index & 1) << 3); + break; + case 1: + imm4 |= 0b0010 | ((index & 3) << 2); + break; + case 2: + imm4 |= 0b0001 | ((index & 7) << 1); + break; + } + + opcode |= imm4 << 16; + + V128 v1 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + V128 v2 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + V128 v3 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + + SingleOpcode(opcode, v0: new V128(valueVn1, valueVn2), v1: v1, v2: v2, v3: v3); + + CompareAgainstUnicorn(); + } +#endif + } +} diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs new file mode 100644 index 0000000000..a3ba936918 --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs @@ -0,0 +1,351 @@ +#define SimdReg32 + +using ARMeilleure.State; +using NUnit.Framework; +using System; +using System.Collections.Generic; + +namespace Ryujinx.Tests.Cpu +{ + [Category("SimdReg32")] + public sealed class CpuTestSimdReg32 : CpuTest32 + { +#if SimdReg32 + +#region "ValueSource (Types)" + private static ulong[] _1B1H1S1D_() + { + return new ulong[] { 0x0000000000000000ul, 0x000000000000007Ful, + 0x0000000000000080ul, 0x00000000000000FFul, + 0x0000000000007FFFul, 0x0000000000008000ul, + 0x000000000000FFFFul, 0x000000007FFFFFFFul, + 0x0000000080000000ul, 0x00000000FFFFFFFFul, + 0x7FFFFFFFFFFFFFFFul, 
0x8000000000000000ul, + 0xFFFFFFFFFFFFFFFFul }; + } + + private static ulong[] _1D_() + { + return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul, + 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul }; + } + + private static ulong[] _1H1S_() + { + return new ulong[] { 0x0000000000000000ul, 0x0000000000007FFFul, + 0x0000000000008000ul, 0x000000000000FFFFul, + 0x000000007FFFFFFFul, 0x0000000080000000ul, + 0x00000000FFFFFFFFul }; + } + + private static ulong[] _4H2S_() + { + return new ulong[] { 0x0000000000000000ul, 0x7FFF7FFF7FFF7FFFul, + 0x8000800080008000ul, 0x7FFFFFFF7FFFFFFFul, + 0x8000000080000000ul, 0xFFFFFFFFFFFFFFFFul }; + } + + private static ulong[] _4H2S1D_() + { + return new ulong[] { 0x0000000000000000ul, 0x7FFF7FFF7FFF7FFFul, + 0x8000800080008000ul, 0x7FFFFFFF7FFFFFFFul, + 0x8000000080000000ul, 0x7FFFFFFFFFFFFFFFul, + 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul }; + } + + private static ulong[] _8B_() + { + return new ulong[] { 0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful, + 0x8080808080808080ul, 0xFFFFFFFFFFFFFFFFul }; + } + + private static ulong[] _8B4H2S_() + { + return new ulong[] { 0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful, + 0x8080808080808080ul, 0x7FFF7FFF7FFF7FFFul, + 0x8000800080008000ul, 0x7FFFFFFF7FFFFFFFul, + 0x8000000080000000ul, 0xFFFFFFFFFFFFFFFFul }; + } + + private static ulong[] _8B4H2S1D_() + { + return new ulong[] { 0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful, + 0x8080808080808080ul, 0x7FFF7FFF7FFF7FFFul, + 0x8000800080008000ul, 0x7FFFFFFF7FFFFFFFul, + 0x8000000080000000ul, 0x7FFFFFFFFFFFFFFFul, + 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul }; + } + + private static IEnumerable _1S_F_() + { + yield return 0x00000000FF7FFFFFul; // -Max Normal (float.MinValue) + yield return 0x0000000080800000ul; // -Min Normal + yield return 0x00000000807FFFFFul; // -Max Subnormal + yield return 0x0000000080000001ul; // -Min Subnormal (-float.Epsilon) + yield return 0x000000007F7FFFFFul; // +Max Normal (float.MaxValue) + yield return 
0x0000000000800000ul; // +Min Normal + yield return 0x00000000007FFFFFul; // +Max Subnormal + yield return 0x0000000000000001ul; // +Min Subnormal (float.Epsilon) + + if (!NoZeros) + { + yield return 0x0000000080000000ul; // -Zero + yield return 0x0000000000000000ul; // +Zero + } + + if (!NoInfs) + { + yield return 0x00000000FF800000ul; // -Infinity + yield return 0x000000007F800000ul; // +Infinity + } + + if (!NoNaNs) + { + yield return 0x00000000FFC00000ul; // -QNaN (all zeros payload) (float.NaN) + yield return 0x00000000FFBFFFFFul; // -SNaN (all ones payload) + yield return 0x000000007FC00000ul; // +QNaN (all zeros payload) (-float.NaN) (DefaultNaN) + yield return 0x000000007FBFFFFFul; // +SNaN (all ones payload) + } + + for (int cnt = 1; cnt <= RndCnt; cnt++) + { + ulong grbg = TestContext.CurrentContext.Random.NextUInt(); + ulong rnd1 = GenNormalS(); + ulong rnd2 = GenSubnormalS(); + + yield return (grbg << 32) | rnd1; + yield return (grbg << 32) | rnd2; + } + } + + private static IEnumerable _2S_F_() + { + yield return 0xFF7FFFFFFF7FFFFFul; // -Max Normal (float.MinValue) + yield return 0x8080000080800000ul; // -Min Normal + yield return 0x807FFFFF807FFFFFul; // -Max Subnormal + yield return 0x8000000180000001ul; // -Min Subnormal (-float.Epsilon) + yield return 0x7F7FFFFF7F7FFFFFul; // +Max Normal (float.MaxValue) + yield return 0x0080000000800000ul; // +Min Normal + yield return 0x007FFFFF007FFFFFul; // +Max Subnormal + yield return 0x0000000100000001ul; // +Min Subnormal (float.Epsilon) + + if (!NoZeros) + { + yield return 0x8000000080000000ul; // -Zero + yield return 0x0000000000000000ul; // +Zero + } + + if (!NoInfs) + { + yield return 0xFF800000FF800000ul; // -Infinity + yield return 0x7F8000007F800000ul; // +Infinity + } + + if (!NoNaNs) + { + yield return 0xFFC00000FFC00000ul; // -QNaN (all zeros payload) (float.NaN) + yield return 0xFFBFFFFFFFBFFFFFul; // -SNaN (all ones payload) + yield return 0x7FC000007FC00000ul; // +QNaN (all zeros payload) 
(-float.NaN) (DefaultNaN) + yield return 0x7FBFFFFF7FBFFFFFul; // +SNaN (all ones payload) + } + + for (int cnt = 1; cnt <= RndCnt; cnt++) + { + ulong rnd1 = GenNormalS(); + ulong rnd2 = GenSubnormalS(); + + yield return (rnd1 << 32) | rnd1; + yield return (rnd2 << 32) | rnd2; + } + } + + private static IEnumerable _1D_F_() + { + yield return 0xFFEFFFFFFFFFFFFFul; // -Max Normal (double.MinValue) + yield return 0x8010000000000000ul; // -Min Normal + yield return 0x800FFFFFFFFFFFFFul; // -Max Subnormal + yield return 0x8000000000000001ul; // -Min Subnormal (-double.Epsilon) + yield return 0x7FEFFFFFFFFFFFFFul; // +Max Normal (double.MaxValue) + yield return 0x0010000000000000ul; // +Min Normal + yield return 0x000FFFFFFFFFFFFFul; // +Max Subnormal + yield return 0x0000000000000001ul; // +Min Subnormal (double.Epsilon) + + if (!NoZeros) + { + yield return 0x8000000000000000ul; // -Zero + yield return 0x0000000000000000ul; // +Zero + } + + if (!NoInfs) + { + yield return 0xFFF0000000000000ul; // -Infinity + yield return 0x7FF0000000000000ul; // +Infinity + } + + if (!NoNaNs) + { + yield return 0xFFF8000000000000ul; // -QNaN (all zeros payload) (double.NaN) + yield return 0xFFF7FFFFFFFFFFFFul; // -SNaN (all ones payload) + yield return 0x7FF8000000000000ul; // +QNaN (all zeros payload) (-double.NaN) (DefaultNaN) + yield return 0x7FF7FFFFFFFFFFFFul; // +SNaN (all ones payload) + } + + for (int cnt = 1; cnt <= RndCnt; cnt++) + { + ulong rnd1 = GenNormalD(); + ulong rnd2 = GenSubnormalD(); + + yield return rnd1; + yield return rnd2; + } + } +#endregion + + private const int RndCnt = 2; + + private static readonly bool NoZeros = false; + private static readonly bool NoInfs = false; + private static readonly bool NoNaNs = false; + + [Explicit] + [Test, Pairwise, Description("VADD.f32 V0, V0, V0")] + public void Vadd_f32([Values(0u)] uint rd, + [Values(0u, 1u)] uint rn, + [Values(0u, 2u)] uint rm, + [ValueSource("_2S_F_")] ulong z0, + [ValueSource("_2S_F_")] ulong z1, + 
[ValueSource("_2S_F_")] ulong a0, + [ValueSource("_2S_F_")] ulong a1, + [ValueSource("_2S_F_")] ulong b0, + [ValueSource("_2S_F_")] ulong b1, + [Values] bool q) + { + uint opcode = 0xf2000d00u; // VADD.F32 D0, D0, D0 + if (q) + { + opcode |= 1 << 6; + rm <<= 1; + rn <<= 1; + rd <<= 1; + } + + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3); + + V128 v0 = MakeVectorE0E1(z0, z1); + V128 v1 = MakeVectorE0E1(a0, a1); + V128 v2 = MakeVectorE0E1(b0, b1); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VCMP.f Vd, Vm")] + public void Vcmp([Values(2u, 3u)] uint size, + [ValueSource("_1S_F_")] ulong a, + [ValueSource("_1S_F_")] ulong b, + [Values] bool e) + { + uint opcode = 0xeeb40840u; + uint rm = 1; + uint rd = 2; + + if (size == 3) + { + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + } + else + { + opcode |= ((rm & 0x1e) >> 1) | ((rm & 0x1) << 5); + opcode |= ((rd & 0x1e) << 11) | ((rd & 0x1) << 22); + } + + opcode |= ((size & 3) << 8); + if (e) + { + opcode |= 1 << 7; + } + + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); + + bool v = TestContext.CurrentContext.Random.NextBool(); + bool c = TestContext.CurrentContext.Random.NextBool(); + bool z = TestContext.CurrentContext.Random.NextBool(); + bool n = TestContext.CurrentContext.Random.NextBool(); + + int fpscr = (int)(TestContext.CurrentContext.Random.NextUInt(0xf) << 28); + + SingleOpcode(opcode, v1: v1, v2: v2, overflow: v, carry: c, zero: z, negative: n, fpscr: fpscr, copyFpFlags: true); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VSHL. 
{}, , ")] + public void Vshl([Values(0u)] uint rd, + [Values(1u, 0u)] uint rn, + [Values(2u, 0u)] uint rm, + [Values(0u, 1u, 2u, 3u)] uint size, + [Random(RndCnt)] ulong z, + [Random(RndCnt)] ulong a, + [Random(RndCnt)] ulong b, + [Values] bool q, + [Values] bool u) + { + uint opcode = 0xf2000400u; // VSHL.S8 D0, D0, D0 + if (q) + { + opcode |= 1 << 6; + rm <<= 1; + rn <<= 1; + rd <<= 1; + } + + if (u) + { + opcode |= 1 << 24; + } + + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3); + + opcode |= size << 20; + + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, z); + V128 v2 = MakeVectorE0E1(b, z); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); + + CompareAgainstUnicorn(); + } + + [Explicit] + [Test, Pairwise, Description("VPADD.f32 V0, V0, V0")] + public void Vpadd_f32([Values(0u)] uint rd, + [Range(0u, 7u)] uint rn, + [Range(0u, 7u)] uint rm) + { + // not currently a slow path test - just a sanity check for pairwise + uint opcode = 0xf3000d00u; // VPADD.F32 D0, D0, D0 + + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3); + + var rnd = TestContext.CurrentContext.Random; + V128 v0 = new V128(rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue)); + V128 v1 = new V128(rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue)); + V128 v2 = new V128(rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue)); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); + + CompareAgainstUnicorn(); + } +#endif + } +} 
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs new file mode 100644 index 0000000000..6c7b0493b9 --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs @@ -0,0 +1,116 @@ +#define SimdShImm32 + +using ARMeilleure.State; +using NUnit.Framework; + +namespace Ryujinx.Tests.Cpu +{ + [Category("SimdShImm32")] + public sealed class CpuTestSimdShImm32 : CpuTest32 + { +#if SimdShImm32 + private const int RndCnt = 2; + + [Test, Pairwise, Description("VSHL. {}, , #")] + public void Vshl_Imm([Values(0u)] uint rd, + [Values(2u, 0u)] uint rm, + [Values(0u, 1u, 2u, 3u)] uint size, + [Random(RndCnt), Values(0u)] uint shiftImm, + [Random(RndCnt)] ulong z, + [Random(RndCnt)] ulong a, + [Random(RndCnt)] ulong b, + [Values] bool q) + { + uint opcode = 0xf2800510u; // VORR.I32 D0, #0 (immediate value changes it into SHL) + if (q) + { + opcode |= 1 << 6; + rm <<= 1; + rd <<= 1; + } + + uint imm = 1u << ((int)size + 3); + imm |= shiftImm & (imm - 1); + + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + opcode |= ((imm & 0x3f) << 16) | ((imm & 0x40) << 1); + + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, z); + V128 v2 = MakeVectorE0E1(b, z); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VSHR. 
{}, , #")]
        // Encodes a right shift by immediate (u sets opcode bit 24, q sets bit 6)
        // and compares the result with Unicorn.
        public void Vshr_Imm([Values(0u)] uint rd,
                             [Values(2u, 0u)] uint rm,
                             [Values(0u, 1u, 2u, 3u)] uint size,
                             [Random(RndCnt), Values(0u)] uint shiftImm,
                             [Random(RndCnt)] ulong z,
                             [Random(RndCnt)] ulong a,
                             [Random(RndCnt)] ulong b,
                             [Values] bool u,
                             [Values] bool q)
        {
            uint flagFields = 0;
            if (q)
            {
                // Register indices are doubled when q is set.
                flagFields |= 1u << 6;
                rm <<= 1;
                rd <<= 1;
            }

            if (u)
            {
                flagFields |= 1u << 24;
            }

            // A single marker bit at position size + 3, with the low bits taken from shiftImm.
            uint sizeMarker = 1u << ((int)size + 3);
            uint encodedShift = sizeMarker | (shiftImm & (sizeMarker - 1));

            uint sourceField = ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
            uint destField = ((rd & 0xf) << 12) | ((rd & 0x10) << 18);

            // Low six bits go to opcode bits 21:16, bit 6 goes to opcode bit 7.
            uint shiftField = ((encodedShift & 0x3f) << 16) | ((encodedShift & 0x40) << 1);

            // Base encoding: VMOV.I32 D0, #0 (immediate value changes it into SHR)
            uint opcode = 0xf2800010u | flagFields | sourceField | destField | shiftField;

            V128 vec0 = MakeVectorE0E1(z, z);
            V128 vec1 = MakeVectorE0E1(a, z);
            V128 vec2 = MakeVectorE0E1(b, z);

            SingleOpcode(opcode, v0: vec0, v1: vec1, v2: vec2);

            CompareAgainstUnicorn();
        }

        [Test, Pairwise, Description("VSHRN. {}, , #")]
        // Encodes a narrowing right shift by immediate and compares the result with Unicorn.
        public void Vshrn_Imm([Values(0u, 1u)] uint rd,
                              [Values(2u, 0u)] uint rm,
                              [Values(0u, 1u, 2u)] uint size,
                              [Random(RndCnt), Values(0u)] uint shiftImm,
                              [Random(RndCnt)] ulong z,
                              [Random(RndCnt)] ulong a,
                              [Random(RndCnt)] ulong b)
        {
            // A single marker bit at position size + 3, with the low bits taken from shiftImm.
            uint sizeMarker = 1u << ((int)size + 3);
            uint encodedShift = sizeMarker | (shiftImm & (sizeMarker - 1));

            uint sourceField = ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
            uint destField = ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
            uint shiftField = (encodedShift & 0x3f) << 16;

            // Base encoding: VMOV.I16 D0, #0 (immediate value changes it into SHRN)
            uint opcode = 0xf2800810u | sourceField | destField | shiftField;

            V128 vec0 = MakeVectorE0E1(z, z);
            V128 vec1 = MakeVectorE0E1(a, z);
            V128 vec2 = MakeVectorE0E1(b, z);

            SingleOpcode(opcode, v0: vec0, v1: vec1, v2: vec2);

            CompareAgainstUnicorn();
        }
#endif
    }
}