From b1b6f294f252e998920e8f1e6a8eefa2860f0d2c Mon Sep 17 00:00:00 2001
From: riperiperi
Date: Sun, 23 Feb 2020 21:20:40 +0000
Subject: [PATCH] Add most of the A32 instruction set to ARMeilleure (#897)
* Implement TEQ and MOV (Imm16)
* Initial work on A32 instructions + SVC. No tests yet, hangs in rtld.
* Implement CLZ, fix BFI and BFC Now stops on SIMD initialization.
* Exclusive access instructions, fix to mul, system instructions. Now gets to a break after SignalProcessWideKey64.
* Better impl of UBFX, add UDIV and SDIV Now boots way further - now stuck on VMOV instruction.
* Many more instructions, start on SIMD and testing framework.
* Fix build issues
* svc: Rework 32 bit codepath Fixing once and for all argument ordering issues.
* Fix 32 bits stacktrace
* hle debug: Add 32 bits dynamic section parsing
* Fix highCq mode, add many tests, fix some instruction bugs Still suffers from critical malloc failure :weary:
* Fix incorrect opcode decoders and a few more instructions.
* Add a few instructions and fix others. Re-disable highCq for now. Disabled the svc memory clear since I'm not sure about it.
* Fix build
* Fix typo in ordered/exclusive stores.
* Implement some more instructions, fix others. Uxtab16/Sxtab16 are untested.
* Begin impl of pairwise, some other instructions.
* Add a few more instructions, a quick hack to fix svcs for now.
* Add tests and fix issues with VTRN, VZIP, VUZP
* Add a few more instructions, fix Vmul_1 encoding.
* Fix way too many instruction bugs, add tests for some of the more important ones.
* Fix HighCq, enable FastFP paths for some floating point instructions (not entirely sure why these were disabled, so it's important to note this commit exists) Branching has been removed in A32 shifts until I figure out if it's worth it
* Cleanup Part 1 There should be no functional change between these next few commits. Should is the key word. (except for removing break handler)
* Implement 32 bits syscalls Co-authored-by: riperiperi Implement all 32 bits counterparts of the 64 bits syscalls we currently have.
* Refactor part 2: Move index/subindex logic to Operand May have inadvertently fixed one (1) bug
* Add FlushProcessDataCache32
* Address jd's comments
* Remove 16 bit encodings from OpCodeTable Still need to catch some edge cases (operands that use the "F" flag) and make Q encodings with non-even indexes undefined.
* Correct Fpscr handling for FP vector slow paths WIP
* Add StandardFPSCRValue behaviour for all Arithmetic instructions
* Add StandardFPSCRValue behaviour to compare instructions.
* Force passing of fpcr to FPProcessException and FPUnpack. Reduces potential for code error significantly
* OpCode cleanup
* Remove urgency from DMB comment in MRRC DMB is currently a no-op via the instruction, so it should likely still be a no-op here.
* Test Cleanup
* Fix FPDefaultNaN on Ryzen CPUs
* Improve some tests, fix some shift instructions, add slow path for Vadd
* Fix Typo
* More test cleanup
* Flip order of Fx and index, to indicate that the operand is the "base"
* Remove Simd32 register type, use Int32 and Int64 for scalars like A64 does.
* Reintroduce alignment to DecoderHelper (removed by accident)
* One more realign as reading diffs is hard
* Use I32 registers in A32 (part 2) Swap default integer register type based on current execution mode.
* FPSCR flags as Registers (part 1) Still need to change NativeContext and ExecutionContext to allow getting/setting with the flag values.
* Use I32 registers in A32 (part 1) * FPSCR flags as registers (part 2) Only CMP flags are on the registers right now. It could be useful to use more of the space in non-fast-float when implementing A32 flags accurately in the fast path. * Address Feedback * Correct FP->Int behaviour (should saturate) * Make branches made by writing to PC eligible for Rejit Greatly improves performance in most games. * Remove unused branching for Vtbl * RejitRequest as a class rather than a tuple Makes a lot more sense than storing tuples on a dictionary. * Add VMOVN, VSHR (imm), VSHRN (imm) and related tests * Re-order InstEmitSystem32 Alphabetical sorting. * Address Feedback Feedback from Ac_K, remove and sort usings. * Address Feedback 2 * Address Feedback from LDj3SNuD Opcode table reordered to have alphabetical sorting within groups, Vmaxnm and Vminnm have split names to be less ambiguous, SoftFloat nits, Test nits and Test simplification with ValueSource. * Add Debug Asserts to A32 helpers Mainly to prevent the shift ones from being used on I64 operands, as they expect I32 input for most operations (eg. carry flag setting), and expect I32 input for shift and boolean amounts. Most other helper functions don't take Operands, throw on out of range values, and take specific types of OpCode, so didn't need any asserts. * Use ConstF rather than creating an operand. (useful for pooling in future) * Move exclusive load to helper, reference call flag rather than literal 1. * Address LDj feedback (minus table flatten) one final look before it's all gone. the world is so beautiful. * Flatten OpCodeTable oh no * Address more table ordering * Call Flag as int on A32 Co-authored-by: Natalie C. Co-authored-by: Thog --- ARMeilleure/Decoders/Decoder.cs | 5 +- ARMeilleure/Decoders/DecoderHelper.cs | 15 + ARMeilleure/Decoders/IOpCode32AluBf.cs | 11 + ARMeilleure/Decoders/IOpCode32AluReg.cs | 7 + ARMeilleure/Decoders/IOpCode32AluUx.cs | 8 + ARMeilleure/Decoders/IOpCode32Mem.cs | 1 - ARMeilleure/Decoders/IOpCode32MemEx.cs | 7 + ARMeilleure/Decoders/IOpCode32Simd.cs | 4 + ARMeilleure/Decoders/IOpCode32SimdImm.cs | 9 + ARMeilleure/Decoders/OpCode32AluBf.cs | 24 + ARMeilleure/Decoders/OpCode32AluImm16.cs | 15 + ARMeilleure/Decoders/OpCode32AluMla.cs | 28 + ARMeilleure/Decoders/OpCode32AluReg.cs | 12 + ARMeilleure/Decoders/OpCode32AluRsImm.cs | 8 +- ARMeilleure/Decoders/OpCode32AluRsReg.cs | 18 + ARMeilleure/Decoders/OpCode32AluUmull.cs | 30 + ARMeilleure/Decoders/OpCode32AluUx.cs | 16 + ARMeilleure/Decoders/OpCode32Exception.cs | 12 + ARMeilleure/Decoders/OpCode32Mem.cs | 2 +- ARMeilleure/Decoders/OpCode32MemLdEx.cs | 12 + ARMeilleure/Decoders/OpCode32MemReg.cs | 12 + ARMeilleure/Decoders/OpCode32MemRsImm.cs | 16 + ARMeilleure/Decoders/OpCode32MemStEx.cs | 13 + ARMeilleure/Decoders/OpCode32Simd.cs | 30 + ARMeilleure/Decoders/OpCode32SimdBase.cs | 52 + ARMeilleure/Decoders/OpCode32SimdBinary.cs | 18 + ARMeilleure/Decoders/OpCode32SimdCmpZ.cs | 15 + ARMeilleure/Decoders/OpCode32SimdCvtFI.cs | 13 + ARMeilleure/Decoders/OpCode32SimdDupElem.cs | 40 + ARMeilleure/Decoders/OpCode32SimdDupGP.cs | 31 + ARMeilleure/Decoders/OpCode32SimdExt.cs | 17 + ARMeilleure/Decoders/OpCode32SimdImm.cs | 37 + ARMeilleure/Decoders/OpCode32SimdImm44.cs | 36 + ARMeilleure/Decoders/OpCode32SimdMemImm.cs | 35 + ARMeilleure/Decoders/OpCode32SimdMemMult.cs | 71 ++ ARMeilleure/Decoders/OpCode32SimdMemPair.cs | 46 + ARMeilleure/Decoders/OpCode32SimdMemSingle.cs | 46 + ARMeilleure/Decoders/OpCode32SimdMovGp.cs | 26 + .../Decoders/OpCode32SimdMovGpDouble.cs 
| 31 + ARMeilleure/Decoders/OpCode32SimdMovGpElem.cs | 46 + ARMeilleure/Decoders/OpCode32SimdReg.cs | 22 + ARMeilleure/Decoders/OpCode32SimdRegElem.cs | 21 + ARMeilleure/Decoders/OpCode32SimdRegS.cs | 20 + ARMeilleure/Decoders/OpCode32SimdRev.cs | 14 + ARMeilleure/Decoders/OpCode32SimdS.cs | 31 + ARMeilleure/Decoders/OpCode32SimdSel.cs | 20 + ARMeilleure/Decoders/OpCode32SimdShImm.cs | 44 + ARMeilleure/Decoders/OpCode32SimdSpecial.cs | 14 + ARMeilleure/Decoders/OpCode32SimdSqrte.cs | 16 + ARMeilleure/Decoders/OpCode32SimdTbl.cs | 21 + ARMeilleure/Decoders/OpCode32System.cs | 26 + ARMeilleure/Decoders/OpCodeSimdHelper.cs | 88 ++ ARMeilleure/Decoders/OpCodeTable.cs | 303 +++++- ARMeilleure/Diagnostics/IRDumper.cs | 1 + ARMeilleure/Instructions/DelegateTypes.cs | 8 + ARMeilleure/Instructions/InstEmitAlu.cs | 34 - ARMeilleure/Instructions/InstEmitAlu32.cs | 555 ++++++++++- ARMeilleure/Instructions/InstEmitAluHelper.cs | 260 ++++- .../Instructions/InstEmitException32.cs | 36 + ARMeilleure/Instructions/InstEmitFlow32.cs | 50 +- ARMeilleure/Instructions/InstEmitHelper.cs | 30 +- ARMeilleure/Instructions/InstEmitMemory32.cs | 15 +- ARMeilleure/Instructions/InstEmitMemoryEx.cs | 90 +- .../Instructions/InstEmitMemoryEx32.cs | 240 +++++ .../Instructions/InstEmitMemoryExHelper.cs | 87 ++ .../Instructions/InstEmitMemoryHelper.cs | 67 +- ARMeilleure/Instructions/InstEmitMul32.cs | 290 ++++++ .../Instructions/InstEmitSimdArithmetic32.cs | 634 +++++++++++++ ARMeilleure/Instructions/InstEmitSimdCmp32.cs | 273 ++++++ ARMeilleure/Instructions/InstEmitSimdCvt32.cs | 274 ++++++ .../Instructions/InstEmitSimdHelper.cs | 4 +- .../Instructions/InstEmitSimdHelper32.cs | 581 ++++++++++++ .../Instructions/InstEmitSimdLogical32.cs | 56 ++ .../Instructions/InstEmitSimdMemory32.cs | 352 +++++++ .../Instructions/InstEmitSimdMove32.cs | 336 +++++++ .../Instructions/InstEmitSimdShift32.cs | 100 ++ ARMeilleure/Instructions/InstEmitSystem32.cs | 233 +++++ ARMeilleure/Instructions/InstName.cs | 126 ++- ARMeilleure/Instructions/NativeInterface.cs | 45 +- ARMeilleure/Instructions/SoftFallback.cs | 20 + ARMeilleure/Instructions/SoftFloat.cs | 890 +++++++++++++----- .../RegisterType.cs | 3 +- ARMeilleure/State/ExecutionContext.cs | 4 + ARMeilleure/State/FPCR.cs | 6 +- ARMeilleure/State/FPSR.cs | 6 +- ARMeilleure/State/FPState.cs | 15 + ARMeilleure/State/NativeContext.cs | 42 +- ARMeilleure/State/RegisterAlias.cs | 1 + ARMeilleure/State/RegisterConsts.cs | 4 +- ARMeilleure/Translation/RegisterUsage.cs | 37 +- ARMeilleure/Translation/RejitRequest.cs | 16 + ARMeilleure/Translation/SsaConstruction.cs | 12 +- ARMeilleure/Translation/Translator.cs | 14 +- Ryujinx.Tests.Unicorn/Native/Arm32Register.cs | 139 +++ Ryujinx.Tests.Unicorn/UnicornAArch32.cs | 280 ++++++ Ryujinx.Tests/Cpu/CpuTest32.cs | 530 +++++++++++ Ryujinx.Tests/Cpu/CpuTestAlu32.cs | 61 ++ Ryujinx.Tests/Cpu/CpuTestAluRs32.cs | 84 ++ Ryujinx.Tests/Cpu/CpuTestBf32.cs | 108 +++ Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs | 61 ++ Ryujinx.Tests/Cpu/CpuTestSimdMemory32.cs | 319 +++++++ Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs | 494 ++++++++++ Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs | 351 +++++++ Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs | 116 +++ 104 files changed, 9426 insertions(+), 479 deletions(-) create mode 100644 ARMeilleure/Decoders/IOpCode32AluBf.cs create mode 100644 ARMeilleure/Decoders/IOpCode32AluReg.cs create mode 100644 ARMeilleure/Decoders/IOpCode32AluUx.cs create mode 100644 ARMeilleure/Decoders/IOpCode32MemEx.cs create mode 100644 ARMeilleure/Decoders/IOpCode32Simd.cs 
create mode 100644 ARMeilleure/Decoders/IOpCode32SimdImm.cs create mode 100644 ARMeilleure/Decoders/OpCode32AluBf.cs create mode 100644 ARMeilleure/Decoders/OpCode32AluImm16.cs create mode 100644 ARMeilleure/Decoders/OpCode32AluMla.cs create mode 100644 ARMeilleure/Decoders/OpCode32AluReg.cs create mode 100644 ARMeilleure/Decoders/OpCode32AluRsReg.cs create mode 100644 ARMeilleure/Decoders/OpCode32AluUmull.cs create mode 100644 ARMeilleure/Decoders/OpCode32AluUx.cs create mode 100644 ARMeilleure/Decoders/OpCode32Exception.cs create mode 100644 ARMeilleure/Decoders/OpCode32MemLdEx.cs create mode 100644 ARMeilleure/Decoders/OpCode32MemReg.cs create mode 100644 ARMeilleure/Decoders/OpCode32MemRsImm.cs create mode 100644 ARMeilleure/Decoders/OpCode32MemStEx.cs create mode 100644 ARMeilleure/Decoders/OpCode32Simd.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdBase.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdBinary.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdCmpZ.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdCvtFI.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdDupElem.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdDupGP.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdExt.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdImm.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdImm44.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdMemImm.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdMemMult.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdMemPair.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdMemSingle.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdMovGp.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdMovGpDouble.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdMovGpElem.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdReg.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdRegElem.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdRegS.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdRev.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdS.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdSel.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdShImm.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdSpecial.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdSqrte.cs create mode 100644 ARMeilleure/Decoders/OpCode32SimdTbl.cs create mode 100644 ARMeilleure/Decoders/OpCode32System.cs create mode 100644 ARMeilleure/Decoders/OpCodeSimdHelper.cs create mode 100644 ARMeilleure/Instructions/InstEmitException32.cs create mode 100644 ARMeilleure/Instructions/InstEmitMemoryEx32.cs create mode 100644 ARMeilleure/Instructions/InstEmitMemoryExHelper.cs create mode 100644 ARMeilleure/Instructions/InstEmitMul32.cs create mode 100644 ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs create mode 100644 ARMeilleure/Instructions/InstEmitSimdCmp32.cs create mode 100644 ARMeilleure/Instructions/InstEmitSimdCvt32.cs create mode 100644 ARMeilleure/Instructions/InstEmitSimdHelper32.cs create mode 100644 ARMeilleure/Instructions/InstEmitSimdLogical32.cs create mode 100644 ARMeilleure/Instructions/InstEmitSimdMemory32.cs create mode 100644 ARMeilleure/Instructions/InstEmitSimdMove32.cs create mode 100644 ARMeilleure/Instructions/InstEmitSimdShift32.cs create mode 100644 ARMeilleure/Instructions/InstEmitSystem32.cs create mode 100644 ARMeilleure/State/FPState.cs create mode 100644 ARMeilleure/Translation/RejitRequest.cs create mode 100644 
Ryujinx.Tests.Unicorn/Native/Arm32Register.cs create mode 100644 Ryujinx.Tests.Unicorn/UnicornAArch32.cs create mode 100644 Ryujinx.Tests/Cpu/CpuTest32.cs create mode 100644 Ryujinx.Tests/Cpu/CpuTestAlu32.cs create mode 100644 Ryujinx.Tests/Cpu/CpuTestAluRs32.cs create mode 100644 Ryujinx.Tests/Cpu/CpuTestBf32.cs create mode 100644 Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs create mode 100644 Ryujinx.Tests/Cpu/CpuTestSimdMemory32.cs create mode 100644 Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs create mode 100644 Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs create mode 100644 Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs diff --git a/ARMeilleure/Decoders/Decoder.cs b/ARMeilleure/Decoders/Decoder.cs index 8eb2a99d6..913d5082b 100644 --- a/ARMeilleure/Decoders/Decoder.cs +++ b/ARMeilleure/Decoders/Decoder.cs @@ -292,15 +292,16 @@ namespace ARMeilleure.Decoders private static bool IsCall(OpCode opCode) { - // TODO (CQ): ARM32 support. return opCode.Instruction.Name == InstName.Bl || - opCode.Instruction.Name == InstName.Blr; + opCode.Instruction.Name == InstName.Blr || + opCode.Instruction.Name == InstName.Blx; } private static bool IsException(OpCode opCode) { return opCode.Instruction.Name == InstName.Brk || opCode.Instruction.Name == InstName.Svc || + opCode.Instruction.Name == InstName.Trap || opCode.Instruction.Name == InstName.Und; } diff --git a/ARMeilleure/Decoders/DecoderHelper.cs b/ARMeilleure/Decoders/DecoderHelper.cs index bc41c61c6..6fe4678f9 100644 --- a/ARMeilleure/Decoders/DecoderHelper.cs +++ b/ARMeilleure/Decoders/DecoderHelper.cs @@ -148,5 +148,20 @@ namespace ARMeilleure.Decoders { return (((long)opCode << 45) >> 48) & ~3; } + + public static bool VectorArgumentsInvalid(bool q, params int[] args) + { + if (q) + { + for (int i = 0; i < args.Length; i++) + { + if ((args[i] & 1) == 1) + { + return true; + } + } + } + return false; + } } } \ No newline at end of file diff --git a/ARMeilleure/Decoders/IOpCode32AluBf.cs b/ARMeilleure/Decoders/IOpCode32AluBf.cs new file mode 100644 index 000000000..18de3eb65 --- /dev/null +++ b/ARMeilleure/Decoders/IOpCode32AluBf.cs @@ -0,0 +1,11 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32AluBf + { + int Rd { get; } + int Rn { get; } + + int Msb { get; } + int Lsb { get; } + } +} diff --git a/ARMeilleure/Decoders/IOpCode32AluReg.cs b/ARMeilleure/Decoders/IOpCode32AluReg.cs new file mode 100644 index 000000000..1612cc5c9 --- /dev/null +++ b/ARMeilleure/Decoders/IOpCode32AluReg.cs @@ -0,0 +1,7 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32AluReg : IOpCode32Alu + { + int Rm { get; } + } +} diff --git a/ARMeilleure/Decoders/IOpCode32AluUx.cs b/ARMeilleure/Decoders/IOpCode32AluUx.cs new file mode 100644 index 000000000..d03c7e219 --- /dev/null +++ b/ARMeilleure/Decoders/IOpCode32AluUx.cs @@ -0,0 +1,8 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32AluUx : IOpCode32AluReg + { + int RotateBits { get; } + bool Add { get; } + } +} diff --git a/ARMeilleure/Decoders/IOpCode32Mem.cs b/ARMeilleure/Decoders/IOpCode32Mem.cs index 0585ab53a..8fdc9dadf 100644 --- a/ARMeilleure/Decoders/IOpCode32Mem.cs +++ b/ARMeilleure/Decoders/IOpCode32Mem.cs @@ -6,7 +6,6 @@ namespace ARMeilleure.Decoders int Rn { get; } bool WBack { get; } - bool IsLoad { get; } } } \ No newline at end of file diff --git a/ARMeilleure/Decoders/IOpCode32MemEx.cs b/ARMeilleure/Decoders/IOpCode32MemEx.cs new file mode 100644 index 000000000..aca7200a5 --- /dev/null +++ b/ARMeilleure/Decoders/IOpCode32MemEx.cs @@ -0,0 +1,7 @@ +namespace ARMeilleure.Decoders +{ + interface 
IOpCode32MemEx : IOpCode32Mem + { + int Rd { get; } + } +} diff --git a/ARMeilleure/Decoders/IOpCode32Simd.cs b/ARMeilleure/Decoders/IOpCode32Simd.cs new file mode 100644 index 000000000..687254d92 --- /dev/null +++ b/ARMeilleure/Decoders/IOpCode32Simd.cs @@ -0,0 +1,4 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32Simd : IOpCode32, IOpCodeSimd { } +} diff --git a/ARMeilleure/Decoders/IOpCode32SimdImm.cs b/ARMeilleure/Decoders/IOpCode32SimdImm.cs new file mode 100644 index 000000000..a0cb669c7 --- /dev/null +++ b/ARMeilleure/Decoders/IOpCode32SimdImm.cs @@ -0,0 +1,9 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32SimdImm : IOpCode32Simd + { + int Vd { get; } + long Immediate { get; } + int Elems { get; } + } +} diff --git a/ARMeilleure/Decoders/OpCode32AluBf.cs b/ARMeilleure/Decoders/OpCode32AluBf.cs new file mode 100644 index 000000000..7ee0ee34e --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32AluBf.cs @@ -0,0 +1,24 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32AluBf : OpCode32, IOpCode32AluBf + { + public int Rd { get; private set; } + public int Rn { get; private set; } + + public int Msb { get; private set; } + + public int Lsb { get; private set; } + + public int SourceMask => (int)(0xFFFFFFFF >> (31 - Msb)); + public int DestMask => SourceMask & (int)(0xFFFFFFFF << Lsb); + + public OpCode32AluBf(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = (opCode >> 12) & 0xf; + Rn = (opCode >> 0) & 0xf; + + Msb = (opCode >> 16) & 0x1f; + Lsb = (opCode >> 7) & 0x1f; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32AluImm16.cs b/ARMeilleure/Decoders/OpCode32AluImm16.cs new file mode 100644 index 000000000..dbc02932a --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32AluImm16.cs @@ -0,0 +1,15 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32AluImm16 : OpCode32Alu + { + public int Immediate { get; private set; } + + public OpCode32AluImm16(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int imm12 = opCode & 0xfff; + int imm4 = (opCode >> 16) & 0xf; + + Immediate = (imm4 << 12) | imm12; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32AluMla.cs b/ARMeilleure/Decoders/OpCode32AluMla.cs new file mode 100644 index 000000000..4570aa4e2 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32AluMla.cs @@ -0,0 +1,28 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32AluMla : OpCode32, IOpCode32AluReg + { + public int Rn { get; private set; } + public int Rm { get; private set; } + public int Ra { get; private set; } + public int Rd { get; private set; } + + public bool NHigh { get; private set; } + public bool MHigh { get; private set; } + public bool R { get; private set; } + public bool SetFlags { get; private set; } + + public OpCode32AluMla(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rn = (opCode >> 0) & 0xf; + Rm = (opCode >> 8) & 0xf; + Ra = (opCode >> 12) & 0xf; + Rd = (opCode >> 16) & 0xf; + R = (opCode & (1 << 5)) != 0; + + NHigh = ((opCode >> 5) & 0x1) == 1; + MHigh = ((opCode >> 6) & 0x1) == 1; + SetFlags = ((opCode >> 20) & 1) != 0; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32AluReg.cs b/ARMeilleure/Decoders/OpCode32AluReg.cs new file mode 100644 index 000000000..e378dd05d --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32AluReg.cs @@ -0,0 +1,12 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32AluReg : OpCode32Alu, IOpCode32AluReg + { + public int Rm { get; private set; } + + public OpCode32AluReg(InstDescriptor inst, ulong 
address, int opCode) : base(inst, address, opCode) + { + Rm = (opCode >> 0) & 0xf; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32AluRsImm.cs b/ARMeilleure/Decoders/OpCode32AluRsImm.cs index 779d6cecf..68ca0d0cb 100644 --- a/ARMeilleure/Decoders/OpCode32AluRsImm.cs +++ b/ARMeilleure/Decoders/OpCode32AluRsImm.cs @@ -2,15 +2,15 @@ namespace ARMeilleure.Decoders { class OpCode32AluRsImm : OpCode32Alu { - public int Rm { get; private set; } - public int Imm { get; private set; } + public int Rm { get; private set; } + public int Immediate { get; private set; } public ShiftType ShiftType { get; private set; } public OpCode32AluRsImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { - Rm = (opCode >> 0) & 0xf; - Imm = (opCode >> 7) & 0x1f; + Rm = (opCode >> 0) & 0xf; + Immediate = (opCode >> 7) & 0x1f; ShiftType = (ShiftType)((opCode >> 5) & 3); } diff --git a/ARMeilleure/Decoders/OpCode32AluRsReg.cs b/ARMeilleure/Decoders/OpCode32AluRsReg.cs new file mode 100644 index 000000000..d195987bf --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32AluRsReg.cs @@ -0,0 +1,18 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32AluRsReg : OpCode32Alu + { + public int Rm { get; private set; } + public int Rs { get; private set; } + + public ShiftType ShiftType { get; private set; } + + public OpCode32AluRsReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rm = (opCode >> 0) & 0xf; + Rs = (opCode >> 8) & 0xf; + + ShiftType = (ShiftType)((opCode >> 5) & 3); + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32AluUmull.cs b/ARMeilleure/Decoders/OpCode32AluUmull.cs new file mode 100644 index 000000000..c98d9305b --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32AluUmull.cs @@ -0,0 +1,30 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32AluUmull : OpCode32 + { + public int RdLo { get; private set; } + public int RdHi { get; private set; } + public int Rn { get; private set; } + public int Rm { get; private set; } + + public bool NHigh { get; private set; } + public bool MHigh { get; private set; } + + public bool SetFlags { get; private set; } + public DataOp DataOp { get; private set; } + + public OpCode32AluUmull(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + RdLo = (opCode >> 12) & 0xf; + RdHi = (opCode >> 16) & 0xf; + Rm = (opCode >> 8) & 0xf; + Rn = (opCode >> 0) & 0xf; + + NHigh = ((opCode >> 5) & 0x1) == 1; + MHigh = ((opCode >> 6) & 0x1) == 1; + + SetFlags = ((opCode >> 20) & 0x1) != 0; + DataOp = DataOp.Arithmetic; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32AluUx.cs b/ARMeilleure/Decoders/OpCode32AluUx.cs new file mode 100644 index 000000000..55c10209d --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32AluUx.cs @@ -0,0 +1,16 @@ +using ARMeilleure.State; + +namespace ARMeilleure.Decoders +{ + class OpCode32AluUx : OpCode32AluReg, IOpCode32AluUx + { + public int Rotate { get; private set; } + public int RotateBits => Rotate * 8; + public bool Add => Rn != RegisterAlias.Aarch32Pc; + + public OpCode32AluUx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rotate = (opCode >> 10) & 0x3; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32Exception.cs b/ARMeilleure/Decoders/OpCode32Exception.cs new file mode 100644 index 000000000..1acdf5b12 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32Exception.cs @@ -0,0 +1,12 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32Exception : OpCode32 + { + public int Id { get; private set; } + + public 
OpCode32Exception(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Id = opCode & 0xFFFFFF; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32Mem.cs b/ARMeilleure/Decoders/OpCode32Mem.cs index f4e88d592..b749f3dbe 100644 --- a/ARMeilleure/Decoders/OpCode32Mem.cs +++ b/ARMeilleure/Decoders/OpCode32Mem.cs @@ -4,7 +4,7 @@ namespace ARMeilleure.Decoders { class OpCode32Mem : OpCode32, IOpCode32Mem { - public int Rt { get; private set; } + public int Rt { get; protected set; } public int Rn { get; private set; } public int Immediate { get; protected set; } diff --git a/ARMeilleure/Decoders/OpCode32MemLdEx.cs b/ARMeilleure/Decoders/OpCode32MemLdEx.cs new file mode 100644 index 000000000..42d1a33a0 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32MemLdEx.cs @@ -0,0 +1,12 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32MemLdEx : OpCode32Mem, IOpCode32MemEx + { + public int Rd { get; private set; } + + public OpCode32MemLdEx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = opCode & 0xf; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32MemReg.cs b/ARMeilleure/Decoders/OpCode32MemReg.cs new file mode 100644 index 000000000..ccc05a875 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32MemReg.cs @@ -0,0 +1,12 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32MemReg : OpCode32Mem + { + public int Rm { get; private set; } + + public OpCode32MemReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rm = (opCode >> 0) & 0xf; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32MemRsImm.cs b/ARMeilleure/Decoders/OpCode32MemRsImm.cs new file mode 100644 index 000000000..299e83e2c --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32MemRsImm.cs @@ -0,0 +1,16 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32MemRsImm : OpCode32Mem + { + public int Rm { get; private set; } + public ShiftType ShiftType { get; private set; } + + public OpCode32MemRsImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rm = (opCode >> 0) & 0xf; + Immediate = (opCode >> 7) & 0x1f; + + ShiftType = (ShiftType)((opCode >> 5) & 3); + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32MemStEx.cs b/ARMeilleure/Decoders/OpCode32MemStEx.cs new file mode 100644 index 000000000..b9c6d4f4f --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32MemStEx.cs @@ -0,0 +1,13 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32MemStEx : OpCode32Mem, IOpCode32MemEx + { + public int Rd { get; private set; } + + public OpCode32MemStEx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = (opCode >> 12) & 0xf; + Rt = (opCode >> 0) & 0xf; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32Simd.cs b/ARMeilleure/Decoders/OpCode32Simd.cs new file mode 100644 index 000000000..cda10c3c8 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32Simd.cs @@ -0,0 +1,30 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32Simd : OpCode32SimdBase + { + public int Opc { get; protected set; } + public bool Q { get; protected set; } + public bool F { get; protected set; } + public bool U { get; private set; } + + public OpCode32Simd(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Size = (opCode >> 20) & 0x3; + Q = ((opCode >> 6) & 0x1) != 0; + F = ((opCode >> 10) & 0x1) != 0; + U = ((opCode >> 24) & 0x1) != 0; + Opc = (opCode >> 7) & 0x3; + + RegisterSize = Q ? 
RegisterSize.Simd128 : RegisterSize.Simd64; + + Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf); + Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf); + + // Subclasses have their own handling of Vx to account for before checking. + if (GetType() == typeof(OpCode32Simd) && DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdBase.cs b/ARMeilleure/Decoders/OpCode32SimdBase.cs new file mode 100644 index 000000000..10b546597 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdBase.cs @@ -0,0 +1,52 @@ +using System; + +namespace ARMeilleure.Decoders +{ + abstract class OpCode32SimdBase : OpCode32, IOpCode32Simd + { + public int Vd { get; protected set; } + public int Vm { get; protected set; } + public int Size { get; protected set; } + + // Helpers to index doublewords within quad words. Essentially, looping over the vector starts at quadword Q and index Fx or Ix within it, + // depending on instruction type. + // + // Qx: The quadword register that the target vector is contained in. + // Ix: The starting index of the target vector within the quadword, with size treated as integer. + // Fx: The starting index of the target vector within the quadword, with size treated as floating point. (16 or 32) + public int Qd => GetQuadwordIndex(Vd); + public int Id => GetQuadwordSubindex(Vd) << (3 - Size); + public int Fd => GetQuadwordSubindex(Vd) << (1 - (Size & 1)); // When the top bit is truncated, 1 is fp16 which is an optional extension in ARMv8.2. We always assume 64. + + public int Qm => GetQuadwordIndex(Vm); + public int Im => GetQuadwordSubindex(Vm) << (3 - Size); + public int Fm => GetQuadwordSubindex(Vm) << (1 - (Size & 1)); + + protected int GetQuadwordIndex(int index) + { + switch (RegisterSize) + { + case RegisterSize.Simd128: + case RegisterSize.Simd64: + return index >> 1; + } + + throw new InvalidOperationException(); + } + + protected int GetQuadwordSubindex(int index) + { + switch (RegisterSize) + { + case RegisterSize.Simd128: + return 0; + case RegisterSize.Simd64: + return index & 1; + } + + throw new InvalidOperationException(); + } + + public OpCode32SimdBase(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdBinary.cs b/ARMeilleure/Decoders/OpCode32SimdBinary.cs new file mode 100644 index 000000000..66f63dc5a --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdBinary.cs @@ -0,0 +1,18 @@ +namespace ARMeilleure.Decoders +{ + /// + /// A special alias that always runs in 64 bit int, to speed up binary ops a little. 
+ /// + class OpCode32SimdBinary : OpCode32SimdReg + { + public OpCode32SimdBinary(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Size = 3; + + if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm, Vn)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdCmpZ.cs b/ARMeilleure/Decoders/OpCode32SimdCmpZ.cs new file mode 100644 index 000000000..567147fbe --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdCmpZ.cs @@ -0,0 +1,15 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdCmpZ : OpCode32Simd + { + public OpCode32SimdCmpZ(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Size = (opCode >> 18) & 0x3; + + if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdCvtFI.cs b/ARMeilleure/Decoders/OpCode32SimdCvtFI.cs new file mode 100644 index 000000000..aaedcb3cf --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdCvtFI.cs @@ -0,0 +1,13 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdCvtFI : OpCode32SimdS + { + public int Opc2 { get; private set; } + + public OpCode32SimdCvtFI(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Opc2 = (opCode >> 16) & 0x7; + Opc = (opCode >> 7) & 0x1; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdDupElem.cs b/ARMeilleure/Decoders/OpCode32SimdDupElem.cs new file mode 100644 index 000000000..fd83aee5f --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdDupElem.cs @@ -0,0 +1,40 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdDupElem : OpCode32Simd + { + public int Index { get; private set; } + + public OpCode32SimdDupElem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + var opc = (opCode >> 16) & 0xf; + + if ((opc & 0b1) == 1) + { + Size = 0; + Index = (opc >> 1) & 0x7; + } + else if ((opc & 0b11) == 0b10) + { + Size = 1; + Index = (opc >> 2) & 0x3; + } + else if ((opc & 0b111) == 0b100) + { + Size = 2; + Index = (opc >> 3) & 0x1; + } + else + { + Instruction = InstDescriptor.Undefined; + } + + Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf); + Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf); + + if (DecoderHelper.VectorArgumentsInvalid(Q, Vd)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdDupGP.cs b/ARMeilleure/Decoders/OpCode32SimdDupGP.cs new file mode 100644 index 000000000..58a8a7fe3 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdDupGP.cs @@ -0,0 +1,31 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdDupGP : OpCode32, IOpCode32Simd + { + public int Size { get; private set; } + public int Vd { get; private set; } + public int Rt { get; private set; } + public bool Q { get; private set; } + + public OpCode32SimdDupGP(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Size = 2 - (((opCode >> 21) & 0x2) | ((opCode >> 5) & 0x1)); // B:E - 0 for 32, 16 then 8. + if (Size == -1) + { + Instruction = InstDescriptor.Undefined; + return; + } + Q = ((opCode >> 21) & 0x1) != 0; + + RegisterSize = Q ? 
RegisterSize.Simd128 : RegisterSize.Simd64; + + Vd = ((opCode >> 3) & 0x10) | ((opCode >> 16) & 0xf); + Rt = ((opCode >> 12) & 0xf); + + if (DecoderHelper.VectorArgumentsInvalid(Q, Vd)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdExt.cs b/ARMeilleure/Decoders/OpCode32SimdExt.cs new file mode 100644 index 000000000..1ee0485ee --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdExt.cs @@ -0,0 +1,17 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdExt : OpCode32SimdReg + { + public int Immediate { get; private set; } + + public OpCode32SimdExt(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Immediate = (opCode >> 8) & 0xf; + Size = 0; + if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm, Vn) || (!Q && Immediate > 7)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdImm.cs b/ARMeilleure/Decoders/OpCode32SimdImm.cs new file mode 100644 index 000000000..72fca59ca --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdImm.cs @@ -0,0 +1,37 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdImm : OpCode32, IOpCode32SimdImm + { + public int Vd { get; private set; } + public bool Q { get; private set; } + public long Immediate { get; private set; } + public int Size { get; private set; } + public int Elems => GetBytesCount() >> Size; + + public OpCode32SimdImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Vd = (opCode >> 12) & 0xf; + Vd |= (opCode >> 18) & 0x10; + + Q = ((opCode >> 6) & 0x1) > 0; + + int cMode = (opCode >> 8) & 0xf; + int op = (opCode >> 5) & 0x1; + + long imm; + + imm = ((uint)opCode >> 0) & 0xf; + imm |= ((uint)opCode >> 12) & 0x70; + imm |= ((uint)opCode >> 17) & 0x80; + + (Immediate, Size) = OpCodeSimdHelper.GetSimdImmediateAndSize(cMode, op, imm, fpBaseSize: 2); + + RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64; + + if (DecoderHelper.VectorArgumentsInvalid(Q, Vd)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdImm44.cs b/ARMeilleure/Decoders/OpCode32SimdImm44.cs new file mode 100644 index 000000000..f8f73bbba --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdImm44.cs @@ -0,0 +1,36 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdImm44 : OpCode32, IOpCode32SimdImm + { + public int Vd { get; private set; } + public long Immediate { get; private set; } + public int Size { get; private set; } + public int Elems { get; private set; } + + public OpCode32SimdImm44(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Size = (opCode >> 8) & 0x3; + + bool single = Size != 3; + + if (single) + { + Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e); + } + else + { + Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf); + } + + long imm; + + imm = ((uint)opCode >> 0) & 0xf; + imm |= ((uint)opCode >> 12) & 0xf0; + + Immediate = (Size == 3) ? (long)DecoderHelper.Imm8ToFP64Table[(int)imm] : DecoderHelper.Imm8ToFP32Table[(int)imm]; + + RegisterSize = (!single) ? 
RegisterSize.Int64 : RegisterSize.Int32; + Elems = 1; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdMemImm.cs b/ARMeilleure/Decoders/OpCode32SimdMemImm.cs new file mode 100644 index 000000000..630566cc3 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdMemImm.cs @@ -0,0 +1,35 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdMemImm : OpCode32, IOpCode32Simd + { + public int Vd { get; private set; } + public int Rn { get; private set; } + public int Size { get; private set; } + public bool Add { get; private set; } + public int Immediate { get; private set; } + + public OpCode32SimdMemImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Immediate = opCode & 0xff; + + Rn = (opCode >> 16) & 0xf; + Size = (opCode >> 8) & 0x3; + + Immediate <<= (Size == 1) ? 1 : 2; + + bool u = (opCode & (1 << 23)) != 0; + Add = u; + + bool single = Size != 3; + + if (single) + { + Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e); + } + else + { + Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf); + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdMemMult.cs b/ARMeilleure/Decoders/OpCode32SimdMemMult.cs new file mode 100644 index 000000000..9d43a71eb --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdMemMult.cs @@ -0,0 +1,71 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdMemMult : OpCode32 + { + public int Rn { get; private set; } + public int Vd { get; private set; } + + public int RegisterRange { get; private set; } + public int Offset { get; private set; } + public int PostOffset { get; private set; } + public bool IsLoad { get; private set; } + public bool DoubleWidth { get; private set; } + public bool Add { get; private set; } + + public OpCode32SimdMemMult(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rn = (opCode >> 16) & 0xf; + + bool isLoad = (opCode & (1 << 20)) != 0; + bool w = (opCode & (1 << 21)) != 0; + bool u = (opCode & (1 << 23)) != 0; + bool p = (opCode & (1 << 24)) != 0; + + if (p == u && w) + { + Instruction = InstDescriptor.Undefined; + return; + } + + DoubleWidth = (opCode & (1 << 8)) != 0; + + if (!DoubleWidth) + { + Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e); + } + else + { + Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf); + } + + Add = u; + + RegisterRange = opCode & 0xff; + + int regsSize = RegisterRange * 4; // Double mode is still measured in single register size. + + if (!u) + { + Offset -= regsSize; + } + + if (w) + { + PostOffset = u ? regsSize : -regsSize; + } + else + { + PostOffset = 0; + } + + IsLoad = isLoad; + + int regs = DoubleWidth ? 
RegisterRange / 2 : RegisterRange; + + if (RegisterRange == 0 || RegisterRange > 32 || Vd + regs > 32) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdMemPair.cs b/ARMeilleure/Decoders/OpCode32SimdMemPair.cs new file mode 100644 index 000000000..93320e7f9 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdMemPair.cs @@ -0,0 +1,46 @@ +using ARMeilleure.State; +using System; + +namespace ARMeilleure.Decoders +{ + class OpCode32SimdMemPair : OpCode32, IOpCode32Simd + { + private static int[] RegsMap = + { + 1, 1, 4, 2, + 1, 1, 3, 1, + 1, 1, 2, 1, + 1, 1, 1, 1 + }; + + public int Vd { get; private set; } + public int Rn { get; private set; } + public int Rm { get; private set; } + public int Align { get; private set; } + public bool WBack { get; private set; } + public bool RegisterIndex { get; private set; } + public int Size { get; private set; } + public int Elems => 8 >> Size; + public int Regs { get; private set; } + public int Increment { get; private set; } + + public OpCode32SimdMemPair(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Vd = (opCode >> 12) & 0xf; + Vd |= (opCode >> 18) & 0x10; + + Size = (opCode >> 6) & 0x3; + + Align = (opCode >> 4) & 0x3; + Rm = (opCode >> 0) & 0xf; + Rn = (opCode >> 16) & 0xf; + + WBack = Rm != RegisterAlias.Aarch32Pc; + RegisterIndex = Rm != RegisterAlias.Aarch32Pc && Rm != RegisterAlias.Aarch32Sp; + + Regs = RegsMap[(opCode >> 8) & 0xf]; + + Increment = Math.Min(Regs, ((opCode >> 8) & 0x1) + 1); + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdMemSingle.cs b/ARMeilleure/Decoders/OpCode32SimdMemSingle.cs new file mode 100644 index 000000000..8cdd37436 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdMemSingle.cs @@ -0,0 +1,46 @@ +using ARMeilleure.State; + +namespace ARMeilleure.Decoders +{ + class OpCode32SimdMemSingle : OpCode32, IOpCode32Simd + { + public int Vd { get; private set; } + public int Rn { get; private set; } + public int Rm { get; private set; } + public int IndexAlign { get; private set; } + public int Index { get; private set; } + public bool WBack { get; private set; } + public bool RegisterIndex { get; private set; } + public int Size { get; private set; } + public bool Replicate { get; private set; } + public int Increment { get; private set; } + + public OpCode32SimdMemSingle(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Vd = (opCode >> 12) & 0xf; + Vd |= (opCode >> 18) & 0x10; + + IndexAlign = (opCode >> 4) & 0xf; + + Size = (opCode >> 10) & 0x3; + Replicate = Size == 3; + if (Replicate) + { + Size = (opCode >> 6) & 0x3; + Increment = ((opCode >> 5) & 1) + 1; + Index = 0; + } + else + { + Increment = (((IndexAlign >> Size) & 1) == 0) ? 
1 : 2; + Index = IndexAlign >> (1 + Size); + } + + Rm = (opCode >> 0) & 0xf; + Rn = (opCode >> 16) & 0xf; + + WBack = Rm != RegisterAlias.Aarch32Pc; + RegisterIndex = Rm != RegisterAlias.Aarch32Pc && Rm != RegisterAlias.Aarch32Sp; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdMovGp.cs b/ARMeilleure/Decoders/OpCode32SimdMovGp.cs new file mode 100644 index 000000000..918291a17 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdMovGp.cs @@ -0,0 +1,26 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdMovGp : OpCode32, IOpCode32Simd + { + public int Size => 2; + + public int Vn { get; private set; } + public int Rt { get; private set; } + public int Op { get; private set; } + + public int Opc1 { get; private set; } + public int Opc2 { get; private set; } + + public OpCode32SimdMovGp(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + // Which one is used is instruction dependant. + Op = (opCode >> 20) & 0x1; + + Opc1 = (opCode >> 21) & 0x3; + Opc2 = (opCode >> 5) & 0x3; + + Vn = ((opCode >> 7) & 0x1) | ((opCode >> 15) & 0x1e); + Rt = (opCode >> 12) & 0xf; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdMovGpDouble.cs b/ARMeilleure/Decoders/OpCode32SimdMovGpDouble.cs new file mode 100644 index 000000000..5f2725e15 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdMovGpDouble.cs @@ -0,0 +1,31 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdMovGpDouble : OpCode32, IOpCode32Simd + { + public int Size => 3; + + public int Vm { get; private set; } + public int Rt { get; private set; } + public int Rt2 { get; private set; } + public int Op { get; private set; } + + public OpCode32SimdMovGpDouble(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + // Which one is used is instruction dependant. 
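+            // For VMOV between two core registers and a doubleword register (or a pair of singles), bit 20 gives the direction:
+            // e.g. "VMOV D5, R2, R3" decodes with Op = 0 (core registers into D5), while "VMOV R2, R3, D5" decodes with Op = 1.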
+ Op = (opCode >> 20) & 0x1; + + Rt = (opCode >> 12) & 0xf; + Rt2 = (opCode >> 16) & 0xf; + + bool single = (opCode & (1 << 8)) == 0; + if (single) + { + Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e); + } + else + { + Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf); + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdMovGpElem.cs b/ARMeilleure/Decoders/OpCode32SimdMovGpElem.cs new file mode 100644 index 000000000..350d5c846 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdMovGpElem.cs @@ -0,0 +1,46 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdMovGpElem : OpCode32, IOpCode32Simd + { + public int Size { get; private set; } + + public int Vd { get; private set; } + public int Rt { get; private set; } + public int Op { get; private set; } + public bool U { get; private set; } + + public int Index { get; private set; } + + public OpCode32SimdMovGpElem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Op = (opCode >> 20) & 0x1; + U = ((opCode >> 23) & 1) != 0; + + var opc = (((opCode >> 23) & 1) << 4) | (((opCode >> 21) & 0x3) << 2) | ((opCode >> 5) & 0x3); + + if ((opc & 0b01000) == 0b01000) + { + Size = 0; + Index = opc & 0x7; + } + else if ((opc & 0b01001) == 0b00001) + { + Size = 1; + Index = (opc >> 1) & 0x3; + } + else if ((opc & 0b11011) == 0) + { + Size = 2; + Index = (opc >> 2) & 0x1; + } + else + { + Instruction = InstDescriptor.Undefined; + return; + } + + Vd = ((opCode >> 3) & 0x10) | ((opCode >> 16) & 0xf); + Rt = (opCode >> 12) & 0xf; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdReg.cs b/ARMeilleure/Decoders/OpCode32SimdReg.cs new file mode 100644 index 000000000..da1d2e363 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdReg.cs @@ -0,0 +1,22 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdReg : OpCode32Simd + { + public int Vn { get; private set; } + + public int Qn => GetQuadwordIndex(Vn); + public int In => GetQuadwordSubindex(Vn) << (3 - Size); + public int Fn => GetQuadwordSubindex(Vn) << (1 - (Size & 1)); + + public OpCode32SimdReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Vn = ((opCode >> 3) & 0x10) | ((opCode >> 16) & 0xf); + + // Subclasses have their own handling of Vx to account for before checking. + if (GetType() == typeof(OpCode32SimdReg) && DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm, Vn)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdRegElem.cs b/ARMeilleure/Decoders/OpCode32SimdRegElem.cs new file mode 100644 index 000000000..4bf15cca5 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdRegElem.cs @@ -0,0 +1,21 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdRegElem : OpCode32SimdReg + { + public OpCode32SimdRegElem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Q = ((opCode >> 24) & 0x1) != 0; + F = ((opCode >> 8) & 0x1) != 0; + Size = ((opCode >> 20) & 0x3); + + RegisterSize = Q ? 
RegisterSize.Simd128 : RegisterSize.Simd64; + + Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e); + + if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vn) || Size == 0 || (Size == 1 && F)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdRegS.cs b/ARMeilleure/Decoders/OpCode32SimdRegS.cs new file mode 100644 index 000000000..b4ffad80a --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdRegS.cs @@ -0,0 +1,20 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdRegS : OpCode32SimdS + { + public int Vn { get; private set; } + + public OpCode32SimdRegS(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + bool single = Size != 3; + if (single) + { + Vn = ((opCode >> 7) & 0x1) | ((opCode >> 15) & 0x1e); + } + else + { + Vn = ((opCode >> 3) & 0x10) | ((opCode >> 16) & 0xf); + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdRev.cs b/ARMeilleure/Decoders/OpCode32SimdRev.cs new file mode 100644 index 000000000..6cdf9f577 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdRev.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdRev : OpCode32SimdCmpZ + { + public OpCode32SimdRev(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + // Currently, this instruction is treated as though it's OPCODE is the true size, + // which lets us deal with reversing vectors on a single element basis (eg. math magic an I64 rather than insert lots of I8s). + int tempSize = Size; + Size = 3 - Opc; // Op 0 is 64 bit, 1 is 32 and so on. + Opc = tempSize; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdS.cs b/ARMeilleure/Decoders/OpCode32SimdS.cs new file mode 100644 index 000000000..2e860d9c8 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdS.cs @@ -0,0 +1,31 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdS : OpCode32, IOpCode32Simd + { + public int Vd { get; private set; } + public int Vm { get; private set; } + public int Opc { get; protected set; } + public int Size { get; protected set; } + + public OpCode32SimdS(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Opc = (opCode >> 15) & 0x3; + Size = (opCode >> 8) & 0x3; + + bool single = Size != 3; + + RegisterSize = single ? 
RegisterSize.Int32 : RegisterSize.Int64; + + if (single) + { + Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e); + Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e); + } + else + { + Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf); + Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf); + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdSel.cs b/ARMeilleure/Decoders/OpCode32SimdSel.cs new file mode 100644 index 000000000..aefe138fa --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdSel.cs @@ -0,0 +1,20 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdSel : OpCode32SimdRegS + { + public OpCode32SimdSelMode Cc { get; private set; } + + public OpCode32SimdSel(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Cc = (OpCode32SimdSelMode)((opCode >> 20) & 3); + } + } + + enum OpCode32SimdSelMode : int + { + Eq = 0, + Vs, + Ge, + Gt + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdShImm.cs b/ARMeilleure/Decoders/OpCode32SimdShImm.cs new file mode 100644 index 000000000..b19a601fb --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdShImm.cs @@ -0,0 +1,44 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdShImm : OpCode32Simd + { + public int Immediate { get; private set; } + public int Shift { get; private set; } + + public OpCode32SimdShImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Immediate = (opCode >> 16) & 0x3f; + var limm = ((opCode >> 1) & 0x40) | Immediate; + + if ((limm & 0x40) == 0b1000000) + { + Size = 3; + Shift = Immediate; + } + else if ((limm & 0x60) == 0b0100000) + { + Size = 2; + Shift = Immediate - 32; + } + else if ((limm & 0x70) == 0b0010000) + { + Size = 1; + Shift = Immediate - 16; + } + else if ((limm & 0x78) == 0b0001000) + { + Size = 0; + Shift = Immediate - 8; + } + else + { + Instruction = InstDescriptor.Undefined; + } + + if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdSpecial.cs b/ARMeilleure/Decoders/OpCode32SimdSpecial.cs new file mode 100644 index 000000000..986afcf94 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdSpecial.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdSpecial : OpCode32 + { + public int Rt { get; private set; } + public int Sreg { get; private set; } + + public OpCode32SimdSpecial(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rt = (opCode >> 12) & 0xf; + Sreg = (opCode >> 16) & 0xf; + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdSqrte.cs b/ARMeilleure/Decoders/OpCode32SimdSqrte.cs new file mode 100644 index 000000000..9eb7f775d --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdSqrte.cs @@ -0,0 +1,16 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdSqrte : OpCode32Simd + { + public OpCode32SimdSqrte(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Size = (opCode >> 18) & 0x1; + F = ((opCode >> 8) & 0x1) != 0; + + if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32SimdTbl.cs b/ARMeilleure/Decoders/OpCode32SimdTbl.cs new file mode 100644 index 000000000..e59627c30 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdTbl.cs @@ -0,0 +1,21 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdTbl : OpCode32SimdReg + { + public int Length { get; private set; } + + public 
OpCode32SimdTbl(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Length = (opCode >> 8) & 3; + Size = 0; + Opc = Q ? 1 : 0; + Q = false; + RegisterSize = RegisterSize.Simd64; + + if (Vn + Length + 1 > 32) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCode32System.cs b/ARMeilleure/Decoders/OpCode32System.cs new file mode 100644 index 000000000..bf4383017 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32System.cs @@ -0,0 +1,26 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32System : OpCode32 + { + public int Opc1 { get; private set; } + public int CRn { get; private set; } + public int Rt { get; private set; } + public int Opc2 { get; private set; } + public int CRm { get; private set; } + public int MrrcOp { get; private set; } + + public int Coproc { get; private set; } + + public OpCode32System(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Opc1 = (opCode >> 21) & 0x7; + CRn = (opCode >> 16) & 0xf; + Rt = (opCode >> 12) & 0xf; + Opc2 = (opCode >> 5) & 0x7; + CRm = (opCode >> 0) & 0xf; + MrrcOp = (opCode >> 4) & 0xf; + + Coproc = (opCode >> 8) & 0xf; + } + } +} diff --git a/ARMeilleure/Decoders/OpCodeSimdHelper.cs b/ARMeilleure/Decoders/OpCodeSimdHelper.cs new file mode 100644 index 000000000..3e5a7f65b --- /dev/null +++ b/ARMeilleure/Decoders/OpCodeSimdHelper.cs @@ -0,0 +1,88 @@ +namespace ARMeilleure.Decoders +{ + public static class OpCodeSimdHelper + { + public static (long Immediate, int Size) GetSimdImmediateAndSize(int cMode, int op, long imm, int fpBaseSize = 0) + { + int modeLow = cMode & 1; + int modeHigh = cMode >> 1; + int size = 0; + + if (modeHigh == 0b111) + { + switch (op | (modeLow << 1)) + { + case 0: + // 64-bits Immediate. + // Transform abcd efgh into abcd efgh abcd efgh ... + size = 3; + imm = (long)((ulong)imm * 0x0101010101010101); + break; + + case 1: + // 64-bits Immediate. + // Transform abcd efgh into aaaa aaaa bbbb bbbb ... + size = 3; + imm = (imm & 0xf0) >> 4 | (imm & 0x0f) << 4; + imm = (imm & 0xcc) >> 2 | (imm & 0x33) << 2; + imm = (imm & 0xaa) >> 1 | (imm & 0x55) << 1; + + imm = (long)((ulong)imm * 0x8040201008040201); + imm = (long)((ulong)imm & 0x8080808080808080); + + imm |= imm >> 4; + imm |= imm >> 2; + imm |= imm >> 1; + break; + + case 2: + // 2 x 32-bits floating point Immediate. + size = 3; + imm = (long)DecoderHelper.Imm8ToFP32Table[(int)imm]; + imm |= imm << 32; + break; + + case 3: + // 64-bits floating point Immediate. + size = 3; + imm = (long)DecoderHelper.Imm8ToFP64Table[(int)imm]; + break; + } + } + else if ((modeHigh & 0b110) == 0b100) + { + // 16-bits shifted Immediate. + size = 1; imm <<= (modeHigh & 1) << 3; + } + else if ((modeHigh & 0b100) == 0b000) + { + // 32-bits shifted Immediate. + size = 2; imm <<= modeHigh << 3; + } + else if ((modeHigh & 0b111) == 0b110) + { + // 32-bits shifted Immediate (fill with ones). + size = 2; imm = ShlOnes(imm, 8 << modeLow); + } + else + { + // 8-bits without shift. 
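+                // The raw 8-bit value is used unchanged; size 0 selects byte-sized elements.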
+ size = 0; + } + + return (imm, size); + } + + private static long ShlOnes(long value, int shift) + { + if (shift != 0) + { + return value << shift | (long)(ulong.MaxValue >> (64 - shift)); + } + else + { + return value; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index 2fa7702d9..3915ac87b 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -599,32 +599,283 @@ namespace ARMeilleure.Decoders #region "OpCode Table (AArch32)" // Base - SetA32("<<<<0010100xxxxxxxxxxxxxxxxxxxxx", InstName.Add, InstEmit32.Add, typeof(OpCode32AluImm)); - SetA32("<<<<0000100xxxxxxxxxxxxxxxx0xxxx", InstName.Add, InstEmit32.Add, typeof(OpCode32AluRsImm)); - SetA32("<<<<1010xxxxxxxxxxxxxxxxxxxxxxxx", InstName.B, InstEmit32.B, typeof(OpCode32BImm)); - SetA32("<<<<1011xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bl, InstEmit32.Bl, typeof(OpCode32BImm)); - SetA32("1111101xxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Blx, InstEmit32.Blx, typeof(OpCode32BImm)); - SetA32("<<<<000100101111111111110001xxxx", InstName.Bx, InstEmit32.Bx, typeof(OpCode32BReg)); - SetT32("xxxxxxxxxxxxxxxx010001110xxxx000", InstName.Bx, InstEmit32.Bx, typeof(OpCodeT16BReg)); - SetA32("<<<<00110101xxxx0000xxxxxxxxxxxx", InstName.Cmp, InstEmit32.Cmp, typeof(OpCode32AluImm)); - SetA32("<<<<00010101xxxx0000xxxxxxx0xxxx", InstName.Cmp, InstEmit32.Cmp, typeof(OpCode32AluRsImm)); - SetA32("<<<<100xx0x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldm, InstEmit32.Ldm, typeof(OpCode32MemMult)); - SetA32("<<<<010xx0x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldr, InstEmit32.Ldr, typeof(OpCode32MemImm)); - SetA32("<<<<010xx1x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldrb, InstEmit32.Ldrb, typeof(OpCode32MemImm)); - SetA32("<<<<000xx1x0xxxxxxxxxxxx1101xxxx", InstName.Ldrd, InstEmit32.Ldrd, typeof(OpCode32MemImm8)); - SetA32("<<<<000xx1x1xxxxxxxxxxxx1011xxxx", InstName.Ldrh, InstEmit32.Ldrh, typeof(OpCode32MemImm8)); - SetA32("<<<<000xx1x1xxxxxxxxxxxx1101xxxx", InstName.Ldrsb, InstEmit32.Ldrsb, typeof(OpCode32MemImm8)); - SetA32("<<<<000xx1x1xxxxxxxxxxxx1111xxxx", InstName.Ldrsh, InstEmit32.Ldrsh, typeof(OpCode32MemImm8)); - SetA32("<<<<0011101x0000xxxxxxxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, typeof(OpCode32AluImm)); - SetA32("<<<<0001101x0000xxxxxxxxxxx0xxxx", InstName.Mov, InstEmit32.Mov, typeof(OpCode32AluRsImm)); - SetT32("xxxxxxxxxxxxxxxx00100xxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, typeof(OpCodeT16AluImm8)); - SetA32("<<<<100xx0x0xxxxxxxxxxxxxxxxxxxx", InstName.Stm, InstEmit32.Stm, typeof(OpCode32MemMult)); - SetA32("<<<<010xx0x0xxxxxxxxxxxxxxxxxxxx", InstName.Str, InstEmit32.Str, typeof(OpCode32MemImm)); - SetA32("<<<<010xx1x0xxxxxxxxxxxxxxxxxxxx", InstName.Strb, InstEmit32.Strb, typeof(OpCode32MemImm)); - SetA32("<<<<000xx1x0xxxxxxxxxxxx1111xxxx", InstName.Strd, InstEmit32.Strd, typeof(OpCode32MemImm8)); - SetA32("<<<<000xx1x0xxxxxxxxxxxx1011xxxx", InstName.Strh, InstEmit32.Strh, typeof(OpCode32MemImm8)); - SetA32("<<<<0010010xxxxxxxxxxxxxxxxxxxxx", InstName.Sub, InstEmit32.Sub, typeof(OpCode32AluImm)); - SetA32("<<<<0000010xxxxxxxxxxxxxxxx0xxxx", InstName.Sub, InstEmit32.Sub, typeof(OpCode32AluRsImm)); + SetA32("<<<<0010101xxxxxxxxxxxxxxxxxxxxx", InstName.Adc, InstEmit32.Adc, typeof(OpCode32AluImm)); + SetA32("<<<<0000101xxxxxxxxxxxxxxxx0xxxx", InstName.Adc, InstEmit32.Adc, typeof(OpCode32AluRsImm)); + SetA32("<<<<0000101xxxxxxxxxxxxx0xx1xxxx", InstName.Adc, InstEmit32.Adc, typeof(OpCode32AluRsReg)); + SetA32("<<<<0010100xxxxxxxxxxxxxxxxxxxxx", InstName.Add, InstEmit32.Add, 
typeof(OpCode32AluImm)); + SetA32("<<<<0000100xxxxxxxxxxxxxxxx0xxxx", InstName.Add, InstEmit32.Add, typeof(OpCode32AluRsImm)); + SetA32("<<<<0000100xxxxxxxxxxxxx0xx1xxxx", InstName.Add, InstEmit32.Add, typeof(OpCode32AluRsReg)); + SetA32("<<<<0010000xxxxxxxxxxxxxxxxxxxxx", InstName.And, InstEmit32.And, typeof(OpCode32AluImm)); + SetA32("<<<<0000000xxxxxxxxxxxxxxxx0xxxx", InstName.And, InstEmit32.And, typeof(OpCode32AluRsImm)); + SetA32("<<<<0000000xxxxxxxxxxxxx0xx1xxxx", InstName.And, InstEmit32.And, typeof(OpCode32AluRsReg)); + SetA32("<<<<1010xxxxxxxxxxxxxxxxxxxxxxxx", InstName.B, InstEmit32.B, typeof(OpCode32BImm)); + SetA32("<<<<0111110xxxxxxxxxxxxxx0011111", InstName.Bfc, InstEmit32.Bfc, typeof(OpCode32AluBf)); + SetA32("<<<<0111110xxxxxxxxxxxxxx001xxxx", InstName.Bfi, InstEmit32.Bfi, typeof(OpCode32AluBf)); + SetA32("<<<<0011110xxxxxxxxxxxxxxxxxxxxx", InstName.Bic, InstEmit32.Bic, typeof(OpCode32AluImm)); + SetA32("<<<<0001110xxxxxxxxxxxxxxxx0xxxx", InstName.Bic, InstEmit32.Bic, typeof(OpCode32AluRsImm)); + SetA32("<<<<0001110xxxxxxxxxxxxx0xx1xxxx", InstName.Bic, InstEmit32.Bic, typeof(OpCode32AluRsReg)); + SetA32("<<<<1011xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bl, InstEmit32.Bl, typeof(OpCode32BImm)); + SetA32("1111101xxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Blx, InstEmit32.Blx, typeof(OpCode32BImm)); + SetA32("<<<<000100101111111111110011xxxx", InstName.Blx, InstEmit32.Blxr, typeof(OpCode32BReg)); + SetA32("<<<<000100101111111111110001xxxx", InstName.Bx, InstEmit32.Bx, typeof(OpCode32BReg)); + SetT32("xxxxxxxxxxxxxxxx010001110xxxx000", InstName.Bx, InstEmit32.Bx, typeof(OpCodeT16BReg)); + SetA32("11110101011111111111000000011111", InstName.Clrex, InstEmit32.Clrex, typeof(OpCode32)); + SetA32("<<<<000101101111xxxx11110001xxxx", InstName.Clz, InstEmit32.Clz, typeof(OpCode32AluReg)); + SetA32("<<<<00110111xxxx0000xxxxxxxxxxxx", InstName.Cmn, InstEmit32.Cmn, typeof(OpCode32AluImm)); + SetA32("<<<<00010111xxxx0000xxxxxxx0xxxx", InstName.Cmn, InstEmit32.Cmn, typeof(OpCode32AluRsImm)); + SetA32("<<<<00110101xxxx0000xxxxxxxxxxxx", InstName.Cmp, InstEmit32.Cmp, typeof(OpCode32AluImm)); + SetA32("<<<<00010101xxxx0000xxxxxxx0xxxx", InstName.Cmp, InstEmit32.Cmp, typeof(OpCode32AluRsImm)); + SetA32("<<<<00010101xxxx0000xxxx0xx1xxxx", InstName.Cmp, InstEmit32.Cmp, typeof(OpCode32AluRsReg)); + SetA32("1111010101111111111100000101xxxx", InstName.Dmb, InstEmit32.Dmb, typeof(OpCode32)); + SetA32("1111010101111111111100000100xxxx", InstName.Dsb, InstEmit32.Dsb, typeof(OpCode32)); + SetA32("<<<<0010001xxxxxxxxxxxxxxxxxxxxx", InstName.Eor, InstEmit32.Eor, typeof(OpCode32AluImm)); + SetA32("<<<<0000001xxxxxxxxxxxxxxxx0xxxx", InstName.Eor, InstEmit32.Eor, typeof(OpCode32AluRsImm)); + SetA32("<<<<0000001xxxxxxxxxxxxx0xx1xxxx", InstName.Eor, InstEmit32.Eor, typeof(OpCode32AluRsReg)); + SetA32("1111010101111111111100000110xxxx", InstName.Isb, InstEmit32.Nop, typeof(OpCode32)); + SetA32("<<<<00011001xxxxxxxx110010011111", InstName.Lda, InstEmit32.Lda, typeof(OpCode32MemLdEx)); + SetA32("<<<<00011101xxxxxxxx110010011111", InstName.Ldab, InstEmit32.Ldab, typeof(OpCode32MemLdEx)); + SetA32("<<<<00011001xxxxxxxx111010011111", InstName.Ldaex, InstEmit32.Ldaex, typeof(OpCode32MemLdEx)); + SetA32("<<<<00011101xxxxxxxx111010011111", InstName.Ldaexb, InstEmit32.Ldaexb, typeof(OpCode32MemLdEx)); + SetA32("<<<<00011011xxxxxxxx111010011111", InstName.Ldaexd, InstEmit32.Ldaexd, typeof(OpCode32MemLdEx)); + SetA32("<<<<00011111xxxxxxxx111010011111", InstName.Ldaexh, InstEmit32.Ldaexh, typeof(OpCode32MemLdEx)); + 
SetA32("<<<<00011111xxxxxxxx110010011111", InstName.Ldah, InstEmit32.Ldah, typeof(OpCode32MemLdEx)); + SetA32("<<<<100xx0x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldm, InstEmit32.Ldm, typeof(OpCode32MemMult)); + SetA32("<<<<010xx0x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldr, InstEmit32.Ldr, typeof(OpCode32MemImm)); + SetA32("<<<<011xx0x1xxxxxxxxxxxxxxx0xxxx", InstName.Ldr, InstEmit32.Ldr, typeof(OpCode32MemRsImm)); + SetA32("<<<<010xx1x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldrb, InstEmit32.Ldrb, typeof(OpCode32MemImm)); + SetA32("<<<<011xx1x1xxxxxxxxxxxxxxx0xxxx", InstName.Ldrb, InstEmit32.Ldrb, typeof(OpCode32MemRsImm)); + SetA32("<<<<000xx1x0xxxxxxxxxxxx1101xxxx", InstName.Ldrd, InstEmit32.Ldrd, typeof(OpCode32MemImm8)); + SetA32("<<<<000xx0x0xxxxxxxx00001101xxxx", InstName.Ldrd, InstEmit32.Ldrd, typeof(OpCode32MemReg)); + SetA32("<<<<00011001xxxxxxxx111110011111", InstName.Ldrex, InstEmit32.Ldrex, typeof(OpCode32MemLdEx)); + SetA32("<<<<00011101xxxxxxxx111110011111", InstName.Ldrexb, InstEmit32.Ldrexb, typeof(OpCode32MemLdEx)); + SetA32("<<<<00011011xxxxxxxx111110011111", InstName.Ldrexd, InstEmit32.Ldrexd, typeof(OpCode32MemLdEx)); + SetA32("<<<<00011111xxxxxxxx111110011111", InstName.Ldrexh, InstEmit32.Ldrexh, typeof(OpCode32MemLdEx)); + SetA32("<<<<000xx1x1xxxxxxxxxxxx1011xxxx", InstName.Ldrh, InstEmit32.Ldrh, typeof(OpCode32MemImm8)); + SetA32("<<<<000xx0x1xxxxxxxx00001011xxxx", InstName.Ldrh, InstEmit32.Ldrh, typeof(OpCode32MemReg)); + SetA32("<<<<000xx1x1xxxxxxxxxxxx1101xxxx", InstName.Ldrsb, InstEmit32.Ldrsb, typeof(OpCode32MemImm8)); + SetA32("<<<<000xx0x1xxxxxxxx00001101xxxx", InstName.Ldrsb, InstEmit32.Ldrsb, typeof(OpCode32MemReg)); + SetA32("<<<<000xx1x1xxxxxxxxxxxx1111xxxx", InstName.Ldrsh, InstEmit32.Ldrsh, typeof(OpCode32MemImm8)); + SetA32("<<<<000xx0x1xxxxxxxx00001111xxxx", InstName.Ldrsh, InstEmit32.Ldrsh, typeof(OpCode32MemReg)); + SetA32("<<<<1110xxx0xxxxxxxx111xxxx1xxxx", InstName.Mcr, InstEmit32.Mcr, typeof(OpCode32System)); + SetA32("<<<<0000001xxxxxxxxxxxxx1001xxxx", InstName.Mla, InstEmit32.Mla, typeof(OpCode32AluMla)); + SetA32("<<<<00000110xxxxxxxxxxxx1001xxxx", InstName.Mls, InstEmit32.Mls, typeof(OpCode32AluMla)); + SetA32("<<<<0011101x0000xxxxxxxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, typeof(OpCode32AluImm)); + SetA32("<<<<0001101x0000xxxxxxxxxxx0xxxx", InstName.Mov, InstEmit32.Mov, typeof(OpCode32AluRsImm)); + SetA32("<<<<0001101x0000xxxxxxxx0xx1xxxx", InstName.Mov, InstEmit32.Mov, typeof(OpCode32AluRsReg)); + SetA32("<<<<00110000xxxxxxxxxxxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, typeof(OpCode32AluImm16)); + SetT32("xxxxxxxxxxxxxxxx00100xxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, typeof(OpCodeT16AluImm8)); + SetA32("<<<<00110100xxxxxxxxxxxxxxxxxxxx", InstName.Movt, InstEmit32.Movt, typeof(OpCode32AluImm16)); + SetA32("<<<<1110xxx1xxxxxxxx111xxxx1xxxx", InstName.Mrc, InstEmit32.Mrc, typeof(OpCode32System)); + SetA32("<<<<11000101xxxxxxxx111xxxxxxxxx", InstName.Mrrc, InstEmit32.Mrrc, typeof(OpCode32System)); + SetA32("<<<<0000000xxxxx0000xxxx1001xxxx", InstName.Mul, InstEmit32.Mul, typeof(OpCode32AluMla)); + SetA32("<<<<0011111x0000xxxxxxxxxxxxxxxx", InstName.Mvn, InstEmit32.Mvn, typeof(OpCode32AluImm)); + SetA32("<<<<0001111x0000xxxxxxxxxxx0xxxx", InstName.Mvn, InstEmit32.Mvn, typeof(OpCode32AluRsImm)); + SetA32("<<<<0001111x0000xxxxxxxx0xx1xxxx", InstName.Mvn, InstEmit32.Mvn, typeof(OpCode32AluRsReg)); + SetA32("<<<<0011100xxxxxxxxxxxxxxxxxxxxx", InstName.Orr, InstEmit32.Orr, typeof(OpCode32AluImm)); + SetA32("<<<<0001100xxxxxxxxxxxxxxxx0xxxx", InstName.Orr, 
InstEmit32.Orr, typeof(OpCode32AluRsImm)); + SetA32("<<<<0001100xxxxxxxxxxxxx0xx1xxxx", InstName.Orr, InstEmit32.Orr, typeof(OpCode32AluRsReg)); + SetA32("<<<<01101000xxxxxxxxxxxxxx01xxxx", InstName.Pkh, InstEmit32.Pkh, typeof(OpCode32AluRsImm)); + SetA32("11110101xx01xxxx1111xxxxxxxxxxxx", InstName.Pld, InstEmit32.Nop, typeof(OpCode32)); + SetA32("11110111xx01xxxx1111xxxxxxx0xxxx", InstName.Pld, InstEmit32.Nop, typeof(OpCode32)); + SetA32("<<<<011011111111xxxx11110011xxxx", InstName.Rbit, InstEmit32.Rbit, typeof(OpCode32AluReg)); + SetA32("<<<<011010111111xxxx11110011xxxx", InstName.Rev, InstEmit32.Rev, typeof(OpCode32AluReg)); + SetA32("<<<<011010111111xxxx11111011xxxx", InstName.Rev16, InstEmit32.Rev16, typeof(OpCode32AluReg)); + SetA32("<<<<011011111111xxxx11111011xxxx", InstName.Revsh, InstEmit32.Revsh, typeof(OpCode32AluReg)); + SetA32("<<<<0010011xxxxxxxxxxxxxxxxxxxxx", InstName.Rsb, InstEmit32.Rsb, typeof(OpCode32AluImm)); + SetA32("<<<<0000011xxxxxxxxxxxxxxxx0xxxx", InstName.Rsb, InstEmit32.Rsb, typeof(OpCode32AluRsImm)); + SetA32("<<<<0000011xxxxxxxxxxxxx0xx1xxxx", InstName.Rsb, InstEmit32.Rsb, typeof(OpCode32AluRsReg)); + SetA32("<<<<0010111xxxxxxxxxxxxxxxxxxxxx", InstName.Rsc, InstEmit32.Rsc, typeof(OpCode32AluImm)); + SetA32("<<<<0000111xxxxxxxxxxxxxxxx0xxxx", InstName.Rsc, InstEmit32.Rsc, typeof(OpCode32AluRsImm)); + SetA32("<<<<0000111xxxxxxxxxxxxx0xx1xxxx", InstName.Rsc, InstEmit32.Rsc, typeof(OpCode32AluRsReg)); + SetA32("<<<<0010110xxxxxxxxxxxxxxxxxxxxx", InstName.Sbc, InstEmit32.Sbc, typeof(OpCode32AluImm)); + SetA32("<<<<0000110xxxxxxxxxxxxxxxx0xxxx", InstName.Sbc, InstEmit32.Sbc, typeof(OpCode32AluRsImm)); + SetA32("<<<<0000110xxxxxxxxxxxxx0xx1xxxx", InstName.Sbc, InstEmit32.Sbc, typeof(OpCode32AluRsReg)); + SetA32("<<<<0111101xxxxxxxxxxxxxx101xxxx", InstName.Sbfx, InstEmit32.Sbfx, typeof(OpCode32AluBf)); + SetA32("<<<<01110001xxxx1111xxxx0001xxxx", InstName.Sdiv, InstEmit32.Sdiv, typeof(OpCode32AluMla)); + SetA32("<<<<00010000xxxxxxxxxxxx1xx0xxxx", InstName.Smlab, InstEmit32.Smlab, typeof(OpCode32AluMla)); + SetA32("<<<<0000111xxxxxxxxxxxxx1001xxxx", InstName.Smlal, InstEmit32.Smlal, typeof(OpCode32AluUmull)); + SetA32("<<<<00010100xxxxxxxxxxxx1xx0xxxx", InstName.Smlalh, InstEmit32.Smlalh, typeof(OpCode32AluUmull)); + SetA32("<<<<01110101xxxxxxxxxxxx00x1xxxx", InstName.Smmla, InstEmit32.Smmla, typeof(OpCode32AluMla)); + SetA32("<<<<01110101xxxxxxxxxxxx11x1xxxx", InstName.Smmls, InstEmit32.Smmls, typeof(OpCode32AluMla)); + SetA32("<<<<00010110xxxxxxxxxxxx1xx0xxxx", InstName.Smulh, InstEmit32.Smulh, typeof(OpCode32AluMla)); + SetA32("<<<<0000110xxxxxxxxxxxxx1001xxxx", InstName.Smull, InstEmit32.Smull, typeof(OpCode32AluUmull)); + SetA32("<<<<00011000xxxx111111001001xxxx", InstName.Stl, InstEmit32.Stl, typeof(OpCode32MemStEx)); + SetA32("<<<<00011100xxxx111111001001xxxx", InstName.Stlb, InstEmit32.Stlb, typeof(OpCode32MemStEx)); + SetA32("<<<<00011000xxxxxxxx11101001xxxx", InstName.Stlex, InstEmit32.Stlex, typeof(OpCode32MemStEx)); + SetA32("<<<<00011100xxxxxxxx11101001xxxx", InstName.Stlexb, InstEmit32.Stlexb, typeof(OpCode32MemStEx)); + SetA32("<<<<00011010xxxxxxxx11101001xxxx", InstName.Stlexd, InstEmit32.Stlexd, typeof(OpCode32MemStEx)); + SetA32("<<<<00011110xxxxxxxx11101001xxxx", InstName.Stlexh, InstEmit32.Stlexh, typeof(OpCode32MemStEx)); + SetA32("<<<<00011110xxxx111111001001xxxx", InstName.Stlh, InstEmit32.Stlh, typeof(OpCode32MemStEx)); + SetA32("<<<<100xx0x0xxxxxxxxxxxxxxxxxxxx", InstName.Stm, InstEmit32.Stm, typeof(OpCode32MemMult)); + 
SetA32("<<<<010xx0x0xxxxxxxxxxxxxxxxxxxx", InstName.Str, InstEmit32.Str, typeof(OpCode32MemImm)); + SetA32("<<<<011xx0x0xxxxxxxxxxxxxxx0xxxx", InstName.Str, InstEmit32.Str, typeof(OpCode32MemRsImm)); + SetA32("<<<<010xx1x0xxxxxxxxxxxxxxxxxxxx", InstName.Strb, InstEmit32.Strb, typeof(OpCode32MemImm)); + SetA32("<<<<011xx1x0xxxxxxxxxxxxxxx0xxxx", InstName.Strb, InstEmit32.Strb, typeof(OpCode32MemRsImm)); + SetA32("<<<<000xx1x0xxxxxxxxxxxx1111xxxx", InstName.Strd, InstEmit32.Strd, typeof(OpCode32MemImm8)); + SetA32("<<<<000xx0x0xxxxxxxx00001111xxxx", InstName.Strd, InstEmit32.Strd, typeof(OpCode32MemReg)); + SetA32("<<<<00011000xxxxxxxx11111001xxxx", InstName.Strex, InstEmit32.Strex, typeof(OpCode32MemStEx)); + SetA32("<<<<00011100xxxxxxxx11111001xxxx", InstName.Strexb, InstEmit32.Strexb, typeof(OpCode32MemStEx)); + SetA32("<<<<00011010xxxxxxxx11111001xxxx", InstName.Strexd, InstEmit32.Strexd, typeof(OpCode32MemStEx)); + SetA32("<<<<00011110xxxxxxxx11111001xxxx", InstName.Strexh, InstEmit32.Strexh, typeof(OpCode32MemStEx)); + SetA32("<<<<000xx1x0xxxxxxxxxxxx1011xxxx", InstName.Strh, InstEmit32.Strh, typeof(OpCode32MemImm8)); + SetA32("<<<<000xx0x0xxxxxxxx00001011xxxx", InstName.Strh, InstEmit32.Strh, typeof(OpCode32MemReg)); + SetA32("<<<<0010010xxxxxxxxxxxxxxxxxxxxx", InstName.Sub, InstEmit32.Sub, typeof(OpCode32AluImm)); + SetA32("<<<<0000010xxxxxxxxxxxxxxxx0xxxx", InstName.Sub, InstEmit32.Sub, typeof(OpCode32AluRsImm)); + SetA32("<<<<0000010xxxxxxxxxxxxx0xx1xxxx", InstName.Sub, InstEmit32.Sub, typeof(OpCode32AluRsReg)); + SetA32("<<<<1111xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Svc, InstEmit32.Svc, typeof(OpCode32Exception)); + SetA32("<<<<01101010xxxxxxxxxx000111xxxx", InstName.Sxtb, InstEmit32.Sxtb, typeof(OpCode32AluUx)); + SetA32("<<<<01101000xxxxxxxxxx000111xxxx", InstName.Sxtb16, InstEmit32.Sxtb16, typeof(OpCode32AluUx)); + SetA32("<<<<01101011xxxxxxxxxx000111xxxx", InstName.Sxth, InstEmit32.Sxth, typeof(OpCode32AluUx)); + SetA32("<<<<00110011xxxx0000xxxxxxxxxxxx", InstName.Teq, InstEmit32.Teq, typeof(OpCode32AluImm)); + SetA32("<<<<00010011xxxx0000xxxxxxx0xxxx", InstName.Teq, InstEmit32.Teq, typeof(OpCode32AluRsImm)); + SetA32("<<<<00010011xxxx0000xxxx0xx1xxxx", InstName.Teq, InstEmit32.Teq, typeof(OpCode32AluRsReg)); + SetA32("<<<<0111111111111101111011111110", InstName.Trap, InstEmit32.Trap, typeof(OpCode32Exception)); + SetA32("<<<<00110001xxxx0000xxxxxxxxxxxx", InstName.Tst, InstEmit32.Tst, typeof(OpCode32AluImm)); + SetA32("<<<<00010001xxxx0000xxxxxxx0xxxx", InstName.Tst, InstEmit32.Tst, typeof(OpCode32AluRsImm)); + SetA32("<<<<00010001xxxx0000xxxx0xx1xxxx", InstName.Tst, InstEmit32.Tst, typeof(OpCode32AluRsReg)); + SetA32("<<<<0111111xxxxxxxxxxxxxx101xxxx", InstName.Ubfx, InstEmit32.Ubfx, typeof(OpCode32AluBf)); + SetA32("<<<<01110011xxxx1111xxxx0001xxxx", InstName.Udiv, InstEmit32.Udiv, typeof(OpCode32AluMla)); + SetA32("<<<<0000101xxxxxxxxxxxxx1001xxxx", InstName.Umlal, InstEmit32.Umlal, typeof(OpCode32AluUmull)); + SetA32("<<<<0000100xxxxxxxxxxxxx1001xxxx", InstName.Umull, InstEmit32.Umull, typeof(OpCode32AluUmull)); + SetA32("<<<<01101110xxxxxxxxxx000111xxxx", InstName.Uxtb, InstEmit32.Uxtb, typeof(OpCode32AluUx)); + SetA32("<<<<01101100xxxxxxxxxx000111xxxx", InstName.Uxtb16, InstEmit32.Uxtb16, typeof(OpCode32AluUx)); + SetA32("<<<<01101111xxxxxxxxxx000111xxxx", InstName.Uxth, InstEmit32.Uxth, typeof(OpCode32AluUx)); + + // FP & SIMD + SetA32("<<<<11101x110000xxxx10xx11x0xxxx", InstName.Vabs, InstEmit32.Vabs_S, typeof(OpCode32SimdRegS)); + 
SetA32("111100111x11xx01xxxx0x110xx0xxxx", InstName.Vabs, InstEmit32.Vabs_V, typeof(OpCode32SimdReg)); + SetA32("111100100xxxxxxxxxxx1000xxx0xxxx", InstName.Vadd, InstEmit32.Vadd_I, typeof(OpCode32SimdReg)); + SetA32("<<<<11100x11xxxxxxxx101xx0x0xxxx", InstName.Vadd, InstEmit32.Vadd_S, typeof(OpCode32SimdRegS)); + SetA32("111100100x00xxxxxxxx1101xxx0xxxx", InstName.Vadd, InstEmit32.Vadd_V, typeof(OpCode32SimdReg)); + SetA32("111100100x00xxxxxxxx0001xxx1xxxx", InstName.Vand, InstEmit32.Vand_I, typeof(OpCode32SimdBinary)); + SetA32("111100110x11xxxxxxxx0001xxx1xxxx", InstName.Vbif, InstEmit32.Vbif, typeof(OpCode32SimdBinary)); + SetA32("111100110x10xxxxxxxx0001xxx1xxxx", InstName.Vbit, InstEmit32.Vbit, typeof(OpCode32SimdBinary)); + SetA32("111100110x01xxxxxxxx0001xxx1xxxx", InstName.Vbsl, InstEmit32.Vbsl, typeof(OpCode32SimdBinary)); + SetA32("111100110x<>x1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_R, typeof(OpCode32SimdCvtFI)); // The many FP32 to int encodings (fp). + SetA32("111100111x111011xxxx011xxxx0xxxx", InstName.Vcvt, InstEmit32.Vcvt_V, typeof(OpCode32SimdCmpZ)); // FP and integer, vector. + SetA32("<<<<11101x00xxxxxxxx101xx0x0xxxx", InstName.Vdiv, InstEmit32.Vdiv_S, typeof(OpCode32SimdRegS)); + SetA32("<<<<11101xx0xxxxxxxx1011x0x10000", InstName.Vdup, InstEmit32.Vdup, typeof(OpCode32SimdDupGP)); + SetA32("111100111x11xxxxxxxx11000xx0xxxx", InstName.Vdup, InstEmit32.Vdup_1, typeof(OpCode32SimdDupElem)); + SetA32("111100101x11xxxxxxxxxxxxxxx0xxxx", InstName.Vext, InstEmit32.Vext, typeof(OpCode32SimdExt)); + SetA32("111101001x10xxxxxxxxxx00xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemSingle)); + SetA32("111101000x10xxxxxxxx0111xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemPair)); // Regs = 1. + SetA32("111101000x10xxxxxxxx1010xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemPair)); // Regs = 2. + SetA32("111101000x10xxxxxxxx0110xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemPair)); // Regs = 3. + SetA32("111101000x10xxxxxxxx0010xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemPair)); // Regs = 4. + SetA32("111101001x10xxxxxxxxxx01xxxxxxxx", InstName.Vld2, InstEmit32.Vld2, typeof(OpCode32SimdMemSingle)); + SetA32("111101000x10xxxxxxxx100xxxxxxxxx", InstName.Vld2, InstEmit32.Vld2, typeof(OpCode32SimdMemPair)); // Regs = 1, inc = 1/2 (itype). + SetA32("111101000x10xxxxxxxx0011xxxxxxxx", InstName.Vld2, InstEmit32.Vld2, typeof(OpCode32SimdMemPair)); // Regs = 2, inc = 2. + SetA32("111101001x10xxxxxxxxxx10xxxxxxxx", InstName.Vld3, InstEmit32.Vld3, typeof(OpCode32SimdMemSingle)); + SetA32("111101000x10xxxxxxxx010xxxxxxxxx", InstName.Vld3, InstEmit32.Vld3, typeof(OpCode32SimdMemPair)); // Inc = 1/2 (itype). + SetA32("111101001x10xxxxxxxxxx11xxxxxxxx", InstName.Vld4, InstEmit32.Vld4, typeof(OpCode32SimdMemSingle)); + SetA32("111101000x10xxxxxxxx000xxxxxxxxx", InstName.Vld4, InstEmit32.Vld4, typeof(OpCode32SimdMemPair)); // Inc = 1/2 (itype). 
+ SetA32("<<<<11001x01xxxxxxxx1011xxxxxxx0", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11001x11xxxxxxxx1011xxxxxxx0", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11010x11xxxxxxxx1011xxxxxxx0", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11001x01xxxxxxxx1010xxxxxxxx", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11001x11xxxxxxxx1010xxxxxxxx", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11010x11xxxxxxxx1010xxxxxxxx", InstName.Vldm, InstEmit32.Vldm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<1101xx01xxxxxxxx101xxxxxxxxx", InstName.Vldr, InstEmit32.Vldr, typeof(OpCode32SimdMemImm)); + SetA32("1111001x0x<>x0x0xxxx", InstName.Vmaxnm, InstEmit32.Vmaxnm_S, typeof(OpCode32SimdRegS)); + SetA32("111100110x0xxxxxxxxx1111xxx1xxxx", InstName.Vmaxnm, InstEmit32.Vmaxnm_V, typeof(OpCode32SimdReg)); + SetA32("111111101x00xxxxxxxx10>>x1x0xxxx", InstName.Vminnm, InstEmit32.Vminnm_S, typeof(OpCode32SimdRegS)); + SetA32("111100110x1xxxxxxxxx1111xxx1xxxx", InstName.Vminnm, InstEmit32.Vminnm_V, typeof(OpCode32SimdReg)); + SetA32("1111001x1x<>>xxxxxxx0101>xx1xxxx", InstName.Vshl, InstEmit32.Vshl, typeof(OpCode32SimdShImm)); + SetA32("1111001x0xxxxxxxxxxx0100xxx0xxxx", InstName.Vshl, InstEmit32.Vshl_I, typeof(OpCode32SimdReg)); + SetA32("1111001x1x>>>xxxxxxx0000>xx1xxxx", InstName.Vshr, InstEmit32.Vshr, typeof(OpCode32SimdShImm)); + SetA32("111100101x>>>xxxxxxx100000x1xxx0", InstName.Vshrn, InstEmit32.Vshrn, typeof(OpCode32SimdShImm)); + SetA32("<<<<11101x110001xxxx101x11x0xxxx", InstName.Vsqrt, InstEmit32.Vsqrt_S, typeof(OpCode32SimdS)); + SetA32("111101001x00xxxxxxxx<<00xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemSingle)); + SetA32("111101000x00xxxxxxxx0111xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemPair)); // Regs = 1. + SetA32("111101000x00xxxxxxxx1010xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemPair)); // Regs = 2. + SetA32("111101000x00xxxxxxxx0110xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemPair)); // Regs = 3. + SetA32("111101000x00xxxxxxxx0010xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemPair)); // Regs = 4. + SetA32("111101001x00xxxxxxxx<<01xxxxxxxx", InstName.Vst2, InstEmit32.Vst2, typeof(OpCode32SimdMemSingle)); + SetA32("111101000x00xxxxxxxx100xxxxxxxxx", InstName.Vst2, InstEmit32.Vst2, typeof(OpCode32SimdMemPair)); // Regs = 1, inc = 1/2 (itype). + SetA32("111101000x00xxxxxxxx0011xxxxxxxx", InstName.Vst2, InstEmit32.Vst2, typeof(OpCode32SimdMemPair)); // Regs = 2, inc = 2. + SetA32("111101001x00xxxxxxxx<<10xxxxxxxx", InstName.Vst3, InstEmit32.Vst3, typeof(OpCode32SimdMemSingle)); + SetA32("111101000x00xxxxxxxx010xxxxxxxxx", InstName.Vst3, InstEmit32.Vst3, typeof(OpCode32SimdMemPair)); // Inc = 1/2 (itype). + SetA32("111101001x00xxxxxxxx<<11xxxxxxxx", InstName.Vst4, InstEmit32.Vst4, typeof(OpCode32SimdMemSingle)); + SetA32("111101000x00xxxxxxxx000xxxxxxxxx", InstName.Vst4, InstEmit32.Vst4, typeof(OpCode32SimdMemPair)); // Inc = 1/2 (itype). 
+ SetA32("<<<<11001x00xxxxxxxx1011xxxxxxx0", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11001x10xxxxxxxx1011xxxxxxx0", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11010x10xxxxxxxx1011xxxxxxx0", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11001x00xxxxxxxx1010xxxxxxxx", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11001x10xxxxxxxx1010xxxxxxxx", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<11010x10xxxxxxxx1010xxxxxxxx", InstName.Vstm, InstEmit32.Vstm, typeof(OpCode32SimdMemMult)); + SetA32("<<<<1101xx00xxxxxxxx101xxxxxxxxx", InstName.Vstr, InstEmit32.Vstr, typeof(OpCode32SimdMemImm)); + SetA32("111100110xxxxxxxxxxx1000xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_I, typeof(OpCode32SimdReg)); + SetA32("<<<<11100x11xxxxxxxx101xx1x0xxxx", InstName.Vsub, InstEmit32.Vsub_S, typeof(OpCode32SimdRegS)); + SetA32("111100100x10xxxxxxxx1101xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_V, typeof(OpCode32SimdReg)); + SetA32("111100111x11xxxxxxxx10xxxxx0xxxx", InstName.Vtbl, InstEmit32.Vtbl, typeof(OpCode32SimdTbl)); + SetA32("111100111x11<<10xxxx00001xx0xxxx", InstName.Vtrn, InstEmit32.Vtrn, typeof(OpCode32SimdCmpZ)); + SetA32("111100111x11<<10xxxx00010xx0xxxx", InstName.Vuzp, InstEmit32.Vuzp, typeof(OpCode32SimdCmpZ)); + SetA32("111100111x11<<10xxxx00011xx0xxxx", InstName.Vzip, InstEmit32.Vzip, typeof(OpCode32SimdCmpZ)); #endregion FillFastLookupTable(_instA32FastLookup, _allInstA32); diff --git a/ARMeilleure/Diagnostics/IRDumper.cs b/ARMeilleure/Diagnostics/IRDumper.cs index c3e99dfab..100a9b113 100644 --- a/ARMeilleure/Diagnostics/IRDumper.cs +++ b/ARMeilleure/Diagnostics/IRDumper.cs @@ -134,6 +134,7 @@ namespace ARMeilleure.Diagnostics switch (reg.Type) { case RegisterType.Flag: name = "b" + reg.Index; break; + case RegisterType.FpFlag: name = "f" + reg.Index; break; case RegisterType.Integer: name = "r" + reg.Index; break; case RegisterType.Vector: name = "v" + reg.Index; break; } diff --git a/ARMeilleure/Instructions/DelegateTypes.cs b/ARMeilleure/Instructions/DelegateTypes.cs index 424203ffa..b65149cb8 100644 --- a/ARMeilleure/Instructions/DelegateTypes.cs +++ b/ARMeilleure/Instructions/DelegateTypes.cs @@ -4,13 +4,19 @@ using System; namespace ARMeilleure.Instructions { delegate double _F64_F64(double a1); + delegate double _F64_F64_Bool(double a1, bool a2); delegate double _F64_F64_F64(double a1, double a2); + delegate double _F64_F64_F64_Bool(double a1, double a2, bool a3); delegate double _F64_F64_F64_F64(double a1, double a2, double a3); + delegate double _F64_F64_F64_F64_Bool(double a1, double a2, double a3, bool a4); delegate double _F64_F64_MidpointRounding(double a1, MidpointRounding a2); delegate float _F32_F32(float a1); + delegate float _F32_F32_Bool(float a1, bool a2); delegate float _F32_F32_F32(float a1, float a2); + delegate float _F32_F32_F32_Bool(float a1, float a2, bool a3); delegate float _F32_F32_F32_F32(float a1, float a2, float a3); + delegate float _F32_F32_F32_F32_Bool(float a1, float a2, float a3, bool a4); delegate float _F32_F32_MidpointRounding(float a1, MidpointRounding a2); delegate float _F32_U16(ushort a1); @@ -37,6 +43,7 @@ namespace ARMeilleure.Instructions delegate ushort _U16_F32(float a1); delegate ushort _U16_U64(ulong a1); + delegate uint _U32(); delegate uint _U32_F32(float a1); delegate uint _U32_F64(double a1); delegate uint _U32_U32(uint a1); @@ -74,6 +81,7 @@ namespace ARMeilleure.Instructions delegate 
V128 _V128_V128_V128_V128(V128 a1, V128 a2, V128 a3); delegate void _Void(); + delegate void _Void_U32(uint a1); delegate void _Void_U64(ulong a1); delegate void _Void_U64_S32(ulong a1, int a2); delegate void _Void_U64_U16(ulong a1, ushort a2); diff --git a/ARMeilleure/Instructions/InstEmitAlu.cs b/ARMeilleure/Instructions/InstEmitAlu.cs index ed1faae41..6e2875e64 100644 --- a/ARMeilleure/Instructions/InstEmitAlu.cs +++ b/ARMeilleure/Instructions/InstEmitAlu.cs @@ -276,23 +276,6 @@ namespace ARMeilleure.Instructions SetAluDOrZR(context, d); } - private static Operand EmitReverseBits32Op(ArmEmitterContext context, Operand op) - { - Debug.Assert(op.Type == OperandType.I32); - - Operand val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xaaaaaaaau)), Const(1)), - context.ShiftLeft (context.BitwiseAnd(op, Const(0x55555555u)), Const(1))); - - val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xccccccccu)), Const(2)), - context.ShiftLeft (context.BitwiseAnd(val, Const(0x33333333u)), Const(2))); - val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xf0f0f0f0u)), Const(4)), - context.ShiftLeft (context.BitwiseAnd(val, Const(0x0f0f0f0fu)), Const(4))); - val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xff00ff00u)), Const(8)), - context.ShiftLeft (context.BitwiseAnd(val, Const(0x00ff00ffu)), Const(8))); - - return context.BitwiseOr(context.ShiftRightUI(val, Const(16)), context.ShiftLeft(val, Const(16))); - } - private static Operand EmitReverseBits64Op(ArmEmitterContext context, Operand op) { Debug.Assert(op.Type == OperandType.I64); @@ -331,23 +314,6 @@ namespace ARMeilleure.Instructions SetAluDOrZR(context, d); } - private static Operand EmitReverseBytes16_32Op(ArmEmitterContext context, Operand op) - { - Debug.Assert(op.Type == OperandType.I32); - - Operand val = EmitReverseBytes16_64Op(context, context.ZeroExtend32(OperandType.I64, op)); - - return context.ConvertI64ToI32(val); - } - - private static Operand EmitReverseBytes16_64Op(ArmEmitterContext context, Operand op) - { - Debug.Assert(op.Type == OperandType.I64); - - return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xff00ff00ff00ff00ul)), Const(8)), - context.ShiftLeft (context.BitwiseAnd(op, Const(0x00ff00ff00ff00fful)), Const(8))); - } - public static void Rev32(ArmEmitterContext context) { OpCodeAlu op = (OpCodeAlu)context.CurrOp; diff --git a/ARMeilleure/Instructions/InstEmitAlu32.cs b/ARMeilleure/Instructions/InstEmitAlu32.cs index 79b0abbc3..4d03f5c24 100644 --- a/ARMeilleure/Instructions/InstEmitAlu32.cs +++ b/ARMeilleure/Instructions/InstEmitAlu32.cs @@ -3,8 +3,8 @@ using ARMeilleure.IntermediateRepresentation; using ARMeilleure.State; using ARMeilleure.Translation; -using static ARMeilleure.Instructions.InstEmitHelper; using static ARMeilleure.Instructions.InstEmitAluHelper; +using static ARMeilleure.Instructions.InstEmitHelper; using static ARMeilleure.IntermediateRepresentation.OperandHelper; namespace ARMeilleure.Instructions @@ -31,6 +31,101 @@ namespace ARMeilleure.Instructions EmitAluStore(context, res); } + public static void Adc(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Add(n, m); + + Operand carry = GetFlag(PState.CFlag); + + res = context.Add(res, carry); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + + EmitAdcsCCheck(context, n, res); + 
EmitAddsVCheck(context, n, m, res); + } + + EmitAluStore(context, res); + } + + public static void And(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res = context.BitwiseAnd(n, m); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + } + + EmitAluStore(context, res); + } + + public static void Bfc(ArmEmitterContext context) + { + OpCode32AluBf op = (OpCode32AluBf)context.CurrOp; + + Operand d = GetIntA32(context, op.Rd); + Operand res = context.BitwiseAnd(d, Const(~op.DestMask)); + + SetIntA32(context, op.Rd, res); + } + + public static void Bfi(ArmEmitterContext context) + { + OpCode32AluBf op = (OpCode32AluBf)context.CurrOp; + + Operand n = GetIntA32(context, op.Rn); + Operand d = GetIntA32(context, op.Rd); + Operand part = context.BitwiseAnd(n, Const(op.SourceMask)); + + if (op.Lsb != 0) + { + part = context.ShiftLeft(part, Const(op.Lsb)); + } + + Operand res = context.BitwiseAnd(d, Const(~op.DestMask)); + res = context.BitwiseOr(res, context.BitwiseAnd(part, Const(op.DestMask))); + + SetIntA32(context, op.Rd, res); + } + + public static void Bic(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res = context.BitwiseAnd(n, context.BitwiseNot(m)); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + } + + EmitAluStore(context, res); + } + + public static void Clz(ArmEmitterContext context) + { + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.CountLeadingZeros(m); + EmitAluStore(context, res); + } + public static void Cmp(ArmEmitterContext context) { IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; @@ -46,6 +141,36 @@ namespace ARMeilleure.Instructions EmitSubsVCheck(context, n, m, res); } + public static void Cmn(ArmEmitterContext context) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Add(n, m); + + EmitNZFlagsCheck(context, res); + + EmitAddsCCheck(context, n, res); + EmitAddsVCheck(context, n, m, res); + } + + public static void Eor(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res = context.BitwiseExclusiveOr(n, m); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + } + + EmitAluStore(context, res); + } + public static void Mov(ArmEmitterContext context) { IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; @@ -60,6 +185,210 @@ namespace ARMeilleure.Instructions EmitAluStore(context, m); } + public static void Movt(ArmEmitterContext context) + { + OpCode32AluImm16 op = (OpCode32AluImm16)context.CurrOp; + + Operand d = GetIntA32(context, op.Rd); + Operand imm = Const(op.Immediate << 16); // Immediate value as top halfword.
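// (Editor's illustration.) For example, with Rd = 0x0000ABCD and a 16-bit immediate of 0x1234,
// the store below yields 0x1234ABCD: the low halfword of Rd is kept and the immediate becomes
// the high halfword, matching the architectural MOVT behaviour.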
+ Operand res = context.BitwiseAnd(d, Const(0x0000ffff)); + res = context.BitwiseOr(res, imm); + + EmitAluStore(context, res); + } + + public static void Mul(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res = context.Multiply(n, m); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + } + + EmitAluStore(context, res); + } + + public static void Mvn(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + Operand m = GetAluM(context); + + Operand res = context.BitwiseNot(m); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + } + + EmitAluStore(context, res); + } + + public static void Orr(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res = context.BitwiseOr(n, m); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + } + + EmitAluStore(context, res); + } + + public static void Pkh(ArmEmitterContext context) + { + OpCode32AluRsImm op = (OpCode32AluRsImm)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res; + + bool tbform = op.ShiftType == ShiftType.Asr; + if (tbform) + { + res = context.BitwiseOr(context.BitwiseAnd(n, Const(0xFFFF0000)), context.BitwiseAnd(m, Const(0xFFFF))); + } + else + { + res = context.BitwiseOr(context.BitwiseAnd(m, Const(0xFFFF0000)), context.BitwiseAnd(n, Const(0xFFFF))); + } + + EmitAluStore(context, res); + } + + public static void Rbit(ArmEmitterContext context) + { + Operand m = GetAluM(context); + + Operand res = EmitReverseBits32Op(context, m); + + EmitAluStore(context, res); + } + + public static void Rev(ArmEmitterContext context) + { + Operand m = GetAluM(context); + + Operand res = context.ByteSwap(m); + + EmitAluStore(context, res); + } + + public static void Rev16(ArmEmitterContext context) + { + Operand m = GetAluM(context); + + Operand res = EmitReverseBytes16_32Op(context, m); + + EmitAluStore(context, res); + } + + public static void Revsh(ArmEmitterContext context) + { + Operand m = GetAluM(context); + + Operand res = EmitReverseBytes16_32Op(context, m); + + EmitAluStore(context, context.SignExtend16(OperandType.I32, res)); + } + + public static void Rsc(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Subtract(m, n); + + Operand borrow = context.BitwiseExclusiveOr(GetFlag(PState.CFlag), Const(1)); + + res = context.Subtract(res, borrow); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + + EmitSbcsCCheck(context, m, n); + EmitSubsVCheck(context, m, n, res); + } + + EmitAluStore(context, res); + } + + public static void Rsb(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Subtract(m, n); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + + EmitSubsCCheck(context, m, res); + EmitSubsVCheck(context, m, n, res); + } + + EmitAluStore(context, res); + } + + public static void Sbc(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Subtract(n, m); + + Operand borrow = 
context.BitwiseExclusiveOr(GetFlag(PState.CFlag), Const(1)); + + res = context.Subtract(res, borrow); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + + EmitSbcsCCheck(context, n, m); + EmitSubsVCheck(context, n, m, res); + } + + EmitAluStore(context, res); + } + + public static void Sbfx(ArmEmitterContext context) + { + OpCode32AluBf op = (OpCode32AluBf)context.CurrOp; + + var msb = op.Lsb + op.Msb; // For this instruction, the msb is actually a width. + + Operand n = GetIntA32(context, op.Rn); + Operand res = context.ShiftRightSI(context.ShiftLeft(n, Const(31 - msb)), Const(31 - op.Msb)); + + SetIntA32(context, op.Rd, res); + } + + public static void Sdiv(ArmEmitterContext context) + { + EmitDiv(context, false); + } + public static void Sub(ArmEmitterContext context) { IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; @@ -80,50 +409,216 @@ namespace ARMeilleure.Instructions EmitAluStore(context, res); } - private static void EmitAluStore(ArmEmitterContext context, Operand value) + public static void Sxtb(ArmEmitterContext context) { - IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + EmitSignExtend(context, true, 8); + } - if (op.Rd == RegisterAlias.Aarch32Pc) + public static void Sxtb16(ArmEmitterContext context) + { + EmitExtend16(context, true); + } + + public static void Sxth(ArmEmitterContext context) + { + EmitSignExtend(context, true, 16); + } + + public static void Teq(ArmEmitterContext context) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res = context.BitwiseExclusiveOr(n, m); + + EmitNZFlagsCheck(context, res); + } + + public static void Tst(ArmEmitterContext context) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res = context.BitwiseAnd(n, m); + EmitNZFlagsCheck(context, res); + } + + public static void Ubfx(ArmEmitterContext context) + { + OpCode32AluBf op = (OpCode32AluBf)context.CurrOp; + + var msb = op.Lsb + op.Msb; // For this instruction, the msb is actually a width. + + Operand n = GetIntA32(context, op.Rn); + Operand res = context.ShiftRightUI(context.ShiftLeft(n, Const(31 - msb)), Const(31 - op.Msb)); + + SetIntA32(context, op.Rd, res); + } + + public static void Udiv(ArmEmitterContext context) + { + EmitDiv(context, true); + } + + public static void Uxtb(ArmEmitterContext context) + { + EmitSignExtend(context, false, 8); + } + + public static void Uxtb16(ArmEmitterContext context) + { + EmitExtend16(context, false); + } + + public static void Uxth(ArmEmitterContext context) + { + EmitSignExtend(context, false, 16); + } + + private static void EmitSignExtend(ArmEmitterContext context, bool signed, int bits) + { + IOpCode32AluUx op = (IOpCode32AluUx)context.CurrOp; + + Operand m = GetAluM(context); + Operand res; + + if (op.RotateBits == 0) { - if (op.SetFlags) + res = m; + } + else + { + Operand rotate = Const(op.RotateBits); + res = context.RotateRight(m, rotate); + } + + switch (bits) + { + case 8: + res = (signed) ? context.SignExtend8(OperandType.I32, res) : context.ZeroExtend8(OperandType.I32, res); + break; + case 16: + res = (signed) ? 
context.SignExtend16(OperandType.I32, res) : context.ZeroExtend16(OperandType.I32, res); + break; + } + + if (op.Add) + { + res = context.Add(res, GetAluN(context)); + } + + EmitAluStore(context, res); + } + + private static void EmitExtend16(ArmEmitterContext context, bool signed) + { + IOpCode32AluUx op = (IOpCode32AluUx)context.CurrOp; + + Operand m = GetAluM(context); + Operand res; + + if (op.RotateBits == 0) + { + res = m; + } + else + { + Operand rotate = Const(op.RotateBits); + res = context.RotateRight(m, rotate); + } + + Operand low16, high16; + if (signed) + { + low16 = context.SignExtend8(OperandType.I32, res); + high16 = context.SignExtend8(OperandType.I32, context.ShiftRightUI(res, Const(16))); + } + else + { + low16 = context.ZeroExtend8(OperandType.I32, res); + high16 = context.ZeroExtend8(OperandType.I32, context.ShiftRightUI(res, Const(16))); + } + + if (op.Add) + { + Operand n = GetAluN(context); + Operand lowAdd, highAdd; + if (signed) { - // TODO: Load SPSR etc. - Operand isThumb = GetFlag(PState.TFlag); - - Operand lblThumb = Label(); - - context.BranchIfTrue(lblThumb, isThumb); - - context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(value, Const(~3)))); - - context.MarkLabel(lblThumb); - - context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(value, Const(~1)))); + lowAdd = context.SignExtend16(OperandType.I32, n); + highAdd = context.SignExtend16(OperandType.I32, context.ShiftRightUI(n, Const(16))); } else { - EmitAluWritePc(context, value); + lowAdd = context.ZeroExtend16(OperandType.I32, n); + highAdd = context.ZeroExtend16(OperandType.I32, context.ShiftRightUI(n, Const(16))); } + + low16 = context.Add(low16, lowAdd); + high16 = context.Add(high16, highAdd); } - else - { - SetIntA32(context, op.Rd, value); - } + + res = context.BitwiseOr( + context.ZeroExtend16(OperandType.I32, low16), + context.ShiftLeft(context.ZeroExtend16(OperandType.I32, high16), Const(16))); + + EmitAluStore(context, res); } - private static void EmitAluWritePc(ArmEmitterContext context, Operand value) + public static void EmitDiv(ArmEmitterContext context, bool unsigned) { - context.StoreToContext(); + Operand n = GetAluN(context); + Operand m = GetAluM(context); + Operand zero = Const(m.Type, 0); - if (IsThumb(context.CurrOp)) + Operand divisorIsZero = context.ICompareEqual(m, zero); + + Operand lblBadDiv = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblBadDiv, divisorIsZero); + + if (!unsigned) { - context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(value, Const(~1)))); - } - else - { - EmitBxWritePc(context, value); + // ARM64 behaviour: If Rn == INT_MIN && Rm == -1, Rd = INT_MIN (overflow). + // TODO: tests to ensure A32 works the same + + Operand intMin = Const(int.MinValue); + Operand minus1 = Const(-1); + + Operand nIsIntMin = context.ICompareEqual(n, intMin); + Operand mIsMinus1 = context.ICompareEqual(m, minus1); + + Operand lblGoodDiv = Label(); + + context.BranchIfFalse(lblGoodDiv, context.BitwiseAnd(nIsIntMin, mIsMinus1)); + + EmitAluStore(context, intMin); + + context.Branch(lblEnd); + + context.MarkLabel(lblGoodDiv); } + + Operand res = unsigned + ? 
context.DivideUI(n, m) + : context.Divide(n, m); + + EmitAluStore(context, res); + + context.Branch(lblEnd); + + context.MarkLabel(lblBadDiv); + + EmitAluStore(context, zero); + + context.MarkLabel(lblEnd); + } + + private static void EmitAluStore(ArmEmitterContext context, Operand value) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + EmitGenericAluStoreA32(context, op.Rd, op.SetFlags, value); } } } \ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitAluHelper.cs b/ARMeilleure/Instructions/InstEmitAluHelper.cs index d032b32e8..3bb87f273 100644 --- a/ARMeilleure/Instructions/InstEmitAluHelper.cs +++ b/ARMeilleure/Instructions/InstEmitAluHelper.cs @@ -3,6 +3,7 @@ using ARMeilleure.IntermediateRepresentation; using ARMeilleure.State; using ARMeilleure.Translation; using System; +using System.Diagnostics; using static ARMeilleure.Instructions.InstEmitHelper; using static ARMeilleure.IntermediateRepresentation.OperandHelper; @@ -77,6 +78,89 @@ namespace ARMeilleure.Instructions SetFlag(context, PState.VFlag, vOut); } + public static Operand EmitReverseBits32Op(ArmEmitterContext context, Operand op) + { + Debug.Assert(op.Type == OperandType.I32); + + Operand val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xaaaaaaaau)), Const(1)), + context.ShiftLeft(context.BitwiseAnd(op, Const(0x55555555u)), Const(1))); + + val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xccccccccu)), Const(2)), + context.ShiftLeft(context.BitwiseAnd(val, Const(0x33333333u)), Const(2))); + val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xf0f0f0f0u)), Const(4)), + context.ShiftLeft(context.BitwiseAnd(val, Const(0x0f0f0f0fu)), Const(4))); + val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xff00ff00u)), Const(8)), + context.ShiftLeft(context.BitwiseAnd(val, Const(0x00ff00ffu)), Const(8))); + + return context.BitwiseOr(context.ShiftRightUI(val, Const(16)), context.ShiftLeft(val, Const(16))); + } + + public static Operand EmitReverseBytes16_64Op(ArmEmitterContext context, Operand op) + { + Debug.Assert(op.Type == OperandType.I64); + + return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xff00ff00ff00ff00ul)), Const(8)), + context.ShiftLeft(context.BitwiseAnd(op, Const(0x00ff00ff00ff00fful)), Const(8))); + } + + public static Operand EmitReverseBytes16_32Op(ArmEmitterContext context, Operand op) + { + Debug.Assert(op.Type == OperandType.I32); + + Operand val = EmitReverseBytes16_64Op(context, context.ZeroExtend32(OperandType.I64, op)); + + return context.ConvertI64ToI32(val); + } + + private static void EmitAluWritePc(ArmEmitterContext context, Operand value) + { + Debug.Assert(value.Type == OperandType.I32); + + context.StoreToContext(); + + if (IsThumb(context.CurrOp)) + { + // Make this count as a call, the translator will ignore the low bit for the address. + context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseOr(value, Const(1)))); + } + else + { + EmitBxWritePc(context, value); + } + } + + public static void EmitGenericAluStoreA32(ArmEmitterContext context, int rd, bool setFlags, Operand value) + { + Debug.Assert(value.Type == OperandType.I32); + + if (rd == RegisterAlias.Aarch32Pc && setFlags) + { + if (setFlags) + { + // TODO: Load SPSR etc. + Operand isThumb = GetFlag(PState.TFlag); + + Operand lblThumb = Label(); + + context.BranchIfTrue(lblThumb, isThumb); + + // Make this count as a call, the translator will ignore the low bit for the address. 
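// (Editor's note, illustrative; assumes CallFlag is bit 0, as it is OR'd into return addresses
// elsewhere in this patch.) A target of 0x00001000 is therefore returned as 0x00001001; the
// translator masks the low bit off before resolving the block, so the flag never changes the
// branch destination itself.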
+ context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseOr(context.BitwiseAnd(value, Const(~3)), Const(1)))); + + context.MarkLabel(lblThumb); + + context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseOr(value, Const(1)))); + } + else + { + EmitAluWritePc(context, value); + } + } + else + { + SetIntA32(context, rd, value); + } + } public static Operand GetAluN(ArmEmitterContext context) { @@ -116,10 +200,15 @@ namespace ARMeilleure.Instructions return Const(op.Immediate); } + case OpCode32AluImm16 op: return Const(op.Immediate); + case OpCode32AluRsImm op: return GetMShiftedByImmediate(context, op, setCarry); + case OpCode32AluRsReg op: return GetMShiftedByReg(context, op, setCarry); case OpCodeT16AluImm8 op: return Const(op.Immediate); + case IOpCode32AluReg op: return GetIntA32(context, op.Rm); + // ARM64. case IOpCodeAluImm op: { @@ -167,11 +256,11 @@ namespace ARMeilleure.Instructions } // ARM32 helpers. - private static Operand GetMShiftedByImmediate(ArmEmitterContext context, OpCode32AluRsImm op, bool setCarry) + public static Operand GetMShiftedByImmediate(ArmEmitterContext context, OpCode32AluRsImm op, bool setCarry) { Operand m = GetIntA32(context, op.Rm); - int shift = op.Imm; + int shift = op.Immediate; if (shift == 0) { @@ -193,7 +282,7 @@ namespace ARMeilleure.Instructions case ShiftType.Lsr: m = GetLsrC(context, m, setCarry, shift); break; case ShiftType.Asr: m = GetAsrC(context, m, setCarry, shift); break; case ShiftType.Ror: - if (op.Imm != 0) + if (op.Immediate != 0) { m = GetRorC(context, m, setCarry, shift); } @@ -208,8 +297,74 @@ namespace ARMeilleure.Instructions return m; } - private static Operand GetLslC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + public static Operand GetMShiftedByReg(ArmEmitterContext context, OpCode32AluRsReg op, bool setCarry) { + Operand m = GetIntA32(context, op.Rm); + Operand s = context.ZeroExtend8(OperandType.I32, GetIntA32(context, op.Rs)); + Operand shiftIsZero = context.ICompareEqual(s, Const(0)); + + Operand zeroResult = m; + Operand shiftResult = m; + + setCarry &= op.SetFlags; + + switch (op.ShiftType) + { + case ShiftType.Lsl: shiftResult = EmitLslC(context, m, setCarry, s, shiftIsZero); break; + case ShiftType.Lsr: shiftResult = EmitLsrC(context, m, setCarry, s, shiftIsZero); break; + case ShiftType.Asr: shiftResult = EmitAsrC(context, m, setCarry, s, shiftIsZero); break; + case ShiftType.Ror: shiftResult = EmitRorC(context, m, setCarry, s, shiftIsZero); break; + } + + return context.ConditionalSelect(shiftIsZero, zeroResult, shiftResult); + } + + public static void EmitIfHelper(ArmEmitterContext context, Operand boolValue, Action action, bool expected = true) + { + Debug.Assert(boolValue.Type == OperandType.I32); + + Operand endLabel = Label(); + + if (expected) + { + context.BranchIfFalse(endLabel, boolValue); + } + else + { + context.BranchIfTrue(endLabel, boolValue); + } + + action(); + + context.MarkLabel(endLabel); + } + + public static Operand EmitLslC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero) + { + Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32); + + Operand shiftLarge = context.ICompareGreaterOrEqual(shift, Const(32)); + Operand result = context.ShiftLeft(m, shift); + if (setCarry) + { + EmitIfHelper(context, shiftIsZero, () => + { + Operand cOut = context.ShiftRightUI(m, context.Subtract(Const(32), shift)); + + cOut = context.BitwiseAnd(cOut, Const(1)); + cOut = 
context.ConditionalSelect(context.ICompareGreater(shift, Const(32)), Const(0), cOut); + + SetFlag(context, PState.CFlag, cOut); + }, false); + } + + return context.ConditionalSelect(shiftLarge, Const(0), result); + } + + public static Operand GetLslC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + { + Debug.Assert(m.Type == OperandType.I32); + if ((uint)shift > 32) { return GetShiftByMoreThan32(context, setCarry); @@ -238,8 +393,32 @@ namespace ARMeilleure.Instructions } } - private static Operand GetLsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + public static Operand EmitLsrC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero) { + Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32); + + Operand shiftLarge = context.ICompareGreaterOrEqual(shift, Const(32)); + Operand result = context.ShiftRightUI(m, shift); + if (setCarry) + { + EmitIfHelper(context, shiftIsZero, () => + { + Operand cOut = context.ShiftRightUI(m, context.Subtract(shift, Const(1))); + + cOut = context.BitwiseAnd(cOut, Const(1)); + cOut = context.ConditionalSelect(context.ICompareGreater(shift, Const(32)), Const(0), cOut); + + SetFlag(context, PState.CFlag, cOut); + }, false); + } + + return context.ConditionalSelect(shiftLarge, Const(0), result); + } + + public static Operand GetLsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + { + Debug.Assert(m.Type == OperandType.I32); + if ((uint)shift > 32) { return GetShiftByMoreThan32(context, setCarry); @@ -274,8 +453,45 @@ namespace ARMeilleure.Instructions return Const(0); } - private static Operand GetAsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + public static Operand EmitAsrC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero) { + Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32); + + Operand l32Result; + Operand ge32Result; + + Operand less32 = context.ICompareLess(shift, Const(32)); + + ge32Result = context.ShiftRightSI(m, Const(31)); + + if (setCarry) + { + EmitIfHelper(context, context.BitwiseOr(less32, shiftIsZero), () => + { + SetCarryMLsb(context, ge32Result); + }, false); + } + + l32Result = context.ShiftRightSI(m, shift); + if (setCarry) + { + EmitIfHelper(context, context.BitwiseAnd(less32, context.BitwiseNot(shiftIsZero)), () => + { + Operand cOut = context.ShiftRightUI(m, context.Subtract(shift, Const(1))); + + cOut = context.BitwiseAnd(cOut, Const(1)); + + SetFlag(context, PState.CFlag, cOut); + }); + } + + return context.ConditionalSelect(less32, l32Result, ge32Result); + } + + public static Operand GetAsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + { + Debug.Assert(m.Type == OperandType.I32); + if ((uint)shift >= 32) { m = context.ShiftRightSI(m, Const(31)); @@ -298,8 +514,28 @@ namespace ARMeilleure.Instructions } } - private static Operand GetRorC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + public static Operand EmitRorC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero) { + Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32); + + shift = context.BitwiseAnd(shift, Const(0x1f)); + m = context.RotateRight(m, shift); + + if (setCarry) + { + EmitIfHelper(context, shiftIsZero, () => + { + SetCarryMMsb(context, m); + }, false); + } + + return m; + } 
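// (Editor's illustration, not part of the patch.) Worked example for the register-shift helpers
// above, checked against the A32 shift-by-register carry rules:
//   ROR with m = 0x80000001 and a register shift amount of 32:
//     32 & 0x1f == 0, so the rotated value is unchanged (0x80000001);
//     the amount is non-zero, so C is taken from bit 31 of the result -> C = 1.
//   When the register amount is 0 (shiftIsZero), the helpers skip the carry update entirely,
//   leaving C as it was, which is also what the architecture requires.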
+ + public static Operand GetRorC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + { + Debug.Assert(m.Type == OperandType.I32); + shift &= 0x1f; m = context.RotateRight(m, Const(shift)); @@ -312,8 +548,10 @@ namespace ARMeilleure.Instructions return m; } - private static Operand GetRrxC(ArmEmitterContext context, Operand m, bool setCarry) + public static Operand GetRrxC(ArmEmitterContext context, Operand m, bool setCarry) { + Debug.Assert(m.Type == OperandType.I32); + // Rotate right by 1 with carry. Operand cIn = context.Copy(GetFlag(PState.CFlag)); @@ -331,16 +569,22 @@ namespace ARMeilleure.Instructions private static void SetCarryMLsb(ArmEmitterContext context, Operand m) { + Debug.Assert(m.Type == OperandType.I32); + SetFlag(context, PState.CFlag, context.BitwiseAnd(m, Const(1))); } private static void SetCarryMMsb(ArmEmitterContext context, Operand m) { + Debug.Assert(m.Type == OperandType.I32); + SetFlag(context, PState.CFlag, context.ShiftRightUI(m, Const(31))); } private static void SetCarryMShrOut(ArmEmitterContext context, Operand m, int shift) { + Debug.Assert(m.Type == OperandType.I32); + Operand cOut = context.ShiftRightUI(m, Const(shift - 1)); cOut = context.BitwiseAnd(cOut, Const(1)); diff --git a/ARMeilleure/Instructions/InstEmitException32.cs b/ARMeilleure/Instructions/InstEmitException32.cs new file mode 100644 index 000000000..a73f0dec7 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitException32.cs @@ -0,0 +1,36 @@ +using ARMeilleure.Decoders; +using ARMeilleure.Translation; + +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Svc(ArmEmitterContext context) + { + EmitExceptionCall(context, NativeInterface.SupervisorCall); + } + + public static void Trap(ArmEmitterContext context) + { + EmitExceptionCall(context, NativeInterface.Break); + } + + private static void EmitExceptionCall(ArmEmitterContext context, _Void_U64_S32 func) + { + OpCode32Exception op = (OpCode32Exception)context.CurrOp; + + context.StoreToContext(); + + context.Call(func, Const(op.Address), Const(op.Id)); + + context.LoadFromContext(); + + if (context.CurrBlock.Next == null) + { + context.Return(Const(op.Address + 4)); + } + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitFlow32.cs b/ARMeilleure/Instructions/InstEmitFlow32.cs index 27addc78e..cbb9ad5b2 100644 --- a/ARMeilleure/Instructions/InstEmitFlow32.cs +++ b/ARMeilleure/Instructions/InstEmitFlow32.cs @@ -1,7 +1,9 @@ using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; using ARMeilleure.State; using ARMeilleure.Translation; +using static ARMeilleure.Instructions.InstEmitFlowHelper; using static ARMeilleure.Instructions.InstEmitHelper; using static ARMeilleure.IntermediateRepresentation.OperandHelper; @@ -20,7 +22,6 @@ namespace ARMeilleure.Instructions else { context.StoreToContext(); - context.Return(Const(op.Immediate)); } } @@ -35,15 +36,6 @@ namespace ARMeilleure.Instructions Blx(context, x: true); } - public static void Bx(ArmEmitterContext context) - { - IOpCode32BReg op = (IOpCode32BReg)context.CurrOp; - - context.StoreToContext(); - - EmitBxWritePc(context, GetIntA32(context, op.Rm)); - } - private static void Blx(ArmEmitterContext context, bool x) { IOpCode32BImm op = (IOpCode32BImm)context.CurrOp; @@ -53,10 +45,10 @@ namespace ARMeilleure.Instructions bool isThumb = IsThumb(context.CurrOp); uint currentPc = isThumb - ? op.GetPc() | 1 - : op.GetPc() - 4; + ? 
pc | 1 + : pc - 4; - SetIntOrSP(context, GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr), Const(currentPc)); + SetIntA32(context, GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr), Const(currentPc)); // If x is true, then this is a branch with link and exchange. // In this case we need to swap the mode between Arm <-> Thumb. @@ -67,5 +59,37 @@ namespace ARMeilleure.Instructions InstEmitFlowHelper.EmitCall(context, (ulong)op.Immediate); } + + public static void Blxr(ArmEmitterContext context) + { + IOpCode32BReg op = (IOpCode32BReg)context.CurrOp; + + uint pc = op.GetPc(); + + Operand addr = GetIntA32(context, op.Rm); + Operand bitOne = context.BitwiseAnd(addr, Const(1)); + addr = context.BitwiseOr(addr, Const((int)CallFlag)); // Set call flag. + + bool isThumb = IsThumb(context.CurrOp); + + uint currentPc = isThumb + ? pc | 1 + : pc - 4; + + SetIntA32(context, GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr), Const(currentPc)); + + SetFlag(context, PState.TFlag, bitOne); + + context.Return(addr); // Call. + } + + public static void Bx(ArmEmitterContext context) + { + IOpCode32BReg op = (IOpCode32BReg)context.CurrOp; + + context.StoreToContext(); + + EmitBxWritePc(context, GetIntA32(context, op.Rm)); + } } } \ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitHelper.cs b/ARMeilleure/Instructions/InstEmitHelper.cs index 02e104a4f..f5495c660 100644 --- a/ARMeilleure/Instructions/InstEmitHelper.cs +++ b/ARMeilleure/Instructions/InstEmitHelper.cs @@ -43,10 +43,15 @@ namespace ARMeilleure.Instructions } else { - return GetIntOrSP(context, GetRegisterAlias(context.Mode, regIndex)); + return Register(GetRegisterAlias(context.Mode, regIndex), RegisterType.Integer, OperandType.I32); } } + public static Operand GetVecA32(int regIndex) + { + return Register(regIndex, RegisterType.Vector, OperandType.V128); + } + public static void SetIntA32(ArmEmitterContext context, int regIndex, Operand value) { if (regIndex == RegisterAlias.Aarch32Pc) @@ -57,7 +62,13 @@ namespace ARMeilleure.Instructions } else { - SetIntOrSP(context, GetRegisterAlias(context.Mode, regIndex), value); + if (value.Type == OperandType.I64) + { + value = context.ConvertI64ToI32(value); + } + Operand reg = Register(GetRegisterAlias(context.Mode, regIndex), RegisterType.Integer, OperandType.I32); + + context.Copy(reg, value); } } @@ -143,11 +154,12 @@ namespace ARMeilleure.Instructions context.BranchIfTrue(lblArmMode, mode); - context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(pc, Const(~1)))); + // Make this count as a call, the translator will ignore the low bit for the address. 
+ context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseOr(pc, Const((int)InstEmitFlowHelper.CallFlag)))); context.MarkLabel(lblArmMode); - context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseAnd(pc, Const(~3)))); + context.Return(context.ZeroExtend32(OperandType.I64, context.BitwiseOr(context.BitwiseAnd(pc, Const(~3)), Const((int)InstEmitFlowHelper.CallFlag)))); } public static Operand GetIntOrZR(ArmEmitterContext context, int regIndex) @@ -208,11 +220,21 @@ namespace ARMeilleure.Instructions return Register((int)stateFlag, RegisterType.Flag, OperandType.I32); } + public static Operand GetFpFlag(FPState stateFlag) + { + return Register((int)stateFlag, RegisterType.FpFlag, OperandType.I32); + } + public static void SetFlag(ArmEmitterContext context, PState stateFlag, Operand value) { context.Copy(GetFlag(stateFlag), value); context.MarkFlagSet(stateFlag); } + + public static void SetFpFlag(ArmEmitterContext context, FPState stateFlag, Operand value) + { + context.Copy(GetFpFlag(stateFlag), value); + } } } diff --git a/ARMeilleure/Instructions/InstEmitMemory32.cs b/ARMeilleure/Instructions/InstEmitMemory32.cs index 002d2c5c6..ffd816b29 100644 --- a/ARMeilleure/Instructions/InstEmitMemory32.cs +++ b/ARMeilleure/Instructions/InstEmitMemory32.cs @@ -20,9 +20,11 @@ namespace ARMeilleure.Instructions [Flags] enum AccessType { - Store = 0, - Signed = 1, - Load = 2, + Store = 0, + Signed = 1, + Load = 2, + Ordered = 4, + Exclusive = 8, LoadZx = Load, LoadSx = Load | Signed, @@ -95,7 +97,7 @@ namespace ARMeilleure.Instructions { OpCode32MemMult op = (OpCode32MemMult)context.CurrOp; - Operand n = GetIntA32(context, op.Rn); + Operand n = context.Copy(GetIntA32(context, op.Rn)); Operand baseAddress = context.Add(n, Const(op.Offset)); @@ -152,14 +154,15 @@ namespace ARMeilleure.Instructions OpCode32Mem op = (OpCode32Mem)context.CurrOp; Operand n = context.Copy(GetIntA32(context, op.Rn)); + Operand m = GetMemM(context, setCarry: false); Operand temp = null; if (op.Index || op.WBack) { temp = op.Add - ? context.Add (n, Const(op.Immediate)) - : context.Subtract(n, Const(op.Immediate)); + ? context.Add (n, m) + : context.Subtract(n, m); } if (op.WBack) diff --git a/ARMeilleure/Instructions/InstEmitMemoryEx.cs b/ARMeilleure/Instructions/InstEmitMemoryEx.cs index bcca7619d..93c20cb58 100644 --- a/ARMeilleure/Instructions/InstEmitMemoryEx.cs +++ b/ARMeilleure/Instructions/InstEmitMemoryEx.cs @@ -5,6 +5,7 @@ using System; using System.Diagnostics; using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitMemoryExHelper; using static ARMeilleure.IntermediateRepresentation.OperandHelper; namespace ARMeilleure.Instructions @@ -66,7 +67,7 @@ namespace ARMeilleure.Instructions // method to read 128-bits atomically. if (op.Size == 2) { - Operand value = EmitLoad(context, address, exclusive, 3); + Operand value = EmitLoadExclusive(context, address, exclusive, 3); Operand valueLow = context.ConvertI64ToI32(value); @@ -79,7 +80,7 @@ namespace ARMeilleure.Instructions } else if (op.Size == 3) { - Operand value = EmitLoad(context, address, exclusive, 4); + Operand value = EmitLoadExclusive(context, address, exclusive, 4); Operand valueLow = context.VectorExtract(OperandType.I64, value, 0); Operand valueHigh = context.VectorExtract(OperandType.I64, value, 1); @@ -95,46 +96,11 @@ namespace ARMeilleure.Instructions else { // 8, 16, 32 or 64-bits (non-pairwise) load. 
- Operand value = EmitLoad(context, address, exclusive, op.Size); + Operand value = EmitLoadExclusive(context, address, exclusive, op.Size); SetIntOrZR(context, op.Rt, value); } } - - private static Operand EmitLoad( - ArmEmitterContext context, - Operand address, - bool exclusive, - int size) - { - Delegate fallbackMethodDlg = null; - - if (exclusive) - { - switch (size) - { - case 0: fallbackMethodDlg = new _U8_U64 (NativeInterface.ReadByteExclusive); break; - case 1: fallbackMethodDlg = new _U16_U64 (NativeInterface.ReadUInt16Exclusive); break; - case 2: fallbackMethodDlg = new _U32_U64 (NativeInterface.ReadUInt32Exclusive); break; - case 3: fallbackMethodDlg = new _U64_U64 (NativeInterface.ReadUInt64Exclusive); break; - case 4: fallbackMethodDlg = new _V128_U64(NativeInterface.ReadVector128Exclusive); break; - } - } - else - { - switch (size) - { - case 0: fallbackMethodDlg = new _U8_U64 (NativeInterface.ReadByte); break; - case 1: fallbackMethodDlg = new _U16_U64 (NativeInterface.ReadUInt16); break; - case 2: fallbackMethodDlg = new _U32_U64 (NativeInterface.ReadUInt32); break; - case 3: fallbackMethodDlg = new _U64_U64 (NativeInterface.ReadUInt64); break; - case 4: fallbackMethodDlg = new _V128_U64(NativeInterface.ReadVector128); break; - } - } - - return context.Call(fallbackMethodDlg, address); - } - public static void Pfrm(ArmEmitterContext context) { // Memory Prefetch, execute as no-op. @@ -192,11 +158,11 @@ namespace ARMeilleure.Instructions value = context.VectorInsert(value, t2, 1); } - s = EmitStore(context, address, value, exclusive, op.Size + 1); + s = EmitStoreExclusive(context, address, value, exclusive, op.Size + 1); } else { - s = EmitStore(context, address, t, exclusive, op.Size); + s = EmitStoreExclusive(context, address, t, exclusive, op.Size); } if (s != null) @@ -207,50 +173,6 @@ namespace ARMeilleure.Instructions } } - private static Operand EmitStore( - ArmEmitterContext context, - Operand address, - Operand value, - bool exclusive, - int size) - { - if (size < 3) - { - value = context.ConvertI64ToI32(value); - } - - Delegate fallbackMethodDlg = null; - - if (exclusive) - { - switch (size) - { - case 0: fallbackMethodDlg = new _S32_U64_U8 (NativeInterface.WriteByteExclusive); break; - case 1: fallbackMethodDlg = new _S32_U64_U16 (NativeInterface.WriteUInt16Exclusive); break; - case 2: fallbackMethodDlg = new _S32_U64_U32 (NativeInterface.WriteUInt32Exclusive); break; - case 3: fallbackMethodDlg = new _S32_U64_U64 (NativeInterface.WriteUInt64Exclusive); break; - case 4: fallbackMethodDlg = new _S32_U64_V128(NativeInterface.WriteVector128Exclusive); break; - } - - return context.Call(fallbackMethodDlg, address, value); - } - else - { - switch (size) - { - case 0: fallbackMethodDlg = new _Void_U64_U8 (NativeInterface.WriteByte); break; - case 1: fallbackMethodDlg = new _Void_U64_U16 (NativeInterface.WriteUInt16); break; - case 2: fallbackMethodDlg = new _Void_U64_U32 (NativeInterface.WriteUInt32); break; - case 3: fallbackMethodDlg = new _Void_U64_U64 (NativeInterface.WriteUInt64); break; - case 4: fallbackMethodDlg = new _Void_U64_V128(NativeInterface.WriteVector128); break; - } - - context.Call(fallbackMethodDlg, address, value); - - return null; - } - } - private static void EmitBarrier(ArmEmitterContext context) { // Note: This barrier is most likely not necessary, and probably diff --git a/ARMeilleure/Instructions/InstEmitMemoryEx32.cs b/ARMeilleure/Instructions/InstEmitMemoryEx32.cs new file mode 100644 index 000000000..0ab990f87 --- /dev/null +++ 
b/ARMeilleure/Instructions/InstEmitMemoryEx32.cs @@ -0,0 +1,240 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitMemoryExHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Clrex(ArmEmitterContext context) + { + context.Call(new _Void(NativeInterface.ClearExclusive)); + } + + public static void Dmb(ArmEmitterContext context) => EmitBarrier(context); + + public static void Dsb(ArmEmitterContext context) => EmitBarrier(context); + + public static void Ldrex(ArmEmitterContext context) + { + EmitExLoadOrStore(context, WordSizeLog2, AccessType.LoadZx | AccessType.Exclusive); + } + + public static void Ldrexb(ArmEmitterContext context) + { + EmitExLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx | AccessType.Exclusive); + } + + public static void Ldrexd(ArmEmitterContext context) + { + EmitExLoadOrStore(context, DWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive); + } + + public static void Ldrexh(ArmEmitterContext context) + { + EmitExLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive); + } + + public static void Lda(ArmEmitterContext context) + { + EmitExLoadOrStore(context, WordSizeLog2, AccessType.LoadZx | AccessType.Ordered); + } + + public static void Ldab(ArmEmitterContext context) + { + EmitExLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx | AccessType.Ordered); + } + + public static void Ldaex(ArmEmitterContext context) + { + EmitExLoadOrStore(context, WordSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Ldaexb(ArmEmitterContext context) + { + EmitExLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Ldaexd(ArmEmitterContext context) + { + EmitExLoadOrStore(context, DWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Ldaexh(ArmEmitterContext context) + { + EmitExLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Ldah(ArmEmitterContext context) + { + EmitExLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx | AccessType.Ordered); + } + + // Stores. 
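+        // Editor's note (not part of the change): the store wrappers below mirror the load
+        // group above; each mnemonic is a single call to EmitExLoadOrStore with the access
+        // size and a combination of the Store, Exclusive and Ordered flags, e.g. Stlexh is
+        // Store | Exclusive | Ordered at halfword size.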
+ + public static void Strex(ArmEmitterContext context) + { + EmitExLoadOrStore(context, WordSizeLog2, AccessType.Store | AccessType.Exclusive); + } + + public static void Strexb(ArmEmitterContext context) + { + EmitExLoadOrStore(context, ByteSizeLog2, AccessType.Store | AccessType.Exclusive); + } + + public static void Strexd(ArmEmitterContext context) + { + EmitExLoadOrStore(context, DWordSizeLog2, AccessType.Store | AccessType.Exclusive); + } + + public static void Strexh(ArmEmitterContext context) + { + EmitExLoadOrStore(context, HWordSizeLog2, AccessType.Store | AccessType.Exclusive); + } + + public static void Stl(ArmEmitterContext context) + { + EmitExLoadOrStore(context, WordSizeLog2, AccessType.Store | AccessType.Ordered); + } + + public static void Stlb(ArmEmitterContext context) + { + EmitExLoadOrStore(context, ByteSizeLog2, AccessType.Store | AccessType.Ordered); + } + + public static void Stlex(ArmEmitterContext context) + { + EmitExLoadOrStore(context, WordSizeLog2, AccessType.Store | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Stlexb(ArmEmitterContext context) + { + EmitExLoadOrStore(context, ByteSizeLog2, AccessType.Store | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Stlexd(ArmEmitterContext context) + { + EmitExLoadOrStore(context, DWordSizeLog2, AccessType.Store | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Stlexh(ArmEmitterContext context) + { + EmitExLoadOrStore(context, HWordSizeLog2, AccessType.Store | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Stlh(ArmEmitterContext context) + { + EmitExLoadOrStore(context, HWordSizeLog2, AccessType.Store | AccessType.Ordered); + } + + private static void EmitExLoadOrStore(ArmEmitterContext context, int size, AccessType accType) + { + IOpCode32MemEx op = (IOpCode32MemEx)context.CurrOp; + + Operand address = context.Copy(GetIntA32(context, op.Rn)); + + var exclusive = (accType & AccessType.Exclusive) != 0; + var ordered = (accType & AccessType.Ordered) != 0; + + if (ordered) + { + EmitBarrier(context); + } + + if ((accType & AccessType.Load) != 0) + { + if (size == DWordSizeLog2) + { + // Keep loads atomic - make the call to get the whole region and then decompose it into parts + // for the registers. 
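+                    // Editor's note (not part of the change): a single 64-bit
+                    // EmitLoadExclusive call keeps the pair load atomic; the value is then
+                    // split into its low and high words and the CPSR E (endianness) flag
+                    // decides which word lands in Rt and which in the second register of
+                    // the pair (Rt | 1), as the branches below show.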
+ + Operand value = EmitLoadExclusive(context, address, exclusive, size); + + Operand valueLow = context.ConvertI64ToI32(value); + + valueLow = context.ZeroExtend32(OperandType.I64, valueLow); + + Operand valueHigh = context.ShiftRightUI(value, Const(32)); + + Operand lblBigEndian = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag)); + + SetIntA32(context, op.Rt, valueLow); + SetIntA32(context, op.Rt | 1, valueHigh); + + context.Branch(lblEnd); + + context.MarkLabel(lblBigEndian); + + SetIntA32(context, op.Rt | 1, valueLow); + SetIntA32(context, op.Rt, valueHigh); + + context.MarkLabel(lblEnd); + } + else + { + SetIntA32(context, op.Rt, EmitLoadExclusive(context, address, exclusive, size)); + } + } + else + { + if (size == DWordSizeLog2) + { + // Split the result into 2 words (based on endianness) + + Operand lo = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rt)); + Operand hi = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rt | 1)); + + Operand lblBigEndian = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag)); + + Operand leResult = context.BitwiseOr(lo, context.ShiftLeft(hi, Const(32))); + Operand leS = EmitStoreExclusive(context, address, leResult, exclusive, size); + if (exclusive) + { + SetIntA32(context, op.Rd, leS); + } + + context.Branch(lblEnd); + + context.MarkLabel(lblBigEndian); + + Operand beResult = context.BitwiseOr(hi, context.ShiftLeft(lo, Const(32))); + Operand beS = EmitStoreExclusive(context, address, beResult, exclusive, size); + if (exclusive) + { + SetIntA32(context, op.Rd, beS); + } + + context.MarkLabel(lblEnd); + } + else + { + Operand s = EmitStoreExclusive(context, address, context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rt)), exclusive, size); + // This is only needed for exclusive stores. The function returns 0 + // when the store is successful, and 1 otherwise. + if (exclusive) + { + SetIntA32(context, op.Rd, s); + } + } + } + } + + private static void EmitBarrier(ArmEmitterContext context) + { + // Note: This barrier is most likely not necessary, and probably + // doesn't make any difference since we need to do a ton of stuff + // (software MMU emulation) to read or write anything anyway. 
+ } + } +} diff --git a/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs b/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs new file mode 100644 index 000000000..00a5385bd --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs @@ -0,0 +1,87 @@ +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; + +namespace ARMeilleure.Instructions +{ + static class InstEmitMemoryExHelper + { + public static Operand EmitLoadExclusive( + ArmEmitterContext context, + Operand address, + bool exclusive, + int size) + { + Delegate fallbackMethodDlg = null; + + if (exclusive) + { + switch (size) + { + case 0: fallbackMethodDlg = new _U8_U64(NativeInterface.ReadByteExclusive); break; + case 1: fallbackMethodDlg = new _U16_U64(NativeInterface.ReadUInt16Exclusive); break; + case 2: fallbackMethodDlg = new _U32_U64(NativeInterface.ReadUInt32Exclusive); break; + case 3: fallbackMethodDlg = new _U64_U64(NativeInterface.ReadUInt64Exclusive); break; + case 4: fallbackMethodDlg = new _V128_U64(NativeInterface.ReadVector128Exclusive); break; + } + } + else + { + switch (size) + { + case 0: fallbackMethodDlg = new _U8_U64(NativeInterface.ReadByte); break; + case 1: fallbackMethodDlg = new _U16_U64(NativeInterface.ReadUInt16); break; + case 2: fallbackMethodDlg = new _U32_U64(NativeInterface.ReadUInt32); break; + case 3: fallbackMethodDlg = new _U64_U64(NativeInterface.ReadUInt64); break; + case 4: fallbackMethodDlg = new _V128_U64(NativeInterface.ReadVector128); break; + } + } + + return context.Call(fallbackMethodDlg, address); + } + + public static Operand EmitStoreExclusive( + ArmEmitterContext context, + Operand address, + Operand value, + bool exclusive, + int size) + { + if (size < 3) + { + value = context.ConvertI64ToI32(value); + } + + Delegate fallbackMethodDlg = null; + + if (exclusive) + { + switch (size) + { + case 0: fallbackMethodDlg = new _S32_U64_U8(NativeInterface.WriteByteExclusive); break; + case 1: fallbackMethodDlg = new _S32_U64_U16(NativeInterface.WriteUInt16Exclusive); break; + case 2: fallbackMethodDlg = new _S32_U64_U32(NativeInterface.WriteUInt32Exclusive); break; + case 3: fallbackMethodDlg = new _S32_U64_U64(NativeInterface.WriteUInt64Exclusive); break; + case 4: fallbackMethodDlg = new _S32_U64_V128(NativeInterface.WriteVector128Exclusive); break; + } + + return context.Call(fallbackMethodDlg, address, value); + } + else + { + switch (size) + { + case 0: fallbackMethodDlg = new _Void_U64_U8(NativeInterface.WriteByte); break; + case 1: fallbackMethodDlg = new _Void_U64_U16(NativeInterface.WriteUInt16); break; + case 2: fallbackMethodDlg = new _Void_U64_U32(NativeInterface.WriteUInt32); break; + case 3: fallbackMethodDlg = new _Void_U64_U64(NativeInterface.WriteUInt64); break; + case 4: fallbackMethodDlg = new _Void_U64_V128(NativeInterface.WriteVector128); break; + } + + context.Call(fallbackMethodDlg, address, value); + + return null; + } + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitMemoryHelper.cs b/ARMeilleure/Instructions/InstEmitMemoryHelper.cs index e0b44353b..70861d163 100644 --- a/ARMeilleure/Instructions/InstEmitMemoryHelper.cs +++ b/ARMeilleure/Instructions/InstEmitMemoryHelper.cs @@ -53,7 +53,7 @@ namespace ARMeilleure.Instructions if (!isSimd) { - Operand value = GetIntOrZR(context, rt); + Operand value = GetInt(context, rt); if (ext == Extension.Sx32 || ext == Extension.Sx64) { @@ -67,7 +67,7 @@ namespace ARMeilleure.Instructions } } - SetIntOrZR(context, rt, value); + SetInt(context, rt, value); } } @@ 
-505,5 +505,68 @@ namespace ARMeilleure.Instructions SetIntOrZR(context, rt, value); } } + + // ARM32 helpers. + public static Operand GetMemM(ArmEmitterContext context, bool setCarry = true) + { + switch (context.CurrOp) + { + case OpCode32MemRsImm op: return GetMShiftedByImmediate(context, op, setCarry); + + case OpCode32MemReg op: return GetIntA32(context, op.Rm); + + case OpCode32Mem op: return Const(op.Immediate); + + case OpCode32SimdMemImm op: return Const(op.Immediate); + + default: throw InvalidOpCodeType(context.CurrOp); + } + } + + private static Exception InvalidOpCodeType(OpCode opCode) + { + return new InvalidOperationException($"Invalid OpCode type \"{opCode?.GetType().Name ?? "null"}\"."); + } + + public static Operand GetMShiftedByImmediate(ArmEmitterContext context, OpCode32MemRsImm op, bool setCarry) + { + Operand m = GetIntA32(context, op.Rm); + + int shift = op.Immediate; + + if (shift == 0) + { + switch (op.ShiftType) + { + case ShiftType.Lsr: shift = 32; break; + case ShiftType.Asr: shift = 32; break; + case ShiftType.Ror: shift = 1; break; + } + } + + if (shift != 0) + { + setCarry &= false; + + switch (op.ShiftType) + { + case ShiftType.Lsl: m = InstEmitAluHelper.GetLslC(context, m, setCarry, shift); break; + case ShiftType.Lsr: m = InstEmitAluHelper.GetLsrC(context, m, setCarry, shift); break; + case ShiftType.Asr: m = InstEmitAluHelper.GetAsrC(context, m, setCarry, shift); break; + case ShiftType.Ror: + if (op.Immediate != 0) + { + m = InstEmitAluHelper.GetRorC(context, m, setCarry, shift); + } + else + { + m = InstEmitAluHelper.GetRrxC(context, m, setCarry); + } + break; + } + } + + return m; + } } } \ No newline at end of file diff --git a/ARMeilleure/Instructions/InstEmitMul32.cs b/ARMeilleure/Instructions/InstEmitMul32.cs new file mode 100644 index 000000000..e64f3568e --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitMul32.cs @@ -0,0 +1,290 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitAluHelper; +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + [Flags] + private enum MullFlags + { + Subtract = 1, + Add = 1 << 1, + Signed = 1 << 2, + + SignedAdd = Signed | Add, + SignedSubtract = Signed | Subtract + } + + public static void Mla(ArmEmitterContext context) + { + OpCode32AluMla op = (OpCode32AluMla)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + Operand a = GetIntA32(context, op.Ra); + + Operand res = context.Add(a, context.Multiply(n, m)); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + } + + EmitAluStore(context, res); + } + + public static void Mls(ArmEmitterContext context) + { + OpCode32AluMla op = (OpCode32AluMla)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + Operand a = GetIntA32(context, op.Ra); + + Operand res = context.Subtract(a, context.Multiply(n, m)); + + EmitAluStore(context, res); + } + + public static void Smull(ArmEmitterContext context) + { + OpCode32AluUmull op = (OpCode32AluUmull)context.CurrOp; + + Operand n = context.SignExtend32(OperandType.I64, GetIntA32(context, op.Rn)); + Operand m = context.SignExtend32(OperandType.I64, GetIntA32(context, op.Rm)); + + Operand res = context.Multiply(n, m); + + Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32))); + 
Operand lo = context.ConvertI64ToI32(res); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + } + + EmitGenericAluStoreA32(context, op.RdHi, op.SetFlags, hi); + EmitGenericAluStoreA32(context, op.RdLo, op.SetFlags, lo); + } + + public static void Smmla(ArmEmitterContext context) + { + EmitSmmul(context, MullFlags.SignedAdd); + } + + public static void Smmls(ArmEmitterContext context) + { + EmitSmmul(context, MullFlags.SignedSubtract); + } + + public static void Smmul(ArmEmitterContext context) + { + EmitSmmul(context, MullFlags.Signed); + } + + private static void EmitSmmul(ArmEmitterContext context, MullFlags flags) + { + OpCode32AluMla op = (OpCode32AluMla)context.CurrOp; + + Operand n = context.SignExtend32(OperandType.I64, GetIntA32(context, op.Rn)); + Operand m = context.SignExtend32(OperandType.I64, GetIntA32(context, op.Rm)); + + Operand res = context.Multiply(n, m); + + if (flags.HasFlag(MullFlags.Add) && op.Ra != 0xf) + { + res = context.Add(context.ShiftLeft(context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Ra)), Const(32)), res); + } + else if (flags.HasFlag(MullFlags.Subtract)) + { + res = context.Subtract(context.ShiftLeft(context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Ra)), Const(32)), res); + } + + if (op.R) + { + res = context.Add(res, Const(0x80000000L)); + } + + Operand hi = context.ConvertI64ToI32(context.ShiftRightSI(res, Const(32))); + + EmitGenericAluStoreA32(context, op.Rd, false, hi); + } + + public static void Smlab(ArmEmitterContext context) + { + OpCode32AluMla op = (OpCode32AluMla)context.CurrOp; + + Operand n = GetIntA32(context, op.Rn); + Operand m = GetIntA32(context, op.Rm); + + if (op.NHigh) + { + n = context.SignExtend16(OperandType.I32, context.ShiftRightUI(n, Const(16))); + } + else + { + n = context.SignExtend16(OperandType.I32, n); + } + + if (op.MHigh) + { + m = context.SignExtend16(OperandType.I32, context.ShiftRightUI(m, Const(16))); + } + else + { + m = context.SignExtend16(OperandType.I32, m); + } + + Operand res = context.Multiply(n, m); + + Operand a = GetIntA32(context, op.Ra); + res = context.Add(res, a); + + // TODO: set Q flag when last addition overflows (saturation)? 
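+            // Editor's note (illustrative only, not part of the change): the overflow of
+            // the final signed addition could be detected with the usual sign check, for
+            // example by keeping the product before the add:
+            //     Operand product  = context.Multiply(n, m);
+            //     Operand sum      = context.Add(product, a);
+            //     Operand overflow = context.ShiftRightUI(
+            //         context.BitwiseAnd(
+            //             context.BitwiseExclusiveOr(sum, a),
+            //             context.BitwiseExclusiveOr(sum, product)), Const(31));
+            // and the resulting bit could then feed the saturation (Q) state once that is
+            // modelled; this sketch is not part of the change.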
+ + EmitGenericAluStoreA32(context, op.Rd, false, res); + } + + public static void Smlal(ArmEmitterContext context) + { + EmitMlal(context, true); + } + + public static void Smlalh(ArmEmitterContext context) + { + OpCode32AluUmull op = (OpCode32AluUmull)context.CurrOp; + + Operand n = GetIntA32(context, op.Rn); + Operand m = GetIntA32(context, op.Rm); + + if (op.NHigh) + { + n = context.SignExtend16(OperandType.I64, context.ShiftRightUI(n, Const(16))); + } + else + { + n = context.SignExtend16(OperandType.I64, n); + } + + if (op.MHigh) + { + m = context.SignExtend16(OperandType.I64, context.ShiftRightUI(m, Const(16))); + } + else + { + m = context.SignExtend16(OperandType.I64, m); + } + + Operand res = context.Multiply(n, m); + + Operand toAdd = context.ShiftLeft(context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdHi)), Const(32)); + toAdd = context.BitwiseOr(toAdd, context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdLo))); + res = context.Add(res, toAdd); + + Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32))); + Operand lo = context.ConvertI64ToI32(res); + + EmitGenericAluStoreA32(context, op.RdHi, false, hi); + EmitGenericAluStoreA32(context, op.RdLo, false, lo); + } + + public static void Smulh(ArmEmitterContext context) + { + OpCode32AluMla op = (OpCode32AluMla)context.CurrOp; + + Operand n = GetIntA32(context, op.Rn); + Operand m = GetIntA32(context, op.Rm); + + if (op.NHigh) + { + n = context.ShiftRightSI(n, Const(16)); + } + else + { + n = context.SignExtend16(OperandType.I32, n); + } + + if (op.MHigh) + { + m = context.ShiftRightSI(m, Const(16)); + } + else + { + m = context.SignExtend16(OperandType.I32, m); + } + + Operand res = context.Multiply(n, m); + + EmitGenericAluStoreA32(context, op.Rd, false, res); + } + + public static void Umlal(ArmEmitterContext context) + { + EmitMlal(context, false); + } + + public static void Umull(ArmEmitterContext context) + { + OpCode32AluUmull op = (OpCode32AluUmull)context.CurrOp; + + Operand n = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rn)); + Operand m = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rm)); + + Operand res = context.Multiply(n, m); + + Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32))); + Operand lo = context.ConvertI64ToI32(res); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + } + + EmitGenericAluStoreA32(context, op.RdHi, op.SetFlags, hi); + EmitGenericAluStoreA32(context, op.RdLo, op.SetFlags, lo); + } + + public static void EmitMlal(ArmEmitterContext context, bool signed) + { + OpCode32AluUmull op = (OpCode32AluUmull)context.CurrOp; + + Operand n = GetIntA32(context, op.Rn); + Operand m = GetIntA32(context, op.Rm); + + if (signed) + { + n = context.SignExtend32(OperandType.I64, n); + m = context.SignExtend32(OperandType.I64, m); + } + else + { + n = context.ZeroExtend32(OperandType.I64, n); + m = context.ZeroExtend32(OperandType.I64, m); + } + + Operand res = context.Multiply(n, m); + + Operand toAdd = context.ShiftLeft(context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdHi)), Const(32)); + toAdd = context.BitwiseOr(toAdd, context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdLo))); + res = context.Add(res, toAdd); + + Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32))); + Operand lo = context.ConvertI64ToI32(res); + + if (op.SetFlags) + { + EmitNZFlagsCheck(context, res); + } + + EmitGenericAluStoreA32(context, op.RdHi, op.SetFlags, hi); + EmitGenericAluStoreA32(context, 
op.RdLo, op.SetFlags, lo); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs new file mode 100644 index 000000000..4ee279ee0 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs @@ -0,0 +1,634 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitFlowHelper; +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper32; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Vabs_S(ArmEmitterContext context) + { + EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Abs, Math.Abs, op1)); + } + + public static void Vabs_V(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + if (op.F) + { + EmitVectorUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Abs, Math.Abs, op1)); + } + else + { + EmitVectorUnaryOpSx32(context, (op1) => EmitAbs(context, op1)); + } + } + + private static Operand EmitAbs(ArmEmitterContext context, Operand value) + { + Operand isPositive = context.ICompareGreaterOrEqual(value, Const(value.Type, 0)); + + return context.ConditionalSelect(isPositive, value, context.Negate(value)); + } + + public static void Vadd_S(ArmEmitterContext context) + { + if (Optimizations.FastFP) + { + EmitScalarBinaryOpF32(context, (op1, op2) => context.Add(op1, op2)); + } + else + { + EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, SoftFloat32.FPAdd, SoftFloat64.FPAdd, op1, op2)); + } + } + + public static void Vadd_V(ArmEmitterContext context) + { + if (Optimizations.FastFP) + { + EmitVectorBinaryOpF32(context, (op1, op2) => context.Add(op1, op2)); + } + else + { + EmitVectorBinaryOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPAddFpscr, SoftFloat64.FPAddFpscr, op1, op2)); + } + } + + public static void Vadd_I(ArmEmitterContext context) + { + EmitVectorBinaryOpZx32(context, (op1, op2) => context.Add(op1, op2)); + } + + public static void Vdup(ArmEmitterContext context) + { + OpCode32SimdDupGP op = (OpCode32SimdDupGP)context.CurrOp; + + Operand insert = GetIntA32(context, op.Rt); + + // Zero extend into an I64, then replicate. Saves the most time over elementwise inserts. + switch (op.Size) + { + case 2: + insert = context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u)); + break; + case 1: + insert = context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u)); + break; + case 0: + insert = context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u)); + break; + default: + throw new InvalidOperationException("Unknown Vdup Size."); + } + + InsertScalar(context, op.Vd, insert); + if (op.Q) + { + InsertScalar(context, op.Vd + 1, insert); + } + } + + public static void Vdup_1(ArmEmitterContext context) + { + OpCode32SimdDupElem op = (OpCode32SimdDupElem)context.CurrOp; + + Operand insert = EmitVectorExtractZx32(context, op.Vm >> 1, ((op.Vm & 1) << (3 - op.Size)) + op.Index, op.Size); + + // Zero extend into an I64, then replicate. Saves the most time over elementwise inserts. 
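+            // Editor's note (not part of the change): multiplying the zero-extended
+            // element by a constant with a 1 in every element position broadcasts it
+            // across the 64-bit lane, e.g. 0xAB * 0x0101010101010101 ==
+            // 0xABABABABABABABAB; the same trick is used by Vdup on a GP register above.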
+ switch (op.Size) + { + case 2: + insert = context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u)); + break; + case 1: + insert = context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u)); + break; + case 0: + insert = context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u)); + break; + default: + throw new InvalidOperationException("Unknown Vdup Size."); + } + + InsertScalar(context, op.Vd, insert); + if (op.Q) + { + InsertScalar(context, op.Vd | 1, insert); + } + } + + public static void Vext(ArmEmitterContext context) + { + OpCode32SimdExt op = (OpCode32SimdExt)context.CurrOp; + + int elems = op.GetBytesCount(); + int byteOff = op.Immediate; + + Operand res = GetVecA32(op.Qd); + + for (int index = 0; index < elems; index++) + { + Operand extract; + + if (byteOff >= elems) + { + extract = EmitVectorExtractZx32(context, op.Qm, op.Im + (byteOff - elems), op.Size); + } + else + { + extract = EmitVectorExtractZx32(context, op.Qn, op.In + byteOff, op.Size); + } + byteOff++; + + res = EmitVectorInsert(context, res, extract, op.Id + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void Vmov_S(ArmEmitterContext context) + { + EmitScalarUnaryOpF32(context, (op1) => op1); + } + + public static void Vmovn(ArmEmitterContext context) + { + EmitVectorUnaryNarrowOp32(context, (op1) => op1); + } + + public static void Vneg_S(ArmEmitterContext context) + { + EmitScalarUnaryOpF32(context, (op1) => context.Negate(op1)); + } + + public static void Vnmul_S(ArmEmitterContext context) + { + EmitScalarBinaryOpF32(context, (op1, op2) => context.Negate(context.Multiply(op1, op2))); + } + + public static void Vnmla_S(ArmEmitterContext context) + { + if (Optimizations.FastFP) + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return context.Negate(context.Add(op1, context.Multiply(op2, op3))); + }); + } + else + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPNegMulAdd, SoftFloat64.FPNegMulAdd, op1, op2, op3); + }); + } + } + + public static void Vnmls_S(ArmEmitterContext context) + { + if (Optimizations.FastFP) + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return context.Add(context.Negate(op1), context.Multiply(op2, op3)); + }); + } + else + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPNegMulSub, SoftFloat64.FPNegMulSub, op1, op2, op3); + }); + } + } + + public static void Vneg_V(ArmEmitterContext context) + { + if ((context.CurrOp as OpCode32Simd).F) + { + EmitVectorUnaryOpF32(context, (op1) => context.Negate(op1)); + } + else + { + EmitVectorUnaryOpSx32(context, (op1) => context.Negate(op1)); + } + } + + public static void Vdiv_S(ArmEmitterContext context) + { + if (Optimizations.FastFP) + { + EmitScalarBinaryOpF32(context, (op1, op2) => context.Divide(op1, op2)); + } + else + { + EmitScalarBinaryOpF32(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPDiv, SoftFloat64.FPDiv, op1, op2); + }); + } + } + + public static void Vmaxnm_S(ArmEmitterContext context) + { + EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, SoftFloat32.FPMaxNum, SoftFloat64.FPMaxNum, op1, op2)); + } + + public static void Vmaxnm_V(ArmEmitterContext context) + { + EmitVectorBinaryOpSx32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMaxNumFpscr, SoftFloat64.FPMaxNumFpscr, 
op1, op2)); + } + + public static void Vminnm_S(ArmEmitterContext context) + { + EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, SoftFloat32.FPMinNum, SoftFloat64.FPMinNum, op1, op2)); + } + + public static void Vminnm_V(ArmEmitterContext context) + { + EmitVectorBinaryOpSx32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMinNumFpscr, SoftFloat64.FPMinNumFpscr, op1, op2)); + } + + public static void Vmax_V(ArmEmitterContext context) + { + EmitVectorBinaryOpF32(context, (op1, op2) => + { + return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMaxFpscr, SoftFloat64.FPMaxFpscr, op1, op2); + }); + } + + public static void Vmax_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + if (op.U) + { + EmitVectorBinaryOpZx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareGreaterUI(op1, op2), op1, op2)); + } + else + { + EmitVectorBinaryOpSx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareGreater(op1, op2), op1, op2)); + } + } + + public static void Vmin_V(ArmEmitterContext context) + { + EmitVectorBinaryOpF32(context, (op1, op2) => + { + return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMinFpscr, SoftFloat64.FPMinFpscr, op1, op2); + }); + } + + public static void Vmin_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + if (op.U) + { + EmitVectorBinaryOpZx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareLessUI(op1, op2), op1, op2)); + } + else + { + EmitVectorBinaryOpSx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareLess(op1, op2), op1, op2)); + } + } + + public static void Vmul_S(ArmEmitterContext context) + { + if (Optimizations.FastFP) + { + EmitScalarBinaryOpF32(context, (op1, op2) => context.Multiply(op1, op2)); + } + else + { + EmitScalarBinaryOpF32(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, op1, op2); + }); + } + } + + public static void Vmul_V(ArmEmitterContext context) + { + if (Optimizations.FastFP) + { + EmitVectorBinaryOpF32(context, (op1, op2) => context.Multiply(op1, op2)); + } + else + { + EmitVectorBinaryOpF32(context, (op1, op2) => + { + return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulFpscr, SoftFloat64.FPMulFpscr, op1, op2); + }); + } + } + + public static void Vmul_I(ArmEmitterContext context) + { + if ((context.CurrOp as OpCode32SimdReg).U) throw new NotImplementedException("Polynomial mode not implemented"); + EmitVectorBinaryOpSx32(context, (op1, op2) => context.Multiply(op1, op2)); + } + + public static void Vmul_1(ArmEmitterContext context) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + if (op.F) + { + if (Optimizations.FastFP) + { + EmitVectorByScalarOpF32(context, (op1, op2) => context.Multiply(op1, op2)); + } + else + { + EmitVectorByScalarOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulFpscr, SoftFloat64.FPMulFpscr, op1, op2)); + } + } + else + { + EmitVectorByScalarOpI32(context, (op1, op2) => context.Multiply(op1, op2), false); + } + } + + public static void Vmla_S(ArmEmitterContext context) + { + if (Optimizations.FastFP) + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + else + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMulAdd, SoftFloat64.FPMulAdd, op1, op2, op3); + }); + } + } + 
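+        // Editor's note (not part of the change): the pattern above repeats throughout
+        // this file; when Optimizations.FastFP is enabled the operation is emitted as a
+        // plain IR node, otherwise it falls back to the SoftFloat32/SoftFloat64 helpers,
+        // with the vector forms going through EmitSoftFloatCallDefaultFpscr and the
+        // *Fpscr helper variants.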
+ public static void Vmla_V(ArmEmitterContext context) + { + if (Optimizations.FastFP) + { + EmitVectorTernaryOpF32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3))); + } + else + { + EmitVectorTernaryOpF32(context, (op1, op2, op3) => + { + return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulAddFpscr, SoftFloat64.FPMulAddFpscr, op1, op2, op3); + }); + } + } + + public static void Vmla_I(ArmEmitterContext context) + { + EmitVectorTernaryOpZx32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3))); + } + + public static void Vmla_1(ArmEmitterContext context) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + if (op.F) + { + if (Optimizations.FastFP) + { + EmitVectorsByScalarOpF32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3))); + } + else + { + EmitVectorsByScalarOpF32(context, (op1, op2, op3) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulAddFpscr, SoftFloat64.FPMulAddFpscr, op1, op2, op3)); + } + } + else + { + EmitVectorsByScalarOpI32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)), false); + } + } + + public static void Vmls_S(ArmEmitterContext context) + { + if (Optimizations.FastFP) + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + else + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, op1, op2, op3); + }); + } + } + + public static void Vmls_V(ArmEmitterContext context) + { + if (Optimizations.FastFP) + { + EmitVectorTernaryOpF32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3))); + } + else + { + EmitVectorTernaryOpF32(context, (op1, op2, op3) => + { + return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulSubFpscr, SoftFloat64.FPMulSubFpscr, op1, op2, op3); + }); + } + } + + public static void Vmls_I(ArmEmitterContext context) + { + EmitVectorTernaryOpZx32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3))); + } + + public static void Vmls_1(ArmEmitterContext context) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + if (op.F) + { + if (Optimizations.FastFP) + { + EmitVectorsByScalarOpF32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3))); + } + else + { + EmitVectorsByScalarOpF32(context, (op1, op2, op3) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulSubFpscr, SoftFloat64.FPMulSubFpscr, op1, op2, op3)); + } + } + else + { + EmitVectorsByScalarOpI32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)), false); + } + } + + public static void Vpadd_V(ArmEmitterContext context) + { + EmitVectorPairwiseOpF32(context, (op1, op2) => context.Add(op1, op2)); + } + + public static void Vpadd_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + EmitVectorPairwiseOpI32(context, (op1, op2) => context.Add(op1, op2), !op.U); + } + + public static void Vrev(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + EmitVectorUnaryOpZx32(context, (op1) => + { + switch (op.Opc) + { + case 0: + switch (op.Size) // Swap bytes. 
+ { + default: + return op1; + case 1: + return InstEmitAluHelper.EmitReverseBytes16_32Op(context, op1); + case 2: + case 3: + return context.ByteSwap(op1); + } + case 1: + switch (op.Size) + { + default: + return op1; + case 2: + return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0xffff0000)), Const(16)), + context.ShiftLeft(context.BitwiseAnd(op1, Const(0x0000ffff)), Const(16))); + case 3: + return context.BitwiseOr( + context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0xffff000000000000ul)), Const(48)), + context.ShiftLeft(context.BitwiseAnd(op1, Const(0x000000000000fffful)), Const(48))), + context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0x0000ffff00000000ul)), Const(16)), + context.ShiftLeft(context.BitwiseAnd(op1, Const(0x00000000ffff0000ul)), Const(16)))); + } + case 2: + // Swap upper and lower halves. + return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0xffffffff00000000ul)), Const(32)), + context.ShiftLeft(context.BitwiseAnd(op1, Const(0x00000000fffffffful)), Const(32))); + } + + return op1; + }); + } + + public static void Vrecpe(ArmEmitterContext context) + { + OpCode32SimdSqrte op = (OpCode32SimdSqrte)context.CurrOp; + + if (op.F) + { + EmitVectorUnaryOpF32(context, (op1) => + { + return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPRecipEstimateFpscr, SoftFloat64.FPRecipEstimateFpscr, op1); + }); + } + else + { + throw new NotImplementedException("Integer Vrecpe not currently implemented."); + } + } + + public static void Vrecps(ArmEmitterContext context) + { + EmitVectorBinaryOpF32(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPRecipStep, SoftFloat64.FPRecipStep, op1, op2); + }); + } + + public static void Vrsqrte(ArmEmitterContext context) + { + OpCode32SimdSqrte op = (OpCode32SimdSqrte)context.CurrOp; + + if (op.F) + { + EmitVectorUnaryOpF32(context, (op1) => + { + return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPRSqrtEstimateFpscr, SoftFloat64.FPRSqrtEstimateFpscr, op1); + }); + } + else + { + throw new NotImplementedException("Integer Vrsqrte not currently implemented."); + } + } + + public static void Vrsqrts(ArmEmitterContext context) + { + EmitVectorBinaryOpF32(context, (op1, op2) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPRSqrtStep, SoftFloat64.FPRSqrtStep, op1, op2); + }); + } + + public static void Vsel(ArmEmitterContext context) + { + OpCode32SimdSel op = (OpCode32SimdSel)context.CurrOp; + + Operand condition = null; + switch (op.Cc) + { + case OpCode32SimdSelMode.Eq: + condition = GetCondTrue(context, Condition.Eq); + break; + case OpCode32SimdSelMode.Ge: + condition = GetCondTrue(context, Condition.Ge); + break; + case OpCode32SimdSelMode.Gt: + condition = GetCondTrue(context, Condition.Gt); + break; + case OpCode32SimdSelMode.Vs: + condition = GetCondTrue(context, Condition.Vs); + break; + } + + EmitScalarBinaryOpI32(context, (op1, op2) => + { + return context.ConditionalSelect(condition, op1, op2); + }); + } + + public static void Vsqrt_S(ArmEmitterContext context) + { + EmitScalarUnaryOpF32(context, (op1) => + { + return EmitSoftFloatCall(context, SoftFloat32.FPSqrt, SoftFloat64.FPSqrt, op1); + }); + } + + public static void Vsub_S(ArmEmitterContext context) + { + EmitScalarBinaryOpF32(context, (op1, op2) => context.Subtract(op1, op2)); + } + + public static void Vsub_V(ArmEmitterContext context) + { + EmitVectorBinaryOpF32(context, (op1, op2) => context.Subtract(op1, op2)); + } + + public static void 
Vsub_I(ArmEmitterContext context) + { + EmitVectorBinaryOpZx32(context, (op1, op2) => context.Subtract(op1, op2)); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdCmp32.cs b/ARMeilleure/Instructions/InstEmitSimdCmp32.cs new file mode 100644 index 000000000..3b2483ce5 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdCmp32.cs @@ -0,0 +1,273 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper32; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + using Func2I = Func; + + static partial class InstEmit32 + { + public static void Vceq_V(ArmEmitterContext context) + { + EmitCmpOpF32(context, SoftFloat32.FPCompareEQFpscr, SoftFloat64.FPCompareEQFpscr, false); + } + + public static void Vceq_I(ArmEmitterContext context) + { + EmitCmpOpI32(context, context.ICompareEqual, context.ICompareEqual, false, false); + } + + public static void Vceq_Z(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + if (op.F) + { + EmitCmpOpF32(context, SoftFloat32.FPCompareEQFpscr, SoftFloat64.FPCompareEQFpscr, true); + } + else + { + EmitCmpOpI32(context, context.ICompareEqual, context.ICompareEqual, true, false); + } + } + + public static void Vcge_V(ArmEmitterContext context) + { + EmitCmpOpF32(context, SoftFloat32.FPCompareGEFpscr, SoftFloat64.FPCompareGEFpscr, false); + } + + public static void Vcge_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + EmitCmpOpI32(context, context.ICompareGreaterOrEqual, context.ICompareGreaterOrEqualUI, false, !op.U); + } + + public static void Vcge_Z(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + if (op.F) + { + EmitCmpOpF32(context, SoftFloat32.FPCompareGEFpscr, SoftFloat64.FPCompareGEFpscr, true); + } + else + { + EmitCmpOpI32(context, context.ICompareGreaterOrEqual, context.ICompareGreaterOrEqualUI, true, true); + } + } + + public static void Vcgt_V(ArmEmitterContext context) + { + EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, false); + } + + public static void Vcgt_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + EmitCmpOpI32(context, context.ICompareGreater, context.ICompareGreaterUI, false, !op.U); + } + + public static void Vcgt_Z(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + if (op.F) + { + EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, true); + } + else + { + EmitCmpOpI32(context, context.ICompareGreater, context.ICompareGreaterUI, true, true); + } + } + + public static void Vcle_Z(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + if (op.F) + { + EmitCmpOpF32(context, SoftFloat32.FPCompareLEFpscr, SoftFloat64.FPCompareLEFpscr, true); + } + else + { + EmitCmpOpI32(context, context.ICompareLessOrEqual, context.ICompareLessOrEqualUI, true, true); + } + } + + public static void Vclt_Z(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + if (op.F) + { + EmitCmpOpF32(context, SoftFloat32.FPCompareLTFpscr, SoftFloat64.FPCompareLTFpscr, true); + } + else + { + EmitCmpOpI32(context, context.ICompareLess, context.ICompareLessUI, true, true); + } + } + + private static 
void EmitCmpOpF32( + ArmEmitterContext context, + _F32_F32_F32_Bool f32, + _F64_F64_F64_Bool f64, + bool zero) + { + Operand one = Const(1); + if (zero) + { + EmitVectorUnaryOpF32(context, (m) => + { + OperandType type = m.Type; + + if (type == OperandType.FP64) + { + return context.Call(f64, m, ConstF(0.0), one); + } + else + { + return context.Call(f32, m, ConstF(0.0f), one); + } + }); + } + else + { + EmitVectorBinaryOpF32(context, (n, m) => + { + OperandType type = n.Type; + + if (type == OperandType.FP64) + { + return context.Call(f64, n, m, one); + } + else + { + return context.Call(f32, n, m, one); + } + }); + } + } + + private static Operand ZerosOrOnes(ArmEmitterContext context, Operand fromBool, OperandType baseType) + { + var ones = (baseType == OperandType.I64) ? Const(-1L) : Const(-1); + + return context.ConditionalSelect(fromBool, ones, Const(baseType, 0L)); + } + + private static void EmitCmpOpI32( + ArmEmitterContext context, + Func2I signedOp, + Func2I unsignedOp, + bool zero, + bool signed) + { + if (zero) + { + if (signed) + { + EmitVectorUnaryOpSx32(context, (m) => + { + OperandType type = m.Type; + Operand zeroV = (type == OperandType.I64) ? Const(0L) : Const(0); + + return ZerosOrOnes(context, signedOp(m, zeroV), type); + }); + } + else + { + EmitVectorUnaryOpZx32(context, (m) => + { + OperandType type = m.Type; + Operand zeroV = (type == OperandType.I64) ? Const(0L) : Const(0); + + return ZerosOrOnes(context, unsignedOp(m, zeroV), type); + }); + } + } + else + { + if (signed) + { + EmitVectorBinaryOpSx32(context, (n, m) => ZerosOrOnes(context, signedOp(n, m), n.Type)); + } + else + { + EmitVectorBinaryOpZx32(context, (n, m) => ZerosOrOnes(context, unsignedOp(n, m), n.Type)); + } + } + } + + public static void Vcmp(ArmEmitterContext context) + { + EmitVcmpOrVcmpe(context, false); + } + + public static void Vcmpe(ArmEmitterContext context) + { + EmitVcmpOrVcmpe(context, true); + } + + private static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs) + { + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + + bool cmpWithZero = (op.Opc & 2) != 0; + { + int fSize = op.Size & 1; + OperandType type = fSize != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand ne = ExtractScalar(context, type, op.Vd); + Operand me; + + if (cmpWithZero) + { + me = fSize == 0 ? ConstF(0f) : ConstF(0d); + } + else + { + me = ExtractScalar(context, type, op.Vm); + } + + Delegate dlg = fSize != 0 + ? 
(Delegate)new _S32_F64_F64_Bool(SoftFloat64.FPCompare) + : (Delegate)new _S32_F32_F32_Bool(SoftFloat32.FPCompare); + + Operand nzcv = context.Call(dlg, ne, me, Const(signalNaNs)); + + EmitSetFPSCRFlags(context, nzcv); + } + } + + private static void EmitSetFPSCRFlags(ArmEmitterContext context, Operand nzcv) + { + Operand Extract(Operand value, int bit) + { + if (bit != 0) + { + value = context.ShiftRightUI(value, Const(bit)); + } + + value = context.BitwiseAnd(value, Const(1)); + + return value; + } + + SetFpFlag(context, FPState.VFlag, Extract(nzcv, 0)); + SetFpFlag(context, FPState.CFlag, Extract(nzcv, 1)); + SetFpFlag(context, FPState.ZFlag, Extract(nzcv, 2)); + SetFpFlag(context, FPState.NFlag, Extract(nzcv, 3)); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs new file mode 100644 index 000000000..6ab089cb2 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs @@ -0,0 +1,274 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; + +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper32; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + private static int FlipVdBits(int vd, bool lowBit) + { + if (lowBit) + { + // Move the low bit to the top. + return ((vd & 0x1) << 4) | (vd >> 1); + } + else + { + // Move the high bit to the bottom. + return ((vd & 0xf) << 1) | (vd >> 4); + } + } + + private static Operand EmitSaturateFloatToInt(ArmEmitterContext context, Operand op1, bool unsigned) + { + if (op1.Type == OperandType.FP64) + { + if (unsigned) + { + return context.Call(new _U32_F64(SoftFallback.SatF64ToU32), op1); + } + else + { + return context.Call(new _S32_F64(SoftFallback.SatF64ToS32), op1); + } + + } + else + { + if (unsigned) + { + return context.Call(new _U32_F32(SoftFallback.SatF32ToU32), op1); + } + else + { + return context.Call(new _S32_F32(SoftFallback.SatF32ToS32), op1); + } + } + } + + public static void Vcvt_V(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + bool unsigned = (op.Opc & 1) != 0; + bool toInteger = (op.Opc & 2) != 0; + OperandType floatSize = (op.Size == 2) ? OperandType.FP32 : OperandType.FP64; + + if (toInteger) + { + EmitVectorUnaryOpF32(context, (op1) => + { + return EmitSaturateFloatToInt(context, op1, unsigned); + }); + } + else + { + if (unsigned) + { + EmitVectorUnaryOpZx32(context, (op1) => EmitFPConvert(context, op1, floatSize, false)); + } + else + { + EmitVectorUnaryOpSx32(context, (op1) => EmitFPConvert(context, op1, floatSize, true)); + } + } + + } + + public static void Vcvt_FD(ArmEmitterContext context) + { + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + + int vm = op.Vm; + int vd; + if (op.Size == 3) + { + vd = FlipVdBits(op.Vd, false); + // Double to single. + Operand fp = ExtractScalar(context, OperandType.FP64, vm); + + Operand res = context.ConvertToFP(OperandType.FP32, fp); + + InsertScalar(context, vd, res); + } + else + { + vd = FlipVdBits(op.Vd, true); + // Single to double. 
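+                // Editor's note (not part of the change): FlipVdBits above is needed
+                // because the extra register bit sits at opposite ends of the encoded Vd
+                // field for single and double precision, so the destination index has to
+                // be rearranged before it is used; the conversion itself is then just
+                // ExtractScalar -> ConvertToFP -> InsertScalar.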
+ Operand fp = ExtractScalar(context, OperandType.FP32, vm); + + Operand res = context.ConvertToFP(OperandType.FP64, fp); + + InsertScalar(context, vd, res); + } + } + + public static void Vcvt_FI(ArmEmitterContext context) + { + OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; + + bool toInteger = (op.Opc2 & 0b100) != 0; + + OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32; + + if (toInteger) + { + bool unsigned = (op.Opc2 & 1) == 0; + bool roundWithFpscr = op.Opc != 1; + + Operand toConvert = ExtractScalar(context, floatSize, op.Vm); + + Operand asInteger; + + // TODO: Fast Path. + if (roundWithFpscr) + { + // These need to get the FPSCR value, so it's worth noting we'd need to do a c# call at some point. + if (floatSize == OperandType.FP64) + { + if (unsigned) + { + asInteger = context.Call(new _U32_F64(SoftFallback.DoubleToUInt32), toConvert); + } + else + { + asInteger = context.Call(new _S32_F64(SoftFallback.DoubleToInt32), toConvert); + } + } + else + { + if (unsigned) + { + asInteger = context.Call(new _U32_F32(SoftFallback.FloatToUInt32), toConvert); + } + else + { + asInteger = context.Call(new _S32_F32(SoftFallback.FloatToInt32), toConvert); + } + } + } + else + { + // Round towards zero. + asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned); + } + + InsertScalar(context, op.Vd, asInteger); + } + else + { + bool unsigned = op.Opc == 0; + + Operand toConvert = ExtractScalar(context, OperandType.I32, op.Vm); + + Operand asFloat = EmitFPConvert(context, toConvert, floatSize, !unsigned); + + InsertScalar(context, op.Vd, asFloat); + } + } + + public static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n) + { + IOpCode32Simd op = (IOpCode32Simd)context.CurrOp; + + Delegate dlg; + + if ((op.Size & 1) == 0) + { + dlg = new _F32_F32_MidpointRounding(MathF.Round); + } + else /* if ((op.Size & 1) == 1) */ + { + dlg = new _F64_F64_MidpointRounding(Math.Round); + } + + return context.Call(dlg, n, Const((int)roundMode)); + } + + public static void Vcvt_R(ArmEmitterContext context) + { + OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; + + OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32; + + bool unsigned = (op.Opc & 1) == 0; + + Operand toConvert = ExtractScalar(context, floatSize, op.Vm); + + switch (op.Opc2) + { + case 0b00: // Away + toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert); + break; + case 0b01: // Nearest + toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert); + break; + case 0b10: // Towards positive infinity + toConvert = EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, toConvert); + break; + case 0b11: // Towards negative infinity + toConvert = EmitUnaryMathCall(context, MathF.Floor, Math.Floor, toConvert); + break; + } + + Operand asInteger; + + asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned); + + InsertScalar(context, op.Vd, asInteger); + } + + public static void Vrint_RM(ArmEmitterContext context) + { + OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; + + OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? 
OperandType.FP64 : OperandType.FP32; + + Operand toConvert = ExtractScalar(context, floatSize, op.Vm); + + switch (op.Opc2) + { + case 0b00: // Away + toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert); + break; + case 0b01: // Nearest + toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert); + break; + case 0b10: // Towards positive infinity + toConvert = EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, toConvert); + break; + case 0b11: // Towards negative infinity + toConvert = EmitUnaryMathCall(context, MathF.Floor, Math.Floor, toConvert); + break; + } + + InsertScalar(context, op.Vd, toConvert); + } + + public static void Vrint_Z(ArmEmitterContext context) + { + EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Truncate, Math.Truncate, op1)); + } + + private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, OperandType type, bool signed) + { + Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64); + + if (signed) + { + return context.ConvertToFP(type, value); + } + else + { + return context.ConvertToFPUI(type, value); + } + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper.cs b/ARMeilleure/Instructions/InstEmitSimdHelper.cs index fce1bed5c..a87dac015 100644 --- a/ARMeilleure/Instructions/InstEmitSimdHelper.cs +++ b/ARMeilleure/Instructions/InstEmitSimdHelper.cs @@ -1528,7 +1528,7 @@ namespace ARMeilleure.Instructions { ThrowIfInvalid(index, size); - if (size < 3) + if (size < 3 && value.Type == OperandType.I64) { value = context.ConvertI64ToI32(value); } @@ -1544,7 +1544,7 @@ namespace ARMeilleure.Instructions return vector; } - private static void ThrowIfInvalid(int index, int size) + public static void ThrowIfInvalid(int index, int size) { if ((uint)size > 3u) { diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs new file mode 100644 index 000000000..b13b1d874 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs @@ -0,0 +1,581 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + using Func1I = Func; + using Func2I = Func; + using Func3I = Func; + + static class InstEmitSimdHelper32 + { + public static (int, int) GetQuadwordAndSubindex(int index, RegisterSize size) + { + switch (size) + { + case RegisterSize.Simd128: + return (index >> 1, 0); + case RegisterSize.Simd64: + case RegisterSize.Int64: + return (index >> 1, index & 1); + case RegisterSize.Int32: + return (index >> 2, index & 3); + } + + throw new ArgumentException("Unrecognized Vector Register Size."); + } + + public static Operand ExtractScalar(ArmEmitterContext context, OperandType type, int reg) + { + Debug.Assert(type != OperandType.V128); + + if (type == OperandType.FP64 || type == OperandType.I64) + { + // From dreg. + return context.VectorExtract(type, GetVecA32(reg >> 1), reg & 1); + } + else + { + // From sreg. 
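+ // Four 32-bit S registers share one 128-bit host vector, so reg / 4 selects the vector and reg % 4 the element within it.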
+ return context.VectorExtract(type, GetVecA32(reg >> 2), reg & 3); + } + } + + public static void InsertScalar(ArmEmitterContext context, int reg, Operand value) + { + Debug.Assert(value.Type != OperandType.V128); + + Operand vec, insert; + if (value.Type == OperandType.FP64 || value.Type == OperandType.I64) + { + // From dreg. + vec = GetVecA32(reg >> 1); + insert = context.VectorInsert(vec, value, reg & 1); + + } + else + { + // From sreg. + vec = GetVecA32(reg >> 2); + insert = context.VectorInsert(vec, value, reg & 3); + } + + context.Copy(vec, insert); + } + + public static void EmitVectorImmUnaryOp32(ArmEmitterContext context, Func1I emit) + { + IOpCode32SimdImm op = (IOpCode32SimdImm)context.CurrOp; + + Operand imm = Const(op.Immediate); + + int elems = op.Elems; + (int index, int subIndex) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); + + Operand vec = GetVecA32(index); + Operand res = vec; + + for (int item = 0; item < elems; item++) + { + res = EmitVectorInsert(context, res, emit(imm), item + subIndex * elems, op.Size); + } + + context.Copy(vec, res); + } + + public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Func1I emit) + { + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand m = ExtractScalar(context, type, op.Vm); + + InsertScalar(context, op.Vd, emit(m)); + } + + public static void EmitScalarBinaryOpF32(ArmEmitterContext context, Func2I emit) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand n = ExtractScalar(context, type, op.Vn); + Operand m = ExtractScalar(context, type, op.Vm); + + InsertScalar(context, op.Vd, emit(n, m)); + } + + public static void EmitScalarBinaryOpI32(ArmEmitterContext context, Func2I emit) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.I64 : OperandType.I32; + + if (op.Size < 2) + { + throw new NotSupportedException("Cannot perform a scalar SIMD operation on integers smaller than 32 bits."); + } + + Operand n = ExtractScalar(context, type, op.Vn); + Operand m = ExtractScalar(context, type, op.Vm); + + InsertScalar(context, op.Vd, emit(n, m)); + } + + public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Func3I emit) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand a = ExtractScalar(context, type, op.Vd); + Operand n = ExtractScalar(context, type, op.Vn); + Operand m = ExtractScalar(context, type, op.Vm); + + InsertScalar(context, op.Vd, emit(a, n, m)); + } + + public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Func1I emit) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + Operand res = GetVecA32(op.Qd); + + for (int index = 0; index < elems; index++) + { + Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index); + + res = context.VectorInsert(res, emit(me), op.Fd + index); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorBinaryOpF32(ArmEmitterContext context, Func2I emit) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? 
OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> (sizeF + 2); + + Operand res = GetVecA32(op.Qd); + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index); + Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index); + + res = context.VectorInsert(res, emit(ne, me), op.Fd + index); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Func3I emit) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + Operand res = GetVecA32(op.Qd); + + for (int index = 0; index < elems; index++) + { + Operand de = context.VectorExtract(type, GetVecA32(op.Qd), op.Fd + index); + Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index); + Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index); + + res = context.VectorInsert(res, emit(de, ne, me), op.Fd + index); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + // Integer + + public static void EmitVectorUnaryOpI32(ArmEmitterContext context, Func1I emit, bool signed) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + Operand res = GetVecA32(op.Qd); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(me), op.Id + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorBinaryOpI32(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Operand res = GetVecA32(op.Qd); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed); + Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(ne, me), op.Id + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorTernaryOpI32(ArmEmitterContext context, Func3I emit, bool signed) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Operand res = GetVecA32(op.Qd); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size, signed); + Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed); + Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(de, ne, me), op.Id + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorUnaryOpSx32(ArmEmitterContext context, Func1I emit) + { + EmitVectorUnaryOpI32(context, emit, true); + } + + public static void EmitVectorBinaryOpSx32(ArmEmitterContext context, Func2I emit) + { + EmitVectorBinaryOpI32(context, emit, true); + } + + public static void EmitVectorTernaryOpSx32(ArmEmitterContext context, Func3I emit) + { + EmitVectorTernaryOpI32(context, emit, true); + } + + public static void EmitVectorUnaryOpZx32(ArmEmitterContext context, Func1I emit) + { + EmitVectorUnaryOpI32(context, emit, false); + } + + public static void 
EmitVectorBinaryOpZx32(ArmEmitterContext context, Func2I emit) + { + EmitVectorBinaryOpI32(context, emit, false); + } + + public static void EmitVectorTernaryOpZx32(ArmEmitterContext context, Func3I emit) + { + EmitVectorTernaryOpI32(context, emit, false); + } + + // Vector by scalar + + public static void EmitVectorByScalarOpF32(ArmEmitterContext context, Func2I emit) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + Operand m = ExtractScalar(context, type, op.Vm); + + Operand res = GetVecA32(op.Qd); + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index); + + res = context.VectorInsert(res, emit(ne, m), op.Fd + index); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorByScalarOpI32(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + Operand m = EmitVectorExtract32(context, op.Vm >> (4 - op.Size), op.Vm & ((1 << (4 - op.Size)) - 1), op.Size, signed); + + Operand res = GetVecA32(op.Qd); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(ne, m), op.In + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorsByScalarOpF32(ArmEmitterContext context, Func3I emit) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + Operand m = ExtractScalar(context, type, op.Vm); + + Operand res = GetVecA32(op.Qd); + + for (int index = 0; index < elems; index++) + { + Operand de = context.VectorExtract(type, GetVecA32(op.Qd), op.Fd + index); + Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index); + + res = context.VectorInsert(res, emit(de, ne, m), op.Fd + index); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorsByScalarOpI32(ArmEmitterContext context, Func3I emit, bool signed) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + Operand m = EmitVectorExtract32(context, op.Vm >> (4 - op.Size), op.Vm & ((1 << (4 - op.Size)) - 1), op.Size, signed); + + Operand res = GetVecA32(op.Qd); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size, signed); + Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(de, ne, m), op.Id + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + // Pairwise + + public static void EmitVectorPairwiseOpF32(ArmEmitterContext context, Func2I emit) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? 
OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> (sizeF + 2); + int pairs = elems >> 1; + + Operand res = GetVecA32(op.Qd); + Operand mvec = GetVecA32(op.Qm); + Operand nvec = GetVecA32(op.Qn); + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + + Operand n1 = context.VectorExtract(type, nvec, op.Fn + pairIndex); + Operand n2 = context.VectorExtract(type, nvec, op.Fn + pairIndex + 1); + + res = context.VectorInsert(res, emit(n1, n2), op.Fd + index); + + Operand m1 = context.VectorExtract(type, mvec, op.Fm + pairIndex); + Operand m2 = context.VectorExtract(type, mvec, op.Fm + pairIndex + 1); + + res = context.VectorInsert(res, emit(m1, m2), op.Fd + index + pairs); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorPairwiseOpI32(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + int elems = op.GetBytesCount() >> op.Size; + int pairs = elems >> 1; + + Operand res = GetVecA32(op.Qd); + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + Operand n1 = EmitVectorExtract32(context, op.Qn, op.In + pairIndex, op.Size, signed); + Operand n2 = EmitVectorExtract32(context, op.Qn, op.In + pairIndex + 1, op.Size, signed); + + Operand m1 = EmitVectorExtract32(context, op.Qm, op.Im + pairIndex, op.Size, signed); + Operand m2 = EmitVectorExtract32(context, op.Qm, op.Im + pairIndex + 1, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(n1, n2), op.Id + index, op.Size); + res = EmitVectorInsert(context, res, emit(m1, m2), op.Id + index + pairs, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + // Narrow + + public static void EmitVectorUnaryNarrowOp32(ArmEmitterContext context, Func1I emit) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + int elems = 8 >> op.Size; // Size contains the target element size. (for when it becomes a doubleword) + + Operand res = GetVecA32(op.Qd); + int id = (op.Vd & 1) << (3 - op.Size); // Target doubleword base. + + for (int index = 0; index < elems; index++) + { + Operand m = EmitVectorExtract32(context, op.Qm, index, op.Size + 1, false); + + res = EmitVectorInsert(context, res, emit(m), id + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + // Generic Functions + + public static Operand EmitSoftFloatCallDefaultFpscr( + ArmEmitterContext context, + _F32_F32_Bool f32, + _F64_F64_Bool f64, + params Operand[] callArgs) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + Delegate dlg = (op.Size & 1) == 0 ? (Delegate)f32 : (Delegate)f64; + + Array.Resize(ref callArgs, callArgs.Length + 1); + callArgs[callArgs.Length - 1] = Const(1); + + return context.Call(dlg, callArgs); + } + + public static Operand EmitSoftFloatCallDefaultFpscr( + ArmEmitterContext context, + _F32_F32_F32_Bool f32, + _F64_F64_F64_Bool f64, + params Operand[] callArgs) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + Delegate dlg = (op.Size & 1) == 0 ? (Delegate)f32 : (Delegate)f64; + + Array.Resize(ref callArgs, callArgs.Length + 1); + callArgs[callArgs.Length - 1] = Const(1); + + return context.Call(dlg, callArgs); + } + + public static Operand EmitSoftFloatCallDefaultFpscr( + ArmEmitterContext context, + _F32_F32_F32_F32_Bool f32, + _F64_F64_F64_F64_Bool f64, + params Operand[] callArgs) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + Delegate dlg = (op.Size & 1) == 0 ? 
(Delegate)f32 : (Delegate)f64; + + Array.Resize(ref callArgs, callArgs.Length + 1); + callArgs[callArgs.Length - 1] = Const(1); + + return context.Call(dlg, callArgs); + } + + public static Operand EmitVectorExtractSx32(ArmEmitterContext context, int reg, int index, int size) + { + return EmitVectorExtract32(context, reg, index, size, true); + } + + public static Operand EmitVectorExtractZx32(ArmEmitterContext context, int reg, int index, int size) + { + return EmitVectorExtract32(context, reg, index, size, false); + } + + public static Operand EmitVectorExtract32(ArmEmitterContext context, int reg, int index, int size, bool signed) + { + ThrowIfInvalid(index, size); + + Operand res = null; + + switch (size) + { + case 0: + res = context.VectorExtract8(GetVec(reg), index); + break; + + case 1: + res = context.VectorExtract16(GetVec(reg), index); + break; + + case 2: + res = context.VectorExtract(OperandType.I32, GetVec(reg), index); + break; + + case 3: + res = context.VectorExtract(OperandType.I64, GetVec(reg), index); + break; + } + + if (signed) + { + switch (size) + { + case 0: res = context.SignExtend8(OperandType.I32, res); break; + case 1: res = context.SignExtend16(OperandType.I32, res); break; + } + } + else + { + switch (size) + { + case 0: res = context.ZeroExtend8(OperandType.I32, res); break; + case 1: res = context.ZeroExtend16(OperandType.I32, res); break; + } + } + + return res; + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdLogical32.cs b/ARMeilleure/Instructions/InstEmitSimdLogical32.cs new file mode 100644 index 000000000..e2e9e18ee --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdLogical32.cs @@ -0,0 +1,56 @@ +using ARMeilleure.Decoders; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitSimdHelper32; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Vand_I(ArmEmitterContext context) + { + EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseAnd(op1, op2)); + } + + public static void Vbif(ArmEmitterContext context) + { + EmitBifBit(context, true); + } + + public static void Vbit(ArmEmitterContext context) + { + EmitBifBit(context, false); + } + + public static void Vbsl(ArmEmitterContext context) + { + EmitVectorTernaryOpZx32(context, (op1, op2, op3) => + { + return context.BitwiseExclusiveOr( + context.BitwiseAnd(op1, + context.BitwiseExclusiveOr(op2, op3)), op3); + }); + } + + public static void Vorr_I(ArmEmitterContext context) + { + EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseOr(op1, op2)); + } + + private static void EmitBifBit(ArmEmitterContext context, bool notRm) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + EmitVectorTernaryOpZx32(context, (d, n, m) => + { + if (notRm) + { + m = context.BitwiseNot(m); + } + return context.BitwiseExclusiveOr( + context.BitwiseAnd(m, + context.BitwiseExclusiveOr(d, n)), d); + }); + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdMemory32.cs b/ARMeilleure/Instructions/InstEmitSimdMemory32.cs new file mode 100644 index 000000000..fb9931d81 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdMemory32.cs @@ -0,0 +1,352 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitMemoryHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ 
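+ // AArch32 SIMD memory accesses: VLD1-VLD4/VST1-VST4 element and structure forms, plus VLDM/VSTM and VLDR/VSTR.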
+ static partial class InstEmit32 + { + public static void Vld1(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 1, true); + } + + public static void Vld2(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 2, true); + } + + public static void Vld3(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 3, true); + } + + public static void Vld4(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 4, true); + } + + public static void Vst1(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 1, false); + } + + public static void Vst2(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 2, false); + } + + public static void Vst3(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 3, false); + } + + public static void Vst4(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 4, false); + } + + public static void EmitVStoreOrLoadN(ArmEmitterContext context, int count, bool load) + { + if (context.CurrOp is OpCode32SimdMemSingle) + { + OpCode32SimdMemSingle op = (OpCode32SimdMemSingle)context.CurrOp; + + int eBytes = 1 << op.Size; + + Operand n = context.Copy(GetIntA32(context, op.Rn)); + + // TODO: Check alignment. + int offset = 0; + int d = op.Vd; + + for (int i = 0; i < count; i++) + { + // Write an element from a double simd register. + Operand address = context.Add(n, Const(offset)); + if (eBytes == 8) + { + if (load) + { + EmitDVectorLoad(context, address, d); + } + else + { + EmitDVectorStore(context, address, d); + } + } + else + { + int index = ((d & 1) << (3 - op.Size)) + op.Index; + if (load) + { + if (op.Replicate) + { + var regs = (count > 1) ? 1 : op.Increment; + for (int reg = 0; reg < regs; reg++) + { + int dreg = reg + d; + int rIndex = ((dreg & 1) << (3 - op.Size)); + int limit = rIndex + (1 << (3 - op.Size)); + + while (rIndex < limit) + { + EmitLoadSimd(context, address, GetVecA32(dreg >> 1), dreg >> 1, rIndex++, op.Size); + } + } + } + else + { + EmitLoadSimd(context, address, GetVecA32(d >> 1), d >> 1, index, op.Size); + } + } + else + { + EmitStoreSimd(context, address, d >> 1, index, op.Size); + } + } + offset += eBytes; + d += op.Increment; + } + + if (op.WBack) + { + if (op.RegisterIndex) + { + Operand m = GetIntA32(context, op.Rm); + SetIntA32(context, op.Rn, context.Add(n, m)); + } + else + { + SetIntA32(context, op.Rn, context.Add(n, Const(count * eBytes))); + } + } + } + else + { + OpCode32SimdMemPair op = (OpCode32SimdMemPair)context.CurrOp; + + int eBytes = 1 << op.Size; + + Operand n = context.Copy(GetIntA32(context, op.Rn)); + int offset = 0; + int d = op.Vd; + + for (int reg = 0; reg < op.Regs; reg++) + { + for (int elem = 0; elem < op.Elems; elem++) + { + int elemD = d + reg; + for (int i = 0; i < count; i++) + { + // Write an element from a double simd register + // add ebytes for each element. 
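+ // Elements of a structure are spread across D registers spaced by Increment, while the address advances by eBytes per element.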
+ Operand address = context.Add(n, Const(offset)); + int index = ((elemD & 1) << (3 - op.Size)) + elem; + if (eBytes == 8) + { + if (load) + { + EmitDVectorLoad(context, address, elemD); + } + else + { + EmitDVectorStore(context, address, elemD); + } + } + else + { + + if (load) + { + EmitLoadSimd(context, address, GetVecA32(elemD >> 1), elemD >> 1, index, op.Size); + } + else + { + EmitStoreSimd(context, address, elemD >> 1, index, op.Size); + } + } + + offset += eBytes; + elemD += op.Increment; + } + } + } + + if (op.WBack) + { + if (op.RegisterIndex) + { + Operand m = GetIntA32(context, op.Rm); + SetIntA32(context, op.Rn, context.Add(n, m)); + } + else + { + SetIntA32(context, op.Rn, context.Add(n, Const(count * 8 * op.Regs))); + } + } + } + } + + public static void Vldm(ArmEmitterContext context) + { + OpCode32SimdMemMult op = (OpCode32SimdMemMult)context.CurrOp; + + Operand n = context.Copy(GetIntA32(context, op.Rn)); + + Operand baseAddress = context.Add(n, Const(op.Offset)); + + bool writeBack = op.PostOffset != 0; + + if (writeBack) + { + SetIntA32(context, op.Rn, context.Add(n, Const(op.PostOffset))); + } + + int range = op.RegisterRange; + + int sReg = (op.DoubleWidth) ? (op.Vd << 1) : op.Vd; + int offset = 0; + int byteSize = 4; + + for (int num = 0; num < range; num++, sReg++) + { + Operand address = context.Add(baseAddress, Const(offset)); + Operand vec = GetVecA32(sReg >> 2); + + EmitLoadSimd(context, address, vec, sReg >> 2, sReg & 3, WordSizeLog2); + offset += byteSize; + } + } + + public static void Vstm(ArmEmitterContext context) + { + OpCode32SimdMemMult op = (OpCode32SimdMemMult)context.CurrOp; + + Operand n = context.Copy(GetIntA32(context, op.Rn)); + + Operand baseAddress = context.Add(n, Const(op.Offset)); + + bool writeBack = op.PostOffset != 0; + + if (writeBack) + { + SetIntA32(context, op.Rn, context.Add(n, Const(op.PostOffset))); + } + + int offset = 0; + + int range = op.RegisterRange; + int sReg = (op.DoubleWidth) ? 
(op.Vd << 1) : op.Vd; + int byteSize = 4; + + for (int num = 0; num < range; num++, sReg++) + { + Operand address = context.Add(baseAddress, Const(offset)); + + EmitStoreSimd(context, address, sReg >> 2, sReg & 3, WordSizeLog2); + + offset += byteSize; + } + } + + public static void Vldr(ArmEmitterContext context) + { + EmitVLoadOrStore(context, AccessType.Load); + } + + public static void Vstr(ArmEmitterContext context) + { + EmitVLoadOrStore(context, AccessType.Store); + } + + private static void EmitDVectorStore(ArmEmitterContext context, Operand address, int vecD) + { + int vecQ = vecD >> 1; + int vecSElem = (vecD & 1) << 1; + Operand lblBigEndian = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag)); + + EmitStoreSimd(context, address, vecQ, vecSElem, WordSizeLog2); + EmitStoreSimd(context, context.Add(address, Const(4)), vecQ, vecSElem | 1, WordSizeLog2); + + context.Branch(lblEnd); + + context.MarkLabel(lblBigEndian); + + EmitStoreSimd(context, address, vecQ, vecSElem | 1, WordSizeLog2); + EmitStoreSimd(context, context.Add(address, Const(4)), vecQ, vecSElem, WordSizeLog2); + + context.MarkLabel(lblEnd); + } + + private static void EmitDVectorLoad(ArmEmitterContext context, Operand address, int vecD) + { + int vecQ = vecD >> 1; + int vecSElem = (vecD & 1) << 1; + Operand vec = GetVecA32(vecQ); + + Operand lblBigEndian = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag)); + + EmitLoadSimd(context, address, vec, vecQ, vecSElem, WordSizeLog2); + EmitLoadSimd(context, context.Add(address, Const(4)), vec, vecQ, vecSElem | 1, WordSizeLog2); + + context.Branch(lblEnd); + + context.MarkLabel(lblBigEndian); + + EmitLoadSimd(context, address, vec, vecQ, vecSElem | 1, WordSizeLog2); + EmitLoadSimd(context, context.Add(address, Const(4)), vec, vecQ, vecSElem, WordSizeLog2); + + context.MarkLabel(lblEnd); + } + + private static void EmitVLoadOrStore(ArmEmitterContext context, AccessType accType) + { + OpCode32SimdMemImm op = (OpCode32SimdMemImm)context.CurrOp; + + Operand n = context.Copy(GetIntA32(context, op.Rn)); + Operand m = GetMemM(context, setCarry: false); + + Operand address = op.Add + ? 
context.Add(n, m) + : context.Subtract(n, m); + + int size = op.Size; + + if ((accType & AccessType.Load) != 0) + { + if (size == DWordSizeLog2) + { + EmitDVectorLoad(context, address, op.Vd); + } + else + { + Operand vec = GetVecA32(op.Vd >> 2); + EmitLoadSimd(context, address, vec, op.Vd >> 2, (op.Vd & 3) << (2 - size), size); + } + } + else + { + if (size == DWordSizeLog2) + { + EmitDVectorStore(context, address, op.Vd); + } + else + { + EmitStoreSimd(context, address, op.Vd >> 2, (op.Vd & 3) << (2 - size), size); + } + } + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdMove32.cs b/ARMeilleure/Instructions/InstEmitSimdMove32.cs new file mode 100644 index 000000000..3fd42cbf4 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdMove32.cs @@ -0,0 +1,336 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper32; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Vmov_I(ArmEmitterContext context) + { + EmitVectorImmUnaryOp32(context, (op1) => op1); + } + + public static void Vmvn_I(ArmEmitterContext context) + { + EmitVectorImmUnaryOp32(context, (op1) => context.BitwiseExclusiveOr(op1, op1)); + } + + public static void Vmov_GS(ArmEmitterContext context) + { + OpCode32SimdMovGp op = (OpCode32SimdMovGp)context.CurrOp; + + Operand vec = GetVecA32(op.Vn >> 2); + if (op.Op == 1) + { + // To general purpose. + Operand value = context.VectorExtract(OperandType.I32, vec, op.Vn & 0x3); + SetIntA32(context, op.Rt, value); + } + else + { + // From general purpose. + Operand value = GetIntA32(context, op.Rt); + context.Copy(vec, context.VectorInsert(vec, value, op.Vn & 0x3)); + } + } + + public static void Vmov_G1(ArmEmitterContext context) + { + OpCode32SimdMovGpElem op = (OpCode32SimdMovGpElem)context.CurrOp; + + int index = op.Index + ((op.Vd & 1) << (3 - op.Size)); + if (op.Op == 1) + { + // To general purpose. + Operand value = EmitVectorExtract32(context, op.Vd >> 1, index, op.Size, !op.U); + SetIntA32(context, op.Rt, value); + } + else + { + // From general purpose. + Operand vec = GetVecA32(op.Vd >> 1); + Operand value = GetIntA32(context, op.Rt); + context.Copy(vec, EmitVectorInsert(context, vec, value, index, op.Size)); + } + } + + public static void Vmov_G2(ArmEmitterContext context) + { + OpCode32SimdMovGpDouble op = (OpCode32SimdMovGpDouble)context.CurrOp; + + Operand vec = GetVecA32(op.Vm >> 2); + int vm1 = op.Vm + 1; + bool sameOwnerVec = (op.Vm >> 2) == (vm1 >> 2); + Operand vec2 = sameOwnerVec ? vec : GetVecA32(vm1 >> 2); + if (op.Op == 1) + { + // To general purpose. + Operand lowValue = context.VectorExtract(OperandType.I32, vec, op.Vm & 3); + SetIntA32(context, op.Rt, lowValue); + + Operand highValue = context.VectorExtract(OperandType.I32, vec2, vm1 & 3); + SetIntA32(context, op.Rt2, highValue); + } + else + { + // From general purpose. 
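+ // Rt is inserted into Sm and Rt2 into Sm+1; the two S registers may live in different host vectors, which is what the sameOwnerVec check accounts for.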
+ Operand lowValue = GetIntA32(context, op.Rt); + Operand resultVec = context.VectorInsert(vec, lowValue, op.Vm & 3); + + Operand highValue = GetIntA32(context, op.Rt2); + + if (sameOwnerVec) + { + context.Copy(vec, context.VectorInsert(resultVec, highValue, vm1 & 3)); + } + else + { + context.Copy(vec, resultVec); + context.Copy(vec2, context.VectorInsert(vec2, highValue, vm1 & 3)); + } + } + } + + public static void Vmov_GD(ArmEmitterContext context) + { + OpCode32SimdMovGpDouble op = (OpCode32SimdMovGpDouble)context.CurrOp; + + Operand vec = GetVecA32(op.Vm >> 1); + if (op.Op == 1) + { + // To general purpose. + Operand value = context.VectorExtract(OperandType.I64, vec, op.Vm & 1); + SetIntA32(context, op.Rt, context.ConvertI64ToI32(value)); + SetIntA32(context, op.Rt2, context.ConvertI64ToI32(context.ShiftRightUI(value, Const(32)))); + } + else + { + // From general purpose. + Operand lowValue = GetIntA32(context, op.Rt); + Operand highValue = GetIntA32(context, op.Rt2); + + Operand value = context.BitwiseOr( + context.ZeroExtend32(OperandType.I64, lowValue), + context.ShiftLeft(context.ZeroExtend32(OperandType.I64, highValue), Const(32))); + + context.Copy(vec, context.VectorInsert(vec, value, op.Vm & 1)); + } + } + + public static void Vtbl(ArmEmitterContext context) + { + OpCode32SimdTbl op = (OpCode32SimdTbl)context.CurrOp; + + bool extension = op.Opc == 1; + + int elems = op.GetBytesCount() >> op.Size; + + int length = op.Length + 1; + + (int Qx, int Ix)[] tableTuples = new (int, int)[length]; + for (int i = 0; i < length; i++) + { + (int vn, int en) = GetQuadwordAndSubindex(op.Vn + i, op.RegisterSize); + tableTuples[i] = (vn, en); + } + + int byteLength = length * 8; + + Operand res = GetVecA32(op.Qd); + Operand m = GetVecA32(op.Qm); + + for (int index = 0; index < elems; index++) + { + Operand selectedIndex = context.ZeroExtend8(OperandType.I32, context.VectorExtract8(m, index + op.Im)); + + Operand inRange = context.ICompareLess(selectedIndex, Const(byteLength)); + Operand elemRes = null; // Note: This is I64 for ease of calculation. + + // TODO: Branching rather than conditional select. + + // Get indexed byte. + // To simplify (ha) the il, we get bytes from every vector and use a nested conditional select to choose the right result. + // This does have to extract `length` times for every element but certainly not as bad as it could be. + + // Which vector number is the index on. + Operand vecIndex = context.ShiftRightUI(selectedIndex, Const(3)); + // What should we shift by to extract it. + Operand subVecIndexShift = context.ShiftLeft(context.BitwiseAnd(selectedIndex, Const(7)), Const(3)); + + for (int i = 0; i < length; i++) + { + (int qx, int ix) = tableTuples[i]; + // Get the whole vector, we'll get a byte out of it. + Operand lookupResult; + if (qx == op.Qd) + { + // Result contains the current state of the vector. + lookupResult = context.VectorExtract(OperandType.I64, res, ix); + } + else + { + lookupResult = EmitVectorExtract32(context, qx, ix, 3, false); // I64 + } + + lookupResult = context.ShiftRightUI(lookupResult, subVecIndexShift); // Get the relevant byte from this vector. + + if (i == 0) + { + elemRes = lookupResult; // First result is always default. + } + else + { + Operand isThisElem = context.ICompareEqual(vecIndex, Const(i)); + elemRes = context.ConditionalSelect(isThisElem, lookupResult, elemRes); + } + } + + Operand fallback = (extension) ? 
context.ZeroExtend32(OperandType.I64, EmitVectorExtract32(context, op.Qd, index + op.Id, 0, false)) : Const(0L); + + res = EmitVectorInsert(context, res, context.ConditionalSelect(inRange, elemRes, fallback), index + op.Id, 0); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void Vtrn(ArmEmitterContext context) + { + OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp; + + int elems = op.GetBytesCount() >> op.Size; + int pairs = elems >> 1; + + bool overlap = op.Qm == op.Qd; + + Operand resD = GetVecA32(op.Qd); + Operand resM = GetVecA32(op.Qm); + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + Operand d2 = EmitVectorExtract32(context, op.Qd, pairIndex + 1 + op.Id, op.Size, false); + Operand m1 = EmitVectorExtract32(context, op.Qm, pairIndex + op.Im, op.Size, false); + + resD = EmitVectorInsert(context, resD, m1, pairIndex + 1 + op.Id, op.Size); + + if (overlap) + { + resM = resD; + } + + resM = EmitVectorInsert(context, resM, d2, pairIndex + op.Im, op.Size); + + if (overlap) + { + resD = resM; + } + } + + context.Copy(GetVecA32(op.Qd), resD); + if (!overlap) + { + context.Copy(GetVecA32(op.Qm), resM); + } + } + + public static void Vzip(ArmEmitterContext context) + { + OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp; + + int elems = op.GetBytesCount() >> op.Size; + int pairs = elems >> 1; + + bool overlap = op.Qm == op.Qd; + + Operand resD = GetVecA32(op.Qd); + Operand resM = GetVecA32(op.Qm); + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + Operand dRowD = EmitVectorExtract32(context, op.Qd, index + op.Id, op.Size, false); + Operand mRowD = EmitVectorExtract32(context, op.Qm, index + op.Im, op.Size, false); + + Operand dRowM = EmitVectorExtract32(context, op.Qd, index + op.Id + pairs, op.Size, false); + Operand mRowM = EmitVectorExtract32(context, op.Qm, index + op.Im + pairs, op.Size, false); + + resD = EmitVectorInsert(context, resD, dRowD, pairIndex + op.Id, op.Size); + resD = EmitVectorInsert(context, resD, mRowD, pairIndex + 1 + op.Id, op.Size); + + if (overlap) + { + resM = resD; + } + + resM = EmitVectorInsert(context, resM, dRowM, pairIndex + op.Im, op.Size); + resM = EmitVectorInsert(context, resM, mRowM, pairIndex + 1 + op.Im, op.Size); + + if (overlap) + { + resD = resM; + } + } + + context.Copy(GetVecA32(op.Qd), resD); + if (!overlap) + { + context.Copy(GetVecA32(op.Qm), resM); + } + } + + public static void Vuzp(ArmEmitterContext context) + { + OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp; + + int elems = op.GetBytesCount() >> op.Size; + int pairs = elems >> 1; + + bool overlap = op.Qm == op.Qd; + + Operand resD = GetVecA32(op.Qd); + Operand resM = GetVecA32(op.Qm); + + for (int index = 0; index < elems; index++) + { + Operand dIns, mIns; + if (index >= pairs) + { + int pind = index - pairs; + dIns = EmitVectorExtract32(context, op.Qm, (pind << 1) + op.Im, op.Size, false); + mIns = EmitVectorExtract32(context, op.Qm, ((pind << 1) | 1) + op.Im, op.Size, false); + } + else + { + dIns = EmitVectorExtract32(context, op.Qd, (index << 1) + op.Id, op.Size, false); + mIns = EmitVectorExtract32(context, op.Qd, ((index << 1) | 1) + op.Id, op.Size, false); + } + + resD = EmitVectorInsert(context, resD, dIns, index + op.Id, op.Size); + + if (overlap) + { + resM = resD; + } + + resM = EmitVectorInsert(context, resM, mIns, index + op.Im, op.Size); + + if (overlap) + { + resD = resM; + } + } + + context.Copy(GetVecA32(op.Qd), resD); + if (!overlap) + { + 
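+ // Qm is only written back when it is a different register from Qd; the overlapping case was already merged into resD.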
context.Copy(GetVecA32(op.Qm), resM); + } + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSimdShift32.cs b/ARMeilleure/Instructions/InstEmitSimdShift32.cs new file mode 100644 index 000000000..893854763 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSimdShift32.cs @@ -0,0 +1,100 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; + +using static ARMeilleure.Instructions.InstEmitSimdHelper32; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Vshl(ArmEmitterContext context) + { + OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; + + EmitVectorUnaryOpZx32(context, (op1) => context.ShiftLeft(op1, Const(op.Shift))); + } + + public static void Vshl_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + if (op.U) + { + EmitVectorBinaryOpZx32(context, (op1, op2) => EmitShlRegOp(context, op2, op1, op.Size, true)); + } + else + { + EmitVectorBinaryOpSx32(context, (op1, op2) => EmitShlRegOp(context, op2, op1, op.Size, false)); + } + } + + public static void Vshr(ArmEmitterContext context) + { + OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; + int shift = (8 << op.Size) - op.Shift; // Shr amount is flipped. + int maxShift = (8 << op.Size) - 1; + + if (op.U) + { + EmitVectorUnaryOpZx32(context, (op1) => (shift > maxShift) ? Const(op1.Type, 0) : context.ShiftRightUI(op1, Const(shift))); + } + else + { + EmitVectorUnaryOpSx32(context, (op1) => context.ShiftRightSI(op1, Const(Math.Min(maxShift, shift)))); + } + } + + public static void Vshrn(ArmEmitterContext context) + { + OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; + int shift = (8 << op.Size) - op.Shift; // Shr amount is flipped. + + EmitVectorUnaryNarrowOp32(context, (op1) => context.ShiftRightUI(op1, Const(shift))); + } + + private static Operand EmitShlRegOp(ArmEmitterContext context, Operand op, Operand shiftLsB, int size, bool unsigned) + { + if (shiftLsB.Type == OperandType.I64) + { + shiftLsB = context.ConvertI64ToI32(shiftLsB); + } + + shiftLsB = context.SignExtend8(OperandType.I32, shiftLsB); + Debug.Assert((uint)size < 4u); + + Operand negShiftLsB = context.Negate(shiftLsB); + + Operand isPositive = context.ICompareGreaterOrEqual(shiftLsB, Const(0)); + + Operand shl = context.ShiftLeft(op, shiftLsB); + Operand shr = unsigned ? context.ShiftRightUI(op, negShiftLsB) : context.ShiftRightSI(op, negShiftLsB); + + Operand res = context.ConditionalSelect(isPositive, shl, shr); + + if (unsigned) + { + Operand isOutOfRange = context.BitwiseOr( + context.ICompareGreaterOrEqual(shiftLsB, Const(8 << size)), + context.ICompareGreaterOrEqual(negShiftLsB, Const(8 << size))); + + return context.ConditionalSelect(isOutOfRange, Const(op.Type, 0), res); + } + else + { + Operand isOutOfRange0 = context.ICompareGreaterOrEqual(shiftLsB, Const(8 << size)); + Operand isOutOfRangeN = context.ICompareGreaterOrEqual(negShiftLsB, Const(8 << size)); + + // Also zero if shift is too negative, but value was positive. + isOutOfRange0 = context.BitwiseOr(isOutOfRange0, context.BitwiseAnd(isOutOfRangeN, context.ICompareGreaterOrEqual(op, Const(op.Type, 0)))); + + Operand min = (op.Type == OperandType.I64) ? 
Const(-1L) : Const(-1); + + return context.ConditionalSelect(isOutOfRange0, Const(op.Type, 0), context.ConditionalSelect(isOutOfRangeN, min, res)); + } + } + } +} diff --git a/ARMeilleure/Instructions/InstEmitSystem32.cs b/ARMeilleure/Instructions/InstEmitSystem32.cs new file mode 100644 index 000000000..808b4fdd7 --- /dev/null +++ b/ARMeilleure/Instructions/InstEmitSystem32.cs @@ -0,0 +1,233 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.OperandHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Mcr(ArmEmitterContext context) + { + OpCode32System op = (OpCode32System)context.CurrOp; + + if (op.Coproc != 15) + { + throw new NotImplementedException($"Unknown MRC Coprocessor ID 0x{op.Coproc:X16} at 0x{op.Address:X16}."); + } + + if (op.Opc1 != 0) + { + throw new NotImplementedException($"Unknown MRC Opc1 0x{op.Opc1:X16} at 0x{op.Address:X16}."); + } + + Delegate dlg; + switch (op.CRn) + { + case 13: // Process and Thread Info. + if (op.CRm != 0) + { + throw new NotImplementedException($"Unknown MRC CRm 0x{op.CRm:X16} at 0x{op.Address:X16}."); + } + switch (op.Opc2) + { + case 2: + dlg = new _Void_U32(NativeInterface.SetTpidrEl032); break; + default: + throw new NotImplementedException($"Unknown MRC Opc2 0x{op.Opc2:X16} at 0x{op.Address:X16}."); + } + break; + + case 7: + switch (op.CRm) // Cache and Memory barrier. + { + case 10: + switch (op.Opc2) + { + case 5: // Data Memory Barrier Register. + return; // No-op. + default: + throw new NotImplementedException($"Unknown MRC Opc2 0x{op.Opc2:X16} at 0x{op.Address:X16}."); + } + default: + throw new NotImplementedException($"Unknown MRC CRm 0x{op.CRm:X16} at 0x{op.Address:X16}."); + } + + default: + throw new NotImplementedException($"Unknown MRC 0x{op.RawOpCode:X8} at 0x{op.Address:X16}."); + } + + context.Call(dlg, GetIntA32(context, op.Rt)); + } + + public static void Mrc(ArmEmitterContext context) + { + OpCode32System op = (OpCode32System)context.CurrOp; + + if (op.Coproc != 15) + { + throw new NotImplementedException($"Unknown MRC Coprocessor ID 0x{op.Coproc:X16} at 0x{op.Address:X16}."); + } + + if (op.Opc1 != 0) + { + throw new NotImplementedException($"Unknown MRC Opc1 0x{op.Opc1:X16} at 0x{op.Address:X16}."); + } + + Delegate dlg; + switch (op.CRn) + { + case 13: // Process and Thread Info. + if (op.CRm != 0) + { + throw new NotImplementedException($"Unknown MRC CRm 0x{op.CRm:X16} at 0x{op.Address:X16}."); + } + switch (op.Opc2) + { + case 2: + dlg = new _U32(NativeInterface.GetTpidrEl032); break; + case 3: + dlg = new _U32(NativeInterface.GetTpidr32); break; + default: + throw new NotImplementedException($"Unknown MRC Opc2 0x{op.Opc2:X16} at 0x{op.Address:X16}."); + } + break; + default: + throw new NotImplementedException($"Unknown MRC 0x{op.RawOpCode:X8} at 0x{op.Address:X16}."); + } + + if (op.Rt == RegisterAlias.Aarch32Pc) + { + // Special behavior: copy NZCV flags into APSR. 
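+ // EmitSetNzcv extracts bits 31 to 28 of the returned value into the N, Z, C and V flags.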
+ EmitSetNzcv(context, context.Call(dlg)); + + return; + } + else + { + SetIntA32(context, op.Rt, context.Call(dlg)); + } + } + + public static void Mrrc(ArmEmitterContext context) + { + OpCode32System op = (OpCode32System)context.CurrOp; + + if (op.Coproc != 15) + { + throw new NotImplementedException($"Unknown MRC Coprocessor ID 0x{op.Coproc:X16} at 0x{op.Address:X16}."); + } + + var opc = op.MrrcOp; + + Delegate dlg; + switch (op.CRm) + { + case 14: // Timer. + switch (opc) + { + case 0: + dlg = new _U64(NativeInterface.GetCntpctEl0); break; + default: + throw new NotImplementedException($"Unknown MRRC Opc1 0x{opc:X16} at 0x{op.Address:X16}."); + } + break; + default: + throw new NotImplementedException($"Unknown MRRC 0x{op.RawOpCode:X8} at 0x{op.Address:X16}."); + } + + Operand result = context.Call(dlg); + + SetIntA32(context, op.Rt, context.ConvertI64ToI32(result)); + SetIntA32(context, op.CRn, context.ConvertI64ToI32(context.ShiftRightUI(result, Const(32)))); + } + + public static void Nop(ArmEmitterContext context) { } + + public static void Vmrs(ArmEmitterContext context) + { + OpCode32SimdSpecial op = (OpCode32SimdSpecial)context.CurrOp; + + if (op.Rt == RegisterAlias.Aarch32Pc && op.Sreg == 0b0001) + { + // Special behavior: copy NZCV flags into APSR. + SetFlag(context, PState.VFlag, GetFpFlag(FPState.VFlag)); + SetFlag(context, PState.CFlag, GetFpFlag(FPState.CFlag)); + SetFlag(context, PState.ZFlag, GetFpFlag(FPState.ZFlag)); + SetFlag(context, PState.NFlag, GetFpFlag(FPState.NFlag)); + return; + } + + Delegate dlg; + switch (op.Sreg) + { + case 0b0000: // FPSID + throw new NotImplementedException("Supervisor Only"); + case 0b0001: // FPSCR + dlg = new _U32(NativeInterface.GetFpscr); break; + case 0b0101: // MVFR2 + throw new NotImplementedException("MVFR2"); + case 0b0110: // MVFR1 + throw new NotImplementedException("MVFR1"); + case 0b0111: // MVFR0 + throw new NotImplementedException("MVFR0"); + case 0b1000: // FPEXC + throw new NotImplementedException("Supervisor Only"); + default: + throw new NotImplementedException($"Unknown VMRS 0x{op.RawOpCode:X8} at 0x{op.Address:X16}."); + } + + SetIntA32(context, op.Rt, context.Call(dlg)); + } + + public static void Vmsr(ArmEmitterContext context) + { + OpCode32SimdSpecial op = (OpCode32SimdSpecial)context.CurrOp; + + Delegate dlg; + switch (op.Sreg) + { + case 0b0000: // FPSID + throw new NotImplementedException("Supervisor Only"); + case 0b0001: // FPSCR + dlg = new _Void_U32(NativeInterface.SetFpscr); break; + case 0b0101: // MVFR2 + throw new NotImplementedException("MVFR2"); + case 0b0110: // MVFR1 + throw new NotImplementedException("MVFR1"); + case 0b0111: // MVFR0 + throw new NotImplementedException("MVFR0"); + case 0b1000: // FPEXC + throw new NotImplementedException("Supervisor Only"); + default: + throw new NotImplementedException($"Unknown VMSR 0x{op.RawOpCode:X8} at 0x{op.Address:X16}."); + } + + context.Call(dlg, GetIntA32(context, op.Rt)); + } + + private static void EmitSetNzcv(ArmEmitterContext context, Operand t) + { + Operand v = context.ShiftRightUI(t, Const((int)PState.VFlag)); + v = context.BitwiseAnd(v, Const(1)); + + Operand c = context.ShiftRightUI(t, Const((int)PState.CFlag)); + c = context.BitwiseAnd(c, Const(1)); + + Operand z = context.ShiftRightUI(t, Const((int)PState.ZFlag)); + z = context.BitwiseAnd(z, Const(1)); + + Operand n = context.ShiftRightUI(t, Const((int)PState.NFlag)); + n = context.BitwiseAnd(n, Const(1)); + + SetFlag(context, PState.VFlag, v); + SetFlag(context, PState.CFlag, c); + 
SetFlag(context, PState.ZFlag, z); + SetFlag(context, PState.NFlag, n); + } + } +} diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs index c81484a6f..0c2dd18d4 100644 --- a/ARMeilleure/Instructions/InstName.cs +++ b/ARMeilleure/Instructions/InstName.cs @@ -82,6 +82,7 @@ namespace ARMeilleure.Instructions Smaddl, Smsubl, Smulh, + Smull, Stlr, Stlxp, Stlxr, @@ -92,6 +93,8 @@ namespace ARMeilleure.Instructions Sub, Subs, Svc, + Sxtb, + Sxth, Sys, Tbnz, Tbz, @@ -445,19 +448,140 @@ namespace ARMeilleure.Instructions Zip2_V, // Base (AArch32) + Bfc, + Bfi, Blx, Bx, Cmp, + Cmn, + Movt, + Mul, + Lda, + Ldab, + Ldaex, + Ldaexb, + Ldaexd, + Ldaexh, + Ldah, Ldm, Ldrb, Ldrd, + Ldrex, + Ldrexb, + Ldrexd, + Ldrexh, Ldrh, Ldrsb, Ldrsh, + Mcr, + Mla, + Mls, Mov, + Mrc, + Mrrc, + Mvn, + Pkh, + Pld, + Rev, + Revsh, + Rsb, + Rsc, + Sbfx, + Smlab, + Smlal, + Smlalh, + Smmla, + Smmls, + Smmul, + Stl, + Stlb, + Stlex, + Stlexb, + Stlexd, + Stlexh, + Stlh, Stm, Strb, Strd, - Strh + Strex, + Strexb, + Strexd, + Strexh, + Strh, + Sxtb16, + Teq, + Trap, + Tst, + Ubfx, + Umlal, + Umull, + Uxtb, + Uxtb16, + Uxth, + + // FP & SIMD (AArch32) + Vabs, + Vadd, + Vand, + Vbif, + Vbit, + Vbsl, + Vceq, + Vcge, + Vcgt, + Vcle, + Vclt, + Vcmp, + Vcmpe, + Vcvt, + Vdiv, + Vdup, + Vext, + Vld1, + Vld2, + Vld3, + Vld4, + Vldm, + Vldr, + Vmax, + Vmaxnm, + Vmin, + Vminnm, + Vmla, + Vmls, + Vmov, + Vmovn, + Vmrs, + Vmsr, + Vmul, + Vmvn, + Vneg, + Vnmul, + Vnmla, + Vnmls, + Vorr, + Vpadd, + Vrev, + Vrint, + Vsel, + Vshl, + Vshr, + Vshrn, + Vst1, + Vst2, + Vst3, + Vst4, + Vstm, + Vstr, + Vsqrt, + Vrecpe, + Vrecps, + Vrsqrte, + Vrsqrts, + Vsub, + Vtbl, + Vtrn, + Vuzp, + Vzip, } } diff --git a/ARMeilleure/Instructions/NativeInterface.cs b/ARMeilleure/Instructions/NativeInterface.cs index 3a1e91c8e..988e86bd7 100644 --- a/ARMeilleure/Instructions/NativeInterface.cs +++ b/ARMeilleure/Instructions/NativeInterface.cs @@ -87,16 +87,39 @@ namespace ARMeilleure.Instructions return (ulong)GetContext().Fpsr; } + public static uint GetFpscr() + { + ExecutionContext context = GetContext(); + uint result = (uint)(context.Fpsr & FPSR.A32Mask) | (uint)(context.Fpcr & FPCR.A32Mask); + + result |= context.GetFPstateFlag(FPState.NFlag) ? (1u << 31) : 0; + result |= context.GetFPstateFlag(FPState.ZFlag) ? (1u << 30) : 0; + result |= context.GetFPstateFlag(FPState.CFlag) ? (1u << 29) : 0; + result |= context.GetFPstateFlag(FPState.VFlag) ? 
(1u << 28) : 0; + + return result; + } + public static ulong GetTpidrEl0() { return (ulong)GetContext().TpidrEl0; } + public static uint GetTpidrEl032() + { + return (uint)GetContext().TpidrEl0; + } + public static ulong GetTpidr() { return (ulong)GetContext().Tpidr; } + public static uint GetTpidr32() + { + return (uint)GetContext().Tpidr; + } + public static ulong GetCntfrqEl0() { return GetContext().CntfrqEl0; @@ -117,13 +140,31 @@ namespace ARMeilleure.Instructions GetContext().Fpsr = (FPSR)value; } + public static void SetFpscr(uint value) + { + ExecutionContext context = GetContext(); + + context.SetFPstateFlag(FPState.NFlag, (value & (1u << 31)) != 0); + context.SetFPstateFlag(FPState.ZFlag, (value & (1u << 30)) != 0); + context.SetFPstateFlag(FPState.CFlag, (value & (1u << 29)) != 0); + context.SetFPstateFlag(FPState.VFlag, (value & (1u << 28)) != 0); + + context.Fpsr = FPSR.A32Mask & (FPSR)value; + context.Fpcr = FPCR.A32Mask & (FPCR)value; + } + public static void SetTpidrEl0(ulong value) { GetContext().TpidrEl0 = (long)value; } -#endregion -#region "Read" + public static void SetTpidrEl032(uint value) + { + GetContext().TpidrEl0 = (long)value; + } + #endregion + + #region "Read" public static byte ReadByte(ulong address) { return GetMemoryManager().ReadByte((long)address); diff --git a/ARMeilleure/Instructions/SoftFallback.cs b/ARMeilleure/Instructions/SoftFallback.cs index 10bb47df5..611e8d6ac 100644 --- a/ARMeilleure/Instructions/SoftFallback.cs +++ b/ARMeilleure/Instructions/SoftFallback.cs @@ -420,6 +420,26 @@ namespace ARMeilleure.Instructions return MathF.Truncate(value); } } + + public static int FloatToInt32(float value) + { + return SatF32ToS32(RoundF(value)); + } + + public static int DoubleToInt32(double value) + { + return SatF64ToS32(Round(value)); + } + + public static uint FloatToUInt32(float value) + { + return SatF32ToU32(RoundF(value)); + } + + public static uint DoubleToUInt32(double value) + { + return SatF64ToU32(Round(value)); + } #endregion #region "Saturation" diff --git a/ARMeilleure/Instructions/SoftFloat.cs b/ARMeilleure/Instructions/SoftFloat.cs index 256bc5b97..d3e15a2ce 100644 --- a/ARMeilleure/Instructions/SoftFloat.cs +++ b/ARMeilleure/Instructions/SoftFloat.cs @@ -121,7 +121,7 @@ namespace ARMeilleure.Instructions private static float FPDefaultNaN() { - return -float.NaN; + return BitConverter.Int32BitsToSingle(0x7fc00000); } private static float FPInfinity(bool sign) @@ -622,13 +622,19 @@ namespace ARMeilleure.Instructions static class SoftFloat32 { public static float FPAdd(float value1, float value2) + { + return FPAddFpscr(value1, value2, false); + } + + public static float FPAddFpscr(float value1, float value2, bool standardFpscr) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -639,7 +645,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if ((inf1 && !sign1) || (inf2 && !sign2)) { @@ -657,7 +663,7 @@ namespace ARMeilleure.Instructions { result = value1 + value2; - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -672,9 +678,10 @@ namespace ARMeilleure.Instructions public static int FPCompare(float value1, float value2, bool signalNaNs) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out _, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out _, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out _, context, fpcr); int result; @@ -684,7 +691,7 @@ namespace ARMeilleure.Instructions if (type1 == FPType.SNaN || type2 == FPType.SNaN || signalNaNs) { - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } } else @@ -708,10 +715,16 @@ namespace ARMeilleure.Instructions public static float FPCompareEQ(float value1, float value2) { - ExecutionContext context = NativeInterface.GetContext(); + return FPCompareEQFpscr(value1, value2, false); + } - value1 = value1.FPUnpack(out FPType type1, out _, out _, context); - value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + public static float FPCompareEQFpscr(float value1, float value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); float result; @@ -721,7 +734,7 @@ namespace ARMeilleure.Instructions if (type1 == FPType.SNaN || type2 == FPType.SNaN) { - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } } else @@ -734,10 +747,16 @@ namespace ARMeilleure.Instructions public static float FPCompareGE(float value1, float value2) { - ExecutionContext context = NativeInterface.GetContext(); + return FPCompareGEFpscr(value1, value2, false); + } - value1 = value1.FPUnpack(out FPType type1, out _, out _, context); - value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + public static float FPCompareGEFpscr(float value1, float value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); float result; @@ -745,7 +764,7 @@ namespace ARMeilleure.Instructions { result = ZerosOrOnes(false); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else { @@ -757,10 +776,16 @@ namespace ARMeilleure.Instructions public static float FPCompareGT(float value1, float value2) { - ExecutionContext context = NativeInterface.GetContext(); + return FPCompareGTFpscr(value1, value2, false); + } - value1 = value1.FPUnpack(out FPType type1, out _, out _, context); - value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + public static float FPCompareGTFpscr(float value1, float value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); float result; @@ -768,7 +793,7 @@ namespace ARMeilleure.Instructions { result = ZerosOrOnes(false); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else { @@ -788,14 +813,25 @@ namespace ARMeilleure.Instructions return FPCompareGT(value2, value1); } + public static float FPCompareLEFpscr(float value1, float value2, bool standardFpscr) + { + return FPCompareGEFpscr(value2, value1, standardFpscr); + } + + public static float FPCompareLTFpscr(float value1, float value2, bool standardFpscr) + { + return FPCompareGTFpscr(value2, value1, standardFpscr); + } + public static float FPDiv(float value1, float value2) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -806,7 +842,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if (inf1 || zero2) { @@ -814,7 +850,7 @@ namespace ARMeilleure.Instructions if (!inf1) { - FPProcessException(FPException.DivideByZero, context); + FPProcessException(FPException.DivideByZero, context, fpcr); } } else if (zero1 || inf2) @@ -825,7 +861,7 @@ namespace ARMeilleure.Instructions { result = value1 / value2; - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -838,13 +874,19 @@ namespace ARMeilleure.Instructions } public static float FPMax(float value1, float value2) + { + return FPMaxFpscr(value1, value2, false); + } + + public static float FPMaxFpscr(float value1, float value2, bool standardFpscr) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -877,7 +919,7 @@ namespace ARMeilleure.Instructions { result = value2; - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -892,10 +934,16 @@ namespace ARMeilleure.Instructions public static float FPMaxNum(float value1, float value2) { - ExecutionContext context = NativeInterface.GetContext(); + return FPMaxNumFpscr(value1, value2, false); + } - value1.FPUnpack(out FPType type1, out _, out _, context); - value2.FPUnpack(out FPType type2, out _, out _, context); + public static float FPMaxNumFpscr(float value1, float value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); if (type1 == FPType.QNaN && type2 != FPType.QNaN) { @@ -906,17 +954,23 @@ namespace ARMeilleure.Instructions value2 = FPInfinity(true); } - return FPMax(value1, value2); + return FPMaxFpscr(value1, value2, standardFpscr); } public static float FPMin(float value1, float value2) + { + return FPMinFpscr(value1, value2, false); + } + + public static float FPMinFpscr(float value1, float value2, bool standardFpscr) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -949,7 +1003,7 @@ namespace ARMeilleure.Instructions { result = value2; - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -964,10 +1018,16 @@ namespace ARMeilleure.Instructions public static float FPMinNum(float value1, float value2) { - ExecutionContext context = NativeInterface.GetContext(); + return FPMinNumFpscr(value1, value2, false); + } - value1.FPUnpack(out FPType type1, out _, out _, context); - value2.FPUnpack(out FPType type2, out _, out _, context); + public static float FPMinNumFpscr(float value1, float value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
context.StandardFpcrValue : context.Fpcr; + + value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); if (type1 == FPType.QNaN && type2 != FPType.QNaN) { @@ -978,17 +1038,23 @@ namespace ARMeilleure.Instructions value2 = FPInfinity(false); } - return FPMin(value1, value2); + return FPMinFpscr(value1, value2, standardFpscr); } public static float FPMul(float value1, float value2) + { + return FPMulFpscr(value1, value2, false); + } + + public static float FPMulFpscr(float value1, float value2, bool standardFpscr) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -999,7 +1065,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if (inf1 || inf2) { @@ -1013,7 +1079,7 @@ namespace ARMeilleure.Instructions { result = value1 * value2; - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1027,22 +1093,28 @@ namespace ARMeilleure.Instructions public static float FPMulAdd(float valueA, float value1, float value2) { - ExecutionContext context = NativeInterface.GetContext(); + return FPMulAddFpscr(valueA, value1, value2, false); + } - valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out uint addend, context); - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + public static float FPMulAddFpscr(float valueA, float value1, float value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
context.StandardFpcrValue : context.Fpcr; + + valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out uint addend, context, fpcr); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; - float result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context); + float result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context, fpcr); if (typeA == FPType.QNaN && ((inf1 && zero2) || (zero1 && inf2))) { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } if (!done) @@ -1057,7 +1129,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if ((infA && !signA) || (infP && !signP)) { @@ -1075,7 +1147,7 @@ namespace ARMeilleure.Instructions { result = MathF.FusedMultiplyAdd(value1, value2, valueA); - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1094,14 +1166,22 @@ namespace ARMeilleure.Instructions return FPMulAdd(valueA, value1, value2); } + public static float FPMulSubFpscr(float valueA, float value1, float value2, bool standardFpscr) + { + value1 = value1.FPNeg(); + + return FPMulAddFpscr(valueA, value1, value2, standardFpscr); + } + public static float FPMulX(float value1, float value2) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -1124,7 +1204,7 @@ namespace ARMeilleure.Instructions { result = value1 * value2; - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1153,15 +1233,21 @@ namespace ARMeilleure.Instructions public static float FPRecipEstimate(float value) { - ExecutionContext context = NativeInterface.GetContext(); + return FPRecipEstimateFpscr(value, false); + } - value.FPUnpack(out FPType type, out bool sign, out uint op, context); + public static float FPRecipEstimateFpscr(float value, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
context.StandardFpcrValue : context.Fpcr; + + value.FPUnpack(out FPType type, out bool sign, out uint op, context, fpcr); float result; if (type == FPType.SNaN || type == FPType.QNaN) { - result = FPProcessNaN(type, op, context); + result = FPProcessNaN(type, op, context, fpcr); } else if (type == FPType.Infinity) { @@ -1171,13 +1257,13 @@ namespace ARMeilleure.Instructions { result = FPInfinity(sign); - FPProcessException(FPException.DivideByZero, context); + FPProcessException(FPException.DivideByZero, context, fpcr); } else if (MathF.Abs(value) < MathF.Pow(2f, -128)) { bool overflowToInf; - switch (context.Fpcr.GetRoundingMode()) + switch (fpcr.GetRoundingMode()) { default: case FPRoundingMode.ToNearest: overflowToInf = true; break; @@ -1188,10 +1274,10 @@ namespace ARMeilleure.Instructions result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); - FPProcessException(FPException.Overflow, context); - FPProcessException(FPException.Inexact, context); + FPProcessException(FPException.Overflow, context, fpcr); + FPProcessException(FPException.Inexact, context, fpcr); } - else if ((context.Fpcr & FPCR.Fz) != 0 && (MathF.Abs(value) >= MathF.Pow(2f, 126))) + else if ((fpcr & FPCR.Fz) != 0 && (MathF.Abs(value) >= MathF.Pow(2f, 126))) { result = FPZero(sign); @@ -1240,16 +1326,49 @@ namespace ARMeilleure.Instructions return result; } + public static float FPRecipStep(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.StandardFpcrValue; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + float product; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + product = FPZero(false); + } + else + { + product = FPMulFpscr(value1, value2, true); + } + + result = FPSubFpscr(FPTwo(false), product, true); + } + + return result; + } + public static float FPRecipStepFused(float value1, float value2) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; value1 = value1.FPNeg(); - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -1268,7 +1387,7 @@ namespace ARMeilleure.Instructions { result = MathF.FusedMultiplyAdd(value1, value2, 2f); - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1283,14 +1402,15 @@ namespace ARMeilleure.Instructions public static float FPRecpX(float value) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value.FPUnpack(out FPType type, out bool sign, out uint op, context); + value.FPUnpack(out FPType type, out bool sign, out uint op, context, fpcr); float result; if (type == FPType.SNaN 
|| type == FPType.QNaN) { - result = FPProcessNaN(type, op, context); + result = FPProcessNaN(type, op, context, fpcr); } else { @@ -1306,27 +1426,33 @@ namespace ARMeilleure.Instructions public static float FPRSqrtEstimate(float value) { - ExecutionContext context = NativeInterface.GetContext(); + return FPRSqrtEstimateFpscr(value, false); + } - value.FPUnpack(out FPType type, out bool sign, out uint op, context); + public static float FPRSqrtEstimateFpscr(float value, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value.FPUnpack(out FPType type, out bool sign, out uint op, context, fpcr); float result; if (type == FPType.SNaN || type == FPType.QNaN) { - result = FPProcessNaN(type, op, context); + result = FPProcessNaN(type, op, context, fpcr); } else if (type == FPType.Zero) { result = FPInfinity(sign); - FPProcessException(FPException.DivideByZero, context); + FPProcessException(FPException.DivideByZero, context, fpcr); } else if (sign) { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if (type == FPType.Infinity) { @@ -1369,16 +1495,95 @@ namespace ARMeilleure.Instructions return result; } + public static float FPHalvedSub(float value1, float value2, ExecutionContext context, FPCR fpcr) + { + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if (inf1 && inf2 && sign1 == sign2) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context, fpcr); + } + else if ((inf1 && !sign1) || (inf2 && sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && !sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == !sign2) + { + result = FPZero(sign1); + } + else + { + result = (value1 - value2) / 2.0f; + + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + public static float FPRSqrtStep(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.StandardFpcrValue; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + float product; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + product = FPZero(false); + } + else + { + product = FPMulFpscr(value1, value2, true); + } + + result = FPHalvedSub(FPThree(false), product, context, fpcr); + } + + return result; + } + public static float FPRSqrtStepFused(float value1, float value2) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; value1 = value1.FPNeg(); - value1 = value1.FPUnpack(out FPType type1, out bool 
sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -1397,7 +1602,7 @@ namespace ARMeilleure.Instructions { result = MathF.FusedMultiplyAdd(value1, value2, 3f) / 2f; - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1412,14 +1617,15 @@ namespace ARMeilleure.Instructions public static float FPSqrt(float value) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value = value.FPUnpack(out FPType type, out bool sign, out uint op, context); + value = value.FPUnpack(out FPType type, out bool sign, out uint op, context, fpcr); float result; if (type == FPType.SNaN || type == FPType.QNaN) { - result = FPProcessNaN(type, op, context); + result = FPProcessNaN(type, op, context, fpcr); } else if (type == FPType.Zero) { @@ -1433,13 +1639,13 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else { result = MathF.Sqrt(value); - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1451,13 +1657,19 @@ namespace ARMeilleure.Instructions } public static float FPSub(float value1, float value2) + { + return FPSubFpscr(value1, value2, false); + } + + public static float FPSubFpscr(float value1, float value2, bool standardFpscr) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); - float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -1468,7 +1680,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if ((inf1 && !sign1) || (inf2 && sign2)) { @@ -1486,7 +1698,7 @@ namespace ARMeilleure.Instructions { result = value1 - value2; - if ((context.Fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1500,7 +1712,7 @@ namespace ARMeilleure.Instructions private static float FPDefaultNaN() { - return -float.NaN; + return BitConverter.Int32BitsToSingle(0x7fc00000); } private static float FPInfinity(bool sign) @@ -1523,6 +1735,11 @@ namespace ARMeilleure.Instructions return sign ? -2f : +2f; } + private static float FPThree(bool sign) + { + return sign ? -3f : +3f; + } + private static float FPOnePointFive(bool sign) { return sign ? 
-1.5f : +1.5f; @@ -1543,7 +1760,8 @@ namespace ARMeilleure.Instructions out FPType type, out bool sign, out uint valueBits, - ExecutionContext context) + ExecutionContext context, + FPCR fpcr) { valueBits = (uint)BitConverter.SingleToInt32Bits(value); @@ -1551,14 +1769,14 @@ namespace ARMeilleure.Instructions if ((valueBits & 0x7F800000u) == 0u) { - if ((valueBits & 0x007FFFFFu) == 0u || (context.Fpcr & FPCR.Fz) != 0) + if ((valueBits & 0x007FFFFFu) == 0u || (fpcr & FPCR.Fz) != 0) { type = FPType.Zero; value = FPZero(sign); if ((valueBits & 0x007FFFFFu) != 0u) { - FPProcessException(FPException.InputDenorm, context); + FPProcessException(FPException.InputDenorm, context, fpcr); } } else @@ -1592,25 +1810,26 @@ namespace ARMeilleure.Instructions uint op1, uint op2, out bool done, - ExecutionContext context) + ExecutionContext context, + FPCR fpcr) { done = true; if (type1 == FPType.SNaN) { - return FPProcessNaN(type1, op1, context); + return FPProcessNaN(type1, op1, context, fpcr); } else if (type2 == FPType.SNaN) { - return FPProcessNaN(type2, op2, context); + return FPProcessNaN(type2, op2, context, fpcr); } else if (type1 == FPType.QNaN) { - return FPProcessNaN(type1, op1, context); + return FPProcessNaN(type1, op1, context, fpcr); } else if (type2 == FPType.QNaN) { - return FPProcessNaN(type2, op2, context); + return FPProcessNaN(type2, op2, context, fpcr); } done = false; @@ -1626,33 +1845,34 @@ namespace ARMeilleure.Instructions uint op2, uint op3, out bool done, - ExecutionContext context) + ExecutionContext context, + FPCR fpcr) { done = true; if (type1 == FPType.SNaN) { - return FPProcessNaN(type1, op1, context); + return FPProcessNaN(type1, op1, context, fpcr); } else if (type2 == FPType.SNaN) { - return FPProcessNaN(type2, op2, context); + return FPProcessNaN(type2, op2, context, fpcr); } else if (type3 == FPType.SNaN) { - return FPProcessNaN(type3, op3, context); + return FPProcessNaN(type3, op3, context, fpcr); } else if (type1 == FPType.QNaN) { - return FPProcessNaN(type1, op1, context); + return FPProcessNaN(type1, op1, context, fpcr); } else if (type2 == FPType.QNaN) { - return FPProcessNaN(type2, op2, context); + return FPProcessNaN(type2, op2, context, fpcr); } else if (type3 == FPType.QNaN) { - return FPProcessNaN(type3, op3, context); + return FPProcessNaN(type3, op3, context, fpcr); } done = false; @@ -1660,16 +1880,16 @@ namespace ARMeilleure.Instructions return FPZero(false); } - private static float FPProcessNaN(FPType type, uint op, ExecutionContext context) + private static float FPProcessNaN(FPType type, uint op, ExecutionContext context, FPCR fpcr) { if (type == FPType.SNaN) { op |= 1u << 22; - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } - if ((context.Fpcr & FPCR.Dn) != 0) + if ((fpcr & FPCR.Dn) != 0) { return FPDefaultNaN(); } @@ -1677,11 +1897,11 @@ namespace ARMeilleure.Instructions return BitConverter.Int32BitsToSingle((int)op); } - private static void FPProcessException(FPException exc, ExecutionContext context) + private static void FPProcessException(FPException exc, ExecutionContext context, FPCR fpcr) { int enable = (int)exc + 8; - if ((context.Fpcr & (FPCR)(1 << enable)) != 0) + if ((fpcr & (FPCR)(1 << enable)) != 0) { throw new NotImplementedException("Floating-point trap handling."); } @@ -1695,13 +1915,19 @@ namespace ARMeilleure.Instructions static class SoftFloat64 { public static double FPAdd(double value1, double value2) + { + return FPAddFpscr(value1, value2, false); + } + + 
public static double FPAddFpscr(double value1, double value2, bool standardFpscr) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -1712,7 +1938,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if ((inf1 && !sign1) || (inf2 && !sign2)) { @@ -1730,7 +1956,7 @@ namespace ARMeilleure.Instructions { result = value1 + value2; - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1745,9 +1971,10 @@ namespace ARMeilleure.Instructions public static int FPCompare(double value1, double value2, bool signalNaNs) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out _, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out _, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out _, context, fpcr); int result; @@ -1757,7 +1984,7 @@ namespace ARMeilleure.Instructions if (type1 == FPType.SNaN || type2 == FPType.SNaN || signalNaNs) { - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } } else @@ -1781,10 +2008,16 @@ namespace ARMeilleure.Instructions public static double FPCompareEQ(double value1, double value2) { - ExecutionContext context = NativeInterface.GetContext(); + return FPCompareEQFpscr(value1, value2, false); + } - value1 = value1.FPUnpack(out FPType type1, out _, out _, context); - value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + public static double FPCompareEQFpscr(double value1, double value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); double result; @@ -1794,7 +2027,7 @@ namespace ARMeilleure.Instructions if (type1 == FPType.SNaN || type2 == FPType.SNaN) { - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } } else @@ -1807,10 +2040,16 @@ namespace ARMeilleure.Instructions public static double FPCompareGE(double value1, double value2) { - ExecutionContext context = NativeInterface.GetContext(); + return FPCompareGEFpscr(value1, value2, false); + } - value1 = value1.FPUnpack(out FPType type1, out _, out _, context); - value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + public static double FPCompareGEFpscr(double value1, double value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); double result; @@ -1818,7 +2057,7 @@ namespace ARMeilleure.Instructions { result = ZerosOrOnes(false); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else { @@ -1830,10 +2069,16 @@ namespace ARMeilleure.Instructions public static double FPCompareGT(double value1, double value2) { - ExecutionContext context = NativeInterface.GetContext(); + return FPCompareGTFpscr(value1, value2, false); + } - value1 = value1.FPUnpack(out FPType type1, out _, out _, context); - value2 = value2.FPUnpack(out FPType type2, out _, out _, context); + public static double FPCompareGTFpscr(double value1, double value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); double result; @@ -1841,7 +2086,7 @@ namespace ARMeilleure.Instructions { result = ZerosOrOnes(false); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else { @@ -1861,14 +2106,25 @@ namespace ARMeilleure.Instructions return FPCompareGT(value2, value1); } + public static double FPCompareLEFpscr(double value1, double value2, bool standardFpscr) + { + return FPCompareGEFpscr(value2, value1, standardFpscr); + } + + public static double FPCompareLTFpscr(double value1, double value2, bool standardFpscr) + { + return FPCompareGTFpscr(value2, value1, standardFpscr); + } + public static double FPDiv(double value1, double value2) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -1879,7 +2135,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if (inf1 || zero2) { @@ -1887,7 +2143,7 @@ namespace ARMeilleure.Instructions if (!inf1) { - FPProcessException(FPException.DivideByZero, context); + FPProcessException(FPException.DivideByZero, context, fpcr); } } else if (zero1 || inf2) @@ -1898,7 +2154,7 @@ namespace ARMeilleure.Instructions { result = value1 / value2; - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1911,13 +2167,19 @@ namespace ARMeilleure.Instructions } public static double FPMax(double value1, double value2) + { + return FPMaxFpscr(value1, value2, false); + } + + public static double FPMaxFpscr(double value1, double value2, bool standardFpscr) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -1950,7 +2212,7 @@ namespace ARMeilleure.Instructions { result = value2; - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -1965,10 +2227,16 @@ namespace ARMeilleure.Instructions public static double FPMaxNum(double value1, double value2) { - ExecutionContext context = NativeInterface.GetContext(); + return FPMaxNumFpscr(value1, value2, false); + } - value1.FPUnpack(out FPType type1, out _, out _, context); - value2.FPUnpack(out FPType type2, out _, out _, context); + public static double FPMaxNumFpscr(double value1, double value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); if (type1 == FPType.QNaN && type2 != FPType.QNaN) { @@ -1979,17 +2247,23 @@ namespace ARMeilleure.Instructions value2 = FPInfinity(true); } - return FPMax(value1, value2); + return FPMaxFpscr(value1, value2, standardFpscr); } public static double FPMin(double value1, double value2) + { + return FPMinFpscr(value1, value2, false); + } + + public static double FPMinFpscr(double value1, double value2, bool standardFpscr) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -2022,7 +2296,7 @@ namespace ARMeilleure.Instructions { result = value2; - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -2037,10 +2311,16 @@ namespace ARMeilleure.Instructions public static double FPMinNum(double value1, double value2) { - ExecutionContext context = NativeInterface.GetContext(); + return FPMinNumFpscr(value1, value2, false); + } - value1.FPUnpack(out FPType type1, out _, out _, context); - value2.FPUnpack(out FPType type2, out _, out _, context); + public static double FPMinNumFpscr(double value1, double value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
context.StandardFpcrValue : context.Fpcr; + + value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); if (type1 == FPType.QNaN && type2 != FPType.QNaN) { @@ -2051,17 +2331,23 @@ namespace ARMeilleure.Instructions value2 = FPInfinity(false); } - return FPMin(value1, value2); + return FPMinFpscr(value1, value2, standardFpscr); } public static double FPMul(double value1, double value2) + { + return FPMulFpscr(value1, value2, false); + } + + public static double FPMulFpscr(double value1, double value2, bool standardFpscr) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -2072,7 +2358,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if (inf1 || inf2) { @@ -2086,7 +2372,7 @@ namespace ARMeilleure.Instructions { result = value1 * value2; - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -2100,22 +2386,28 @@ namespace ARMeilleure.Instructions public static double FPMulAdd(double valueA, double value1, double value2) { - ExecutionContext context = NativeInterface.GetContext(); + return FPMulAddFpscr(valueA, value1, value2, false); + } - valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out ulong addend, context); - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + public static double FPMulAddFpscr(double valueA, double value1, double value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
context.StandardFpcrValue : context.Fpcr; + + valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out ulong addend, context, fpcr); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; - double result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context); + double result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context, fpcr); if (typeA == FPType.QNaN && ((inf1 && zero2) || (zero1 && inf2))) { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } if (!done) @@ -2130,7 +2422,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if ((infA && !signA) || (infP && !signP)) { @@ -2148,7 +2440,7 @@ namespace ARMeilleure.Instructions { result = Math.FusedMultiplyAdd(value1, value2, valueA); - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -2167,14 +2459,22 @@ namespace ARMeilleure.Instructions return FPMulAdd(valueA, value1, value2); } + public static double FPMulSubFpscr(double valueA, double value1, double value2, bool standardFpscr) + { + value1 = value1.FPNeg(); + + return FPMulAddFpscr(valueA, value1, value2, standardFpscr); + } + public static double FPMulX(double value1, double value2) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -2197,7 +2497,7 @@ namespace ARMeilleure.Instructions { result = value1 * value2; - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -2226,15 +2526,21 @@ namespace ARMeilleure.Instructions public static double FPRecipEstimate(double value) { - ExecutionContext context = NativeInterface.GetContext(); + return FPRecipEstimateFpscr(value, false); + } - value.FPUnpack(out FPType type, out bool sign, out ulong op, context); + public static double FPRecipEstimateFpscr(double value, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
context.StandardFpcrValue : context.Fpcr; + + value.FPUnpack(out FPType type, out bool sign, out ulong op, context, fpcr); double result; if (type == FPType.SNaN || type == FPType.QNaN) { - result = FPProcessNaN(type, op, context); + result = FPProcessNaN(type, op, context, fpcr); } else if (type == FPType.Infinity) { @@ -2244,13 +2550,13 @@ namespace ARMeilleure.Instructions { result = FPInfinity(sign); - FPProcessException(FPException.DivideByZero, context); + FPProcessException(FPException.DivideByZero, context, fpcr); } else if (Math.Abs(value) < Math.Pow(2d, -1024)) { bool overflowToInf; - switch (context.Fpcr.GetRoundingMode()) + switch (fpcr.GetRoundingMode()) { default: case FPRoundingMode.ToNearest: overflowToInf = true; break; @@ -2261,10 +2567,10 @@ namespace ARMeilleure.Instructions result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); - FPProcessException(FPException.Overflow, context); - FPProcessException(FPException.Inexact, context); + FPProcessException(FPException.Overflow, context, fpcr); + FPProcessException(FPException.Inexact, context, fpcr); } - else if ((context.Fpcr & FPCR.Fz) != 0 && (Math.Abs(value) >= Math.Pow(2d, 1022))) + else if ((fpcr & FPCR.Fz) != 0 && (Math.Abs(value) >= Math.Pow(2d, 1022))) { result = FPZero(sign); @@ -2313,16 +2619,49 @@ namespace ARMeilleure.Instructions return result; } + public static double FPRecipStep(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.StandardFpcrValue; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + double product; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + product = FPZero(false); + } + else + { + product = FPMulFpscr(value1, value2, true); + } + + result = FPSubFpscr(FPTwo(false), product, true); + } + + return result; + } + public static double FPRecipStepFused(double value1, double value2) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; value1 = value1.FPNeg(); - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -2341,7 +2680,7 @@ namespace ARMeilleure.Instructions { result = Math.FusedMultiplyAdd(value1, value2, 2d); - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -2356,14 +2695,15 @@ namespace ARMeilleure.Instructions public static double FPRecpX(double value) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value.FPUnpack(out FPType type, out bool sign, out ulong op, context); + value.FPUnpack(out FPType type, out bool sign, out ulong op, context, fpcr); double result; if 
(type == FPType.SNaN || type == FPType.QNaN) { - result = FPProcessNaN(type, op, context); + result = FPProcessNaN(type, op, context, fpcr); } else { @@ -2379,27 +2719,33 @@ namespace ARMeilleure.Instructions public static double FPRSqrtEstimate(double value) { - ExecutionContext context = NativeInterface.GetContext(); + return FPRSqrtEstimateFpscr(value, false); + } - value.FPUnpack(out FPType type, out bool sign, out ulong op, context); + public static double FPRSqrtEstimateFpscr(double value, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value.FPUnpack(out FPType type, out bool sign, out ulong op, context, fpcr); double result; if (type == FPType.SNaN || type == FPType.QNaN) { - result = FPProcessNaN(type, op, context); + result = FPProcessNaN(type, op, context, fpcr); } else if (type == FPType.Zero) { result = FPInfinity(sign); - FPProcessException(FPException.DivideByZero, context); + FPProcessException(FPException.DivideByZero, context, fpcr); } else if (sign) { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if (type == FPType.Infinity) { @@ -2442,16 +2788,95 @@ namespace ARMeilleure.Instructions return result; } + public static double FPHalvedSub(double value1, double value2, ExecutionContext context, FPCR fpcr) + { + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if (inf1 && inf2 && sign1 == sign2) + { + result = FPDefaultNaN(); + + FPProcessException(FPException.InvalidOp, context, fpcr); + } + else if ((inf1 && !sign1) || (inf2 && sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && !sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == !sign2) + { + result = FPZero(sign1); + } + else + { + result = (value1 - value2) / 2.0; + + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPRSqrtStep(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.StandardFpcrValue; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + double product; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + product = FPZero(false); + } + else + { + product = FPMulFpscr(value1, value2, true); + } + + result = FPHalvedSub(FPThree(false), product, context, fpcr); + } + + return result; + } + public static double FPRSqrtStepFused(double value1, double value2) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; value1 = value1.FPNeg(); - value1 = 
value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -2470,7 +2895,7 @@ namespace ARMeilleure.Instructions { result = Math.FusedMultiplyAdd(value1, value2, 3d) / 2d; - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -2485,14 +2910,15 @@ namespace ARMeilleure.Instructions public static double FPSqrt(double value) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; - value = value.FPUnpack(out FPType type, out bool sign, out ulong op, context); + value = value.FPUnpack(out FPType type, out bool sign, out ulong op, context, fpcr); double result; if (type == FPType.SNaN || type == FPType.QNaN) { - result = FPProcessNaN(type, op, context); + result = FPProcessNaN(type, op, context, fpcr); } else if (type == FPType.Zero) { @@ -2506,13 +2932,13 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else { result = Math.Sqrt(value); - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -2524,13 +2950,19 @@ namespace ARMeilleure.Instructions } public static double FPSub(double value1, double value2) + { + return FPSubFpscr(value1, value2, false); + } + + public static double FPSubFpscr(double value1, double value2, bool standardFpscr) { ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; - value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context); - value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); - double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context); + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); if (!done) { @@ -2541,7 +2973,7 @@ namespace ARMeilleure.Instructions { result = FPDefaultNaN(); - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } else if ((inf1 && !sign1) || (inf2 && sign2)) { @@ -2559,7 +2991,7 @@ namespace ARMeilleure.Instructions { result = value1 - value2; - if ((context.Fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) { context.Fpsr |= FPSR.Ufc; @@ -2573,7 +3005,7 @@ namespace ARMeilleure.Instructions private static double FPDefaultNaN() { - return -double.NaN; + return BitConverter.Int64BitsToDouble(0x7ff8000000000000); } private static double FPInfinity(bool sign) @@ -2596,6 +3028,11 @@ namespace ARMeilleure.Instructions return sign ? -2d : +2d; } + private static double FPThree(bool sign) + { + return sign ? 
-3d : +3d; + } + private static double FPOnePointFive(bool sign) { return sign ? -1.5d : +1.5d; @@ -2616,7 +3053,8 @@ namespace ARMeilleure.Instructions out FPType type, out bool sign, out ulong valueBits, - ExecutionContext context) + ExecutionContext context, + FPCR fpcr) { valueBits = (ulong)BitConverter.DoubleToInt64Bits(value); @@ -2624,14 +3062,14 @@ namespace ARMeilleure.Instructions if ((valueBits & 0x7FF0000000000000ul) == 0ul) { - if ((valueBits & 0x000FFFFFFFFFFFFFul) == 0ul || (context.Fpcr & FPCR.Fz) != 0) + if ((valueBits & 0x000FFFFFFFFFFFFFul) == 0ul || (fpcr & FPCR.Fz) != 0) { type = FPType.Zero; value = FPZero(sign); if ((valueBits & 0x000FFFFFFFFFFFFFul) != 0ul) { - FPProcessException(FPException.InputDenorm, context); + FPProcessException(FPException.InputDenorm, context, fpcr); } } else @@ -2665,25 +3103,26 @@ namespace ARMeilleure.Instructions ulong op1, ulong op2, out bool done, - ExecutionContext context) + ExecutionContext context, + FPCR fpcr) { done = true; if (type1 == FPType.SNaN) { - return FPProcessNaN(type1, op1, context); + return FPProcessNaN(type1, op1, context, fpcr); } else if (type2 == FPType.SNaN) { - return FPProcessNaN(type2, op2, context); + return FPProcessNaN(type2, op2, context, fpcr); } else if (type1 == FPType.QNaN) { - return FPProcessNaN(type1, op1, context); + return FPProcessNaN(type1, op1, context, fpcr); } else if (type2 == FPType.QNaN) { - return FPProcessNaN(type2, op2, context); + return FPProcessNaN(type2, op2, context, fpcr); } done = false; @@ -2699,33 +3138,34 @@ namespace ARMeilleure.Instructions ulong op2, ulong op3, out bool done, - ExecutionContext context) + ExecutionContext context, + FPCR fpcr) { done = true; if (type1 == FPType.SNaN) { - return FPProcessNaN(type1, op1, context); + return FPProcessNaN(type1, op1, context, fpcr); } else if (type2 == FPType.SNaN) { - return FPProcessNaN(type2, op2, context); + return FPProcessNaN(type2, op2, context, fpcr); } else if (type3 == FPType.SNaN) { - return FPProcessNaN(type3, op3, context); + return FPProcessNaN(type3, op3, context, fpcr); } else if (type1 == FPType.QNaN) { - return FPProcessNaN(type1, op1, context); + return FPProcessNaN(type1, op1, context, fpcr); } else if (type2 == FPType.QNaN) { - return FPProcessNaN(type2, op2, context); + return FPProcessNaN(type2, op2, context, fpcr); } else if (type3 == FPType.QNaN) { - return FPProcessNaN(type3, op3, context); + return FPProcessNaN(type3, op3, context, fpcr); } done = false; @@ -2733,16 +3173,16 @@ namespace ARMeilleure.Instructions return FPZero(false); } - private static double FPProcessNaN(FPType type, ulong op, ExecutionContext context) + private static double FPProcessNaN(FPType type, ulong op, ExecutionContext context, FPCR fpcr) { if (type == FPType.SNaN) { op |= 1ul << 51; - FPProcessException(FPException.InvalidOp, context); + FPProcessException(FPException.InvalidOp, context, fpcr); } - if ((context.Fpcr & FPCR.Dn) != 0) + if ((fpcr & FPCR.Dn) != 0) { return FPDefaultNaN(); } @@ -2750,11 +3190,11 @@ namespace ARMeilleure.Instructions return BitConverter.Int64BitsToDouble((long)op); } - private static void FPProcessException(FPException exc, ExecutionContext context) + private static void FPProcessException(FPException exc, ExecutionContext context, FPCR fpcr) { int enable = (int)exc + 8; - if ((context.Fpcr & (FPCR)(1 << enable)) != 0) + if ((fpcr & (FPCR)(1 << enable)) != 0) { throw new NotImplementedException("Floating-point trap handling."); } diff --git 
a/ARMeilleure/IntermediateRepresentation/RegisterType.cs b/ARMeilleure/IntermediateRepresentation/RegisterType.cs index e71795cb9..88ac6c124 100644 --- a/ARMeilleure/IntermediateRepresentation/RegisterType.cs +++ b/ARMeilleure/IntermediateRepresentation/RegisterType.cs @@ -4,6 +4,7 @@ namespace ARMeilleure.IntermediateRepresentation { Integer, Vector, - Flag + Flag, + FpFlag } } \ No newline at end of file diff --git a/ARMeilleure/State/ExecutionContext.cs b/ARMeilleure/State/ExecutionContext.cs index 406766803..482665dbf 100644 --- a/ARMeilleure/State/ExecutionContext.cs +++ b/ARMeilleure/State/ExecutionContext.cs @@ -36,6 +36,7 @@ namespace ARMeilleure.State public FPCR Fpcr { get; set; } public FPSR Fpsr { get; set; } + public FPCR StandardFpcrValue => (Fpcr & (FPCR.Ahp)) | FPCR.Dn | FPCR.Fz; public bool IsAarch32 { get; set; } @@ -90,6 +91,9 @@ namespace ARMeilleure.State public bool GetPstateFlag(PState flag) => _nativeContext.GetPstateFlag(flag); public void SetPstateFlag(PState flag, bool value) => _nativeContext.SetPstateFlag(flag, value); + public bool GetFPstateFlag(FPState flag) => _nativeContext.GetFPStateFlag(flag); + public void SetFPstateFlag(FPState flag, bool value) => _nativeContext.SetFPStateFlag(flag, value); + internal void CheckInterrupt() { if (_interrupted) diff --git a/ARMeilleure/State/FPCR.cs b/ARMeilleure/State/FPCR.cs index 511681fa9..913065ea3 100644 --- a/ARMeilleure/State/FPCR.cs +++ b/ARMeilleure/State/FPCR.cs @@ -3,12 +3,14 @@ using System; namespace ARMeilleure.State { [Flags] - public enum FPCR + public enum FPCR : uint { Ufe = 1 << 11, Fz = 1 << 24, Dn = 1 << 25, - Ahp = 1 << 26 + Ahp = 1 << 26, + + A32Mask = 0x07ffff00 } public static class FPCRExtensions diff --git a/ARMeilleure/State/FPSR.cs b/ARMeilleure/State/FPSR.cs index c20dc4393..47323b35c 100644 --- a/ARMeilleure/State/FPSR.cs +++ b/ARMeilleure/State/FPSR.cs @@ -3,9 +3,11 @@ using System; namespace ARMeilleure.State { [Flags] - public enum FPSR + public enum FPSR : uint { Ufc = 1 << 3, - Qc = 1 << 27 + Qc = 1 << 27, + + A32Mask = 0xf800000f } } diff --git a/ARMeilleure/State/FPState.cs b/ARMeilleure/State/FPState.cs new file mode 100644 index 000000000..2fe2a567e --- /dev/null +++ b/ARMeilleure/State/FPState.cs @@ -0,0 +1,15 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace ARMeilleure.State +{ + [Flags] + public enum FPState + { + VFlag = 28, + CFlag = 29, + ZFlag = 30, + NFlag = 31 + } +} diff --git a/ARMeilleure/State/NativeContext.cs b/ARMeilleure/State/NativeContext.cs index 4e6a5302f..eb54505c6 100644 --- a/ARMeilleure/State/NativeContext.cs +++ b/ARMeilleure/State/NativeContext.cs @@ -14,7 +14,8 @@ namespace ARMeilleure.State private const int TotalSize = RegisterConsts.IntRegsCount * IntSize + RegisterConsts.VecRegsCount * VecSize + - RegisterConsts.FlagsCount * FlagSize + ExtraSize; + RegisterConsts.FlagsCount * FlagSize + + RegisterConsts.FpFlagsCount * FlagSize + ExtraSize; public IntPtr BasePtr { get; } @@ -100,6 +101,38 @@ namespace ARMeilleure.State Marshal.WriteInt32(BasePtr, offset, value ? 
1 : 0); } + public bool GetFPStateFlag(FPState flag) + { + if ((uint)flag >= RegisterConsts.FlagsCount) + { + throw new ArgumentException($"Invalid flag \"{flag}\" specified."); + } + + int offset = + RegisterConsts.IntRegsCount * IntSize + + RegisterConsts.VecRegsCount * VecSize + + RegisterConsts.FlagsCount * FlagSize + (int)flag * FlagSize; + + int value = Marshal.ReadInt32(BasePtr, offset); + + return value != 0; + } + + public void SetFPStateFlag(FPState flag, bool value) + { + if ((uint)flag >= RegisterConsts.FlagsCount) + { + throw new ArgumentException($"Invalid flag \"{flag}\" specified."); + } + + int offset = + RegisterConsts.IntRegsCount * IntSize + + RegisterConsts.VecRegsCount * VecSize + + RegisterConsts.FlagsCount * FlagSize + (int)flag * FlagSize; + + Marshal.WriteInt32(BasePtr, offset, value ? 1 : 0); + } + public int GetCounter() { return Marshal.ReadInt32(BasePtr, GetCounterOffset()); @@ -144,9 +177,10 @@ namespace ARMeilleure.State public static int GetCounterOffset() { - return RegisterConsts.IntRegsCount * IntSize + - RegisterConsts.VecRegsCount * VecSize + - RegisterConsts.FlagsCount * FlagSize; + return RegisterConsts.IntRegsCount * IntSize + + RegisterConsts.VecRegsCount * VecSize + + RegisterConsts.FlagsCount * FlagSize + + RegisterConsts.FpFlagsCount * FlagSize; } public void Dispose() diff --git a/ARMeilleure/State/RegisterAlias.cs b/ARMeilleure/State/RegisterAlias.cs index ae0d45628..7ebfa2753 100644 --- a/ARMeilleure/State/RegisterAlias.cs +++ b/ARMeilleure/State/RegisterAlias.cs @@ -32,6 +32,7 @@ namespace ARMeilleure.State public const int SpFiq = 29; public const int LrFiq = 30; + public const int Aarch32Sp = 13; public const int Aarch32Lr = 14; public const int Aarch32Pc = 15; diff --git a/ARMeilleure/State/RegisterConsts.cs b/ARMeilleure/State/RegisterConsts.cs index a85117bb2..d62940808 100644 --- a/ARMeilleure/State/RegisterConsts.cs +++ b/ARMeilleure/State/RegisterConsts.cs @@ -5,8 +5,10 @@ namespace ARMeilleure.State public const int IntRegsCount = 32; public const int VecRegsCount = 32; public const int FlagsCount = 32; + public const int FpFlagsCount = 32; public const int IntAndVecRegsCount = IntRegsCount + VecRegsCount; - public const int TotalCount = IntRegsCount + VecRegsCount + FlagsCount; + public const int FpFlagsOffset = IntRegsCount + VecRegsCount + FlagsCount; + public const int TotalCount = IntRegsCount + VecRegsCount + FlagsCount + FpFlagsCount; public const int ZeroIndex = 31; } diff --git a/ARMeilleure/Translation/RegisterUsage.cs b/ARMeilleure/Translation/RegisterUsage.cs index becaa24cd..84dfce7b5 100644 --- a/ARMeilleure/Translation/RegisterUsage.cs +++ b/ARMeilleure/Translation/RegisterUsage.cs @@ -10,6 +10,7 @@ namespace ARMeilleure.Translation { private const long CallerSavedIntRegistersMask = 0x7fL << 9; private const long PStateNzcvFlagsMask = 0xfL << 60; + private const long FpStateNzcvFlagsMask = 0xfL << 60; private const long CallerSavedVecRegistersMask = 0xffffL << 16; @@ -68,7 +69,7 @@ namespace ARMeilleure.Translation } } - public static void RunPass(ControlFlowGraph cfg, bool isCompleteFunction) + public static void RunPass(ControlFlowGraph cfg, ExecutionMode mode, bool isCompleteFunction) { // Compute local register inputs and outputs used inside blocks. RegisterMask[] localInputs = new RegisterMask[cfg.Blocks.Count]; @@ -205,8 +206,8 @@ namespace ARMeilleure.Translation // It always needs a context load as it is the first block to run. 
if (block.Predecessors.Count == 0 || hasContextLoad) { - LoadLocals(block, globalInputs[block.Index].VecMask, RegisterType.Vector); - LoadLocals(block, globalInputs[block.Index].IntMask, RegisterType.Integer); + LoadLocals(block, globalInputs[block.Index].VecMask, RegisterType.Vector, mode); + LoadLocals(block, globalInputs[block.Index].IntMask, RegisterType.Integer, mode); } bool hasContextStore = HasContextStore(block); @@ -218,8 +219,8 @@ namespace ARMeilleure.Translation if (EndsWithReturn(block) || hasContextStore) { - StoreLocals(block, globalOutputs[block.Index].IntMask, RegisterType.Integer, isCompleteFunction); - StoreLocals(block, globalOutputs[block.Index].VecMask, RegisterType.Vector, isCompleteFunction); + StoreLocals(block, globalOutputs[block.Index].IntMask, RegisterType.Integer, mode, isCompleteFunction); + StoreLocals(block, globalOutputs[block.Index].VecMask, RegisterType.Vector, mode, isCompleteFunction); } } } @@ -263,6 +264,7 @@ namespace ARMeilleure.Translation { case RegisterType.Flag: intMask = (1L << RegsCount) << register.Index; break; case RegisterType.Integer: intMask = 1L << register.Index; break; + case RegisterType.FpFlag: vecMask = (1L << RegsCount) << register.Index; break; case RegisterType.Vector: vecMask = 1L << register.Index; break; } @@ -278,7 +280,7 @@ namespace ARMeilleure.Translation return oldValue != value; } - private static void LoadLocals(BasicBlock block, long inputs, RegisterType baseType) + private static void LoadLocals(BasicBlock block, long inputs, RegisterType baseType, ExecutionMode mode) { Operand arg0 = Local(OperandType.I64); @@ -291,7 +293,7 @@ namespace ARMeilleure.Translation continue; } - Operand dest = GetRegFromBit(bit, baseType); + Operand dest = GetRegFromBit(bit, baseType, mode); long offset = NativeContext.GetRegisterOffset(dest.GetRegister()); @@ -311,7 +313,7 @@ namespace ARMeilleure.Translation block.Operations.AddFirst(loadArg0); } - private static void StoreLocals(BasicBlock block, long outputs, RegisterType baseType, bool isCompleteFunction) + private static void StoreLocals(BasicBlock block, long outputs, RegisterType baseType, ExecutionMode mode, bool isCompleteFunction) { if (Optimizations.AssumeStrictAbiCompliance && isCompleteFunction) { @@ -319,7 +321,7 @@ namespace ARMeilleure.Translation { outputs = ClearCallerSavedIntRegs(outputs); } - else /* if (baseType == RegisterType.Vector) */ + else /* if (baseType == RegisterType.Vector || baseType == RegisterType.FpFlag) */ { outputs = ClearCallerSavedVecRegs(outputs); } @@ -340,7 +342,7 @@ namespace ARMeilleure.Translation continue; } - Operand source = GetRegFromBit(bit, baseType); + Operand source = GetRegFromBit(bit, baseType, mode); long offset = NativeContext.GetRegisterOffset(source.GetRegister()); @@ -356,28 +358,33 @@ namespace ARMeilleure.Translation } } - private static Operand GetRegFromBit(int bit, RegisterType baseType) + private static Operand GetRegFromBit(int bit, RegisterType baseType, ExecutionMode mode) { if (bit < RegsCount) { - return new Operand(bit, baseType, GetOperandType(baseType)); + return new Operand(bit, baseType, GetOperandType(baseType, mode)); } else if (baseType == RegisterType.Integer) { return new Operand(bit & RegsMask, RegisterType.Flag, OperandType.I32); } + else if (baseType == RegisterType.Vector) + { + return new Operand(bit & RegsMask, RegisterType.FpFlag, OperandType.I32); + } else { throw new ArgumentOutOfRangeException(nameof(bit)); } } - private static OperandType GetOperandType(RegisterType type) + private static 
OperandType GetOperandType(RegisterType type, ExecutionMode mode) { switch (type) { case RegisterType.Flag: return OperandType.I32; - case RegisterType.Integer: return OperandType.I64; + case RegisterType.FpFlag: return OperandType.I32; + case RegisterType.Integer: return (mode == ExecutionMode.Aarch64) ? OperandType.I64 : OperandType.I32; case RegisterType.Vector: return OperandType.V128; } @@ -405,7 +412,7 @@ namespace ARMeilleure.Translation private static long ClearCallerSavedVecRegs(long mask) { // TODO: ARM32 support. - mask &= ~CallerSavedVecRegistersMask; + mask &= ~(CallerSavedVecRegistersMask | FpStateNzcvFlagsMask); return mask; } diff --git a/ARMeilleure/Translation/RejitRequest.cs b/ARMeilleure/Translation/RejitRequest.cs new file mode 100644 index 000000000..e0b0e0b92 --- /dev/null +++ b/ARMeilleure/Translation/RejitRequest.cs @@ -0,0 +1,16 @@ +using ARMeilleure.State; + +namespace ARMeilleure.Translation +{ + struct RejitRequest + { + public ulong Address; + public ExecutionMode Mode; + + public RejitRequest(ulong address, ExecutionMode mode) + { + Address = address; + Mode = mode; + } + } +} diff --git a/ARMeilleure/Translation/SsaConstruction.cs b/ARMeilleure/Translation/SsaConstruction.cs index 292e74e36..46435f444 100644 --- a/ARMeilleure/Translation/SsaConstruction.cs +++ b/ARMeilleure/Translation/SsaConstruction.cs @@ -268,10 +268,14 @@ namespace ARMeilleure.Translation { return RegisterConsts.IntRegsCount + reg.Index; } - else /* if (reg.Type == RegisterType.Flag) */ + else if (reg.Type == RegisterType.Flag) { return RegisterConsts.IntAndVecRegsCount + reg.Index; } + else /* if (reg.Type == RegisterType.FpFlag) */ + { + return RegisterConsts.FpFlagsOffset + reg.Index; + } } private static Register GetRegisterFromId(int id) @@ -284,10 +288,14 @@ namespace ARMeilleure.Translation { return new Register(id - RegisterConsts.IntRegsCount, RegisterType.Vector); } - else /* if (id < RegisterConsts.TotalCount) */ + else if (id < RegisterConsts.FpFlagsOffset) { return new Register(id - RegisterConsts.IntAndVecRegsCount, RegisterType.Flag); } + else /* if (id < RegisterConsts.TotalCount) */ + { + return new Register(id - RegisterConsts.FpFlagsOffset, RegisterType.FpFlag); + } } } } \ No newline at end of file diff --git a/ARMeilleure/Translation/Translator.cs b/ARMeilleure/Translation/Translator.cs index 4725ca59d..3008303e7 100644 --- a/ARMeilleure/Translation/Translator.cs +++ b/ARMeilleure/Translation/Translator.cs @@ -20,7 +20,7 @@ namespace ARMeilleure.Translation private ConcurrentDictionary _funcs; - private PriorityQueue _backgroundQueue; + private PriorityQueue _backgroundQueue; private AutoResetEvent _backgroundTranslatorEvent; @@ -32,7 +32,7 @@ namespace ARMeilleure.Translation _funcs = new ConcurrentDictionary(); - _backgroundQueue = new PriorityQueue(2); + _backgroundQueue = new PriorityQueue(2); _backgroundTranslatorEvent = new AutoResetEvent(false); } @@ -41,11 +41,11 @@ namespace ARMeilleure.Translation { while (_threadCount != 0) { - if (_backgroundQueue.TryDequeue(out ulong address)) + if (_backgroundQueue.TryDequeue(out RejitRequest request)) { - TranslatedFunction func = Translate(address, ExecutionMode.Aarch64, highCq: true); + TranslatedFunction func = Translate(request.Address, request.Mode, highCq: true); - _funcs.AddOrUpdate(address, func, (key, oldFunc) => func); + _funcs.AddOrUpdate(request.Address, func, (key, oldFunc) => func); } else { @@ -114,7 +114,7 @@ namespace ARMeilleure.Translation } else if (isCallTarget && func.ShouldRejit()) { - 
_backgroundQueue.Enqueue(0, address); + _backgroundQueue.Enqueue(0, new RejitRequest(address, mode)); _backgroundTranslatorEvent.Set(); } @@ -149,7 +149,7 @@ namespace ARMeilleure.Translation Logger.StartPass(PassName.RegisterUsage); - RegisterUsage.RunPass(cfg, isCompleteFunction: false); + RegisterUsage.RunPass(cfg, mode, isCompleteFunction: false); Logger.EndPass(PassName.RegisterUsage); diff --git a/Ryujinx.Tests.Unicorn/Native/Arm32Register.cs b/Ryujinx.Tests.Unicorn/Native/Arm32Register.cs new file mode 100644 index 000000000..f34b42a91 --- /dev/null +++ b/Ryujinx.Tests.Unicorn/Native/Arm32Register.cs @@ -0,0 +1,139 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Ryujinx.Tests.Unicorn.Native +{ + public enum Arm32Register + { + INVALID = 0, + + APSR, + APSR_NZCV, + CPSR, + FPEXC, + FPINST, + FPSCR, + FPSCR_NZCV, + FPSID, + ITSTATE, + LR, + PC, + SP, + SPSR, + D0, + D1, + D2, + D3, + D4, + D5, + D6, + D7, + D8, + D9, + D10, + D11, + D12, + D13, + D14, + D15, + D16, + D17, + D18, + D19, + D20, + D21, + D22, + D23, + D24, + D25, + D26, + D27, + D28, + D29, + D30, + D31, + FPINST2, + MVFR0, + MVFR1, + MVFR2, + Q0, + Q1, + Q2, + Q3, + Q4, + Q5, + Q6, + Q7, + Q8, + Q9, + Q10, + Q11, + Q12, + Q13, + Q14, + Q15, + R0, + R1, + R2, + R3, + R4, + R5, + R6, + R7, + R8, + R9, + R10, + R11, + R12, + S0, + S1, + S2, + S3, + S4, + S5, + S6, + S7, + S8, + S9, + S10, + S11, + S12, + S13, + S14, + S15, + S16, + S17, + S18, + S19, + S20, + S21, + S22, + S23, + S24, + S25, + S26, + S27, + S28, + S29, + S30, + S31, + C1_C0_2, + C13_C0_2, + C13_C0_3, + IPSR, + MSP, + PSP, + CONTROL, + ENDING, + + // Alias registers. + R13 = SP, + R14 = LR, + R15 = PC, + SB = R9, + SL = R10, + FP = R11, + IP = R12, + } +} diff --git a/Ryujinx.Tests.Unicorn/UnicornAArch32.cs b/Ryujinx.Tests.Unicorn/UnicornAArch32.cs new file mode 100644 index 000000000..d7ae90d6e --- /dev/null +++ b/Ryujinx.Tests.Unicorn/UnicornAArch32.cs @@ -0,0 +1,280 @@ +using Ryujinx.Tests.Unicorn.Native; +using System; + +namespace Ryujinx.Tests.Unicorn +{ + public class UnicornAArch32 + { + internal readonly IntPtr uc; + + public IndexedProperty R + { + get + { + return new IndexedProperty( + (int i) => GetX(i), + (int i, uint value) => SetX(i, value)); + } + } + + public IndexedProperty Q + { + get + { + return new IndexedProperty( + (int i) => GetQ(i), + (int i, SimdValue value) => SetQ(i, value)); + } + } + + public uint LR + { + get => GetRegister(Arm32Register.LR); + set => SetRegister(Arm32Register.LR, value); + } + + public uint SP + { + get => GetRegister(Arm32Register.SP); + set => SetRegister(Arm32Register.SP, value); + } + + public uint PC + { + get => GetRegister(Arm32Register.PC); + set => SetRegister(Arm32Register.PC, value); + } + + public uint APSR + { + get => (uint)GetRegister(Arm32Register.APSR); + set => SetRegister(Arm32Register.APSR, (uint)value); + } + + public int Fpscr + { + get => (int)GetRegister(Arm32Register.FPSCR) | ((int)GetRegister(Arm32Register.FPSCR_NZCV)); + set => SetRegister(Arm32Register.FPSCR, (uint)value); + } + + public bool OverflowFlag + { + get => (APSR & 0x10000000u) != 0; + set => APSR = (APSR & ~0x10000000u) | (value ? 0x10000000u : 0u); + } + + public bool CarryFlag + { + get => (APSR & 0x20000000u) != 0; + set => APSR = (APSR & ~0x20000000u) | (value ? 0x20000000u : 0u); + } + + public bool ZeroFlag + { + get => (APSR & 0x40000000u) != 0; + set => APSR = (APSR & ~0x40000000u) | (value ? 
0x40000000u : 0u); + } + + public bool NegativeFlag + { + get => (APSR & 0x80000000u) != 0; + set => APSR = (APSR & ~0x80000000u) | (value ? 0x80000000u : 0u); + } + + public UnicornAArch32() + { + Interface.Checked(Interface.uc_open(UnicornArch.UC_ARCH_ARM, UnicornMode.UC_MODE_LITTLE_ENDIAN, out uc)); + + SetRegister(Arm32Register.C1_C0_2, GetRegister(Arm32Register.C1_C0_2) | 0xf00000); + SetRegister(Arm32Register.FPEXC, 0x40000000); + } + + ~UnicornAArch32() + { + Interface.Checked(Native.Interface.uc_close(uc)); + } + + public void RunForCount(ulong count) + { + Interface.Checked(Native.Interface.uc_emu_start(uc, this.PC, 0xFFFFFFFFFFFFFFFFu, 0, count)); + } + + public void Step() + { + RunForCount(1); + } + + private static Arm32Register[] XRegisters = new Arm32Register[16] + { + Arm32Register.R0, + Arm32Register.R1, + Arm32Register.R2, + Arm32Register.R3, + Arm32Register.R4, + Arm32Register.R5, + Arm32Register.R6, + Arm32Register.R7, + Arm32Register.R8, + Arm32Register.R9, + Arm32Register.R10, + Arm32Register.R11, + Arm32Register.R12, + Arm32Register.R13, + Arm32Register.R14, + Arm32Register.R15, + }; + + private static Arm32Register[] QRegisters = new Arm32Register[16] + { + Arm32Register.Q0, + Arm32Register.Q1, + Arm32Register.Q2, + Arm32Register.Q3, + Arm32Register.Q4, + Arm32Register.Q5, + Arm32Register.Q6, + Arm32Register.Q7, + Arm32Register.Q8, + Arm32Register.Q9, + Arm32Register.Q10, + Arm32Register.Q11, + Arm32Register.Q12, + Arm32Register.Q13, + Arm32Register.Q14, + Arm32Register.Q15 + }; + + public uint GetX(int index) + { + if ((uint)index > 15) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + return GetRegister(XRegisters[index]); + } + + public void SetX(int index, uint value) + { + if ((uint)index > 15) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + SetRegister(XRegisters[index], value); + } + + public SimdValue GetQ(int index) + { + if ((uint)index > 15) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + // Getting quadword registers from Unicorn A32 seems to be broken, so we combine its 2 doubleword registers instead. 
+ return GetVector((Arm32Register)((int)Arm32Register.D0 + index * 2)); + } + + public void SetQ(int index, SimdValue value) + { + if ((uint)index > 15) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + SetVector((Arm32Register)((int)Arm32Register.D0 + index * 2), value); + } + + public uint GetRegister(Arm32Register register) + { + byte[] data = new byte[4]; + + Interface.Checked(Native.Interface.uc_reg_read(uc, (int)register, data)); + + return (uint)BitConverter.ToInt32(data, 0); + } + + public void SetRegister(Arm32Register register, uint value) + { + byte[] data = BitConverter.GetBytes(value); + + Interface.Checked(Interface.uc_reg_write(uc, (int)register, data)); + } + + public SimdValue GetVector(Arm32Register register) + { + byte[] data = new byte[8]; + + Interface.Checked(Interface.uc_reg_read(uc, (int)register, data)); + ulong lo = BitConverter.ToUInt64(data, 0); + Interface.Checked(Interface.uc_reg_read(uc, (int)register + 1, data)); + ulong hi = BitConverter.ToUInt64(data, 0); + + return new SimdValue(lo, hi); + } + + private void SetVector(Arm32Register register, SimdValue value) + { + byte[] data = BitConverter.GetBytes(value.GetUInt64(0)); + Interface.Checked(Interface.uc_reg_write(uc, (int)register, data)); + data = BitConverter.GetBytes(value.GetUInt64(1)); + Interface.Checked(Interface.uc_reg_write(uc, (int)register + 1, data)); + } + + public byte[] MemoryRead(ulong address, ulong size) + { + byte[] value = new byte[size]; + + Interface.Checked(Interface.uc_mem_read(uc, address, value, size)); + + return value; + } + + public byte MemoryRead8(ulong address) => MemoryRead(address, 1)[0]; + public UInt16 MemoryRead16(ulong address) => (UInt16)BitConverter.ToInt16(MemoryRead(address, 2), 0); + public UInt32 MemoryRead32(ulong address) => (UInt32)BitConverter.ToInt32(MemoryRead(address, 4), 0); + public UInt64 MemoryRead64(ulong address) => (UInt64)BitConverter.ToInt64(MemoryRead(address, 8), 0); + + public void MemoryWrite(ulong address, byte[] value) + { + Interface.Checked(Interface.uc_mem_write(uc, address, value, (ulong)value.Length)); + } + + public void MemoryWrite8(ulong address, byte value) => MemoryWrite(address, new byte[] { value }); + public void MemoryWrite16(ulong address, Int16 value) => MemoryWrite(address, BitConverter.GetBytes(value)); + public void MemoryWrite16(ulong address, UInt16 value) => MemoryWrite(address, BitConverter.GetBytes(value)); + public void MemoryWrite32(ulong address, Int32 value) => MemoryWrite(address, BitConverter.GetBytes(value)); + public void MemoryWrite32(ulong address, UInt32 value) => MemoryWrite(address, BitConverter.GetBytes(value)); + public void MemoryWrite64(ulong address, Int64 value) => MemoryWrite(address, BitConverter.GetBytes(value)); + public void MemoryWrite64(ulong address, UInt64 value) => MemoryWrite(address, BitConverter.GetBytes(value)); + + public void MemoryMap(ulong address, ulong size, MemoryPermission permissions) + { + Interface.Checked(Interface.uc_mem_map(uc, address, size, (uint)permissions)); + } + + public void MemoryUnmap(ulong address, ulong size) + { + Interface.Checked(Interface.uc_mem_unmap(uc, address, size)); + } + + public void MemoryProtect(ulong address, ulong size, MemoryPermission permissions) + { + Interface.Checked(Interface.uc_mem_protect(uc, address, size, (uint)permissions)); + } + + public static bool IsAvailable() + { + try + { + Interface.uc_version(out _, out _); + + return true; + } + catch (DllNotFoundException) + { + return false; + } + } + } +} diff --git 
a/Ryujinx.Tests/Cpu/CpuTest32.cs b/Ryujinx.Tests/Cpu/CpuTest32.cs new file mode 100644 index 000000000..ed1d23d37 --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTest32.cs @@ -0,0 +1,530 @@ +using ARMeilleure.Memory; +using ARMeilleure.State; +using ARMeilleure.Translation; +using NUnit.Framework; +using Ryujinx.Tests.Unicorn; +using System; +using System.Runtime.InteropServices; + +namespace Ryujinx.Tests.Cpu +{ + [TestFixture] + public class CpuTest32 + { + private uint _currAddress; + private long _size; + + private uint _entryPoint; + + private IntPtr _ramPointer; + + private MemoryManager _memory; + + private ExecutionContext _context; + + private Translator _translator; + + private static bool _unicornAvailable; + private UnicornAArch32 _unicornEmu; + + private bool usingMemory; + + static CpuTest32() + { + _unicornAvailable = UnicornAArch32.IsAvailable(); + + if (!_unicornAvailable) + { + Console.WriteLine("WARNING: Could not find Unicorn."); + } + } + + [SetUp] + public void Setup() + { + _currAddress = 0x1000; + _size = 0x1000; + + _entryPoint = _currAddress; + + _ramPointer = Marshal.AllocHGlobal(new IntPtr(_size * 2)); + _memory = new MemoryManager(_ramPointer, addressSpaceBits: 16, useFlatPageTable: true); + _memory.Map((long)_currAddress, 0, _size*2); + + _context = new ExecutionContext(); + _context.IsAarch32 = true; + + _translator = new Translator(_memory); + + if (_unicornAvailable) + { + _unicornEmu = new UnicornAArch32(); + _unicornEmu.MemoryMap(_currAddress, (ulong)_size, MemoryPermission.READ | MemoryPermission.EXEC); + _unicornEmu.MemoryMap((ulong)(_currAddress + _size), (ulong)_size, MemoryPermission.READ | MemoryPermission.WRITE); + _unicornEmu.PC = _entryPoint; + } + } + + [TearDown] + public void Teardown() + { + Marshal.FreeHGlobal(_ramPointer); + _memory = null; + _context = null; + _translator = null; + _unicornEmu = null; + } + + protected void Reset() + { + Teardown(); + Setup(); + } + + protected void Opcode(uint opcode) + { + _memory.WriteUInt32((long)_currAddress, opcode); + + if (_unicornAvailable) + { + _unicornEmu.MemoryWrite32((ulong)_currAddress, opcode); + } + + _currAddress += 4; + } + + protected ExecutionContext GetContext() => _context; + protected void SetContext(uint r0 = 0, + uint r1 = 0, + uint r2 = 0, + uint r3 = 0, + uint sp = 0, + V128 v0 = default, + V128 v1 = default, + V128 v2 = default, + V128 v3 = default, + V128 v4 = default, + V128 v5 = default, + V128 v14 = default, + V128 v15 = default, + bool overflow = false, + bool carry = false, + bool zero = false, + bool negative = false, + int fpscr = 0) + { + _context.SetX(0, r0); + _context.SetX(1, r1); + _context.SetX(2, r2); + _context.SetX(3, r3); + + _context.SetX(0xd, sp); + + _context.SetV(0, v0); + _context.SetV(1, v1); + _context.SetV(2, v2); + _context.SetV(3, v3); + _context.SetV(4, v4); + _context.SetV(5, v5); + _context.SetV(14, v14); + _context.SetV(15, v15); + + _context.SetPstateFlag(PState.VFlag, overflow); + _context.SetPstateFlag(PState.CFlag, carry); + _context.SetPstateFlag(PState.ZFlag, zero); + _context.SetPstateFlag(PState.NFlag, negative); + + _context.Fpsr = FPSR.A32Mask & (FPSR)fpscr; + _context.Fpcr = FPCR.A32Mask & (FPCR)fpscr; + + if (_unicornAvailable) + { + _unicornEmu.R[0] = r0; + _unicornEmu.R[1] = r1; + _unicornEmu.R[2] = r2; + _unicornEmu.R[3] = r3; + + _unicornEmu.SP = sp; + + _unicornEmu.Q[0] = V128ToSimdValue(v0); + _unicornEmu.Q[1] = V128ToSimdValue(v1); + _unicornEmu.Q[2] = V128ToSimdValue(v2); + _unicornEmu.Q[3] = V128ToSimdValue(v3); + _unicornEmu.Q[4] = 
V128ToSimdValue(v4); + _unicornEmu.Q[5] = V128ToSimdValue(v5); + _unicornEmu.Q[14] = V128ToSimdValue(v14); + _unicornEmu.Q[15] = V128ToSimdValue(v15); + + _unicornEmu.OverflowFlag = overflow; + _unicornEmu.CarryFlag = carry; + _unicornEmu.ZeroFlag = zero; + _unicornEmu.NegativeFlag = negative; + + _unicornEmu.Fpscr = fpscr; + } + } + + protected void ExecuteOpcodes() + { + _translator.Execute(_context, _entryPoint); + + if (_unicornAvailable) + { + _unicornEmu.RunForCount((ulong)(_currAddress - _entryPoint - 4) / 4); + } + } + + protected ExecutionContext SingleOpcode(uint opcode, + uint r0 = 0, + uint r1 = 0, + uint r2 = 0, + uint r3 = 0, + uint sp = 0, + V128 v0 = default, + V128 v1 = default, + V128 v2 = default, + V128 v3 = default, + V128 v4 = default, + V128 v5 = default, + V128 v14 = default, + V128 v15 = default, + bool overflow = false, + bool carry = false, + bool zero = false, + bool negative = false, + int fpscr = 0, + bool copyFpFlags = false) + { + Opcode(opcode); + if (copyFpFlags) + { + Opcode(0xeef1fa10); + } + Opcode(0xe12fff1e); // BX LR + SetContext(r0, r1, r2, r3, sp, v0, v1, v2, v3, v4, v5, v14, v15, overflow, carry, zero, negative, fpscr); + ExecuteOpcodes(); + + return GetContext(); + } + + protected void SetWorkingMemory(byte[] data) + { + _memory.WriteBytes(0x2000, data); + + if (_unicornAvailable) + { + _unicornEmu.MemoryWrite((ulong)(0x2000), data); + } + + usingMemory = true; // When true, CompareAgainstUnicorn checks the working memory for equality too. + } + + /// Rounding Mode control field. + public enum RMode + { + /// Round to Nearest mode. + Rn, + /// Round towards Plus Infinity mode. + Rp, + /// Round towards Minus Infinity mode. + Rm, + /// Round towards Zero mode. + Rz + }; + + /// Floating-point Control Register. + protected enum Fpcr + { + /// Rounding Mode control field. + RMode = 22, + /// Flush-to-zero mode control bit. + Fz = 24, + /// Default NaN mode control bit. + Dn = 25, + /// Alternative half-precision control bit. + Ahp = 26 + } + + /// Floating-point Status Register. + [Flags] + protected enum Fpsr + { + None = 0, + + /// Invalid Operation cumulative floating-point exception bit. + Ioc = 1 << 0, + /// Divide by Zero cumulative floating-point exception bit. + Dzc = 1 << 1, + /// Overflow cumulative floating-point exception bit. + Ofc = 1 << 2, + /// Underflow cumulative floating-point exception bit. + Ufc = 1 << 3, + /// Inexact cumulative floating-point exception bit. + Ixc = 1 << 4, + /// Input Denormal cumulative floating-point exception bit. + Idc = 1 << 7, + + /// Cumulative saturation bit. 
+ Qc = 1 << 27, + + /// NZCV flags + Nzcv = (1 << 28) | (1 << 29) | (1 << 30) | (1 << 31) + } + + [Flags] + protected enum FpSkips + { + None = 0, + + IfNaNS = 1, + IfNaND = 2, + + IfUnderflow = 4, + IfOverflow = 8 + } + + protected enum FpTolerances + { + None, + + UpToOneUlpsS, + UpToOneUlpsD + } + + protected void CompareAgainstUnicorn( + Fpsr fpsrMask = Fpsr.None, + FpSkips fpSkips = FpSkips.None, + FpTolerances fpTolerances = FpTolerances.None) + { + if (!_unicornAvailable) + { + return; + } + + if (fpSkips != FpSkips.None) + { + ManageFpSkips(fpSkips); + } + + Assert.That(_context.GetX(0), Is.EqualTo(_unicornEmu.R[0])); + Assert.That(_context.GetX(1), Is.EqualTo(_unicornEmu.R[1])); + Assert.That(_context.GetX(2), Is.EqualTo(_unicornEmu.R[2])); + Assert.That(_context.GetX(3), Is.EqualTo(_unicornEmu.R[3])); + Assert.That(_context.GetX(4), Is.EqualTo(_unicornEmu.R[4])); + Assert.That(_context.GetX(5), Is.EqualTo(_unicornEmu.R[5])); + Assert.That(_context.GetX(6), Is.EqualTo(_unicornEmu.R[6])); + Assert.That(_context.GetX(7), Is.EqualTo(_unicornEmu.R[7])); + Assert.That(_context.GetX(8), Is.EqualTo(_unicornEmu.R[8])); + Assert.That(_context.GetX(9), Is.EqualTo(_unicornEmu.R[9])); + Assert.That(_context.GetX(10), Is.EqualTo(_unicornEmu.R[10])); + Assert.That(_context.GetX(11), Is.EqualTo(_unicornEmu.R[11])); + Assert.That(_context.GetX(12), Is.EqualTo(_unicornEmu.R[12])); + Assert.That(_context.GetX(13), Is.EqualTo(_unicornEmu.R[13])); + Assert.That(_context.GetX(14), Is.EqualTo(_unicornEmu.R[14])); + + if (fpTolerances == FpTolerances.None) + { + Assert.That(V128ToSimdValue(_context.GetV(0)), Is.EqualTo(_unicornEmu.Q[0])); + } + else + { + ManageFpTolerances(fpTolerances); + } + Assert.That(V128ToSimdValue(_context.GetV(1)), Is.EqualTo(_unicornEmu.Q[1])); + Assert.That(V128ToSimdValue(_context.GetV(2)), Is.EqualTo(_unicornEmu.Q[2])); + Assert.That(V128ToSimdValue(_context.GetV(3)), Is.EqualTo(_unicornEmu.Q[3])); + Assert.That(V128ToSimdValue(_context.GetV(4)), Is.EqualTo(_unicornEmu.Q[4])); + Assert.That(V128ToSimdValue(_context.GetV(5)), Is.EqualTo(_unicornEmu.Q[5])); + Assert.That(V128ToSimdValue(_context.GetV(6)), Is.EqualTo(_unicornEmu.Q[6])); + Assert.That(V128ToSimdValue(_context.GetV(7)), Is.EqualTo(_unicornEmu.Q[7])); + Assert.That(V128ToSimdValue(_context.GetV(8)), Is.EqualTo(_unicornEmu.Q[8])); + Assert.That(V128ToSimdValue(_context.GetV(9)), Is.EqualTo(_unicornEmu.Q[9])); + Assert.That(V128ToSimdValue(_context.GetV(10)), Is.EqualTo(_unicornEmu.Q[10])); + Assert.That(V128ToSimdValue(_context.GetV(11)), Is.EqualTo(_unicornEmu.Q[11])); + Assert.That(V128ToSimdValue(_context.GetV(12)), Is.EqualTo(_unicornEmu.Q[12])); + Assert.That(V128ToSimdValue(_context.GetV(13)), Is.EqualTo(_unicornEmu.Q[13])); + Assert.That(V128ToSimdValue(_context.GetV(14)), Is.EqualTo(_unicornEmu.Q[14])); + Assert.That(V128ToSimdValue(_context.GetV(15)), Is.EqualTo(_unicornEmu.Q[15])); + + Assert.That((int)_context.Fpcr | ((int)_context.Fpsr & (int)fpsrMask), Is.EqualTo(_unicornEmu.Fpscr)); + + Assert.That(_context.GetPstateFlag(PState.VFlag), Is.EqualTo(_unicornEmu.OverflowFlag)); + Assert.That(_context.GetPstateFlag(PState.CFlag), Is.EqualTo(_unicornEmu.CarryFlag)); + Assert.That(_context.GetPstateFlag(PState.ZFlag), Is.EqualTo(_unicornEmu.ZeroFlag)); + Assert.That(_context.GetPstateFlag(PState.NFlag), Is.EqualTo(_unicornEmu.NegativeFlag)); + + if (usingMemory) + { + byte[] meilleureMem = _memory.ReadBytes((long)(0x2000), _size); + byte[] unicornMem = _unicornEmu.MemoryRead((ulong)(0x2000), (ulong)_size); 
+ + for (int i = 0; i < _size; i++) + { + Assert.AreEqual(meilleureMem[i], unicornMem[i]); + } + } + } + + private void ManageFpSkips(FpSkips fpSkips) + { + if (fpSkips.HasFlag(FpSkips.IfNaNS)) + { + if (float.IsNaN(_unicornEmu.Q[0].AsFloat())) + { + Assert.Ignore("NaN test."); + } + } + else if (fpSkips.HasFlag(FpSkips.IfNaND)) + { + if (double.IsNaN(_unicornEmu.Q[0].AsDouble())) + { + Assert.Ignore("NaN test."); + } + } + + if (fpSkips.HasFlag(FpSkips.IfUnderflow)) + { + if ((_unicornEmu.Fpscr & (int)Fpsr.Ufc) != 0) + { + Assert.Ignore("Underflow test."); + } + } + + if (fpSkips.HasFlag(FpSkips.IfOverflow)) + { + if ((_unicornEmu.Fpscr & (int)Fpsr.Ofc) != 0) + { + Assert.Ignore("Overflow test."); + } + } + } + + private void ManageFpTolerances(FpTolerances fpTolerances) + { + bool IsNormalOrSubnormalS(float f) => float.IsNormal(f) || float.IsSubnormal(f); + bool IsNormalOrSubnormalD(double d) => double.IsNormal(d) || double.IsSubnormal(d); + + if (!Is.EqualTo(_unicornEmu.Q[0]).ApplyTo(V128ToSimdValue(_context.GetV(0))).IsSuccess) + { + if (fpTolerances == FpTolerances.UpToOneUlpsS) + { + if (IsNormalOrSubnormalS(_unicornEmu.Q[0].AsFloat()) && + IsNormalOrSubnormalS(_context.GetV(0).AsFloat())) + { + Assert.That(_context.GetV(0).GetFloat(0), + Is.EqualTo(_unicornEmu.Q[0].GetFloat(0)).Within(1).Ulps); + Assert.That(_context.GetV(0).GetFloat(1), + Is.EqualTo(_unicornEmu.Q[0].GetFloat(1)).Within(1).Ulps); + Assert.That(_context.GetV(0).GetFloat(2), + Is.EqualTo(_unicornEmu.Q[0].GetFloat(2)).Within(1).Ulps); + Assert.That(_context.GetV(0).GetFloat(3), + Is.EqualTo(_unicornEmu.Q[0].GetFloat(3)).Within(1).Ulps); + + Console.WriteLine(fpTolerances); + } + else + { + Assert.That(V128ToSimdValue(_context.GetV(0)), Is.EqualTo(_unicornEmu.Q[0])); + } + } + + if (fpTolerances == FpTolerances.UpToOneUlpsD) + { + if (IsNormalOrSubnormalD(_unicornEmu.Q[0].AsDouble()) && + IsNormalOrSubnormalD(_context.GetV(0).AsDouble())) + { + Assert.That(_context.GetV(0).GetDouble(0), + Is.EqualTo(_unicornEmu.Q[0].GetDouble(0)).Within(1).Ulps); + Assert.That(_context.GetV(0).GetDouble(1), + Is.EqualTo(_unicornEmu.Q[0].GetDouble(1)).Within(1).Ulps); + + Console.WriteLine(fpTolerances); + } + else + { + Assert.That(V128ToSimdValue(_context.GetV(0)), Is.EqualTo(_unicornEmu.Q[0])); + } + } + } + } + + private static SimdValue V128ToSimdValue(V128 value) + { + return new SimdValue(value.GetUInt64(0), value.GetUInt64(1)); + } + + protected static V128 MakeVectorScalar(float value) => new V128(value); + protected static V128 MakeVectorScalar(double value) => new V128(value); + + protected static V128 MakeVectorE0(ulong e0) => new V128(e0, 0); + protected static V128 MakeVectorE1(ulong e1) => new V128(0, e1); + + protected static V128 MakeVectorE0E1(ulong e0, ulong e1) => new V128(e0, e1); + + protected static ulong GetVectorE0(V128 vector) => vector.GetUInt64(0); + protected static ulong GetVectorE1(V128 vector) => vector.GetUInt64(1); + + protected static ushort GenNormalH() + { + uint rnd; + + do rnd = TestContext.CurrentContext.Random.NextUShort(); + while ((rnd & 0x7C00u) == 0u || + (~rnd & 0x7C00u) == 0u); + + return (ushort)rnd; + } + + protected static ushort GenSubnormalH() + { + uint rnd; + + do rnd = TestContext.CurrentContext.Random.NextUShort(); + while ((rnd & 0x03FFu) == 0u); + + return (ushort)(rnd & 0x83FFu); + } + + protected static uint GenNormalS() + { + uint rnd; + + do rnd = TestContext.CurrentContext.Random.NextUInt(); + while ((rnd & 0x7F800000u) == 0u || + (~rnd & 0x7F800000u) == 0u); + + return rnd; 
+ } + + protected static uint GenSubnormalS() + { + uint rnd; + + do rnd = TestContext.CurrentContext.Random.NextUInt(); + while ((rnd & 0x007FFFFFu) == 0u); + + return rnd & 0x807FFFFFu; + } + + protected static ulong GenNormalD() + { + ulong rnd; + + do rnd = TestContext.CurrentContext.Random.NextULong(); + while ((rnd & 0x7FF0000000000000ul) == 0ul || + (~rnd & 0x7FF0000000000000ul) == 0ul); + + return rnd; + } + + protected static ulong GenSubnormalD() + { + ulong rnd; + + do rnd = TestContext.CurrentContext.Random.NextULong(); + while ((rnd & 0x000FFFFFFFFFFFFFul) == 0ul); + + return rnd & 0x800FFFFFFFFFFFFFul; + } + } +} diff --git a/Ryujinx.Tests/Cpu/CpuTestAlu32.cs b/Ryujinx.Tests/Cpu/CpuTestAlu32.cs new file mode 100644 index 000000000..145417ae2 --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestAlu32.cs @@ -0,0 +1,61 @@ +#define Alu32 + +using NUnit.Framework; +using System; + +namespace Ryujinx.Tests.Cpu +{ + [Category("Alu32")] + public sealed class CpuTestAlu32 : CpuTest32 + { +#if Alu32 + +#region "ValueSource (Opcodes)" + private static uint[] _Lsr_Lsl_Asr_Ror_() + { + return new uint[] + { + 0xe1b00030u, // LSRS R0, R0, R0 + 0xe1b00010u, // LSLS R0, R0, R0 + 0xe1b00050u, // ASRS R0, R0, R0 + 0xe1b00070u // RORS R0, R0, R0 + }; + } +#endregion + + private const int RndCnt = 2; + + [Test, Pairwise, Description("RBIT , ")] + public void Rbit_32bit([Values(0u, 0xdu)] uint rd, + [Values(1u, 0xdu)] uint rm, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn) + { + uint opcode = 0xe6ff0f30u; // RBIT R0, R0 + opcode |= ((rm & 15) << 0) | ((rd & 15) << 12); + + uint w31 = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r1: wn, sp: w31); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] + public void Lsr_Lsl_Asr_Ror([ValueSource("_Lsr_Lsl_Asr_Ror_")] uint opcode, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint shiftValue, + [Range(0, 31)] [Values(32, 256, 768, -1, -23)] int shiftAmount) + { + uint rd = 0; + uint rm = 1; + uint rs = 2; + opcode |= ((rm & 15) << 0) | ((rd & 15) << 12) | ((rs & 15) << 8); + + SingleOpcode(opcode, r1: shiftValue, r2: (uint)shiftAmount); + + CompareAgainstUnicorn(); + } +#endif + } +} diff --git a/Ryujinx.Tests/Cpu/CpuTestAluRs32.cs b/Ryujinx.Tests/Cpu/CpuTestAluRs32.cs new file mode 100644 index 000000000..25b2c9687 --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestAluRs32.cs @@ -0,0 +1,84 @@ +#define AluRs32 + +using NUnit.Framework; + +namespace Ryujinx.Tests.Cpu +{ + [Category("AluRs32")] + public sealed class CpuTestAluRs32 : CpuTest32 + { +#if AluRs32 + +#region "ValueSource (Opcodes)" + private static uint[] _Add_Adds_Rsb_Rsbs_() + { + return new uint[] + { + 0xe0800000u, // ADD R0, R0, R0, LSL #0 + 0xe0900000u, // ADDS R0, R0, R0, LSL #0 + 0xe0600000u, // RSB R0, R0, R0, LSL #0 + 0xe0700000u // RSBS R0, R0, R0, LSL #0 + }; + } + + private static uint[] _Adc_Adcs_Rsc_Rscs_Sbc_Sbcs_() + { + return new uint[] + { + 0xe0a00000u, // ADC R0, R0, R0 + 0xe0b00000u, // ADCS R0, R0, R0 + 0xe0e00000u, // RSC R0, R0, R0 + 0xe0f00000u, // RSCS R0, R0, R0 + 0xe0c00000u, // SBC R0, R0, R0 + 0xe0d00000u // SBCS R0, R0, R0 + }; + } +#endregion + + private const int RndCnt = 2; + private const int RndCntAmount = 2; + + [Test, Pairwise] + public void Adc_Adcs_Rsc_Rscs_Sbc_Sbcs([ValueSource("_Adc_Adcs_Rsc_Rscs_Sbc_Sbcs_")] uint opcode, + [Values(0u, 13u)] uint rd, + [Values(1u, 13u)] uint rn, + [Values(2u, 13u)] uint rm, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 
0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wm, + [Values] bool carryIn) + { + opcode |= ((rm & 15) << 0) | ((rn & 15) << 16) | ((rd & 15) << 12); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r1: wn, r2: wm, sp: sp, carry: carryIn); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] + public void Add_Adds_Rsb_Rsbs([ValueSource("_Add_Adds_Rsb_Rsbs_")] uint opcode, + [Values(0u, 13u)] uint rd, + [Values(1u, 13u)] uint rn, + [Values(2u, 13u)] uint rm, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wm, + [Values(0b00u, 0b01u, 0b10u, 0b11u)] uint shift, // + [Values(0u, 15u, 16u, 31u)] [Random(0u, 31u, RndCntAmount)] uint amount) + { + opcode |= ((rm & 15) << 0) | ((rn & 15) << 16) | ((rd & 15) << 12); + opcode |= ((shift & 3) << 5) | ((amount & 31) << 7); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r1: wn, r2: wm, sp: sp); + + CompareAgainstUnicorn(); + } +#endif + } +} diff --git a/Ryujinx.Tests/Cpu/CpuTestBf32.cs b/Ryujinx.Tests/Cpu/CpuTestBf32.cs new file mode 100644 index 000000000..66b8fc062 --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestBf32.cs @@ -0,0 +1,108 @@ +#define Bf32 + +using NUnit.Framework; +using System; + +namespace Ryujinx.Tests.Cpu +{ + [Category("Bf32")] + public sealed class CpuTestBf32 : CpuTest32 + { +#if Bf32 + private const int RndCnt = 2; + private const int RndCntImmr = 2; + private const int RndCntImms = 2; + + [Test, Pairwise, Description("BFC , #, #")] + public void Bfc([Values(0u, 0xdu)] uint rd, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wd, + [Values(0u, 15u, 16u, 31u)] [Random(0u, 31u, RndCntImmr)] uint lsb, + [Values(0u, 15u, 16u, 31u)] [Random(0u, 31u, RndCntImms)] uint msb) + { + msb = Math.Max(lsb, msb); // Don't test unpredictable for now. + uint opcode = 0xe7c0001fu; // BFC R0, #0, #1 + opcode |= ((rd & 0xf) << 12); + opcode |= ((msb & 31) << 16) | ((lsb & 31) << 7); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r0: wd, sp: sp); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("BFI , , #, #")] + public void Bfi([Values(0u, 0xdu)] uint rd, + [Values(1u, 0xdu)] uint rn, + [Random(RndCnt)] uint wd, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values(0u, 15u, 16u, 31u)] [Random(0u, 31u, RndCntImmr)] uint lsb, + [Values(0u, 15u, 16u, 31u)] [Random(0u, 31u, RndCntImms)] uint msb) + { + msb = Math.Max(lsb, msb); // Don't test unpredictable for now. 
+ uint opcode = 0xe7c00010u; // BFI R0, R0, #0, #1 + opcode |= ((rd & 0xf) << 12); + opcode |= ((rn & 0xf) << 0); + opcode |= ((msb & 31) << 16) | ((lsb & 31) << 7); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r0: wd, r1: wn, sp: sp); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("UBFX , , #, #")] + public void Ubfx([Values(0u, 0xdu)] uint rd, + [Values(1u, 0xdu)] uint rn, + [Random(RndCnt)] uint wd, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values(0u, 15u, 16u, 31u)] [Random(0u, 31u, RndCntImmr)] uint lsb, + [Values(0u, 15u, 16u, 31u)] [Random(0u, 31u, RndCntImms)] uint widthm1) + { + if (lsb + widthm1 > 31) + { + widthm1 -= (lsb + widthm1) - 31; + } + uint opcode = 0xe7e00050u; // UBFX R0, R0, #0, #1 + opcode |= ((rd & 0xf) << 12); + opcode |= ((rn & 0xf) << 0); + opcode |= ((widthm1 & 31) << 16) | ((lsb & 31) << 7); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r0: wd, r1: wn, sp: sp); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("SBFX , , #, #")] + public void Sbfx([Values(0u, 0xdu)] uint rd, + [Values(1u, 0xdu)] uint rn, + [Random(RndCnt)] uint wd, + [Values(0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu)] [Random(RndCnt)] uint wn, + [Values(0u, 15u, 16u, 31u)] [Random(0u, 31u, RndCntImmr)] uint lsb, + [Values(0u, 15u, 16u, 31u)] [Random(0u, 31u, RndCntImms)] uint widthm1) + { + if (lsb + widthm1 > 31) + { + widthm1 -= (lsb + widthm1) - 31; + } + uint opcode = 0xe7a00050u; // SBFX R0, R0, #0, #1 + opcode |= ((rd & 0xf) << 12); + opcode |= ((rn & 0xf) << 0); + opcode |= ((widthm1 & 31) << 16) | ((lsb & 31) << 7); + + uint sp = TestContext.CurrentContext.Random.NextUInt(); + + SingleOpcode(opcode, r0: wd, r1: wn, sp: sp); + + CompareAgainstUnicorn(); + } +#endif + } +} diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs new file mode 100644 index 000000000..dfbd3b0bd --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs @@ -0,0 +1,61 @@ +#define SimdLogical32 + +using ARMeilleure.State; +using NUnit.Framework; +using System; + +namespace Ryujinx.Tests.Cpu +{ + [Category("SimdLogical32")] + public sealed class CpuTestSimdLogical32 : CpuTest32 + { +#if SimdLogical32 + +#region "ValueSource (Opcodes)" + private static uint[] _Vbif_Vbit_Vbsl_Vand_() + { + return new uint[] + { + 0xf3300110u, // VBIF D0, D0, D0 + 0xf3200110u, // VBIT D0, D0, D0 + 0xf3100110u, // VBSL D0, D0, D0 + 0xf2000110u // VAND D0, D0, D0 + }; + } + #endregion + + private const int RndCnt = 2; + + [Test, Pairwise] + public void Vbif_Vbit_Vbsl_Vand([ValueSource("_Vbif_Vbit_Vbsl_Vand_")] uint opcode, + [Range(0u, 4u)] uint rd, + [Range(0u, 4u)] uint rn, + [Range(0u, 4u)] uint rm, + [Random(RndCnt)] ulong z, + [Random(RndCnt)] ulong a, + [Random(RndCnt)] ulong b, + [Values] bool q) + { + if (q) + { + opcode |= 1 << 6; + rm <<= 1; + rn <<= 1; + rd <<= 1; + } + + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3); + + V128 v0 = MakeVectorE0E1(z, z); + V128 v1 = MakeVectorE0E1(a, z); + V128 v2 = MakeVectorE0E1(b, z); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); + + CompareAgainstUnicorn(); + } +#endif + } +} diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdMemory32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdMemory32.cs new file mode 100644 index 000000000..eb27d95fb --- /dev/null +++ 
b/Ryujinx.Tests/Cpu/CpuTestSimdMemory32.cs @@ -0,0 +1,319 @@ +#define SimdMemory32 + +using ARMeilleure.State; +using NUnit.Framework; +using System; + +namespace Ryujinx.Tests.Cpu +{ + [Category("SimdMemory32")] + public sealed class CpuTestSimdMemory32 : CpuTest32 + { +#if SimdMemory32 + private const int RndCntImm = 2; + + private uint[] LDSTModes = + { + // LD1 + 0b0111, + 0b1010, + 0b0110, + 0b0010, + + // LD2 + 0b1000, + 0b1001, + 0b0011, + + // LD3 + 0b0100, + 0b0101, + + // LD4 + 0b0000, + 0b0001 + }; + + [Test, Pairwise, Description("VLDn. , [ {:}]{ /!/, } (single n element structure)")] + public void Vldn_Single([Values(0u, 1u, 2u)] uint size, + [Values(0u, 13u)] uint rn, + [Values(1u, 13u, 15u)] uint rm, + [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd, + [Range(0u, 7u)] uint index, + [Range(0u, 3u)] uint n, + [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint offset) + { + var data = GenerateVectorSequence(0x1000); + SetWorkingMemory(data); + + uint opcode = 0xf4a00000u; // VLD1.8 {D0[0]}, [R0], R0 + + opcode |= ((size & 3) << 10) | ((rn & 15) << 16) | (rm & 15); + + uint index_align = (index << (int)(1 + size)) & 15; + + opcode |= (index_align) << 4; + + opcode |= ((vd & 0x10) << 18); + opcode |= ((vd & 0xf) << 12); + + opcode |= (n & 3) << 8; // LD1 is 0, LD2 is 1 etc. + + SingleOpcode(opcode, r0: 0x2500, r1: offset, sp: 0x2500); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VLDn. , [ {:}]{ /!/, } (all lanes)")] + public void Vldn_All([Values(0u, 13u)] uint rn, + [Values(1u, 13u, 15u)] uint rm, + [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd, + [Range(0u, 3u)] uint n, + [Range(0u, 2u)] uint size, + [Values] bool t, + [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint offset) + { + var data = GenerateVectorSequence(0x1000); + SetWorkingMemory(data); + + uint opcode = 0xf4a00c00u; // VLD1.8 {D0[0]}, [R0], R0 + + opcode |= ((size & 3) << 6) | ((rn & 15) << 16) | (rm & 15); + + opcode |= ((vd & 0x10) << 18); + opcode |= ((vd & 0xf) << 12); + + opcode |= (n & 3) << 8; // LD1 is 0, LD2 is 1 etc. + if (t) opcode |= 1 << 5; + + SingleOpcode(opcode, r0: 0x2500, r1: offset, sp: 0x2500); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VLDn. , [ {:}]{ /!/, } (multiple n element structures)")] + public void Vldn_Pair([Values(0u, 1u, 2u, 3u)] uint size, + [Values(0u, 13u)] uint rn, + [Values(1u, 13u, 15u)] uint rm, + [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd, + [Range(0u, 3u)] uint mode, + [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint offset) + { + var data = GenerateVectorSequence(0x1000); + SetWorkingMemory(data); + + uint opcode = 0xf4200000u; // VLD4.8 {D0, D1, D2, D3}, [R0], R0 + + opcode |= ((size & 3) << 6) | ((rn & 15) << 16) | (rm & 15) | (LDSTModes[mode] << 8); + + opcode |= ((vd & 0x10) << 18); + opcode |= ((vd & 0xf) << 12); + + SingleOpcode(opcode, r0: 0x2500, r1: offset, sp: 0x2500); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VSTn. 
, [ {:}]{ /!/, } (single n element structure)")] + public void Vstn_Single([Values(0u, 1u, 2u)] uint size, + [Values(0u, 13u)] uint rn, + [Values(1u, 13u, 15u)] uint rm, + [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd, + [Range(0u, 7u)] uint index, + [Range(0u, 3u)] uint n, + [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint offset) + { + var data = GenerateVectorSequence(0x1000); + SetWorkingMemory(data); + + (V128 vec1, V128 vec2, V128 vec3, V128 vec4) = GenerateTestVectors(); + + uint opcode = 0xf4800000u; // VST1.8 {D0[0]}, [R0], R0 + + opcode |= ((size & 3) << 10) | ((rn & 15) << 16) | (rm & 15); + + uint index_align = (index << (int)(1 + size)) & 15; + + opcode |= (index_align) << 4; + + opcode |= ((vd & 0x10) << 18); + opcode |= ((vd & 0xf) << 12); + + opcode |= (n & 3) << 8; // ST1 is 0, ST2 is 1 etc. + + SingleOpcode(opcode, r0: 0x2500, r1: offset, v1: vec1, v2: vec2, v3: vec3, v4: vec4, sp: 0x2500); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VSTn. , [ {:}]{ /!/, } (multiple n element structures)")] + public void Vstn_Pair([Values(0u, 1u, 2u, 3u)] uint size, + [Values(0u, 13u)] uint rn, + [Values(1u, 13u, 15u)] uint rm, + [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd, + [Range(0u, 3u)] uint mode, + [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint offset) + { + var data = GenerateVectorSequence(0x1000); + SetWorkingMemory(data); + + (V128 vec1, V128 vec2, V128 vec3, V128 vec4) = GenerateTestVectors(); + + uint opcode = 0xf4000000u; // VST4.8 {D0, D1, D2, D3}, [R0], R0 + + opcode |= ((size & 3) << 6) | ((rn & 15) << 16) | (rm & 15) | (LDSTModes[mode] << 8); + + opcode |= ((vd & 0x10) << 18); + opcode |= ((vd & 0xf) << 12); + + SingleOpcode(opcode, r0: 0x2500, r1: offset, v1: vec1, v2: vec2, v3: vec3, v4: vec4, sp: 0x2500); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VLDM. {!}, ")] + public void Vldm([Values(0u, 13u)] uint rn, + [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint vd, + [Range(0u, 2u)] uint mode, + [Values(0x1u, 0x32u)] [Random(2u, 31u, RndCntImm)] uint regs, + [Values] bool single) + { + var data = GenerateVectorSequence(0x1000); + SetWorkingMemory(data); + + uint opcode = 0xec100a00u; // VST4.8 {D0, D1, D2, D3}, [R0], R0 + + uint[] vldmModes = { + // Note: 3rd 0 leaves a space for "D". + 0b0100, // Increment after. + 0b0101, // Increment after. (!) + 0b1001 // Decrement before. (!) + }; + + opcode |= ((vldmModes[mode] & 15) << 21); + opcode |= ((rn & 15) << 16); + + opcode |= ((vd & 0x10) << 18); + opcode |= ((vd & 0xf) << 12); + + opcode |= ((uint)(single ? 0 : 1) << 8); + + if (!single) regs = (regs << 1); // Low bit must be 0 - must be even number of registers. + uint regSize = single ? 1u : 2u; + + if (vd + (regs / regSize) > 32) // Can't address further than S31 or D31. + { + regs -= (vd + (regs / regSize)) - 32; + } + + if (regs / regSize > 16) // Can't do more than 16 registers at a time. + { + regs = 16 * regSize; + } + + opcode |= regs & 0xff; + + SingleOpcode(opcode, r0: 0x2500, sp: 0x2500); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VLDR. 
, [ {, #{+/-}}]")] + public void Vldr([Values(2u, 3u)] uint size, // FP16 is not supported for now + [Values(0u)] uint rn, + [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint sd, + [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint imm, + [Values] bool sub) + { + var data = GenerateVectorSequence(0x1000); + SetWorkingMemory(data); + + uint opcode = 0xed900a00u; // VLDR.32 S0, [R0, #0] + opcode |= ((size & 3) << 8) | ((rn & 15) << 16); + + if (sub) + { + opcode &= ~(uint)(1 << 23); + } + + if (size == 2) + { + opcode |= ((sd & 0x1) << 22); + opcode |= ((sd & 0x1e) << 11); + } + else + { + opcode |= ((sd & 0x10) << 18); + opcode |= ((sd & 0xf) << 12); + } + opcode |= imm & 0xff; + + SingleOpcode(opcode, r0: 0x2500); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VSTR. , [ {, #{+/-}}]")] + public void Vstr([Values(2u, 3u)] uint size, // FP16 is not supported for now + [Values(0u)] uint rn, + [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint sd, + [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint imm, + [Values] bool sub) + { + var data = GenerateVectorSequence(0x1000); + SetWorkingMemory(data); + + uint opcode = 0xed800a00u; // VSTR.32 S0, [R0, #0] + opcode |= ((size & 3) << 8) | ((rn & 15) << 16); + + if (sub) + { + opcode &= ~(uint)(1 << 23); + } + + if (size == 2) + { + opcode |= ((sd & 0x1) << 22); + opcode |= ((sd & 0x1e) << 11); + } + else + { + opcode |= ((sd & 0x10) << 18); + opcode |= ((sd & 0xf) << 12); + } + opcode |= imm & 0xff; + + (V128 vec1, V128 vec2, _, _) = GenerateTestVectors(); + + SingleOpcode(opcode, r0: 0x2500, v0: vec1, v1: vec2); + + CompareAgainstUnicorn(); + } + + private (V128, V128, V128, V128) GenerateTestVectors() + { + return ( + new V128(-12.43f, 1872.23f, 4456.23f, -5622.2f), + new V128(0.0f, float.NaN, float.PositiveInfinity, float.NegativeInfinity), + new V128(1.23e10f, -0.0f, -0.123f, 0.123f), + new V128(float.Epsilon, 3.5f, 925.23f, -104.9f) + ); + } + + private byte[] GenerateVectorSequence(int length) + { + int floatLength = length >> 2; + float[] data = new float[floatLength]; + + for (int i = 0; i < floatLength; i++) + { + data[i] = i + (i / 9f); + } + + var result = new byte[length]; + Buffer.BlockCopy(data, 0, result, 0, result.Length); + return result; + } +#endif + } +} diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs new file mode 100644 index 000000000..13d610788 --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs @@ -0,0 +1,494 @@ +#define SimdMov32 + +using ARMeilleure.State; +using NUnit.Framework; +using System; + +namespace Ryujinx.Tests.Cpu +{ + [Category("SimdMov32")] + public sealed class CpuTestSimdMov32 : CpuTest32 + { +#if SimdMov32 + private const int RndCntImm = 2; + + [Test, Pairwise, Description("VMOV.I
, #")] + public void Movi_V([Range(0u, 10u)] uint variant, + [Values(0u, 1u, 2u, 3u)] uint vd, + [Values(0x0u)] [Random(1u, 0xffu, RndCntImm)] uint imm, + [Values] bool q) + { + uint[] variants = + { + // I32 + 0b0000_0, + 0b0010_0, + 0b0100_0, + 0b0110_0, + + // I16 + 0b1000_0, + 0b1010_0, + + // DT + 0b1100_0, + 0b1101_0, + 0b1110_0, + 0b1111_0, + + 0b1110_1 + }; + + + uint opcode = 0xf2800010u; // VMOV.I32 D0, #0 + uint cmodeOp = variants[variant]; + + if (q) + { + vd <<= 1; + } + + opcode |= ((cmodeOp & 1) << 5) | ((cmodeOp & 0x1e) << 7); + opcode |= ((q ? 1u : 0u) << 6); + opcode |= (imm & 0xf) | ((imm & 0x70) << 12) | ((imm & 0x80) << 16); + + opcode |= ((vd & 0x10) << 18); + opcode |= ((vd & 0xf) << 12); + + SingleOpcode(opcode); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VMOV.F , #")] + public void Movi_S([Range(2u, 3u)] uint size, + [Values(0u, 1u, 2u, 3u)] uint vd, + [Values(0x0u)] [Random(0u, 0xffu, RndCntImm)] uint imm) + { + uint opcode = 0xeeb00800u; + opcode |= (size & 3) << 8; + opcode |= (imm & 0xf) | ((imm & 0xf0) << 12); + + if (size == 2) + { + opcode |= ((vd & 0x1) << 22); + opcode |= ((vd & 0x1e) << 11); + } + else + { + opcode |= ((vd & 0x10) << 18); + opcode |= ((vd & 0xf) << 12); + } + + SingleOpcode(opcode); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VMOV , ")] + public void Mov_GP([Values(0u, 1u, 2u, 3u)] uint vn, + [Values(0u, 1u, 2u, 3u)] uint rt, + [Random(RndCntImm)] uint valueRn, + [Random(RndCntImm)] ulong valueVn1, + [Random(RndCntImm)] ulong valueVn2, + [Values] bool op) + { + uint opcode = 0xee000a10u; // VMOV S0, R0 + opcode |= (vn & 1) << 7; + opcode |= (vn & 0x1e) << 15; + opcode |= (rt & 0xf) << 12; + + if (op) opcode |= 1 << 20; + + SingleOpcode(opcode, r0: valueRn, r1: valueRn, r2: valueRn, r3: valueRn, v0: new V128(valueVn1, valueVn2)); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VMOV. 
, ")] + public void Mov_GP_Elem([Range(0u, 7u)] uint vn, + [Values(0u, 1u, 2u, 3u)] uint rt, + [Range(0u, 2u)] uint size, + [Range(0u, 7u)] uint index, + [Random(1)] uint valueRn, + [Random(1)] ulong valueVn1, + [Random(1)] ulong valueVn2, + [Values] bool op, + [Values] bool u) + { + uint opcode = 0xee000b10u; // VMOV.32 D0[0], R0 + + uint opEncode = 0b01000; + switch (size) + { + case 0: + opEncode = (0b1000) | index & 7; + break; + case 1: + opEncode = (0b0001) | ((index & 3) << 1); + break; + case 2: + opEncode = (index & 1) << 2; + break; + } + + opcode |= ((opEncode >> 2) << 21) | ((opEncode & 3) << 5); + + opcode |= (vn & 0x10) << 3; + opcode |= (vn & 0xf) << 16; + opcode |= (rt & 0xf) << 12; + + if (op) + { + opcode |= 1 << 20; + if (u && size != 2) + { + opcode |= 1 << 23; + } + } + + SingleOpcode(opcode, r0: valueRn, r1: valueRn, r2: valueRn, r3: valueRn, v0: new V128(valueVn1, valueVn2), v1: new V128(valueVn2, valueVn1)); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("(VMOV , , ), (VMOV , , )")] + public void Mov_GP_D([Values(0u, 1u, 2u, 3u)] uint vm, + [Values(0u, 1u, 2u, 3u)] uint rt, + [Values(0u, 1u, 2u, 3u)] uint rt2, + [Random(RndCntImm)] uint valueRt1, + [Random(RndCntImm)] uint valueRt2, + [Random(RndCntImm)] ulong valueVn1, + [Random(RndCntImm)] ulong valueVn2, + [Values] bool op) + { + uint opcode = 0xec400b10u; // VMOV D0, R0, R0 + opcode |= (vm & 0x10) << 1; + opcode |= (vm & 0xf); + opcode |= (rt & 0xf) << 12; + opcode |= (rt2 & 0xf) << 16; + + if (op) + { + opcode |= 1 << 20; + } + + SingleOpcode(opcode, r0: valueRt1, r1: valueRt2, r2: valueRt1, r3: valueRt2, v0: new V128(valueVn1, valueVn2)); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("(VMOV , , , ), (VMOV , , , )")] + public void Mov_GP_2([Range(0u, 7u)] uint vm, + [Values(0u, 1u, 2u, 3u)] uint rt, + [Values(0u, 1u, 2u, 3u)] uint rt2, + [Random(RndCntImm)] uint valueRt1, + [Random(RndCntImm)] uint valueRt2, + [Random(RndCntImm)] ulong valueVn1, + [Random(RndCntImm)] ulong valueVn2, + [Values] bool op) + { + uint opcode = 0xec400a10u; // VMOV S0, S1, R0, R0 + opcode |= (vm & 1) << 5; + opcode |= (vm & 0x1e) >> 1; + opcode |= (rt & 0xf) << 12; + opcode |= (rt2 & 0xf) << 16; + + if (op) + { + opcode |= 1 << 20; + } + + SingleOpcode(opcode, r0: valueRt1, r1: valueRt2, r2: valueRt1, r3: valueRt2, v0: new V128(valueVn1, valueVn2), v1: new V128(valueVn2, valueVn1)); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VMOVN.
, ")] + public void Movn_V([Range(0u, 1u, 2u)] uint size, + [Values(0u, 1u, 2u, 3u)] uint vd, + [Values(0u, 2u, 4u, 8u)] uint vm) + { + uint opcode = 0xf3b20200u; // VMOVN.I16 D0, Q0 + + opcode |= (size & 0x3) << 18; + opcode |= ((vm & 0x10) << 1); + opcode |= ((vm & 0xf) << 0); + + opcode |= ((vd & 0x10) << 18); + opcode |= ((vd & 0xf) << 12); + + V128 v0 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + V128 v1 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + V128 v2 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + V128 v3 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2, v3: v3); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VTRN. , ")] + public void Vtrn([Values(0u, 1u, 2u, 3u)] uint vm, + [Values(0u, 1u, 2u, 3u)] uint vd, + [Values(0u, 1u, 2u)] uint size, + [Values] bool q) + { + uint opcode = 0xf3b20080u; // VTRN.8 D0, D0 + if (vm == vd) + { + return; // Undefined. + } + + if (q) + { + opcode |= 1 << 6; + vd <<= 1; vm <<= 1; + } + opcode |= (vm & 0x10) << 1; + opcode |= (vm & 0xf); + opcode |= (vd & 0x10) << 18; + opcode |= (vd & 0xf) << 12; + opcode |= (size & 0x3) << 18; + + V128 v0 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + V128 v1 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + V128 v2 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + V128 v3 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2, v3: v3); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VZIP. , ")] + public void Vzip([Values(0u, 1u, 2u, 3u)] uint vm, + [Values(0u, 1u, 2u, 3u)] uint vd, + [Values(0u, 1u, 2u)] uint size, + [Values] bool q) + { + uint opcode = 0xf3b20180u; // VZIP.8 D0, D0 + if (vm == vd || (size == 2 && !q)) + { + return; // Undefined. + } + + if (q) + { + opcode |= 1 << 6; + vd <<= 1; vm <<= 1; + } + opcode |= (vm & 0x10) << 1; + opcode |= (vm & 0xf); + opcode |= (vd & 0x10) << 18; + opcode |= (vd & 0xf) << 12; + opcode |= (size & 0x3) << 18; + + V128 v0 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + V128 v1 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + V128 v2 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + V128 v3 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2, v3: v3); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VUZP. , ")] + public void Vuzp([Values(0u, 1u, 2u, 3u)] uint vm, + [Values(0u, 1u, 2u, 3u)] uint vd, + [Values(0u, 1u, 2u)] uint size, + [Values] bool q) + { + uint opcode = 0xf3b20100u; // VUZP.8 d0, d0 + if (vm == vd || (size == 2 && !q)) + { + return; // Undefined. 
+ } + + if (q) + { + opcode |= 1 << 6; + vd <<= 1; vm <<= 1; + } + opcode |= (vm & 0x10) << 1; + opcode |= (vm & 0xf); + opcode |= (vd & 0x10) << 18; + opcode |= (vd & 0xf) << 12; + opcode |= (size & 0x3) << 18; + + V128 v0 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + V128 v1 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + V128 v2 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + V128 v3 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2, v3: v3); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VTBL.8
, {list}, ")] + public void Vtbl([Range(0u, 6u)] uint vm, // Indices, include potentially invalid. + [Range(4u, 12u)] uint vn, // Selection. + [Values(0u, 1u)] uint vd, // Destinations. + [Range(0u, 3u)] uint length, + [Values] bool x) + { + uint opcode = 0xf3b00800u; // VTBL.8 D0, {D0}, D0 + if (vn + length > 31) + { + return; // Undefined. + } + + if (x) + { + opcode |= 1 << 6; + } + opcode |= (vm & 0x10) << 1; + opcode |= (vm & 0xf); + opcode |= (vd & 0x10) << 18; + opcode |= (vd & 0xf) << 12; + + opcode |= (vn & 0x10) << 3; + opcode |= (vn & 0xf) << 16; + opcode |= (length & 0x3) << 8; + + var rnd = TestContext.CurrentContext.Random; + V128 v2 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + V128 v3 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + V128 v4 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + V128 v5 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + + byte maxIndex = (byte)(length * 8 - 1); + byte[] b0 = new byte[16]; + byte[] b1 = new byte[16]; + for (int i=0; i<16; i++) + { + b0[i] = rnd.NextByte(maxIndex); + b1[i] = rnd.NextByte(maxIndex); + } + + V128 v0 = new V128(b0); + V128 v1 = new V128(b1); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2, v3: v3, v4: v4, v5: v5); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VEXT.8 {,} , , #")] + public void Vext([Values(0u, 1u, 2u, 3u)] uint vm, + [Values(0u, 1u, 2u, 3u)] uint vn, + [Values(0u, 1u, 2u, 3u)] uint vd, + [Values(0u, 15u)] uint imm4, + [Values] bool q) + { + uint opcode = 0xf2b00000; // VEXT.32 D0, D0, D0, #0 + + if (q) + { + opcode |= 1 << 6; + vd <<= 1; vm <<= 1; vn <<= 1; + } + else if (imm4 > 7) + { + return; // Undefined. + } + opcode |= (vm & 0x10) << 1; + opcode |= (vm & 0xf); + opcode |= (vd & 0x10) << 18; + opcode |= (vd & 0xf) << 12; + opcode |= (vn & 0x10) << 3; + opcode |= (vn & 0xf) << 16; + opcode |= (imm4 & 0xf) << 8; + + V128 v0 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + V128 v1 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + V128 v2 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + V128 v3 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2, v3: v3); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VDUP. , ")] + public void Vdup_GP([Values(0u, 1u, 2u, 3u)] uint vd, + [Values(0u, 1u, 2u, 3u)] uint rt, + [Values(0u, 1u, 2u)] uint size, + [Random(RndCntImm)] uint valueRn, + [Random(RndCntImm)] ulong valueVn1, + [Random(RndCntImm)] ulong valueVn2, + [Values] bool q) + { + uint opcode = 0xee800b10; // VDUP.32 d0, r0 + + if (q) + { + opcode |= 1 << 21; + vd <<= 1; + } + + opcode |= (vd & 0x10) << 3; + opcode |= (vd & 0xf) << 16; + opcode |= (rt & 0xf) << 12; + + opcode |= (size & 1) << 5; // E + opcode |= (size & 2) << 21; // B + + V128 v1 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + + SingleOpcode(opcode, r0: valueRn, r1: valueRn, r2: valueRn, r3: valueRn, v0: new V128(valueVn1, valueVn2), v1: v1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VDUP. 
, ")] + public void Vdup_S([Values(0u, 1u, 2u, 3u)] uint vd, + [Values(0u, 1u, 2u, 3u)] uint vm, + [Values(0u, 1u, 2u)] uint size, + [Range(0u, 7u)] uint index, + [Random(RndCntImm)] ulong valueVn1, + [Random(RndCntImm)] ulong valueVn2, + [Values] bool q) + { + uint opcode = 0xf3b00c00; + + if (q) + { + opcode |= 1 << 6; + vd <<= 1; + } + + opcode |= (vd & 0x10) << 18; + opcode |= (vd & 0xf) << 12; + opcode |= (vm & 0x10) << 1; + opcode |= (vm & 0xf); + + uint imm4 = 0; + switch (size) + { + case 0: + imm4 |= 0b0100 | ((index & 1) << 3); + break; + case 1: + imm4 |= 0b0010 | ((index & 3) << 2); + break; + case 2: + imm4 |= 0b0001 | ((index & 7) << 1); + break; + } + + opcode |= imm4 << 16; + + V128 v1 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + V128 v2 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + V128 v3 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + + SingleOpcode(opcode, v0: new V128(valueVn1, valueVn2), v1: v1, v2: v2, v3: v3); + + CompareAgainstUnicorn(); + } +#endif + } +} diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs new file mode 100644 index 000000000..a3ba93691 --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs @@ -0,0 +1,351 @@ +#define SimdReg32 + +using ARMeilleure.State; +using NUnit.Framework; +using System; +using System.Collections.Generic; + +namespace Ryujinx.Tests.Cpu +{ + [Category("SimdReg32")] + public sealed class CpuTestSimdReg32 : CpuTest32 + { +#if SimdReg32 + +#region "ValueSource (Types)" + private static ulong[] _1B1H1S1D_() + { + return new ulong[] { 0x0000000000000000ul, 0x000000000000007Ful, + 0x0000000000000080ul, 0x00000000000000FFul, + 0x0000000000007FFFul, 0x0000000000008000ul, + 0x000000000000FFFFul, 0x000000007FFFFFFFul, + 0x0000000080000000ul, 0x00000000FFFFFFFFul, + 0x7FFFFFFFFFFFFFFFul, 0x8000000000000000ul, + 0xFFFFFFFFFFFFFFFFul }; + } + + private static ulong[] _1D_() + { + return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul, + 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul }; + } + + private static ulong[] _1H1S_() + { + return new ulong[] { 0x0000000000000000ul, 0x0000000000007FFFul, + 0x0000000000008000ul, 0x000000000000FFFFul, + 0x000000007FFFFFFFul, 0x0000000080000000ul, + 0x00000000FFFFFFFFul }; + } + + private static ulong[] _4H2S_() + { + return new ulong[] { 0x0000000000000000ul, 0x7FFF7FFF7FFF7FFFul, + 0x8000800080008000ul, 0x7FFFFFFF7FFFFFFFul, + 0x8000000080000000ul, 0xFFFFFFFFFFFFFFFFul }; + } + + private static ulong[] _4H2S1D_() + { + return new ulong[] { 0x0000000000000000ul, 0x7FFF7FFF7FFF7FFFul, + 0x8000800080008000ul, 0x7FFFFFFF7FFFFFFFul, + 0x8000000080000000ul, 0x7FFFFFFFFFFFFFFFul, + 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul }; + } + + private static ulong[] _8B_() + { + return new ulong[] { 0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful, + 0x8080808080808080ul, 0xFFFFFFFFFFFFFFFFul }; + } + + private static ulong[] _8B4H2S_() + { + return new ulong[] { 0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful, + 0x8080808080808080ul, 0x7FFF7FFF7FFF7FFFul, + 0x8000800080008000ul, 0x7FFFFFFF7FFFFFFFul, + 0x8000000080000000ul, 0xFFFFFFFFFFFFFFFFul }; + } + + private static ulong[] _8B4H2S1D_() + { + return new ulong[] { 0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful, + 0x8080808080808080ul, 0x7FFF7FFF7FFF7FFFul, + 0x8000800080008000ul, 0x7FFFFFFF7FFFFFFFul, + 0x8000000080000000ul, 0x7FFFFFFFFFFFFFFFul, + 
0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul }; + } + + private static IEnumerable _1S_F_() + { + yield return 0x00000000FF7FFFFFul; // -Max Normal (float.MinValue) + yield return 0x0000000080800000ul; // -Min Normal + yield return 0x00000000807FFFFFul; // -Max Subnormal + yield return 0x0000000080000001ul; // -Min Subnormal (-float.Epsilon) + yield return 0x000000007F7FFFFFul; // +Max Normal (float.MaxValue) + yield return 0x0000000000800000ul; // +Min Normal + yield return 0x00000000007FFFFFul; // +Max Subnormal + yield return 0x0000000000000001ul; // +Min Subnormal (float.Epsilon) + + if (!NoZeros) + { + yield return 0x0000000080000000ul; // -Zero + yield return 0x0000000000000000ul; // +Zero + } + + if (!NoInfs) + { + yield return 0x00000000FF800000ul; // -Infinity + yield return 0x000000007F800000ul; // +Infinity + } + + if (!NoNaNs) + { + yield return 0x00000000FFC00000ul; // -QNaN (all zeros payload) (float.NaN) + yield return 0x00000000FFBFFFFFul; // -SNaN (all ones payload) + yield return 0x000000007FC00000ul; // +QNaN (all zeros payload) (-float.NaN) (DefaultNaN) + yield return 0x000000007FBFFFFFul; // +SNaN (all ones payload) + } + + for (int cnt = 1; cnt <= RndCnt; cnt++) + { + ulong grbg = TestContext.CurrentContext.Random.NextUInt(); + ulong rnd1 = GenNormalS(); + ulong rnd2 = GenSubnormalS(); + + yield return (grbg << 32) | rnd1; + yield return (grbg << 32) | rnd2; + } + } + + private static IEnumerable _2S_F_() + { + yield return 0xFF7FFFFFFF7FFFFFul; // -Max Normal (float.MinValue) + yield return 0x8080000080800000ul; // -Min Normal + yield return 0x807FFFFF807FFFFFul; // -Max Subnormal + yield return 0x8000000180000001ul; // -Min Subnormal (-float.Epsilon) + yield return 0x7F7FFFFF7F7FFFFFul; // +Max Normal (float.MaxValue) + yield return 0x0080000000800000ul; // +Min Normal + yield return 0x007FFFFF007FFFFFul; // +Max Subnormal + yield return 0x0000000100000001ul; // +Min Subnormal (float.Epsilon) + + if (!NoZeros) + { + yield return 0x8000000080000000ul; // -Zero + yield return 0x0000000000000000ul; // +Zero + } + + if (!NoInfs) + { + yield return 0xFF800000FF800000ul; // -Infinity + yield return 0x7F8000007F800000ul; // +Infinity + } + + if (!NoNaNs) + { + yield return 0xFFC00000FFC00000ul; // -QNaN (all zeros payload) (float.NaN) + yield return 0xFFBFFFFFFFBFFFFFul; // -SNaN (all ones payload) + yield return 0x7FC000007FC00000ul; // +QNaN (all zeros payload) (-float.NaN) (DefaultNaN) + yield return 0x7FBFFFFF7FBFFFFFul; // +SNaN (all ones payload) + } + + for (int cnt = 1; cnt <= RndCnt; cnt++) + { + ulong rnd1 = GenNormalS(); + ulong rnd2 = GenSubnormalS(); + + yield return (rnd1 << 32) | rnd1; + yield return (rnd2 << 32) | rnd2; + } + } + + private static IEnumerable _1D_F_() + { + yield return 0xFFEFFFFFFFFFFFFFul; // -Max Normal (double.MinValue) + yield return 0x8010000000000000ul; // -Min Normal + yield return 0x800FFFFFFFFFFFFFul; // -Max Subnormal + yield return 0x8000000000000001ul; // -Min Subnormal (-double.Epsilon) + yield return 0x7FEFFFFFFFFFFFFFul; // +Max Normal (double.MaxValue) + yield return 0x0010000000000000ul; // +Min Normal + yield return 0x000FFFFFFFFFFFFFul; // +Max Subnormal + yield return 0x0000000000000001ul; // +Min Subnormal (double.Epsilon) + + if (!NoZeros) + { + yield return 0x8000000000000000ul; // -Zero + yield return 0x0000000000000000ul; // +Zero + } + + if (!NoInfs) + { + yield return 0xFFF0000000000000ul; // -Infinity + yield return 0x7FF0000000000000ul; // +Infinity + } + + if (!NoNaNs) + { + yield return 0xFFF8000000000000ul; // 
-QNaN (all zeros payload) (double.NaN) + yield return 0xFFF7FFFFFFFFFFFFul; // -SNaN (all ones payload) + yield return 0x7FF8000000000000ul; // +QNaN (all zeros payload) (-double.NaN) (DefaultNaN) + yield return 0x7FF7FFFFFFFFFFFFul; // +SNaN (all ones payload) + } + + for (int cnt = 1; cnt <= RndCnt; cnt++) + { + ulong rnd1 = GenNormalD(); + ulong rnd2 = GenSubnormalD(); + + yield return rnd1; + yield return rnd2; + } + } +#endregion + + private const int RndCnt = 2; + + private static readonly bool NoZeros = false; + private static readonly bool NoInfs = false; + private static readonly bool NoNaNs = false; + + [Explicit] + [Test, Pairwise, Description("VADD.f32 V0, V0, V0")] + public void Vadd_f32([Values(0u)] uint rd, + [Values(0u, 1u)] uint rn, + [Values(0u, 2u)] uint rm, + [ValueSource("_2S_F_")] ulong z0, + [ValueSource("_2S_F_")] ulong z1, + [ValueSource("_2S_F_")] ulong a0, + [ValueSource("_2S_F_")] ulong a1, + [ValueSource("_2S_F_")] ulong b0, + [ValueSource("_2S_F_")] ulong b1, + [Values] bool q) + { + uint opcode = 0xf2000d00u; // VADD.F32 D0, D0, D0 + if (q) + { + opcode |= 1 << 6; + rm <<= 1; + rn <<= 1; + rd <<= 1; + } + + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3); + + V128 v0 = MakeVectorE0E1(z0, z1); + V128 v1 = MakeVectorE0E1(a0, a1); + V128 v2 = MakeVectorE0E1(b0, b1); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VCMP.f Vd, Vm")] + public void Vcmp([Values(2u, 3u)] uint size, + [ValueSource("_1S_F_")] ulong a, + [ValueSource("_1S_F_")] ulong b, + [Values] bool e) + { + uint opcode = 0xeeb40840u; + uint rm = 1; + uint rd = 2; + + if (size == 3) + { + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + } + else + { + opcode |= ((rm & 0x1e) >> 1) | ((rm & 0x1) << 5); + opcode |= ((rd & 0x1e) << 11) | ((rd & 0x1) << 22); + } + + opcode |= ((size & 3) << 8); + if (e) + { + opcode |= 1 << 7; + } + + V128 v1 = MakeVectorE0(a); + V128 v2 = MakeVectorE0(b); + + bool v = TestContext.CurrentContext.Random.NextBool(); + bool c = TestContext.CurrentContext.Random.NextBool(); + bool z = TestContext.CurrentContext.Random.NextBool(); + bool n = TestContext.CurrentContext.Random.NextBool(); + + int fpscr = (int)(TestContext.CurrentContext.Random.NextUInt(0xf) << 28); + + SingleOpcode(opcode, v1: v1, v2: v2, overflow: v, carry: c, zero: z, negative: n, fpscr: fpscr, copyFpFlags: true); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("VSHL. 
<size> {<Vd>}, <Vm>, <Vn>")]
+        public void Vshl([Values(0u)] uint rd,
+                         [Values(1u, 0u)] uint rn,
+                         [Values(2u, 0u)] uint rm,
+                         [Values(0u, 1u, 2u, 3u)] uint size,
+                         [Random(RndCnt)] ulong z,
+                         [Random(RndCnt)] ulong a,
+                         [Random(RndCnt)] ulong b,
+                         [Values] bool q,
+                         [Values] bool u)
+        {
+            uint opcode = 0xf2000400u; // VSHL.S8 D0, D0, D0
+            if (q)
+            {
+                opcode |= 1 << 6;
+                rm <<= 1;
+                rn <<= 1;
+                rd <<= 1;
+            }
+
+            if (u)
+            {
+                opcode |= 1 << 24;
+            }
+
+            opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
+            opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
+            opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3);
+
+            opcode |= size << 20;
+
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, z);
+            V128 v2 = MakeVectorE0E1(b, z);
+
+            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Explicit]
+        [Test, Pairwise, Description("VPADD.f32 V0, V0, V0")]
+        public void Vpadd_f32([Values(0u)] uint rd,
+                              [Range(0u, 7u)] uint rn,
+                              [Range(0u, 7u)] uint rm)
+        {
+            // Not currently a slow path test - just a sanity check for pairwise.
+            uint opcode = 0xf3000d00u; // VPADD.F32 D0, D0, D0
+
+            opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
+            opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
+            opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3);
+
+            var rnd = TestContext.CurrentContext.Random;
+            V128 v0 = new V128(rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue));
+            V128 v1 = new V128(rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue));
+            V128 v2 = new V128(rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue), rnd.NextFloat(int.MinValue, int.MaxValue));
+
+            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
+
+            CompareAgainstUnicorn();
+        }
+#endif
+    }
+}
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs
new file mode 100644
index 000000000..6c7b0493b
--- /dev/null
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdShImm32.cs
@@ -0,0 +1,116 @@
+#define SimdShImm32
+
+using ARMeilleure.State;
+using NUnit.Framework;
+
+namespace Ryujinx.Tests.Cpu
+{
+    [Category("SimdShImm32")]
+    public sealed class CpuTestSimdShImm32 : CpuTest32
+    {
+#if SimdShImm32
+        private const int RndCnt = 2;
+
+        [Test, Pairwise, Description("VSHL.<size> {<Vd>}, <Vm>, #<imm>")]
+        public void Vshl_Imm([Values(0u)] uint rd,
+                             [Values(2u, 0u)] uint rm,
+                             [Values(0u, 1u, 2u, 3u)] uint size,
+                             [Random(RndCnt), Values(0u)] uint shiftImm,
+                             [Random(RndCnt)] ulong z,
+                             [Random(RndCnt)] ulong a,
+                             [Random(RndCnt)] ulong b,
+                             [Values] bool q)
+        {
+            uint opcode = 0xf2800510u; // VORR.I32 D0, #0 (immediate value changes it into SHL)
+            if (q)
+            {
+                opcode |= 1 << 6;
+                rm <<= 1;
+                rd <<= 1;
+            }
+
+            uint imm = 1u << ((int)size + 3);
+            imm |= shiftImm & (imm - 1);
+
+            opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
+            opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
+            opcode |= ((imm & 0x3f) << 16) | ((imm & 0x40) << 1);
+
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, z);
+            V128 v2 = MakeVectorE0E1(b, z);
+
+            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise, Description("VSHR.<size> {<Vd>}, <Vm>, #<imm>")]
+        public void Vshr_Imm([Values(0u)] uint rd,
+                             [Values(2u, 0u)] uint rm,
+                             [Values(0u, 1u, 2u, 3u)] uint size,
+                             [Random(RndCnt), Values(0u)] uint shiftImm,
+                             [Random(RndCnt)] ulong z,
+                             [Random(RndCnt)] ulong a,
+                             [Random(RndCnt)] ulong b,
+                             [Values] bool u,
+                             [Values] bool q)
+        {
+            uint opcode = 0xf2800010u; // VMOV.I32 D0, #0 (immediate value changes it into SHR)
+            if (q)
+            {
+                opcode |= 1 << 6;
+                rm <<= 1;
+                rd <<= 1;
+            }
+
+            if (u)
+            {
+                opcode |= 1 << 24;
+            }
+
+            uint imm = 1u << ((int)size + 3);
+            imm |= shiftImm & (imm - 1);
+
+            opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
+            opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
+            opcode |= ((imm & 0x3f) << 16) | ((imm & 0x40) << 1);
+
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, z);
+            V128 v2 = MakeVectorE0E1(b, z);
+
+            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
+
+            CompareAgainstUnicorn();
+        }
+
+        [Test, Pairwise, Description("VSHRN.<size> {<Vd>}, <Vm>, #<imm>")]
+        public void Vshrn_Imm([Values(0u, 1u)] uint rd,
+                              [Values(2u, 0u)] uint rm,
+                              [Values(0u, 1u, 2u)] uint size,
+                              [Random(RndCnt), Values(0u)] uint shiftImm,
+                              [Random(RndCnt)] ulong z,
+                              [Random(RndCnt)] ulong a,
+                              [Random(RndCnt)] ulong b)
+        {
+            uint opcode = 0xf2800810u; // VMOV.I16 D0, #0 (immediate value changes it into SHRN)
+
+            uint imm = 1u << ((int)size + 3);
+            imm |= shiftImm & (imm - 1);
+
+            opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
+            opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
+            opcode |= ((imm & 0x3f) << 16);
+
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0E1(a, z);
+            V128 v2 = MakeVectorE0E1(b, z);
+
+            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
+
+            CompareAgainstUnicorn();
+        }
+#endif
+    }
+}
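The Q-form handling in the tests above follows one repeated pattern: set bit 6 (the Q bit of the ASIMD data-processing encodings) on the doubleword base opcode and double each D-register index so the operands address the even halves of the corresponding Q registers. The sketch below is illustrative only and is not part of the patch; the QFormEncoding class and ApplyQForm helper are hypothetical names.

// Illustrative sketch (hypothetical helper, not part of this patch): the Q-form
// convention repeated inline in the tests, factored into one place.
internal static class QFormEncoding
{
    // Bit 6 is the Q bit of the ASIMD data-processing encodings; doubling the
    // D-register indices keeps Dd/Dn/Dm pointing at the even halves of Qd/Qn/Qm,
    // since each Q register aliases a pair of consecutive D registers.
    public static uint ApplyQForm(uint opcode, ref uint rd, ref uint rn, ref uint rm)
    {
        rd <<= 1;
        rn <<= 1;
        rm <<= 1;

        return opcode | (1u << 6);
    }
}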